在前后端开发联调前的提交20260223

2026-02-23 23:02:20 +08:00
parent 254ccb1e77
commit fafc95e64c
1142 changed files with 10366960 additions and 36957 deletions
--- a/scripts/ops/_verify_bd_manual_fields.py
+++ b/scripts/ops/_verify_bd_manual_fields.py
@@ -0,0 +1,153 @@
+# -*- coding: utf-8 -*-
+"""Compare the column lists in the BD_Manual documents with the actual database columns and print a diff report.
+
+Usage: python scripts/ops/_verify_bd_manual_fields.py
+Output: stdout (diff report)
+"""
+import os
+import sys
+import re
+
+# 加载 .env
+from dotenv import load_dotenv
+load_dotenv(os.path.join(os.path.dirname(__file__), '..', '..', '.env'))
+
+import psycopg2
+
+# Connection string of the database to inspect; the script aborts without it.
+DSN = os.environ.get("TEST_DB_DSN")
+if DSN is None or DSN == "":
+    print("ERROR: TEST_DB_DSN 未设置", file=sys.stderr)
+    raise SystemExit(1)
+
+# Schema-qualified tables to verify, grouped by the BD_Manual document
+# (named in the comment above each group) that describes them.
+TABLES_TO_CHECK = [
+    # assistant_service_records
+    "dwd.dwd_assistant_service_log",
+    "dwd.dwd_assistant_service_log_ex",
+    # recharge_settlements
+    "dwd.dwd_recharge_order",
+    "dwd.dwd_recharge_order_ex",
+    # store_goods_master
+    "dwd.dim_store_goods",
+    "dwd.dim_store_goods_ex",
+    # site_tables_master
+    "dwd.dim_table",
+    "dwd.dim_table_ex",
+    # goods_stock_movements
+    "dwd.dwd_goods_stock_movement",
+    # goods_stock_summary
+    "dwd.dwd_goods_stock_summary",
+    # member_balance_changes
+    "dwd.dwd_member_balance_change",
+    "dwd.dwd_member_balance_change_ex",
+    # store_goods_sales_records
+    "dwd.dwd_store_goods_sale",
+    "dwd.dwd_store_goods_sale_ex",
+    # DWS
+    "dws.dws_goods_stock_daily_summary",
+    "dws.dws_goods_stock_monthly_summary",
+]
+
+# BD_Manual file -> column names listed in that document.
+# NOTE(review): never populated or read in the visible code.
+BD_MANUAL_COLS: dict[str, list[str]] = {}
+
+# Section heading that names a table, e.g. "## 1. dwd_assistant_service_log（主表）".
+_SECTION_HEADING_RE = re.compile(r'^##\s+\d+\.\s+(\w+)')
+
+# First-cell values that identify the header row of a column-listing table.
+_COLUMN_TABLE_HEADERS = ('DWD 列名', 'DWS 列名', 'ODS 字段', '日期')
+
+
+def parse_md_table_cols(filepath: str) -> dict[str, list[str]]:
+    """Extract the documented column names of each table from a BD_Manual markdown file.
+
+    A "## <number>. <table_name>" heading starts a section for that table.
+    Inside a section, only markdown tables whose header row starts with one of
+    the recognised header cells are read; the first cell of each data row is
+    taken as a column name (surrounding backticks removed). Strikethrough
+    entries ("~~...") and the "scd2_*" placeholder are ignored.
+
+    Args:
+        filepath: Path of the markdown file to parse (read as UTF-8).
+
+    Returns:
+        Mapping of table name to its column names in document order. Tables
+        without any extracted column are absent from the mapping.
+
+    Raises:
+        OSError: If the file cannot be opened.
+    """
+    result: dict[str, list[str]] = {}
+    current_table = None
+    in_table = False
+
+    with open(filepath, 'r', encoding='utf-8') as f:
+        for raw_line in f:
+            line = raw_line.strip()
+
+            heading = _SECTION_HEADING_RE.match(line)
+            if heading:
+                current_table = heading.group(1)
+                in_table = False
+                continue
+
+            if '|' not in line:
+                # A line without a pipe ends the current markdown table. Reset
+                # the flag so rows of a later, unrelated table in the same
+                # section (indexes, notes, ...) are not mistaken for columns.
+                in_table = False
+                continue
+
+            if current_table is None:
+                continue
+
+            cells = [cell for cell in (part.strip() for part in line.split('|')) if cell]
+            if len(cells) < 2:
+                continue
+
+            first = cells[0]
+            # Skip the header separator row (---, :---, :---:).
+            if first.startswith(('---', ':---')):
+                continue
+            # A recognised header row starts a column table.
+            if first in _COLUMN_TABLE_HEADERS:
+                in_table = True
+                continue
+            if not in_table:
+                continue
+
+            col = first.strip('`').strip()
+            if col and not col.startswith('~~') and col != 'scd2_*':
+                result.setdefault(current_table, []).append(col)
+
+    return result
+
+
+# BD_Manual documents to parse, given as paths relative to the repository root.
+BD_FILES = [
+    "docs/database/BD_Manual_assistant_service_records.md",
+    "docs/database/BD_Manual_recharge_settlements.md",
+    "docs/database/BD_Manual_store_goods_master.md",
+    "docs/database/BD_Manual_site_tables_master.md",
+    "docs/database/BD_Manual_goods_stock_movements.md",
+    "docs/database/BD_Manual_goods_stock_summary.md",
+    "docs/database/BD_Manual_member_balance_changes.md",
+    "docs/database/BD_Manual_store_goods_sales_records.md",
+    "docs/database/BD_Manual_dws_goods_stock_summary.md",
+]
+
+# Resolve the documents against the repository root (two levels above this
+# script, the same anchor used to locate .env) instead of the current working
+# directory, so the script can be launched from anywhere.
+_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
+
+# Table name -> documented column names, merged over all BD_Manual files.
+# If two files document the same table, the later file's list replaces the earlier one.
+all_doc_cols: dict[str, list[str]] = {}
+for f in BD_FILES:
+    all_doc_cols.update(parse_md_table_cols(os.path.join(_REPO_ROOT, f)))
+
+# Columns dropped from the database side before comparing with the documents.
+SCD2_COLS = {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}
+
+conn = psycopg2.connect(DSN)
+try:
+    cur = conn.cursor()
+    for full_table in TABLES_TO_CHECK:
+        schema, table = full_table.split('.')
+
+        # Read the table's actual columns in their declared order.
+        cur.execute("""
+            SELECT column_name
+            FROM information_schema.columns
+            WHERE table_schema = %s AND table_name = %s
+            ORDER BY ordinal_position
+        """, (schema, table))
+        db_cols = [record[0] for record in cur.fetchall()]
+        db_cols_no_scd2 = [name for name in db_cols if name not in SCD2_COLS]
+
+        doc_cols = all_doc_cols.get(table, [])
+
+        if doc_cols:
+            # Two-way set difference between documented and actual columns.
+            in_doc_not_db = set(doc_cols).difference(db_cols_no_scd2)
+            in_db_not_doc = set(db_cols_no_scd2).difference(doc_cols)
+
+            if in_doc_not_db or in_db_not_doc:
+                status = "❌"
+            else:
+                status = "✅"
+            print(f"\n{status} {full_table}: 文档 {len(doc_cols)} 列, DB {len(db_cols_no_scd2)} 列 (不含 SCD2)")
+
+            if in_doc_not_db:
+                print(f"   📄 文档有但 DB 无: {sorted(in_doc_not_db)}")
+            if in_db_not_doc:
+                print(f"   🗄️  DB 有但文档无: {sorted(in_db_not_doc)}")
+        else:
+            # No documented columns were parsed under this table name;
+            # print the database columns so the mismatch can be investigated.
+            print(f"\n⚠️  {full_table}: 文档中未找到列定义（表名 '{table}' 未匹配）")
+            print(f"   DB 列 ({len(db_cols)}): {db_cols}")
+finally:
+    conn.close()