在前后端开发联调前 的提交20260223
This commit is contained in:
153
scripts/ops/_verify_bd_manual_fields.py
Normal file
153
scripts/ops/_verify_bd_manual_fields.py
Normal file
@@ -0,0 +1,153 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""比对 BD_Manual 文档中的字段列表与数据库实际列,输出差异报告。
|
||||
|
||||
用法:python scripts/ops/_verify_bd_manual_fields.py
|
||||
输出:stdout(差异报告)
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
|
||||
# Load environment variables from the .env file located two directories
# above this script, before anything reads os.environ.
from dotenv import load_dotenv

load_dotenv(os.path.join(os.path.dirname(__file__), '..', '..', '.env'))

import psycopg2

# Connection string of the database to inspect; the script cannot do
# anything useful without it, so abort with exit status 1 when it is unset
# or empty.
DSN = os.environ.get("TEST_DB_DSN")
if not DSN:
    sys.stderr.write("ERROR: TEST_DB_DSN 未设置\n")
    raise SystemExit(1)
|
||||
|
||||
# Tables to verify, as "schema.table". Each comment names the BD_Manual
# document the following tables belong to.
TABLES_TO_CHECK = [
    # assistant_service_records
    "dwd.dwd_assistant_service_log",
    "dwd.dwd_assistant_service_log_ex",
    # recharge_settlements
    "dwd.dwd_recharge_order",
    "dwd.dwd_recharge_order_ex",
    # store_goods_master
    "dwd.dim_store_goods",
    "dwd.dim_store_goods_ex",
    # site_tables_master
    "dwd.dim_table",
    "dwd.dim_table_ex",
    # goods_stock_movements
    "dwd.dwd_goods_stock_movement",
    # goods_stock_summary
    "dwd.dwd_goods_stock_summary",
    # member_balance_changes
    "dwd.dwd_member_balance_change",
    "dwd.dwd_member_balance_change_ex",
    # store_goods_sales_records
    "dwd.dwd_store_goods_sale",
    "dwd.dwd_store_goods_sale_ex",
    # DWS
    "dws.dws_goods_stock_daily_summary",
    "dws.dws_goods_stock_monthly_summary",
]

# BD_Manual file -> column names listed in the document.
# NOTE(review): nothing in this file reads or fills this mapping.
BD_MANUAL_COLS: dict[str, list[str]] = {}
|
||||
|
||||
def parse_md_table_cols(filepath: str) -> dict[str, list[str]]:
    """Extract the documented column names of each table from a BD_Manual file.

    A table section starts at a numbered level-2 heading such as
    ``## 1. dwd_assistant_service_log(主表)``; the first word after the number
    is used as the table name. Inside a section, markdown table rows are
    collected once a recognised header row (first cell ``DWD 列名``,
    ``DWS 列名``, ``ODS 字段`` or ``日期``) has been seen. The first cell of each
    following row, with surrounding backticks removed, is the column name.
    Struck-through names (``~~...``) and the ``scd2_*`` placeholder are
    skipped.

    Cell positions are preserved when a row is split: a row whose first cell
    is blank is ignored instead of having its second cell mistaken for the
    column name, and a column whose remaining cells are blank is still
    recorded.

    Args:
        filepath: Path of the markdown file; it is read as UTF-8.

    Returns:
        Mapping of table name to column names in document order. Tables for
        which no column was collected are absent from the mapping.

    Raises:
        OSError: If the file cannot be opened.
    """
    header_cells = ('DWD 列名', 'DWS 列名', 'ODS 字段', '日期')
    heading_re = re.compile(r'^##\s+\d+\.\s+(\w+)')
    # Alignment row cell of a markdown table: "---", ":--", "--:", ":-:" ...
    separator_re = re.compile(r':?-+:?')

    result: dict[str, list[str]] = {}
    current_table = None
    # NOTE(review): this flag is only cleared by the next numbered heading, so
    # every later pipe-delimited row of the same section is collected too,
    # even if it belongs to a different markdown table - confirm intended.
    in_table = False

    with open(filepath, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()

            # Section heading, e.g. "## 1. dwd_assistant_service_log(主表)"
            m = heading_re.match(line)
            if m:
                current_table = m.group(1)
                in_table = False
                continue

            if not current_table or '|' not in line:
                continue

            # Drop only the outer pipes so that empty cells keep their
            # position; filtering them out would shift the columns.
            inner = line.removeprefix('|').removesuffix('|')
            cells = [c.strip() for c in inner.split('|')]
            if len(cells) < 2:
                continue
            first = cells[0]

            # Header/body separator row
            if first.startswith(('---', ':---')) or separator_re.fullmatch(first):
                continue

            # Header row: rows after it are column definitions
            if first in header_cells:
                in_table = True
                continue

            if not in_table:
                continue

            # Column name without the surrounding backticks
            col = first.strip('`').strip()
            if col and not col.startswith('~~') and col != 'scd2_*':
                result.setdefault(current_table, []).append(col)

    return result
|
||||
|
||||
|
||||
# BD_Manual documents to parse, given relative to the repository root.
BD_FILES = [
    "docs/database/BD_Manual_assistant_service_records.md",
    "docs/database/BD_Manual_recharge_settlements.md",
    "docs/database/BD_Manual_store_goods_master.md",
    "docs/database/BD_Manual_site_tables_master.md",
    "docs/database/BD_Manual_goods_stock_movements.md",
    "docs/database/BD_Manual_goods_stock_summary.md",
    "docs/database/BD_Manual_member_balance_changes.md",
    "docs/database/BD_Manual_store_goods_sales_records.md",
    "docs/database/BD_Manual_dws_goods_stock_summary.md",
]

# Resolve the documents against the directory two levels above this script
# (the same anchor the .env lookup uses) rather than the current working
# directory, so the script also works when launched from elsewhere.
# os.path.join leaves an absolute entry in BD_FILES untouched.
_REPO_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..')
)

# Table name -> documented column names, merged over all BD_Manual files.
# A table name appearing in several files keeps the columns of the last one.
all_doc_cols: dict[str, list[str]] = {}
for doc_path in BD_FILES:
    all_doc_cols.update(parse_md_table_cols(os.path.join(_REPO_ROOT, doc_path)))
|
||||
|
||||
# Read the actual columns of every table from the database and report the
# differences against the documented columns on stdout.
# SCD2 bookkeeping columns are excluded from the comparison.
SCD2_COLS = {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}

conn = psycopg2.connect(DSN)
try:
    cur = conn.cursor()
    for full_table in TABLES_TO_CHECK:
        schema, table = full_table.split('.')
        cur.execute("""
            SELECT column_name
            FROM information_schema.columns
            WHERE table_schema = %s AND table_name = %s
            ORDER BY ordinal_position
        """, (schema, table))
        actual_cols = [name for (name, *_rest) in cur.fetchall()]
        compared_cols = [name for name in actual_cols if name not in SCD2_COLS]

        # Documented columns are keyed by the bare table name (no schema).
        documented_cols = all_doc_cols.get(table, [])
        if not documented_cols:
            print(f"\n⚠️ {full_table}: 文档中未找到列定义(表名 '{table}' 未匹配)")
            print(f" DB 列 ({len(actual_cols)}): {actual_cols}")
            continue

        documented_set = set(documented_cols)
        compared_set = set(compared_cols)
        only_in_doc = sorted(documented_set - compared_set)
        only_in_db = sorted(compared_set - documented_set)

        status = "❌" if only_in_doc or only_in_db else "✅"
        print(f"\n{status} {full_table}: 文档 {len(documented_cols)} 列, DB {len(compared_cols)} 列 (不含 SCD2)")

        if only_in_doc:
            print(f" 📄 文档有但 DB 无: {only_in_doc}")
        if only_in_db:
            print(f" 🗄️ DB 有但文档无: {only_in_db}")
finally:
    conn.close()
|
||||
Reference in New Issue
Block a user