在前后端开发联调前 的提交20260223

This commit is contained in:
Neo
2026-02-23 23:02:20 +08:00
parent 254ccb1e77
commit fafc95e64c
1142 changed files with 10366960 additions and 36957 deletions

View File

@@ -0,0 +1,153 @@
# -*- coding: utf-8 -*-
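"""Compare the column lists in the BD_Manual documents with the actual database columns and print a difference report.

Usage: python scripts/ops/_verify_bd_manual_fields.py
Output: stdout (difference report)
"""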
import os
import sys
import re
# Load environment variables from the .env file located two directories
# above this script.
from dotenv import load_dotenv

_ENV_FILE = os.path.join(os.path.dirname(__file__), '..', '..', '.env')
load_dotenv(_ENV_FILE)

import psycopg2

# Connection string of the database to inspect; the script cannot do
# anything useful without it, so bail out early with a non-zero exit code.
DSN = os.environ.get("TEST_DB_DSN")
if DSN is None or DSN == "":
    print("ERROR: TEST_DB_DSN 未设置", file=sys.stderr)
    sys.exit(1)
# Tables to verify, as "schema.table" strings.
# Each comment names the BD_Manual document the following tables belong to.
TABLES_TO_CHECK: list[str] = [
    # assistant_service_records
    "dwd.dwd_assistant_service_log",
    "dwd.dwd_assistant_service_log_ex",

    # recharge_settlements
    "dwd.dwd_recharge_order",
    "dwd.dwd_recharge_order_ex",

    # store_goods_master
    "dwd.dim_store_goods",
    "dwd.dim_store_goods_ex",

    # site_tables_master
    "dwd.dim_table",
    "dwd.dim_table_ex",

    # goods_stock_movements
    "dwd.dwd_goods_stock_movement",

    # goods_stock_summary
    "dwd.dwd_goods_stock_summary",

    # member_balance_changes
    "dwd.dwd_member_balance_change",
    "dwd.dwd_member_balance_change_ex",

    # store_goods_sales_records
    "dwd.dwd_store_goods_sale",
    "dwd.dwd_store_goods_sale_ex",

    # DWS
    "dws.dws_goods_stock_daily_summary",
    "dws.dws_goods_stock_monthly_summary",
]
# BD_Manual file → column names listed in the document.
# NOTE(review): nothing in the visible part of this file reads or writes this
# mapping — confirm whether it is still needed.
BD_MANUAL_COLS: dict[str, list[str]] = {}
def parse_md_table_cols(filepath: str) -> dict[str, list[str]]:
    """Extract the documented column names of each table from a BD_Manual file.

    A section starts at a numbered level-2 heading such as
    ``## 1. dwd_assistant_service_log``; the leading identifier of the heading
    is taken as the table name.  Inside a section, a markdown table is only
    read when its header row starts with one of the recognised captions
    (``DWD 列名``, ``DWS 列名``, ``ODS 字段``, ``日期``).  The first cell of every
    following row is the column name.

    Args:
        filepath: Path of the markdown file to read (UTF-8).

    Returns:
        Mapping of table name to the column names in document order.  Tables
        for which no column row was found are absent from the mapping.

    Raises:
        OSError: If the file cannot be opened.
    """
    result: dict[str, list[str]] = {}
    current_table = None
    in_table = False
    with open(filepath, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()

            # Section heading, e.g. "## 1. dwd_assistant_service_log".
            m = re.match(r'^##\s+\d+\.\s+(\w+)', line)
            if m:
                heading_word = m.group(1)
                # \w is Unicode-aware, so CJK text glued to the identifier
                # ("dwd_foo主表") would become part of the name.  Keep only
                # the leading ASCII identifier when there is one.
                ascii_prefix = re.match(r'[A-Za-z0-9_]+', heading_word)
                current_table = ascii_prefix.group(0) if ascii_prefix else heading_word
                in_table = False
                continue

            # A line without a pipe ends the current markdown table; a later
            # table in the same section must present its own recognised
            # header before its rows are treated as column definitions.
            if '|' not in line:
                in_table = False
                continue
            if current_table is None:
                continue

            cells = [c.strip() for c in line.split('|')]
            cells = [c for c in cells if c]
            if len(cells) < 2:
                continue
            first = cells[0]

            # Header separator row (|---|---| or |:---|...).
            if first.startswith(('---', ':---')):
                continue
            # Recognised header row: the rows that follow are columns.
            if first in ('DWD 列名', 'DWS 列名', 'ODS 字段', '日期'):
                in_table = True
                continue
            if not in_table:
                continue

            # Drop the backticks around the column name; skip struck-through
            # entries and the "scd2_*" wildcard placeholder.
            col = first.strip('`').strip()
            if col and not col.startswith('~~') and col != 'scd2_*':
                result.setdefault(current_table, []).append(col)
    return result
# Parse every BD_Manual file.
# The paths are relative to the repository root.
BD_FILES = [
    "docs/database/BD_Manual_assistant_service_records.md",
    "docs/database/BD_Manual_recharge_settlements.md",
    "docs/database/BD_Manual_store_goods_master.md",
    "docs/database/BD_Manual_site_tables_master.md",
    "docs/database/BD_Manual_goods_stock_movements.md",
    "docs/database/BD_Manual_goods_stock_summary.md",
    "docs/database/BD_Manual_member_balance_changes.md",
    "docs/database/BD_Manual_store_goods_sales_records.md",
    "docs/database/BD_Manual_dws_goods_stock_summary.md",
]

# Resolve the documents against the script location (two directories up, the
# same anchor the .env lookup uses) instead of the current working directory,
# so the script also works when it is not launched from the repository root.
_REPO_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..')
)

# Table name → documented column names, merged over all documents.  When a
# table appears in more than one document, the later document wins.
all_doc_cols: dict[str, list[str]] = {}
for rel_path in BD_FILES:
    all_doc_cols.update(parse_md_table_cols(os.path.join(_REPO_ROOT, rel_path)))
# Query the actual database columns and compare them with the documents.
# The SCD2 bookkeeping columns are left out of the comparison on BOTH sides:
# the documents normally stand for them with a "scd2_*" placeholder, but a
# document that spells them out must not be reported as a mismatch.
SCD2_COLS = {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}
conn = psycopg2.connect(DSN)
try:
    cur = conn.cursor()
    for full_table in TABLES_TO_CHECK:
        schema, table = full_table.split('.')
        cur.execute("""
            SELECT column_name
            FROM information_schema.columns
            WHERE table_schema = %s AND table_name = %s
            ORDER BY ordinal_position
        """, (schema, table))
        db_cols = [row[0] for row in cur.fetchall()]
        db_cols_no_scd2 = [c for c in db_cols if c not in SCD2_COLS]

        # Documents are keyed by bare table name (no schema prefix).
        doc_cols = all_doc_cols.get(table, [])
        if not doc_cols:
            print(f"\n⚠️ {full_table}: 文档中未找到列定义(表名 '{table}' 未匹配)")
            print(f" DB 列 ({len(db_cols)}): {db_cols}")
            continue
        doc_cols_no_scd2 = [c for c in doc_cols if c not in SCD2_COLS]

        doc_set = set(doc_cols_no_scd2)
        db_set = set(db_cols_no_scd2)
        in_doc_not_db = doc_set - db_set
        in_db_not_doc = db_set - doc_set

        # Both branches were the empty string before, which made the status
        # marker meaningless; use distinct markers for match / mismatch.
        status = "✅" if not in_doc_not_db and not in_db_not_doc else "❌"
        print(f"\n{status} {full_table}: 文档 {len(doc_cols_no_scd2)} 列, DB {len(db_cols_no_scd2)} 列 (不含 SCD2)")
        if in_doc_not_db:
            print(f" 📄 文档有但 DB 无: {sorted(in_doc_not_db)}")
        if in_db_not_doc:
            print(f" 🗄️ DB 有但文档无: {sorted(in_db_not_doc)}")
finally:
    # Always release the connection, even when a query fails.
    conn.close()