微信小程序页面迁移校验之前 P5任务处理之前

2026-03-09 01:19:21 +08:00
parent 263bf96035
commit 6e20987d2f
1112 changed files with 153824 additions and 219694 deletions
--- a/scripts/ops/_db_docs_reconcile.py
+++ b/scripts/ops/_db_docs_reconcile.py
@@ -0,0 +1,149 @@
+"""
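+One-off audit script: full reconciliation of the database documentation.
+Connects to the test databases, queries information_schema, and compares the result with the existing documents under docs/database/.
+Prints a JSON summary to stdout.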
+"""
+from __future__ import annotations
+
+import json
+import os
+import re
+import sys
+from pathlib import Path
+
+from dotenv import load_dotenv
+
+# Load the .env file located in the third ancestor directory of this file.
+load_dotenv(Path(__file__).resolve().parents[2] / ".env")
+
+# Connection strings for the two test databases, read from the environment.
+TEST_ETL_DSN = os.getenv("TEST_DB_DSN")
+TEST_APP_DSN = os.getenv("TEST_APP_DB_DSN")
+
+# Both DSNs are required; abort with exit status 1 when either is missing or empty.
+if not (TEST_ETL_DSN and TEST_APP_DSN):
+    print("ERROR: TEST_DB_DSN or TEST_APP_DB_DSN not set", file=sys.stderr)
+    raise SystemExit(1)
+
+import psycopg2  # noqa: E402
+
+
+def query_tables_and_columns(dsn: str, schemas: list[str]) -> dict:
+    """Return every base table, with its columns, found in the given schemas.
+
+    Args:
+        dsn: Connection string handed to ``psycopg2.connect``.
+        schemas: Schema names to look up in ``information_schema``.
+
+    Returns:
+        A dict keyed by ``"schema.table"``. Each value holds ``schema``,
+        ``table`` and ``columns``; ``columns`` is a list of dicts with the
+        keys ``name``, ``type``, ``nullable`` and ``default``, ordered by
+        ``ordinal_position``. An empty ``schemas`` list yields ``{}``
+        without opening a connection.
+    """
+    # An empty list would render as ``IN ()``, which is a SQL syntax error;
+    # there is nothing to look up, so skip the database round trip.
+    if not schemas:
+        return {}
+
+    # Only the placeholder list is interpolated into the SQL text; the
+    # schema names themselves are passed as bound parameters.
+    placeholders = ",".join(["%s"] * len(schemas))
+    params = list(schemas)
+
+    conn = psycopg2.connect(dsn)
+    try:
+        with conn.cursor() as cur:
+            # Base tables only.
+            cur.execute(
+                f"""
+                SELECT table_schema, table_name
+                FROM information_schema.tables
+                WHERE table_schema IN ({placeholders})
+                  AND table_type = 'BASE TABLE'
+                ORDER BY table_schema, table_name
+                """,
+                params,
+            )
+            tables = cur.fetchall()
+
+            # Columns of every relation in those schemas.
+            cur.execute(
+                f"""
+                SELECT table_schema, table_name, column_name,
+                       data_type, is_nullable, column_default
+                FROM information_schema.columns
+                WHERE table_schema IN ({placeholders})
+                ORDER BY table_schema, table_name, ordinal_position
+                """,
+                params,
+            )
+            columns = cur.fetchall()
+    finally:
+        conn.close()
+
+    result = {
+        f"{schema}.{table}": {"schema": schema, "table": table, "columns": []}
+        for schema, table in tables
+    }
+
+    for schema, table, col_name, data_type, nullable, default in columns:
+        # The column query is not filtered by table_type, so rows belonging
+        # to relations absent from ``tables`` are skipped here.
+        entry = result.get(f"{schema}.{table}")
+        if entry is None:
+            continue
+        entry["columns"].append({
+            "name": col_name,
+            "type": data_type,
+            "nullable": nullable,
+            "default": default,
+        })
+
+    return result
+
+
+def scan_existing_docs(docs_dir: Path) -> set[str]:
+    """Collect lower-cased table references from ``BD_Manual_*.md`` files.
+
+    Two kinds of entries are gathered for every matching file:
+
+    * the file stem with ``BD_Manual_`` removed, and
+    * every ``schema.table`` token in the file body whose schema part is
+      one of the known schema names (compared case-insensitively).
+
+    Args:
+        docs_dir: Directory that is searched (non-recursively) for
+            ``BD_Manual_*.md`` files.
+
+    Returns:
+        The set of lower-cased keywords and ``schema.table`` references.
+        A file that cannot be read or decoded as UTF-8 still contributes
+        its file-name keyword; a warning is written to stderr.
+    """
+    known_schemas = frozenset(
+        ("ods", "dwd", "dws", "meta", "core", "app", "public", "auth")
+    )
+    # Compiled once instead of on every file.
+    ref_pattern = re.compile(r"(\w+)\.(\w+)")
+
+    documented: set[str] = set()
+    for doc_path in docs_dir.glob("BD_Manual_*.md"):
+        # Keyword derived from the file name.
+        documented.add(doc_path.stem.replace("BD_Manual_", "").lower())
+
+        # Scanning stays best-effort, but a skipped file is reported rather
+        # than silently ignored, and unrelated errors are no longer hidden.
+        try:
+            content = doc_path.read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError) as exc:
+            print(f"WARNING: cannot read {doc_path}: {exc}", file=sys.stderr)
+            continue
+
+        for match in ref_pattern.finditer(content):
+            # Lower-case before filtering so ``ODS.Orders`` is recognised;
+            # the stored reference is lower-cased either way.
+            schema = match.group(1).lower()
+            if schema in known_schemas:
+                documented.add(f"{schema}.{match.group(2).lower()}")
+    return documented
+
+
+def reconcile(db_tables: dict, documented: set[str]) -> dict:
+    """Report the database tables that no document refers to.
+
+    A table counts as documented when either its lower-cased
+    ``schema.table`` key or its lower-cased bare table name is present in
+    ``documented``.
+
+    Args:
+        db_tables: Mapping of ``"schema.table"`` to a dict that has at
+            least the keys ``table`` and ``columns``.
+        documented: Lower-cased references gathered from the documents.
+
+    Returns:
+        A dict with ``total_db_tables``, ``documented_refs``,
+        ``missing_docs`` (one entry per undocumented table, in sorted key
+        order, with ``schema_table`` and ``column_count``) and
+        ``missing_count``.
+    """
+    undocumented = [
+        {"schema_table": name, "column_count": len(meta["columns"])}
+        for name, meta in sorted(db_tables.items())
+        if not (
+            name.lower() in documented
+            or meta["table"].lower() in documented
+        )
+    ]
+    summary = {
+        "total_db_tables": len(db_tables),
+        "documented_refs": len(documented),
+        "missing_docs": undocumented,
+        "missing_count": len(undocumented),
+    }
+    return summary
+
+
+def main():
+    """Query both test databases, reconcile against the docs, print JSON.
+
+    Tables are read from the ETL database (schemas ``ods``, ``dwd``,
+    ``dws``, ``meta``, ``core``, ``app``) and from the application database
+    (schemas ``public``, ``auth``), merged into one mapping, compared with
+    the references found under ``docs/database``, and the resulting summary
+    is written to stdout as indented JSON.
+    """
+    docs_dir = Path(__file__).resolve().parents[2] / "docs" / "database"
+
+    # Start from the ETL database tables ...
+    all_tables = dict(
+        query_tables_and_columns(
+            TEST_ETL_DSN,
+            ["ods", "dwd", "dws", "meta", "core", "app"],
+        )
+    )
+    # ... then layer the application database on top; on a key clash the
+    # application entry wins.
+    all_tables.update(
+        query_tables_and_columns(TEST_APP_DSN, ["public", "auth"])
+    )
+
+    report = reconcile(all_tables, scan_existing_docs(docs_dir))
+
+    # ensure_ascii=False keeps non-ASCII characters unescaped in the output.
+    print(json.dumps(report, ensure_ascii=False, indent=2))
+
+
+# Run the reconciliation only when this file is executed as a script.
+if __name__ == "__main__":
+    main()