微信小程序页面迁移校验之前 P5任务处理之前
This commit is contained in:
149
scripts/ops/_db_docs_reconcile.py
Normal file
149
scripts/ops/_db_docs_reconcile.py
Normal file
@@ -0,0 +1,149 @@
|
||||
"""
|
||||
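One-off audit script: full reconciliation of the database documentation.
Connects to the test databases, queries information_schema, and compares the
result against the existing documents under docs/database/.
Prints a JSON summary to stdout.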
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load the .env file located two directories above this script's folder.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")

# Connection strings for the two test databases, read from the environment.
TEST_ETL_DSN = os.environ.get("TEST_DB_DSN")
TEST_APP_DSN = os.environ.get("TEST_APP_DB_DSN")

# Both DSNs are required; abort with exit status 1 when either is missing/empty.
if not (TEST_ETL_DSN and TEST_APP_DSN):
    print("ERROR: TEST_DB_DSN or TEST_APP_DB_DSN not set", file=sys.stderr)
    sys.exit(1)
|
||||
|
||||
import psycopg2 # noqa: E402
|
||||
|
||||
|
||||
def query_tables_and_columns(dsn: str, schemas: list[str]) -> dict:
    """Return every base table and its columns for the given schemas.

    Connects to the database at ``dsn`` and reads
    ``information_schema.tables`` (rows with ``table_type = 'BASE TABLE'``
    only) and ``information_schema.columns`` for the listed schemas.

    Args:
        dsn: Connection string handed to ``psycopg2.connect``.
        schemas: Schema names to inspect.

    Returns:
        A dict keyed by ``"schema.table"``. Each value has the keys
        ``schema``, ``table`` and ``columns``; ``columns`` is a list of dicts
        with the keys ``name``, ``type``, ``nullable`` and ``default``, in
        ``ordinal_position`` order. An empty ``schemas`` yields ``{}`` without
        opening a connection.
    """
    schemas = list(schemas)
    if not schemas:
        # An empty list would render "IN ()", which is a SQL syntax error.
        return {}

    # One "%s" per schema; the values themselves are always bound as query
    # parameters, never interpolated into the SQL text.
    placeholders = ",".join(["%s"] * len(schemas))

    conn = psycopg2.connect(dsn)
    try:
        with conn.cursor() as cur:
            # Base tables only.
            cur.execute(
                f"""
                SELECT table_schema, table_name
                FROM information_schema.tables
                WHERE table_schema IN ({placeholders})
                  AND table_type = 'BASE TABLE'
                ORDER BY table_schema, table_name
                """,
                schemas,
            )
            tables = cur.fetchall()

            # Columns of every relation in those schemas.
            cur.execute(
                f"""
                SELECT table_schema, table_name, column_name,
                       data_type, is_nullable, column_default
                FROM information_schema.columns
                WHERE table_schema IN ({placeholders})
                ORDER BY table_schema, table_name, ordinal_position
                """,
                schemas,
            )
            columns = cur.fetchall()
    finally:
        conn.close()

    result = {
        f"{schema}.{table}": {"schema": schema, "table": table, "columns": []}
        for schema, table in tables
    }

    for schema, table, col_name, data_type, nullable, default in columns:
        entry = result.get(f"{schema}.{table}")
        if entry is None:
            # The column belongs to a relation that was not returned by the
            # base-table query above, so it is not part of the result.
            continue
        entry["columns"].append({
            "name": col_name,
            "type": data_type,
            "nullable": nullable,
            "default": default,
        })

    return result
|
||||
|
||||
|
||||
def scan_existing_docs(docs_dir: Path) -> set[str]:
    """Collect lower-cased table references found in ``BD_Manual_*.md`` files.

    For every file in ``docs_dir`` matching ``BD_Manual_*.md`` two kinds of
    entries are recorded:

    * the file stem with ``BD_Manual_`` removed, and
    * every ``schema.table`` token in the file content whose schema part is
      one of ``ods``, ``dwd``, ``dws``, ``meta``, ``core``, ``app``,
      ``public`` or ``auth`` (the schema part is matched case-sensitively).

    Args:
        docs_dir: Directory that holds the documentation files.

    Returns:
        The set of lower-cased keywords and ``schema.table`` references.
        A file that cannot be read or decoded as UTF-8 still contributes its
        file-name keyword; a warning is written to stderr and the scan
        continues with the next file.
    """
    known_schemas = frozenset(
        ("ods", "dwd", "dws", "meta", "core", "app", "public", "auth")
    )
    # Compiled once per call instead of once per file.
    ref_pattern = re.compile(r"(\w+)\.(\w+)")

    documented: set[str] = set()
    for doc_path in docs_dir.glob("BD_Manual_*.md"):
        # Keyword derived from the file name.
        documented.add(doc_path.stem.replace("BD_Manual_", "").lower())

        # schema.table references from the file content.
        try:
            content = doc_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            # Best-effort scan: report on stderr (stdout carries the JSON
            # summary) and keep going instead of hiding the failure.
            print(f"WARNING: could not read {doc_path}: {exc}", file=sys.stderr)
            continue

        for match in ref_pattern.finditer(content):
            schema, table = match.groups()
            if schema in known_schemas:
                documented.add(f"{schema}.{table}".lower())

    return documented
|
||||
|
||||
|
||||
def reconcile(db_tables: dict, documented: set[str]) -> dict:
    """Compare database tables with documented references.

    A table counts as documented when either its lower-cased
    ``"schema.table"`` key or its lower-cased bare table name is a member of
    ``documented``.

    Args:
        db_tables: Mapping of ``"schema.table"`` to a dict that has at least
            the keys ``table`` and ``columns``.
        documented: Lower-cased names regarded as documented.

    Returns:
        A dict with ``total_db_tables``, ``documented_refs``,
        ``missing_docs`` (one ``{"schema_table", "column_count"}`` entry per
        undocumented table, ordered by key) and ``missing_count``.
    """
    undocumented = []
    for qualified_name in sorted(db_tables):
        info = db_tables[qualified_name]
        candidates = (qualified_name.lower(), info["table"].lower())
        if any(name in documented for name in candidates):
            # Covered by a qualified reference or by the bare table name.
            continue
        undocumented.append({
            "schema_table": qualified_name,
            "column_count": len(info["columns"]),
        })

    summary = {
        "total_db_tables": len(db_tables),
        "documented_refs": len(documented),
        "missing_docs": undocumented,
        "missing_count": len(undocumented),
    }
    return summary
|
||||
|
||||
|
||||
def main():
    """Run the reconciliation and print the JSON summary to stdout.

    Queries both test databases, merges the table listings, scans the
    ``docs/database`` directory two levels above this script's folder, and
    prints the result of ``reconcile`` as indented JSON.
    """
    repo_root = Path(__file__).resolve().parents[2]
    docs_dir = repo_root / "docs" / "database"

    all_tables = {}
    # ETL database (six-layer schema).
    all_tables.update(
        query_tables_and_columns(
            TEST_ETL_DSN, ["ods", "dwd", "dws", "meta", "core", "app"]
        )
    )
    # Application database; on a key clash its entries replace the ETL ones.
    all_tables.update(
        query_tables_and_columns(TEST_APP_DSN, ["public", "auth"])
    )

    # Scan the existing documents, then reconcile them against the tables.
    documented = scan_existing_docs(docs_dir)
    summary = reconcile(all_tables, documented)

    # ensure_ascii=False keeps non-ASCII text readable in the output.
    print(json.dumps(summary, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user