# -*- coding: utf-8 -*-
"""Check the bd_manual Markdown docs against the database schema.

For every table found in the target schema, the matching
``BD_manual_<table>.md`` file is parsed and compared column by column with
``information_schema.columns``.  Differences are printed to stdout and the
full result is written to ``tmp/bd_manual_diff.json``.
"""
import json
import os
import re
from contextlib import closing
from pathlib import Path

try:
    import psycopg2
except ImportError:
    # Keep the module importable without the driver so the pure helpers
    # (parse_md_fields, compare_and_report) stay usable; get_db_schema()
    # raises ImportError when it is actually needed.
    psycopg2 = None

# NOTE(review): credentials are committed in source. Prefer supplying the
# connection string through the BD_MANUAL_DSN environment variable; the
# literal below is kept only as a backward-compatible default.
DSN = os.environ.get(
    'BD_MANUAL_DSN',
    'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test',
)

# Type mapping (PostgreSQL data_type -> spelling used in the docs).
TYPE_MAP = {
    'bigint': 'BIGINT',
    'integer': 'INTEGER',
    'smallint': 'SMALLINT',
    'numeric': 'NUMERIC',
    'text': 'TEXT',
    'character varying': 'VARCHAR',
    'boolean': 'BOOLEAN',
    'timestamp with time zone': 'TIMESTAMPTZ',
    'timestamp without time zone': 'TIMESTAMP',
    'date': 'DATE',
    'jsonb': 'JSONB',
    'json': 'JSON',
}

# Matches one numbered field row of a Markdown table:
# | <index> | <column name> | <type> | <nullable> |
_FIELD_ROW_RE = re.compile(
    r'\|\s*\d+\s*\|\s*(\w+)\s*\|\s*([^|]+)\s*\|\s*(\w+)\s*\|'
)

# Keys of a diff dict that indicate something worth reporting.
_ISSUE_KEYS = ('missing_in_doc', 'extra_in_doc', 'type_mismatches', 'missing_doc')

_COLUMNS_SQL = """
    SELECT table_name, column_name, data_type, is_nullable,
           COALESCE(character_maximum_length, numeric_precision) as max_length,
           numeric_scale
    FROM information_schema.columns
    WHERE table_schema = %s
    ORDER BY table_name, ordinal_position
"""


def _format_type(data_type, max_len, scale):
    """Return the doc-style type string for one information_schema column."""
    if data_type == 'numeric' and max_len and scale is not None:
        return f'NUMERIC({max_len},{scale})'
    if data_type == 'character varying' and max_len:
        return f'VARCHAR({max_len})'
    return TYPE_MAP.get(data_type, data_type.upper())


def _normalize_type(type_str):
    """Normalize a type string for comparison (ignore case and spaces)."""
    return type_str.upper().replace(' ', '')


def get_db_schema(dsn=None, schema='billiards_dwd'):
    """Read the column definitions of every table in a database schema.

    Args:
        dsn: Connection string; defaults to the module-level ``DSN``.
        schema: Name of the schema to inspect.

    Returns:
        A dict mapping table name to a list of column dicts with the keys
        ``column``, ``type`` (doc-style type string) and ``nullable``
        (``'YES'`` or ``'NO'``), in ordinal position order.

    Raises:
        ImportError: If psycopg2 is not installed.
    """
    if psycopg2 is None:
        raise ImportError('psycopg2 is required to read the database schema')

    tables = {}
    # closing() guarantees both handles are released even if the query fails.
    with closing(psycopg2.connect(dsn or DSN)) as conn, \
            closing(conn.cursor()) as cur:
        cur.execute(_COLUMNS_SQL, (schema,))
        for table_name, col_name, data_type, nullable, max_len, scale in cur.fetchall():
            tables.setdefault(table_name, []).append({
                'column': col_name,
                'type': _format_type(data_type, max_len, scale),
                'nullable': 'YES' if nullable == 'YES' else 'NO',
            })
    return tables


def parse_md_fields(content):
    """Parse the field list out of a Markdown document.

    Args:
        content: Full text of the Markdown file.

    Returns:
        A dict mapping column name to ``{'type': ..., 'nullable': ...}`` for
        every numbered table row found.  If a column name appears more than
        once, the last occurrence wins.
    """
    fields = {}
    for match in _FIELD_ROW_RE.finditer(content):
        col_name, col_type, nullable = (group.strip() for group in match.groups())
        fields[col_name] = {'type': col_type, 'nullable': nullable}
    return fields


def compare_and_report(table_name, db_cols, doc_path):
    """Compare the database columns of one table with its document.

    Args:
        table_name: Name of the table being checked.
        db_cols: Column dicts as produced by ``get_db_schema``.
        doc_path: ``pathlib.Path`` of the table's Markdown document.

    Returns:
        ``{'missing_doc': True, 'table': table_name}`` when the document does
        not exist; otherwise a dict with ``table``, ``missing_in_doc``,
        ``extra_in_doc`` (both sorted lists of column names),
        ``type_mismatches`` (list of dicts, in database column order) and
        ``doc_path``.
    """
    if not doc_path.exists():
        return {'missing_doc': True, 'table': table_name}

    doc_fields = parse_md_fields(doc_path.read_text(encoding='utf-8'))
    db_field_names = {col['column'] for col in db_cols}
    doc_field_names = set(doc_fields)

    type_mismatches = []
    for col in db_cols:
        documented = doc_fields.get(col['column'])
        if documented is None:
            continue
        if _normalize_type(col['type']) != _normalize_type(documented['type']):
            type_mismatches.append({
                'column': col['column'],
                'db_type': col['type'],
                'doc_type': documented['type'],
            })

    return {
        'table': table_name,
        # Sorted so the report and the JSON dump are stable between runs.
        'missing_in_doc': sorted(db_field_names - doc_field_names),
        'extra_in_doc': sorted(doc_field_names - db_field_names),
        'type_mismatches': type_mismatches,
        'doc_path': str(doc_path),
    }


def _doc_path_for(table_name, main_dir, ex_dir):
    """Return the document path for a table (``_ex`` tables live in ex_dir)."""
    target_dir = ex_dir if table_name.endswith('_ex') else main_dir
    return target_dir / f'BD_manual_{table_name}.md'


def _print_report(all_diffs):
    """Print the per-table differences followed by a summary line.

    Every entry of ``all_diffs`` is expected to carry at least one issue, so
    the table header is printed unconditionally for tables that have a doc.
    """
    print("=" * 80)
    print("BD Manual vs Database Schema Comparison Report")
    print("=" * 80)

    total_missing = 0
    total_extra = 0
    total_type_mismatch = 0

    for diff in all_diffs:
        table = diff['table']
        if diff.get('missing_doc'):
            print(f"\n### {table}: MISSING DOCUMENT ###")
            continue

        print(f"\n### {table} ###")

        missing = diff['missing_in_doc']
        if missing:
            print(f" Missing in doc ({len(missing)}): {', '.join(sorted(missing))}")
            total_missing += len(missing)

        extra = diff['extra_in_doc']
        if extra:
            print(f" Extra in doc ({len(extra)}): {', '.join(sorted(extra))}")
            total_extra += len(extra)

        mismatches = diff['type_mismatches']
        if mismatches:
            print(f" Type mismatches ({len(mismatches)}):")
            for m in mismatches:
                print(f" - {m['column']}: doc={m['doc_type']}, db={m['db_type']}")
            total_type_mismatch += len(mismatches)

    print("\n" + "=" * 80)
    print(f"Summary: {total_missing} missing, {total_extra} extra, {total_type_mismatch} type mismatches")
    print("=" * 80)


def main():
    """Compare every table with its document, print and save the report."""
    db_schema = get_db_schema()

    main_dir = Path('etl_billiards/docs/bd_manual/main')
    ex_dir = Path('etl_billiards/docs/bd_manual/Ex')

    all_diffs = []
    for table_name, columns in sorted(db_schema.items()):
        doc_path = _doc_path_for(table_name, main_dir, ex_dir)
        diff = compare_and_report(table_name, columns, doc_path)
        if any(diff.get(key) for key in _ISSUE_KEYS):
            all_diffs.append(diff)

    _print_report(all_diffs)

    # Save the detailed result as JSON; create the directory on first run.
    out_path = Path('tmp/bd_manual_diff.json')
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open('w', encoding='utf-8') as f:
        json.dump(all_diffs, f, ensure_ascii=False, indent=2)
    print("\nDetailed results saved to tmp/bd_manual_diff.json")


if __name__ == '__main__':
    main()