This commit is contained in:
Neo
2026-02-04 21:39:01 +08:00
parent ee773a9b52
commit a3f4d04335
148 changed files with 31455 additions and 182 deletions

181
tmp/sync_bd_manual.py Normal file
View File

@@ -0,0 +1,181 @@
# -*- coding: utf-8 -*-
"""Verify and sync the bd_manual docs against the live database structure."""
import json
import re
from pathlib import Path
import psycopg2
# NOTE(review): credentials are hard-coded in the DSN — fine for a throwaway
# tmp/ script, but should come from the environment if this is kept.
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
# Type mapping (PostgreSQL data_type -> display format used in the docs).
# Types not listed here fall back to data_type.upper() in get_db_schema.
TYPE_MAP = {
    'bigint': 'BIGINT',
    'integer': 'INTEGER',
    'smallint': 'SMALLINT',
    'numeric': 'NUMERIC',
    'text': 'TEXT',
    'character varying': 'VARCHAR',
    'boolean': 'BOOLEAN',
    'timestamp with time zone': 'TIMESTAMPTZ',
    'timestamp without time zone': 'TIMESTAMP',
    'date': 'DATE',
    'jsonb': 'JSONB',
    'json': 'JSON',
}
def get_db_schema():
    """Read the column layout of every table in the ``billiards_dwd`` schema.

    Returns:
        dict: table name -> ordered list of column descriptors, each a dict
        with keys ``column`` (name), ``type`` (doc-style string such as
        ``NUMERIC(12,2)``) and ``nullable`` (``'YES'`` / ``'NO'``).
    """
    conn = psycopg2.connect(DSN)
    try:
        cur = conn.cursor()
        cur.execute("""
            SELECT table_name, column_name, data_type, is_nullable,
                   COALESCE(character_maximum_length, numeric_precision) as max_length,
                   numeric_scale
            FROM information_schema.columns
            WHERE table_schema = 'billiards_dwd'
            ORDER BY table_name, ordinal_position
        """)
        tables = {}
        for table_name, col_name, data_type, nullable, max_len, scale in cur.fetchall():
            # Map the PostgreSQL type to the display format used by the docs.
            type_str = TYPE_MAP.get(data_type, data_type.upper())
            # BUGFIX: the precision had been replaced by the literal text
            # "10,078" (e.g. 'NUMERIC(10,078,{scale})'); interpolate the real
            # precision/length from the catalog instead.
            if data_type == 'numeric' and max_len and scale is not None:
                type_str = f'NUMERIC({max_len},{scale})'
            elif data_type == 'character varying' and max_len:
                type_str = f'VARCHAR({max_len})'
            tables.setdefault(table_name, []).append({
                'column': col_name,
                'type': type_str,
                'nullable': 'YES' if nullable == 'YES' else 'NO',
            })
        cur.close()
        return tables
    finally:
        # Always release the connection, even if the query raises.
        conn.close()
def parse_md_fields(content):
    """Extract the field table from a BD-manual markdown page.

    Args:
        content: full markdown text of one manual document.

    Returns:
        dict: column name -> ``{'type': ..., 'nullable': ...}`` harvested from
        table rows shaped like ``| <idx> | <name> | <type> | <nullable> |``.
        Later rows for the same column name overwrite earlier ones.
    """
    row_pattern = re.compile(
        r'\|\s*\d+\s*\|\s*(\w+)\s*\|\s*([^|]+)\s*\|\s*(\w+)\s*\|'
    )
    return {
        name.strip(): {'type': col_type.strip(), 'nullable': nullable.strip()}
        for name, col_type, nullable in row_pattern.findall(content)
    }
def compare_and_report(table_name, db_cols, doc_path):
    """Diff one table's database columns against its markdown manual page.

    Args:
        table_name: name of the database table.
        db_cols: list of column descriptors from get_db_schema().
        doc_path: Path to the table's BD_manual markdown file.

    Returns:
        dict: ``{'missing_doc': True, 'table': ...}`` when the document does
        not exist; otherwise a report with ``missing_in_doc``, ``extra_in_doc``,
        ``type_mismatches`` and ``doc_path`` keys.
    """
    if not doc_path.exists():
        return {'missing_doc': True, 'table': table_name}

    doc_fields = parse_md_fields(doc_path.read_text(encoding='utf-8'))
    db_names = {col['column'] for col in db_cols}
    doc_names = set(doc_fields)

    def _norm(type_str):
        # Types compare case-insensitively with internal spaces ignored.
        return type_str.upper().replace(' ', '')

    type_mismatches = [
        {
            'column': col['column'],
            'db_type': col['type'],
            'doc_type': doc_fields[col['column']]['type'],
        }
        for col in db_cols
        if col['column'] in doc_fields
        and _norm(col['type']) != _norm(doc_fields[col['column']]['type'])
    ]

    return {
        'table': table_name,
        'missing_in_doc': list(db_names - doc_names),
        'extra_in_doc': list(doc_names - db_names),
        'type_mismatches': type_mismatches,
        'doc_path': str(doc_path),
    }
def main():
    """Compare every billiards_dwd table against its BD-manual page.

    Prints a per-table diff report to stdout and dumps the full diff list to
    ``tmp/bd_manual_diff.json``.
    """
    db_schema = get_db_schema()
    main_dir = Path('etl_billiards/docs/bd_manual/main')
    ex_dir = Path('etl_billiards/docs/bd_manual/Ex')

    all_diffs = []
    for table_name, columns in sorted(db_schema.items()):
        # `_ex` tables are documented under Ex/, everything else under main/.
        # (Removed an unused `base_name = table_name[:-3]` local.)
        doc_dir = ex_dir if table_name.endswith('_ex') else main_dir
        doc_path = doc_dir / f'BD_manual_{table_name}.md'
        diff = compare_and_report(table_name, columns, doc_path)
        if any(diff.get(key) for key in
               ('missing_in_doc', 'extra_in_doc', 'type_mismatches', 'missing_doc')):
            all_diffs.append(diff)

    # Report header.
    print("=" * 80)
    print("BD Manual vs Database Schema Comparison Report")
    print("=" * 80)

    total_missing = 0
    total_extra = 0
    total_type_mismatch = 0
    for diff in all_diffs:
        missing, extra, mismatched = _print_table_diff(diff)
        total_missing += missing
        total_extra += extra
        total_type_mismatch += mismatched

    print("\n" + "=" * 80)
    print(f"Summary: {total_missing} missing, {total_extra} extra, {total_type_mismatch} type mismatches")
    print("=" * 80)

    # Persist the detailed diff for downstream tooling.
    with open('tmp/bd_manual_diff.json', 'w', encoding='utf-8') as f:
        json.dump(all_diffs, f, ensure_ascii=False, indent=2)
    print("\nDetailed results saved to tmp/bd_manual_diff.json")


def _print_table_diff(diff):
    """Print one table's issues; return (missing, extra, type_mismatch) counts."""
    table = diff['table']
    if diff.get('missing_doc'):
        print(f"\n### {table}: MISSING DOCUMENT ###")
        return 0, 0, 0

    header_printed = False

    def _ensure_header():
        # The "### table ###" header is emitted once, before the first issue.
        nonlocal header_printed
        if not header_printed:
            print(f"\n### {table} ###")
            header_printed = True

    missing = diff['missing_in_doc']
    extra = diff['extra_in_doc']
    mismatches = diff['type_mismatches']
    if missing:
        _ensure_header()
        print(f" Missing in doc ({len(missing)}): {', '.join(sorted(missing))}")
    if extra:
        _ensure_header()
        print(f" Extra in doc ({len(extra)}): {', '.join(sorted(extra))}")
    if mismatches:
        _ensure_header()
        print(f" Type mismatches ({len(mismatches)}):")
        for m in mismatches:
            print(f" - {m['column']}: doc={m['doc_type']}, db={m['db_type']}")
    return len(missing), len(extra), len(mismatches)


if __name__ == '__main__':
    main()