# -*- coding: utf-8 -*- """自动修复 bd_manual 文档中的类型不匹配问题""" import json import re from pathlib import Path def fix_type_in_doc(doc_path, type_mismatches): """修复文档中的类型不匹配""" if not Path(doc_path).exists(): print(f" SKIP: {doc_path} not found") return False content = Path(doc_path).read_text(encoding='utf-8') original = content for m in type_mismatches: col_name = m['column'] old_type = m['doc_type'] new_type = m['db_type'] # 匹配字段行并替换类型 # 格式: | 序号 | 字段名 | 类型 | 可空 | ... pattern = rf'(\|\s*\d+\s*\|\s*{col_name}\s*\|\s*){re.escape(old_type)}(\s*\|)' replacement = rf'\g<1>{new_type}\g<2>' content, count = re.subn(pattern, replacement, content) if count > 0: print(f" Fixed: {col_name}: {old_type} -> {new_type}") else: # 尝试更宽松的匹配 pattern2 = rf'(\|\s*{col_name}\s*\|\s*){re.escape(old_type)}(\s*\|)' content, count = re.subn(pattern2, replacement.replace(r'\g<1>', r'\1').replace(r'\g<2>', r'\2'), content) if count > 0: print(f" Fixed (alt): {col_name}: {old_type} -> {new_type}") else: print(f" WARN: Could not fix {col_name}") if content != original: Path(doc_path).write_text(content, encoding='utf-8') return True return False def add_missing_field(doc_path, table_name, field_name, db_schema): """向文档中添加缺失的字段""" if not Path(doc_path).exists(): return False # 从 db_schema 获取字段信息 field_info = None for col in db_schema.get(table_name, []): if col['column'] == field_name: field_info = col break if not field_info: print(f" WARN: Could not find {field_name} in db_schema") return False content = Path(doc_path).read_text(encoding='utf-8') # 找到字段表格的最后一行,在其后添加新字段 # 格式: | 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 | lines = content.split('\n') insert_idx = None last_seq = 0 for i, line in enumerate(lines): # 匹配字段行 match = re.match(r'\|\s*(\d+)\s*\|\s*(\w+)\s*\|', line) if match: seq = int(match.group(1)) if seq > last_seq: last_seq = seq insert_idx = i if insert_idx is not None: new_seq = last_seq + 1 nullable = 'YES' if field_info['nullable'] == 'YES' else 'NO' new_line = f"| {new_seq} | {field_name} | {field_info['type']} | {nullable} | | 调整时间 |" lines.insert(insert_idx + 1, new_line) Path(doc_path).write_text('\n'.join(lines), encoding='utf-8') print(f" Added: {field_name} (type: {field_info['type']})") return True return False def main(): # 加载差异数据 with open('tmp/bd_manual_diff.json', 'r', encoding='utf-8') as f: diffs = json.load(f) # 加载数据库 schema (需要重新获取带详细信息的) import psycopg2 DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test' conn = psycopg2.connect(DSN) cur = conn.cursor() cur.execute(""" SELECT table_name, column_name, data_type, is_nullable, COALESCE(character_maximum_length, numeric_precision) as max_length, numeric_scale FROM information_schema.columns WHERE table_schema = 'billiards_dwd' ORDER BY table_name, ordinal_position """) db_schema = {} TYPE_MAP = { 'bigint': 'BIGINT', 'integer': 'INTEGER', 'smallint': 'SMALLINT', 'numeric': 'NUMERIC', 'text': 'TEXT', 'character varying': 'VARCHAR', 'boolean': 'BOOLEAN', 'timestamp with time zone': 'TIMESTAMPTZ', 'timestamp without time zone': 'TIMESTAMP', 'date': 'DATE', 'jsonb': 'JSONB', 'json': 'JSON', } for row in cur.fetchall(): table_name, col_name, data_type, nullable, max_len, scale = row if table_name not in db_schema: db_schema[table_name] = [] type_str = TYPE_MAP.get(data_type, data_type.upper()) if data_type == 'numeric' and max_len and scale is not None: type_str = f'NUMERIC({max_len},{scale})' elif data_type == 'character varying' and max_len: type_str = f'VARCHAR({max_len})' db_schema[table_name].append({ 'column': col_name, 'type': type_str, 'nullable': nullable, }) cur.close() conn.close() print("=" * 80) print("Fixing BD Manual Documents") print("=" * 80) fixed_count = 0 for diff in diffs: table = diff['table'] doc_path = diff.get('doc_path', '') if not doc_path: continue has_changes = False # 修复类型不匹配 if diff.get('type_mismatches'): print(f"\n### {table} (type fixes) ###") if fix_type_in_doc(doc_path, diff['type_mismatches']): has_changes = True # 添加缺失字段 if diff.get('missing_in_doc'): print(f"\n### {table} (missing fields) ###") for field in diff['missing_in_doc']: if add_missing_field(doc_path, table, field, db_schema): has_changes = True if has_changes: fixed_count += 1 print("\n" + "=" * 80) print(f"Fixed {fixed_count} documents") print("=" * 80) if __name__ == '__main__': main()