This commit is contained in:
Neo
2026-02-04 21:39:01 +08:00
parent ee773a9b52
commit a3f4d04335
148 changed files with 31455 additions and 182 deletions

181
tmp/sync_bd_manual.py Normal file
View File

@@ -0,0 +1,181 @@
# -*- coding: utf-8 -*-
"""Verify and sync the bd_manual docs against the live database structure."""
import json
import re
from pathlib import Path
import psycopg2
# NOTE(review): credentials are hard-coded in the DSN — fine for a throwaway
# tmp/ script, but should come from the environment if this is kept.
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
# Type mapping (PostgreSQL data_type -> display format used in the docs).
# Types not listed here fall back to data_type.upper() in get_db_schema.
TYPE_MAP = {
    'bigint': 'BIGINT',
    'integer': 'INTEGER',
    'smallint': 'SMALLINT',
    'numeric': 'NUMERIC',
    'text': 'TEXT',
    'character varying': 'VARCHAR',
    'boolean': 'BOOLEAN',
    'timestamp with time zone': 'TIMESTAMPTZ',
    'timestamp without time zone': 'TIMESTAMP',
    'date': 'DATE',
    'jsonb': 'JSONB',
    'json': 'JSON',
}
def get_db_schema():
    """Read the column layout of every table in the ``billiards_dwd`` schema.

    Returns:
        dict: table name -> ordered list of column descriptors, each a dict
        with keys ``column`` (name), ``type`` (doc-style string such as
        ``NUMERIC(12,2)``) and ``nullable`` (``'YES'`` / ``'NO'``).
    """
    conn = psycopg2.connect(DSN)
    try:
        cur = conn.cursor()
        cur.execute("""
            SELECT table_name, column_name, data_type, is_nullable,
                   COALESCE(character_maximum_length, numeric_precision) as max_length,
                   numeric_scale
            FROM information_schema.columns
            WHERE table_schema = 'billiards_dwd'
            ORDER BY table_name, ordinal_position
        """)
        tables = {}
        for table_name, col_name, data_type, nullable, max_len, scale in cur.fetchall():
            # Map the PostgreSQL type to the display format used by the docs.
            type_str = TYPE_MAP.get(data_type, data_type.upper())
            # BUGFIX: the precision had been replaced by the literal text
            # "10,078" (e.g. 'NUMERIC(10,078,{scale})'); interpolate the real
            # precision/length from the catalog instead.
            if data_type == 'numeric' and max_len and scale is not None:
                type_str = f'NUMERIC({max_len},{scale})'
            elif data_type == 'character varying' and max_len:
                type_str = f'VARCHAR({max_len})'
            tables.setdefault(table_name, []).append({
                'column': col_name,
                'type': type_str,
                'nullable': 'YES' if nullable == 'YES' else 'NO',
            })
        cur.close()
        return tables
    finally:
        # Always release the connection, even if the query raises.
        conn.close()
def parse_md_fields(content):
    """Extract the field table from a BD-manual markdown page.

    Args:
        content: full markdown text of one manual document.

    Returns:
        dict: column name -> ``{'type': ..., 'nullable': ...}`` harvested from
        table rows shaped like ``| <idx> | <name> | <type> | <nullable> |``.
        Later rows for the same column name overwrite earlier ones.
    """
    row_pattern = re.compile(
        r'\|\s*\d+\s*\|\s*(\w+)\s*\|\s*([^|]+)\s*\|\s*(\w+)\s*\|'
    )
    return {
        name.strip(): {'type': col_type.strip(), 'nullable': nullable.strip()}
        for name, col_type, nullable in row_pattern.findall(content)
    }
def compare_and_report(table_name, db_cols, doc_path):
    """Diff one table's database columns against its markdown manual page.

    Args:
        table_name: name of the database table.
        db_cols: list of column descriptors from get_db_schema().
        doc_path: Path to the table's BD_manual markdown file.

    Returns:
        dict: ``{'missing_doc': True, 'table': ...}`` when the document does
        not exist; otherwise a report with ``missing_in_doc``, ``extra_in_doc``,
        ``type_mismatches`` and ``doc_path`` keys.
    """
    if not doc_path.exists():
        return {'missing_doc': True, 'table': table_name}

    doc_fields = parse_md_fields(doc_path.read_text(encoding='utf-8'))
    db_names = {col['column'] for col in db_cols}
    doc_names = set(doc_fields)

    def _norm(type_str):
        # Types compare case-insensitively with internal spaces ignored.
        return type_str.upper().replace(' ', '')

    type_mismatches = [
        {
            'column': col['column'],
            'db_type': col['type'],
            'doc_type': doc_fields[col['column']]['type'],
        }
        for col in db_cols
        if col['column'] in doc_fields
        and _norm(col['type']) != _norm(doc_fields[col['column']]['type'])
    ]

    return {
        'table': table_name,
        'missing_in_doc': list(db_names - doc_names),
        'extra_in_doc': list(doc_names - db_names),
        'type_mismatches': type_mismatches,
        'doc_path': str(doc_path),
    }
def main():
    """Compare every billiards_dwd table against its BD-manual page.

    Prints a per-table diff report to stdout and dumps the full diff list to
    ``tmp/bd_manual_diff.json``.
    """
    db_schema = get_db_schema()
    main_dir = Path('etl_billiards/docs/bd_manual/main')
    ex_dir = Path('etl_billiards/docs/bd_manual/Ex')

    all_diffs = []
    for table_name, columns in sorted(db_schema.items()):
        # `_ex` tables are documented under Ex/, everything else under main/.
        # (Removed an unused `base_name = table_name[:-3]` local.)
        doc_dir = ex_dir if table_name.endswith('_ex') else main_dir
        doc_path = doc_dir / f'BD_manual_{table_name}.md'
        diff = compare_and_report(table_name, columns, doc_path)
        if any(diff.get(key) for key in
               ('missing_in_doc', 'extra_in_doc', 'type_mismatches', 'missing_doc')):
            all_diffs.append(diff)

    # Report header.
    print("=" * 80)
    print("BD Manual vs Database Schema Comparison Report")
    print("=" * 80)

    total_missing = 0
    total_extra = 0
    total_type_mismatch = 0
    for diff in all_diffs:
        missing, extra, mismatched = _print_table_diff(diff)
        total_missing += missing
        total_extra += extra
        total_type_mismatch += mismatched

    print("\n" + "=" * 80)
    print(f"Summary: {total_missing} missing, {total_extra} extra, {total_type_mismatch} type mismatches")
    print("=" * 80)

    # Persist the detailed diff for downstream tooling.
    with open('tmp/bd_manual_diff.json', 'w', encoding='utf-8') as f:
        json.dump(all_diffs, f, ensure_ascii=False, indent=2)
    print("\nDetailed results saved to tmp/bd_manual_diff.json")


def _print_table_diff(diff):
    """Print one table's issues; return (missing, extra, type_mismatch) counts."""
    table = diff['table']
    if diff.get('missing_doc'):
        print(f"\n### {table}: MISSING DOCUMENT ###")
        return 0, 0, 0

    header_printed = False

    def _ensure_header():
        # The "### table ###" header is emitted once, before the first issue.
        nonlocal header_printed
        if not header_printed:
            print(f"\n### {table} ###")
            header_printed = True

    missing = diff['missing_in_doc']
    extra = diff['extra_in_doc']
    mismatches = diff['type_mismatches']
    if missing:
        _ensure_header()
        print(f" Missing in doc ({len(missing)}): {', '.join(sorted(missing))}")
    if extra:
        _ensure_header()
        print(f" Extra in doc ({len(extra)}): {', '.join(sorted(extra))}")
    if mismatches:
        _ensure_header()
        print(f" Type mismatches ({len(mismatches)}):")
        for m in mismatches:
            print(f" - {m['column']}: doc={m['doc_type']}, db={m['db_type']}")
    return len(missing), len(extra), len(mismatches)


if __name__ == '__main__':
    main()