Updata2
This commit is contained in:
181
tmp/sync_bd_manual.py
Normal file
181
tmp/sync_bd_manual.py
Normal file
@@ -0,0 +1,181 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""校验并同步 bd_manual 文档与数据库结构"""
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
import psycopg2
|
||||
|
||||
# Connection string for the target PostgreSQL instance.
# NOTE(review): credentials are hard-coded here — consider loading them from an
# environment variable or config file before this script is shared more widely.
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'

# Maps PostgreSQL information_schema data types to the display names used in
# the bd_manual documentation tables. Types not listed here fall back to the
# upper-cased raw name.
TYPE_MAP = {
    # integer family
    'bigint': 'BIGINT',
    'integer': 'INTEGER',
    'smallint': 'SMALLINT',
    # numeric / text
    'numeric': 'NUMERIC',
    'text': 'TEXT',
    'character varying': 'VARCHAR',
    'boolean': 'BOOLEAN',
    # date / time
    'timestamp with time zone': 'TIMESTAMPTZ',
    'timestamp without time zone': 'TIMESTAMP',
    'date': 'DATE',
    # JSON
    'jsonb': 'JSONB',
    'json': 'JSON',
}
|
||||
|
||||
def get_db_schema():
    """Fetch the live column layout of every table in ``billiards_dwd``.

    Returns:
        dict: table name -> list of column descriptors
        ``{'column': str, 'type': str, 'nullable': 'YES'|'NO'}``,
        in ordinal (definition) order.
    """
    conn = psycopg2.connect(DSN)
    try:
        cur = conn.cursor()
        cur.execute("""
            SELECT table_name, column_name, data_type, is_nullable,
                   COALESCE(character_maximum_length, numeric_precision) as max_length,
                   numeric_scale
            FROM information_schema.columns
            WHERE table_schema = 'billiards_dwd'
            ORDER BY table_name, ordinal_position
        """)

        tables = {}
        for table_name, col_name, data_type, nullable, max_len, scale in cur.fetchall():
            # Map the raw PostgreSQL type to the display format used in the docs.
            type_str = TYPE_MAP.get(data_type, data_type.upper())
            # BUG FIX: the precision/length was previously hard-coded as the
            # literal "12,518" (f'NUMERIC(12,518,{scale})' / f'VARCHAR(12,518)'),
            # which made every numeric/varchar column mismatch its doc entry.
            # Interpolate the actual length/precision fetched from the catalog.
            if data_type == 'numeric' and max_len and scale is not None:
                type_str = f'NUMERIC({max_len},{scale})'
            elif data_type == 'character varying' and max_len:
                type_str = f'VARCHAR({max_len})'

            tables.setdefault(table_name, []).append({
                'column': col_name,
                'type': type_str,
                'nullable': 'YES' if nullable == 'YES' else 'NO',
            })
        cur.close()
    finally:
        # Close the connection even if the query or row processing raises.
        conn.close()
    return tables
|
||||
|
||||
def parse_md_fields(content):
    """Extract the field table from a bd_manual markdown document.

    A field row has the shape ``| <index> | <name> | <type> | <nullable> |``.

    Returns:
        dict: column name -> ``{'type': str, 'nullable': str}``.
    """
    row_re = re.compile(r'\|\s*\d+\s*\|\s*(\w+)\s*\|\s*([^|]+)\s*\|\s*(\w+)\s*\|')
    return {
        m.group(1).strip(): {
            'type': m.group(2).strip(),
            'nullable': m.group(3).strip(),
        }
        for m in row_re.finditer(content)
    }
|
||||
|
||||
def compare_and_report(table_name, db_cols, doc_path):
    """Diff one table's live DB columns against its markdown documentation.

    Args:
        table_name: name of the database table.
        db_cols: list of ``{'column', 'type', 'nullable'}`` descriptors.
        doc_path: ``Path`` to the expected markdown document.

    Returns:
        ``{'missing_doc': True, 'table': ...}`` when the document is absent,
        otherwise a dict with ``missing_in_doc``, ``extra_in_doc``,
        ``type_mismatches`` and ``doc_path`` keys.
    """
    if not doc_path.exists():
        return {'missing_doc': True, 'table': table_name}

    doc_fields = parse_md_fields(doc_path.read_text(encoding='utf-8'))

    db_names = {col['column'] for col in db_cols}
    doc_names = set(doc_fields)

    def _norm(type_text):
        # Types compare case-insensitively with all whitespace removed.
        return type_text.upper().replace(' ', '')

    type_mismatches = [
        {
            'column': col['column'],
            'db_type': col['type'],
            'doc_type': doc_fields[col['column']]['type'],
        }
        for col in db_cols
        if col['column'] in doc_fields
        and _norm(col['type']) != _norm(doc_fields[col['column']]['type'])
    ]

    return {
        'table': table_name,
        'missing_in_doc': list(db_names - doc_names),
        'extra_in_doc': list(doc_names - db_names),
        'type_mismatches': type_mismatches,
        'doc_path': str(doc_path),
    }
|
||||
|
||||
def main():
    """Compare the live DB schema with the bd_manual docs and print a report.

    Also dumps the full diff list to ``tmp/bd_manual_diff.json``.
    """
    db_schema = get_db_schema()

    main_dir = Path('etl_billiards/docs/bd_manual/main')
    ex_dir = Path('etl_billiards/docs/bd_manual/Ex')

    all_diffs = []

    for table_name, columns in sorted(db_schema.items()):
        # Extension tables (*_ex) are documented under Ex/, the rest under main/.
        # (Removed the unused `base_name = table_name[:-3]` local.)
        doc_dir = ex_dir if table_name.endswith('_ex') else main_dir
        doc_path = doc_dir / f'BD_manual_{table_name}.md'

        diff = compare_and_report(table_name, columns, doc_path)
        if (diff.get('missing_in_doc') or diff.get('extra_in_doc')
                or diff.get('type_mismatches') or diff.get('missing_doc')):
            all_diffs.append(diff)

    # Human-readable report.
    print("=" * 80)
    print("BD Manual vs Database Schema Comparison Report")
    print("=" * 80)

    total_missing = 0
    total_extra = 0
    total_type_mismatch = 0

    for diff in all_diffs:
        table = diff['table']
        if diff.get('missing_doc'):
            print(f"\n### {table}: MISSING DOCUMENT ###")
            continue

        # Print the table header once, before the first reported issue.
        has_issues = False

        if diff['missing_in_doc']:
            if not has_issues:
                print(f"\n### {table} ###")
                has_issues = True
            print(f"  Missing in doc ({len(diff['missing_in_doc'])}): {', '.join(sorted(diff['missing_in_doc']))}")
            total_missing += len(diff['missing_in_doc'])

        if diff['extra_in_doc']:
            if not has_issues:
                print(f"\n### {table} ###")
                has_issues = True
            print(f"  Extra in doc ({len(diff['extra_in_doc'])}): {', '.join(sorted(diff['extra_in_doc']))}")
            total_extra += len(diff['extra_in_doc'])

        if diff['type_mismatches']:
            if not has_issues:
                print(f"\n### {table} ###")
                has_issues = True
            print(f"  Type mismatches ({len(diff['type_mismatches'])}):")
            for m in diff['type_mismatches']:
                print(f"    - {m['column']}: doc={m['doc_type']}, db={m['db_type']}")
            total_type_mismatch += len(diff['type_mismatches'])

    print("\n" + "=" * 80)
    print(f"Summary: {total_missing} missing, {total_extra} extra, {total_type_mismatch} type mismatches")
    print("=" * 80)

    # Persist the detailed diff as JSON; create tmp/ if it doesn't exist yet
    # so the script doesn't crash on a fresh checkout.
    out_path = Path('tmp/bd_manual_diff.json')
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(all_diffs, f, ensure_ascii=False, indent=2)
    print("\nDetailed results saved to tmp/bd_manual_diff.json")


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user