Files
feiqiu-ETL/tmp/fix_bd_manual.py
2026-02-04 21:39:01 +08:00

181 lines
5.8 KiB
Python

# -*- coding: utf-8 -*-
"""自动修复 bd_manual 文档中的类型不匹配问题"""
import json
import re
from pathlib import Path
def fix_type_in_doc(doc_path, type_mismatches):
"""修复文档中的类型不匹配"""
if not Path(doc_path).exists():
print(f" SKIP: {doc_path} not found")
return False
content = Path(doc_path).read_text(encoding='utf-8')
original = content
for m in type_mismatches:
col_name = m['column']
old_type = m['doc_type']
new_type = m['db_type']
# 匹配字段行并替换类型
# 格式: | 序号 | 字段名 | 类型 | 可空 | ...
pattern = rf'(\|\s*\d+\s*\|\s*{col_name}\s*\|\s*){re.escape(old_type)}(\s*\|)'
replacement = rf'\g<1>{new_type}\g<2>'
content, count = re.subn(pattern, replacement, content)
if count > 0:
print(f" Fixed: {col_name}: {old_type} -> {new_type}")
else:
# 尝试更宽松的匹配
pattern2 = rf'(\|\s*{col_name}\s*\|\s*){re.escape(old_type)}(\s*\|)'
content, count = re.subn(pattern2, replacement.replace(r'\g<1>', r'\1').replace(r'\g<2>', r'\2'), content)
if count > 0:
print(f" Fixed (alt): {col_name}: {old_type} -> {new_type}")
else:
print(f" WARN: Could not fix {col_name}")
if content != original:
Path(doc_path).write_text(content, encoding='utf-8')
return True
return False
def add_missing_field(doc_path, table_name, field_name, db_schema):
"""向文档中添加缺失的字段"""
if not Path(doc_path).exists():
return False
# 从 db_schema 获取字段信息
field_info = None
for col in db_schema.get(table_name, []):
if col['column'] == field_name:
field_info = col
break
if not field_info:
print(f" WARN: Could not find {field_name} in db_schema")
return False
content = Path(doc_path).read_text(encoding='utf-8')
# 找到字段表格的最后一行,在其后添加新字段
# 格式: | 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
lines = content.split('\n')
insert_idx = None
last_seq = 0
for i, line in enumerate(lines):
# 匹配字段行
match = re.match(r'\|\s*(\d+)\s*\|\s*(\w+)\s*\|', line)
if match:
seq = int(match.group(1))
if seq > last_seq:
last_seq = seq
insert_idx = i
if insert_idx is not None:
new_seq = last_seq + 1
nullable = 'YES' if field_info['nullable'] == 'YES' else 'NO'
new_line = f"| {new_seq} | {field_name} | {field_info['type']} | {nullable} | | 调整时间 |"
lines.insert(insert_idx + 1, new_line)
Path(doc_path).write_text('\n'.join(lines), encoding='utf-8')
print(f" Added: {field_name} (type: {field_info['type']})")
return True
return False
def main():
# 加载差异数据
with open('tmp/bd_manual_diff.json', 'r', encoding='utf-8') as f:
diffs = json.load(f)
# 加载数据库 schema (需要重新获取带详细信息的)
import psycopg2
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
conn = psycopg2.connect(DSN)
cur = conn.cursor()
cur.execute("""
SELECT table_name, column_name, data_type, is_nullable,
COALESCE(character_maximum_length, numeric_precision) as max_length,
numeric_scale
FROM information_schema.columns
WHERE table_schema = 'billiards_dwd'
ORDER BY table_name, ordinal_position
""")
db_schema = {}
TYPE_MAP = {
'bigint': 'BIGINT',
'integer': 'INTEGER',
'smallint': 'SMALLINT',
'numeric': 'NUMERIC',
'text': 'TEXT',
'character varying': 'VARCHAR',
'boolean': 'BOOLEAN',
'timestamp with time zone': 'TIMESTAMPTZ',
'timestamp without time zone': 'TIMESTAMP',
'date': 'DATE',
'jsonb': 'JSONB',
'json': 'JSON',
}
for row in cur.fetchall():
table_name, col_name, data_type, nullable, max_len, scale = row
if table_name not in db_schema:
db_schema[table_name] = []
type_str = TYPE_MAP.get(data_type, data_type.upper())
if data_type == 'numeric' and max_len and scale is not None:
type_str = f'NUMERIC({max_len},{scale})'
elif data_type == 'character varying' and max_len:
type_str = f'VARCHAR({max_len})'
db_schema[table_name].append({
'column': col_name,
'type': type_str,
'nullable': nullable,
})
cur.close()
conn.close()
print("=" * 80)
print("Fixing BD Manual Documents")
print("=" * 80)
fixed_count = 0
for diff in diffs:
table = diff['table']
doc_path = diff.get('doc_path', '')
if not doc_path:
continue
has_changes = False
# 修复类型不匹配
if diff.get('type_mismatches'):
print(f"\n### {table} (type fixes) ###")
if fix_type_in_doc(doc_path, diff['type_mismatches']):
has_changes = True
# 添加缺失字段
if diff.get('missing_in_doc'):
print(f"\n### {table} (missing fields) ###")
for field in diff['missing_in_doc']:
if add_missing_field(doc_path, table, field, db_schema):
has_changes = True
if has_changes:
fixed_count += 1
print("\n" + "=" * 80)
print(f"Fixed {fixed_count} documents")
print("=" * 80)
if __name__ == '__main__':
main()