# -*- coding: utf-8 -*- """ 检查缺失字段是否是拼写变体(驼峰式/下划线式、大小写差异等) """ import os import sys import json import re import psycopg2 from psycopg2.extras import RealDictCursor # 配置 DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test' def camel_to_snake(name): """驼峰转下划线""" s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name) return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() def snake_to_camel(name): """下划线转驼峰""" components = name.split('_') return components[0] + ''.join(x.title() for x in components[1:]) def normalize_field(name): """标准化字段名 - 去除下划线,全小写""" return name.lower().replace('_', '') def find_variants(api_field, ods_columns): """查找 API 字段在 ODS 中的可能变体""" api_lower = api_field.lower() api_normalized = normalize_field(api_field) api_snake = camel_to_snake(api_field) api_camel = snake_to_camel(api_field) matches = [] for ods_col in ods_columns: ods_lower = ods_col.lower() ods_normalized = normalize_field(ods_col) # 完全匹配 if api_lower == ods_lower: matches.append((ods_col, 'exact')) continue # 标准化后匹配(忽略下划线和大小写) if api_normalized == ods_normalized: matches.append((ods_col, 'normalized')) continue # 驼峰转下划线匹配 if api_snake == ods_lower: matches.append((ods_col, 'camel_to_snake')) continue # 下划线转驼峰匹配 if api_camel.lower() == ods_lower: matches.append((ods_col, 'snake_to_camel')) continue # 部分匹配 - 一个是另一个的子串 if len(api_normalized) > 3 and len(ods_normalized) > 3: if api_normalized in ods_normalized or ods_normalized in api_normalized: matches.append((ods_col, 'partial')) continue return matches def get_ods_table_columns(conn, table_name): """获取 ODS 表的字段结构""" if '.' in table_name: schema, name = table_name.split('.', 1) else: schema, name = 'public', table_name sql = """ SELECT column_name FROM information_schema.columns WHERE table_schema = %s AND table_name = %s ORDER BY ordinal_position """ with conn.cursor(cursor_factory=RealDictCursor) as cur: cur.execute(sql, (schema, name)) rows = cur.fetchall() return [row['column_name'] for row in rows] def main(): # 读取之前的对比结果 json_path = os.path.join(os.path.dirname(__file__), 'api_ods_comparison.json') with open(json_path, 'r', encoding='utf-8') as f: results = json.load(f) conn = psycopg2.connect(DSN) print("=" * 100) print("缺失字段拼写变体检查") print("=" * 100) all_findings = {} for code, data in results.items(): missing = data.get('missing_in_ods', []) if not missing: continue table_name = data['table_name'] ods_columns = get_ods_table_columns(conn, table_name) print(f"\n### {code}") print(f"表名: {table_name}") findings = [] true_missing = [] for api_field in missing: variants = find_variants(api_field, ods_columns) if variants: for ods_col, match_type in variants: findings.append({ 'api_field': api_field, 'ods_column': ods_col, 'match_type': match_type }) print(f" [发现变体] API: `{api_field}` -> ODS: `{ods_col}` ({match_type})") else: true_missing.append(api_field) if findings: all_findings[code] = { 'table_name': table_name, 'variants': findings, 'true_missing': true_missing } if true_missing: print(f"\n **确认缺失 ({len(true_missing)}):**") for f in true_missing: print(f" - {f}") conn.close() # 输出汇总 print("\n") print("=" * 100) print("汇总 - 发现的拼写变体") print("=" * 100) for code, data in all_findings.items(): if data['variants']: print(f"\n### {code} (`{data['table_name']}`)") print("\n| API 字段 | ODS 字段 | 匹配类型 |") print("|----------|----------|----------|") for v in data['variants']: print(f"| `{v['api_field']}` | `{v['ods_column']}` | {v['match_type']} |") print("\n") print("=" * 100) print("汇总 - 确认缺失的字段(无变体)") print("=" * 100) for code, data in results.items(): missing = data.get('missing_in_ods', []) if not missing: continue if code in all_findings: true_missing = all_findings[code]['true_missing'] else: true_missing = missing if true_missing: print(f"\n### {code} (`{data['table_name']}`)") print(f"缺失 {len(true_missing)} 个字段:") print("\n| 字段名 | 说明 |") print("|--------|------|") for f in true_missing: print(f"| `{f}` | |") if __name__ == '__main__': main()