182 lines
5.6 KiB
Python
182 lines
5.6 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
检查缺失字段是否是拼写变体(驼峰式/下划线式、大小写差异等)
|
|
"""
|
|
import os
|
|
import sys
|
|
import json
|
|
import re
|
|
import psycopg2
|
|
from psycopg2.extras import RealDictCursor
|
|
|
|
# 配置
|
|
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
|
|
|
|
def camel_to_snake(name):
|
|
"""驼峰转下划线"""
|
|
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
|
|
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
|
|
|
|
def snake_to_camel(name):
|
|
"""下划线转驼峰"""
|
|
components = name.split('_')
|
|
return components[0] + ''.join(x.title() for x in components[1:])
|
|
|
|
def normalize_field(name):
|
|
"""标准化字段名 - 去除下划线,全小写"""
|
|
return name.lower().replace('_', '')
|
|
|
|
def find_variants(api_field, ods_columns):
|
|
"""查找 API 字段在 ODS 中的可能变体"""
|
|
api_lower = api_field.lower()
|
|
api_normalized = normalize_field(api_field)
|
|
api_snake = camel_to_snake(api_field)
|
|
api_camel = snake_to_camel(api_field)
|
|
|
|
matches = []
|
|
for ods_col in ods_columns:
|
|
ods_lower = ods_col.lower()
|
|
ods_normalized = normalize_field(ods_col)
|
|
|
|
# 完全匹配
|
|
if api_lower == ods_lower:
|
|
matches.append((ods_col, 'exact'))
|
|
continue
|
|
|
|
# 标准化后匹配(忽略下划线和大小写)
|
|
if api_normalized == ods_normalized:
|
|
matches.append((ods_col, 'normalized'))
|
|
continue
|
|
|
|
# 驼峰转下划线匹配
|
|
if api_snake == ods_lower:
|
|
matches.append((ods_col, 'camel_to_snake'))
|
|
continue
|
|
|
|
# 下划线转驼峰匹配
|
|
if api_camel.lower() == ods_lower:
|
|
matches.append((ods_col, 'snake_to_camel'))
|
|
continue
|
|
|
|
# 部分匹配 - 一个是另一个的子串
|
|
if len(api_normalized) > 3 and len(ods_normalized) > 3:
|
|
if api_normalized in ods_normalized or ods_normalized in api_normalized:
|
|
matches.append((ods_col, 'partial'))
|
|
continue
|
|
|
|
return matches
|
|
|
|
def get_ods_table_columns(conn, table_name):
|
|
"""获取 ODS 表的字段结构"""
|
|
if '.' in table_name:
|
|
schema, name = table_name.split('.', 1)
|
|
else:
|
|
schema, name = 'public', table_name
|
|
|
|
sql = """
|
|
SELECT column_name
|
|
FROM information_schema.columns
|
|
WHERE table_schema = %s AND table_name = %s
|
|
ORDER BY ordinal_position
|
|
"""
|
|
with conn.cursor(cursor_factory=RealDictCursor) as cur:
|
|
cur.execute(sql, (schema, name))
|
|
rows = cur.fetchall()
|
|
|
|
return [row['column_name'] for row in rows]
|
|
|
|
def main():
|
|
# 读取之前的对比结果
|
|
json_path = os.path.join(os.path.dirname(__file__), 'api_ods_comparison.json')
|
|
with open(json_path, 'r', encoding='utf-8') as f:
|
|
results = json.load(f)
|
|
|
|
conn = psycopg2.connect(DSN)
|
|
|
|
print("=" * 100)
|
|
print("缺失字段拼写变体检查")
|
|
print("=" * 100)
|
|
|
|
all_findings = {}
|
|
|
|
for code, data in results.items():
|
|
missing = data.get('missing_in_ods', [])
|
|
if not missing:
|
|
continue
|
|
|
|
table_name = data['table_name']
|
|
ods_columns = get_ods_table_columns(conn, table_name)
|
|
|
|
print(f"\n### {code}")
|
|
print(f"表名: {table_name}")
|
|
|
|
findings = []
|
|
true_missing = []
|
|
|
|
for api_field in missing:
|
|
variants = find_variants(api_field, ods_columns)
|
|
if variants:
|
|
for ods_col, match_type in variants:
|
|
findings.append({
|
|
'api_field': api_field,
|
|
'ods_column': ods_col,
|
|
'match_type': match_type
|
|
})
|
|
print(f" [发现变体] API: `{api_field}` -> ODS: `{ods_col}` ({match_type})")
|
|
else:
|
|
true_missing.append(api_field)
|
|
|
|
if findings:
|
|
all_findings[code] = {
|
|
'table_name': table_name,
|
|
'variants': findings,
|
|
'true_missing': true_missing
|
|
}
|
|
|
|
if true_missing:
|
|
print(f"\n **确认缺失 ({len(true_missing)}):**")
|
|
for f in true_missing:
|
|
print(f" - {f}")
|
|
|
|
conn.close()
|
|
|
|
# 输出汇总
|
|
print("\n")
|
|
print("=" * 100)
|
|
print("汇总 - 发现的拼写变体")
|
|
print("=" * 100)
|
|
|
|
for code, data in all_findings.items():
|
|
if data['variants']:
|
|
print(f"\n### {code} (`{data['table_name']}`)")
|
|
print("\n| API 字段 | ODS 字段 | 匹配类型 |")
|
|
print("|----------|----------|----------|")
|
|
for v in data['variants']:
|
|
print(f"| `{v['api_field']}` | `{v['ods_column']}` | {v['match_type']} |")
|
|
|
|
print("\n")
|
|
print("=" * 100)
|
|
print("汇总 - 确认缺失的字段(无变体)")
|
|
print("=" * 100)
|
|
|
|
for code, data in results.items():
|
|
missing = data.get('missing_in_ods', [])
|
|
if not missing:
|
|
continue
|
|
|
|
if code in all_findings:
|
|
true_missing = all_findings[code]['true_missing']
|
|
else:
|
|
true_missing = missing
|
|
|
|
if true_missing:
|
|
print(f"\n### {code} (`{data['table_name']}`)")
|
|
print(f"缺失 {len(true_missing)} 个字段:")
|
|
print("\n| 字段名 | 说明 |")
|
|
print("|--------|------|")
|
|
for f in true_missing:
|
|
print(f"| `{f}` | |")
|
|
|
|
if __name__ == '__main__':
|
|
main()
|