Updata2
This commit is contained in:
181
tmp/check_field_variants.py
Normal file
181
tmp/check_field_variants.py
Normal file
@@ -0,0 +1,181 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
检查缺失字段是否是拼写变体(驼峰式/下划线式、大小写差异等)
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import re
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
# 配置
|
||||
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
|
||||
|
||||
def camel_to_snake(name):
|
||||
"""驼峰转下划线"""
|
||||
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
|
||||
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
|
||||
|
||||
def snake_to_camel(name):
|
||||
"""下划线转驼峰"""
|
||||
components = name.split('_')
|
||||
return components[0] + ''.join(x.title() for x in components[1:])
|
||||
|
||||
def normalize_field(name):
|
||||
"""标准化字段名 - 去除下划线,全小写"""
|
||||
return name.lower().replace('_', '')
|
||||
|
||||
def find_variants(api_field, ods_columns):
|
||||
"""查找 API 字段在 ODS 中的可能变体"""
|
||||
api_lower = api_field.lower()
|
||||
api_normalized = normalize_field(api_field)
|
||||
api_snake = camel_to_snake(api_field)
|
||||
api_camel = snake_to_camel(api_field)
|
||||
|
||||
matches = []
|
||||
for ods_col in ods_columns:
|
||||
ods_lower = ods_col.lower()
|
||||
ods_normalized = normalize_field(ods_col)
|
||||
|
||||
# 完全匹配
|
||||
if api_lower == ods_lower:
|
||||
matches.append((ods_col, 'exact'))
|
||||
continue
|
||||
|
||||
# 标准化后匹配(忽略下划线和大小写)
|
||||
if api_normalized == ods_normalized:
|
||||
matches.append((ods_col, 'normalized'))
|
||||
continue
|
||||
|
||||
# 驼峰转下划线匹配
|
||||
if api_snake == ods_lower:
|
||||
matches.append((ods_col, 'camel_to_snake'))
|
||||
continue
|
||||
|
||||
# 下划线转驼峰匹配
|
||||
if api_camel.lower() == ods_lower:
|
||||
matches.append((ods_col, 'snake_to_camel'))
|
||||
continue
|
||||
|
||||
# 部分匹配 - 一个是另一个的子串
|
||||
if len(api_normalized) > 3 and len(ods_normalized) > 3:
|
||||
if api_normalized in ods_normalized or ods_normalized in api_normalized:
|
||||
matches.append((ods_col, 'partial'))
|
||||
continue
|
||||
|
||||
return matches
|
||||
|
||||
def get_ods_table_columns(conn, table_name):
|
||||
"""获取 ODS 表的字段结构"""
|
||||
if '.' in table_name:
|
||||
schema, name = table_name.split('.', 1)
|
||||
else:
|
||||
schema, name = 'public', table_name
|
||||
|
||||
sql = """
|
||||
SELECT column_name
|
||||
FROM information_schema.columns
|
||||
WHERE table_schema = %s AND table_name = %s
|
||||
ORDER BY ordinal_position
|
||||
"""
|
||||
with conn.cursor(cursor_factory=RealDictCursor) as cur:
|
||||
cur.execute(sql, (schema, name))
|
||||
rows = cur.fetchall()
|
||||
|
||||
return [row['column_name'] for row in rows]
|
||||
|
||||
def main():
|
||||
# 读取之前的对比结果
|
||||
json_path = os.path.join(os.path.dirname(__file__), 'api_ods_comparison.json')
|
||||
with open(json_path, 'r', encoding='utf-8') as f:
|
||||
results = json.load(f)
|
||||
|
||||
conn = psycopg2.connect(DSN)
|
||||
|
||||
print("=" * 100)
|
||||
print("缺失字段拼写变体检查")
|
||||
print("=" * 100)
|
||||
|
||||
all_findings = {}
|
||||
|
||||
for code, data in results.items():
|
||||
missing = data.get('missing_in_ods', [])
|
||||
if not missing:
|
||||
continue
|
||||
|
||||
table_name = data['table_name']
|
||||
ods_columns = get_ods_table_columns(conn, table_name)
|
||||
|
||||
print(f"\n### {code}")
|
||||
print(f"表名: {table_name}")
|
||||
|
||||
findings = []
|
||||
true_missing = []
|
||||
|
||||
for api_field in missing:
|
||||
variants = find_variants(api_field, ods_columns)
|
||||
if variants:
|
||||
for ods_col, match_type in variants:
|
||||
findings.append({
|
||||
'api_field': api_field,
|
||||
'ods_column': ods_col,
|
||||
'match_type': match_type
|
||||
})
|
||||
print(f" [发现变体] API: `{api_field}` -> ODS: `{ods_col}` ({match_type})")
|
||||
else:
|
||||
true_missing.append(api_field)
|
||||
|
||||
if findings:
|
||||
all_findings[code] = {
|
||||
'table_name': table_name,
|
||||
'variants': findings,
|
||||
'true_missing': true_missing
|
||||
}
|
||||
|
||||
if true_missing:
|
||||
print(f"\n **确认缺失 ({len(true_missing)}):**")
|
||||
for f in true_missing:
|
||||
print(f" - {f}")
|
||||
|
||||
conn.close()
|
||||
|
||||
# 输出汇总
|
||||
print("\n")
|
||||
print("=" * 100)
|
||||
print("汇总 - 发现的拼写变体")
|
||||
print("=" * 100)
|
||||
|
||||
for code, data in all_findings.items():
|
||||
if data['variants']:
|
||||
print(f"\n### {code} (`{data['table_name']}`)")
|
||||
print("\n| API 字段 | ODS 字段 | 匹配类型 |")
|
||||
print("|----------|----------|----------|")
|
||||
for v in data['variants']:
|
||||
print(f"| `{v['api_field']}` | `{v['ods_column']}` | {v['match_type']} |")
|
||||
|
||||
print("\n")
|
||||
print("=" * 100)
|
||||
print("汇总 - 确认缺失的字段(无变体)")
|
||||
print("=" * 100)
|
||||
|
||||
for code, data in results.items():
|
||||
missing = data.get('missing_in_ods', [])
|
||||
if not missing:
|
||||
continue
|
||||
|
||||
if code in all_findings:
|
||||
true_missing = all_findings[code]['true_missing']
|
||||
else:
|
||||
true_missing = missing
|
||||
|
||||
if true_missing:
|
||||
print(f"\n### {code} (`{data['table_name']}`)")
|
||||
print(f"缺失 {len(true_missing)} 个字段:")
|
||||
print("\n| 字段名 | 说明 |")
|
||||
print("|--------|------|")
|
||||
for f in true_missing:
|
||||
print(f"| `{f}` | |")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Reference in New Issue
Block a user