This commit is contained in:
Neo
2026-02-04 21:39:01 +08:00
parent ee773a9b52
commit a3f4d04335
148 changed files with 31455 additions and 182 deletions

181
tmp/check_field_variants.py Normal file
View File

@@ -0,0 +1,181 @@
# -*- coding: utf-8 -*-
"""
检查缺失字段是否是拼写变体(驼峰式/下划线式、大小写差异等)
"""
import os
import sys
import json
import re
import psycopg2
from psycopg2.extras import RealDictCursor
# 配置
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
def camel_to_snake(name):
"""驼峰转下划线"""
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
def snake_to_camel(name):
"""下划线转驼峰"""
components = name.split('_')
return components[0] + ''.join(x.title() for x in components[1:])
def normalize_field(name):
"""标准化字段名 - 去除下划线,全小写"""
return name.lower().replace('_', '')
def find_variants(api_field, ods_columns):
"""查找 API 字段在 ODS 中的可能变体"""
api_lower = api_field.lower()
api_normalized = normalize_field(api_field)
api_snake = camel_to_snake(api_field)
api_camel = snake_to_camel(api_field)
matches = []
for ods_col in ods_columns:
ods_lower = ods_col.lower()
ods_normalized = normalize_field(ods_col)
# 完全匹配
if api_lower == ods_lower:
matches.append((ods_col, 'exact'))
continue
# 标准化后匹配(忽略下划线和大小写)
if api_normalized == ods_normalized:
matches.append((ods_col, 'normalized'))
continue
# 驼峰转下划线匹配
if api_snake == ods_lower:
matches.append((ods_col, 'camel_to_snake'))
continue
# 下划线转驼峰匹配
if api_camel.lower() == ods_lower:
matches.append((ods_col, 'snake_to_camel'))
continue
# 部分匹配 - 一个是另一个的子串
if len(api_normalized) > 3 and len(ods_normalized) > 3:
if api_normalized in ods_normalized or ods_normalized in api_normalized:
matches.append((ods_col, 'partial'))
continue
return matches
def get_ods_table_columns(conn, table_name):
"""获取 ODS 表的字段结构"""
if '.' in table_name:
schema, name = table_name.split('.', 1)
else:
schema, name = 'public', table_name
sql = """
SELECT column_name
FROM information_schema.columns
WHERE table_schema = %s AND table_name = %s
ORDER BY ordinal_position
"""
with conn.cursor(cursor_factory=RealDictCursor) as cur:
cur.execute(sql, (schema, name))
rows = cur.fetchall()
return [row['column_name'] for row in rows]
def main():
# 读取之前的对比结果
json_path = os.path.join(os.path.dirname(__file__), 'api_ods_comparison.json')
with open(json_path, 'r', encoding='utf-8') as f:
results = json.load(f)
conn = psycopg2.connect(DSN)
print("=" * 100)
print("缺失字段拼写变体检查")
print("=" * 100)
all_findings = {}
for code, data in results.items():
missing = data.get('missing_in_ods', [])
if not missing:
continue
table_name = data['table_name']
ods_columns = get_ods_table_columns(conn, table_name)
print(f"\n### {code}")
print(f"表名: {table_name}")
findings = []
true_missing = []
for api_field in missing:
variants = find_variants(api_field, ods_columns)
if variants:
for ods_col, match_type in variants:
findings.append({
'api_field': api_field,
'ods_column': ods_col,
'match_type': match_type
})
print(f" [发现变体] API: `{api_field}` -> ODS: `{ods_col}` ({match_type})")
else:
true_missing.append(api_field)
if findings:
all_findings[code] = {
'table_name': table_name,
'variants': findings,
'true_missing': true_missing
}
if true_missing:
print(f"\n **确认缺失 ({len(true_missing)}):**")
for f in true_missing:
print(f" - {f}")
conn.close()
# 输出汇总
print("\n")
print("=" * 100)
print("汇总 - 发现的拼写变体")
print("=" * 100)
for code, data in all_findings.items():
if data['variants']:
print(f"\n### {code} (`{data['table_name']}`)")
print("\n| API 字段 | ODS 字段 | 匹配类型 |")
print("|----------|----------|----------|")
for v in data['variants']:
print(f"| `{v['api_field']}` | `{v['ods_column']}` | {v['match_type']} |")
print("\n")
print("=" * 100)
print("汇总 - 确认缺失的字段(无变体)")
print("=" * 100)
for code, data in results.items():
missing = data.get('missing_in_ods', [])
if not missing:
continue
if code in all_findings:
true_missing = all_findings[code]['true_missing']
else:
true_missing = missing
if true_missing:
print(f"\n### {code} (`{data['table_name']}`)")
print(f"缺失 {len(true_missing)} 个字段:")
print("\n| 字段名 | 说明 |")
print("|--------|------|")
for f in true_missing:
print(f"| `{f}` | |")
if __name__ == '__main__':
main()