Updata2

2026-02-04 21:39:01 +08:00
parent ee773a9b52
commit a3f4d04335
148 changed files with 31455 additions and 182 deletions
--- a/tmp/check_field_variants.py
+++ b/tmp/check_field_variants.py
@@ -0,0 +1,181 @@
+# -*- coding: utf-8 -*-
+"""
+检查缺失字段是否是拼写变体（驼峰式/下划线式、大小写差异等）
+"""
+import os
+import sys
+import json
+import re
+import psycopg2
+from psycopg2.extras import RealDictCursor
+
+# 配置
+DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
+
+def camel_to_snake(name):
+    """驼峰转下划线"""
+    s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
+    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
+
+def snake_to_camel(name):
+    """下划线转驼峰"""
+    components = name.split('_')
+    return components[0] + ''.join(x.title() for x in components[1:])
+
+def normalize_field(name):
+    """标准化字段名 - 去除下划线，全小写"""
+    return name.lower().replace('_', '')
+
+def find_variants(api_field, ods_columns):
+    """查找 API 字段在 ODS 中的可能变体"""
+    api_lower = api_field.lower()
+    api_normalized = normalize_field(api_field)
+    api_snake = camel_to_snake(api_field)
+    api_camel = snake_to_camel(api_field)
+    
+    matches = []
+    for ods_col in ods_columns:
+        ods_lower = ods_col.lower()
+        ods_normalized = normalize_field(ods_col)
+        
+        # 完全匹配
+        if api_lower == ods_lower:
+            matches.append((ods_col, 'exact'))
+            continue
+        
+        # 标准化后匹配（忽略下划线和大小写）
+        if api_normalized == ods_normalized:
+            matches.append((ods_col, 'normalized'))
+            continue
+        
+        # 驼峰转下划线匹配
+        if api_snake == ods_lower:
+            matches.append((ods_col, 'camel_to_snake'))
+            continue
+        
+        # 下划线转驼峰匹配
+        if api_camel.lower() == ods_lower:
+            matches.append((ods_col, 'snake_to_camel'))
+            continue
+        
+        # 部分匹配 - 一个是另一个的子串
+        if len(api_normalized) > 3 and len(ods_normalized) > 3:
+            if api_normalized in ods_normalized or ods_normalized in api_normalized:
+                matches.append((ods_col, 'partial'))
+                continue
+    
+    return matches
+
+def get_ods_table_columns(conn, table_name):
+    """获取 ODS 表的字段结构"""
+    if '.' in table_name:
+        schema, name = table_name.split('.', 1)
+    else:
+        schema, name = 'public', table_name
+    
+    sql = """
+        SELECT column_name
+        FROM information_schema.columns
+        WHERE table_schema = %s AND table_name = %s
+        ORDER BY ordinal_position
+    """
+    with conn.cursor(cursor_factory=RealDictCursor) as cur:
+        cur.execute(sql, (schema, name))
+        rows = cur.fetchall()
+    
+    return [row['column_name'] for row in rows]
+
+def main():
+    # 读取之前的对比结果
+    json_path = os.path.join(os.path.dirname(__file__), 'api_ods_comparison.json')
+    with open(json_path, 'r', encoding='utf-8') as f:
+        results = json.load(f)
+    
+    conn = psycopg2.connect(DSN)
+    
+    print("=" * 100)
+    print("缺失字段拼写变体检查")
+    print("=" * 100)
+    
+    all_findings = {}
+    
+    for code, data in results.items():
+        missing = data.get('missing_in_ods', [])
+        if not missing:
+            continue
+        
+        table_name = data['table_name']
+        ods_columns = get_ods_table_columns(conn, table_name)
+        
+        print(f"\n### {code}")
+        print(f"表名: {table_name}")
+        
+        findings = []
+        true_missing = []
+        
+        for api_field in missing:
+            variants = find_variants(api_field, ods_columns)
+            if variants:
+                for ods_col, match_type in variants:
+                    findings.append({
+                        'api_field': api_field,
+                        'ods_column': ods_col,
+                        'match_type': match_type
+                    })
+                    print(f"  [发现变体] API: `{api_field}` -> ODS: `{ods_col}` ({match_type})")
+            else:
+                true_missing.append(api_field)
+        
+        if findings:
+            all_findings[code] = {
+                'table_name': table_name,
+                'variants': findings,
+                'true_missing': true_missing
+            }
+        
+        if true_missing:
+            print(f"\n  **确认缺失 ({len(true_missing)}):**")
+            for f in true_missing:
+                print(f"    - {f}")
+    
+    conn.close()
+    
+    # 输出汇总
+    print("\n")
+    print("=" * 100)
+    print("汇总 - 发现的拼写变体")
+    print("=" * 100)
+    
+    for code, data in all_findings.items():
+        if data['variants']:
+            print(f"\n### {code} (`{data['table_name']}`)")
+            print("\n| API 字段 | ODS 字段 | 匹配类型 |")
+            print("|----------|----------|----------|")
+            for v in data['variants']:
+                print(f"| `{v['api_field']}` | `{v['ods_column']}` | {v['match_type']} |")
+    
+    print("\n")
+    print("=" * 100)
+    print("汇总 - 确认缺失的字段（无变体）")
+    print("=" * 100)
+    
+    for code, data in results.items():
+        missing = data.get('missing_in_ods', [])
+        if not missing:
+            continue
+        
+        if code in all_findings:
+            true_missing = all_findings[code]['true_missing']
+        else:
+            true_missing = missing
+        
+        if true_missing:
+            print(f"\n### {code} (`{data['table_name']}`)")
+            print(f"缺失 {len(true_missing)} 个字段:")
+            print("\n| 字段名 | 说明 |")
+            print("|--------|------|")
+            for f in true_missing:
+                print(f"| `{f}` | |")
+
+if __name__ == '__main__':
+    main()