# -*- coding: utf-8 -*-
"""校验并同步 bd_manual 文档与数据库结构"""
import json
import os
import re
from pathlib import Path

import psycopg2

# Connection string used by get_db_schema(). Set the BD_MANUAL_DSN environment
# variable to point the script at another database; the literal below is kept
# only as the backward-compatible default.
# NOTE(review): the default embeds credentials in source — consider removing it
# once every caller supplies BD_MANUAL_DSN.
DSN = os.environ.get(
    'BD_MANUAL_DSN',
    'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test',
)

# Type mapping: PostgreSQL data type name -> spelling used in the documents.
TYPE_MAP = {
    # Numbers
    'smallint': 'SMALLINT',
    'integer': 'INTEGER',
    'bigint': 'BIGINT',
    'numeric': 'NUMERIC',
    # Text
    'character varying': 'VARCHAR',
    'text': 'TEXT',
    # Booleans
    'boolean': 'BOOLEAN',
    # Dates and times
    'date': 'DATE',
    'timestamp without time zone': 'TIMESTAMP',
    'timestamp with time zone': 'TIMESTAMPTZ',
    # JSON
    'json': 'JSON',
    'jsonb': 'JSONB',
}

def get_db_schema():
    """Read the column metadata of the ``billiards_dwd`` schema.

    Connects with the module-level ``DSN`` and queries
    ``information_schema.columns``. The cursor and the connection are closed
    even when the query fails.

    Returns:
        dict: table name -> list of column dicts in ordinal-position order.
        Each column dict has the keys ``'column'`` (column name), ``'type'``
        (display type such as ``NUMERIC(10,2)``, ``VARCHAR(50)`` or a
        ``TYPE_MAP`` value) and ``'nullable'`` (``'YES'`` or ``'NO'``).

    Raises:
        Any exception raised by psycopg2 while connecting or querying is
        propagated to the caller.
    """
    query = """
        SELECT table_name, column_name, data_type, is_nullable,
               COALESCE(character_maximum_length, numeric_precision) as max_length,
               numeric_scale
        FROM information_schema.columns
        WHERE table_schema = 'billiards_dwd'
        ORDER BY table_name, ordinal_position
    """

    conn = psycopg2.connect(DSN)
    try:
        cur = conn.cursor()
        try:
            cur.execute(query)
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        # Close explicitly: a failed query must not leave the connection open.
        conn.close()

    tables = {}
    for table_name, col_name, data_type, nullable, max_len, scale in rows:
        # Types without a mapping fall back to the upper-cased PostgreSQL name.
        type_str = TYPE_MAP.get(data_type, data_type.upper())
        if data_type == 'numeric' and max_len and scale is not None:
            # max_len holds numeric_precision here (see COALESCE in the query).
            type_str = f'NUMERIC({max_len},{scale})'
        elif data_type == 'character varying' and max_len:
            type_str = f'VARCHAR({max_len})'

        tables.setdefault(table_name, []).append({
            'column': col_name,
            'type': type_str,
            'nullable': 'YES' if nullable == 'YES' else 'NO',
        })

    return tables

def parse_md_fields(content):
    """Collect the field definitions found in a Markdown document.

    A field row is a table row of the form
    ``| <number> | <name> | <type> | <nullable> |``; any further cells are
    ignored. Rows whose first cell is not a number (headers, separators) do
    not match.

    Args:
        content: Full text of the Markdown document.

    Returns:
        dict: field name -> ``{'type': ..., 'nullable': ...}`` with the cell
        text stripped of surrounding whitespace. If a name occurs more than
        once, the last occurrence wins.
    """
    row_re = re.compile(r'\|\s*\d+\s*\|\s*(\w+)\s*\|\s*([^|]+)\s*\|\s*(\w+)\s*\|')
    return {
        name_cell.strip(): {
            'type': type_cell.strip(),
            'nullable': nullable_cell.strip(),
        }
        for name_cell, type_cell, nullable_cell in row_re.findall(content)
    }

def compare_and_report(table_name, db_cols, doc_path):
    """Compare one table's database columns with its Markdown document.

    Args:
        table_name: Name of the table, copied into the result.
        db_cols: List of column dicts with at least the keys ``'column'`` and
            ``'type'``.
        doc_path: ``pathlib.Path`` of the table's Markdown document.

    Returns:
        dict: ``{'missing_doc': True, 'table': ...}`` when the document does
        not exist. Otherwise a dict with the keys ``'table'``,
        ``'missing_in_doc'`` (columns in the database but not the document),
        ``'extra_in_doc'`` (fields in the document but not the database),
        ``'type_mismatches'`` (list of ``{'column', 'db_type', 'doc_type'}``
        dicts in database column order) and ``'doc_path'``. Both name lists
        are sorted so the result is the same on every run.
    """
    if not doc_path.exists():
        return {'missing_doc': True, 'table': table_name}

    doc_fields = parse_md_fields(doc_path.read_text(encoding='utf-8'))

    db_field_names = {col['column'] for col in db_cols}
    doc_field_names = set(doc_fields)

    def normalize(type_str):
        # Types are compared ignoring case and spaces.
        return type_str.upper().replace(' ', '')

    type_mismatches = []
    for col in db_cols:
        doc_field = doc_fields.get(col['column'])
        if doc_field is None:
            continue
        if normalize(col['type']) != normalize(doc_field['type']):
            type_mismatches.append({
                'column': col['column'],
                'db_type': col['type'],
                'doc_type': doc_field['type'],
            })

    return {
        'table': table_name,
        # sorted() instead of list(): set iteration order is not stable
        # across runs, which made the saved JSON differ for identical input.
        'missing_in_doc': sorted(db_field_names - doc_field_names),
        'extra_in_doc': sorted(doc_field_names - db_field_names),
        'type_mismatches': type_mismatches,
        'doc_path': str(doc_path),
    }

def main():
    """Compare every database table with its BD manual and report the result.

    Loads the schema via ``get_db_schema()``, looks up each table's document
    (tables ending in ``_ex`` under the ``Ex`` directory, all others under
    ``main``), prints a per-table report plus a summary to stdout, and writes
    the collected differences to ``tmp/bd_manual_diff.json``. Document and
    output paths are relative to the current working directory.
    """
    db_schema = get_db_schema()

    main_dir = Path('etl_billiards/docs/bd_manual/main')
    ex_dir = Path('etl_billiards/docs/bd_manual/Ex')

    issue_keys = ('missing_in_doc', 'extra_in_doc', 'type_mismatches', 'missing_doc')
    all_diffs = []

    for table_name, columns in sorted(db_schema.items()):
        # Only the directory depends on the _ex suffix; the file name always
        # carries the full table name.
        doc_dir = ex_dir if table_name.endswith('_ex') else main_dir
        doc_path = doc_dir / f'BD_manual_{table_name}.md'

        diff = compare_and_report(table_name, columns, doc_path)
        if any(diff.get(key) for key in issue_keys):
            all_diffs.append(diff)

    # Print the report
    print("=" * 80)
    print("BD Manual vs Database Schema Comparison Report")
    print("=" * 80)

    total_missing = 0
    total_extra = 0
    total_type_mismatch = 0

    for diff in all_diffs:
        table = diff['table']
        if diff.get('missing_doc'):
            print(f"\n### {table}: MISSING DOCUMENT ###")
            continue

        # Every remaining entry has at least one issue (see the filter above),
        # so the table header is always needed.
        print(f"\n### {table} ###")

        missing = diff['missing_in_doc']
        if missing:
            print(f"  Missing in doc ({len(missing)}): {', '.join(sorted(missing))}")
            total_missing += len(missing)

        extra = diff['extra_in_doc']
        if extra:
            print(f"  Extra in doc ({len(extra)}): {', '.join(sorted(extra))}")
            total_extra += len(extra)

        mismatches = diff['type_mismatches']
        if mismatches:
            print(f"  Type mismatches ({len(mismatches)}):")
            for m in mismatches:
                print(f"    - {m['column']}: doc={m['doc_type']}, db={m['db_type']}")
            total_type_mismatch += len(mismatches)

    print("\n" + "=" * 80)
    print(f"Summary: {total_missing} missing, {total_extra} extra, {total_type_mismatch} type mismatches")
    print("=" * 80)

    # Save the detailed results as JSON. Create the directory first so the
    # run does not fail at the very end when tmp/ is absent.
    out_path = Path('tmp/bd_manual_diff.json')
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(all_diffs, f, ensure_ascii=False, indent=2)
    print("\nDetailed results saved to tmp/bd_manual_diff.json")

# Run the comparison only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()