feiqiu-ETL/tmp/query_missing_tables.py

import psycopg2
from decimal import Decimal
from datetime import datetime, date, time
import os

dsn = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
conn = psycopg2.connect(dsn)
cur = conn.cursor()

missing_tables = [
    'dwd_assistant_trash_event',
    'dwd_assistant_trash_event_ex',
    'dwd_groupbuy_redemption',
    'dwd_groupbuy_redemption_ex',
    'dwd_platform_coupon_redemption',
    'dwd_platform_coupon_redemption_ex'
]

output_dir = r'c:\dev\LLTQ\ETL\feiqiu-ETL\tmp\table_analysis'
os.makedirs(output_dir, exist_ok=True)

def safe_str(val):
    if val is None:
        return 'NULL'
    if isinstance(val, (Decimal, float)):
        return f'{val:.2f}' if isinstance(val, Decimal) else str(val)
    if isinstance(val, (datetime, date, time)):
        return str(val)
    return str(val)

for table_name in missing_tables:
    print(f"Processing {table_name}...")
    output_file = os.path.join(output_dir, f'{table_name}.txt')

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('='*80 + '\n')
        f.write(f'Table: billiards_dwd.{table_name}\n')

        # Get primary key
        cur.execute("""
            SELECT a.attname
            FROM pg_index i
            JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
            WHERE i.indrelid = %s::regclass AND i.indisprimary
        """, (f'billiards_dwd.{table_name}',))
        pk_cols = [row[0] for row in cur.fetchall()]
        pk_str = ', '.join(pk_cols) if pk_cols else 'None'
        f.write(f'Primary Key: {pk_str}\n')
        f.write('='*80 + '\n\n')

        # Get columns
        cur.execute("""
            SELECT column_name, data_type, is_nullable,
                   character_maximum_length, numeric_precision, numeric_scale
            FROM information_schema.columns
            WHERE table_schema = 'billiards_dwd' AND table_name = %s
            ORDER BY ordinal_position
        """, (table_name,))
        columns = cur.fetchall()

        f.write('## COLUMNS\n')
        f.write('-'*80 + '\n')
        for col in columns:
            col_name, data_type, nullable, char_len, num_prec, num_scale = col
            type_str = data_type
            if char_len:
                type_str = f'{data_type}({char_len})'
            elif num_prec and num_scale:
                type_str = f'{data_type}({num_prec},{num_scale})'
            pk_mark = ' | PK' if col_name in pk_cols else ''
            f.write(f'{col_name}: {type_str} | nullable={nullable}{pk_mark}\n')

        f.write('\n## VALUE ANALYSIS (for enum detection)\n')
        f.write('-'*80 + '\n\n')

        # Get row count
        cur.execute(f'SELECT COUNT(*) FROM billiards_dwd.{table_name}')
        row_count = cur.fetchone()[0]

        col_names = [c[0] for c in columns]
        col_types = [c[1] for c in columns]

        for col_name, col_type in zip(col_names, col_types):
            f.write(f'{col_name}:\n')

            # Count total, non-null, distinct
            cur.execute(f'''
                SELECT COUNT(*), COUNT({col_name}), COUNT(DISTINCT {col_name})
                FROM billiards_dwd.{table_name}
            ''')
            total, non_null, distinct = cur.fetchone()
            f.write(f'  Total: {total}, Non-null: {non_null}, Distinct: {distinct}\n')

            # Flag likely enum
            if distinct > 0 and distinct <= 15:
                f.write('  *** LIKELY ENUM (distinct <= 15) ***\n')

            # Top 15 values
            if col_type not in ('bytea', 'json', 'jsonb'):
                try:
                    cur.execute(f'''
                        SELECT {col_name}, COUNT(*) as cnt
                        FROM billiards_dwd.{table_name}
                        WHERE {col_name} IS NOT NULL
                        GROUP BY {col_name}
                        ORDER BY cnt DESC
                        LIMIT 15
                    ''')
                    top_vals = cur.fetchall()
                    f.write('  Top values:\n')
                    for val, cnt in top_vals:
                        f.write(f'    {safe_str(val)}: {cnt}\n')
                except Exception as e:
                    f.write(f'  Error getting top values: {e}\n')
            f.write('\n')

        # Sample data
        f.write('## SAMPLE DATA (first 10 rows)\n')
        f.write('-'*80 + '\n')
        try:
            cur.execute(f'SELECT * FROM billiards_dwd.{table_name} LIMIT 10')
            sample_rows = cur.fetchall()
            f.write(f'Columns: {col_names}\n\n')
            for i, row in enumerate(sample_rows, 1):
                f.write(f'Row {i}:\n')
                for col_name, val in zip(col_names, row):
                    f.write(f'  {col_name}: {safe_str(val)}\n')
                f.write('\n')
            if not sample_rows:
                f.write('No sample data available.\n')
        except Exception as e:
            f.write(f'Error fetching samples: {e}\n')

    print(f"  -> {output_file} (rows: {row_count})")

conn.close()
print("\nDone!")