"""Dump schema, value statistics and sample rows for selected DWD tables.

For every table named in ``missing_tables`` a text report is written to
``<output_dir>/<table>.txt``. Each report contains the primary key, the
column list, per-column total / non-null / distinct counts together with
the most frequent values (to help spot enum-like columns), and the first
10 rows of the table.
"""
import os
from datetime import date, datetime, time
from decimal import Decimal

import psycopg2
from psycopg2 import sql

# NOTE(review): credentials are hard-coded in this DSN; consider loading it
# from the environment or from a config file kept out of version control.
dsn = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'

missing_tables = [
    'dwd_assistant_trash_event',
    'dwd_assistant_trash_event_ex',
    'dwd_groupbuy_redemption',
    'dwd_groupbuy_redemption_ex',
    'dwd_platform_coupon_redemption',
    'dwd_platform_coupon_redemption_ex',
]

output_dir = r'c:\dev\LLTQ\ETL\feiqiu-ETL\tmp\table_analysis'
os.makedirs(output_dir, exist_ok=True)


def safe_str(val):
    """Return a printable representation of a database value.

    ``None`` is rendered as ``'NULL'``, ``Decimal`` values are formatted
    with two decimal places, and every other value (floats, dates, times,
    strings, ...) uses its default ``str()`` form.
    """
    if val is None:
        return 'NULL'
    if isinstance(val, Decimal):
        return f'{val:.2f}'
    return str(val)


conn = psycopg2.connect(dsn)
cur = conn.cursor()

try:
    for table_name in missing_tables:
        print(f"Processing {table_name}...")
        output_file = os.path.join(output_dir, f'{table_name}.txt')

        # Safely quoted "schema"."table" reference reused by every dynamic
        # query below.
        table_ident = sql.SQL('{}.{}').format(
            sql.Identifier('billiards_dwd'),
            sql.Identifier(table_name),
        )

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write('='*80 + '\n')
            f.write(f'Table: billiards_dwd.{table_name}\n')

            # Get primary key
            cur.execute("""
                SELECT a.attname
                FROM pg_index i
                JOIN pg_attribute a
                  ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
                WHERE i.indrelid = %s::regclass AND i.indisprimary
            """, (f'billiards_dwd.{table_name}',))
            pk_cols = [row[0] for row in cur.fetchall()]
            pk_str = ', '.join(pk_cols) if pk_cols else 'None'
            f.write(f'Primary Key: {pk_str}\n')
            f.write('='*80 + '\n\n')

            # Get columns
            cur.execute("""
                SELECT column_name, data_type, is_nullable,
                       character_maximum_length, numeric_precision,
                       numeric_scale
                FROM information_schema.columns
                WHERE table_schema = 'billiards_dwd' AND table_name = %s
                ORDER BY ordinal_position
            """, (table_name,))
            columns = cur.fetchall()

            f.write('## COLUMNS\n')
            f.write('-'*80 + '\n')
            for col in columns:
                (col_name, data_type, nullable,
                 char_len, num_prec, num_scale) = col
                type_str = data_type
                if char_len:
                    type_str = f'{data_type}({char_len})'
                elif num_prec and num_scale:
                    type_str = f'{data_type}({num_prec},{num_scale})'
                pk_mark = ' | PK' if col_name in pk_cols else ''
                f.write(
                    f'{col_name}: {type_str} | nullable={nullable}{pk_mark}\n'
                )

            f.write('\n## VALUE ANALYSIS (for enum detection)\n')
            f.write('-'*80 + '\n\n')

            # Get row count
            cur.execute(
                sql.SQL('SELECT COUNT(*) FROM {}').format(table_ident)
            )
            row_count = cur.fetchone()[0]

            col_names = [c[0] for c in columns]
            col_types = [c[1] for c in columns]

            for col_name, col_type in zip(col_names, col_types):
                # Quote the column name so mixed-case or reserved-word
                # columns do not break the generated SQL.
                col_ident = sql.Identifier(col_name)
                f.write(f'{col_name}:\n')

                # Count total, non-null, distinct
                cur.execute(sql.SQL('''
                    SELECT COUNT(*), COUNT({col}), COUNT(DISTINCT {col})
                    FROM {tbl}
                ''').format(col=col_ident, tbl=table_ident))
                total, non_null, distinct = cur.fetchone()
                f.write(f' Total: {total}, Non-null: {non_null}, Distinct: {distinct}\n')

                # Flag likely enum
                if 0 < distinct <= 15:
                    f.write(' *** LIKELY ENUM (distinct <= 15) ***\n')

                # Top 15 values
                if col_type not in ('bytea', 'json', 'jsonb'):
                    try:
                        cur.execute(sql.SQL('''
                            SELECT {col}, COUNT(*) as cnt
                            FROM {tbl}
                            WHERE {col} IS NOT NULL
                            GROUP BY {col}
                            ORDER BY cnt DESC
                            LIMIT 15
                        ''').format(col=col_ident, tbl=table_ident))
                        top_vals = cur.fetchall()
                        f.write(' Top values:\n')
                        for val, cnt in top_vals:
                            f.write(f' {safe_str(val)}: {cnt}\n')
                    except Exception as e:
                        # A failed statement leaves the transaction aborted;
                        # roll back so the following queries can still run.
                        conn.rollback()
                        f.write(f' Error getting top values: {e}\n')
                f.write('\n')

            # Sample data
            f.write('## SAMPLE DATA (first 10 rows)\n')
            f.write('-'*80 + '\n')
            try:
                cur.execute(
                    sql.SQL('SELECT * FROM {} LIMIT 10').format(table_ident)
                )
                sample_rows = cur.fetchall()
                f.write(f'Columns: {col_names}\n\n')
                for i, row in enumerate(sample_rows, 1):
                    f.write(f'Row {i}:\n')
                    for col_name, val in zip(col_names, row):
                        f.write(f' {col_name}: {safe_str(val)}\n')
                    f.write('\n')
                if not sample_rows:
                    f.write('No sample data available.\n')
            except Exception as e:
                # Reset the aborted transaction so the next table can be
                # processed.
                conn.rollback()
                f.write(f'Error fetching samples: {e}\n')

        print(f" -> {output_file} (rows: {row_count})")
finally:
    # Release database resources even when a table fails part-way through.
    cur.close()
    conn.close()

print("\nDone!")