数据库数据校验写入等逻辑更新。

2026-02-01 03:46:16 +08:00
parent 9948000b71
commit 076f5755ca
128 changed files with 494310 additions and 2819 deletions
--- a/tmp/query_missing_tables.py
+++ b/tmp/query_missing_tables.py
@@ -0,0 +1,137 @@
+import psycopg2
+from decimal import Decimal
+from datetime import datetime, date, time
+import os
+
+# NOTE(review): the default DSN below embeds a username and password in source
+# control. The PG_DSN environment variable, when set, takes precedence so the
+# credentials can be supplied at run time; the literal is kept only as a
+# backward-compatible default and should be rotated and removed.
+dsn = os.environ.get(
+    'PG_DSN',
+    'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test',
+)
+conn = psycopg2.connect(dsn)
+cur = conn.cursor()
+
+# Tables to analyse; the loop below looks each one up in the billiards_dwd
+# schema and writes one report file per table.
+missing_tables = [
+    'dwd_assistant_trash_event',
+    'dwd_assistant_trash_event_ex',
+    'dwd_groupbuy_redemption',
+    'dwd_groupbuy_redemption_ex',
+    'dwd_platform_coupon_redemption',
+    'dwd_platform_coupon_redemption_ex',
+]
+
+# Directory that receives the per-table reports. TABLE_ANALYSIS_DIR overrides
+# the original machine-specific path, which remains the default.
+output_dir = os.environ.get(
+    'TABLE_ANALYSIS_DIR',
+    r'c:\dev\LLTQ\ETL\feiqiu-ETL\tmp\table_analysis',
+)
+os.makedirs(output_dir, exist_ok=True)
+
+def safe_str(val):
+    """Render a database value as text for the analysis report.
+
+    ``None`` becomes the literal ``'NULL'``, ``Decimal`` values are formatted
+    with two fixed decimal places, and every other value (floats and
+    date/time objects included) is passed through ``str()``.
+    """
+    if val is None:
+        return 'NULL'
+    # Only Decimal gets fixed-point formatting; floats keep their str() form.
+    if isinstance(val, Decimal):
+        return f'{val:.2f}'
+    return str(val)
+
+# Local import: psycopg2's SQL-composition helpers quote the schema, table and
+# column identifiers that have to be interpolated into the queries below.
+from psycopg2 import sql
+
+# For every table in missing_tables, write a text report containing its
+# primary key, column definitions, per-column value statistics and up to ten
+# sample rows. The cursor and connection are closed even if a table fails.
+try:
+    for table_name in missing_tables:
+        print(f"Processing {table_name}...")
+        output_file = os.path.join(output_dir, f'{table_name}.txt')
+        # Quoted "billiards_dwd"."<table>" reference reused by every query.
+        table_ident = sql.SQL('{}.{}').format(
+            sql.Identifier('billiards_dwd'), sql.Identifier(table_name)
+        )
+
+        with open(output_file, 'w', encoding='utf-8') as f:
+            f.write('='*80 + '\n')
+            f.write(f'Table: billiards_dwd.{table_name}\n')
+
+            # Get primary key
+            cur.execute("""
+                SELECT a.attname
+                FROM pg_index i
+                JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
+                WHERE i.indrelid = %s::regclass AND i.indisprimary
+            """, (f'billiards_dwd.{table_name}',))
+            pk_cols = [row[0] for row in cur.fetchall()]
+            pk_str = ', '.join(pk_cols) if pk_cols else 'None'
+            f.write(f'Primary Key: {pk_str}\n')
+            f.write('='*80 + '\n\n')
+
+            # Get columns
+            cur.execute("""
+                SELECT column_name, data_type, is_nullable,
+                       character_maximum_length, numeric_precision, numeric_scale
+                FROM information_schema.columns
+                WHERE table_schema = 'billiards_dwd' AND table_name = %s
+                ORDER BY ordinal_position
+            """, (table_name,))
+            columns = cur.fetchall()
+
+            f.write('## COLUMNS\n')
+            f.write('-'*80 + '\n')
+            for col_name, data_type, nullable, char_len, num_prec, num_scale in columns:
+                type_str = data_type
+                if char_len:
+                    type_str = f'{data_type}({char_len})'
+                elif num_prec and num_scale:
+                    # A zero or missing scale is falsy, so such columns are
+                    # printed with the bare type name.
+                    type_str = f'{data_type}({num_prec},{num_scale})'
+                pk_mark = ' | PK' if col_name in pk_cols else ''
+                f.write(f'{col_name}: {type_str} | nullable={nullable}{pk_mark}\n')
+
+            f.write('\n## VALUE ANALYSIS (for enum detection)\n')
+            f.write('-'*80 + '\n\n')
+
+            # Get row count (reported on the console once the file is written)
+            cur.execute(sql.SQL('SELECT COUNT(*) FROM {}').format(table_ident))
+            row_count = cur.fetchone()[0]
+
+            col_names = [c[0] for c in columns]
+            col_types = [c[1] for c in columns]
+
+            for col_name, col_type in zip(col_names, col_types):
+                col_ident = sql.Identifier(col_name)
+                f.write(f'{col_name}:\n')
+
+                # Count total, non-null, distinct. COUNT(DISTINCT ...) fails
+                # for types without an equality operator, so it is guarded.
+                try:
+                    cur.execute(
+                        sql.SQL(
+                            'SELECT COUNT(*), COUNT({col}), COUNT(DISTINCT {col}) '
+                            'FROM {tbl}'
+                        ).format(col=col_ident, tbl=table_ident)
+                    )
+                    total, non_null, distinct = cur.fetchone()
+                except psycopg2.Error as e:
+                    # A failed statement leaves the transaction aborted; roll
+                    # back so the following queries can still run.
+                    conn.rollback()
+                    f.write(f'  Error getting counts: {e}\n')
+                    f.write('\n')
+                    continue
+                f.write(f'  Total: {total}, Non-null: {non_null}, Distinct: {distinct}\n')
+
+                # Flag likely enum
+                if 0 < distinct <= 15:
+                    f.write('  *** LIKELY ENUM (distinct <= 15) ***\n')
+
+                # Top 15 values
+                if col_type not in ('bytea', 'json', 'jsonb'):
+                    try:
+                        cur.execute(
+                            sql.SQL(
+                                'SELECT {col}, COUNT(*) as cnt '
+                                'FROM {tbl} '
+                                'WHERE {col} IS NOT NULL '
+                                'GROUP BY {col} '
+                                'ORDER BY cnt DESC '
+                                'LIMIT 15'
+                            ).format(col=col_ident, tbl=table_ident)
+                        )
+                        top_vals = cur.fetchall()
+                        f.write('  Top values:\n')
+                        for val, cnt in top_vals:
+                            f.write(f'    {safe_str(val)}: {cnt}\n')
+                    except psycopg2.Error as e:
+                        # Clear the aborted transaction before moving on.
+                        conn.rollback()
+                        f.write(f'  Error getting top values: {e}\n')
+                f.write('\n')
+
+            # Sample data
+            f.write('## SAMPLE DATA (first 10 rows)\n')
+            f.write('-'*80 + '\n')
+            try:
+                cur.execute(sql.SQL('SELECT * FROM {} LIMIT 10').format(table_ident))
+                sample_rows = cur.fetchall()
+                f.write(f'Columns: {col_names}\n\n')
+                for i, row in enumerate(sample_rows, 1):
+                    f.write(f'Row {i}:\n')
+                    for col_name, val in zip(col_names, row):
+                        f.write(f'  {col_name}: {safe_str(val)}\n')
+                    f.write('\n')
+                if not sample_rows:
+                    f.write('No sample data available.\n')
+            except psycopg2.Error as e:
+                # Clear the aborted transaction so the next table can be read.
+                conn.rollback()
+                f.write(f'Error fetching samples: {e}\n')
+
+        print(f"  -> {output_file} (rows: {row_count})")
+finally:
+    # Release database resources whether or not every table succeeded.
+    cur.close()
+    conn.close()
+
+print("\nDone!")