# Table-analysis script: for each listed billiards_dwd table, dump its
# primary key, column definitions, per-column value distributions (enum
# detection) and sample rows into one text report file per table.
import psycopg2
|
|
from decimal import Decimal
|
|
from datetime import datetime, date, time
|
|
import os
|
|
|
|
# Connection setup.
# NOTE(security): the fallback DSN below embeds credentials in source code.
# Prefer supplying the DSN via the PG_DSN environment variable; the
# hard-coded value is kept only as a backward-compatible default.
dsn = os.environ.get(
    'PG_DSN',
    'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test',
)
conn = psycopg2.connect(dsn)
cur = conn.cursor()
|
|
|
|
# Tables still to be analyzed: each base table together with its
# companion "_ex" extension table, in base/_ex pairs.
_BASE_TABLES = (
    'dwd_assistant_trash_event',
    'dwd_groupbuy_redemption',
    'dwd_platform_coupon_redemption',
)
missing_tables = [
    table
    for base in _BASE_TABLES
    for table in (base, base + '_ex')
]
|
|
|
|
# Directory that receives one <table_name>.txt report per analyzed table.
output_dir = r'c:\dev\LLTQ\ETL\feiqiu-ETL\tmp\table_analysis'
# Idempotent: exist_ok=True means rerunning the script does not fail.
os.makedirs(output_dir, exist_ok=True)
|
|
|
|
def safe_str(val):
|
|
if val is None:
|
|
return 'NULL'
|
|
if isinstance(val, (Decimal, float)):
|
|
return f'{val:.2f}' if isinstance(val, Decimal) else str(val)
|
|
if isinstance(val, (datetime, date, time)):
|
|
return str(val)
|
|
return str(val)
|
|
|
|
# Write one report file per table: primary key, columns, per-column value
# distribution (enum detection) and sample rows.
#
# SQL-building note: table_name values come from the hard-coded
# missing_tables list above, so interpolating them is safe here.  Column
# names come back from the catalog and are double-quoted below so that a
# mixed-case or reserved-word identifier cannot break the dynamic queries
# (the original interpolated them unquoted).
for table_name in missing_tables:
    print(f"Processing {table_name}...")
    output_file = os.path.join(output_dir, f'{table_name}.txt')

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('='*80 + '\n')
        f.write(f'Table: billiards_dwd.{table_name}\n')

        # Primary-key columns, straight from the pg_index catalog.
        cur.execute("""
            SELECT a.attname
            FROM pg_index i
            JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
            WHERE i.indrelid = %s::regclass AND i.indisprimary
        """, (f'billiards_dwd.{table_name}',))
        pk_cols = [row[0] for row in cur.fetchall()]
        pk_str = ', '.join(pk_cols) if pk_cols else 'None'
        f.write(f'Primary Key: {pk_str}\n')
        f.write('='*80 + '\n\n')

        # Column definitions in declaration order.
        cur.execute("""
            SELECT column_name, data_type, is_nullable,
                   character_maximum_length, numeric_precision, numeric_scale
            FROM information_schema.columns
            WHERE table_schema = 'billiards_dwd' AND table_name = %s
            ORDER BY ordinal_position
        """, (table_name,))
        columns = cur.fetchall()

        f.write('## COLUMNS\n')
        f.write('-'*80 + '\n')
        for col in columns:
            col_name, data_type, nullable, char_len, num_prec, num_scale = col
            type_str = data_type
            # Compare against None explicitly: numeric_scale is 0 for types
            # like numeric(10,0), and the original truthiness test silently
            # dropped the (precision,scale) suffix for those columns.
            if char_len is not None:
                type_str = f'{data_type}({char_len})'
            elif num_prec is not None and num_scale is not None:
                type_str = f'{data_type}({num_prec},{num_scale})'
            pk_mark = ' | PK' if col_name in pk_cols else ''
            f.write(f'{col_name}: {type_str} | nullable={nullable}{pk_mark}\n')

        f.write('\n## VALUE ANALYSIS (for enum detection)\n')
        f.write('-'*80 + '\n\n')

        # Total row count, echoed in the console summary at the end.
        cur.execute(f'SELECT COUNT(*) FROM billiards_dwd.{table_name}')
        row_count = cur.fetchone()[0]

        col_names = [c[0] for c in columns]
        col_types = [c[1] for c in columns]

        for col_name, col_type in zip(col_names, col_types):
            f.write(f'{col_name}:\n')

            # Per-column cardinality: total, non-null and distinct counts.
            cur.execute(f'''
                SELECT COUNT(*), COUNT("{col_name}"), COUNT(DISTINCT "{col_name}")
                FROM billiards_dwd.{table_name}
            ''')
            total, non_null, distinct = cur.fetchone()
            f.write(f'  Total: {total}, Non-null: {non_null}, Distinct: {distinct}\n')

            # Low cardinality strongly suggests an enum-like status/type column.
            if distinct > 0 and distinct <= 15:
                f.write('  *** LIKELY ENUM (distinct <= 15) ***\n')

            # Most frequent values; skipped for types that cannot be grouped
            # or printed compactly.
            if col_type not in ('bytea', 'json', 'jsonb'):
                try:
                    cur.execute(f'''
                        SELECT "{col_name}", COUNT(*) as cnt
                        FROM billiards_dwd.{table_name}
                        WHERE "{col_name}" IS NOT NULL
                        GROUP BY "{col_name}"
                        ORDER BY cnt DESC
                        LIMIT 15
                    ''')
                    top_vals = cur.fetchall()
                    f.write('  Top values:\n')
                    for val, cnt in top_vals:
                        f.write(f'    {safe_str(val)}: {cnt}\n')
                except Exception as e:
                    # Best-effort: one failed aggregation must not abort the
                    # report.  Roll back so the aborted transaction does not
                    # poison every subsequent query (psycopg2 otherwise
                    # raises InFailedSqlTransaction for the rest of the run).
                    conn.rollback()
                    f.write(f'  Error getting top values: {e}\n')
            f.write('\n')

        # Sample rows for eyeballing realistic values.
        f.write('## SAMPLE DATA (first 10 rows)\n')
        f.write('-'*80 + '\n')
        try:
            cur.execute(f'SELECT * FROM billiards_dwd.{table_name} LIMIT 10')
            sample_rows = cur.fetchall()
            f.write(f'Columns: {col_names}\n\n')
            for i, row in enumerate(sample_rows, 1):
                f.write(f'Row {i}:\n')
                for col_name, val in zip(col_names, row):
                    f.write(f'  {col_name}: {safe_str(val)}\n')
                f.write('\n')
            if not sample_rows:
                f.write('No sample data available.\n')
        except Exception as e:
            # Same best-effort + rollback rationale as above.
            conn.rollback()
            f.write(f'Error fetching samples: {e}\n')

    print(f" -> {output_file} (rows: {row_count})")
|
|
|
|
# Release database resources explicitly (the original leaked the cursor,
# relying on interpreter shutdown to reclaim it).
cur.close()
conn.close()
print("\nDone!")
|