数据库 数据校验写入等逻辑更新。
This commit is contained in:
137
tmp/query_missing_tables.py
Normal file
137
tmp/query_missing_tables.py
Normal file
@@ -0,0 +1,137 @@
|
||||
import os
from datetime import datetime, date, time
from decimal import Decimal

import psycopg2
from psycopg2 import sql
|
||||
|
||||
# Connection string for the database to profile.
# SECURITY(review): the fallback below embeds a plaintext password in source
# control. Prefer supplying the DSN through the PG_DSN environment variable;
# the committed credential should be rotated and this literal removed.
dsn = os.environ.get(
    'PG_DSN',
    'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test',
)
conn = psycopg2.connect(dsn)
cur = conn.cursor()

# Tables in the billiards_dwd schema that the report loop below iterates over.
missing_tables = [
    'dwd_assistant_trash_event',
    'dwd_assistant_trash_event_ex',
    'dwd_groupbuy_redemption',
    'dwd_groupbuy_redemption_ex',
    'dwd_platform_coupon_redemption',
    'dwd_platform_coupon_redemption_ex',
]

# One <table_name>.txt report is written into this directory per table.
output_dir = r'c:\dev\LLTQ\ETL\feiqiu-ETL\tmp\table_analysis'
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
def safe_str(val):
    """Render a value fetched from the database as report text.

    Args:
        val: A single column value from a cursor row.

    Returns:
        'NULL' when val is None, a fixed-point string with two decimal
        places when val is a Decimal, and str(val) for every other type
        (floats, dates, times, strings, ...).
    """
    if val is None:
        return 'NULL'
    if isinstance(val, Decimal):
        # NOTE: formatting to 2 places rounds, so Decimals that differ only
        # beyond the second decimal print identically in the report.
        return f'{val:.2f}'
    return str(val)
|
||||
|
||||
# Write one text report per table in missing_tables: primary key, column
# definitions, per-column value statistics (to spot enum-like columns) and
# the first 10 rows. The cursor and connection are always released, even if
# a table fails part-way through.
try:
    for table_name in missing_tables:
        print(f"Processing {table_name}...")
        output_file = os.path.join(output_dir, f'{table_name}.txt')

        # Quoted, schema-qualified identifier; composing it with psycopg2.sql
        # keeps mixed-case or reserved-word names from breaking the queries.
        table_ref = sql.SQL('{}.{}').format(
            sql.Identifier('billiards_dwd'),
            sql.Identifier(table_name),
        )

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write('='*80 + '\n')
            f.write(f'Table: billiards_dwd.{table_name}\n')

            # Get primary key
            cur.execute(
                """
                SELECT a.attname
                FROM pg_index i
                JOIN pg_attribute a
                  ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
                WHERE i.indrelid = %s::regclass AND i.indisprimary
                """,
                (f'billiards_dwd.{table_name}',),
            )
            pk_cols = [row[0] for row in cur.fetchall()]
            pk_str = ', '.join(pk_cols) if pk_cols else 'None'
            f.write(f'Primary Key: {pk_str}\n')
            f.write('='*80 + '\n\n')

            # Get columns
            cur.execute(
                """
                SELECT column_name, data_type, is_nullable,
                       character_maximum_length, numeric_precision, numeric_scale
                FROM information_schema.columns
                WHERE table_schema = 'billiards_dwd' AND table_name = %s
                ORDER BY ordinal_position
                """,
                (table_name,),
            )
            columns = cur.fetchall()

            f.write('## COLUMNS\n')
            f.write('-'*80 + '\n')
            for col_name, data_type, nullable, char_len, num_prec, num_scale in columns:
                type_str = data_type
                if char_len:
                    type_str = f'{data_type}({char_len})'
                elif num_prec and num_scale:
                    # A scale of 0 is falsy, so zero-scale columns are shown
                    # with the bare type name.
                    type_str = f'{data_type}({num_prec},{num_scale})'
                pk_mark = ' | PK' if col_name in pk_cols else ''
                f.write(f'{col_name}: {type_str} | nullable={nullable}{pk_mark}\n')

            f.write('\n## VALUE ANALYSIS (for enum detection)\n')
            f.write('-'*80 + '\n\n')

            # Get row count
            cur.execute(sql.SQL('SELECT COUNT(*) FROM {}').format(table_ref))
            row_count = cur.fetchone()[0]

            col_names = [c[0] for c in columns]
            col_types = [c[1] for c in columns]

            for col_name, col_type in zip(col_names, col_types):
                f.write(f'{col_name}:\n')
                col_ref = sql.Identifier(col_name)

                # Count total, non-null, distinct.
                # COUNT(DISTINCT ...) raises for types that have no equality
                # operator (e.g. json); report that and move on instead of
                # aborting the whole run.
                try:
                    cur.execute(
                        sql.SQL(
                            'SELECT COUNT(*), COUNT({col}), COUNT(DISTINCT {col}) '
                            'FROM {tbl}'
                        ).format(col=col_ref, tbl=table_ref)
                    )
                    total, non_null, distinct = cur.fetchone()
                except psycopg2.Error as e:
                    # A failed statement leaves the transaction aborted; roll
                    # back so the following queries can still run.
                    conn.rollback()
                    f.write(f' Error getting counts: {e}\n')
                    f.write('\n')
                    continue
                f.write(f' Total: {total}, Non-null: {non_null}, Distinct: {distinct}\n')

                # Flag likely enum
                if 0 < distinct <= 15:
                    f.write(' *** LIKELY ENUM (distinct <= 15) ***\n')

                # Top 15 values
                if col_type not in ('bytea', 'json', 'jsonb'):
                    try:
                        cur.execute(
                            sql.SQL(
                                'SELECT {col}, COUNT(*) as cnt '
                                'FROM {tbl} '
                                'WHERE {col} IS NOT NULL '
                                'GROUP BY {col} '
                                'ORDER BY cnt DESC '
                                'LIMIT 15'
                            ).format(col=col_ref, tbl=table_ref)
                        )
                        top_vals = cur.fetchall()
                        f.write(' Top values:\n')
                        for val, cnt in top_vals:
                            f.write(f' {safe_str(val)}: {cnt}\n')
                    except Exception as e:
                        # Best-effort section: record the error, then reset
                        # the transaction so later columns are not poisoned.
                        conn.rollback()
                        f.write(f' Error getting top values: {e}\n')
                f.write('\n')

            # Sample data
            f.write('## SAMPLE DATA (first 10 rows)\n')
            f.write('-'*80 + '\n')
            try:
                cur.execute(sql.SQL('SELECT * FROM {} LIMIT 10').format(table_ref))
                sample_rows = cur.fetchall()
                f.write(f'Columns: {col_names}\n\n')
                for i, row in enumerate(sample_rows, 1):
                    f.write(f'Row {i}:\n')
                    for col_name, val in zip(col_names, row):
                        f.write(f' {col_name}: {safe_str(val)}\n')
                    f.write('\n')
                if not sample_rows:
                    f.write('No sample data available.\n')
            except Exception as e:
                # Same reset as above so the next table starts clean.
                conn.rollback()
                f.write(f'Error fetching samples: {e}\n')

        print(f" -> {output_file} (rows: {row_count})")
finally:
    # Release database resources on both the success and the failure path.
    cur.close()
    conn.close()

print("\nDone!")
|
||||
Reference in New Issue
Block a user