Database: update data-validation and write logic.

Author: Neo
Date: 2026-02-01 03:46:16 +08:00
parent 9948000b71
commit 076f5755ca
128 changed files with 494310 additions and 2819 deletions

tmp/query_missing_tables.py (new file, 137 lines)

@@ -0,0 +1,137 @@
import os
from datetime import datetime, date, time
from decimal import Decimal

import psycopg2

# Ad-hoc analysis script: for each table still missing from the ETL, dump its
# schema, per-column value distribution, and sample rows to a text report.
dsn = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
conn = psycopg2.connect(dsn)
# Autocommit so a failed probe query doesn't abort the rest of the session.
conn.autocommit = True
cur = conn.cursor()

missing_tables = [
    'dwd_assistant_trash_event',
    'dwd_assistant_trash_event_ex',
    'dwd_groupbuy_redemption',
    'dwd_groupbuy_redemption_ex',
    'dwd_platform_coupon_redemption',
    'dwd_platform_coupon_redemption_ex',
]

output_dir = r'c:\dev\LLTQ\ETL\feiqiu-ETL\tmp\table_analysis'
os.makedirs(output_dir, exist_ok=True)


def safe_str(val):
    """Render a DB value for the report; Decimals are fixed to two places."""
    if val is None:
        return 'NULL'
    if isinstance(val, Decimal):
        return f'{val:.2f}'
    if isinstance(val, (datetime, date, time)):
        return str(val)
    return str(val)


for table_name in missing_tables:
    print(f"Processing {table_name}...")
    output_file = os.path.join(output_dir, f'{table_name}.txt')
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('=' * 80 + '\n')
        f.write(f'Table: billiards_dwd.{table_name}\n')

        # Primary-key columns from the pg_index catalog
        cur.execute("""
            SELECT a.attname
            FROM pg_index i
            JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
            WHERE i.indrelid = %s::regclass AND i.indisprimary
        """, (f'billiards_dwd.{table_name}',))
        pk_cols = [row[0] for row in cur.fetchall()]
        pk_str = ', '.join(pk_cols) if pk_cols else 'None'
        f.write(f'Primary Key: {pk_str}\n')
        f.write('=' * 80 + '\n\n')

        # Column definitions in declaration order
        cur.execute("""
            SELECT column_name, data_type, is_nullable,
                   character_maximum_length, numeric_precision, numeric_scale
            FROM information_schema.columns
            WHERE table_schema = 'billiards_dwd' AND table_name = %s
            ORDER BY ordinal_position
        """, (table_name,))
        columns = cur.fetchall()
        f.write('## COLUMNS\n')
        f.write('-' * 80 + '\n')
        for col in columns:
            col_name, data_type, nullable, char_len, num_prec, num_scale = col
            type_str = data_type
            if char_len:
                type_str = f'{data_type}({char_len})'
            elif num_prec and num_scale:
                type_str = f'{data_type}({num_prec},{num_scale})'
            pk_mark = ' | PK' if col_name in pk_cols else ''
            f.write(f'{col_name}: {type_str} | nullable={nullable}{pk_mark}\n')

        f.write('\n## VALUE ANALYSIS (for enum detection)\n')
        f.write('-' * 80 + '\n\n')

        # Row count for the summary printed after the report is written
        cur.execute(f'SELECT COUNT(*) FROM billiards_dwd.{table_name}')
        row_count = cur.fetchone()[0]

        col_names = [c[0] for c in columns]
        col_types = [c[1] for c in columns]
        for col_name, col_type in zip(col_names, col_types):
            f.write(f'{col_name}:\n')
            # Total, non-null, and distinct counts per column
            cur.execute(f'''
                SELECT COUNT(*), COUNT({col_name}), COUNT(DISTINCT {col_name})
                FROM billiards_dwd.{table_name}
            ''')
            total, non_null, distinct = cur.fetchone()
            f.write(f'  Total: {total}, Non-null: {non_null}, Distinct: {distinct}\n')
            # A low distinct count suggests an enum-like column
            if 0 < distinct <= 15:
                f.write('  *** LIKELY ENUM (distinct <= 15) ***\n')
            # Top 15 most frequent values (skip types that cannot be grouped)
            if col_type not in ('bytea', 'json', 'jsonb'):
                try:
                    cur.execute(f'''
                        SELECT {col_name}, COUNT(*) AS cnt
                        FROM billiards_dwd.{table_name}
                        WHERE {col_name} IS NOT NULL
                        GROUP BY {col_name}
                        ORDER BY cnt DESC
                        LIMIT 15
                    ''')
                    top_vals = cur.fetchall()
                    f.write('  Top values:\n')
                    for val, cnt in top_vals:
                        f.write(f'    {safe_str(val)}: {cnt}\n')
                except Exception as e:
                    f.write(f'  Error getting top values: {e}\n')
            f.write('\n')

        # First 10 rows, printed column-by-column for readability
        f.write('## SAMPLE DATA (first 10 rows)\n')
        f.write('-' * 80 + '\n')
        try:
            cur.execute(f'SELECT * FROM billiards_dwd.{table_name} LIMIT 10')
            sample_rows = cur.fetchall()
            f.write(f'Columns: {col_names}\n\n')
            for i, row in enumerate(sample_rows, 1):
                f.write(f'Row {i}:\n')
                for col_name, val in zip(col_names, row):
                    f.write(f'  {col_name}: {safe_str(val)}\n')
                f.write('\n')
            if not sample_rows:
                f.write('No sample data available.\n')
        except Exception as e:
            f.write(f'Error fetching samples: {e}\n')
    print(f"  -> {output_file} (rows: {row_count})")

conn.close()
print("\nDone!")