#!/usr/bin/env python3
"""
Verify that the DWD data has been updated to the latest state, and check
whether the SPI warning should disappear.

The script connects to the database named by the ``TEST_DB_DSN`` environment
variable (optionally loaded from a ``.env`` file) and prints a read-only
report comparing ``ods.settlement_records`` with ``dwd.dwd_settlement_head``.
"""

import os
import psycopg2
from datetime import datetime
from dotenv import load_dotenv


def _connection_target(dsn):
    """Return a printable description of where *dsn* points.

    For a DSN that contains ``@`` this is the text after the last ``@``, so
    the part in front of it is never echoed. A DSN without ``@`` yields a
    fixed placeholder instead of raising ``IndexError``; the DSN is not
    printed in that case because it cannot be split safely here.
    """
    _, separator, target = dsn.rpartition('@')
    return target if separator else "<redacted>"


def _report_dwd_latest(cur):
    """Print the latest pay time and record counts of dwd_settlement_head."""
    print("\n📊 检查 DWD settlement_head 最新数据:")
    cur.execute("""
        SELECT
            MAX(pay_time) as latest_pay_time,
            COUNT(*) as total_records,
            COUNT(CASE WHEN pay_time >= '2026-02-15' THEN 1 END) as records_after_0215
        FROM dwd.dwd_settlement_head
    """)
    # An aggregate without GROUP BY always yields exactly one row.
    latest_pay_time, total_records, records_after_0215 = cur.fetchone()
    print(f" 最新支付时间: {latest_pay_time}")
    print(f" 总记录数: {total_records:,}")
    print(f" 2026-02-15后记录数: {records_after_0215:,}")


def _report_recent_distribution(cur):
    """Print per-day DWD record counts for the most recent days (max 10)."""
    print("\n📅 最近几天数据分布:")
    cur.execute("""
        SELECT
            pay_time::date as pay_date,
            COUNT(*) as record_count
        FROM dwd.dwd_settlement_head
        WHERE pay_time >= '2026-02-20'
        GROUP BY pay_time::date
        ORDER BY pay_date DESC
        LIMIT 10
    """)
    for pay_date, count in cur.fetchall():
        print(f" {pay_date}: {count:,} 条记录")


def _report_layer_comparison(cur):
    """Print latest time and record counts of the ODS and DWD tables."""
    print("\n🔄 ODS vs DWD 数据一致性检查:")
    cur.execute("""
        SELECT
            'ODS' as layer,
            MAX(paytime) as latest_time,
            COUNT(*) as total_count,
            COUNT(CASE WHEN paytime >= '2026-02-15' THEN 1 END) as after_0215_count
        FROM ods.settlement_records
        UNION ALL
        SELECT
            'DWD' as layer,
            MAX(pay_time) as latest_time,
            COUNT(*) as total_count,
            COUNT(CASE WHEN pay_time >= '2026-02-15' THEN 1 END) as after_0215_count
        FROM dwd.dwd_settlement_head
        ORDER BY layer
    """)
    for layer, latest_time, total_count, after_0215_count in cur.fetchall():
        print(f" {layer}: 最新时间={latest_time}, 总数={total_count:,}, 2/15后={after_0215_count:,}")


def _report_missing_records(cur):
    """Print how many ODS rows have no matching DWD row, with a breakdown."""
    # Anti-join: ODS rows whose id has no order_settle_id counterpart in DWD.
    cur.execute("""
        SELECT COUNT(*) as missing_count
        FROM ods.settlement_records o
        LEFT JOIN dwd.dwd_settlement_head d ON o.id = d.order_settle_id
        WHERE d.order_settle_id IS NULL
    """)
    missing_count = cur.fetchone()[0]
    print(f"\n❓ ODS 中存在但 DWD 中缺失的记录: {missing_count:,} 条")

    if missing_count == 0:
        print("✅ ODS 和 DWD 数据完全同步")
        return

    print("⚠️ 仍有数据未同步到 DWD")
    # Show which pay dates the missing rows belong to (latest 5 dates).
    cur.execute("""
        SELECT
            o.paytime::date as pay_date,
            COUNT(*) as missing_count
        FROM ods.settlement_records o
        LEFT JOIN dwd.dwd_settlement_head d ON o.id = d.order_settle_id
        WHERE d.order_settle_id IS NULL
        GROUP BY o.paytime::date
        ORDER BY pay_date DESC
        LIMIT 5
    """)
    print(" 缺失记录按日期分布:")
    for pay_date, count in cur.fetchall():
        print(f" {pay_date}: {count:,} 条")


def main():
    """Run all DWD freshness checks and print the report to stdout.

    Raises:
        RuntimeError: if the ``TEST_DB_DSN`` environment variable is unset
            or empty.
    """
    # Load environment variables (e.g. from a .env file).
    load_dotenv()

    test_db_dsn = os.environ.get('TEST_DB_DSN')
    if not test_db_dsn:
        raise RuntimeError("TEST_DB_DSN 环境变量未设置")

    print("🔍 验证 DWD 数据更新状况")
    print(f"连接数据库: {_connection_target(test_db_dsn)}")

    conn = psycopg2.connect(test_db_dsn)
    try:
        # `with conn` only commits or rolls back the transaction; psycopg2
        # does not close the connection on exit, hence the explicit close.
        with conn:
            with conn.cursor() as cur:
                _report_dwd_latest(cur)
                _report_recent_distribution(cur)
                _report_layer_comparison(cur)
                _report_missing_records(cur)
    finally:
        conn.close()


if __name__ == "__main__":
    main()