116 lines
4.4 KiB
Python
116 lines
4.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
Verify that the DWD data has been updated to the latest state and check
whether the SPI warning should disappear.
"""
|
|
|
|
import os
|
|
import psycopg2
|
|
from datetime import datetime
|
|
from dotenv import load_dotenv
|
|
|
|
def _dsn_target(dsn):
    """Return the part of *dsn* that is safe to print (no credentials).

    For URI-style DSNs (``scheme://user:password@host:port/db``) this is the
    text after the LAST ``@``.  Splitting on the last separator matters: a
    password may itself contain ``@``, and splitting on the first one would
    echo part of the password.  A DSN without ``@`` (for example the
    ``key=value`` form) may carry ``password=...`` anywhere, so a fixed
    placeholder is returned instead of any part of the DSN.
    """
    _, separator, target = dsn.rpartition('@')
    return target if separator else "(DSN 中未找到主机信息)"


def _report_dwd_latest(cur):
    """Print the newest pay_time, total row count and rows since 2026-02-15 in DWD."""
    print("\n📊 检查 DWD settlement_head 最新数据:")
    cur.execute("""
        SELECT
            MAX(pay_time) as latest_pay_time,
            COUNT(*) as total_records,
            COUNT(CASE WHEN pay_time >= '2026-02-15' THEN 1 END) as records_after_0215
        FROM dwd.dwd_settlement_head
    """)

    # An aggregate query without GROUP BY always yields exactly one row.
    latest_pay_time, total_records, records_after_0215 = cur.fetchone()

    print(f" 最新支付时间: {latest_pay_time}")
    print(f" 总记录数: {total_records:,}")
    print(f" 2026-02-15后记录数: {records_after_0215:,}")


def _report_recent_distribution(cur):
    """Print per-day DWD row counts for pay dates from 2026-02-20 on (newest 10 days)."""
    print("\n📅 最近几天数据分布:")
    cur.execute("""
        SELECT
            pay_time::date as pay_date,
            COUNT(*) as record_count
        FROM dwd.dwd_settlement_head
        WHERE pay_time >= '2026-02-20'
        GROUP BY pay_time::date
        ORDER BY pay_date DESC
        LIMIT 10
    """)

    for pay_date, count in cur.fetchall():
        print(f" {pay_date}: {count:,} 条记录")


def _report_layer_consistency(cur):
    """Print latest time, total count and post-2026-02-15 count for ODS and DWD."""
    print("\n🔄 ODS vs DWD 数据一致性检查:")
    cur.execute("""
        SELECT
            'ODS' as layer,
            MAX(paytime) as latest_time,
            COUNT(*) as total_count,
            COUNT(CASE WHEN paytime >= '2026-02-15' THEN 1 END) as after_0215_count
        FROM ods.settlement_records
        UNION ALL
        SELECT
            'DWD' as layer,
            MAX(pay_time) as latest_time,
            COUNT(*) as total_count,
            COUNT(CASE WHEN pay_time >= '2026-02-15' THEN 1 END) as after_0215_count
        FROM dwd.dwd_settlement_head
        ORDER BY layer
    """)

    for layer, latest_time, total_count, after_0215_count in cur.fetchall():
        print(f" {layer}: 最新时间={latest_time}, 总数={total_count:,}, 2/15后={after_0215_count:,}")


def _report_missing_records(cur):
    """Print how many ODS rows have no DWD counterpart, with a per-day breakdown if any."""
    # Anti-join: ODS rows whose id matches no dwd_settlement_head.order_settle_id.
    cur.execute("""
        SELECT COUNT(*) as missing_count
        FROM ods.settlement_records o
        LEFT JOIN dwd.dwd_settlement_head d ON o.id = d.order_settle_id
        WHERE d.order_settle_id IS NULL
    """)

    missing_count = cur.fetchone()[0]
    print(f"\n❓ ODS 中存在但 DWD 中缺失的记录: {missing_count:,} 条")

    if missing_count == 0:
        print("✅ ODS 和 DWD 数据完全同步")
        return

    print("⚠️ 仍有数据未同步到 DWD")

    # Break the missing rows down by pay date (5 most recent dates).
    cur.execute("""
        SELECT
            o.paytime::date as pay_date,
            COUNT(*) as missing_count
        FROM ods.settlement_records o
        LEFT JOIN dwd.dwd_settlement_head d ON o.id = d.order_settle_id
        WHERE d.order_settle_id IS NULL
        GROUP BY o.paytime::date
        ORDER BY pay_date DESC
        LIMIT 5
    """)

    print(" 缺失记录按日期分布:")
    for pay_date, count in cur.fetchall():
        print(f" {pay_date}: {count:,} 条")


def main():
    """Report whether dwd.dwd_settlement_head is in sync with ods.settlement_records.

    Loads environment variables via ``load_dotenv()``, connects to the
    database named by ``TEST_DB_DSN`` and prints four reports: latest DWD
    data, recent per-day distribution, ODS-vs-DWD totals, and ODS rows that
    are missing from DWD.

    Raises:
        RuntimeError: if ``TEST_DB_DSN`` is unset or empty.
    """
    load_dotenv()

    test_db_dsn = os.environ.get('TEST_DB_DSN')
    if not test_db_dsn:
        raise RuntimeError("TEST_DB_DSN 环境变量未设置")

    print("🔍 验证 DWD 数据更新状况")
    print(f"连接数据库: {_dsn_target(test_db_dsn)}")

    # psycopg2's connection context manager only commits/rolls back the
    # transaction; it does NOT close the connection, so close it explicitly.
    conn = psycopg2.connect(test_db_dsn)
    try:
        with conn:
            with conn.cursor() as cur:
                _report_dwd_latest(cur)
                _report_recent_distribution(cur)
                _report_layer_consistency(cur)
                _report_missing_records(cur)
    finally:
        conn.close()
|
|
|
|
# Run the verification only when executed as a script, not when imported.
if __name__ == "__main__":
    main()