"""检查 ODS/DWD 数据为什么只到 2/14,以及 SPI canonical_member_id 映射""" import os from pathlib import Path from dotenv import load_dotenv load_dotenv(Path(__file__).resolve().parents[2] / ".env") PG_DSN = os.environ.get("PG_DSN") if not PG_DSN: raise RuntimeError("PG_DSN 未设置") import psycopg2 import psycopg2.extras conn = psycopg2.connect(PG_DSN) conn.autocommit = True def q(sql, params=None): with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: cur.execute(sql, params or ()) return cur.fetchall() # 1. 查 ODS 结算表的实际表名 print("ODS 结算相关表:") rows = q(""" SELECT table_name FROM information_schema.tables WHERE table_schema = 'ods' AND table_name LIKE '%settle%' ORDER BY table_name """) for r in rows: print(f" {r['table_name']}") rows2 = q(""" SELECT table_name FROM information_schema.tables WHERE table_schema = 'ods' AND table_name LIKE '%payment%' ORDER BY table_name """) for r in rows2: print(f" {r['table_name']}") # 2. 查 ODS payment 表的最新数据 rows3 = q(""" SELECT table_name FROM information_schema.tables WHERE table_schema = 'ods' ORDER BY table_name """) print(f"\nODS 全部表 ({len(rows3)} 张):") for r in rows3: print(f" {r['table_name']}") # 3. 检查 SPI 的 canonical_member_id 映射 print("\n" + "=" * 60) print("SPI canonical_member_id 映射分析") rows4 = q(""" WITH consume_source AS ( SELECT s.member_id AS raw_member_id, COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id) AS canonical_member_id, s.pay_time, COALESCE(s.pay_amount, 0) AS pay_amount FROM dwd.dwd_settlement_head s LEFT JOIN dwd.dim_member_card_account mca ON s.member_card_account_id = mca.member_card_id AND mca.scd2_is_current = 1 AND mca.register_site_id = s.site_id AND COALESCE(mca.is_delete, 0) = 0 WHERE s.site_id = (SELECT DISTINCT site_id FROM dwd.dwd_settlement_head LIMIT 1) AND s.settle_type IN (1, 3) AND s.pay_time >= NOW() - INTERVAL '90 days' ) SELECT COUNT(*) AS total_records, COUNT(DISTINCT raw_member_id) AS raw_members, COUNT(DISTINCT canonical_member_id) AS canonical_members, COUNT(*) FILTER (WHERE canonical_member_id IS NULL OR canonical_member_id = 0) AS null_canonical, COUNT(*) FILTER (WHERE raw_member_id != canonical_member_id) AS remapped FROM consume_source """) r = rows4[0] print(f" 总记录: {r['total_records']}") print(f" 原始 member_id 去重: {r['raw_members']}") print(f" canonical_member_id 去重: {r['canonical_members']}") print(f" canonical 为 NULL/0: {r['null_canonical']}") print(f" 被重映射的记录: {r['remapped']}") # 4. 30 天窗口内 canonical_member_id 有消费的会员数 rows5 = q(""" WITH consume_source AS ( SELECT COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id) AS canonical_member_id, s.pay_time, COALESCE(s.pay_amount, 0) AS pay_amount FROM dwd.dwd_settlement_head s LEFT JOIN dwd.dim_member_card_account mca ON s.member_card_account_id = mca.member_card_id AND mca.scd2_is_current = 1 AND mca.register_site_id = s.site_id AND COALESCE(mca.is_delete, 0) = 0 WHERE s.site_id = (SELECT DISTINCT site_id FROM dwd.dwd_settlement_head LIMIT 1) AND s.settle_type IN (1, 3) AND s.pay_time >= NOW() - INTERVAL '90 days' ) SELECT canonical_member_id, SUM(pay_amount) AS spend_90, SUM(CASE WHEN pay_time >= NOW() - INTERVAL '30 days' THEN pay_amount ELSE 0 END) AS spend_30 FROM consume_source WHERE canonical_member_id > 0 GROUP BY canonical_member_id """) total = len(rows5) has_30 = sum(1 for r in rows5 if float(r['spend_30']) > 0) has_90 = sum(1 for r in rows5 if float(r['spend_90']) > 0) print(f"\nSPI 视角(canonical_member_id):") print(f" 90天有消费会员: {total}") print(f" 30天有消费会员: {has_30} ({has_30/total*100:.1f}%)") print(f" 30天无消费会员: {total - has_30} ({(total-has_30)/total*100:.1f}%)") # 5. 中位数计算 spend_30_vals = sorted([float(r['spend_30']) for r in rows5]) spend_90_vals = sorted([float(r['spend_90']) for r in rows5]) n = len(spend_30_vals) median_30 = spend_30_vals[n // 2] if n else 0 median_90 = spend_90_vals[n // 2] if n else 0 print(f"\n spend_30 中位数: {median_30:.2f}") print(f" spend_90 中位数: {median_90:.2f}") # 6. 检查 API 拉取的最新数据时间(从 ODS 表看) print("\n" + "=" * 60) print("ODS 各表最新数据时间:") for r in rows3[:5]: tname = r['table_name'] try: cols = q(f""" SELECT column_name FROM information_schema.columns WHERE table_schema = 'ods' AND table_name = '{tname}' AND column_name IN ('pay_time', 'create_time', 'updated_at', 'etl_loaded_at') ORDER BY column_name """) if cols: col = cols[0]['column_name'] maxr = q(f"SELECT MAX({col}) AS max_time FROM ods.{tname}") if maxr and maxr[0]['max_time']: print(f" {tname}.{col}: {maxr[0]['max_time']}") except Exception as e: print(f" {tname}: 查询失败 ({e})") conn.close() print("\n诊断完成。")