"""Diagnose which orders trigger the DWS_MEMBER_VISIT unique-constraint conflict.

The script lists rows of ``dws.dws_member_visit_detail`` dated before
2025-11-01 whose (site_id, member_id, order_settle_id) also appears in
``dwd.dwd_settlement_head`` with a pay_time inside the 2025-11-01 ..
2026-02-28 window, then prints the SCD2 columns stored for those orders.
"""
import os
from pathlib import Path

from dotenv import load_dotenv

# The .env file is looked up two directory levels above this script's folder.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")

PG_DSN = os.environ.get("PG_DSN")
if not PG_DSN:
    raise RuntimeError("PG_DSN 未设置")

import psycopg2
import psycopg2.extras

conn = psycopg2.connect(PG_DSN)
conn.autocommit = True


def q(sql, params=None):
    """Execute *sql* on the module-level connection and return all rows.

    Args:
        sql: SQL text, optionally containing ``%s`` placeholders.
        params: Optional sequence of values bound to the placeholders by
            psycopg2. When omitted, the statement is executed as-is.

    Returns:
        The fetched rows; each row is a dict-like object keyed by column name
        (``RealDictCursor``).
    """
    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
        cur.execute(sql, params)
        return cur.fetchall()


# try/finally guarantees the connection is closed even when a query fails.
try:
    print("DWS_MEMBER_VISIT 冲突 order 详情")
    print("=" * 60)

    # Find existing (old) visit rows whose order also falls in the new window.
    rows = q("""
        WITH old_visits AS (
            SELECT site_id, member_id, order_settle_id, visit_date
            FROM dws.dws_member_visit_detail
            WHERE visit_date < '2025-11-01'
        ),
        new_source AS (
            SELECT DISTINCT site_id, member_id, order_settle_id, pay_time,
                (CASE WHEN EXTRACT(HOUR FROM pay_time) < 8
                      THEN (pay_time - INTERVAL '1 day')::date
                      ELSE pay_time::date END) AS biz_date
            FROM dwd.dwd_settlement_head
            WHERE member_id IS NOT NULL AND member_id != 0
              AND pay_time >= '2025-11-01' AND pay_time <= '2026-02-28'
        )
        SELECT o.site_id, o.member_id, o.order_settle_id,
               o.visit_date AS old_visit_date,
               n.biz_date AS new_biz_date,
               n.pay_time
        FROM old_visits o
        JOIN new_source n
          ON o.site_id = n.site_id
         AND o.member_id = n.member_id
         AND o.order_settle_id = n.order_settle_id
        ORDER BY o.visit_date
    """)

    print(f"冲突 order 数: {len(rows)}")
    for r in rows:
        print(f"  order={r['order_settle_id']}, member={r['member_id']}")
        print(
            f"    旧 visit_date={r['old_visit_date']}, "
            f"新 biz_date={r['new_biz_date']}, pay_time={r['pay_time']}"
        )

    # Printed hypothesis: pay_time before 08:00 maps to the previous day's
    # biz_date (see the CASE expression above).
    print()
    print("分析: 这些 order 的 pay_time 在凌晨(< 8:00),")
    print("biz_date 归属到前一天,但 DWD 中的数据可能在不同时间被处理")

    # Inspect the SCD2 versions stored in dwd_settlement_head for those orders.
    if rows:
        # De-duplicate while preserving order: the join can return the same
        # order more than once (new_source is DISTINCT on pay_time as well).
        order_ids = list(dict.fromkeys(r['order_settle_id'] for r in rows))
        # One %s per id; the values are bound by the driver instead of being
        # spliced into the SQL text.
        placeholders = ','.join(['%s'] * len(order_ids))
        rows2 = q(
            f"""
            SELECT order_settle_id, member_id, pay_time,
                   created_at, updated_at,
                   scd2_valid_from, scd2_valid_to, scd2_is_current
            FROM dwd.dwd_settlement_head
            WHERE order_settle_id IN ({placeholders})
            """,
            order_ids,
        )
        print("\n这些 order 在 dwd_settlement_head 中的记录:")
        for r in rows2:
            print(f"  order={r['order_settle_id']}, member={r['member_id']}")
            print(f"    pay_time={r['pay_time']}, scd2_current={r['scd2_is_current']}")
            print(f"    scd2_from={r['scd2_valid_from']}, scd2_to={r['scd2_valid_to']}")
finally:
    conn.close()

print("\n诊断完成。")