"""诊断 NULL level_name 的根因:检查 dim_assistant SCD2 记录覆盖情况""" import os, sys from pathlib import Path from dotenv import load_dotenv load_dotenv(Path(__file__).resolve().parents[2] / ".env") import psycopg2 import psycopg2.extras dsn = os.environ.get("PG_DSN") if not dsn: raise RuntimeError("PG_DSN 未设置") conn = psycopg2.connect(dsn) cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) # 1. 找出 NULL level_name 的 assistant_id 分布 cur.execute(""" SELECT assistant_id, assistant_level_code, MIN(stat_date) AS earliest, MAX(stat_date) AS latest, COUNT(*) AS cnt FROM dws.dws_assistant_daily_detail WHERE assistant_level_name IS NULL GROUP BY assistant_id, assistant_level_code ORDER BY cnt DESC LIMIT 20 """) rows = cur.fetchall() print("=== NULL level_name 的 assistant_id 分布 (top 20) ===") for r in rows: print(f" aid={r['assistant_id']}, code={r['assistant_level_code']}, " f"range=[{r['earliest']}~{r['latest']}], count={r['cnt']}") # 2. 对比这些 assistant_id 在 dim_assistant 中的 SCD2 记录 if rows: aids = list(set(r['assistant_id'] for r in rows))[:10] print(f"\n=== dim_assistant SCD2 记录 (前 {len(aids)} 个 assistant_id) ===") for aid in aids: cur.execute(""" SELECT assistant_id, level, nickname, scd2_start_time, scd2_end_time, scd2_is_current FROM dwd.dim_assistant WHERE assistant_id = %s ORDER BY scd2_start_time """, (aid,)) scd_rows = cur.fetchall() print(f"\n assistant_id={aid}: {len(scd_rows)} 条 SCD2 记录") for s in scd_rows: print(f" level={s['level']}, start={s['scd2_start_time']}, " f"end={s['scd2_end_time']}, current={s['scd2_is_current']}") # 对比 daily_detail 中该 aid 的最早日期 cur.execute(""" SELECT MIN(stat_date) AS earliest_daily FROM dws.dws_assistant_daily_detail WHERE assistant_id = %s AND assistant_level_name IS NULL """, (aid,)) d = cur.fetchone() if d: print(f" daily NULL earliest: {d['earliest_daily']}") # 3. 总体统计:有多少 NULL 的 assistant_id 在 dim_assistant 中完全没有记录 cur.execute(""" SELECT COUNT(DISTINCT d.assistant_id) AS total_null_aids, COUNT(DISTINCT d.assistant_id) FILTER ( WHERE NOT EXISTS ( SELECT 1 FROM dwd.dim_assistant da WHERE da.assistant_id = d.assistant_id ) ) AS no_dim_record FROM dws.dws_assistant_daily_detail d WHERE d.assistant_level_name IS NULL """) row = cur.fetchone() print(f"\n=== 总体 ===") print(f" NULL level_name 涉及 {row['total_null_aids']} 个 assistant_id") print(f" 其中 {row['no_dim_record']} 个在 dim_assistant 中完全无记录") cur.close() conn.close()