82 lines
2.9 KiB
Python
82 lines
2.9 KiB
Python
"""诊断 NULL level_name 的根因:检查 dim_assistant SCD2 记录覆盖情况"""
|
|
# Diagnostic script: investigates rows in dws.dws_assistant_daily_detail whose
# assistant_level_name is NULL by comparing them against the rows stored for
# the same assistant_id in dwd.dim_assistant. It only issues SELECT statements
# and prints the results to stdout.
import os
import sys
from pathlib import Path

import psycopg2
import psycopg2.extras
from dotenv import load_dotenv

# Load environment variables from the .env file located two directory levels
# above the directory that contains this script.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")

# Upper bound on how many distinct assistant_ids get a detailed dump in step 2.
MAX_DETAIL_IDS = 10

dsn = os.environ.get("PG_DSN")
if not dsn:
    raise RuntimeError("PG_DSN 未设置")

conn = psycopg2.connect(dsn)
try:
    # RealDictCursor lets rows be addressed by column name below. The `with`
    # block closes the cursor even if a query or print raises.
    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
        # 1. Distribution of assistant_id values whose level_name is NULL.
        cur.execute("""
            SELECT assistant_id, assistant_level_code,
                   MIN(stat_date) AS earliest, MAX(stat_date) AS latest,
                   COUNT(*) AS cnt
            FROM dws.dws_assistant_daily_detail
            WHERE assistant_level_name IS NULL
            GROUP BY assistant_id, assistant_level_code
            ORDER BY cnt DESC
            LIMIT 20
        """)
        rows = cur.fetchall()
        print("=== NULL level_name 的 assistant_id 分布 (top 20) ===")
        for r in rows:
            print(f" aid={r['assistant_id']}, code={r['assistant_level_code']}, "
                  f"range=[{r['earliest']}~{r['latest']}], count={r['cnt']}")

        # 2. Compare against the SCD2 records of those assistant_ids in
        #    dim_assistant.
        if rows:
            # The query groups by (assistant_id, assistant_level_code), so one
            # assistant_id can appear several times. dict.fromkeys removes the
            # duplicates while keeping the "cnt DESC" order, so the slice
            # selects the top offenders deterministically (a plain set() would
            # pick an arbitrary subset).
            aids = list(
                dict.fromkeys(r["assistant_id"] for r in rows)
            )[:MAX_DETAIL_IDS]
            print(f"\n=== dim_assistant SCD2 记录 (前 {len(aids)} 个 assistant_id) ===")
            for aid in aids:
                cur.execute("""
                    SELECT assistant_id, level, nickname,
                           scd2_start_time, scd2_end_time, scd2_is_current
                    FROM dwd.dim_assistant
                    WHERE assistant_id = %s
                    ORDER BY scd2_start_time
                """, (aid,))
                scd_rows = cur.fetchall()
                print(f"\n assistant_id={aid}: {len(scd_rows)} 条 SCD2 记录")
                for s in scd_rows:
                    print(f" level={s['level']}, start={s['scd2_start_time']}, "
                          f"end={s['scd2_end_time']}, current={s['scd2_is_current']}")

                # Earliest stat_date in daily_detail on which this
                # assistant_id has a NULL level name, for comparison with the
                # SCD2 start times printed above.
                cur.execute("""
                    SELECT MIN(stat_date) AS earliest_daily
                    FROM dws.dws_assistant_daily_detail
                    WHERE assistant_id = %s AND assistant_level_name IS NULL
                """, (aid,))
                d = cur.fetchone()
                if d:
                    print(f" daily NULL earliest: {d['earliest_daily']}")

        # 3. Overall statistics: how many of the NULL assistant_ids have no
        #    record at all in dim_assistant.
        cur.execute("""
            SELECT COUNT(DISTINCT d.assistant_id) AS total_null_aids,
                   COUNT(DISTINCT d.assistant_id) FILTER (
                       WHERE NOT EXISTS (
                           SELECT 1 FROM dwd.dim_assistant da
                           WHERE da.assistant_id = d.assistant_id
                       )
                   ) AS no_dim_record
            FROM dws.dws_assistant_daily_detail d
            WHERE d.assistant_level_name IS NULL
        """)
        row = cur.fetchone()
        print("\n=== 总体 ===")
        print(f" NULL level_name 涉及 {row['total_null_aids']} 个 assistant_id")
        print(f" 其中 {row['no_dim_record']} 个在 dim_assistant 中完全无记录")
finally:
    # Always release the database connection, including on query failure.
    conn.close()