微信小程序页面迁移校验之前 P5任务处理之前
This commit is contained in:
81
scripts/ops/_debug_null_level.py
Normal file
81
scripts/ops/_debug_null_level.py
Normal file
@@ -0,0 +1,81 @@
"""Diagnose the root cause of NULL level_name values.

Compares rows of dws.dws_assistant_daily_detail whose assistant_level_name
is NULL against the SCD2 records stored in dwd.dim_assistant, and prints:

1. the top 20 (assistant_id, assistant_level_code) groups by NULL row count;
2. the SCD2 history of the first 10 distinct assistant_ids from that list,
   together with the earliest NULL stat_date for each;
3. how many affected assistant_ids have no dim_assistant record at all.

Requires the PG_DSN environment variable (optionally supplied via .env).
"""
import os
import sys  # not used below; kept so the file's import set is unchanged
from pathlib import Path

from dotenv import load_dotenv

# Load the .env file from the directory two levels above this script's folder.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")

import psycopg2
import psycopg2.extras

dsn = os.environ.get("PG_DSN")
if not dsn:
    raise RuntimeError("PG_DSN 未设置")

conn = psycopg2.connect(dsn)
try:
    # The cursor context manager closes the cursor even if a query fails;
    # RealDictCursor lets rows be read by column name.
    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
        # 1. Distribution of assistant_ids that have a NULL level_name.
        cur.execute("""
            SELECT assistant_id, assistant_level_code,
                   MIN(stat_date) AS earliest, MAX(stat_date) AS latest,
                   COUNT(*) AS cnt
            FROM dws.dws_assistant_daily_detail
            WHERE assistant_level_name IS NULL
            GROUP BY assistant_id, assistant_level_code
            ORDER BY cnt DESC
            LIMIT 20
        """)
        rows = cur.fetchall()
        print("=== NULL level_name 的 assistant_id 分布 (top 20) ===")
        for r in rows:
            print(f" aid={r['assistant_id']}, code={r['assistant_level_code']}, "
                  f"range=[{r['earliest']}~{r['latest']}], count={r['cnt']}")

        # 2. Compare those assistant_ids with their SCD2 records in dim_assistant.
        if rows:
            # An assistant_id can appear once per level code, so de-duplicate.
            # dict.fromkeys keeps first-seen order, so the 10 ids inspected are
            # the ones with the highest NULL counts (a set would pick arbitrary
            # ids and vary between runs).
            aids = list(dict.fromkeys(r["assistant_id"] for r in rows))[:10]
            print(f"\n=== dim_assistant SCD2 记录 (前 {len(aids)} 个 assistant_id) ===")
            for aid in aids:
                cur.execute("""
                    SELECT assistant_id, level, nickname,
                           scd2_start_time, scd2_end_time, scd2_is_current
                    FROM dwd.dim_assistant
                    WHERE assistant_id = %s
                    ORDER BY scd2_start_time
                """, (aid,))
                scd_rows = cur.fetchall()
                print(f"\n assistant_id={aid}: {len(scd_rows)} 条 SCD2 记录")
                for s in scd_rows:
                    print(f" level={s['level']}, start={s['scd2_start_time']}, "
                          f"end={s['scd2_end_time']}, current={s['scd2_is_current']}")

                # Earliest NULL stat_date for this assistant_id in daily_detail,
                # for comparison against the SCD2 start times printed above.
                cur.execute("""
                    SELECT MIN(stat_date) AS earliest_daily
                    FROM dws.dws_assistant_daily_detail
                    WHERE assistant_id = %s AND assistant_level_name IS NULL
                """, (aid,))
                d = cur.fetchone()
                if d:
                    print(f" daily NULL earliest: {d['earliest_daily']}")

        # 3. Overall: how many NULL assistant_ids have no dim_assistant record.
        cur.execute("""
            SELECT COUNT(DISTINCT d.assistant_id) AS total_null_aids,
                   COUNT(DISTINCT d.assistant_id) FILTER (
                       WHERE NOT EXISTS (
                           SELECT 1 FROM dwd.dim_assistant da
                           WHERE da.assistant_id = d.assistant_id
                       )
                   ) AS no_dim_record
            FROM dws.dws_assistant_daily_detail d
            WHERE d.assistant_level_name IS NULL
        """)
        row = cur.fetchone()
        print("\n=== 总体 ===")
        print(f" NULL level_name 涉及 {row['total_null_aids']} 个 assistant_id")
        print(f" 其中 {row['no_dim_record']} 个在 dim_assistant 中完全无记录")
finally:
    # Always release the connection, including when a query raises.
    conn.close()
Reference in New Issue
Block a user