Files
Neo-ZQYY/scripts/ops/_debug_null_level2.py

63 lines
2.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""诊断NULL level_name 助教的 SCD2 最早记录 vs daily 最早日期"""
import os, sys
from pathlib import Path
from dotenv import load_dotenv
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
import psycopg2
import psycopg2.extras
# For every assistant that has at least one daily-detail row with a NULL
# assistant_level_name, compare the earliest SCD2 start time found in
# dwd.dim_assistant with the earliest NULL-level stat_date.
#
# The NULL check in the CASE must come first: because of the LEFT JOIN an
# assistant without any dim_assistant row has earliest_scd2 = NULL, the "<"
# comparison then evaluates to NULL, and without the explicit branch such rows
# would fall through to ELSE and be reported as 'SCD2_COVERS'.
NULL_LEVEL_SQL = """
    WITH null_aids AS (
        SELECT DISTINCT assistant_id
        FROM dws.dws_assistant_daily_detail
        WHERE assistant_level_name IS NULL
    ),
    scd2_earliest AS (
        SELECT da.assistant_id, MIN(da.scd2_start_time) AS earliest_scd2
        FROM dwd.dim_assistant da
        JOIN null_aids n ON da.assistant_id = n.assistant_id
        GROUP BY da.assistant_id
    ),
    daily_earliest AS (
        SELECT d.assistant_id, MIN(d.stat_date) AS earliest_daily
        FROM dws.dws_assistant_daily_detail d
        JOIN null_aids n ON d.assistant_id = n.assistant_id
        WHERE d.assistant_level_name IS NULL
        GROUP BY d.assistant_id
    )
    SELECT
        de.assistant_id,
        de.earliest_daily,
        se.earliest_scd2,
        se.earliest_scd2::date AS scd2_date,
        CASE WHEN se.earliest_scd2 IS NULL THEN 'NO_SCD2_RECORD'
             WHEN de.earliest_daily < se.earliest_scd2::date THEN 'DAILY_BEFORE_SCD2'
             ELSE 'SCD2_COVERS' END AS status
    FROM daily_earliest de
    LEFT JOIN scd2_earliest se ON de.assistant_id = se.assistant_id
    ORDER BY de.earliest_daily
"""


def main():
    """Print, per NULL-level assistant, earliest daily date vs earliest SCD2 record.

    Reads the connection string from the ``PG_DSN`` environment variable,
    runs ``NULL_LEVEL_SQL`` and prints one line per assistant followed by a
    summary of how many assistants have daily rows dated before their first
    SCD2 record.

    Raises:
        RuntimeError: if ``PG_DSN`` is unset or empty.
    """
    dsn = os.environ.get("PG_DSN")
    if not dsn:
        raise RuntimeError("PG_DSN 未设置")

    conn = psycopg2.connect(dsn)
    try:
        # The cursor context manager closes the cursor even if the query
        # fails; the connection itself is closed in the finally clause
        # (psycopg2's connection context manager would not close it).
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(NULL_LEVEL_SQL)
            rows = cur.fetchall()
    finally:
        conn.close()

    print(f"=== {len(rows)} 个 NULL level_name 助教 ===")
    before_count = 0
    missing_count = 0
    for r in rows:
        status = r['status']
        if status == 'DAILY_BEFORE_SCD2':
            before_count += 1
        elif status == 'NO_SCD2_RECORD':
            missing_count += 1
        print(f" aid={r['assistant_id']}, daily_earliest={r['earliest_daily']}, "
              f"scd2_earliest={r['scd2_date']}, status={status}")
    print(f"\n总计: {before_count}/{len(rows)} 个助教的 daily 数据早于 SCD2 首条记录")
    # Only mention assistants lacking any SCD2 row when there are some, so the
    # report is unchanged in the case the original script handled correctly.
    if missing_count:
        print(f"另有 {missing_count} 个助教在 dwd.dim_assistant 中没有任何 SCD2 记录")


if __name__ == "__main__":
    main()