"""Diagnostic: earliest SCD2 record vs. earliest daily date for NULL-level assistants.

For every ``assistant_id`` that has at least one row in
``dws.dws_assistant_daily_detail`` with ``assistant_level_name IS NULL``,
compare:

* the earliest ``stat_date`` among those NULL-level daily rows, and
* the earliest ``scd2_start_time`` recorded for that assistant in
  ``dwd.dim_assistant``.

Each assistant is printed with one of three statuses:

* ``DAILY_BEFORE_SCD2`` - the earliest NULL-level daily row is dated before
  the first SCD2 record.
* ``SCD2_COVERS``       - the first SCD2 record is on or before that date.
* ``SCD2_MISSING``      - no SCD2 start time could be found for the assistant
  (no ``dim_assistant`` row, or only NULL ``scd2_start_time`` values).

The connection string is read from the ``PG_DSN`` environment variable, which
may be supplied through a ``.env`` file.

Raises:
    RuntimeError: if ``PG_DSN`` is not set.
"""
import os
import sys
from pathlib import Path

import psycopg2
import psycopg2.extras
from dotenv import load_dotenv

# Load the .env file located in this file's third ancestor directory
# (parents[2]) before PG_DSN is read below.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")

# Compare, per NULL-level assistant_id, the earliest SCD2 time with the
# earliest daily date.
#
# The LEFT JOIN keeps assistants that have no SCD2 row at all; for those
# `se.earliest_scd2` is NULL, so the NULL case must be tested *first* -
# otherwise `daily < NULL` evaluates to NULL and the row would silently fall
# through to the ELSE branch and be reported as 'SCD2_COVERS'.
DIAGNOSTIC_SQL = """
    WITH null_aids AS (
        SELECT DISTINCT assistant_id
        FROM dws.dws_assistant_daily_detail
        WHERE assistant_level_name IS NULL
    ),
    scd2_earliest AS (
        SELECT da.assistant_id, MIN(da.scd2_start_time) AS earliest_scd2
        FROM dwd.dim_assistant da
        JOIN null_aids n ON da.assistant_id = n.assistant_id
        GROUP BY da.assistant_id
    ),
    daily_earliest AS (
        SELECT d.assistant_id, MIN(d.stat_date) AS earliest_daily
        FROM dws.dws_assistant_daily_detail d
        JOIN null_aids n ON d.assistant_id = n.assistant_id
        WHERE d.assistant_level_name IS NULL
        GROUP BY d.assistant_id
    )
    SELECT
        de.assistant_id,
        de.earliest_daily,
        se.earliest_scd2,
        se.earliest_scd2::date AS scd2_date,
        CASE WHEN se.earliest_scd2 IS NULL THEN 'SCD2_MISSING'
             WHEN de.earliest_daily < se.earliest_scd2::date THEN 'DAILY_BEFORE_SCD2'
             ELSE 'SCD2_COVERS' END AS status
    FROM daily_earliest de
    LEFT JOIN scd2_earliest se ON de.assistant_id = se.assistant_id
    ORDER BY de.earliest_daily
"""

dsn = os.environ.get("PG_DSN")
if not dsn:
    raise RuntimeError("PG_DSN 未设置")

conn = psycopg2.connect(dsn)
try:
    # RealDictCursor returns each row as a dict keyed by column name.
    # The `with` block closes the cursor even if the query fails.
    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
        cur.execute(DIAGNOSTIC_SQL)
        rows = cur.fetchall()
finally:
    # Always release the connection, including on query errors.
    conn.close()

print(f"=== {len(rows)} 个 NULL level_name 助教 ===")
before_count = 0
missing_count = 0
for r in rows:
    status = r['status']
    if status == 'DAILY_BEFORE_SCD2':
        before_count += 1
    elif status == 'SCD2_MISSING':
        missing_count += 1
    print(f"  aid={r['assistant_id']}, daily_earliest={r['earliest_daily']}, "
          f"scd2_earliest={r['scd2_date']}, status={status}")

print(f"\n总计: {before_count}/{len(rows)} 个助教的 daily 数据早于 SCD2 首条记录")
if missing_count:
    # Reported separately so these rows are not mistaken for covered ones.
    print(f"另有 {missing_count} 个助教在 dwd.dim_assistant 中找不到 SCD2 起始时间")