微信小程序页面迁移校验之前 P5任务处理之前
This commit is contained in:
115
scripts/ops/_diagnose_spi_median.py
Normal file
115
scripts/ops/_diagnose_spi_median.py
Normal file
@@ -0,0 +1,115 @@
|
||||
"""诊断 SPI 基数校准中位数为 0 的原因"""
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load settings from the .env file that sits two directory levels above this
# script's own directory (parents[2] of the resolved script path).
_ENV_PATH = Path(__file__).resolve().parents[2] / ".env"
load_dotenv(_ENV_PATH)

# Abort before the database driver is even imported when no DSN is configured.
PG_DSN = os.getenv("PG_DSN")
if not PG_DSN:
    raise RuntimeError("PG_DSN 未设置")

import psycopg2
import psycopg2.extras

# One shared connection for every query in this script. Autocommit is enabled,
# so no explicit commit calls are made anywhere below.
conn = psycopg2.connect(PG_DSN)
conn.autocommit = True
|
||||
|
||||
def q(sql, params=None):
    """Execute *sql* on the shared connection and return every result row.

    Args:
        sql: SQL text to execute.
        params: Optional query parameters, passed through to ``cursor.execute``.

    Returns:
        The list produced by ``fetchall()`` on a ``RealDictCursor``, i.e. rows
        that can be indexed by column name.
    """
    cursor = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    try:
        cursor.execute(sql, params)
        fetched = cursor.fetchall()
    finally:
        # Release the cursor whether or not the query succeeded.
        cursor.close()
    return fetched
|
||||
|
||||
# SPI extracts members who have consumption within the last 90 days.
# The ETL ran at 2026-02-27 07:55, so NOW() - 90 days ≈ 2025-11-29
# and NOW() - 30 days ≈ 2026-01-28,
# whereas the test data spans 2025-11-01 ~ 2026-02-27.

print("SPI 特征提取模拟(与 ETL 运行时一致的窗口)", "=" * 60, sep="\n")

# Mimic the SPI SQL to see how many members end up with spend_30 > 0.
# NOTE(review): the site is picked with `SELECT DISTINCT ... LIMIT 1` and no
# ORDER BY, so which site gets analysed is up to the database — confirm that
# this is acceptable when more than one site exists.
_SPI_SPEND_SQL = """
    WITH consume_source AS (
        SELECT
            COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id) AS canonical_member_id,
            s.pay_time,
            COALESCE(s.pay_amount, 0) AS pay_amount
        FROM dwd.dwd_settlement_head s
        LEFT JOIN dwd.dim_member_card_account mca
            ON s.member_card_account_id = mca.member_card_id
            AND mca.scd2_is_current = 1
            AND mca.register_site_id = s.site_id
            AND COALESCE(mca.is_delete, 0) = 0
        WHERE s.site_id = (SELECT DISTINCT site_id FROM dwd.dwd_settlement_head LIMIT 1)
            AND s.settle_type IN (1, 3)
            AND s.pay_time >= NOW() - INTERVAL '90 days'
    )
    SELECT
        canonical_member_id AS member_id,
        SUM(pay_amount) AS spend_90,
        SUM(CASE WHEN pay_time >= NOW() - INTERVAL '30 days' THEN pay_amount ELSE 0 END) AS spend_30,
        COUNT(*) AS orders_90,
        SUM(CASE WHEN pay_time >= NOW() - INTERVAL '30 days' THEN 1 ELSE 0 END) AS orders_30
    FROM consume_source
    WHERE canonical_member_id > 0
    GROUP BY canonical_member_id
"""

# One row per member: spend_90 / spend_30 / orders_90 / orders_30.
rows = q(_SPI_SPEND_SQL)
|
||||
|
||||
import statistics


def _print_spend_distribution(label, values):
    """Print the zero share, median and maximum of one spend column.

    Args:
        label: Column name shown in the section heading (e.g. ``spend_30``).
        values: Non-empty list of floats, one per member.
    """
    total = len(values)
    zero_count = sum(1 for v in values if v == 0)
    print(f"\n{label} 分布:")
    print(f" 为 0 的会员: {zero_count}/{total} ({zero_count/total*100:.1f}%)")
    # statistics.median averages the two middle values when the count is even,
    # which a plain sorted_values[n // 2] lookup does not.
    print(f" 中位数: {statistics.median(values):.2f}")
    print(f" 最大值: {max(values):.2f}")


print(f"近 90 天有消费的会员数: {len(rows)}")

if rows:
    spend_30_values = sorted(float(r['spend_30']) for r in rows)
    spend_90_values = sorted(float(r['spend_90']) for r in rows)
    _print_spend_distribution("spend_30", spend_30_values)
    _print_spend_distribution("spend_90", spend_90_values)

# Check the actual pay_time range of the data against the 90/30-day cutoffs
# that the database derives from NOW().
rows2 = q("""
    SELECT MIN(pay_time) AS min_pay, MAX(pay_time) AS max_pay,
           NOW() - INTERVAL '90 days' AS cutoff_90,
           NOW() - INTERVAL '30 days' AS cutoff_30,
           NOW() AS now_ts
    FROM dwd.dwd_settlement_head
    WHERE settle_type IN (1, 3)
""")
if rows2:
    r = rows2[0]
    print("\n时间范围:")
    print(f" 数据最早: {r['min_pay']}")
    print(f" 数据最晚: {r['max_pay']}")
    print(f" NOW(): {r['now_ts']}")
    print(f" 90天截止: {r['cutoff_90']}")
    print(f" 30天截止: {r['cutoff_30']}")

# Check the median of avg_ticket_90 (spend_90 divided by orders_90).
# Guarded separately so an empty member set skips this section instead of
# failing on a missing median.
if rows:
    avg_tickets = [
        # max(..., 1) keeps the division safe should orders_90 ever be 0.
        float(r['spend_90']) / max(int(r['orders_90']), 1)
        for r in rows
    ]
    print(f"\navg_ticket_90 中位数: {statistics.median(avg_tickets):.2f}")
|
||||
|
||||
# Check recharge_90: count and total amount of settle_type = 2 rows whose
# pay_time falls inside the last 90 days.
# NOTE(review): unlike the member spend query, this one has no site_id
# filter — confirm that is intended.
_RECHARGE_SQL = """
    SELECT COUNT(*) AS cnt, SUM(recharge_amount) AS total
    FROM dwd.dwd_settlement_head
    WHERE settle_type = 2
        AND pay_time >= NOW() - INTERVAL '90 days'
"""
rows3 = q(_RECHARGE_SQL)
if rows3:
    recharge = rows3[0]
    cnt, total = recharge['cnt'], recharge['total']
    print(f"\n近 90 天充值记录: {cnt} 条, 总额: {total}")

# All diagnostics are done; release the shared connection.
conn.close()
print("\n诊断完成。")
|
||||
Reference in New Issue
Block a user