微信小程序页面迁移校验之前 P5任务处理之前

This commit is contained in:
Neo
2026-03-09 01:19:21 +08:00
parent 263bf96035
commit 6e20987d2f
1112 changed files with 153824 additions and 219694 deletions

View File

@@ -0,0 +1,115 @@
"""诊断 SPI 基数校准中位数为 0 的原因"""
import os
from pathlib import Path

from dotenv import load_dotenv

# The .env file is looked up two directory levels above the directory
# that contains this script.
_ENV_FILE = Path(__file__).resolve().parents[2] / ".env"
load_dotenv(_ENV_FILE)

# Connection string for PostgreSQL; the script refuses to run without it.
PG_DSN = os.getenv("PG_DSN")
if not PG_DSN:
    raise RuntimeError("PG_DSN 未设置")

# The driver is imported only after the DSN check, so a missing PG_DSN is
# reported before psycopg2 is loaded.
import psycopg2
import psycopg2.extras

# One shared connection for the whole script, in autocommit mode.
conn = psycopg2.connect(PG_DSN)
conn.autocommit = True
def q(sql, params=None):
    """Execute one SQL statement on the shared connection and return all rows.

    A ``RealDictCursor`` is used, so every returned row can be indexed by
    column name. The cursor is released when the ``with`` block exits.

    Args:
        sql: SQL text to execute.
        params: Optional bind parameters handed to ``cursor.execute``.

    Returns:
        The result of ``cursor.fetchall()`` for the executed statement.
    """
    cursor = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    with cursor:
        cursor.execute(sql, params)
        fetched = cursor.fetchall()
    return fetched
# SPI extracts the members who spent something within the last 90 days.
# The ETL ran at 2026-02-27 07:55, so NOW() - 90 days ≈ 2025-11-29
# and NOW() - 30 days ≈ 2026-01-28,
# whereas the test data covers 2025-11-01 ~ 2026-02-27.
print(
    "SPI 特征提取模拟(与 ETL 运行时一致的窗口)",
    "=" * 60,
    sep="\n",
)

# Mimic the SPI SQL to see how many members end up with spend_30 > 0.
# - canonical_member_id: the settlement's member_id, or, when that is 0/NULL,
#   the tenant_member_id of the joined current, non-deleted card account.
# - Only one site is examined.
#   NOTE(review): the site comes from DISTINCT ... LIMIT 1 without ORDER BY,
#   so which site is chosen is not pinned down when several exist.
# - NOTE(review): settle_type IN (1, 3) presumably selects consumption
#   settlements; confirm against the ETL definition.
SPI_FEATURE_SQL = """
WITH consume_source AS (
SELECT
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id) AS canonical_member_id,
s.pay_time,
COALESCE(s.pay_amount, 0) AS pay_amount
FROM dwd.dwd_settlement_head s
LEFT JOIN dwd.dim_member_card_account mca
ON s.member_card_account_id = mca.member_card_id
AND mca.scd2_is_current = 1
AND mca.register_site_id = s.site_id
AND COALESCE(mca.is_delete, 0) = 0
WHERE s.site_id = (SELECT DISTINCT site_id FROM dwd.dwd_settlement_head LIMIT 1)
AND s.settle_type IN (1, 3)
AND s.pay_time >= NOW() - INTERVAL '90 days'
)
SELECT
canonical_member_id AS member_id,
SUM(pay_amount) AS spend_90,
SUM(CASE WHEN pay_time >= NOW() - INTERVAL '30 days' THEN pay_amount ELSE 0 END) AS spend_30,
COUNT(*) AS orders_90,
SUM(CASE WHEN pay_time >= NOW() - INTERVAL '30 days' THEN 1 ELSE 0 END) AS orders_30
FROM consume_source
WHERE canonical_member_id > 0
GROUP BY canonical_member_id
"""
rows = q(SPI_FEATURE_SQL)
print(f"近 90 天有消费的会员数: {len(rows)}")
if rows:
    # Sorting ascending lets the median and the maximum be read by index.
    spend_30_values = sorted(float(row['spend_30']) for row in rows)
    spend_90_values = sorted(float(row['spend_90']) for row in rows)
    n = len(spend_30_values)
    # n // 2 picks the upper-middle element when n is even (no averaging).
    median_idx = n // 2
    # Number of members whose spend in the window is exactly zero.
    zero_30 = spend_30_values.count(0)
    zero_90 = spend_90_values.count(0)

    print("\nspend_30 分布:")
    print(f" 为 0 的会员: {zero_30}/{n} ({zero_30/n*100:.1f}%)")
    print(f" 中位数: {spend_30_values[median_idx]:.2f}")
    print(f" 最大值: {spend_30_values[-1]:.2f}")

    print("\nspend_90 分布:")
    print(f" 为 0 的会员: {zero_90}/{n} ({zero_90/n*100:.1f}%)")
    print(f" 中位数: {spend_90_values[median_idx]:.2f}")
    print(f" 最大值: {spend_90_values[-1]:.2f}")
# Inspect the real pay_time range of the data next to the 90-day and 30-day
# cut-offs as the database computes them from NOW().
PAY_TIME_RANGE_SQL = """
SELECT MIN(pay_time) AS min_pay, MAX(pay_time) AS max_pay,
NOW() - INTERVAL '90 days' AS cutoff_90,
NOW() - INTERVAL '30 days' AS cutoff_30,
NOW() AS now_ts
FROM dwd.dwd_settlement_head
WHERE settle_type IN (1, 3)
"""
rows2 = q(PAY_TIME_RANGE_SQL)
if rows2:
    span = rows2[0]
    print("\n时间范围:")
    print(f" 数据最早: {span['min_pay']}")
    print(f" 数据最晚: {span['max_pay']}")
    print(f" NOW(): {span['now_ts']}")
    print(f" 90天截止: {span['cutoff_90']}")
    print(f" 30天截止: {span['cutoff_30']}")
# Check the median of avg_ticket_90 (90-day spend divided by 90-day order
# count, with the divisor floored at 1). daily_spend is not computed here.
avg_tickets = sorted(
    float(row['spend_90']) / max(int(row['orders_90']), 1) for row in rows
)
# Guard against an empty result set: the index is derived locally instead of
# reusing a variable that only exists when the query returned rows, and the
# print is skipped when there is nothing to index.
if avg_tickets:
    # Same convention as the spend medians: the upper-middle element.
    avg_ticket_median = avg_tickets[len(avg_tickets) // 2]
    print(f"\navg_ticket_90 中位数: {avg_ticket_median:.2f}")
# Check recharge_90: count and total of settle_type = 2 rows paid within the
# last 90 days. Unlike the consumption query, no site filter is applied here.
RECHARGE_90_SQL = """
SELECT COUNT(*) AS cnt, SUM(recharge_amount) AS total
FROM dwd.dwd_settlement_head
WHERE settle_type = 2
AND pay_time >= NOW() - INTERVAL '90 days'
"""
rows3 = q(RECHARGE_90_SQL)
if rows3:
    recharge = rows3[0]
    print(f"\n近 90 天充值记录: {recharge['cnt']} 条, 总额: {recharge['total']}")

# All queries are done; release the shared connection.
conn.close()
print("\n诊断完成。")