微信小程序页面迁移校验之前 P5任务处理之前
This commit is contained in:
156
scripts/ops/_debug_spi_values.py
Normal file
156
scripts/ops/_debug_spi_values.py
Normal file
@@ -0,0 +1,156 @@
|
||||
"""诊断 SPI 哪些值超出 numeric 精度"""
|
||||
import os, sys, math
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load settings from the .env file found at parents[2] of this script's resolved path.
env_path = Path(__file__).resolve().parents[2] / ".env"
load_dotenv(env_path)

# The PostgreSQL DSN is mandatory; stop immediately when it is missing or empty.
dsn = os.getenv("PG_DSN")
if not dsn:
    raise RuntimeError("PG_DSN 未设置")
|
||||
|
||||
# Simulate the SPI computation and find out which values overflow.
import psycopg2

# A single connection and cursor serve all diagnostic queries in this script.
conn = psycopg2.connect(dsn)
cur = conn.cursor()
|
||||
|
||||
# Inspect the value range of the SPI feature data for one site.
site_id = 2790685415443269

# 1. Largest absolute value of each consumption feature.
feature_extremes_sql = """
    SELECT
        MAX(ABS(spend_30)) as max_spend_30,
        MAX(ABS(spend_90)) as max_spend_90,
        MAX(ABS(recharge_90)) as max_recharge_90,
        MAX(ABS(avg_ticket_90)) as max_avg_ticket,
        MAX(ABS(daily_spend_ewma_90)) as max_ewma
    FROM dws.dws_member_consumption
    WHERE site_id = %s
"""
cur.execute(feature_extremes_sql, (site_id,))
row = cur.fetchone()
if row:
    max_spend_30, max_spend_90, max_recharge_90, max_avg_ticket, max_ewma = row
    print(
        f"消费特征极值: spend_30={max_spend_30}, spend_90={max_spend_90}, "
        f"recharge_90={max_recharge_90}, avg_ticket={max_avg_ticket}, ewma={max_ewma}"
    )
|
||||
|
||||
# 2. Simulate the score computation.
# Default parameters: normalisation bases for the amounts, the weights of the
# level and speed terms, and the epsilon added to the relative-speed ratio.
params = dict(
    # Amount bases (divisors applied before the log transforms)
    amount_base_spend_30=500.0,
    amount_base_spend_90=1500.0,
    amount_base_ticket_90=200.0,
    amount_base_recharge_90=1000.0,
    amount_base_speed_abs=100.0,
    amount_base_ewma_90=50.0,
    # Weights of the level components
    w_level_spend_30=0.30,
    w_level_spend_90=0.30,
    w_level_ticket_90=0.20,
    w_level_recharge_90=0.20,
    # Weights of the speed components
    w_speed_abs=0.40,
    w_speed_rel=0.30,
    w_speed_ewma=0.30,
    # Added to numerator and denominator of the relative-speed ratio
    speed_epsilon=1e-6,
)
|
||||
|
||||
# Fetch the actual per-member consumption data; NULL features are read as 0.
member_features_sql = """
    SELECT member_id,
        COALESCE(spend_30, 0), COALESCE(spend_90, 0),
        COALESCE(avg_ticket_90, 0),
        COALESCE(orders_30, 0), COALESCE(orders_90, 0),
        COALESCE(visit_days_30, 0), COALESCE(visit_days_90, 0)
    FROM dws.dws_member_consumption
    WHERE site_id = %s
"""
cur.execute(member_features_sql, (site_id,))
rows = cur.fetchall()

member_count = len(rows)
print(f"\n会员数: {member_count}")
|
||||
|
||||
# Simulate the score computation for every member and track the extremes.

# Largest magnitude a numeric(10,4) column can hold. Defined before the loop so
# the summary at the end can still print it when the query returned no rows
# (it used to be assigned inside the loop body, which raised NameError then).
RAW_MAX = 999999.9999

# Loop-invariant epsilon added to both sides of the relative-speed ratio.
eps = params['speed_epsilon']

# Each tracker is (best value so far, member_id that produced it).
max_level = (-float('inf'), None)
max_speed = (-float('inf'), None)
max_raw = (-float('inf'), None)
overflow_members = []

for row in rows:
    # Columns 4, 5 and 7 (orders_30, orders_90, visit_days_90) are not used by
    # the simulated formulas, so they are not converted here.
    mid = row[0]
    spend_30 = float(row[1])
    spend_90 = float(row[2])
    avg_ticket = float(row[3])
    visit_days_30 = int(row[6])
    recharge_90 = 0.0  # simplified: recharge is treated as zero in this simulation

    try:
        # Level: weighted sum of log1p-compressed amounts.
        level = (
            params['w_level_spend_30'] * math.log1p(spend_30 / params['amount_base_spend_30'])
            + params['w_level_spend_90'] * math.log1p(spend_90 / params['amount_base_spend_90'])
            + params['w_level_ticket_90'] * math.log1p(avg_ticket / params['amount_base_ticket_90'])
            + params['w_level_recharge_90'] * math.log1p(recharge_90 / params['amount_base_recharge_90'])
        )

        # Speed: absolute spend per visit day plus the 30-day vs 90-day daily-rate ratio.
        v_abs = math.log1p(spend_30 / (max(visit_days_30, 1) * params['amount_base_speed_abs']))
        v_30 = spend_30 / 30.0
        v_90 = spend_90 / 90.0
        v_rel = math.log((v_30 + eps) / (v_90 + eps))
    except (ValueError, ZeroDivisionError) as exc:
        # math.log / math.log1p raise ValueError for out-of-domain input (for
        # example strongly negative spend) instead of returning NaN, so the
        # INF/NAN check below would never see these members. Report the member
        # and keep scanning rather than aborting the whole diagnosis.
        print(
            f"  DOMAIN ERROR: member_id={mid}, spend_30={spend_30}, spend_90={spend_90}, "
            f"avg_ticket={avg_ticket}, error={exc}"
        )
        continue

    v_ewma = 0.0  # simplified: the EWMA term is treated as zero in this simulation
    speed = (
        params['w_speed_abs'] * v_abs
        + params['w_speed_rel'] * max(0.0, v_rel)  # only acceleration contributes
        + params['w_speed_ewma'] * v_ewma
    )

    # Raw: the third component is fixed at 0.0 in this simulation.
    raw = 0.60 * level + 0.30 * speed + 0.10 * 0.0

    if level > max_level[0]:
        max_level = (level, mid)
    if speed > max_speed[0]:
        max_speed = (speed, mid)
    if raw > max_raw[0]:
        max_raw = (raw, mid)

    # Check whether any score exceeds the numeric(10,4) range.
    if abs(level) > RAW_MAX or abs(speed) > RAW_MAX or abs(raw) > RAW_MAX:
        overflow_members.append((mid, level, speed, raw))

    # Check for inf/nan.
    if math.isinf(level) or math.isnan(level) or math.isinf(speed) or math.isnan(speed):
        print(f"  INF/NAN: member_id={mid}, level={level}, speed={speed}, spend_30={spend_30}, spend_90={spend_90}")

print(f"\nMax level: {max_level[0]:.6f} (member_id={max_level[1]})")
print(f"Max speed: {max_speed[0]:.6f} (member_id={max_speed[1]})")
print(f"Max raw: {max_raw[0]:.6f} (member_id={max_raw[1]})")
print(f"Overflow members (>{RAW_MAX}): {len(overflow_members)}")
|
||||
|
||||
# Also inspect the actual daily_spend_ewma_90 values.
# That value is computed in _compute_daily_spend_ewma_batch;
# check whether dws_member_consumption holds any extreme values.
top_spenders_sql = """
    SELECT member_id, spend_30, spend_90, avg_ticket_90, daily_spend_ewma_90
    FROM dws.dws_member_consumption
    WHERE site_id = %s
    ORDER BY spend_90 DESC
    LIMIT 5
"""
cur.execute(top_spenders_sql, (site_id,))

print("\nTop 5 消费会员:")
for top_member_id, top_spend_30, top_spend_90, top_avg_ticket, top_ewma in cur.fetchall():
    print(
        f"  member_id={top_member_id}, spend_30={top_spend_30}, spend_90={top_spend_90}, "
        f"avg_ticket={top_avg_ticket}, ewma={top_ewma}"
    )
|
||||
|
||||
# Check the actual data range of numeric(14,2).
max_values_sql = """
    SELECT
        MAX(spend_30), MAX(spend_90), MAX(recharge_90),
        MAX(avg_ticket_90), MAX(daily_spend_ewma_90)
    FROM dws.dws_member_consumption
    WHERE site_id = %s
"""
cur.execute(max_values_sql, (site_id,))
r = cur.fetchone()
peak_spend_30, peak_spend_90, peak_recharge_90, peak_avg_ticket, peak_ewma = r
print(
    f"\n最大值: spend_30={peak_spend_30}, spend_90={peak_spend_90}, "
    f"recharge_90={peak_recharge_90}, avg_ticket={peak_avg_ticket}, ewma={peak_ewma}"
)

# All queries are done; release the database connection.
conn.close()
print("\n诊断完成")
|
||||
Reference in New Issue
Block a user