在前后端开发联调前的提交 20260223
This commit is contained in:
@@ -8,16 +8,19 @@
|
||||
- NewconvIndexTask: 新客转化指数 (NCI)
|
||||
- MlManualImportTask: ML 人工台账导入任务
|
||||
- RelationIndexTask: 关系指数计算任务(RS/OS/MS/ML)
|
||||
- SpendingPowerIndexTask: 消费力指数 (SPI)
|
||||
"""
|
||||
|
||||
from .winback_index_task import WinbackIndexTask
|
||||
from .newconv_index_task import NewconvIndexTask
|
||||
from .ml_manual_import_task import MlManualImportTask
|
||||
from .relation_index_task import RelationIndexTask
|
||||
from .spending_power_index_task import SpendingPowerIndexTask
|
||||
|
||||
# Names exported by this module for `from <module> import *`;
# each entry matches one of the task classes imported above.
__all__ = [
    'WinbackIndexTask',
    'NewconvIndexTask',
    'MlManualImportTask',
    'RelationIndexTask',
    'SpendingPowerIndexTask',
]
|
||||
|
||||
@@ -0,0 +1,767 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
SPI 消费力指数任务(Spending Power Index)
|
||||
|
||||
设计说明:
|
||||
1. 直接继承 BaseIndexTask,不经过 MemberIndexBaseTask(无需 NEW/OLD/STOP 分群)
|
||||
2. 子分计算为 @staticmethod 纯函数,便于属性测试直接调用
|
||||
3. 三个子分:Level(消费水平)、Speed(消费速度)、Stability(消费稳定性)
|
||||
4. 结果写入 dws.dws_member_spending_power_index,按 site_id delete-before-insert
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from .base_index_task import BaseIndexTask
|
||||
from ..base_dws_task import TaskContext
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 数据类定义
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
class SPIMemberFeatures:
    """Member-level features required for the SPI calculation.

    One instance holds the extracted spending/recharge features of a single
    member at a single site, plus the raw and display scores that are filled
    in later by the SPI task.
    """
    member_id: int
    site_id: int

    # Base features
    spend_30: float = 0.0  # total spend over the last 30 days
    spend_90: float = 0.0  # total spend over the last 90 days
    recharge_90: float = 0.0  # total recharge amount over the last 90 days
    orders_30: int = 0  # number of spending orders in the last 30 days
    orders_90: int = 0  # number of spending orders in the last 90 days
    visit_days_30: int = 0  # distinct days with spending in the last 30 days
    visit_days_90: int = 0  # distinct days with spending in the last 90 days
    avg_ticket_90: float = 0.0  # average ticket (spend per order) over 90 days
    active_weeks_90: int = 0  # calendar weeks with spending in the last 90 days
    daily_spend_ewma_90: float = 0.0  # EWMA of daily spend

    # Sub-scores (raw)
    score_level_raw: float = 0.0
    score_speed_raw: float = 0.0
    score_stability_raw: float = 0.0

    # Display scores (filled in after normalization)
    score_level_display: float = 0.0
    score_speed_display: float = 0.0
    score_stability_display: float = 0.0

    # Overall score
    raw_score: float = 0.0
    display_score: float = 0.0
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SPI 任务
|
||||
# =============================================================================
|
||||
|
||||
class SpendingPowerIndexTask(BaseIndexTask):
    """SPI spending power index task.

    A single task that produces the Level / Speed / Stability sub-scores and
    the overall SPI score.
    """

    # Index type identifier returned by get_index_type().
    INDEX_TYPE = "SPI"

    # Built-in parameter defaults; execute() overlays values loaded from the
    # parameter table on top of this mapping.
    DEFAULT_PARAMS: Dict[str, float] = {
        # Window parameters
        'spend_window_short_days': 30,
        'spend_window_long_days': 90,
        'ewma_alpha_daily_spend': 0.3,
        # Amount compression bases (initial defaults; may be overridden by
        # auto-calibration or by the configuration table)
        'amount_base_spend_30': 500.0,
        'amount_base_spend_90': 1500.0,
        'amount_base_ticket_90': 200.0,
        'amount_base_recharge_90': 1000.0,
        'amount_base_speed_abs': 100.0,
        'amount_base_ewma_90': 50.0,
        # Level sub-score weights
        'w_level_spend_30': 0.30,
        'w_level_spend_90': 0.35,
        'w_level_ticket_90': 0.20,
        'w_level_recharge_90': 0.15,
        # Speed sub-score weights
        'w_speed_abs': 0.50,
        'w_speed_rel': 0.30,
        'w_speed_ewma': 0.20,
        # Overall score weights
        'weight_level': 0.60,
        'weight_speed': 0.30,
        'weight_stability': 0.10,
        # Stability parameters
        'stability_window_days': 90,
        'use_stability': 1,
        # Mapping and smoothing
        'percentile_lower': 5,
        'percentile_upper': 95,
        'compression_mode': 1,  # 1 = log1p, 2 = asinh, anything else = none (see _map_compression)
        'use_smoothing': 1,
        'ewma_alpha': 0.2,
        # Speed calculation
        'speed_epsilon': 1e-6,
    }
|
||||
|
||||
# =========================================================================
|
||||
# 抽象方法实现
|
||||
# =========================================================================
|
||||
|
||||
def get_task_code(self) -> str:
    """Return the task code string of the SPI task."""
    task_code = "DWS_SPENDING_POWER_INDEX"
    return task_code
|
||||
|
||||
def get_target_table(self) -> str:
    """Return the name of the table this task writes to (without schema)."""
    target_table = "dws_member_spending_power_index"
    return target_table
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
    """Return the primary-key column names of the target table."""
    primary_keys = ["site_id", "member_id"]
    return primary_keys
|
||||
|
||||
def get_index_type(self) -> str:
|
||||
return self.INDEX_TYPE
|
||||
|
||||
# =========================================================================
|
||||
# 辅助方法
|
||||
# =========================================================================
|
||||
|
||||
def _get_site_id(self, context: Optional[TaskContext]) -> int:
|
||||
"""从 context 或配置中获取门店 ID"""
|
||||
if context and getattr(context, "store_id", None):
|
||||
return int(context.store_id)
|
||||
site_id = self.config.get("app.default_site_id") or self.config.get("app.store_id")
|
||||
if site_id is not None:
|
||||
return int(site_id)
|
||||
# 回退:从消费数据中取一个 site_id
|
||||
sql = "SELECT DISTINCT site_id FROM dwd.dwd_settlement_head WHERE site_id IS NOT NULL LIMIT 1"
|
||||
rows = self.db.query(sql)
|
||||
if rows:
|
||||
return int(dict(rows[0]).get("site_id") or 0)
|
||||
self.logger.warning("无法确定门店ID,使用 0 继续执行")
|
||||
return 0
|
||||
|
||||
@staticmethod
|
||||
def _map_compression(params: Dict[str, float]) -> str:
|
||||
"""将 compression_mode 数值映射为 batch_normalize_to_display 所需的字符串"""
|
||||
mode = int(params.get('compression_mode', 0))
|
||||
if mode == 1:
|
||||
return "log1p"
|
||||
if mode == 2:
|
||||
return "asinh"
|
||||
return "none"
|
||||
|
||||
# =========================================================================
|
||||
# 核心执行流程
|
||||
# =========================================================================
|
||||
|
||||
def execute(self, context: Optional[TaskContext] = None) -> Dict[str, Any]:
    """Run the full SPI pipeline: extract -> score -> normalize -> persist.

    Steps:
        1. Resolve the site ID.
        2. Load SPI parameters (loaded values override ``DEFAULT_PARAMS``).
        3. Extract spending features, recharge totals and the daily-spend
           EWMA, and merge them into ``SPIMemberFeatures`` objects.
        4. Return a ``skipped`` result when there is no spending data.
        5. Calibrate the amount compression bases.
        6. Compute Level / Speed / Stability and the raw SPI per member.
        7. Normalize the four raw score groups to display scores.
        8. Persist with delete-before-insert.
        9. Save the percentile history of the overall SPI score.

    Args:
        context: Optional task context used to resolve the site ID.

    Returns:
        ``{'status': 'skipped', 'reason': 'no_data'}`` when no member has
        spending data, otherwise a dict with ``status='success'``,
        ``member_count`` and ``records_inserted``.
    """
    self.logger.info("开始计算 SPI 消费力指数")

    # 1. Resolve the site ID.
    site_id = self._get_site_id(context)

    # 2. Parameters: loaded values are layered over the defaults.
    loaded_params = self.load_index_parameters('SPI')
    params = dict(self.DEFAULT_PARAMS)
    params.update(loaded_params)

    # 3. Feature extraction.
    features = self._extract_spending_features(site_id, params)
    recharge_totals = self._extract_recharge_features(site_id, params)

    # Members with recharge but no spending are ignored: they have no
    # spending base features and do not take part in SPI.
    for member_id, recharge_total in recharge_totals.items():
        member = features.get(member_id)
        if member is not None:
            member.recharge_90 = recharge_total

    # Daily-spend EWMA, computed in one batch and merged the same way.
    ewma_by_member = self._compute_daily_spend_ewma_batch(
        site_id, list(features.keys()), params
    )
    for member_id, ewma_value in ewma_by_member.items():
        member = features.get(member_id)
        if member is not None:
            member.daily_spend_ewma_90 = ewma_value

    # 4. Nothing to score (Req 9.4).
    if not features:
        self.logger.info("SPI: site_id=%s 无消费数据,跳过计算", site_id)
        return {'status': 'skipped', 'reason': 'no_data'}

    # 5. Calibrate the amount compression bases.
    params = self._calibrate_amount_bases(features, params)

    # 6. Sub-scores and raw total per member.
    for member in features.values():
        member.score_level_raw = self.compute_level(member, params)
        member.score_speed_raw = self.compute_speed(member, params)
        member.score_stability_raw = self.compute_stability(member, params)
        member.raw_score = self.compute_spi_raw(
            member.score_level_raw,
            member.score_speed_raw,
            member.score_stability_raw,
            params,
        )

    # 7. Normalize each raw score group separately into a display score.
    percentile_lower = int(params.get('percentile_lower', 5))
    percentile_upper = int(params.get('percentile_upper', 95))
    use_smoothing = int(params.get('use_smoothing', 1)) == 1
    compression = self._map_compression(params)

    feat_list = list(features.values())

    # (index_type, raw attribute, display attribute) — overall score first,
    # then the three sub-scores.
    score_groups = (
        ('SPI', 'raw_score', 'display_score'),
        ('SPI_LEVEL', 'score_level_raw', 'score_level_display'),
        ('SPI_SPEED', 'score_speed_raw', 'score_speed_display'),
        ('SPI_STABILITY', 'score_stability_raw', 'score_stability_display'),
    )
    display_lookups = {}
    for index_type, raw_attr, display_attr in score_groups:
        normalized = self.batch_normalize_to_display(
            raw_scores=[(f.member_id, getattr(f, raw_attr)) for f in feat_list],
            compression=compression,
            percentile_lower=percentile_lower,
            percentile_upper=percentile_upper,
            use_smoothing=use_smoothing,
            site_id=site_id,
            index_type=index_type,
        )
        display_lookups[display_attr] = {
            mid: display for mid, _, display in normalized
        }

    # Write the display scores back; members missing from a result get 0.0.
    for member in feat_list:
        for display_attr, lookup in display_lookups.items():
            setattr(member, display_attr, lookup.get(member.member_id, 0.0))

    # 8. Persist with delete-before-insert (Req 9.3).
    records_inserted = self._save_spi_data(feat_list, site_id)

    # 9. Percentile history for the overall SPI score (Req 9.5).
    raw_values = [f.raw_score for f in feat_list]
    q_l, q_u = self.calculate_percentiles(raw_values, percentile_lower, percentile_upper)
    if not use_smoothing:
        smoothed_l, smoothed_u = q_l, q_u
    else:
        smoothed_l, smoothed_u = self._apply_ewma_smoothing(
            site_id=site_id,
            current_p5=q_l,
            current_p95=q_u,
            index_type='SPI',
        )
    self.save_percentile_history(
        site_id=site_id,
        percentile_5=q_l,
        percentile_95=q_u,
        percentile_5_smoothed=smoothed_l,
        percentile_95_smoothed=smoothed_u,
        record_count=len(raw_values),
        min_raw=min(raw_values),
        max_raw=max(raw_values),
        avg_raw=sum(raw_values) / len(raw_values),
        index_type='SPI',
    )

    member_count = len(feat_list)
    self.logger.info(
        "SPI 计算完成: site_id=%s, 会员数=%d, 写入记录=%d",
        site_id, member_count, records_inserted,
    )

    return {
        'status': 'success',
        'member_count': member_count,
        'records_inserted': records_inserted,
    }
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# 数据提取(后续任务实现)
|
||||
# =========================================================================
|
||||
|
||||
def _extract_spending_features(
    self, site_id: int, params: Dict[str, float]
) -> Dict[int, SPIMemberFeatures]:
    """Aggregate spending features per member from ``dwd_settlement_head``.

    Reads settlement rows of the long window (``settle_type IN (1, 3)``) for
    one site and aggregates, per member: spend, order count and distinct
    visit days for both the long and the short window, the average ticket
    over the long window and the number of active ISO weeks.

    The member identity is ``member_id`` when it is non-zero, otherwise the
    ``tenant_member_id`` of the joined current, non-deleted card account.

    Args:
        site_id: Site whose settlements are read (bound as a SQL parameter).
        params: Parameters; ``spend_window_short_days`` (default 30) and
            ``spend_window_long_days`` (default 90) set the two windows.

    Returns:
        Mapping ``member_id -> SPIMemberFeatures`` with the base features set.
    """
    short_days = int(params.get('spend_window_short_days', 30))
    long_days = int(params.get('spend_window_long_days', 90))

    # One statement aggregates both windows so the table is scanned once.
    # The day counts are embedded via the f-string (they are ints, so this
    # is safe); site_id is passed as a bound parameter.
    sql = f"""
        WITH consume_source AS (
            SELECT
                COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
                    AS canonical_member_id,
                s.pay_time,
                COALESCE(s.pay_amount, 0) AS pay_amount
            FROM dwd.dwd_settlement_head s
            LEFT JOIN dwd.dim_member_card_account mca
                ON s.member_card_account_id = mca.member_card_id
                AND mca.scd2_is_current = 1
                AND mca.register_site_id = s.site_id
                AND COALESCE(mca.is_delete, 0) = 0
            WHERE s.site_id = %s
              AND s.settle_type IN (1, 3)
              AND s.pay_time >= NOW() - INTERVAL '{long_days} days'
        )
        SELECT
            canonical_member_id AS member_id,
            -- 90 天窗口
            SUM(pay_amount) AS spend_90,
            COUNT(*) AS orders_90,
            COUNT(DISTINCT DATE(pay_time)) AS visit_days_90,
            COUNT(DISTINCT EXTRACT(ISOYEAR FROM pay_time)::int * 100
                  + EXTRACT(WEEK FROM pay_time)::int) AS active_weeks_90,
            -- 30 天窗口(子集过滤)
            SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
                     THEN pay_amount ELSE 0 END) AS spend_30,
            SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
                     THEN 1 ELSE 0 END) AS orders_30,
            COUNT(DISTINCT CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
                                THEN DATE(pay_time) END) AS visit_days_30
        FROM consume_source
        WHERE canonical_member_id > 0
        GROUP BY canonical_member_id
    """
    rows = self.db.query(sql, (site_id,))

    features_by_member: Dict[int, SPIMemberFeatures] = {}
    for raw_row in rows or []:
        record = dict(raw_row)
        member_id = int(record['member_id'])
        order_count_90 = int(record['orders_90'] or 0)
        total_spend_90 = float(record['spend_90'] or 0)

        features_by_member[member_id] = SPIMemberFeatures(
            member_id=member_id,
            site_id=site_id,
            spend_30=float(record['spend_30'] or 0),
            spend_90=total_spend_90,
            orders_30=int(record['orders_30'] or 0),
            orders_90=order_count_90,
            visit_days_30=int(record['visit_days_30'] or 0),
            visit_days_90=int(record['visit_days_90'] or 0),
            # Average ticket = spend_90 / max(orders_90, 1) (Req 2.4).
            avg_ticket_90=total_spend_90 / max(order_count_90, 1),
            # Active weeks are capped at 13 (Req 2.5).
            active_weeks_90=min(int(record['active_weeks_90'] or 0), 13),
        )

    self.logger.info(
        "SPI 消费特征提取完成: site_id=%s, 会员数=%d, 窗口=%d/%d天",
        site_id, len(features_by_member), short_days, long_days,
    )
    return features_by_member
|
||||
|
||||
|
||||
|
||||
def _extract_recharge_features(
|
||||
self, site_id: int, params: Dict[str, float]
|
||||
) -> Dict[int, float]:
|
||||
"""从 dwd_recharge_order 提取充值特征,返回 {member_id: recharge_90}。
|
||||
|
||||
提取近 90 天充值订单(settle_type = 5),按 member_id 聚合充值总额。
|
||||
使用 canonical_member_id 模式解析会员身份(与 _extract_spending_features 一致)。
|
||||
"""
|
||||
long_days = int(params.get('spend_window_long_days', 90))
|
||||
|
||||
sql = f"""
|
||||
WITH recharge_source AS (
|
||||
SELECT
|
||||
COALESCE(NULLIF(r.member_id, 0), mca.tenant_member_id)
|
||||
AS canonical_member_id,
|
||||
COALESCE(r.pay_amount, 0) AS pay_amount
|
||||
FROM dwd.dwd_recharge_order r
|
||||
LEFT JOIN dwd.dim_member_card_account mca
|
||||
ON r.tenant_member_card_id = mca.member_card_id
|
||||
AND mca.scd2_is_current = 1
|
||||
AND mca.register_site_id = r.site_id
|
||||
AND COALESCE(mca.is_delete, 0) = 0
|
||||
WHERE r.site_id = %s
|
||||
AND r.settle_type = 5
|
||||
AND r.pay_time >= NOW() - INTERVAL '{long_days} days'
|
||||
)
|
||||
SELECT
|
||||
canonical_member_id AS member_id,
|
||||
SUM(pay_amount) AS recharge_90
|
||||
FROM recharge_source
|
||||
WHERE canonical_member_id > 0
|
||||
GROUP BY canonical_member_id
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
|
||||
result: Dict[int, float] = {}
|
||||
for row in (rows or []):
|
||||
r = dict(row)
|
||||
mid = int(r['member_id'])
|
||||
result[mid] = float(r['recharge_90'] or 0)
|
||||
|
||||
self.logger.info(
|
||||
"SPI 充值特征提取完成: site_id=%s, 有充值会员数=%d, 窗口=%d天",
|
||||
site_id, len(result), long_days,
|
||||
)
|
||||
return result
|
||||
|
||||
def _compute_daily_spend_ewma(
|
||||
self, site_id: int, member_id: int, params: Dict[str, float]
|
||||
) -> float:
|
||||
"""对单个会员近 90 天日消费序列计算 EWMA。
|
||||
|
||||
从 dwd_settlement_head 查询该会员每日消费总额(settle_type IN (1,3)),
|
||||
按日期升序排列后逐日计算 EWMA,返回最终值。
|
||||
|
||||
EWMA 递推公式:S_t = α × X_t + (1 - α) × S_{t-1}
|
||||
初始值 S_0 = X_0(首日消费额)。
|
||||
无消费记录时返回 0.0。
|
||||
"""
|
||||
long_days = int(params.get('spend_window_long_days', 90))
|
||||
alpha = float(params.get('ewma_alpha_daily_spend', 0.3))
|
||||
|
||||
sql = f"""
|
||||
WITH consume_source AS (
|
||||
SELECT
|
||||
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
|
||||
AS canonical_member_id,
|
||||
DATE(s.pay_time) AS pay_date,
|
||||
COALESCE(s.pay_amount, 0) AS pay_amount
|
||||
FROM dwd.dwd_settlement_head s
|
||||
LEFT JOIN dwd.dim_member_card_account mca
|
||||
ON s.member_card_account_id = mca.member_card_id
|
||||
AND mca.scd2_is_current = 1
|
||||
AND mca.register_site_id = s.site_id
|
||||
AND COALESCE(mca.is_delete, 0) = 0
|
||||
WHERE s.site_id = %s
|
||||
AND s.settle_type IN (1, 3)
|
||||
AND s.pay_time >= NOW() - INTERVAL '{long_days} days'
|
||||
)
|
||||
SELECT pay_date, SUM(pay_amount) AS daily_spend
|
||||
FROM consume_source
|
||||
WHERE canonical_member_id = %s
|
||||
GROUP BY pay_date
|
||||
ORDER BY pay_date
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, member_id))
|
||||
|
||||
if not rows:
|
||||
return 0.0
|
||||
|
||||
# 逐日 EWMA 递推:S_0 = X_0, S_t = α·X_t + (1-α)·S_{t-1}
|
||||
ewma = float(dict(rows[0])['daily_spend'] or 0)
|
||||
for row in rows[1:]:
|
||||
x = float(dict(row)['daily_spend'] or 0)
|
||||
ewma = alpha * x + (1 - alpha) * ewma
|
||||
|
||||
return ewma
|
||||
|
||||
def _compute_daily_spend_ewma_batch(
|
||||
self, site_id: int, member_ids: List[int], params: Dict[str, float]
|
||||
) -> Dict[int, float]:
|
||||
"""批量计算多个会员的日消费 EWMA,单次 SQL 查询避免 N+1。
|
||||
|
||||
返回 {member_id: daily_spend_ewma_90},未出现的会员值为 0.0。
|
||||
"""
|
||||
if not member_ids:
|
||||
return {}
|
||||
|
||||
long_days = int(params.get('spend_window_long_days', 90))
|
||||
alpha = float(params.get('ewma_alpha_daily_spend', 0.3))
|
||||
|
||||
sql = f"""
|
||||
WITH consume_source AS (
|
||||
SELECT
|
||||
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
|
||||
AS canonical_member_id,
|
||||
DATE(s.pay_time) AS pay_date,
|
||||
COALESCE(s.pay_amount, 0) AS pay_amount
|
||||
FROM dwd.dwd_settlement_head s
|
||||
LEFT JOIN dwd.dim_member_card_account mca
|
||||
ON s.member_card_account_id = mca.member_card_id
|
||||
AND mca.scd2_is_current = 1
|
||||
AND mca.register_site_id = s.site_id
|
||||
AND COALESCE(mca.is_delete, 0) = 0
|
||||
WHERE s.site_id = %s
|
||||
AND s.settle_type IN (1, 3)
|
||||
AND s.pay_time >= NOW() - INTERVAL '{long_days} days'
|
||||
)
|
||||
SELECT canonical_member_id AS member_id,
|
||||
pay_date,
|
||||
SUM(pay_amount) AS daily_spend
|
||||
FROM consume_source
|
||||
WHERE canonical_member_id > 0
|
||||
GROUP BY canonical_member_id, pay_date
|
||||
ORDER BY canonical_member_id, pay_date
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
|
||||
# 按 member_id 分组,逐组计算 EWMA
|
||||
result: Dict[int, float] = {}
|
||||
if not rows:
|
||||
return result
|
||||
|
||||
current_mid: Optional[int] = None
|
||||
ewma = 0.0
|
||||
for row in rows:
|
||||
r = dict(row)
|
||||
mid = int(r['member_id'])
|
||||
x = float(r['daily_spend'] or 0)
|
||||
if mid != current_mid:
|
||||
# 新会员:保存上一个会员结果,重置
|
||||
if current_mid is not None:
|
||||
result[current_mid] = ewma
|
||||
current_mid = mid
|
||||
ewma = x # S_0 = X_0
|
||||
else:
|
||||
ewma = alpha * x + (1 - alpha) * ewma
|
||||
# 保存最后一个会员
|
||||
if current_mid is not None:
|
||||
result[current_mid] = ewma
|
||||
|
||||
self.logger.info(
|
||||
"SPI 日消费 EWMA 批量计算完成: site_id=%s, 会员数=%d, α=%.2f",
|
||||
site_id, len(result), alpha,
|
||||
)
|
||||
return result
|
||||
|
||||
def _calibrate_amount_bases(
    self, features: Dict[int, SPIMemberFeatures], params: Dict[str, float]
) -> Dict[str, float]:
    """Calibrate the amount compression bases from the site's own data.

    For each ``amount_base_*`` parameter the median of the matching feature
    over all members is used as the base, unless the incoming value differs
    from ``DEFAULT_PARAMS`` (then it is treated as a configured value and
    kept). A median that is not positive falls back to ``DEFAULT_PARAMS``.

    Args:
        features: Member features keyed by member ID.
        params: Current parameters (defaults merged with loaded values).

    Returns:
        A copy of ``params`` with the calibrated bases applied; ``params``
        itself is not modified.
    """
    # (amount_base parameter, how to read its sample value from one member)
    base_samplers = (
        ('amount_base_spend_30', lambda f: f.spend_30),
        ('amount_base_spend_90', lambda f: f.spend_90),
        ('amount_base_ticket_90', lambda f: f.avg_ticket_90),
        ('amount_base_recharge_90', lambda f: f.recharge_90),
        ('amount_base_speed_abs', lambda f: f.spend_30 / max(f.visit_days_30, 1)),
        ('amount_base_ewma_90', lambda f: f.daily_spend_ewma_90),
    )

    calibrated = dict(params)  # start from the current values, override per key

    for base_key, sampler in base_samplers:
        default_value = self.DEFAULT_PARAMS.get(base_key)

        # A value that differs from the default is taken to come from the
        # configuration table: keep it and skip auto-calibration.
        if base_key in params and params[base_key] != default_value:
            self.logger.info(
                "SPI 基数校准: %s 使用配置表值 %.2f", base_key, params[base_key],
            )
            continue

        samples = [sampler(member) for member in features.values()]
        median_val = self.calculate_median(samples)

        if median_val > 0:
            calibrated[base_key] = median_val
            self.logger.info(
                "SPI 基数校准: %s 自动校准为中位数 %.2f", base_key, median_val,
            )
            continue

        # Non-positive median: fall back to the built-in default.
        fallback = self.DEFAULT_PARAMS[base_key]
        calibrated[base_key] = fallback
        self.logger.warning(
            "SPI 基数校准: %s 中位数 %.2f ≤ 0,回退到默认值 %.2f",
            base_key, median_val, fallback,
        )

    return calibrated
|
||||
|
||||
# =========================================================================
|
||||
# 子分计算(纯函数,后续任务实现具体逻辑)
|
||||
# =========================================================================
|
||||
|
||||
@staticmethod
|
||||
def compute_level(features: SPIMemberFeatures, params: Dict[str, float]) -> float:
|
||||
"""Level 子分:消费水平
|
||||
|
||||
L = w_s30 × ln(1 + spend_30/M30)
|
||||
+ w_s90 × ln(1 + spend_90/M90)
|
||||
+ w_ticket × ln(1 + avg_ticket_90/T0)
|
||||
+ w_r90 × ln(1 + recharge_90/R90)
|
||||
"""
|
||||
return (
|
||||
params['w_level_spend_30'] * math.log1p(features.spend_30 / params['amount_base_spend_30'])
|
||||
+ params['w_level_spend_90'] * math.log1p(features.spend_90 / params['amount_base_spend_90'])
|
||||
+ params['w_level_ticket_90'] * math.log1p(features.avg_ticket_90 / params['amount_base_ticket_90'])
|
||||
+ params['w_level_recharge_90'] * math.log1p(features.recharge_90 / params['amount_base_recharge_90'])
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def compute_speed(features: SPIMemberFeatures, params: Dict[str, float]) -> float:
|
||||
"""Speed 子分:消费速度
|
||||
|
||||
V_abs = ln(1 + spend_30 / (max(visit_days_30, 1) × V0))
|
||||
V_rel = ln((v_30 + ε) / (v_90 + ε)), v_30=spend_30/30, v_90=spend_90/90
|
||||
V_ewma = ln(1 + daily_spend_ewma_90 / E0)
|
||||
S = w_abs × V_abs + w_rel × max(0, V_rel) + w_ewma × V_ewma
|
||||
|
||||
仅对加速(V_rel > 0)加分,不对减速扣分(Req 4.5)。
|
||||
"""
|
||||
eps = params.get('speed_epsilon', 1e-6)
|
||||
|
||||
# 绝对速度(Req 4.1)
|
||||
v_abs = math.log1p(
|
||||
features.spend_30
|
||||
/ (max(features.visit_days_30, 1) * params['amount_base_speed_abs'])
|
||||
)
|
||||
|
||||
# 相对速度(Req 4.2)——仅加速加分
|
||||
v_30 = features.spend_30 / 30.0
|
||||
v_90 = features.spend_90 / 90.0
|
||||
v_rel = math.log((v_30 + eps) / (v_90 + eps))
|
||||
|
||||
# EWMA 速度(Req 4.3)
|
||||
v_ewma = math.log1p(
|
||||
features.daily_spend_ewma_90 / params['amount_base_ewma_90']
|
||||
)
|
||||
|
||||
# 加权合成(Req 4.4, 4.5)
|
||||
return (
|
||||
params['w_speed_abs'] * v_abs
|
||||
+ params['w_speed_rel'] * max(0.0, v_rel)
|
||||
+ params['w_speed_ewma'] * v_ewma
|
||||
)
|
||||
|
||||
|
||||
@staticmethod
|
||||
def compute_stability(features: SPIMemberFeatures, params: Dict[str, float]) -> float:
|
||||
"""Stability 子分:消费稳定性
|
||||
|
||||
P = active_weeks_90 / 13(周覆盖率)
|
||||
use_stability=0 时返回 0.0(Req 5.3)
|
||||
取值范围 [0, 1](Req 5.4)
|
||||
"""
|
||||
# 配置关闭稳定性时直接返回 0(Req 5.3)
|
||||
if params.get('use_stability', 1) == 0:
|
||||
return 0.0
|
||||
# 周覆盖率(Req 5.1, 5.2)
|
||||
return features.active_weeks_90 / 13.0
|
||||
|
||||
@staticmethod
|
||||
def compute_spi_raw(
|
||||
level: float, speed: float, stability: float, params: Dict[str, float]
|
||||
) -> float:
|
||||
"""SPI 总分合成
|
||||
|
||||
SPI_raw = w_L × L + w_S × S + w_P × P
|
||||
默认权重 w_L=0.60, w_S=0.30, w_P=0.10(Req 6.1)
|
||||
"""
|
||||
w_l = params.get('weight_level', 0.60)
|
||||
w_s = params.get('weight_speed', 0.30)
|
||||
w_p = params.get('weight_stability', 0.10)
|
||||
return w_l * level + w_s * speed + w_p * stability
|
||||
|
||||
# =========================================================================
|
||||
# 持久化(后续任务实现)
|
||||
# =========================================================================
|
||||
|
||||
def _save_spi_data(
    self, data_list: List[SPIMemberFeatures], site_id: int
) -> int:
    """Replace the site's rows in ``dws.dws_member_spending_power_index``.

    Deletes all existing rows of ``site_id`` and inserts one row per entry
    of ``data_list`` in the same transaction (delete-before-insert,
    Req 9.3). If any statement fails, the transaction is rolled back so the
    pending DELETE is not left open on the connection, and the original
    exception is re-raised.

    Args:
        data_list: Scored member features to write.
        site_id: Site whose existing rows are replaced.

    Returns:
        Number of inserted rows as reported by the cursor (``0`` when
        ``data_list`` is empty; the delete is still committed in that case).

    Raises:
        Exception: Whatever the database driver raised, after rollback.
    """
    insert_sql = """
        INSERT INTO dws.dws_member_spending_power_index (
            site_id, member_id,
            spend_30, spend_90, recharge_90,
            orders_30, orders_90,
            visit_days_30, visit_days_90,
            avg_ticket_90, active_weeks_90, daily_spend_ewma_90,
            score_level_raw, score_speed_raw, score_stability_raw,
            score_level_display, score_speed_display, score_stability_display,
            raw_score, display_score,
            calc_time, created_at, updated_at
        ) VALUES (
            %s, %s,
            %s, %s, %s,
            %s, %s,
            %s, %s,
            %s, %s, %s,
            %s, %s, %s,
            %s, %s, %s,
            %s, %s,
            NOW(), NOW(), NOW()
        )
    """
    try:
        with self.db.conn.cursor() as cur:
            # Remove the site's previous rows first (Req 9.3).
            cur.execute(
                "DELETE FROM dws.dws_member_spending_power_index WHERE site_id = %s",
                (site_id,),
            )

            if not data_list:
                self.db.conn.commit()
                return 0

            inserted = 0
            for f in data_list:
                cur.execute(insert_sql, (
                    f.site_id, f.member_id,
                    f.spend_30, f.spend_90, f.recharge_90,
                    f.orders_30, f.orders_90,
                    f.visit_days_30, f.visit_days_90,
                    f.avg_ticket_90, f.active_weeks_90, f.daily_spend_ewma_90,
                    f.score_level_raw, f.score_speed_raw, f.score_stability_raw,
                    f.score_level_display, f.score_speed_display, f.score_stability_display,
                    f.raw_score, f.display_score,
                ))
                # rowcount may be -1 when the driver cannot report it.
                inserted += max(cur.rowcount, 0)

            self.db.conn.commit()
    except Exception:
        # Undo the DELETE and any partial inserts, then let the caller see
        # the original error.
        self.db.conn.rollback()
        raise

    self.logger.info(
        "SPI 数据写入完成: site_id=%s, 插入记录=%d", site_id, inserted,
    )
    return inserted
|
||||
Reference in New Issue
Block a user