Files
ZQYY.FQ-ETL/tasks/dws/index/winback_index_task.py

403 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
老客挽回指数WBI计算任务。"""
from __future__ import annotations
import math
from dataclasses import dataclass
from datetime import date, timedelta
from typing import Any, Dict, List, Optional, Tuple
from .member_index_base import MemberActivityData, MemberIndexBaseTask
from ..base_dws_task import TaskContext
@dataclass
class MemberWinbackData:
    """Per-member working record for the winback index (WBI) calculation.

    Bundles the member's activity snapshot with its classification and the
    score components that are filled in while the index is computed.
    """

    # Activity snapshot the scores are derived from.
    activity: MemberActivityData
    # Classification result for the member.
    status: str
    segment: str

    # Score components; they stay at 0.0 until a score calculation sets them.
    overdue_old: float = 0.0
    overdue_cdf_p: float = 0.0
    drop_old: float = 0.0
    recharge_old: float = 0.0
    value_old: float = 0.0

    # Typical visit interval and the next visit date derived from it;
    # None when they cannot be determined.
    ideal_interval_days: Optional[float] = None
    ideal_next_visit_date: Optional[date] = None

    # Raw (unnormalized) score and normalized display score; None means the
    # member has not been scored.
    raw_score: Optional[float] = None
    display_score: Optional[float] = None
class WinbackIndexTask(MemberIndexBaseTask):
    """Compute the winback index (WBI) for established ("OLD") members.

    For one site the task builds the member activity map, classifies each
    member, scores the members whose segment is ``"OLD"``, normalizes the raw
    scores to display scores, records the percentile history, and rewrites
    the site's rows in ``billiards_dws.dws_member_winback_index``.
    """

    INDEX_TYPE = "WBI"

    DEFAULT_PARAMS = {
        # General parameters
        'lookback_days_recency': 60,
        'visit_lookback_days': 180,
        'percentile_lower': 5,
        'percentile_upper': 95,
        'compression_mode': 0,
        'use_smoothing': 1,
        'ewma_alpha': 0.2,
        # Segmentation parameters
        'new_visit_threshold': 2,
        'new_days_threshold': 30,
        'recharge_recent_days': 14,
        'new_recharge_max_visits': 10,
        'recency_hard_floor_days': 14,
        'recency_gate_days': 14,
        'recency_gate_slope_days': 3,
        # WBI parameters
        'overdue_alpha': 2.0,
        'overdue_weight_halflife_days': 30,
        'overdue_weight_blend_min_samples': 8,
        'h_recharge': 7,
        'amount_base_M0': 300,
        'balance_base_B0': 500,
        'value_w_spend': 1.0,
        'value_w_bal': 1.0,
        'w_over': 2.0,
        'w_drop': 1.0,
        'w_re': 0.4,
        'w_value': 1.2,
        # STOP high-balance exception, disabled by default
        'enable_stop_high_balance_exception': 0,
        'high_balance_threshold': 1000,
    }

    def get_task_code(self) -> str:
        """Return the task code of this task."""
        return "DWS_WINBACK_INDEX"

    def get_target_table(self) -> str:
        """Return the name of the table this task writes to."""
        return "dws_member_winback_index"

    def get_primary_keys(self) -> List[str]:
        """Return the primary-key columns of the target table."""
        return ['site_id', 'member_id']

    def get_index_type(self) -> str:
        """Return the index type identifier (``"WBI"``)."""
        return self.INDEX_TYPE

    def execute(self, context: Optional[TaskContext]) -> Dict[str, Any]:
        """Run the WBI calculation for one site and persist the results.

        Args:
            context: Task context forwarded to ``_get_site_id``.

        Returns:
            ``{'status': 'skipped', 'reason': ...}`` when there is no activity
            data or no member qualifies; otherwise a dict with ``status``
            ``'success'``, ``member_count`` and ``records_inserted``.
        """
        self.logger.info("开始计算老客挽回指数 (WBI)")
        site_id = self._get_site_id(context)
        tenant_id = self._get_tenant_id()
        params = self._load_params()

        activity_map = self._build_member_activity(site_id, tenant_id, params)
        if not activity_map:
            self.logger.warning("No member activity data available; skip calculation")
            return {'status': 'skipped', 'reason': 'no_data'}

        winback_list: List[MemberWinbackData] = []
        for activity in activity_map.values():
            segment, status, in_scope = self.classify_segment(activity, params)
            if not in_scope:
                continue
            # Keep OLD members plus STOP_HIGH_BALANCE rows; only OLD members
            # receive a score, the others are stored with a NULL raw score.
            if segment != "OLD" and status != "STOP_HIGH_BALANCE":
                continue
            data = MemberWinbackData(activity=activity, status=status, segment=segment)
            if segment == "OLD":
                self._calculate_wbi_scores(data, params)
            winback_list.append(data)

        if not winback_list:
            # NOTE(review): this returns before _save_winback_data, so rows
            # written by an earlier run are left in place -- confirm intended.
            self.logger.warning("No old-member rows to calculate")
            return {'status': 'skipped', 'reason': 'no_old_members'}

        # Normalize raw scores to display scores.
        raw_scores = [
            (d.activity.member_id, d.raw_score)
            for d in winback_list
            if d.raw_score is not None
        ]
        if raw_scores:
            compression = self._map_compression(params)
            use_smoothing = int(params.get('use_smoothing', 1)) == 1
            percentile_lower = int(params['percentile_lower'])
            percentile_upper = int(params['percentile_upper'])

            normalized = self.batch_normalize_to_display(
                raw_scores,
                compression=compression,
                percentile_lower=percentile_lower,
                percentile_upper=percentile_upper,
                use_smoothing=use_smoothing,
                site_id=site_id,
            )
            score_map = {member_id: display for member_id, _, display in normalized}
            for data in winback_list:
                if data.activity.member_id in score_map:
                    data.display_score = score_map[data.activity.member_id]

            # Save the percentile history for this run.
            all_raw = [float(score) for _, score in raw_scores]
            q_l, q_u = self.calculate_percentiles(
                all_raw,
                percentile_lower,
                percentile_upper,
            )
            if use_smoothing:
                smoothed_l, smoothed_u = self._apply_ewma_smoothing(site_id, q_l, q_u)
            else:
                smoothed_l, smoothed_u = q_l, q_u
            self.save_percentile_history(
                site_id=site_id,
                percentile_5=q_l,
                percentile_95=q_u,
                percentile_5_smoothed=smoothed_l,
                percentile_95_smoothed=smoothed_u,
                record_count=len(all_raw),
                min_raw=min(all_raw),
                max_raw=max(all_raw),
                avg_raw=sum(all_raw) / len(all_raw),
            )

        inserted = self._save_winback_data(winback_list)
        self.logger.info("WBI calculation finished, inserted %d rows", inserted)
        return {
            'status': 'success',
            'member_count': len(winback_list),
            'records_inserted': inserted,
        }

    def _weighted_cdf(
        self,
        samples: List[Tuple[float, int]],
        t_v: float,
        halflife_days: float,
        blend_min_samples: int,
    ) -> float:
        """Return the recency-weighted empirical CDF of the intervals at ``t_v``.

        Args:
            samples: ``(interval, age_days)`` pairs.
            t_v: Value at which the CDF is evaluated.
            halflife_days: Half-life of the exponential age weighting; a value
                ``<= 0`` disables weighting (plain empirical CDF).
            blend_min_samples: Sample count at which the weighted estimate is
                used on its own; with fewer samples it is blended linearly
                with the equal-weight estimate.

        Returns:
            A probability in ``[0, 1]``; ``0.5`` when ``samples`` is empty.
        """
        if not samples:
            return 0.5
        if halflife_days <= 0:
            p_equal = sum(1.0 for interval, _ in samples if interval <= t_v) / len(samples)
            return self._clip(p_equal, 0.0, 1.0)

        ln2 = math.log(2.0)
        weighted_hit = 0.0
        weight_sum = 0.0
        equal_hit = 0.0
        for interval, age_days in samples:
            # The weight halves every `halflife_days` of sample age.
            weight = math.exp(-ln2 * float(age_days) / halflife_days)
            indicator = 1.0 if interval <= t_v else 0.0
            weighted_hit += weight * indicator
            weight_sum += weight
            equal_hit += indicator

        # All weights can underflow to 0 for very old samples.
        p_weighted = 0.5 if weight_sum <= 0 else (weighted_hit / weight_sum)
        p_equal = equal_hit / len(samples)
        lam = min(1.0, float(len(samples)) / float(max(1, blend_min_samples)))
        p_final = lam * p_weighted + (1.0 - lam) * p_equal
        return self._clip(p_final, 0.0, 1.0)

    def _weighted_quantile(
        self,
        samples: List[Tuple[float, int]],
        quantile: float,
        halflife_days: float,
        blend_min_samples: int,
    ) -> Optional[float]:
        """Return a recency-weighted quantile of the sampled intervals.

        Args:
            samples: ``(interval, age_days)`` pairs.
            quantile: Requested quantile; clipped to ``[0, 1]``.
            halflife_days: Half-life of the exponential age weighting; a value
                ``<= 0`` gives every sample the same weight.
            blend_min_samples: Sample count at which the weighted distribution
                is used on its own; with fewer samples it is blended with the
                equal-weight distribution.

        Returns:
            The smallest interval whose cumulative blended weight reaches the
            quantile, or ``None`` when ``samples`` is empty.
        """
        if not samples:
            return None

        q = self._clip(quantile, 0.0, 1.0)
        equal_weight = 1.0 / float(len(samples))

        if halflife_days <= 0:
            weighted = [(interval, equal_weight) for interval, _ in samples]
        else:
            ln2 = math.log(2.0)
            raw_weighted: List[Tuple[float, float]] = []
            total = 0.0
            for interval, age_days in samples:
                w = math.exp(-ln2 * float(age_days) / halflife_days)
                raw_weighted.append((interval, w))
                total += w
            if total <= 0:
                # Every weight underflowed; fall back to equal weights.
                weighted = [(interval, equal_weight) for interval, _ in samples]
            else:
                weighted = [(interval, w / total) for interval, w in raw_weighted]

        # For small samples, blend the weighted distribution with the
        # equal-weight distribution.
        lam = min(1.0, float(len(samples)) / float(max(1, blend_min_samples)))
        blended = [
            (interval, lam * w + (1.0 - lam) * equal_weight)
            for interval, w in weighted
        ]
        blended.sort(key=lambda item: item[0])

        cumulative = 0.0
        for interval, weight in blended:
            cumulative += weight
            if cumulative >= q:
                return float(interval)
        # Rounding can leave the cumulative weight marginally below q.
        return float(blended[-1][0])

    @staticmethod
    def _log_value_component(amount: Any, base: float) -> float:
        """Return ``log1p(amount / base)``, guarded against invalid input.

        ``math.log1p`` raises ``ValueError`` for arguments ``<= -1``, so a
        missing or non-positive amount (and a non-positive ``base``)
        contributes ``0.0`` instead of a negative value or an exception.
        """
        if base <= 0:
            return 0.0
        ratio = float(amount or 0.0) / base
        return math.log1p(ratio) if ratio > 0.0 else 0.0

    def _calculate_wbi_scores(self, data: MemberWinbackData, params: Dict[str, float]) -> None:
        """Fill in the WBI score components and the raw score of ``data``.

        Args:
            data: Record to score; its ``activity`` is read and the score
                fields are updated in place.
            params: Index parameters (see ``DEFAULT_PARAMS`` for the keys).
        """
        activity = data.activity

        # 1) Overdue urgency, based on the recency-weighted empirical CDF of
        #    the member's historical visit intervals.
        overdue_alpha = float(params['overdue_alpha'])
        half_life_days = float(params.get('overdue_weight_halflife_days', 30))
        blend_min_samples = int(params.get('overdue_weight_blend_min_samples', 8))

        if activity.interval_count <= 0:
            p = 0.5
            ideal_interval = None
        else:
            if len(activity.interval_ages_days) == activity.interval_count:
                samples = list(zip(activity.intervals, activity.interval_ages_days))
            else:
                # Ages do not line up with the intervals: treat every sample
                # as current (age 0), i.e. equal weights.
                samples = [(interval, 0) for interval in activity.intervals]
            p = self._weighted_cdf(
                samples=samples,
                t_v=activity.t_v,
                halflife_days=half_life_days,
                blend_min_samples=blend_min_samples,
            )
            ideal_interval = self._weighted_quantile(
                samples=samples,
                quantile=0.5,
                halflife_days=half_life_days,
                blend_min_samples=blend_min_samples,
            )

        data.overdue_cdf_p = p
        data.overdue_old = math.pow(p, overdue_alpha)
        data.ideal_interval_days = ideal_interval
        if ideal_interval is not None and activity.last_visit_time is not None:
            ideal_days = max(0, int(round(ideal_interval)))
            data.ideal_next_visit_date = (
                activity.last_visit_time.date() + timedelta(days=ideal_days)
            )
        else:
            data.ideal_next_visit_date = None

        # 2) Frequency-drop score: shortfall of the last 14 days against the
        #    14-day pace implied by the 60-day visit count.
        expected14 = activity.visits_60d * 14.0 / 60.0
        data.drop_old = self._clip(
            (expected14 - activity.visits_14d) / (expected14 + 1), 0.0, 1.0
        )

        # 3) Pressure from a recharge that has not been followed by a visit.
        if activity.recharge_unconsumed == 1:
            data.recharge_old = self.decay(activity.t_r, params['h_recharge'])
        else:
            data.recharge_old = 0.0

        # 4) Value score from 180-day spend and stored-value balance.
        m0 = float(params['amount_base_M0'])
        b0 = float(params['balance_base_B0'])
        spend_score = self._log_value_component(activity.spend_180d, m0)
        bal_score = self._log_value_component(activity.sv_balance, b0)
        data.value_old = (
            float(params['value_w_spend']) * spend_score
            + float(params['value_w_bal']) * bal_score
        )

        data.raw_score = (
            float(params['w_over']) * data.overdue_old
            + float(params['w_drop']) * data.drop_old
            + float(params['w_re']) * data.recharge_old
            + float(params['w_value']) * data.value_old
        )

        # Recency gate: zero the score below the hard floor, otherwise scale
        # it by a step or logistic function of the days since the last visit.
        hard_floor_days = float(params.get('recency_hard_floor_days', 0))
        gate_days = float(params.get('recency_gate_days', 14))
        slope_days = float(params.get('recency_gate_slope_days', 3))
        if hard_floor_days > 0 and activity.t_v < hard_floor_days:
            suppression = 0.0
        elif slope_days <= 0:
            suppression = 1.0 if activity.t_v >= gate_days else 0.0
        else:
            x = (activity.t_v - gate_days) / slope_days
            # Bound the exponent so math.exp cannot overflow.
            x = self._clip(x, -60.0, 60.0)
            suppression = 1.0 / (1.0 + math.exp(-x))
        data.raw_score *= suppression

        # Never report a negative raw score.
        if data.raw_score < 0:
            data.raw_score = 0.0

    def _save_winback_data(self, data_list: List[MemberWinbackData]) -> int:
        """Replace the site's WBI rows with ``data_list`` in one transaction.

        The site is taken from the first record. All existing rows of that
        site are deleted and the new rows inserted; the transaction is
        committed on success and rolled back if any statement fails, so a
        failed run cannot leave the table emptied or half-written.

        Args:
            data_list: Records to persist.

        Returns:
            Number of inserted rows as reported by the cursor; ``0`` when
            ``data_list`` is empty (nothing is deleted in that case).

        Raises:
            Exception: Whatever the database driver raised, after rollback.
        """
        if not data_list:
            return 0

        site_id = data_list[0].activity.site_id

        # Full refresh per site, so rows of members whose segment changed do
        # not linger as stale data.
        delete_sql = """
            DELETE FROM billiards_dws.dws_member_winback_index
            WHERE site_id = %s
        """
        insert_sql = """
            INSERT INTO billiards_dws.dws_member_winback_index (
                site_id, tenant_id, member_id,
                status, segment,
                member_create_time, first_visit_time, last_visit_time, last_recharge_time,
                t_v, t_r, t_a,
                visits_14d, visits_60d, visits_total,
                spend_30d, spend_180d, sv_balance, recharge_60d_amt,
                interval_count,
                overdue_old, overdue_cdf_p, drop_old, recharge_old, value_old,
                ideal_interval_days, ideal_next_visit_date,
                raw_score, display_score,
                last_wechat_touch_time,
                calc_time, created_at, updated_at
            ) VALUES (
                %s, %s, %s,
                %s, %s,
                %s, %s, %s, %s,
                %s, %s, %s,
                %s, %s, %s,
                %s, %s, %s, %s,
                %s,
                %s, %s, %s, %s, %s,
                %s, %s,
                %s, %s,
                %s,
                NOW(), NOW(), NOW()
            )
        """

        inserted = 0
        try:
            with self.db.conn.cursor() as cur:
                cur.execute(delete_sql, (site_id,))
                for data in data_list:
                    activity = data.activity
                    cur.execute(insert_sql, (
                        activity.site_id, activity.tenant_id, activity.member_id,
                        data.status, data.segment,
                        activity.member_create_time, activity.first_visit_time,
                        activity.last_visit_time, activity.last_recharge_time,
                        activity.t_v, activity.t_r, activity.t_a,
                        activity.visits_14d, activity.visits_60d, activity.visits_total,
                        activity.spend_30d, activity.spend_180d,
                        activity.sv_balance, activity.recharge_60d_amt,
                        activity.interval_count,
                        data.overdue_old, data.overdue_cdf_p, data.drop_old,
                        data.recharge_old, data.value_old,
                        data.ideal_interval_days, data.ideal_next_visit_date,
                        data.raw_score, data.display_score,
                        None,  # last_wechat_touch_time is not populated here
                    ))
                    inserted += cur.rowcount
            self.db.conn.commit()
        except Exception:
            # Undo the pending DELETE and partial INSERTs before propagating.
            self.db.conn.rollback()
            raise
        return inserted

    def _clip(self, value: float, low: float, high: float) -> float:
        """Clamp ``value`` to the closed range ``[low, high]``."""
        return max(low, min(high, value))

    def _map_compression(self, params: Dict[str, float]) -> str:
        """Map ``compression_mode`` (1 / 2 / other) to ``"log1p"`` / ``"asinh"`` / ``"none"``."""
        mode = int(params.get('compression_mode', 0))
        if mode == 1:
            return "log1p"
        if mode == 2:
            return "asinh"
        return "none"
# Public API of this module: only the task class is exported.
__all__ = ['WinbackIndexTask']