# -*- coding: utf-8 -*-
"""Winback index (WBI) calculation task for existing ("old") members."""

from __future__ import annotations

import math
from dataclasses import dataclass
from datetime import date, timedelta
from typing import Any, Dict, List, Optional, Tuple

from .member_index_base import MemberActivityData, MemberIndexBaseTask
from ..base_dws_task import TaskContext


@dataclass
class MemberWinbackData:
    """Per-member WBI result: source activity plus the computed components."""

    # Activity snapshot the scores are derived from.
    activity: MemberActivityData
    # Status and segment as returned by ``classify_segment``.
    status: str
    segment: str
    # Overdue urgency: ``overdue_cdf_p ** overdue_alpha``.
    overdue_old: float = 0.0
    # Recency-weighted empirical CDF of visit intervals evaluated at ``t_v``.
    overdue_cdf_p: float = 0.0
    # Visit-frequency drop component, clipped to [0, 1].
    drop_old: float = 0.0
    # Pressure from a recharge that has not been followed by a visit.
    recharge_old: float = 0.0
    # Value component built from 180-day spend and stored-value balance.
    value_old: float = 0.0
    # Weighted median visit interval, and the next visit date it implies.
    ideal_interval_days: Optional[float] = None
    ideal_next_visit_date: Optional[date] = None
    # Raw score before normalisation / display score after normalisation.
    raw_score: Optional[float] = None
    display_score: Optional[float] = None


class WinbackIndexTask(MemberIndexBaseTask):
    """Compute and persist the winback index (WBI) for old members."""

    INDEX_TYPE = "WBI"

    DEFAULT_PARAMS = {
        # General parameters
        'lookback_days_recency': 60,
        'visit_lookback_days': 180,
        'percentile_lower': 5,
        'percentile_upper': 95,
        'compression_mode': 0,
        'use_smoothing': 1,
        'ewma_alpha': 0.2,
        # Segmentation parameters
        'new_visit_threshold': 2,
        'new_days_threshold': 30,
        'recharge_recent_days': 14,
        'new_recharge_max_visits': 10,
        'recency_hard_floor_days': 14,
        'recency_gate_days': 14,
        'recency_gate_slope_days': 3,
        # WBI parameters
        'overdue_alpha': 2.0,
        'overdue_weight_halflife_days': 30,
        'overdue_weight_blend_min_samples': 8,
        'h_recharge': 7,
        'amount_base_M0': 300,
        'balance_base_B0': 500,
        'value_w_spend': 1.0,
        'value_w_bal': 1.0,
        'w_over': 2.0,
        'w_drop': 1.0,
        'w_re': 0.4,
        'w_value': 1.2,
        # STOP high-balance exception (disabled by default)
        'enable_stop_high_balance_exception': 0,
        'high_balance_threshold': 1000,
    }

    def get_task_code(self) -> str:
        """Return the task code of this task."""
        return "DWS_WINBACK_INDEX"

    def get_target_table(self) -> str:
        """Return the name of the table this task writes to."""
        return "dws_member_winback_index"

    def get_primary_keys(self) -> List[str]:
        """Return the primary-key columns of the target table."""
        return ['site_id', 'member_id']

    def get_index_type(self) -> str:
        """Return the index type identifier (``INDEX_TYPE``)."""
        return self.INDEX_TYPE

    def execute(self, context: Optional[TaskContext]) -> Dict[str, Any]:
        """Run the WBI calculation for one site and persist the result.

        Members are classified with ``classify_segment``; only in-scope
        members that are in the "OLD" segment or carry the
        "STOP_HIGH_BALANCE" status are kept.  Scores are computed for "OLD"
        members only, so "STOP_HIGH_BALANCE" rows of another segment are
        stored without raw/display scores.

        Args:
            context: Task context used to resolve the site id.

        Returns:
            ``{'status': 'skipped', 'reason': ...}`` when there is nothing to
            calculate, otherwise ``{'status': 'success', 'member_count': ...,
            'records_inserted': ...}``.
        """
        self.logger.info("开始计算老客挽回指数 (WBI)")

        site_id = self._get_site_id(context)
        tenant_id = self._get_tenant_id()
        params = self._load_params()

        activity_map = self._build_member_activity(site_id, tenant_id, params)
        if not activity_map:
            self.logger.warning("No member activity data available; skip calculation")
            return {'status': 'skipped', 'reason': 'no_data'}

        winback_list: List[MemberWinbackData] = []
        for activity in activity_map.values():
            segment, status, in_scope = self.classify_segment(activity, params)
            if not in_scope:
                continue
            if segment != "OLD" and status != "STOP_HIGH_BALANCE":
                continue
            data = MemberWinbackData(activity=activity, status=status, segment=segment)
            if segment == "OLD":
                self._calculate_wbi_scores(data, params)
            winback_list.append(data)

        if not winback_list:
            # NOTE(review): rows stored by an earlier run are not cleared on
            # this path -- confirm that is the intended behaviour.
            self.logger.warning("No old-member rows to calculate")
            return {'status': 'skipped', 'reason': 'no_old_members'}

        # Normalise raw scores into display scores.
        raw_scores = [
            (d.activity.member_id, d.raw_score)
            for d in winback_list
            if d.raw_score is not None
        ]
        if raw_scores:
            compression = self._map_compression(params)
            use_smoothing = int(params.get('use_smoothing', 1)) == 1
            percentile_lower = int(params['percentile_lower'])
            percentile_upper = int(params['percentile_upper'])

            normalized = self.batch_normalize_to_display(
                raw_scores,
                compression=compression,
                percentile_lower=percentile_lower,
                percentile_upper=percentile_upper,
                use_smoothing=use_smoothing,
                site_id=site_id,
            )
            score_map = {member_id: display for member_id, _, display in normalized}
            for data in winback_list:
                if data.activity.member_id in score_map:
                    data.display_score = score_map[data.activity.member_id]

            # Persist the percentile history of this run.
            all_raw = [float(score) for _, score in raw_scores]
            q_l, q_u = self.calculate_percentiles(
                all_raw, percentile_lower, percentile_upper
            )
            if use_smoothing:
                smoothed_l, smoothed_u = self._apply_ewma_smoothing(site_id, q_l, q_u)
            else:
                smoothed_l, smoothed_u = q_l, q_u
            self.save_percentile_history(
                site_id=site_id,
                percentile_5=q_l,
                percentile_95=q_u,
                percentile_5_smoothed=smoothed_l,
                percentile_95_smoothed=smoothed_u,
                record_count=len(all_raw),
                min_raw=min(all_raw),
                max_raw=max(all_raw),
                avg_raw=sum(all_raw) / len(all_raw),
            )

        inserted = self._save_winback_data(winback_list)
        self.logger.info("WBI calculation finished, inserted %d rows", inserted)
        return {
            'status': 'success',
            'member_count': len(winback_list),
            'records_inserted': inserted,
        }

    def _weighted_cdf(
        self,
        samples: List[Tuple[float, int]],
        t_v: float,
        halflife_days: float,
        blend_min_samples: int,
    ) -> float:
        """Return the recency-weighted empirical CDF of intervals at ``t_v``.

        Args:
            samples: ``(interval, age_days)`` pairs.
            t_v: Point at which the CDF is evaluated.
            halflife_days: Half-life of the exponential age weighting; a
                value ``<= 0`` disables weighting (plain empirical CDF).
            blend_min_samples: Sample count at which the weighted estimate
                is used on its own; with fewer samples it is blended
                linearly with the equal-weight estimate.

        Returns:
            A probability in [0, 1]; 0.5 when ``samples`` is empty.
        """
        if not samples:
            return 0.5

        if halflife_days <= 0:
            p_equal = sum(1.0 for interval, _ in samples if interval <= t_v) / len(samples)
            return self._clip(p_equal, 0.0, 1.0)

        ln2 = math.log(2.0)
        weighted_hit = 0.0
        weight_sum = 0.0
        equal_hit = 0.0
        for interval, age_days in samples:
            weight = math.exp(-ln2 * float(age_days) / halflife_days)
            indicator = 1.0 if interval <= t_v else 0.0
            weighted_hit += weight * indicator
            weight_sum += weight
            equal_hit += indicator

        p_weighted = 0.5 if weight_sum <= 0 else (weighted_hit / weight_sum)
        p_equal = equal_hit / len(samples)
        lam = min(1.0, float(len(samples)) / float(max(1, blend_min_samples)))
        p_final = lam * p_weighted + (1.0 - lam) * p_equal
        return self._clip(p_final, 0.0, 1.0)

    def _weighted_quantile(
        self,
        samples: List[Tuple[float, int]],
        quantile: float,
        halflife_days: float,
        blend_min_samples: int,
    ) -> Optional[float]:
        """Return the recency-weighted quantile of the sampled intervals.

        Args:
            samples: ``(interval, age_days)`` pairs.
            quantile: Target quantile; clipped to [0, 1].
            halflife_days: Half-life of the exponential age weighting; a
                value ``<= 0`` gives every sample the same weight.
            blend_min_samples: Sample count at which the age weights are
                used on their own; with fewer samples they are blended
                linearly with equal weights.

        Returns:
            The smallest interval whose cumulative weight reaches the
            quantile (the largest interval if rounding keeps the cumulative
            weight below it), or ``None`` when ``samples`` is empty.
        """
        if not samples:
            return None

        q = self._clip(quantile, 0.0, 1.0)
        equal_weight = 1.0 / float(len(samples))

        if halflife_days <= 0:
            weighted = [(interval, equal_weight) for interval, _ in samples]
        else:
            ln2 = math.log(2.0)
            raw_weighted: List[Tuple[float, float]] = []
            total = 0.0
            for interval, age_days in samples:
                w = math.exp(-ln2 * float(age_days) / halflife_days)
                raw_weighted.append((interval, w))
                total += w
            if total <= 0:
                weighted = [(interval, equal_weight) for interval, _ in samples]
            else:
                weighted = [(interval, w / total) for interval, w in raw_weighted]

        # Blend the weighted and the equal-weight distribution for small samples.
        lam = min(1.0, float(len(samples)) / float(max(1, blend_min_samples)))
        blended: List[Tuple[float, float]] = [
            (interval, lam * w + (1.0 - lam) * equal_weight)
            for interval, w in weighted
        ]
        blended.sort(key=lambda item: item[0])

        cumulative = 0.0
        for interval, weight in blended:
            cumulative += weight
            if cumulative >= q:
                return float(interval)
        return float(blended[-1][0])

    @staticmethod
    def _log_value_score(amount: float, base: float) -> float:
        """Return ``log1p(amount / base)`` with out-of-domain input guarded.

        A non-positive ``base`` yields 0.0.  Negative amounts are treated as
        0 so that ``amount <= -base`` cannot raise ``ValueError`` in
        ``math.log1p`` and abort the calculation of the whole site.
        """
        if base <= 0:
            return 0.0
        return math.log1p(max(0.0, float(amount)) / base)

    def _calculate_wbi_scores(self, data: MemberWinbackData, params: Dict[str, float]) -> None:
        """Fill the WBI components and the raw score of ``data`` in place.

        Args:
            data: Record to update; its ``activity`` supplies the inputs.
            params: Calculation parameters (see ``DEFAULT_PARAMS``).
        """
        activity = data.activity

        # 1) Overdue urgency, based on the recency-weighted empirical CDF.
        overdue_alpha = float(params['overdue_alpha'])
        half_life_days = float(params.get('overdue_weight_halflife_days', 30))
        blend_min_samples = int(params.get('overdue_weight_blend_min_samples', 8))

        if activity.interval_count <= 0:
            p = 0.5
            ideal_interval = None
        else:
            if len(activity.interval_ages_days) == activity.interval_count:
                samples = list(zip(activity.intervals, activity.interval_ages_days))
            else:
                # Ages do not line up with the intervals: treat every
                # interval as age 0, i.e. weight all of them equally.
                samples = [(interval, 0) for interval in activity.intervals]
            p = self._weighted_cdf(
                samples=samples,
                t_v=activity.t_v,
                halflife_days=half_life_days,
                blend_min_samples=blend_min_samples,
            )
            ideal_interval = self._weighted_quantile(
                samples=samples,
                quantile=0.5,
                halflife_days=half_life_days,
                blend_min_samples=blend_min_samples,
            )

        data.overdue_cdf_p = p
        data.overdue_old = math.pow(p, overdue_alpha)
        data.ideal_interval_days = ideal_interval
        if ideal_interval is not None and activity.last_visit_time is not None:
            ideal_days = max(0, int(round(ideal_interval)))
            data.ideal_next_visit_date = (
                activity.last_visit_time.date() + timedelta(days=ideal_days)
            )
        else:
            data.ideal_next_visit_date = None

        # 2) Frequency drop: 14-day visits vs. the 60-day rate scaled to 14 days.
        expected14 = activity.visits_60d * 14.0 / 60.0
        data.drop_old = self._clip(
            (expected14 - activity.visits_14d) / (expected14 + 1), 0.0, 1.0
        )

        # 3) Pressure from a recharge that has not been followed by a visit.
        if activity.recharge_unconsumed == 1:
            data.recharge_old = self.decay(activity.t_r, params['h_recharge'])
        else:
            data.recharge_old = 0.0

        # 4) Value score from 180-day spend and stored-value balance.
        spend_score = self._log_value_score(
            activity.spend_180d, float(params['amount_base_M0'])
        )
        bal_score = self._log_value_score(
            activity.sv_balance, float(params['balance_base_B0'])
        )
        data.value_old = (
            float(params['value_w_spend']) * spend_score
            + float(params['value_w_bal']) * bal_score
        )

        data.raw_score = (
            float(params['w_over']) * data.overdue_old
            + float(params['w_drop']) * data.drop_old
            + float(params['w_re']) * data.recharge_old
            + float(params['w_value']) * data.value_old
        )

        # Recency gate: zero below the hard floor, otherwise a step (when the
        # slope is <= 0) or a logistic curve centred on ``gate_days``.
        hard_floor_days = float(params.get('recency_hard_floor_days', 0))
        gate_days = float(params.get('recency_gate_days', 14))
        slope_days = float(params.get('recency_gate_slope_days', 3))
        if hard_floor_days > 0 and activity.t_v < hard_floor_days:
            suppression = 0.0
        elif slope_days <= 0:
            suppression = 1.0 if activity.t_v >= gate_days else 0.0
        else:
            x = (activity.t_v - gate_days) / slope_days
            # Bound the exponent so math.exp cannot overflow.
            x = self._clip(x, -60.0, 60.0)
            suppression = 1.0 / (1.0 + math.exp(-x))
        data.raw_score *= suppression

        # Keep the raw score non-negative.
        if data.raw_score < 0:
            data.raw_score = 0.0

    def _save_winback_data(self, data_list: List[MemberWinbackData]) -> int:
        """Replace the stored WBI rows of a site with ``data_list``.

        The site id is taken from the first element.  The delete and all
        inserts run in one transaction, which is rolled back if any statement
        fails so that a half-applied refresh is never left pending on the
        connection.

        Args:
            data_list: Rows to store.

        Returns:
            Sum of the cursor row counts of the inserts; 0 for an empty list.

        Raises:
            Exception: Any database error is re-raised after the rollback.
        """
        if not data_list:
            return 0

        site_id = data_list[0].activity.site_id

        # Full refresh per site, so that rows of members whose segment
        # changed do not linger.
        delete_sql = """
            DELETE FROM billiards_dws.dws_member_winback_index
            WHERE site_id = %s
        """
        insert_sql = """
            INSERT INTO billiards_dws.dws_member_winback_index (
                site_id, tenant_id, member_id, status, segment,
                member_create_time, first_visit_time, last_visit_time, last_recharge_time,
                t_v, t_r, t_a,
                visits_14d, visits_60d, visits_total,
                spend_30d, spend_180d, sv_balance, recharge_60d_amt,
                interval_count,
                overdue_old, overdue_cdf_p, drop_old, recharge_old, value_old,
                ideal_interval_days, ideal_next_visit_date,
                raw_score, display_score,
                last_wechat_touch_time,
                calc_time, created_at, updated_at
            ) VALUES (
                %s, %s, %s, %s, %s,
                %s, %s, %s, %s,
                %s, %s, %s,
                %s, %s, %s,
                %s, %s, %s, %s,
                %s,
                %s, %s, %s, %s, %s,
                %s, %s,
                %s, %s,
                %s,
                NOW(), NOW(), NOW()
            )
        """

        conn = self.db.conn
        inserted = 0
        try:
            with conn.cursor() as cur:
                cur.execute(delete_sql, (site_id,))
                for data in data_list:
                    activity = data.activity
                    cur.execute(insert_sql, (
                        activity.site_id, activity.tenant_id, activity.member_id,
                        data.status, data.segment,
                        activity.member_create_time, activity.first_visit_time,
                        activity.last_visit_time, activity.last_recharge_time,
                        activity.t_v, activity.t_r, activity.t_a,
                        activity.visits_14d, activity.visits_60d, activity.visits_total,
                        activity.spend_30d, activity.spend_180d,
                        activity.sv_balance, activity.recharge_60d_amt,
                        activity.interval_count,
                        data.overdue_old, data.overdue_cdf_p, data.drop_old,
                        data.recharge_old, data.value_old,
                        data.ideal_interval_days, data.ideal_next_visit_date,
                        data.raw_score, data.display_score,
                        None,  # last_wechat_touch_time is not populated here
                    ))
                    inserted += cur.rowcount
            conn.commit()
        except Exception:
            # Undo the delete and any partial inserts before propagating.
            conn.rollback()
            raise
        return inserted

    def _clip(self, value: float, low: float, high: float) -> float:
        """Clamp ``value`` to the closed range [``low``, ``high``]."""
        return max(low, min(high, value))

    def _map_compression(self, params: Dict[str, float]) -> str:
        """Map ``compression_mode`` (1 / 2 / other) to "log1p" / "asinh" / "none"."""
        mode = int(params.get('compression_mode', 0))
        if mode == 1:
            return "log1p"
        if mode == 2:
            return "asinh"
        return "none"


__all__ = ['WinbackIndexTask']