初始提交:飞球 ETL 系统全量代码
This commit is contained in:
381
tasks/dws/index/newconv_index_task.py
Normal file
381
tasks/dws/index/newconv_index_task.py
Normal file
@@ -0,0 +1,381 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
新客转化指数(NCI)计算任务。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from .member_index_base import MemberActivityData, MemberIndexBaseTask
|
||||
from ..base_dws_task import TaskContext
|
||||
|
||||
|
||||
@dataclass
class MemberNewconvData:
    """Per-member working record used while computing the NCI.

    Bundles a member's activity snapshot with the segment classification,
    the individual component scores, and the raw / display scores that are
    derived from those components.
    """

    # Inputs: the activity snapshot and the classification result.
    activity: MemberActivityData
    status: str
    segment: str

    # Component scores; each one defaults to 0.0 until calculated.
    need_new: float = 0.0
    salvage_new: float = 0.0
    recharge_new: float = 0.0
    value_new: float = 0.0
    welcome_new: float = 0.0

    # Raw scores (welcome part, conversion part, and total); None until set.
    raw_score_welcome: Optional[float] = None
    raw_score_convert: Optional[float] = None
    raw_score: Optional[float] = None

    # Normalised display scores matching the raw scores above; None until set.
    display_score_welcome: Optional[float] = None
    display_score_convert: Optional[float] = None
    display_score: Optional[float] = None
|
||||
|
||||
|
||||
class NewconvIndexTask(MemberIndexBaseTask):
    """New-customer conversion index (NCI) calculation task.

    For every member classified into the "NEW" segment the task computes a
    set of component scores, combines them into raw scores (welcome part,
    conversion part, total), normalises those into display scores, records
    the percentile history of the total raw score, and rewrites the site's
    rows in ``billiards_dws.dws_member_newconv_index``.
    """

    INDEX_TYPE = "NCI"

    DEFAULT_PARAMS = {
        # General parameters
        'lookback_days_recency': 60,
        'visit_lookback_days': 180,
        'percentile_lower': 5,
        'percentile_upper': 95,
        'compression_mode': 0,
        'use_smoothing': 1,
        'ewma_alpha': 0.2,
        # Segmentation parameters
        'new_visit_threshold': 2,
        'new_days_threshold': 30,
        'recharge_recent_days': 14,
        'new_recharge_max_visits': 10,
        # NCI parameters
        'no_touch_days_new': 3,
        't2_target_days': 7,
        'salvage_start': 30,
        'salvage_end': 60,
        'welcome_window_days': 3,
        'active_new_visit_threshold_14d': 2,
        'active_new_recency_days': 7,
        'active_new_penalty': 0.2,
        'h_recharge': 7,
        'amount_base_M0': 300,
        'balance_base_B0': 500,
        'value_w_spend': 1.0,
        'value_w_bal': 0.8,
        'w_welcome': 1.0,
        'w_need': 1.6,
        'w_re': 0.8,
        'w_value': 1.0,
        # STOP high-balance exception (disabled by default)
        'enable_stop_high_balance_exception': 0,
        'high_balance_threshold': 1000,
    }

    def get_task_code(self) -> str:
        """Return the task code of this task."""
        return "DWS_NEWCONV_INDEX"

    def get_target_table(self) -> str:
        """Return the name of the table this task writes to."""
        return "dws_member_newconv_index"

    def get_primary_keys(self) -> List[str]:
        """Return the primary-key column names of the target table."""
        return ['site_id', 'member_id']

    def get_index_type(self) -> str:
        """Return the index type identifier (``INDEX_TYPE``)."""
        return self.INDEX_TYPE

    def execute(self, context: Optional[TaskContext]) -> Dict[str, Any]:
        """Run the NCI calculation.

        Args:
            context: Task context forwarded to ``_get_site_id``; may be None.

        Returns:
            ``{'status': 'skipped', 'reason': ...}`` when there is no activity
            data or no member in the "NEW" segment; otherwise a dict with
            ``status='success'``, ``member_count`` and ``records_inserted``.
        """
        self.logger.info("开始计算新客转化指数(NCI)")

        site_id = self._get_site_id(context)
        tenant_id = self._get_tenant_id()
        params = self._load_params()

        activity_map = self._build_member_activity(site_id, tenant_id, params)
        if not activity_map:
            self.logger.warning("No member activity data available; skip calculation")
            return {'status': 'skipped', 'reason': 'no_data'}

        # Keep only in-scope members that fall into the NEW segment.
        newconv_list: List[MemberNewconvData] = []
        for activity in activity_map.values():
            segment, status, in_scope = self.classify_segment(activity, params)
            if not in_scope or segment != "NEW":
                continue

            data = MemberNewconvData(activity=activity, status=status, segment=segment)
            self._calculate_nci_scores(data, params)
            newconv_list.append(data)

        if not newconv_list:
            # NOTE(review): rows written for this site by an earlier run are
            # not cleared on this path, because the delete only happens in
            # _save_newconv_data -- confirm that this is intended.
            self.logger.warning("No new-member rows to calculate")
            return {'status': 'skipped', 'reason': 'no_new_members'}

        # Normalise raw scores into display scores.
        raw_scores = [
            (d.activity.member_id, d.raw_score)
            for d in newconv_list
            if d.raw_score is not None
        ]
        # NOTE(review): everything down to save_percentile_history is kept
        # under this guard because it relies on `use_smoothing` and on a
        # non-empty `all_raw` (min/max/average below).
        if raw_scores:
            use_smoothing = int(params.get('use_smoothing', 1)) == 1

            # Total score: the only one normalised with the smoothing flag.
            total_score_map = self._normalize_score_pairs(
                raw_scores,
                params=params,
                site_id=site_id,
                use_smoothing=use_smoothing,
            )
            for data in newconv_list:
                if data.activity.member_id in total_score_map:
                    data.display_score = total_score_map[data.activity.member_id]

            # Welcome sub-score, normalised without smoothing.
            raw_scores_welcome = [
                (d.activity.member_id, d.raw_score_welcome)
                for d in newconv_list
                if d.raw_score_welcome is not None
            ]
            welcome_score_map = self._normalize_score_pairs(
                raw_scores_welcome,
                params=params,
                site_id=site_id,
                use_smoothing=False,
            )
            for data in newconv_list:
                if data.activity.member_id in welcome_score_map:
                    data.display_score_welcome = welcome_score_map[data.activity.member_id]

            # Conversion sub-score, normalised without smoothing.
            raw_scores_convert = [
                (d.activity.member_id, d.raw_score_convert)
                for d in newconv_list
                if d.raw_score_convert is not None
            ]
            convert_score_map = self._normalize_score_pairs(
                raw_scores_convert,
                params=params,
                site_id=site_id,
                use_smoothing=False,
            )
            for data in newconv_list:
                if data.activity.member_id in convert_score_map:
                    data.display_score_convert = convert_score_map[data.activity.member_id]

            # Save the percentile history of the total raw score.
            all_raw = [float(score) for _, score in raw_scores]
            q_l, q_u = self.calculate_percentiles(
                all_raw,
                int(params['percentile_lower']),
                int(params['percentile_upper'])
            )
            if use_smoothing:
                smoothed_l, smoothed_u = self._apply_ewma_smoothing(site_id, q_l, q_u)
            else:
                smoothed_l, smoothed_u = q_l, q_u
            self.save_percentile_history(
                site_id=site_id,
                percentile_5=q_l,
                percentile_95=q_u,
                percentile_5_smoothed=smoothed_l,
                percentile_95_smoothed=smoothed_u,
                record_count=len(all_raw),
                min_raw=min(all_raw),
                max_raw=max(all_raw),
                avg_raw=sum(all_raw) / len(all_raw)
            )

        inserted = self._save_newconv_data(newconv_list)
        self.logger.info("NCI calculation finished, inserted %d rows", inserted)

        return {
            'status': 'success',
            'member_count': len(newconv_list),
            'records_inserted': inserted
        }

    @staticmethod
    def _log_ratio_score(amount: Any, base: float) -> float:
        """Return ``log1p(amount / base)`` with out-of-domain inputs guarded.

        Returns 0.0 when ``base`` is not positive. A missing (None) amount is
        treated as 0, and the ratio is clamped at 0 so that a negative amount
        contributes no value instead of a negative score or, at or below
        ``-base``, a ``ValueError`` from ``math.log1p``.
        """
        if base <= 0:
            return 0.0
        # float() also accepts Decimal, which cannot be divided by a float.
        ratio = float(amount or 0.0) / base
        return math.log1p(max(0.0, ratio))

    def _calculate_nci_scores(self, data: MemberNewconvData, params: Dict[str, float]) -> None:
        """Compute the NCI component scores and raw scores for one member.

        Fills ``need_new``, ``salvage_new``, ``recharge_new``, ``value_new``,
        ``welcome_new``, ``raw_score_welcome``, ``raw_score_convert`` and
        ``raw_score`` on ``data`` in place.

        Args:
            data: Record to update; ``data.activity`` supplies the inputs.
            params: Index parameters (see ``DEFAULT_PARAMS`` for the keys).
        """
        activity = data.activity

        # 1) Urgency: ramps from 0 at `no_touch_days_new` up to 1 at twice
        #    `t2_target_days`, driven by activity.t_v.
        #    NOTE(review): t_v is presumably days since the last visit --
        #    confirm against MemberActivityData.
        no_touch_days = float(params['no_touch_days_new'])
        t2_target_days = float(params['t2_target_days'])
        t2_max_days = t2_target_days * 2.0
        if t2_max_days <= no_touch_days:
            data.need_new = 0.0
        else:
            data.need_new = self._clip(
                (activity.t_v - no_touch_days) / (t2_max_days - no_touch_days),
                0.0, 1.0
            )

        # 2) Salvage: 1.0 up to `salvage_start`, linear decay to 0.0 at
        #    `salvage_end` (30 and 60 by default), driven by activity.t_a.
        salvage_start = float(params['salvage_start'])
        salvage_end = float(params['salvage_end'])
        if salvage_end <= salvage_start:
            data.salvage_new = 0.0
        elif activity.t_a <= salvage_start:
            data.salvage_new = 1.0
        elif activity.t_a >= salvage_end:
            data.salvage_new = 0.0
        else:
            data.salvage_new = (salvage_end - activity.t_a) / (salvage_end - salvage_start)

        # 3) Pressure from a recharge that was not followed by a return visit.
        if activity.recharge_unconsumed == 1:
            data.recharge_new = self.decay(activity.t_r, params['h_recharge'])
        else:
            data.recharge_new = 0.0

        # 4) Value score: log-scaled spend and stored-value balance.
        spend_score = self._log_ratio_score(
            activity.spend_180d, float(params['amount_base_M0'])
        )
        bal_score = self._log_ratio_score(
            activity.sv_balance, float(params['balance_base_B0'])
        )
        data.value_new = (
            float(params['value_w_spend']) * spend_score
            + float(params['value_w_bal']) * bal_score
        )

        # 5) Welcome score: prioritise reaching out right after the first
        #    visit; only applies to members with at most one visit in total.
        welcome_window_days = float(params.get('welcome_window_days', 3))
        data.welcome_new = 0.0
        if welcome_window_days > 0 and activity.visits_total <= 1 and activity.t_v <= welcome_window_days:
            data.welcome_new = self._clip(1.0 - (activity.t_v / welcome_window_days), 0.0, 1.0)

        # 6) Dampen highly active new customers in the conversion ranking.
        active_visit_threshold = int(params.get('active_new_visit_threshold_14d', 2))
        active_recency_days = float(params.get('active_new_recency_days', 7))
        active_penalty = float(params.get('active_new_penalty', 0.2))
        if activity.visits_14d >= active_visit_threshold and activity.t_v <= active_recency_days:
            active_multiplier = self._clip(active_penalty, 0.0, 1.0)
        else:
            active_multiplier = 1.0

        # 7) Value / recharge scores are scaled down inside the no-touch
        #    window and reach full weight once t_v >= no_touch_days.
        if no_touch_days > 0:
            touch_multiplier = self._clip(activity.t_v / no_touch_days, 0.0, 1.0)
        else:
            touch_multiplier = 1.0

        welcome_raw = float(params.get('w_welcome', 1.0)) * data.welcome_new
        convert_raw = active_multiplier * (
            float(params['w_need']) * (data.need_new * data.salvage_new)
            + float(params['w_re']) * data.recharge_new * touch_multiplier
            + float(params['w_value']) * data.value_new * touch_multiplier
        )
        # Both parts are floored at zero, so their sum needs no further clamp.
        data.raw_score_welcome = max(0.0, welcome_raw)
        data.raw_score_convert = max(0.0, convert_raw)
        data.raw_score = data.raw_score_welcome + data.raw_score_convert

    def _save_newconv_data(self, data_list: List[MemberNewconvData]) -> int:
        """Replace the site's NCI rows with ``data_list``.

        The site id is taken from the first record. The delete and all
        inserts run in one transaction: it is committed on success and
        rolled back (with the exception re-raised) on failure.

        Args:
            data_list: Records to persist.

        Returns:
            Sum of the cursor row counts of the inserts; 0 for an empty list
            (in which case nothing is deleted either).
        """
        if not data_list:
            return 0

        site_id = data_list[0].activity.site_id
        # Full refresh per site, so rows made stale by segment changes do not
        # linger.
        delete_sql = """
            DELETE FROM billiards_dws.dws_member_newconv_index
            WHERE site_id = %s
        """

        insert_sql = """
            INSERT INTO billiards_dws.dws_member_newconv_index (
                site_id, tenant_id, member_id,
                status, segment,
                member_create_time, first_visit_time, last_visit_time, last_recharge_time,
                t_v, t_r, t_a,
                visits_14d, visits_60d, visits_total,
                spend_30d, spend_180d, sv_balance, recharge_60d_amt,
                interval_count,
                need_new, salvage_new, recharge_new, value_new,
                welcome_new,
                raw_score_welcome, raw_score_convert, raw_score,
                display_score_welcome, display_score_convert, display_score,
                last_wechat_touch_time,
                calc_time, created_at, updated_at
            ) VALUES (
                %s, %s, %s,
                %s, %s,
                %s, %s, %s, %s,
                %s, %s, %s,
                %s, %s, %s,
                %s, %s, %s, %s,
                %s,
                %s, %s, %s, %s,
                %s,
                %s, %s, %s,
                %s, %s, %s,
                %s,
                NOW(), NOW(), NOW()
            )
        """

        conn = self.db.conn
        inserted = 0
        try:
            with conn.cursor() as cur:
                cur.execute(delete_sql, (site_id,))
                for data in data_list:
                    activity = data.activity
                    cur.execute(insert_sql, (
                        activity.site_id, activity.tenant_id, activity.member_id,
                        data.status, data.segment,
                        activity.member_create_time, activity.first_visit_time,
                        activity.last_visit_time, activity.last_recharge_time,
                        activity.t_v, activity.t_r, activity.t_a,
                        activity.visits_14d, activity.visits_60d, activity.visits_total,
                        activity.spend_30d, activity.spend_180d, activity.sv_balance,
                        activity.recharge_60d_amt,
                        activity.interval_count,
                        data.need_new, data.salvage_new, data.recharge_new, data.value_new,
                        data.welcome_new,
                        data.raw_score_welcome, data.raw_score_convert, data.raw_score,
                        data.display_score_welcome, data.display_score_convert, data.display_score,
                        None,  # last_wechat_touch_time is always written as NULL here
                    ))
                    inserted += cur.rowcount
            conn.commit()
        except Exception:
            # Undo the delete as well, so a failed refresh does not leave the
            # connection holding a half-applied transaction.
            conn.rollback()
            raise

        return inserted

    def _clip(self, value: float, low: float, high: float) -> float:
        """Clamp ``value`` into the closed interval ``[low, high]``."""
        return max(low, min(high, value))

    def _map_compression(self, params: Dict[str, float]) -> str:
        """Translate ``compression_mode`` into a compression name.

        Returns "log1p" for mode 1, "asinh" for mode 2, and "none" for any
        other value (including a missing key, which defaults to 0).
        """
        mode = int(params.get('compression_mode', 0))
        if mode == 1:
            return "log1p"
        if mode == 2:
            return "asinh"
        return "none"

    def _normalize_score_pairs(
        self,
        raw_scores: List[tuple[int, Optional[float]]],
        params: Dict[str, float],
        site_id: int,
        use_smoothing: bool,
    ) -> Dict[int, float]:
        """Normalise ``(member_id, raw_score)`` pairs into display scores.

        Args:
            raw_scores: Pairs to normalise; pairs whose score is None are
                dropped.
            params: Index parameters; supplies the percentile bounds and the
                compression mode.
            site_id: Site id forwarded to ``batch_normalize_to_display``.
            use_smoothing: Smoothing flag forwarded to
                ``batch_normalize_to_display``.

        Returns:
            Mapping of member id to display score; empty when no pair has a
            score.
        """
        valid_scores = [
            (member_id, float(score))
            for member_id, score in raw_scores
            if score is not None
        ]
        if not valid_scores:
            return {}

        # All scores are (numerically) zero: return zeros directly to avoid a
        # degenerate min-max normalisation.
        if all(abs(score) <= 1e-9 for _, score in valid_scores):
            return {member_id: 0.0 for member_id, _ in valid_scores}

        compression = self._map_compression(params)
        normalized = self.batch_normalize_to_display(
            valid_scores,
            compression=compression,
            percentile_lower=int(params['percentile_lower']),
            percentile_upper=int(params['percentile_upper']),
            use_smoothing=use_smoothing,
            site_id=site_id
        )
        return {member_id: display for member_id, _, display in normalized}
|
||||
|
||||
|
||||
# Public API of this module: only the task class is exported by
# `from <module> import *`.
__all__ = ['NewconvIndexTask']
|
||||
|
||||
Reference in New Issue
Block a user