微信小程序页面迁移校验之前 P5任务处理之前
This commit is contained in:
@@ -12,6 +12,8 @@ from typing import Any, Dict, List, Optional, Tuple
|
||||
from .base_index_task import BaseIndexTask
|
||||
from ..base_dws_task import TaskContext
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
|
||||
@dataclass
|
||||
class MemberActivityData:
|
||||
@@ -238,6 +240,8 @@ class MemberIndexBaseTask(BaseIndexTask):
|
||||
end_date: date,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""提取到店记录(按天去重)"""
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
condition_sql = self._build_visit_condition_sql()
|
||||
sql = f"""
|
||||
WITH visit_source AS (
|
||||
@@ -258,12 +262,12 @@ class MemberIndexBaseTask(BaseIndexTask):
|
||||
)
|
||||
SELECT
|
||||
canonical_member_id AS member_id,
|
||||
DATE(pay_time) AS visit_date,
|
||||
{biz_expr} AS visit_date,
|
||||
MAX(pay_time) AS last_visit_time,
|
||||
SUM(COALESCE(pay_amount, 0)) AS day_pay_amount
|
||||
FROM visit_source
|
||||
WHERE canonical_member_id > 0
|
||||
GROUP BY canonical_member_id, DATE(pay_time)
|
||||
GROUP BY canonical_member_id, {biz_expr}
|
||||
ORDER BY canonical_member_id, visit_date
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
|
||||
@@ -214,7 +214,7 @@ class RelationIndexTask(BaseIndexTask):
|
||||
JOIN dwd.dim_assistant d
|
||||
ON s.user_id = d.user_id
|
||||
AND d.scd2_is_current = 1
|
||||
AND COALESCE(d.is_delete, 0) = 0
|
||||
AND COALESCE(d.leave_status, 0) = 0
|
||||
WHERE s.site_id = %s
|
||||
AND s.tenant_member_id > 0
|
||||
AND s.user_id > 0
|
||||
|
||||
@@ -18,6 +18,8 @@ from typing import Any, Dict, List, Optional
|
||||
from .base_index_task import BaseIndexTask
|
||||
from ..base_dws_task import TaskContext
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 数据类定义
|
||||
@@ -333,6 +335,10 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
short_days = int(params.get('spend_window_short_days', 30))
|
||||
long_days = int(params.get('spend_window_long_days', 90))
|
||||
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 7.6: DATE(pay_time) → 营业日归属表达式
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
|
||||
# 单条 SQL 同时聚合 30 天和 90 天窗口,避免两次扫描
|
||||
# INTERVAL 天数通过 f-string 内嵌(整数,安全);site_id 走参数化
|
||||
sql = f"""
|
||||
@@ -357,7 +363,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
-- 90 天窗口
|
||||
SUM(pay_amount) AS spend_90,
|
||||
COUNT(*) AS orders_90,
|
||||
COUNT(DISTINCT DATE(pay_time)) AS visit_days_90,
|
||||
COUNT(DISTINCT {biz_expr}) AS visit_days_90,
|
||||
COUNT(DISTINCT EXTRACT(ISOYEAR FROM pay_time)::int * 100
|
||||
+ EXTRACT(WEEK FROM pay_time)::int) AS active_weeks_90,
|
||||
-- 30 天窗口(子集过滤)
|
||||
@@ -366,7 +372,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
|
||||
THEN 1 ELSE 0 END) AS orders_30,
|
||||
COUNT(DISTINCT CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
|
||||
THEN DATE(pay_time) END) AS visit_days_30
|
||||
THEN {biz_expr} END) AS visit_days_30
|
||||
FROM consume_source
|
||||
WHERE canonical_member_id > 0
|
||||
GROUP BY canonical_member_id
|
||||
@@ -467,12 +473,15 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
long_days = int(params.get('spend_window_long_days', 90))
|
||||
alpha = float(params.get('ewma_alpha_daily_spend', 0.3))
|
||||
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr_s = biz_date_sql_expr("s.pay_time", cutoff)
|
||||
|
||||
sql = f"""
|
||||
WITH consume_source AS (
|
||||
SELECT
|
||||
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
|
||||
AS canonical_member_id,
|
||||
DATE(s.pay_time) AS pay_date,
|
||||
{biz_expr_s} AS pay_date,
|
||||
COALESCE(s.pay_amount, 0) AS pay_amount
|
||||
FROM dwd.dwd_settlement_head s
|
||||
LEFT JOIN dwd.dim_member_card_account mca
|
||||
@@ -516,12 +525,15 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
long_days = int(params.get('spend_window_long_days', 90))
|
||||
alpha = float(params.get('ewma_alpha_daily_spend', 0.3))
|
||||
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr_s = biz_date_sql_expr("s.pay_time", cutoff)
|
||||
|
||||
sql = f"""
|
||||
WITH consume_source AS (
|
||||
SELECT
|
||||
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
|
||||
AS canonical_member_id,
|
||||
DATE(s.pay_time) AS pay_date,
|
||||
{biz_expr_s} AS pay_date,
|
||||
COALESCE(s.pay_amount, 0) AS pay_amount
|
||||
FROM dwd.dwd_settlement_head s
|
||||
LEFT JOIN dwd.dim_member_card_account mca
|
||||
@@ -572,13 +584,17 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
)
|
||||
return result
|
||||
|
||||
# CHANGE 2026-03-02 | 基数校准改用非零样本中位数,零消费会员不参与校准
|
||||
# 原因:零消费会员不参与 SPI 有效区分,纳入中位数只会拉低基数
|
||||
_CALIBRATE_MIN_SAMPLE = 10 # 非零样本最小数量,低于此值回退默认值
|
||||
|
||||
def _calibrate_amount_bases(
|
||||
self, features: Dict[int, SPIMemberFeatures], params: Dict[str, float]
|
||||
) -> Dict[str, float]:
|
||||
"""从门店数据计算中位数作为金额压缩基数校准值。
|
||||
|
||||
优先级:cfg_index_parameters 配置值 > 自动校准中位数 > DEFAULT_PARAMS 默认值。
|
||||
自动校准中位数 ≤ 0 时回退到 DEFAULT_PARAMS。
|
||||
优先级:cfg_index_parameters 配置值 > 非零样本自动校准中位数 > DEFAULT_PARAMS 默认值。
|
||||
仅使用值 > 0 的样本计算中位数;非零样本数 < _CALIBRATE_MIN_SAMPLE 时回退默认值。
|
||||
"""
|
||||
# 特征字段 → 对应的 amount_base 参数名
|
||||
base_extractors: Dict[str, callable] = {
|
||||
@@ -600,21 +616,23 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
)
|
||||
continue
|
||||
|
||||
# 从特征数据计算中位数
|
||||
values = [extractor(f) for f in features.values()]
|
||||
median_val = self.calculate_median(values)
|
||||
# 仅取非零样本计算中位数
|
||||
nonzero_values = [v for v in (extractor(f) for f in features.values()) if v > 0]
|
||||
|
||||
if median_val > 0:
|
||||
if len(nonzero_values) >= self._CALIBRATE_MIN_SAMPLE:
|
||||
median_val = self.calculate_median(nonzero_values)
|
||||
calibrated[base_key] = median_val
|
||||
self.logger.info(
|
||||
"SPI 基数校准: %s 自动校准为中位数 %.2f", base_key, median_val,
|
||||
"SPI 基数校准: %s 非零样本 %d/%d,中位数 %.2f",
|
||||
base_key, len(nonzero_values), len(features), median_val,
|
||||
)
|
||||
else:
|
||||
# 中位数 ≤ 0,回退到 DEFAULT_PARAMS
|
||||
# 非零样本不足,回退到 DEFAULT_PARAMS
|
||||
calibrated[base_key] = self.DEFAULT_PARAMS[base_key]
|
||||
self.logger.warning(
|
||||
"SPI 基数校准: %s 中位数 %.2f ≤ 0,回退到默认值 %.2f",
|
||||
base_key, median_val, self.DEFAULT_PARAMS[base_key],
|
||||
"SPI 基数校准: %s 非零样本 %d 不足(最低 %d),回退到默认值 %.2f",
|
||||
base_key, len(nonzero_values), self._CALIBRATE_MIN_SAMPLE,
|
||||
self.DEFAULT_PARAMS[base_key],
|
||||
)
|
||||
|
||||
return calibrated
|
||||
@@ -747,6 +765,13 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
)
|
||||
"""
|
||||
inserted = 0
|
||||
# raw score 列为 numeric(10,4),display 列为 numeric(5,2)
|
||||
# 防止极端数据导致 NumericValueOutOfRange
|
||||
RAW_MAX = 999999.9999
|
||||
DISP_MAX = 999.99
|
||||
def _clamp(v, lo, hi):
|
||||
return max(lo, min(hi, v))
|
||||
|
||||
for f in data_list:
|
||||
cur.execute(insert_sql, (
|
||||
f.site_id, f.member_id,
|
||||
@@ -754,9 +779,14 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
f.orders_30, f.orders_90,
|
||||
f.visit_days_30, f.visit_days_90,
|
||||
f.avg_ticket_90, f.active_weeks_90, f.daily_spend_ewma_90,
|
||||
f.score_level_raw, f.score_speed_raw, f.score_stability_raw,
|
||||
f.score_level_display, f.score_speed_display, f.score_stability_display,
|
||||
f.raw_score, f.display_score,
|
||||
_clamp(f.score_level_raw, -RAW_MAX, RAW_MAX),
|
||||
_clamp(f.score_speed_raw, -RAW_MAX, RAW_MAX),
|
||||
_clamp(f.score_stability_raw, -RAW_MAX, RAW_MAX),
|
||||
_clamp(f.score_level_display, 0, DISP_MAX),
|
||||
_clamp(f.score_speed_display, 0, DISP_MAX),
|
||||
_clamp(f.score_stability_display, 0, DISP_MAX),
|
||||
_clamp(f.raw_score, -RAW_MAX, RAW_MAX),
|
||||
_clamp(f.display_score, 0, DISP_MAX),
|
||||
))
|
||||
inserted += max(cur.rowcount, 0)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user