微信小程序页面迁移校验之前 P5任务处理之前

This commit is contained in:
Neo
2026-03-09 01:19:21 +08:00
parent 263bf96035
commit 6e20987d2f
1112 changed files with 153824 additions and 219694 deletions

View File

@@ -12,6 +12,8 @@ from typing import Any, Dict, List, Optional, Tuple
from .base_index_task import BaseIndexTask
from ..base_dws_task import TaskContext
from neozqyy_shared.datetime_utils import biz_date_sql_expr
@dataclass
class MemberActivityData:
@@ -238,6 +240,8 @@ class MemberIndexBaseTask(BaseIndexTask):
end_date: date,
) -> List[Dict[str, Any]]:
"""提取到店记录(按天去重)"""
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
condition_sql = self._build_visit_condition_sql()
sql = f"""
WITH visit_source AS (
@@ -258,12 +262,12 @@ class MemberIndexBaseTask(BaseIndexTask):
)
SELECT
canonical_member_id AS member_id,
DATE(pay_time) AS visit_date,
{biz_expr} AS visit_date,
MAX(pay_time) AS last_visit_time,
SUM(COALESCE(pay_amount, 0)) AS day_pay_amount
FROM visit_source
WHERE canonical_member_id > 0
GROUP BY canonical_member_id, DATE(pay_time)
GROUP BY canonical_member_id, {biz_expr}
ORDER BY canonical_member_id, visit_date
"""
rows = self.db.query(sql, (site_id, start_date, end_date))

View File

@@ -214,7 +214,7 @@ class RelationIndexTask(BaseIndexTask):
JOIN dwd.dim_assistant d
ON s.user_id = d.user_id
AND d.scd2_is_current = 1
AND COALESCE(d.is_delete, 0) = 0
AND COALESCE(d.leave_status, 0) = 0
WHERE s.site_id = %s
AND s.tenant_member_id > 0
AND s.user_id > 0

View File

@@ -18,6 +18,8 @@ from typing import Any, Dict, List, Optional
from .base_index_task import BaseIndexTask
from ..base_dws_task import TaskContext
from neozqyy_shared.datetime_utils import biz_date_sql_expr
# =============================================================================
# 数据类定义
@@ -333,6 +335,10 @@ class SpendingPowerIndexTask(BaseIndexTask):
short_days = int(params.get('spend_window_short_days', 30))
long_days = int(params.get('spend_window_long_days', 90))
# CHANGE 2026-03-01 | business-day-cutoff 7.6: DATE(pay_time) → 营业日归属表达式
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
# 单条 SQL 同时聚合 30 天和 90 天窗口,避免两次扫描
# INTERVAL 天数通过 f-string 内嵌(已用 int() 转为整数,安全);site_id 走参数化
sql = f"""
@@ -357,7 +363,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
-- 90 天窗口
SUM(pay_amount) AS spend_90,
COUNT(*) AS orders_90,
COUNT(DISTINCT DATE(pay_time)) AS visit_days_90,
COUNT(DISTINCT {biz_expr}) AS visit_days_90,
COUNT(DISTINCT EXTRACT(ISOYEAR FROM pay_time)::int * 100
+ EXTRACT(WEEK FROM pay_time)::int) AS active_weeks_90,
-- 30 天窗口(子集过滤)
@@ -366,7 +372,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
THEN 1 ELSE 0 END) AS orders_30,
COUNT(DISTINCT CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
THEN DATE(pay_time) END) AS visit_days_30
THEN {biz_expr} END) AS visit_days_30
FROM consume_source
WHERE canonical_member_id > 0
GROUP BY canonical_member_id
@@ -467,12 +473,15 @@ class SpendingPowerIndexTask(BaseIndexTask):
long_days = int(params.get('spend_window_long_days', 90))
alpha = float(params.get('ewma_alpha_daily_spend', 0.3))
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr_s = biz_date_sql_expr("s.pay_time", cutoff)
sql = f"""
WITH consume_source AS (
SELECT
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
AS canonical_member_id,
DATE(s.pay_time) AS pay_date,
{biz_expr_s} AS pay_date,
COALESCE(s.pay_amount, 0) AS pay_amount
FROM dwd.dwd_settlement_head s
LEFT JOIN dwd.dim_member_card_account mca
@@ -516,12 +525,15 @@ class SpendingPowerIndexTask(BaseIndexTask):
long_days = int(params.get('spend_window_long_days', 90))
alpha = float(params.get('ewma_alpha_daily_spend', 0.3))
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr_s = biz_date_sql_expr("s.pay_time", cutoff)
sql = f"""
WITH consume_source AS (
SELECT
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
AS canonical_member_id,
DATE(s.pay_time) AS pay_date,
{biz_expr_s} AS pay_date,
COALESCE(s.pay_amount, 0) AS pay_amount
FROM dwd.dwd_settlement_head s
LEFT JOIN dwd.dim_member_card_account mca
@@ -572,13 +584,17 @@ class SpendingPowerIndexTask(BaseIndexTask):
)
return result
# CHANGE 2026-03-02 | 基数校准改用非零样本中位数,零消费会员不参与校准
# 原因:零消费会员不参与 SPI 有效区分,纳入中位数只会拉低基数
_CALIBRATE_MIN_SAMPLE = 10 # 非零样本最小数量,低于此值回退默认值
def _calibrate_amount_bases(
self, features: Dict[int, SPIMemberFeatures], params: Dict[str, float]
) -> Dict[str, float]:
"""从门店数据计算中位数作为金额压缩基数校准值。
优先级:cfg_index_parameters 配置值 > 自动校准中位数 > DEFAULT_PARAMS 默认值。
自动校准中位数 ≤ 0 时回退到 DEFAULT_PARAMS。
优先级:cfg_index_parameters 配置值 > 非零样本自动校准中位数 > DEFAULT_PARAMS 默认值。
仅使用值 > 0 的样本计算中位数;非零样本数 < _CALIBRATE_MIN_SAMPLE 时回退默认值。
"""
# 特征字段 → 对应的 amount_base 参数名
base_extractors: Dict[str, callable] = {
@@ -600,21 +616,23 @@ class SpendingPowerIndexTask(BaseIndexTask):
)
continue
# 从特征数据计算中位数
values = [extractor(f) for f in features.values()]
median_val = self.calculate_median(values)
# 仅取非零样本计算中位数
nonzero_values = [v for v in (extractor(f) for f in features.values()) if v > 0]
if median_val > 0:
if len(nonzero_values) >= self._CALIBRATE_MIN_SAMPLE:
median_val = self.calculate_median(nonzero_values)
calibrated[base_key] = median_val
self.logger.info(
"SPI 基数校准: %s 自动校准为中位数 %.2f", base_key, median_val,
"SPI 基数校准: %s 非零样本 %d/%d,中位数 %.2f",
base_key, len(nonzero_values), len(features), median_val,
)
else:
# 中位数 ≤ 0,回退到 DEFAULT_PARAMS
# 非零样本不足,回退到 DEFAULT_PARAMS
calibrated[base_key] = self.DEFAULT_PARAMS[base_key]
self.logger.warning(
"SPI 基数校准: %s 中位数 %.2f ≤ 0,回退到默认值 %.2f",
base_key, median_val, self.DEFAULT_PARAMS[base_key],
"SPI 基数校准: %s 非零样本 %d 不足(最低 %d,回退到默认值 %.2f",
base_key, len(nonzero_values), self._CALIBRATE_MIN_SAMPLE,
self.DEFAULT_PARAMS[base_key],
)
return calibrated
@@ -747,6 +765,13 @@ class SpendingPowerIndexTask(BaseIndexTask):
)
"""
inserted = 0
# raw score 列为 numeric(10,4);display 列为 numeric(5,2)
# 防止极端数据导致 NumericValueOutOfRange
RAW_MAX = 999999.9999
DISP_MAX = 999.99
def _clamp(v, lo, hi):
    """Limit ``v`` to the range bounded by ``lo`` and ``hi`` and return it.

    The upper bound is applied first, then the lower bound, using the same
    comparison order as ``max(lo, min(hi, v))``. A value that does not
    compare as less than ``hi`` (for example NaN) therefore yields ``hi``.
    """
    # Keep v only when it is strictly below the ceiling.
    capped = v if v < hi else hi
    # Keep the capped value only when it is strictly above the floor.
    return capped if capped > lo else lo
for f in data_list:
cur.execute(insert_sql, (
f.site_id, f.member_id,
@@ -754,9 +779,14 @@ class SpendingPowerIndexTask(BaseIndexTask):
f.orders_30, f.orders_90,
f.visit_days_30, f.visit_days_90,
f.avg_ticket_90, f.active_weeks_90, f.daily_spend_ewma_90,
f.score_level_raw, f.score_speed_raw, f.score_stability_raw,
f.score_level_display, f.score_speed_display, f.score_stability_display,
f.raw_score, f.display_score,
_clamp(f.score_level_raw, -RAW_MAX, RAW_MAX),
_clamp(f.score_speed_raw, -RAW_MAX, RAW_MAX),
_clamp(f.score_stability_raw, -RAW_MAX, RAW_MAX),
_clamp(f.score_level_display, 0, DISP_MAX),
_clamp(f.score_speed_display, 0, DISP_MAX),
_clamp(f.score_stability_display, 0, DISP_MAX),
_clamp(f.raw_score, -RAW_MAX, RAW_MAX),
_clamp(f.display_score, 0, DISP_MAX),
))
inserted += max(cur.rowcount, 0)