在前后端开发联调前 的提交20260223
This commit is contained in:
@@ -25,6 +25,9 @@ from .finance_income_task import FinanceIncomeStructureTask
|
||||
from .finance_discount_task import FinanceDiscountDetailTask
|
||||
from .finance_base_task import FinanceBaseTask
|
||||
from .maintenance_task import DwsMaintenanceTask
|
||||
from .goods_stock_daily_task import GoodsStockDailyTask
|
||||
from .goods_stock_weekly_task import GoodsStockWeeklyTask
|
||||
from .goods_stock_monthly_task import GoodsStockMonthlyTask
|
||||
|
||||
# 指数算法任务
|
||||
from .index import (
|
||||
@@ -32,6 +35,7 @@ from .index import (
|
||||
NewconvIndexTask,
|
||||
MlManualImportTask,
|
||||
RelationIndexTask,
|
||||
SpendingPowerIndexTask,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
@@ -57,9 +61,14 @@ __all__ = [
|
||||
"FinanceIncomeStructureTask",
|
||||
"FinanceDiscountDetailTask",
|
||||
"DwsMaintenanceTask",
|
||||
# 库存维度
|
||||
"GoodsStockDailyTask",
|
||||
"GoodsStockWeeklyTask",
|
||||
"GoodsStockMonthlyTask",
|
||||
# 指数算法
|
||||
"WinbackIndexTask",
|
||||
"NewconvIndexTask",
|
||||
"MlManualImportTask",
|
||||
"RelationIndexTask",
|
||||
"SpendingPowerIndexTask",
|
||||
]
|
||||
|
||||
@@ -198,7 +198,7 @@ class AssistantCustomerTask(BaseDwsTask):
|
||||
)
|
||||
SELECT
|
||||
assistant_id,
|
||||
MAX(assistant_nickname) AS assistant_nickname,
|
||||
(ARRAY_AGG(assistant_nickname ORDER BY service_date DESC))[1] AS assistant_nickname,
|
||||
member_id,
|
||||
MIN(service_date) AS first_service_date,
|
||||
MAX(service_date) AS last_service_date,
|
||||
@@ -247,21 +247,31 @@ class AssistantCustomerTask(BaseDwsTask):
|
||||
"""
|
||||
提取会员信息
|
||||
"""
|
||||
# CHANGE 2026-02-21 | dim_member 无 site_id 字段,改用 register_site_id;
|
||||
# 加 scd2_is_current=1 只取当前有效版本
|
||||
# CHANGE 2026-02-22 | 需求 B:通过事实表反查,支持跨店消费会员
|
||||
sql = """
|
||||
SELECT
|
||||
member_id,
|
||||
nickname,
|
||||
mobile
|
||||
FROM dwd.dim_member
|
||||
WHERE site_id = %s
|
||||
WHERE member_id IN (
|
||||
SELECT DISTINCT tenant_member_id
|
||||
FROM dwd.dwd_assistant_service_log
|
||||
WHERE site_id = %s
|
||||
AND tenant_member_id IS NOT NULL
|
||||
AND tenant_member_id != 0
|
||||
) AND scd2_is_current = 1
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
|
||||
|
||||
result = {}
|
||||
for row in (rows or []):
|
||||
row_dict = dict(row)
|
||||
result[row_dict['member_id']] = row_dict
|
||||
return result
|
||||
|
||||
|
||||
def _extract_assistant_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
|
||||
"""
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
数据来源:
|
||||
- dwd_assistant_service_log: 助教服务流水
|
||||
- dwd_assistant_trash_event: 废除记录(排除)
|
||||
- dwd_assistant_service_log_ex: 扩展表(提供 is_trash 废除标记)
|
||||
- dim_assistant: 助教维度(SCD2,获取当日等级)
|
||||
- cfg_skill_type: 技能→课程类型映射
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
- 幂等方式:delete-before-insert(按日期窗口)
|
||||
|
||||
业务规则:
|
||||
- 有效业绩:需排除dwd_assistant_trash_event中的废除记录
|
||||
- 有效业绩:通过 dwd_assistant_service_log_ex.is_trash 字段判断是否被废除
|
||||
- 助教等级:使用SCD2 as-of取值,获取统计日当日生效的等级
|
||||
- 课程类型:通过skill_id映射,分为基础课和附加课
|
||||
|
||||
@@ -78,18 +78,14 @@ class AssistantDailyTask(BaseDwsTask):
|
||||
self.get_task_code(), start_date, end_date
|
||||
)
|
||||
|
||||
# 1. 获取助教服务记录
|
||||
# 1. 获取助教服务记录(含 is_trash 标记,来自 _ex 表 JOIN)
|
||||
service_records = self._extract_service_records(site_id, start_date, end_date)
|
||||
|
||||
# 2. 获取废除记录
|
||||
trash_records = self._extract_trash_records(site_id, start_date, end_date)
|
||||
|
||||
# 3. 加载配置缓存
|
||||
# 2. 加载配置缓存
|
||||
self.load_config_cache()
|
||||
|
||||
return {
|
||||
'service_records': service_records,
|
||||
'trash_records': trash_records,
|
||||
'start_date': start_date,
|
||||
'end_date': end_date,
|
||||
'site_id': site_id
|
||||
@@ -100,21 +96,16 @@ class AssistantDailyTask(BaseDwsTask):
|
||||
转换数据:按助教+日期聚合
|
||||
"""
|
||||
service_records = extracted['service_records']
|
||||
trash_records = extracted['trash_records']
|
||||
site_id = extracted['site_id']
|
||||
|
||||
self.logger.info(
|
||||
"%s: 转换数据,服务记录 %d 条,废除记录 %d 条",
|
||||
self.get_task_code(), len(service_records), len(trash_records)
|
||||
"%s: 转换数据,服务记录 %d 条",
|
||||
self.get_task_code(), len(service_records)
|
||||
)
|
||||
|
||||
# 构建废除记录索引(assistant_service_id -> trash_info)
|
||||
trash_index = self._build_trash_index(trash_records)
|
||||
|
||||
# 按助教+日期聚合
|
||||
aggregated = self._aggregate_by_assistant_date(
|
||||
service_records,
|
||||
trash_index,
|
||||
site_id
|
||||
)
|
||||
|
||||
@@ -134,6 +125,8 @@ class AssistantDailyTask(BaseDwsTask):
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
提取助教服务记录
|
||||
|
||||
JOIN _ex 表取 is_trash 字段,用于直接判断服务是否被废除。
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
@@ -150,8 +143,11 @@ class AssistantDailyTask(BaseDwsTask):
|
||||
asl.real_use_seconds,
|
||||
asl.ledger_amount,
|
||||
asl.ledger_unit_price,
|
||||
DATE(asl.start_use_time) AS service_date
|
||||
DATE(asl.start_use_time) AS service_date,
|
||||
COALESCE(ex.is_trash, 0) AS is_trash
|
||||
FROM dwd.dwd_assistant_service_log asl
|
||||
LEFT JOIN dwd.dwd_assistant_service_log_ex ex
|
||||
ON asl.assistant_service_id = ex.assistant_service_id
|
||||
WHERE asl.site_id = %s
|
||||
AND DATE(asl.start_use_time) >= %s
|
||||
AND DATE(asl.start_use_time) <= %s
|
||||
@@ -160,53 +156,14 @@ class AssistantDailyTask(BaseDwsTask):
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _extract_trash_records(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
提取废除记录
|
||||
|
||||
有效业绩的排除规则:仅对"助教废除表"的记录进行处理排除
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
assistant_service_id,
|
||||
trash_seconds,
|
||||
trash_reason,
|
||||
trash_time
|
||||
FROM dwd.dwd_assistant_trash_event
|
||||
WHERE site_id = %s
|
||||
AND DATE(trash_time) >= %s
|
||||
AND DATE(trash_time) <= %s
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
# ==========================================================================
|
||||
# 数据转换方法
|
||||
# ==========================================================================
|
||||
|
||||
def _build_trash_index(
|
||||
self,
|
||||
trash_records: List[Dict[str, Any]]
|
||||
) -> Dict[int, Dict[str, Any]]:
|
||||
"""
|
||||
构建废除记录索引
|
||||
"""
|
||||
index = {}
|
||||
for record in trash_records:
|
||||
service_id = record.get('assistant_service_id')
|
||||
if service_id:
|
||||
index[service_id] = record
|
||||
return index
|
||||
|
||||
def _aggregate_by_assistant_date(
|
||||
self,
|
||||
service_records: List[Dict[str, Any]],
|
||||
trash_index: Dict[int, Dict[str, Any]],
|
||||
site_id: int
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
@@ -275,14 +232,12 @@ class AssistantDailyTask(BaseDwsTask):
|
||||
is_bonus = course_type == CourseType.BONUS
|
||||
is_room = course_type == CourseType.ROOM
|
||||
|
||||
# 检查是否被废除
|
||||
is_trashed = service_id in trash_index
|
||||
# 检查是否被废除(使用 _ex 表的 is_trash 标记)
|
||||
is_trashed = bool(record.get('is_trash', 0))
|
||||
|
||||
if is_trashed:
|
||||
# 废除记录单独统计
|
||||
trash_info = trash_index[service_id]
|
||||
trash_seconds = self.safe_int(trash_info.get('trash_seconds', income_seconds))
|
||||
agg['trashed_seconds'] += trash_seconds
|
||||
# 废除记录:直接用服务记录的 income_seconds 作为废除时长
|
||||
agg['trashed_seconds'] += income_seconds
|
||||
agg['trashed_count'] += 1
|
||||
else:
|
||||
# 正常记录累加
|
||||
|
||||
@@ -129,7 +129,7 @@ class AssistantFinanceTask(BaseDwsTask):
|
||||
SELECT
|
||||
DATE(s.start_use_time) AS stat_date,
|
||||
s.site_assistant_id AS assistant_id,
|
||||
MAX(s.nickname) AS assistant_nickname,
|
||||
(ARRAY_AGG(s.nickname ORDER BY s.start_use_time DESC))[1] AS assistant_nickname,
|
||||
COUNT(*) AS service_count,
|
||||
SUM(s.income_seconds) / 3600.0 AS service_hours,
|
||||
SUM(s.ledger_amount) AS revenue_total,
|
||||
|
||||
@@ -261,12 +261,16 @@ class AssistantMonthlyTask(BaseDwsTask):
|
||||
|
||||
month_where = " OR ".join(month_conditions)
|
||||
|
||||
# CHANGE 2026-02-22 | Prompt: 需求 A — 按档位分段统计
|
||||
# GROUP BY 加入 assistant_level_code/name,使同一助教月内不同档位各自聚合;
|
||||
# nickname 改用 ARRAY_AGG 按时间倒序取最新值,替代 MAX() 的字典序取值。
|
||||
# 唯一约束已同步变更为 (site_id, assistant_id, stat_month, assistant_level_code)
|
||||
sql = f"""
|
||||
SELECT
|
||||
assistant_id,
|
||||
assistant_nickname,
|
||||
assistant_level_code,
|
||||
assistant_level_name,
|
||||
(ARRAY_AGG(assistant_nickname ORDER BY stat_date DESC))[1] AS assistant_nickname,
|
||||
DATE_TRUNC('month', stat_date)::DATE AS stat_month,
|
||||
COUNT(DISTINCT stat_date) AS work_days,
|
||||
SUM(total_service_count) AS total_service_count,
|
||||
@@ -287,7 +291,7 @@ class AssistantMonthlyTask(BaseDwsTask):
|
||||
SUM(trashed_count) AS trashed_count
|
||||
FROM dws.dws_assistant_daily_detail
|
||||
WHERE site_id = %s AND ({month_where})
|
||||
GROUP BY assistant_id, assistant_nickname, assistant_level_code, assistant_level_name,
|
||||
GROUP BY assistant_id, assistant_level_code, assistant_level_name,
|
||||
DATE_TRUNC('month', stat_date)
|
||||
"""
|
||||
|
||||
@@ -405,9 +409,10 @@ class AssistantMonthlyTask(BaseDwsTask):
|
||||
max_tier_level=max_tier_level
|
||||
)
|
||||
|
||||
# 获取月末的等级信息(用于记录)
|
||||
month_end = self._get_month_end(month)
|
||||
level_info = self.get_assistant_level_asof(assistant_id, month_end)
|
||||
# CHANGE 2026-02-22 | Prompt: 需求 A 任务 7.3 — 多行适配
|
||||
# 聚合行已按 assistant_level_code 分组,每行自带档位信息,
|
||||
# 直接使用聚合行的 assistant_level_code/name,不再用月末 SCD2 覆盖,
|
||||
# 避免同一助教多档位行被统一覆盖为月末档位导致 UK 冲突。
|
||||
|
||||
# 月度去重客户/台桌(从DWD直接去重)
|
||||
unique_info = monthly_unique_index.get((assistant_id, month), {})
|
||||
@@ -424,8 +429,8 @@ class AssistantMonthlyTask(BaseDwsTask):
|
||||
'assistant_id': assistant_id,
|
||||
'assistant_nickname': agg.get('assistant_nickname'),
|
||||
'stat_month': month,
|
||||
'assistant_level_code': level_info.get('level_code') if level_info else agg.get('assistant_level_code'),
|
||||
'assistant_level_name': level_info.get('level_name') if level_info else agg.get('assistant_level_name'),
|
||||
'assistant_level_code': agg.get('assistant_level_code'),
|
||||
'assistant_level_name': agg.get('assistant_level_name'),
|
||||
'hire_date': hire_date,
|
||||
'is_new_hire': is_new_hire,
|
||||
'work_days': self.safe_int(agg.get('work_days', 0)),
|
||||
@@ -536,7 +541,8 @@ class AssistantMonthlyTask(BaseDwsTask):
|
||||
计算排名(考虑并列)
|
||||
|
||||
Top3排名口径:按有效业绩总小时数排名,
|
||||
如遇并列则都算,比如2个第一,则记为2个第一,一个第三
|
||||
如遇并列则都算,比如2个第一,则记为2个第一,一个第三。
|
||||
同一助教不同档位的行各自独立参与排名。
|
||||
"""
|
||||
if not records:
|
||||
return
|
||||
@@ -548,24 +554,29 @@ class AssistantMonthlyTask(BaseDwsTask):
|
||||
reverse=True
|
||||
)
|
||||
|
||||
# 计算考虑并列的排名
|
||||
# CHANGE 2026-02-22 | Prompt: 需求 A 任务 7.3 — 多行排名适配
|
||||
# 同一助教可能有多个档位行,用 (assistant_id, assistant_level_code) 做唯一标识,
|
||||
# 避免 rank_map 中同一 assistant_id 的多行互相覆盖。
|
||||
values = [
|
||||
(r.get('assistant_id'), r.get('effective_hours', Decimal('0')))
|
||||
(
|
||||
(r.get('assistant_id'), r.get('assistant_level_code')),
|
||||
r.get('effective_hours', Decimal('0'))
|
||||
)
|
||||
for r in sorted_records
|
||||
]
|
||||
ranked = self.calculate_rank_with_ties(values)
|
||||
|
||||
# 创建排名映射
|
||||
# 创建排名映射:key = (assistant_id, assistant_level_code)
|
||||
rank_map = {
|
||||
assistant_id: (rank, dense_rank)
|
||||
for assistant_id, rank, dense_rank in ranked
|
||||
entity_key: (rank, dense_rank)
|
||||
for entity_key, rank, dense_rank in ranked
|
||||
}
|
||||
|
||||
# 更新记录
|
||||
for record in records:
|
||||
assistant_id = record.get('assistant_id')
|
||||
if assistant_id in rank_map:
|
||||
rank, _ = rank_map[assistant_id]
|
||||
key = (record.get('assistant_id'), record.get('assistant_level_code'))
|
||||
if key in rank_map:
|
||||
rank, _ = rank_map[key]
|
||||
record['rank_by_hours'] = rank
|
||||
record['rank_with_ties'] = rank # 使用考虑并列的排名
|
||||
|
||||
|
||||
@@ -1,4 +1,13 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# AI_CHANGELOG
|
||||
# - 2026-02-21 | feature: 新增 GUARANTEE 保底月薪线逻辑
|
||||
# prompt: "这些种子规则,对应的计算方式,是否也实现?"
|
||||
# 直接原因: cfg_bonus_rules 新增 GUARANTEE 类型规则,_calculate_salary 需要对应处理
|
||||
# 变更: (1) _calculate_salary 调用 calculate_guarantee() 获取保底金额
|
||||
# (2) gross_salary = MAX(课时收入+奖金, 保底金额)
|
||||
# (3) _build_calc_notes 增加保底生效/未触发备注
|
||||
# 风险: 仅在 salary_month 落入 GUARANTEE 生效期(2025-01-01~2026-02-28)时触发
|
||||
# 验证: 保底期外的月份不受影响(calculate_guarantee 返回 (0, None))
|
||||
"""
|
||||
助教工资计算任务
|
||||
|
||||
@@ -64,7 +73,8 @@ class AssistantSalaryTask(BaseDwsTask):
|
||||
return "dws_assistant_salary_calc"
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
|
||||
return ["site_id", "assistant_id", "salary_month"]
|
||||
# CHANGE 2026-02-22 | task 7.4: 唯一键加入 assistant_level_code,适配档位分段工资
|
||||
return ["site_id", "assistant_id", "salary_month", "assistant_level_code"]
|
||||
|
||||
# ==========================================================================
|
||||
# ETL主流程
|
||||
@@ -330,12 +340,29 @@ class AssistantSalaryTask(BaseDwsTask):
|
||||
# 获取充值提成
|
||||
recharge_commission = commission_index.get(assistant_id, Decimal('0'))
|
||||
|
||||
# 汇总奖金
|
||||
# CHANGE 2026-02-21 | 保底月薪线逻辑
|
||||
# prompt: "这些种子规则,对应的计算方式,是否也实现?"
|
||||
# reason: 2025-01-01~2026-02-28 期间,满足条件的助教享受保底月薪线
|
||||
# 公式: gross_salary = MAX(课时收入 + 其他奖金, guarantee_amount)
|
||||
guarantee_amount, guarantee_rule = self.calculate_guarantee(
|
||||
level_code=level_code,
|
||||
effective_hours=effective_hours,
|
||||
bonus_hours=bonus_hours,
|
||||
effective_date=salary_month,
|
||||
)
|
||||
|
||||
# 汇总奖金(不含保底,保底是月薪线而非额外奖金)
|
||||
other_bonus = Decimal('0') # 预留其他奖金
|
||||
total_bonus = sprint_bonus + top_rank_bonus + recharge_commission + other_bonus
|
||||
|
||||
# 计算应发工资 = 课时收入 + 奖金
|
||||
gross_salary = total_course_income + total_bonus
|
||||
# 计算应发工资
|
||||
# 无保底或不满足条件:gross_salary = 课时收入 + 奖金
|
||||
# 有保底且满足条件:gross_salary = MAX(课时收入 + 奖金, 保底金额)
|
||||
raw_salary = total_course_income + total_bonus
|
||||
if guarantee_amount > 0 and raw_salary < guarantee_amount:
|
||||
gross_salary = guarantee_amount
|
||||
else:
|
||||
gross_salary = raw_salary
|
||||
|
||||
# 构建记录
|
||||
return {
|
||||
@@ -377,7 +404,11 @@ class AssistantSalaryTask(BaseDwsTask):
|
||||
# 假期
|
||||
'vacation_days': vacation_days,
|
||||
'vacation_unlimited': vacation_unlimited,
|
||||
'calc_notes': self._build_calc_notes(summary, tier, sprint_bonus, top_rank_bonus),
|
||||
'calc_notes': self._build_calc_notes(
|
||||
summary, tier, sprint_bonus, top_rank_bonus,
|
||||
guarantee_amount=guarantee_amount, guarantee_rule=guarantee_rule,
|
||||
raw_salary=raw_salary,
|
||||
),
|
||||
}
|
||||
|
||||
def _build_calc_notes(
|
||||
@@ -385,7 +416,10 @@ class AssistantSalaryTask(BaseDwsTask):
|
||||
summary: Dict[str, Any],
|
||||
tier: Optional[Dict[str, Any]],
|
||||
sprint_bonus: Decimal,
|
||||
top_rank_bonus: Decimal
|
||||
top_rank_bonus: Decimal,
|
||||
guarantee_amount: Decimal = Decimal('0'),
|
||||
guarantee_rule: Optional[str] = None,
|
||||
raw_salary: Decimal = Decimal('0'),
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
构建计算备注
|
||||
@@ -405,6 +439,20 @@ class AssistantSalaryTask(BaseDwsTask):
|
||||
rank = summary.get('rank_with_ties')
|
||||
notes.append(f"Top{rank}奖金: {top_rank_bonus}")
|
||||
|
||||
# 保底月薪线备注
|
||||
if guarantee_amount > 0:
|
||||
if raw_salary < guarantee_amount:
|
||||
diff = guarantee_amount - raw_salary
|
||||
notes.append(
|
||||
f"保底生效({guarantee_rule}): 月薪线{guarantee_amount}, "
|
||||
f"课时+奖金{raw_salary}, 补差{diff}"
|
||||
)
|
||||
else:
|
||||
notes.append(
|
||||
f"保底未触发({guarantee_rule}): 月薪线{guarantee_amount}, "
|
||||
f"实际收入{raw_salary}已超过"
|
||||
)
|
||||
|
||||
return "; ".join(notes) if notes else None
|
||||
|
||||
def _delete_by_month(
|
||||
|
||||
@@ -824,6 +824,55 @@ class BaseDwsTask(BaseTask):
|
||||
return Decimal(str(rule.get('bonus_amount', 0)))
|
||||
|
||||
return Decimal('0')
|
||||
|
||||
# CHANGE 2026-02-21 | 新增保底月薪线计算方法
|
||||
# prompt: "这些种子规则,对应的计算方式,是否也实现?"
|
||||
# reason: cfg_bonus_rules 新增 GUARANTEE 类型规则,需要对应的计算入口
|
||||
def calculate_guarantee(
    self,
    level_code: int,
    effective_hours: Decimal,
    bonus_hours: Decimal,
    effective_date: Optional[date] = None,
    min_bonus_hours: Decimal = Decimal('10'),
) -> Tuple[Decimal, Optional[str]]:
    """
    Look up the guaranteed monthly salary floor (GUARANTEE rule) for an assistant.

    The rule is picked from the bonus rules in effect on ``effective_date``:
    it must have ``rule_type == 'GUARANTEE'`` and
    ``rule_code == f"GUAR_LV{level_code}"``. Only the first matching rule is
    evaluated; it applies when ``effective_hours >= threshold_hours`` and
    ``bonus_hours >= min_bonus_hours``.

    The returned amount is a salary floor, not an additional bonus. The
    salary calculation that consumes it uses
    ``gross = MAX(course income + bonuses, guarantee_amount)``.

    Args:
        level_code: Assistant level code used to build the rule code
            (the seed rules use 10/20/30/40).
        effective_hours: Effective performance hours for the month.
        bonus_hours: Bonus-course hours for the month.
        effective_date: Date used to select the rule versions in effect
            (the salary month). Passed through to
            ``_filter_by_effective_date``.
        min_bonus_hours: Minimum bonus-course hours required (default 10).

    Returns:
        ``(guarantee_amount, rule_code)`` when the rule applies, otherwise
        ``(Decimal('0'), None)`` - both when no rule matches and when the
        matched rule's conditions are not met.
    """
    config = self.load_config_cache()
    rules = self._filter_by_effective_date(config.bonus_rules, effective_date)

    target_code = f"GUAR_LV{level_code}"
    for rule in rules:
        if rule.get('rule_type') != 'GUARANTEE':
            continue
        if rule.get('rule_code') != target_code:
            continue

        # A NULL column arrives as None; Decimal(str(None)) would raise
        # decimal.InvalidOperation, so treat NULL like a missing key (0).
        raw_threshold = rule.get('threshold_hours')
        threshold = (
            Decimal('0') if raw_threshold is None else Decimal(str(raw_threshold))
        )
        if effective_hours >= threshold and bonus_hours >= min_bonus_hours:
            raw_amount = rule.get('bonus_amount')
            amount = Decimal('0') if raw_amount is None else Decimal(str(raw_amount))
            return (amount, target_code)

        # The rule matched but its conditions are not met: first match wins,
        # later rules with the same code are intentionally not consulted.
        return (Decimal('0'), None)

    # No GUARANTEE rule for this level is in effect on effective_date.
    return (Decimal('0'), None)
|
||||
|
||||
|
||||
# ==========================================================================
|
||||
# DWD数据读取方法
|
||||
|
||||
@@ -82,21 +82,22 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
end_date: date,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""充值日汇总(充值订单按日聚合)"""
|
||||
# CHANGE 2026-02-21 | BUG 8: dwd_recharge_order 无 pay_money/gift_money,实际字段为 pay_amount/point_amount
|
||||
sql = """
|
||||
SELECT
|
||||
DATE(pay_time) AS stat_date,
|
||||
COUNT(*) AS recharge_count,
|
||||
SUM(pay_money + gift_money) AS recharge_total,
|
||||
SUM(pay_money) AS recharge_cash,
|
||||
SUM(gift_money) AS recharge_gift,
|
||||
SUM(pay_amount + point_amount) AS recharge_total,
|
||||
SUM(pay_amount) AS recharge_cash,
|
||||
SUM(point_amount) AS recharge_gift,
|
||||
COUNT(CASE WHEN is_first = 1 THEN 1 END) AS first_recharge_count,
|
||||
SUM(CASE WHEN is_first = 1 THEN pay_money + gift_money ELSE 0 END) AS first_recharge_total,
|
||||
SUM(CASE WHEN is_first = 1 THEN pay_money ELSE 0 END) AS first_recharge_cash,
|
||||
SUM(CASE WHEN is_first = 1 THEN gift_money ELSE 0 END) AS first_recharge_gift,
|
||||
SUM(CASE WHEN is_first = 1 THEN pay_amount + point_amount ELSE 0 END) AS first_recharge_total,
|
||||
SUM(CASE WHEN is_first = 1 THEN pay_amount ELSE 0 END) AS first_recharge_cash,
|
||||
SUM(CASE WHEN is_first = 1 THEN point_amount ELSE 0 END) AS first_recharge_gift,
|
||||
COUNT(CASE WHEN is_first = 0 OR is_first IS NULL THEN 1 END) AS renewal_count,
|
||||
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN pay_money + gift_money ELSE 0 END) AS renewal_total,
|
||||
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN pay_money ELSE 0 END) AS renewal_cash,
|
||||
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN gift_money ELSE 0 END) AS renewal_gift,
|
||||
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN pay_amount + point_amount ELSE 0 END) AS renewal_total,
|
||||
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN pay_amount ELSE 0 END) AS renewal_cash,
|
||||
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN point_amount ELSE 0 END) AS renewal_gift,
|
||||
COUNT(DISTINCT member_id) AS recharge_member_count
|
||||
FROM dwd.dwd_recharge_order
|
||||
WHERE site_id = %s
|
||||
|
||||
@@ -140,6 +140,8 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
|
||||
|
||||
关联dim_table获取区域名称,再映射到cfg_area_category
|
||||
"""
|
||||
# CHANGE 2026-02-22 | BUG 7 修复 | dim_table 主键是 table_id 而非 site_table_id,
|
||||
# JOIN 条件从 dt.site_table_id → dt.table_id(事实表侧 site_table_id 不变)
|
||||
sql = """
|
||||
WITH area_orders AS (
|
||||
SELECT
|
||||
@@ -150,7 +152,7 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
|
||||
COALESCE(tfl.ledger_time_seconds, 0) AS duration_seconds
|
||||
FROM dwd.dwd_table_fee_log tfl
|
||||
LEFT JOIN dwd.dim_table dt
|
||||
ON dt.site_table_id = tfl.site_table_id
|
||||
ON dt.table_id = tfl.site_table_id
|
||||
WHERE tfl.site_id = %(site_id)s
|
||||
AND tfl.pay_time >= %(start_date)s
|
||||
AND tfl.pay_time < %(end_date)s + INTERVAL '1 day'
|
||||
@@ -166,7 +168,7 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
|
||||
COALESCE(asl.income_seconds, 0) AS duration_seconds
|
||||
FROM dwd.dwd_assistant_service_log asl
|
||||
LEFT JOIN dwd.dim_table dt
|
||||
ON dt.site_table_id = asl.site_table_id
|
||||
ON dt.table_id = asl.site_table_id
|
||||
WHERE asl.site_id = %(site_id)s
|
||||
AND asl.start_use_time >= %(start_date)s
|
||||
AND asl.start_use_time < %(end_date)s + INTERVAL '1 day'
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
|
||||
业务规则:
|
||||
- 首充/续充:通过 is_first 字段区分
|
||||
- 现金/赠送:通过 pay_money/gift_money 区分
|
||||
- 现金/赠送:通过 pay_amount/point_amount 区分
|
||||
- 卡余额:区分储值卡和赠送卡
|
||||
|
||||
作者:ETL团队
|
||||
@@ -110,21 +110,22 @@ class FinanceRechargeTask(FinanceBaseTask):
|
||||
# load() 已移除——使用 BaseDwsTask 默认实现(DATE_COL="stat_date")
|
||||
|
||||
def _extract_recharge_summary(self, site_id: int, start_date: date, end_date: date) -> List[Dict[str, Any]]:
|
||||
# CHANGE 2026-02-21 | BUG 8: dwd_recharge_order 无 pay_money/gift_money,实际字段为 pay_amount/point_amount
|
||||
sql = """
|
||||
SELECT
|
||||
DATE(pay_time) AS stat_date,
|
||||
COUNT(*) AS recharge_count,
|
||||
SUM(pay_money + gift_money) AS recharge_total,
|
||||
SUM(pay_money) AS recharge_cash,
|
||||
SUM(gift_money) AS recharge_gift,
|
||||
SUM(pay_amount + point_amount) AS recharge_total,
|
||||
SUM(pay_amount) AS recharge_cash,
|
||||
SUM(point_amount) AS recharge_gift,
|
||||
COUNT(CASE WHEN is_first = 1 THEN 1 END) AS first_recharge_count,
|
||||
SUM(CASE WHEN is_first = 1 THEN pay_money ELSE 0 END) AS first_recharge_cash,
|
||||
SUM(CASE WHEN is_first = 1 THEN gift_money ELSE 0 END) AS first_recharge_gift,
|
||||
SUM(CASE WHEN is_first = 1 THEN pay_money + gift_money ELSE 0 END) AS first_recharge_total,
|
||||
SUM(CASE WHEN is_first = 1 THEN pay_amount ELSE 0 END) AS first_recharge_cash,
|
||||
SUM(CASE WHEN is_first = 1 THEN point_amount ELSE 0 END) AS first_recharge_gift,
|
||||
SUM(CASE WHEN is_first = 1 THEN pay_amount + point_amount ELSE 0 END) AS first_recharge_total,
|
||||
COUNT(CASE WHEN is_first != 1 OR is_first IS NULL THEN 1 END) AS renewal_count,
|
||||
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_money ELSE 0 END) AS renewal_cash,
|
||||
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN gift_money ELSE 0 END) AS renewal_gift,
|
||||
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_money + gift_money ELSE 0 END) AS renewal_total,
|
||||
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_amount ELSE 0 END) AS renewal_cash,
|
||||
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN point_amount ELSE 0 END) AS renewal_gift,
|
||||
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_amount + point_amount ELSE 0 END) AS renewal_total,
|
||||
COUNT(DISTINCT member_id) AS recharge_member_count,
|
||||
COUNT(DISTINCT CASE WHEN is_first = 1 THEN member_id END) AS new_member_count
|
||||
FROM dwd.dwd_recharge_order
|
||||
@@ -138,10 +139,18 @@ class FinanceRechargeTask(FinanceBaseTask):
|
||||
CASH_CARD_TYPE_ID = 2793249295533893
|
||||
GIFT_CARD_TYPE_IDS = [2791990152417157, 2793266846533445, 2794699703437125]
|
||||
|
||||
# CHANGE 2026-02-21 | dim_member_card_account 无 site_id 字段,改用 register_site_id
|
||||
# CHANGE 2026-02-22 | 需求 B:通过事实表反查,支持跨店消费会员
|
||||
sql = """
|
||||
SELECT card_type_id, SUM(balance) AS total_balance
|
||||
FROM dwd.dim_member_card_account
|
||||
WHERE site_id = %s AND scd2_is_current = 1
|
||||
WHERE tenant_member_id IN (
|
||||
SELECT DISTINCT member_id
|
||||
FROM dwd.dwd_recharge_order
|
||||
WHERE site_id = %s
|
||||
AND member_id IS NOT NULL
|
||||
AND member_id != 0
|
||||
) AND scd2_is_current = 1
|
||||
AND COALESCE(is_delete, 0) = 0
|
||||
GROUP BY card_type_id
|
||||
"""
|
||||
|
||||
237
apps/etl/connectors/feiqiu/tasks/dws/goods_stock_daily_task.py
Normal file
237
apps/etl/connectors/feiqiu/tasks/dws/goods_stock_daily_task.py
Normal file
@@ -0,0 +1,237 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
库存日度汇总任务
|
||||
|
||||
功能说明:
|
||||
以"门店+日期+商品"为粒度,汇总每日库存数据
|
||||
|
||||
数据来源:
|
||||
- dwd.dwd_goods_stock_summary:库存汇总明细(按 fetched_at 日期聚合)
|
||||
|
||||
目标表:
|
||||
dws.dws_goods_stock_daily_summary
|
||||
|
||||
更新策略:
|
||||
- 更新频率:每日更新
|
||||
- 幂等方式:upsert(ON CONFLICT DO UPDATE)
|
||||
|
||||
业务规则:
|
||||
- 按 fetched_at 的日期部分分组,同一天同一商品可能有多条 DWD 记录
|
||||
- 数值指标取 SUM 聚合(入库/出库/销售等为累计量)
|
||||
- current_stock 取当日最后一条记录的值(期末快照)
|
||||
- range_start_stock 取当日第一条记录的值(期初快照)
|
||||
- range_end_stock 取当日最后一条记录的值(期末快照)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
class GoodsStockDailyTask(BaseDwsTask):
    """
    Daily goods-stock summary task.

    Reads rows from ``dwd.dwd_goods_stock_summary``, collapses them to one
    record per (stat_date, site_goods_id) and upserts the result into
    ``dws_goods_stock_daily_summary``.

    Aggregation rules (implemented in ``transform``):
    - range_in / range_out / range_sale / range_sale_money / range_inventory
      are summed over all rows of the same day and goods item;
    - range_start_stock keeps the value of the first row of the day;
    - range_end_stock and current_stock take the value of the last row of
      the day (rows are fetched ordered by ``fetched_at``).
    """

    DATE_COL = "stat_date"

    # Columns accumulated across every row of the same (day, goods) group.
    _STOCK_SUM_FIELDS = (
        "range_in",
        "range_out",
        "range_sale",
        "range_sale_money",
        "range_inventory",
    )
    # Columns overwritten by each later row, so the last row of the day wins.
    _STOCK_CLOSING_FIELDS = ("range_end_stock", "current_stock")

    def get_task_code(self) -> str:
        """Return the task code of this task."""
        return "DWS_GOODS_STOCK_DAILY"

    def get_target_table(self) -> str:
        """Return the name of the DWS target table."""
        return "dws_goods_stock_daily_summary"

    def get_primary_keys(self) -> List[str]:
        """Return the key columns identifying one daily summary row."""
        return ["site_id", "stat_date", "site_goods_id"]

    # ======================================================================
    # Extract
    # ======================================================================

    def extract(self, context: TaskContext) -> Dict[str, Any]:
        """
        Fetch DWD stock-summary rows for the task window.

        The window bounds are reduced to their date part when they expose a
        ``date()`` method; both bounds are inclusive in the query.

        Returns:
            A dict with the fetched ``rows`` (always a list), the resolved
            ``start_date`` / ``end_date`` and the ``site_id``.
        """
        start_date = (
            context.window_start.date()
            if hasattr(context.window_start, "date")
            else context.window_start
        )
        end_date = (
            context.window_end.date()
            if hasattr(context.window_end, "date")
            else context.window_end
        )
        site_id = context.store_id

        self.logger.info(
            "%s: 提取数据,门店=%s,日期范围 %s ~ %s",
            self.get_task_code(), site_id, start_date, end_date,
        )

        # ORDER BY fetched_at matters: transform() relies on row order to
        # pick the opening (first) and closing (last) snapshot of each day.
        sql = """
            SELECT
                site_goods_id,
                goods_name,
                goods_unit,
                goods_category_id,
                goods_category_second_id,
                category_name,
                range_start_stock,
                range_end_stock,
                range_in,
                range_out,
                range_sale,
                range_sale_money,
                range_inventory,
                current_stock,
                site_id,
                tenant_id,
                fetched_at
            FROM dwd.dwd_goods_stock_summary
            WHERE site_id = %s
              AND DATE(fetched_at) >= %s
              AND DATE(fetched_at) <= %s
            ORDER BY fetched_at
        """
        # Normalise an empty/None result to a list so len() below and the
        # iteration in transform() are always safe.
        rows = self.query_dwd(sql, (site_id, start_date, end_date)) or []

        self.logger.info(
            "%s: 提取到 %d 条 DWD 记录", self.get_task_code(), len(rows),
        )

        return {
            "rows": rows,
            "start_date": start_date,
            "end_date": end_date,
            "site_id": site_id,
        }

    # ======================================================================
    # Transform
    # ======================================================================

    def transform(
        self, extracted: Dict[str, Any], context: TaskContext
    ) -> List[Dict[str, Any]]:
        """
        Collapse the extracted rows to one record per (stat_date, site_goods_id).

        Rows without ``fetched_at`` or ``site_goods_id`` are skipped. The
        first row of a group initialises the record (including the opening
        stock); later rows add to the summed columns and overwrite the
        closing-snapshot columns.

        Returns:
            The aggregated records, in first-seen order of their group key.
        """
        rows = extracted.get("rows", [])
        site_id = extracted["site_id"]

        if not rows:
            self.logger.info("%s: 无数据需要汇总", self.get_task_code())
            return []

        # key: (stat_date, site_goods_id) -> aggregated record
        agg: Dict[tuple, Dict[str, Any]] = {}

        for row in rows:
            fetched_at = row.get("fetched_at")
            if fetched_at is None:
                continue
            stat_date = (
                fetched_at.date()
                if hasattr(fetched_at, "date")
                else fetched_at
            )
            site_goods_id = row.get("site_goods_id")
            if site_goods_id is None:
                continue

            key = (stat_date, site_goods_id)
            rec = agg.get(key)

            if rec is None:
                # First row of the group: initialise everything; the opening
                # stock (range_start_stock) is taken from this row only.
                agg[key] = {
                    "site_id": site_id,
                    "tenant_id": row.get("tenant_id"),
                    "stat_date": stat_date,
                    "site_goods_id": site_goods_id,
                    "goods_name": row.get("goods_name"),
                    "goods_unit": row.get("goods_unit"),
                    "goods_category_id": row.get("goods_category_id"),
                    "goods_category_second_id": row.get("goods_category_second_id"),
                    "category_name": row.get("category_name"),
                    "range_start_stock": self.safe_decimal(row.get("range_start_stock")),
                    "range_end_stock": self.safe_decimal(row.get("range_end_stock")),
                    "range_in": self.safe_decimal(row.get("range_in")),
                    "range_out": self.safe_decimal(row.get("range_out")),
                    "range_sale": self.safe_decimal(row.get("range_sale")),
                    "range_sale_money": self.safe_decimal(row.get("range_sale_money")),
                    "range_inventory": self.safe_decimal(row.get("range_inventory")),
                    "current_stock": self.safe_decimal(row.get("current_stock")),
                    "stat_period": "daily",
                }
                continue

            # Later rows: accumulate the flow columns ...
            for field in self._STOCK_SUM_FIELDS:
                rec[field] += self.safe_decimal(row.get(field))
            # ... and let the latest row define the closing snapshot
            # (rows arrive ordered by fetched_at).
            for field in self._STOCK_CLOSING_FIELDS:
                rec[field] = self.safe_decimal(row.get(field))

        result = list(agg.values())
        self.logger.info(
            "%s: 汇总完成,生成 %d 条日度记录",
            self.get_task_code(), len(result),
        )
        return result

    # ======================================================================
    # Load
    # ======================================================================

    def load(
        self, transformed: List[Dict[str, Any]], context: TaskContext
    ) -> Dict[str, Any]:
        """
        Upsert the aggregated records into the DWS target table.

        Returns:
            ``{"counts": {...}}`` with fetched / inserted / updated /
            skipped / errors; all zero when there is nothing to write.
        """
        if not transformed:
            return {
                "counts": {
                    "fetched": 0,
                    "inserted": 0,
                    "updated": 0,
                    "skipped": 0,
                    "errors": 0,
                }
            }

        columns = [
            "site_id", "tenant_id", "stat_date", "site_goods_id",
            "goods_name", "goods_unit", "goods_category_id",
            "goods_category_second_id", "category_name",
            "range_start_stock", "range_end_stock",
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory", "current_stock",
            "stat_period",
        ]

        # upsert() is inherited; its result is unpacked as (inserted, updated).
        inserted, updated = self.upsert(transformed, columns=columns)

        self.logger.info(
            "%s: 写入完成,inserted=%d",
            self.get_task_code(), inserted,
        )

        return {
            "counts": {
                "fetched": len(transformed),
                "inserted": inserted,
                "updated": updated,
                "skipped": 0,
                "errors": 0,
            }
        }
|
||||
|
||||
|
||||
__all__ = ["GoodsStockDailyTask"]
|
||||
245
apps/etl/connectors/feiqiu/tasks/dws/goods_stock_monthly_task.py
Normal file
245
apps/etl/connectors/feiqiu/tasks/dws/goods_stock_monthly_task.py
Normal file
@@ -0,0 +1,245 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
库存月度汇总任务
|
||||
|
||||
功能说明:
|
||||
以"门店+自然月+商品"为粒度,汇总每月库存数据
|
||||
|
||||
数据来源:
|
||||
- dwd.dwd_goods_stock_summary:库存汇总明细(按 fetched_at 日期聚合)
|
||||
|
||||
目标表:
|
||||
dws.dws_goods_stock_monthly_summary
|
||||
|
||||
更新策略:
|
||||
- 更新频率:每日更新当月数据
|
||||
- 幂等方式:upsert(ON CONFLICT DO UPDATE)
|
||||
|
||||
业务规则:
|
||||
- 按自然月分组,stat_date = 该月的第一天(如 2026-01-01 代表 2026 年 1 月)
|
||||
- 同一月同一商品可能有多条 DWD 记录
|
||||
- 数值指标取 SUM 聚合(入库/出库/销售等为累计量)
|
||||
- current_stock 取该月最后一条记录的值(期末快照)
|
||||
- range_start_stock 取该月第一条记录的值(期初快照)
|
||||
- range_end_stock 取该月最后一条记录的值(期末快照)
|
||||
- stat_period = 'monthly'
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
def _month_first_day(d: date) -> date:
    """Return the first day of the calendar month that contains ``d``."""
    # replace() keeps year and month and only resets the day component.
    return d.replace(day=1)
|
||||
|
||||
|
||||
class GoodsStockMonthlyTask(BaseDwsTask):
    """Monthly goods-stock summary task.

    Reads rows from dwd.dwd_goods_stock_summary, folds them into one record
    per (calendar month, site_goods_id) and upserts the records into
    dws.dws_goods_stock_monthly_summary.
    """

    DATE_COL = "stat_date"

    def get_task_code(self) -> str:
        """Return the task code (used as the prefix of every log line)."""
        return "DWS_GOODS_STOCK_MONTHLY"

    def get_target_table(self) -> str:
        """Return the name of the DWS target table."""
        return "dws_goods_stock_monthly_summary"

    def get_primary_keys(self) -> List[str]:
        """Return the key columns of one monthly record."""
        return ["site_id", "stat_date", "site_goods_id"]

    # ======================================================================
    # Helpers
    # ======================================================================

    @staticmethod
    def _coerce_window_date(value: Any) -> Any:
        """Return ``value.date()`` when the object offers it, else ``value``."""
        return value.date() if hasattr(value, "date") else value

    @staticmethod
    def _expand_to_full_months(start: Any, end: Any) -> tuple:
        """Widen ``[start, end]`` so it covers whole calendar months.

        Bounds that are not ``date`` instances (e.g. ``None``) are returned
        unchanged.
        """
        import calendar  # local import: the module header only imports ``date``

        if isinstance(start, date):
            start = _month_first_day(start)
        if isinstance(end, date):
            last_day = calendar.monthrange(end.year, end.month)[1]
            end = end.replace(day=last_day)
        return start, end

    # ======================================================================
    # Extract
    # ======================================================================

    def extract(self, context: TaskContext) -> Dict[str, Any]:
        """Fetch the DWD stock rows needed to rebuild every month in the window.

        The task window is widened to whole calendar months before querying.
        ``load`` upserts one row per month, so aggregating only part of a
        month would overwrite that month's totals and opening stock with
        partial values.

        Args:
            context: Task context providing ``window_start``, ``window_end``
                and ``store_id``.

        Returns:
            Dict with ``rows``, the requested ``start_date`` / ``end_date``,
            the month-aligned ``query_start_date`` / ``query_end_date``
            actually used in the query, and ``site_id``.
        """
        start_date = self._coerce_window_date(context.window_start)
        end_date = self._coerce_window_date(context.window_end)
        query_start, query_end = self._expand_to_full_months(start_date, end_date)
        site_id = context.store_id

        self.logger.info(
            "%s: 提取数据,门店=%s,日期范围 %s ~ %s",
            self.get_task_code(), site_id, query_start, query_end,
        )

        sql = """
            SELECT
                site_goods_id,
                goods_name,
                goods_unit,
                goods_category_id,
                goods_category_second_id,
                category_name,
                range_start_stock,
                range_end_stock,
                range_in,
                range_out,
                range_sale,
                range_sale_money,
                range_inventory,
                current_stock,
                site_id,
                tenant_id,
                fetched_at
            FROM dwd.dwd_goods_stock_summary
            WHERE site_id = %s
              AND DATE(fetched_at) >= %s
              AND DATE(fetched_at) <= %s
            ORDER BY fetched_at
        """
        rows = self.query_dwd(sql, (site_id, query_start, query_end))

        self.logger.info(
            "%s: 提取到 %d 条 DWD 记录", self.get_task_code(), len(rows),
        )

        return {
            "rows": rows,
            "start_date": start_date,
            "end_date": end_date,
            "query_start_date": query_start,
            "query_end_date": query_end,
            "site_id": site_id,
        }

    # ======================================================================
    # Transform
    # ======================================================================

    def transform(
        self, extracted: Dict[str, Any], context: TaskContext
    ) -> List[Dict[str, Any]]:
        """Aggregate rows into one record per (month, site_goods_id).

        Rows are processed in the order delivered by ``extract`` (sorted by
        ``fetched_at``). The first row of a group supplies the descriptive
        columns and ``range_start_stock``; flow metrics are summed; the last
        row supplies ``range_end_stock`` and ``current_stock``. Rows without
        ``fetched_at`` or ``site_goods_id`` are ignored.

        Args:
            extracted: Result of ``extract`` (uses ``rows`` and ``site_id``).
            context: Task context (not used by this method).

        Returns:
            List of monthly records; ``stat_date`` is the first day of the
            month and ``stat_period`` is ``"monthly"``.
        """
        rows = extracted.get("rows", [])
        site_id = extracted["site_id"]

        if not rows:
            self.logger.info("%s: 无数据需要汇总", self.get_task_code())
            return []

        # Keyed by (first day of month, site_goods_id).
        agg: Dict[tuple, Dict[str, Any]] = {}

        for row in rows:
            fetched_at = row.get("fetched_at")
            if fetched_at is None:
                continue
            row_date = (
                fetched_at.date()
                if hasattr(fetched_at, "date")
                else fetched_at
            )
            # The first day of the calendar month is the record's stat_date.
            first_day = _month_first_day(row_date)
            site_goods_id = row.get("site_goods_id")
            if site_goods_id is None:
                continue

            key = (first_day, site_goods_id)
            rec = agg.get(key)

            if rec is None:
                # First row of the group: initialise; opening stock comes from here.
                agg[key] = {
                    "site_id": site_id,
                    "tenant_id": row.get("tenant_id"),
                    "stat_date": first_day,
                    "site_goods_id": site_goods_id,
                    "goods_name": row.get("goods_name"),
                    "goods_unit": row.get("goods_unit"),
                    "goods_category_id": row.get("goods_category_id"),
                    "goods_category_second_id": row.get("goods_category_second_id"),
                    "category_name": row.get("category_name"),
                    "range_start_stock": self.safe_decimal(row.get("range_start_stock")),
                    "range_end_stock": self.safe_decimal(row.get("range_end_stock")),
                    "range_in": self.safe_decimal(row.get("range_in")),
                    "range_out": self.safe_decimal(row.get("range_out")),
                    "range_sale": self.safe_decimal(row.get("range_sale")),
                    "range_sale_money": self.safe_decimal(row.get("range_sale_money")),
                    "range_inventory": self.safe_decimal(row.get("range_inventory")),
                    "current_stock": self.safe_decimal(row.get("current_stock")),
                    "stat_period": "monthly",
                }
                continue

            # Later rows: accumulate flow metrics, refresh closing snapshots.
            rec["range_in"] += self.safe_decimal(row.get("range_in"))
            rec["range_out"] += self.safe_decimal(row.get("range_out"))
            rec["range_sale"] += self.safe_decimal(row.get("range_sale"))
            rec["range_sale_money"] += self.safe_decimal(row.get("range_sale_money"))
            rec["range_inventory"] += self.safe_decimal(row.get("range_inventory"))
            # Rows arrive sorted by fetched_at, so the last one wins.
            rec["range_end_stock"] = self.safe_decimal(row.get("range_end_stock"))
            rec["current_stock"] = self.safe_decimal(row.get("current_stock"))

        result = list(agg.values())
        self.logger.info(
            "%s: 汇总完成,生成 %d 条月度记录",
            self.get_task_code(), len(result),
        )
        return result

    # ======================================================================
    # Load
    # ======================================================================

    def load(
        self, transformed: List[Dict[str, Any]], context: TaskContext
    ) -> Dict[str, Any]:
        """Upsert the monthly records into the DWS target table.

        Args:
            transformed: Records produced by ``transform``; may be empty.
            context: Task context (not used by this method).

        Returns:
            ``{"counts": {...}}`` with ``fetched``, ``inserted``,
            ``updated``, ``skipped`` and ``errors``.
        """
        if not transformed:
            return {
                "counts": {
                    "fetched": 0,
                    "inserted": 0,
                    "updated": 0,
                    "skipped": 0,
                    "errors": 0,
                }
            }

        columns = [
            "site_id", "tenant_id", "stat_date", "site_goods_id",
            "goods_name", "goods_unit", "goods_category_id",
            "goods_category_second_id", "category_name",
            "range_start_stock", "range_end_stock",
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory", "current_stock",
            "stat_period",
        ]

        inserted, updated = self.upsert(transformed, columns=columns)

        self.logger.info(
            "%s: 写入完成,inserted=%d",
            self.get_task_code(), inserted,
        )

        return {
            "counts": {
                "fetched": len(transformed),
                "inserted": inserted,
                "updated": updated,
                "skipped": 0,
                "errors": 0,
            }
        }
|
||||
|
||||
|
||||
__all__ = ["GoodsStockMonthlyTask"]
|
||||
246
apps/etl/connectors/feiqiu/tasks/dws/goods_stock_weekly_task.py
Normal file
246
apps/etl/connectors/feiqiu/tasks/dws/goods_stock_weekly_task.py
Normal file
@@ -0,0 +1,246 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
库存周度汇总任务
|
||||
|
||||
功能说明:
|
||||
以"门店+ISO周+商品"为粒度,汇总每周库存数据
|
||||
|
||||
数据来源:
|
||||
- dwd.dwd_goods_stock_summary:库存汇总明细(按 fetched_at 日期聚合)
|
||||
|
||||
目标表:
|
||||
dws.dws_goods_stock_weekly_summary
|
||||
|
||||
更新策略:
|
||||
- 更新频率:每周更新
|
||||
- 幂等方式:upsert(ON CONFLICT DO UPDATE)
|
||||
|
||||
业务规则:
|
||||
- 按 ISO 周分组(isocalendar),stat_date = 该周的周一日期
|
||||
- 同一周同一商品可能有多条 DWD 记录
|
||||
- 数值指标取 SUM 聚合(入库/出库/销售等为累计量)
|
||||
- current_stock 取该周最后一条记录的值(期末快照)
|
||||
- range_start_stock 取该周第一条记录的值(期初快照)
|
||||
- range_end_stock 取该周最后一条记录的值(期末快照)
|
||||
- stat_period = 'weekly'
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
def _iso_monday(d: date) -> date:
|
||||
"""根据 ISO 日历计算给定日期所在周的周一"""
|
||||
# weekday(): 0=周一 ... 6=周日
|
||||
return d - timedelta(days=d.weekday())
|
||||
|
||||
|
||||
class GoodsStockWeeklyTask(BaseDwsTask):
    """Weekly goods-stock summary task.

    Reads rows from dwd.dwd_goods_stock_summary, folds them into one record
    per (ISO week, site_goods_id) and upserts the records into
    dws.dws_goods_stock_weekly_summary.
    """

    DATE_COL = "stat_date"

    def get_task_code(self) -> str:
        """Return the task code (used as the prefix of every log line)."""
        return "DWS_GOODS_STOCK_WEEKLY"

    def get_target_table(self) -> str:
        """Return the name of the DWS target table."""
        return "dws_goods_stock_weekly_summary"

    def get_primary_keys(self) -> List[str]:
        """Return the key columns of one weekly record."""
        return ["site_id", "stat_date", "site_goods_id"]

    # ======================================================================
    # Helpers
    # ======================================================================

    @staticmethod
    def _coerce_window_date(value: Any) -> Any:
        """Return ``value.date()`` when the object offers it, else ``value``."""
        return value.date() if hasattr(value, "date") else value

    @staticmethod
    def _expand_to_full_weeks(start: Any, end: Any) -> tuple:
        """Widen ``[start, end]`` so it covers whole ISO weeks (Mon..Sun).

        Bounds that are not ``date`` instances (e.g. ``None``) are returned
        unchanged.
        """
        if isinstance(start, date):
            start = _iso_monday(start)
        if isinstance(end, date):
            end = _iso_monday(end) + timedelta(days=6)
        return start, end

    # ======================================================================
    # Extract
    # ======================================================================

    def extract(self, context: TaskContext) -> Dict[str, Any]:
        """Fetch the DWD stock rows needed to rebuild every week in the window.

        The task window is widened to whole ISO weeks before querying.
        ``load`` upserts one row per week, so aggregating only part of a
        week would overwrite that week's totals and opening stock with
        partial values.

        Args:
            context: Task context providing ``window_start``, ``window_end``
                and ``store_id``.

        Returns:
            Dict with ``rows``, the requested ``start_date`` / ``end_date``,
            the week-aligned ``query_start_date`` / ``query_end_date``
            actually used in the query, and ``site_id``.
        """
        start_date = self._coerce_window_date(context.window_start)
        end_date = self._coerce_window_date(context.window_end)
        query_start, query_end = self._expand_to_full_weeks(start_date, end_date)
        site_id = context.store_id

        self.logger.info(
            "%s: 提取数据,门店=%s,日期范围 %s ~ %s",
            self.get_task_code(), site_id, query_start, query_end,
        )

        sql = """
            SELECT
                site_goods_id,
                goods_name,
                goods_unit,
                goods_category_id,
                goods_category_second_id,
                category_name,
                range_start_stock,
                range_end_stock,
                range_in,
                range_out,
                range_sale,
                range_sale_money,
                range_inventory,
                current_stock,
                site_id,
                tenant_id,
                fetched_at
            FROM dwd.dwd_goods_stock_summary
            WHERE site_id = %s
              AND DATE(fetched_at) >= %s
              AND DATE(fetched_at) <= %s
            ORDER BY fetched_at
        """
        rows = self.query_dwd(sql, (site_id, query_start, query_end))

        self.logger.info(
            "%s: 提取到 %d 条 DWD 记录", self.get_task_code(), len(rows),
        )

        return {
            "rows": rows,
            "start_date": start_date,
            "end_date": end_date,
            "query_start_date": query_start,
            "query_end_date": query_end,
            "site_id": site_id,
        }

    # ======================================================================
    # Transform
    # ======================================================================

    def transform(
        self, extracted: Dict[str, Any], context: TaskContext
    ) -> List[Dict[str, Any]]:
        """Aggregate rows into one record per (ISO week, site_goods_id).

        Rows are processed in the order delivered by ``extract`` (sorted by
        ``fetched_at``). The first row of a group supplies the descriptive
        columns and ``range_start_stock``; flow metrics are summed; the last
        row supplies ``range_end_stock`` and ``current_stock``. Rows without
        ``fetched_at`` or ``site_goods_id`` are ignored.

        Args:
            extracted: Result of ``extract`` (uses ``rows`` and ``site_id``).
            context: Task context (not used by this method).

        Returns:
            List of weekly records; ``stat_date`` is the Monday of the week
            and ``stat_period`` is ``"weekly"``.
        """
        rows = extracted.get("rows", [])
        site_id = extracted["site_id"]

        if not rows:
            self.logger.info("%s: 无数据需要汇总", self.get_task_code())
            return []

        # Keyed by (Monday of ISO week, site_goods_id).
        agg: Dict[tuple, Dict[str, Any]] = {}

        for row in rows:
            fetched_at = row.get("fetched_at")
            if fetched_at is None:
                continue
            row_date = (
                fetched_at.date()
                if hasattr(fetched_at, "date")
                else fetched_at
            )
            # The Monday of the ISO week is the record's stat_date.
            monday = _iso_monday(row_date)
            site_goods_id = row.get("site_goods_id")
            if site_goods_id is None:
                continue

            key = (monday, site_goods_id)
            rec = agg.get(key)

            if rec is None:
                # First row of the group: initialise; opening stock comes from here.
                agg[key] = {
                    "site_id": site_id,
                    "tenant_id": row.get("tenant_id"),
                    "stat_date": monday,
                    "site_goods_id": site_goods_id,
                    "goods_name": row.get("goods_name"),
                    "goods_unit": row.get("goods_unit"),
                    "goods_category_id": row.get("goods_category_id"),
                    "goods_category_second_id": row.get("goods_category_second_id"),
                    "category_name": row.get("category_name"),
                    "range_start_stock": self.safe_decimal(row.get("range_start_stock")),
                    "range_end_stock": self.safe_decimal(row.get("range_end_stock")),
                    "range_in": self.safe_decimal(row.get("range_in")),
                    "range_out": self.safe_decimal(row.get("range_out")),
                    "range_sale": self.safe_decimal(row.get("range_sale")),
                    "range_sale_money": self.safe_decimal(row.get("range_sale_money")),
                    "range_inventory": self.safe_decimal(row.get("range_inventory")),
                    "current_stock": self.safe_decimal(row.get("current_stock")),
                    "stat_period": "weekly",
                }
                continue

            # Later rows: accumulate flow metrics, refresh closing snapshots.
            rec["range_in"] += self.safe_decimal(row.get("range_in"))
            rec["range_out"] += self.safe_decimal(row.get("range_out"))
            rec["range_sale"] += self.safe_decimal(row.get("range_sale"))
            rec["range_sale_money"] += self.safe_decimal(row.get("range_sale_money"))
            rec["range_inventory"] += self.safe_decimal(row.get("range_inventory"))
            # Rows arrive sorted by fetched_at, so the last one wins.
            rec["range_end_stock"] = self.safe_decimal(row.get("range_end_stock"))
            rec["current_stock"] = self.safe_decimal(row.get("current_stock"))

        result = list(agg.values())
        self.logger.info(
            "%s: 汇总完成,生成 %d 条周度记录",
            self.get_task_code(), len(result),
        )
        return result

    # ======================================================================
    # Load
    # ======================================================================

    def load(
        self, transformed: List[Dict[str, Any]], context: TaskContext
    ) -> Dict[str, Any]:
        """Upsert the weekly records into the DWS target table.

        Args:
            transformed: Records produced by ``transform``; may be empty.
            context: Task context (not used by this method).

        Returns:
            ``{"counts": {...}}`` with ``fetched``, ``inserted``,
            ``updated``, ``skipped`` and ``errors``.
        """
        if not transformed:
            return {
                "counts": {
                    "fetched": 0,
                    "inserted": 0,
                    "updated": 0,
                    "skipped": 0,
                    "errors": 0,
                }
            }

        columns = [
            "site_id", "tenant_id", "stat_date", "site_goods_id",
            "goods_name", "goods_unit", "goods_category_id",
            "goods_category_second_id", "category_name",
            "range_start_stock", "range_end_stock",
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory", "current_stock",
            "stat_period",
        ]

        inserted, updated = self.upsert(transformed, columns=columns)

        self.logger.info(
            "%s: 写入完成,inserted=%d",
            self.get_task_code(), inserted,
        )

        return {
            "counts": {
                "fetched": len(transformed),
                "inserted": inserted,
                "updated": updated,
                "skipped": 0,
                "errors": 0,
            }
        }
|
||||
|
||||
|
||||
__all__ = ["GoodsStockWeeklyTask"]
|
||||
@@ -8,16 +8,19 @@
|
||||
- NewconvIndexTask: 新客转化指数 (NCI)
|
||||
- MlManualImportTask: ML 人工台账导入任务
|
||||
- RelationIndexTask: 关系指数计算任务(RS/OS/MS/ML)
|
||||
- SpendingPowerIndexTask: 消费力指数 (SPI)
|
||||
"""
|
||||
|
||||
from .winback_index_task import WinbackIndexTask
|
||||
from .newconv_index_task import NewconvIndexTask
|
||||
from .ml_manual_import_task import MlManualImportTask
|
||||
from .relation_index_task import RelationIndexTask
|
||||
from .spending_power_index_task import SpendingPowerIndexTask
|
||||
|
||||
__all__ = [
|
||||
'WinbackIndexTask',
|
||||
'NewconvIndexTask',
|
||||
'MlManualImportTask',
|
||||
'RelationIndexTask',
|
||||
'SpendingPowerIndexTask',
|
||||
]
|
||||
|
||||
@@ -0,0 +1,767 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
SPI 消费力指数任务(Spending Power Index)
|
||||
|
||||
设计说明:
|
||||
1. 直接继承 BaseIndexTask,不经过 MemberIndexBaseTask(无需 NEW/OLD/STOP 分群)
|
||||
2. 子分计算为 @staticmethod 纯函数,便于属性测试直接调用
|
||||
3. 三个子分:Level(消费水平)、Speed(消费速度)、Stability(消费稳定性)
|
||||
4. 结果写入 dws.dws_member_spending_power_index,按 site_id delete-before-insert
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from .base_index_task import BaseIndexTask
|
||||
from ..base_dws_task import TaskContext
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 数据类定义
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
class SPIMemberFeatures:
    """Member-level features and scores used by the SPI computation."""
    member_id: int
    site_id: int

    # Base features
    spend_30: float = 0.0  # total spend over the last 30 days
    spend_90: float = 0.0  # total spend over the last 90 days
    recharge_90: float = 0.0  # total recharge over the last 90 days
    orders_30: int = 0  # number of spend orders in the last 30 days
    orders_90: int = 0  # number of spend orders in the last 90 days
    visit_days_30: int = 0  # distinct spend days in the last 30 days
    visit_days_90: int = 0  # distinct spend days in the last 90 days
    avg_ticket_90: float = 0.0  # average order value over 90 days
    active_weeks_90: int = 0  # calendar weeks with spend in the last 90 days
    daily_spend_ewma_90: float = 0.0  # EWMA of daily spend

    # Raw sub-scores
    score_level_raw: float = 0.0
    score_speed_raw: float = 0.0
    score_stability_raw: float = 0.0

    # Display sub-scores (filled in after normalization)
    score_level_display: float = 0.0
    score_speed_display: float = 0.0
    score_stability_display: float = 0.0

    # Total score
    raw_score: float = 0.0
    display_score: float = 0.0
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SPI 任务
|
||||
# =============================================================================
|
||||
|
||||
class SpendingPowerIndexTask(BaseIndexTask):
|
||||
"""SPI 消费力指数:单任务产出 Level / Speed / Stability 子分及 SPI 总分。"""
|
||||
|
||||
INDEX_TYPE = "SPI"
|
||||
|
||||
DEFAULT_PARAMS: Dict[str, float] = {
    # Window parameters
    'spend_window_short_days': 30,
    'spend_window_long_days': 90,
    'ewma_alpha_daily_spend': 0.3,
    # Amount-compression bases (initial defaults; may be overridden by
    # auto-calibration or by the parameter table)
    'amount_base_spend_30': 500.0,
    'amount_base_spend_90': 1500.0,
    'amount_base_ticket_90': 200.0,
    'amount_base_recharge_90': 1000.0,
    'amount_base_speed_abs': 100.0,
    'amount_base_ewma_90': 50.0,
    # Level sub-score weights
    'w_level_spend_30': 0.30,
    'w_level_spend_90': 0.35,
    'w_level_ticket_90': 0.20,
    'w_level_recharge_90': 0.15,
    # Speed sub-score weights
    'w_speed_abs': 0.50,
    'w_speed_rel': 0.30,
    'w_speed_ewma': 0.20,
    # Total-score weights
    'weight_level': 0.60,
    'weight_speed': 0.30,
    'weight_stability': 0.10,
    # Stability parameters
    'stability_window_days': 90,
    'use_stability': 1,
    # Mapping and smoothing
    'percentile_lower': 5,
    'percentile_upper': 95,
    'compression_mode': 1,  # log1p
    'use_smoothing': 1,
    'ewma_alpha': 0.2,
    # Speed computation
    'speed_epsilon': 1e-6,
}
|
||||
|
||||
# =========================================================================
|
||||
# 抽象方法实现
|
||||
# =========================================================================
|
||||
|
||||
def get_task_code(self) -> str:
    """Return the task code of the SPI task."""
    return "DWS_SPENDING_POWER_INDEX"
|
||||
|
||||
def get_target_table(self) -> str:
    """Return the name of the DWS table that stores SPI results."""
    return "dws_member_spending_power_index"
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
    """Return the key columns of one SPI record."""
    return ["site_id", "member_id"]
|
||||
|
||||
def get_index_type(self) -> str:
    """Return the index type identifier (the class-level ``INDEX_TYPE``)."""
    return self.INDEX_TYPE
|
||||
|
||||
# =========================================================================
|
||||
# 辅助方法
|
||||
# =========================================================================
|
||||
|
||||
def _get_site_id(self, context: Optional[TaskContext]) -> int:
|
||||
"""从 context 或配置中获取门店 ID"""
|
||||
if context and getattr(context, "store_id", None):
|
||||
return int(context.store_id)
|
||||
site_id = self.config.get("app.default_site_id") or self.config.get("app.store_id")
|
||||
if site_id is not None:
|
||||
return int(site_id)
|
||||
# 回退:从消费数据中取一个 site_id
|
||||
sql = "SELECT DISTINCT site_id FROM dwd.dwd_settlement_head WHERE site_id IS NOT NULL LIMIT 1"
|
||||
rows = self.db.query(sql)
|
||||
if rows:
|
||||
return int(dict(rows[0]).get("site_id") or 0)
|
||||
self.logger.warning("无法确定门店ID,使用 0 继续执行")
|
||||
return 0
|
||||
|
||||
@staticmethod
|
||||
def _map_compression(params: Dict[str, float]) -> str:
|
||||
"""将 compression_mode 数值映射为 batch_normalize_to_display 所需的字符串"""
|
||||
mode = int(params.get('compression_mode', 0))
|
||||
if mode == 1:
|
||||
return "log1p"
|
||||
if mode == 2:
|
||||
return "asinh"
|
||||
return "none"
|
||||
|
||||
# =========================================================================
|
||||
# 核心执行流程
|
||||
# =========================================================================
|
||||
|
||||
def execute(self, context: Optional[TaskContext] = None) -> Dict[str, Any]:
    """Run the SPI pipeline end to end: extract, score, normalize, persist.

    Steps:
        1. Resolve the site id.
        2. Load SPI parameters; values from ``load_index_parameters('SPI')``
           override ``DEFAULT_PARAMS``.
        3. Extract spending features, then merge in the recharge totals and
           the daily-spend EWMA.
        4. Return a ``skipped`` result when no spending features exist.
        5. Calibrate the amount-compression bases.
        6. Compute Level / Speed / Stability and the raw SPI per member.
        7. Normalize each of the four raw scores into a display score.
        8. Persist the records via ``_save_spi_data``.
        9. Save the percentile history of the total SPI score.

    Args:
        context: Optional task context, used only to resolve the site id.

    Returns:
        ``{'status': 'skipped', 'reason': 'no_data'}`` when nothing was
        extracted; otherwise a dict with ``status='success'``,
        ``member_count`` and ``records_inserted``.
    """
    self.logger.info("开始计算 SPI 消费力指数")

    # 1. Site id
    site_id = self._get_site_id(context)

    # 2. Parameters: configured values layered over the defaults
    configured = self.load_index_parameters('SPI')
    params = {**self.DEFAULT_PARAMS, **configured}

    # 3. Features
    features = self._extract_spending_features(site_id, params)
    recharge_by_member = self._extract_recharge_features(site_id, params)

    # Members with recharge but no spending are not scored, so only members
    # already present in ``features`` receive a recharge total.
    for member_id, feat in features.items():
        if member_id in recharge_by_member:
            feat.recharge_90 = recharge_by_member[member_id]

    ewma_by_member = self._compute_daily_spend_ewma_batch(
        site_id, list(features.keys()), params
    )
    for member_id, feat in features.items():
        if member_id in ewma_by_member:
            feat.daily_spend_ewma_90 = ewma_by_member[member_id]

    # 4. Nothing to score (Req 9.4)
    if not features:
        self.logger.info("SPI: site_id=%s 无消费数据,跳过计算", site_id)
        return {'status': 'skipped', 'reason': 'no_data'}

    # 5. Calibrate amount-compression bases
    params = self._calibrate_amount_bases(features, params)

    # 6. Sub-scores and raw total per member
    for feat in features.values():
        feat.score_level_raw = self.compute_level(feat, params)
        feat.score_speed_raw = self.compute_speed(feat, params)
        feat.score_stability_raw = self.compute_stability(feat, params)
        feat.raw_score = self.compute_spi_raw(
            feat.score_level_raw,
            feat.score_speed_raw,
            feat.score_stability_raw,
            params,
        )

    # 7. Normalize the four raw scores independently
    percentile_lower = int(params.get('percentile_lower', 5))
    percentile_upper = int(params.get('percentile_upper', 95))
    use_smoothing = int(params.get('use_smoothing', 1)) == 1
    compression = self._map_compression(params)

    feat_list = list(features.values())

    # (index_type, raw-score attribute, display-score attribute); the order
    # fixes the order of the normalization calls.
    score_specs = (
        ('SPI', 'raw_score', 'display_score'),
        ('SPI_LEVEL', 'score_level_raw', 'score_level_display'),
        ('SPI_SPEED', 'score_speed_raw', 'score_speed_display'),
        ('SPI_STABILITY', 'score_stability_raw', 'score_stability_display'),
    )
    display_maps: Dict[str, Dict[int, float]] = {}
    for index_type, raw_attr, _ in score_specs:
        normalized = self.batch_normalize_to_display(
            raw_scores=[(f.member_id, getattr(f, raw_attr)) for f in feat_list],
            compression=compression,
            percentile_lower=percentile_lower,
            percentile_upper=percentile_upper,
            use_smoothing=use_smoothing,
            site_id=site_id,
            index_type=index_type,
        )
        display_maps[index_type] = {mid: display for mid, _, display in normalized}

    # Write the display scores back; members missing from a result get 0.0.
    for feat in feat_list:
        for index_type, _, display_attr in score_specs:
            setattr(
                feat, display_attr,
                display_maps[index_type].get(feat.member_id, 0.0),
            )

    # 8. Persist (Req 9.3)
    records_inserted = self._save_spi_data(feat_list, site_id)

    # 9. Percentile history for the total SPI score (Req 9.5)
    raw_values = [f.raw_score for f in feat_list]
    q_l, q_u = self.calculate_percentiles(raw_values, percentile_lower, percentile_upper)
    smoothed_l, smoothed_u = q_l, q_u
    if use_smoothing:
        smoothed_l, smoothed_u = self._apply_ewma_smoothing(
            site_id=site_id,
            current_p5=q_l,
            current_p95=q_u,
            index_type='SPI',
        )
    self.save_percentile_history(
        site_id=site_id,
        percentile_5=q_l,
        percentile_95=q_u,
        percentile_5_smoothed=smoothed_l,
        percentile_95_smoothed=smoothed_u,
        record_count=len(raw_values),
        min_raw=min(raw_values),
        max_raw=max(raw_values),
        avg_raw=sum(raw_values) / len(raw_values),
        index_type='SPI',
    )

    self.logger.info(
        "SPI 计算完成: site_id=%s, 会员数=%d, 写入记录=%d",
        site_id, len(feat_list), records_inserted,
    )

    return {
        'status': 'success',
        'member_count': len(feat_list),
        'records_inserted': records_inserted,
    }
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Data extraction
|
||||
# =========================================================================
|
||||
|
||||
def _extract_spending_features(
    self, site_id: int, params: Dict[str, float]
) -> Dict[int, SPIMemberFeatures]:
    """Build per-member spending features from dwd.dwd_settlement_head.

    Selects orders with ``settle_type IN (1, 3)`` paid within the long
    window (``spend_window_long_days``, default 90) and aggregates them per
    member into spend / order / visit-day figures for the long and short
    windows, plus the average ticket and the number of active weeks.

    The member id is ``s.member_id`` when non-zero, otherwise the
    ``tenant_member_id`` of the joined current card account.

    Args:
        site_id: Site whose orders are read.
        params: SPI parameters; the two window lengths are read from it.

    Returns:
        ``{member_id: SPIMemberFeatures}``.
    """
    short_days = int(params.get('spend_window_short_days', 30))
    long_days = int(params.get('spend_window_long_days', 90))

    # One statement aggregates both windows, so the table is scanned once.
    # The day counts are ints formatted into the INTERVAL literals; site_id
    # is passed as a bound parameter.
    sql = f"""
        WITH consume_source AS (
            SELECT
                COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
                    AS canonical_member_id,
                s.pay_time,
                COALESCE(s.pay_amount, 0) AS pay_amount
            FROM dwd.dwd_settlement_head s
            LEFT JOIN dwd.dim_member_card_account mca
                ON s.member_card_account_id = mca.member_card_id
                AND mca.scd2_is_current = 1
                AND mca.register_site_id = s.site_id
                AND COALESCE(mca.is_delete, 0) = 0
            WHERE s.site_id = %s
              AND s.settle_type IN (1, 3)
              AND s.pay_time >= NOW() - INTERVAL '{long_days} days'
        )
        SELECT
            canonical_member_id AS member_id,
            -- 90 天窗口
            SUM(pay_amount) AS spend_90,
            COUNT(*) AS orders_90,
            COUNT(DISTINCT DATE(pay_time)) AS visit_days_90,
            COUNT(DISTINCT EXTRACT(ISOYEAR FROM pay_time)::int * 100
                + EXTRACT(WEEK FROM pay_time)::int) AS active_weeks_90,
            -- 30 天窗口(子集过滤)
            SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
                THEN pay_amount ELSE 0 END) AS spend_30,
            SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
                THEN 1 ELSE 0 END) AS orders_30,
            COUNT(DISTINCT CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
                THEN DATE(pay_time) END) AS visit_days_30
        FROM consume_source
        WHERE canonical_member_id > 0
        GROUP BY canonical_member_id
    """
    fetched = self.db.query(sql, (site_id,))

    features: Dict[int, SPIMemberFeatures] = {}
    for raw in fetched or ():
        rec = dict(raw)
        member_id = int(rec['member_id'])
        order_count = int(rec['orders_90'] or 0)
        spend_total = float(rec['spend_90'] or 0)

        features[member_id] = SPIMemberFeatures(
            member_id=member_id,
            site_id=site_id,
            spend_30=float(rec['spend_30'] or 0),
            spend_90=spend_total,
            orders_30=int(rec['orders_30'] or 0),
            orders_90=order_count,
            visit_days_30=int(rec['visit_days_30'] or 0),
            visit_days_90=int(rec['visit_days_90'] or 0),
            # Average ticket; the divisor is floored at 1 (Req 2.4).
            avg_ticket_90=spend_total / max(order_count, 1),
            # Active weeks are capped at 13 (Req 2.5).
            active_weeks_90=min(int(rec['active_weeks_90'] or 0), 13),
        )

    self.logger.info(
        "SPI 消费特征提取完成: site_id=%s, 会员数=%d, 窗口=%d/%d天",
        site_id, len(features), short_days, long_days,
    )
    return features
|
||||
|
||||
|
||||
|
||||
def _extract_recharge_features(
|
||||
self, site_id: int, params: Dict[str, float]
|
||||
) -> Dict[int, float]:
|
||||
"""从 dwd_recharge_order 提取充值特征,返回 {member_id: recharge_90}。
|
||||
|
||||
提取近 90 天充值订单(settle_type = 5),按 member_id 聚合充值总额。
|
||||
使用 canonical_member_id 模式解析会员身份(与 _extract_spending_features 一致)。
|
||||
"""
|
||||
long_days = int(params.get('spend_window_long_days', 90))
|
||||
|
||||
sql = f"""
|
||||
WITH recharge_source AS (
|
||||
SELECT
|
||||
COALESCE(NULLIF(r.member_id, 0), mca.tenant_member_id)
|
||||
AS canonical_member_id,
|
||||
COALESCE(r.pay_amount, 0) AS pay_amount
|
||||
FROM dwd.dwd_recharge_order r
|
||||
LEFT JOIN dwd.dim_member_card_account mca
|
||||
ON r.tenant_member_card_id = mca.member_card_id
|
||||
AND mca.scd2_is_current = 1
|
||||
AND mca.register_site_id = r.site_id
|
||||
AND COALESCE(mca.is_delete, 0) = 0
|
||||
WHERE r.site_id = %s
|
||||
AND r.settle_type = 5
|
||||
AND r.pay_time >= NOW() - INTERVAL '{long_days} days'
|
||||
)
|
||||
SELECT
|
||||
canonical_member_id AS member_id,
|
||||
SUM(pay_amount) AS recharge_90
|
||||
FROM recharge_source
|
||||
WHERE canonical_member_id > 0
|
||||
GROUP BY canonical_member_id
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
|
||||
result: Dict[int, float] = {}
|
||||
for row in (rows or []):
|
||||
r = dict(row)
|
||||
mid = int(r['member_id'])
|
||||
result[mid] = float(r['recharge_90'] or 0)
|
||||
|
||||
self.logger.info(
|
||||
"SPI 充值特征提取完成: site_id=%s, 有充值会员数=%d, 窗口=%d天",
|
||||
site_id, len(result), long_days,
|
||||
)
|
||||
return result
|
||||
|
||||
def _compute_daily_spend_ewma(
    self, site_id: int, member_id: int, params: Dict[str, float]
) -> float:
    """Compute the EWMA of one member's daily spend over the long window.

    Queries ``dwd.dwd_settlement_head`` (``settle_type IN (1, 3)``) for the
    member's per-day spend totals, ordered by date ascending, and folds them
    with the recurrence ``S_t = alpha * X_t + (1 - alpha) * S_{t-1}``,
    seeded with ``S_0 = X_0`` (the first day's spend).  Only dates returned
    by the query take part; days without a settlement row are not
    represented in the series.

    Args:
        site_id: Site whose settlements are read.
        member_id: Canonical member id to filter on.
        params: Index parameters; reads ``spend_window_long_days``
            (default 90) and ``ewma_alpha_daily_spend`` (default 0.3).

    Returns:
        The final EWMA value, or ``0.0`` when the member has no rows.
    """
    long_days = int(params.get('spend_window_long_days', 90))
    alpha = float(params.get('ewma_alpha_daily_spend', 0.3))

    # Window length is a bound parameter; placeholders appear in the order
    # site_id, long_days, member_id.
    sql = """
        WITH consume_source AS (
            SELECT
                COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
                    AS canonical_member_id,
                DATE(s.pay_time) AS pay_date,
                COALESCE(s.pay_amount, 0) AS pay_amount
            FROM dwd.dwd_settlement_head s
            LEFT JOIN dwd.dim_member_card_account mca
                ON s.member_card_account_id = mca.member_card_id
                AND mca.scd2_is_current = 1
                AND mca.register_site_id = s.site_id
                AND COALESCE(mca.is_delete, 0) = 0
            WHERE s.site_id = %s
              AND s.settle_type IN (1, 3)
              AND s.pay_time >= NOW() - (%s * INTERVAL '1 day')
        )
        SELECT pay_date, SUM(pay_amount) AS daily_spend
        FROM consume_source
        WHERE canonical_member_id = %s
        GROUP BY pay_date
        ORDER BY pay_date
    """
    rows = self.db.query(sql, (site_id, long_days, member_id))

    if not rows:
        return 0.0

    # Day-by-day recurrence: S_0 = X_0, S_t = alpha*X_t + (1-alpha)*S_{t-1}
    ewma = float(dict(rows[0])['daily_spend'] or 0)
    for row in rows[1:]:
        x = float(dict(row)['daily_spend'] or 0)
        ewma = alpha * x + (1 - alpha) * ewma

    return ewma
|
||||
|
||||
def _compute_daily_spend_ewma_batch(
    self, site_id: int, member_ids: List[int], params: Dict[str, float]
) -> Dict[int, float]:
    """Compute the daily-spend EWMA for many members with a single query.

    One SQL statement returns per-member, per-day spend totals for the site
    (``settle_type IN (1, 3)``, long window), ordered by member and date.
    Each member's series is folded with
    ``S_t = alpha * X_t + (1 - alpha) * S_{t-1}``, seeded with ``S_0 = X_0``.

    Args:
        site_id: Site whose settlements are read.
        member_ids: Members whose EWMA is wanted.  An empty list short-cuts
            to ``{}`` without querying.
        params: Index parameters; reads ``spend_window_long_days``
            (default 90) and ``ewma_alpha_daily_spend`` (default 0.3).

    Returns:
        ``{member_id: ewma}`` with exactly one entry per requested member;
        members with no spend rows in the window map to ``0.0``.
    """
    if not member_ids:
        return {}

    long_days = int(params.get('spend_window_long_days', 90))
    alpha = float(params.get('ewma_alpha_daily_spend', 0.3))

    # Window length is a bound parameter (site_id, long_days).
    sql = """
        WITH consume_source AS (
            SELECT
                COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
                    AS canonical_member_id,
                DATE(s.pay_time) AS pay_date,
                COALESCE(s.pay_amount, 0) AS pay_amount
            FROM dwd.dwd_settlement_head s
            LEFT JOIN dwd.dim_member_card_account mca
                ON s.member_card_account_id = mca.member_card_id
                AND mca.scd2_is_current = 1
                AND mca.register_site_id = s.site_id
                AND COALESCE(mca.is_delete, 0) = 0
            WHERE s.site_id = %s
              AND s.settle_type IN (1, 3)
              AND s.pay_time >= NOW() - (%s * INTERVAL '1 day')
        )
        SELECT canonical_member_id AS member_id,
               pay_date,
               SUM(pay_amount) AS daily_spend
        FROM consume_source
        WHERE canonical_member_id > 0
        GROUP BY canonical_member_id, pay_date
        ORDER BY canonical_member_id, pay_date
    """
    rows = self.db.query(sql, (site_id, long_days))

    # Fold each member's rows in arrival order; the query's ORDER BY
    # guarantees ascending dates within a member.
    ewma_by_member: Dict[int, float] = {}
    for row in (rows or []):
        r = dict(row)
        mid = int(r['member_id'])
        x = float(r['daily_spend'] or 0)
        if mid in ewma_by_member:
            ewma_by_member[mid] = alpha * x + (1 - alpha) * ewma_by_member[mid]
        else:
            ewma_by_member[mid] = x  # S_0 = X_0

    # Honour the documented contract: answer for the requested members only,
    # defaulting to 0.0 for those without any spend rows.
    result: Dict[int, float] = {
        mid: ewma_by_member.get(mid, 0.0) for mid in member_ids
    }

    self.logger.info(
        "SPI 日消费 EWMA 批量计算完成: site_id=%s, 会员数=%d, α=%.2f",
        site_id, len(result), alpha,
    )
    return result
|
||||
|
||||
def _calibrate_amount_bases(
    self, features: Dict[int, SPIMemberFeatures], params: Dict[str, float]
) -> Dict[str, float]:
    """Derive the amount-compression bases from this site's own medians.

    For each ``amount_base_*`` parameter the precedence is: configured value
    (detected as a value in ``params`` that differs from ``DEFAULT_PARAMS``)
    > median of the matching feature across members > ``DEFAULT_PARAMS``.
    A median that is not strictly positive falls back to the default.

    Args:
        features: Per-member feature records keyed by member id.
        params: Current parameter set; it is copied, never mutated.

    Returns:
        A new parameter dict with the calibrated bases filled in.
    """
    # (parameter name, how to read the matching value off one member)
    sources = (
        ('amount_base_spend_30', lambda m: m.spend_30),
        ('amount_base_spend_90', lambda m: m.spend_90),
        ('amount_base_ticket_90', lambda m: m.avg_ticket_90),
        ('amount_base_recharge_90', lambda m: m.recharge_90),
        ('amount_base_speed_abs', lambda m: m.spend_30 / max(m.visit_days_30, 1)),
        ('amount_base_ewma_90', lambda m: m.daily_spend_ewma_90),
    )

    tuned = dict(params)  # start from the incoming values, override per key

    for name, pick in sources:
        # An explicitly configured value wins; leave it untouched.
        if name in params and params[name] != self.DEFAULT_PARAMS.get(name):
            self.logger.info(
                "SPI 基数校准: %s 使用配置表值 %.2f", name, params[name],
            )
            continue

        samples = [pick(member) for member in features.values()]
        median = self.calculate_median(samples)

        if median > 0:
            tuned[name] = median
            self.logger.info(
                "SPI 基数校准: %s 自动校准为中位数 %.2f", name, median,
            )
            continue

        # Unusable median: fall back to the built-in default.
        fallback = self.DEFAULT_PARAMS[name]
        tuned[name] = fallback
        self.logger.warning(
            "SPI 基数校准: %s 中位数 %.2f ≤ 0,回退到默认值 %.2f",
            name, median, self.DEFAULT_PARAMS[name],
        )

    return tuned
|
||||
|
||||
# =========================================================================
|
||||
# 子分计算(纯函数,后续任务实现具体逻辑)
|
||||
# =========================================================================
|
||||
|
||||
@staticmethod
def compute_level(features: SPIMemberFeatures, params: Dict[str, float]) -> float:
    """Level sub-score: how much the member spends.

    L = w_s30    * ln(1 + spend_30 / M30)
      + w_s90    * ln(1 + spend_90 / M90)
      + w_ticket * ln(1 + avg_ticket_90 / T0)
      + w_r90    * ln(1 + recharge_90 / R90)

    Weights come from ``w_level_*`` and the compression bases from
    ``amount_base_*`` in ``params``.
    """
    # (weight key, amount, base key) in the order the terms are added
    terms = (
        ('w_level_spend_30', features.spend_30, 'amount_base_spend_30'),
        ('w_level_spend_90', features.spend_90, 'amount_base_spend_90'),
        ('w_level_ticket_90', features.avg_ticket_90, 'amount_base_ticket_90'),
        ('w_level_recharge_90', features.recharge_90, 'amount_base_recharge_90'),
    )

    weight_key, amount, base_key = terms[0]
    score = params[weight_key] * math.log1p(amount / params[base_key])
    for weight_key, amount, base_key in terms[1:]:
        score = score + params[weight_key] * math.log1p(amount / params[base_key])
    return score
|
||||
|
||||
@staticmethod
def compute_speed(features: SPIMemberFeatures, params: Dict[str, float]) -> float:
    """Speed sub-score: how fast the member is spending.

    V_abs  = ln(1 + spend_30 / (max(visit_days_30, 1) * V0))
    V_rel  = ln((v_30 + eps) / (v_90 + eps)),  v_30 = spend_30 / 30,
                                               v_90 = spend_90 / 90
    V_ewma = ln(1 + daily_spend_ewma_90 / E0)
    S      = w_abs * V_abs + w_rel * max(0, V_rel) + w_ewma * V_ewma

    Acceleration (V_rel > 0) is rewarded; deceleration is not penalised
    (Req 4.5).
    """
    epsilon = params.get('speed_epsilon', 1e-6)

    # Absolute speed (Req 4.1): spend per visit day, compressed by V0.
    visit_days = max(features.visit_days_30, 1)
    per_visit_scaled = features.spend_30 / (visit_days * params['amount_base_speed_abs'])
    abs_term = math.log1p(per_visit_scaled)

    # Relative speed (Req 4.2): recent daily rate versus long-window rate.
    rate_recent = features.spend_30 / 30.0
    rate_long = features.spend_90 / 90.0
    rel_term = math.log((rate_recent + epsilon) / (rate_long + epsilon))
    if not rel_term > 0.0:
        rel_term = 0.0  # only acceleration earns points

    # EWMA speed (Req 4.3).
    ewma_term = math.log1p(
        features.daily_spend_ewma_90 / params['amount_base_ewma_90']
    )

    # Weighted blend (Req 4.4, 4.5).
    blended = params['w_speed_abs'] * abs_term
    blended = blended + params['w_speed_rel'] * rel_term
    blended = blended + params['w_speed_ewma'] * ewma_term
    return blended
|
||||
|
||||
|
||||
@staticmethod
def compute_stability(features: SPIMemberFeatures, params: Dict[str, float]) -> float:
    """Stability sub-score: weekly coverage of the member's activity.

    P = active_weeks_90 / 13, clamped to [0, 1] (Req 5.4).
    Returns 0.0 when ``use_stability`` is 0 (Req 5.3).

    The clamp matters because a 90-day window can touch 14 distinct calendar
    weeks, so a raw ratio may exceed 1.
    NOTE(review): how ``active_weeks_90`` is counted is not visible from this
    block; the clamp enforces the documented range regardless.

    Args:
        features: Member features; only ``active_weeks_90`` is read.
        params: Index parameters; only ``use_stability`` (default 1) is read.

    Returns:
        Coverage ratio in ``[0.0, 1.0]``.
    """
    # Stability switched off by configuration (Req 5.3).
    if params.get('use_stability', 1) == 0:
        return 0.0
    # Weekly coverage (Req 5.1, 5.2), bounded to the documented range.
    coverage = features.active_weeks_90 / 13.0
    return min(1.0, max(0.0, coverage))
|
||||
|
||||
@staticmethod
def compute_spi_raw(
    level: float, speed: float, stability: float, params: Dict[str, float]
) -> float:
    """Blend the three sub-scores into the raw SPI.

    SPI_raw = w_L * L + w_S * S + w_P * P

    Weights are read from ``weight_level`` / ``weight_speed`` /
    ``weight_stability`` and default to 0.60 / 0.30 / 0.10 (Req 6.1).
    """
    level_part = params.get('weight_level', 0.60) * level
    speed_part = params.get('weight_speed', 0.30) * speed
    stability_part = params.get('weight_stability', 0.10) * stability
    return level_part + speed_part + stability_part
|
||||
|
||||
# =========================================================================
|
||||
# 持久化(后续任务实现)
|
||||
# =========================================================================
|
||||
|
||||
def _save_spi_data(
    self, data_list: List[SPIMemberFeatures], site_id: int
) -> int:
    """Replace one site's rows in ``dws.dws_member_spending_power_index``.

    Delete-before-insert: all existing rows for ``site_id`` are removed
    (Req 9.3), then one row per feature record is inserted, and the whole
    change is committed together.  If any statement fails the transaction
    is rolled back before the exception propagates, so the old rows are
    kept and the connection is left usable.

    Args:
        data_list: Scored member records to persist.  May be empty, in which
            case only the delete is committed.
        site_id: Site whose existing rows are replaced.

    Returns:
        Number of rows inserted (sum of non-negative cursor row counts).

    Raises:
        Exception: Whatever the driver raised, after the rollback.
    """
    insert_sql = """
        INSERT INTO dws.dws_member_spending_power_index (
            site_id, member_id,
            spend_30, spend_90, recharge_90,
            orders_30, orders_90,
            visit_days_30, visit_days_90,
            avg_ticket_90, active_weeks_90, daily_spend_ewma_90,
            score_level_raw, score_speed_raw, score_stability_raw,
            score_level_display, score_speed_display, score_stability_display,
            raw_score, display_score,
            calc_time, created_at, updated_at
        ) VALUES (
            %s, %s,
            %s, %s, %s,
            %s, %s,
            %s, %s,
            %s, %s, %s,
            %s, %s, %s,
            %s, %s, %s,
            %s, %s,
            NOW(), NOW(), NOW()
        )
    """
    conn = self.db.conn
    inserted = 0
    try:
        with conn.cursor() as cur:
            # Drop the site's previous snapshot first (Req 9.3).
            cur.execute(
                "DELETE FROM dws.dws_member_spending_power_index WHERE site_id = %s",
                (site_id,),
            )

            for f in data_list:
                cur.execute(insert_sql, (
                    f.site_id, f.member_id,
                    f.spend_30, f.spend_90, f.recharge_90,
                    f.orders_30, f.orders_90,
                    f.visit_days_30, f.visit_days_90,
                    f.avg_ticket_90, f.active_weeks_90, f.daily_spend_ewma_90,
                    f.score_level_raw, f.score_speed_raw, f.score_stability_raw,
                    f.score_level_display, f.score_speed_display, f.score_stability_display,
                    f.raw_score, f.display_score,
                ))
                # rowcount can be -1 when the driver cannot tell; ignore that.
                inserted += max(cur.rowcount, 0)

        conn.commit()
    except Exception:
        # Undo the delete and any partial inserts, then let the caller see
        # the original error.
        conn.rollback()
        raise

    if not data_list:
        return 0

    self.logger.info(
        "SPI 数据写入完成: site_id=%s, 插入记录=%d", site_id, inserted,
    )
    return inserted
|
||||
@@ -233,21 +233,67 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
def _extract_member_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
|
||||
"""
|
||||
提取会员信息
|
||||
|
||||
生日优先级:手动补录(fdw_app.member_birthday_manual)> API 来源(dim_member.birthday)
|
||||
FDW 连接失败时降级为仅使用 dim_member.birthday
|
||||
"""
|
||||
sql = """
|
||||
# CHANGE 2026-02-21 | dim_member 无 site_id 字段,改用 register_site_id
|
||||
# CHANGE 2026-02-22 | 恢复 birthday 字段(C1 迁移已加列),供后续 C2 COALESCE 使用
|
||||
# CHANGE 2026-02-22 | 需求 B:通过事实表反查,支持跨店消费会员
|
||||
# CHANGE 2026-02-22 | 需求 C2:COALESCE 优先手动补录生日,FDW 失败时降级
|
||||
sql_with_fdw = """
|
||||
SELECT
|
||||
m.member_id,
|
||||
m.nickname,
|
||||
m.mobile,
|
||||
m.member_card_grade_name,
|
||||
DATE(m.create_time) AS register_date,
|
||||
m.recharge_money_sum,
|
||||
COALESCE(
|
||||
(SELECT birthday_value
|
||||
FROM fdw_app.member_birthday_manual
|
||||
WHERE member_id = m.member_id
|
||||
ORDER BY recorded_at ASC
|
||||
LIMIT 1),
|
||||
m.birthday
|
||||
) AS birthday
|
||||
FROM dwd.dim_member m
|
||||
WHERE m.member_id IN (
|
||||
SELECT DISTINCT tenant_member_id
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND tenant_member_id IS NOT NULL
|
||||
AND tenant_member_id != 0
|
||||
) AND m.scd2_is_current = 1
|
||||
"""
|
||||
sql_fallback = """
|
||||
SELECT
|
||||
member_id,
|
||||
nickname,
|
||||
mobile,
|
||||
member_card_grade_name,
|
||||
DATE(create_time) AS register_date,
|
||||
recharge_money_sum
|
||||
recharge_money_sum,
|
||||
birthday
|
||||
FROM dwd.dim_member
|
||||
WHERE site_id = %s
|
||||
AND scd2_is_current = 1
|
||||
WHERE member_id IN (
|
||||
SELECT DISTINCT tenant_member_id
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND tenant_member_id IS NOT NULL
|
||||
AND tenant_member_id != 0
|
||||
) AND scd2_is_current = 1
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
|
||||
try:
|
||||
rows = self.db.query(sql_with_fdw, (site_id,))
|
||||
except Exception as exc:
|
||||
# FDW 连接失败,降级为仅使用 dim_member.birthday
|
||||
self.logger.warning(
|
||||
"%s: FDW 读取 member_birthday_manual 失败,降级为 dim_member.birthday — %s",
|
||||
self.get_task_code(), exc,
|
||||
)
|
||||
rows = self.db.query(sql_fallback, (site_id,))
|
||||
|
||||
result = {}
|
||||
for row in (rows or []):
|
||||
row_dict = dict(row)
|
||||
@@ -262,14 +308,21 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
CASH_CARD_TYPE_ID = 2793249295533893
|
||||
GIFT_CARD_TYPE_IDS = [2791990152417157, 2793266846533445, 2794699703437125]
|
||||
|
||||
# CHANGE 2026-02-21 | dim_member_card_account 无 site_id 字段,改用 register_site_id
|
||||
# CHANGE 2026-02-22 | 需求 B:通过事实表反查,支持跨店消费会员
|
||||
sql = """
|
||||
SELECT
|
||||
tenant_member_id AS member_id,
|
||||
card_type_id,
|
||||
balance
|
||||
FROM dwd.dim_member_card_account
|
||||
WHERE site_id = %s
|
||||
AND scd2_is_current = 1
|
||||
WHERE tenant_member_id IN (
|
||||
SELECT DISTINCT tenant_member_id
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND tenant_member_id IS NOT NULL
|
||||
AND tenant_member_id != 0
|
||||
) AND scd2_is_current = 1
|
||||
AND COALESCE(is_delete, 0) = 0
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
|
||||
@@ -175,6 +175,7 @@ class MemberVisitTask(BaseDwsTask):
|
||||
# 会员信息
|
||||
'member_nickname': memb_info.get('nickname'),
|
||||
'member_mobile': self._mask_mobile(memb_info.get('mobile')),
|
||||
# CHANGE 2026-02-22 | 恢复从 dim_member.birthday 读取
|
||||
'member_birthday': memb_info.get('birthday'),
|
||||
# 台桌信息
|
||||
'table_id': table_id,
|
||||
@@ -302,28 +303,73 @@ class MemberVisitTask(BaseDwsTask):
|
||||
def _extract_member_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
    """Load basic member info for every member who settled at this site.

    Birthday precedence: manually recorded value
    (``fdw_app.member_birthday_manual``) over the API-sourced
    ``dim_member.birthday``.  If the FDW-backed query fails, the method
    degrades to ``dim_member.birthday`` only.

    Args:
        site_id: Site whose settlement rows define the member set.

    Returns:
        ``{member_id: row_dict}`` with ``member_id``, ``nickname``,
        ``mobile`` and ``birthday`` for each current member version.
    """
    # CHANGE 2026-02-21 | dim_member has no site_id column; use register_site_id
    # CHANGE 2026-02-22 | restore the birthday column (added by migration C1)
    # CHANGE 2026-02-22 | requirement B: look members up through the fact
    #                     table so cross-site customers are included
    # CHANGE 2026-02-22 | requirement C2: COALESCE prefers the manually
    #                     recorded birthday; degrade when FDW is unavailable
    sql_with_fdw = """
        SELECT
            m.member_id,
            m.nickname,
            m.mobile,
            COALESCE(
                (SELECT birthday_value
                 FROM fdw_app.member_birthday_manual
                 WHERE member_id = m.member_id
                 ORDER BY recorded_at ASC
                 LIMIT 1),
                m.birthday
            ) AS birthday
        FROM dwd.dim_member m
        WHERE m.member_id IN (
            SELECT DISTINCT tenant_member_id
            FROM dwd.dwd_settlement_head
            WHERE site_id = %s
              AND tenant_member_id IS NOT NULL
              AND tenant_member_id != 0
        ) AND m.scd2_is_current = 1
    """
    sql_fallback = """
        SELECT
            member_id,
            nickname,
            mobile,
            birthday
        FROM dwd.dim_member
        WHERE member_id IN (
            SELECT DISTINCT tenant_member_id
            FROM dwd.dwd_settlement_head
            WHERE site_id = %s
              AND tenant_member_id IS NOT NULL
              AND tenant_member_id != 0
        ) AND scd2_is_current = 1
    """

    try:
        rows = self.db.query(sql_with_fdw, (site_id,))
    except Exception as exc:
        # FDW unavailable: degrade to dim_member.birthday only.
        self.logger.warning(
            "%s: FDW 读取 member_birthday_manual 失败,降级为 dim_member.birthday — %s",
            self.get_task_code(), exc,
        )
        # A failed statement leaves a PostgreSQL transaction aborted; clear
        # it so the fallback query can run on the same connection.
        self.db.conn.rollback()
        rows = self.db.query(sql_fallback, (site_id,))

    return {r['member_id']: dict(r) for r in (rows or [])}
|
||||
|
||||
|
||||
def _extract_table_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
|
||||
"""
|
||||
提取台桌信息
|
||||
"""
|
||||
# CHANGE 2026-02-22 | BUG 6 修复 | dim_table 无 site_table_id/site_table_name,
|
||||
# 正确字段为 table_id/table_name(参考 dwd.sql DDL)
|
||||
sql = """
|
||||
SELECT
|
||||
site_table_id AS table_id,
|
||||
site_table_name AS table_name,
|
||||
table_id AS table_id,
|
||||
table_name AS table_name,
|
||||
site_table_area_name AS area_name
|
||||
FROM dwd.dim_table
|
||||
WHERE site_id = %s
|
||||
|
||||
Reference in New Issue
Block a user