在前后端开发联调前 的提交20260223

This commit is contained in:
Neo
2026-02-23 23:02:20 +08:00
parent 254ccb1e77
commit fafc95e64c
1142 changed files with 10366960 additions and 36957 deletions

View File

@@ -25,6 +25,9 @@ from .finance_income_task import FinanceIncomeStructureTask
from .finance_discount_task import FinanceDiscountDetailTask
from .finance_base_task import FinanceBaseTask
from .maintenance_task import DwsMaintenanceTask
from .goods_stock_daily_task import GoodsStockDailyTask
from .goods_stock_weekly_task import GoodsStockWeeklyTask
from .goods_stock_monthly_task import GoodsStockMonthlyTask
# 指数算法任务
from .index import (
@@ -32,6 +35,7 @@ from .index import (
NewconvIndexTask,
MlManualImportTask,
RelationIndexTask,
SpendingPowerIndexTask,
)
__all__ = [
@@ -57,9 +61,14 @@ __all__ = [
"FinanceIncomeStructureTask",
"FinanceDiscountDetailTask",
"DwsMaintenanceTask",
# 库存维度
"GoodsStockDailyTask",
"GoodsStockWeeklyTask",
"GoodsStockMonthlyTask",
# 指数算法
"WinbackIndexTask",
"NewconvIndexTask",
"MlManualImportTask",
"RelationIndexTask",
"SpendingPowerIndexTask",
]

View File

@@ -198,7 +198,7 @@ class AssistantCustomerTask(BaseDwsTask):
)
SELECT
assistant_id,
MAX(assistant_nickname) AS assistant_nickname,
(ARRAY_AGG(assistant_nickname ORDER BY service_date DESC))[1] AS assistant_nickname,
member_id,
MIN(service_date) AS first_service_date,
MAX(service_date) AS last_service_date,
@@ -247,21 +247,31 @@ class AssistantCustomerTask(BaseDwsTask):
"""
提取会员信息
"""
# CHANGE 2026-02-21 | dim_member 无 site_id 字段,改用 register_site_id
# 加 scd2_is_current=1 只取当前有效版本
# CHANGE 2026-02-22 | 需求 B通过事实表反查支持跨店消费会员
sql = """
SELECT
member_id,
nickname,
mobile
FROM dwd.dim_member
WHERE site_id = %s
WHERE member_id IN (
SELECT DISTINCT tenant_member_id
FROM dwd.dwd_assistant_service_log
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
) AND scd2_is_current = 1
"""
rows = self.db.query(sql, (site_id,))
result = {}
for row in (rows or []):
row_dict = dict(row)
result[row_dict['member_id']] = row_dict
return result
def _extract_assistant_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
"""

View File

@@ -7,7 +7,7 @@
数据来源:
- dwd_assistant_service_log: 助教服务流水
- dwd_assistant_trash_event: 废除记录(排除
- dwd_assistant_service_log_ex: 扩展表(提供 is_trash 废除标记
- dim_assistant: 助教维度SCD2获取当日等级
- cfg_skill_type: 技能→课程类型映射
@@ -19,7 +19,7 @@
- 幂等方式delete-before-insert按日期窗口
业务规则:
- 有效业绩:需排除dwd_assistant_trash_event中的废除记录
- 有效业绩:通过 dwd_assistant_service_log_ex.is_trash 字段判断是否被废除
- 助教等级使用SCD2 as-of取值获取统计日当日生效的等级
- 课程类型通过skill_id映射分为基础课和附加课
@@ -78,18 +78,14 @@ class AssistantDailyTask(BaseDwsTask):
self.get_task_code(), start_date, end_date
)
# 1. 获取助教服务记录
# 1. 获取助教服务记录(含 is_trash 标记,来自 _ex 表 JOIN
service_records = self._extract_service_records(site_id, start_date, end_date)
# 2. 获取废除记录
trash_records = self._extract_trash_records(site_id, start_date, end_date)
# 3. 加载配置缓存
# 2. 加载配置缓存
self.load_config_cache()
return {
'service_records': service_records,
'trash_records': trash_records,
'start_date': start_date,
'end_date': end_date,
'site_id': site_id
@@ -100,21 +96,16 @@ class AssistantDailyTask(BaseDwsTask):
转换数据:按助教+日期聚合
"""
service_records = extracted['service_records']
trash_records = extracted['trash_records']
site_id = extracted['site_id']
self.logger.info(
"%s: 转换数据,服务记录 %d 条,废除记录 %d",
self.get_task_code(), len(service_records), len(trash_records)
"%s: 转换数据,服务记录 %d",
self.get_task_code(), len(service_records)
)
# 构建废除记录索引assistant_service_id -> trash_info
trash_index = self._build_trash_index(trash_records)
# 按助教+日期聚合
aggregated = self._aggregate_by_assistant_date(
service_records,
trash_index,
site_id
)
@@ -134,6 +125,8 @@ class AssistantDailyTask(BaseDwsTask):
) -> List[Dict[str, Any]]:
"""
提取助教服务记录
JOIN _ex 表取 is_trash 字段,用于直接判断服务是否被废除。
"""
sql = """
SELECT
@@ -150,8 +143,11 @@ class AssistantDailyTask(BaseDwsTask):
asl.real_use_seconds,
asl.ledger_amount,
asl.ledger_unit_price,
DATE(asl.start_use_time) AS service_date
DATE(asl.start_use_time) AS service_date,
COALESCE(ex.is_trash, 0) AS is_trash
FROM dwd.dwd_assistant_service_log asl
LEFT JOIN dwd.dwd_assistant_service_log_ex ex
ON asl.assistant_service_id = ex.assistant_service_id
WHERE asl.site_id = %s
AND DATE(asl.start_use_time) >= %s
AND DATE(asl.start_use_time) <= %s
@@ -160,53 +156,14 @@ class AssistantDailyTask(BaseDwsTask):
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
def _extract_trash_records(
self,
site_id: int,
start_date: date,
end_date: date
) -> List[Dict[str, Any]]:
"""
提取废除记录
有效业绩的排除规则:仅对"助教废除表"的记录进行处理排除
"""
sql = """
SELECT
assistant_service_id,
trash_seconds,
trash_reason,
trash_time
FROM dwd.dwd_assistant_trash_event
WHERE site_id = %s
AND DATE(trash_time) >= %s
AND DATE(trash_time) <= %s
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
# ==========================================================================
# 数据转换方法
# ==========================================================================
def _build_trash_index(
self,
trash_records: List[Dict[str, Any]]
) -> Dict[int, Dict[str, Any]]:
"""
构建废除记录索引
"""
index = {}
for record in trash_records:
service_id = record.get('assistant_service_id')
if service_id:
index[service_id] = record
return index
def _aggregate_by_assistant_date(
self,
service_records: List[Dict[str, Any]],
trash_index: Dict[int, Dict[str, Any]],
site_id: int
) -> List[Dict[str, Any]]:
"""
@@ -275,14 +232,12 @@ class AssistantDailyTask(BaseDwsTask):
is_bonus = course_type == CourseType.BONUS
is_room = course_type == CourseType.ROOM
# 检查是否被废除
is_trashed = service_id in trash_index
# 检查是否被废除(使用 _ex 表的 is_trash 标记)
is_trashed = bool(record.get('is_trash', 0))
if is_trashed:
# 废除记录单独统计
trash_info = trash_index[service_id]
trash_seconds = self.safe_int(trash_info.get('trash_seconds', income_seconds))
agg['trashed_seconds'] += trash_seconds
# 废除记录:直接用服务记录的 income_seconds 作为废除时长
agg['trashed_seconds'] += income_seconds
agg['trashed_count'] += 1
else:
# 正常记录累加

View File

@@ -129,7 +129,7 @@ class AssistantFinanceTask(BaseDwsTask):
SELECT
DATE(s.start_use_time) AS stat_date,
s.site_assistant_id AS assistant_id,
MAX(s.nickname) AS assistant_nickname,
(ARRAY_AGG(s.nickname ORDER BY s.start_use_time DESC))[1] AS assistant_nickname,
COUNT(*) AS service_count,
SUM(s.income_seconds) / 3600.0 AS service_hours,
SUM(s.ledger_amount) AS revenue_total,

View File

@@ -261,12 +261,16 @@ class AssistantMonthlyTask(BaseDwsTask):
month_where = " OR ".join(month_conditions)
# CHANGE 2026-02-22 | Prompt: 需求 A — 按档位分段统计
# GROUP BY 加入 assistant_level_code/name使同一助教月内不同档位各自聚合
# nickname 改用 ARRAY_AGG 按时间倒序取最新值,替代 MAX() 的字典序取值。
# 唯一约束已同步变更为 (site_id, assistant_id, stat_month, assistant_level_code)
sql = f"""
SELECT
assistant_id,
assistant_nickname,
assistant_level_code,
assistant_level_name,
(ARRAY_AGG(assistant_nickname ORDER BY stat_date DESC))[1] AS assistant_nickname,
DATE_TRUNC('month', stat_date)::DATE AS stat_month,
COUNT(DISTINCT stat_date) AS work_days,
SUM(total_service_count) AS total_service_count,
@@ -287,7 +291,7 @@ class AssistantMonthlyTask(BaseDwsTask):
SUM(trashed_count) AS trashed_count
FROM dws.dws_assistant_daily_detail
WHERE site_id = %s AND ({month_where})
GROUP BY assistant_id, assistant_nickname, assistant_level_code, assistant_level_name,
GROUP BY assistant_id, assistant_level_code, assistant_level_name,
DATE_TRUNC('month', stat_date)
"""
@@ -405,9 +409,10 @@ class AssistantMonthlyTask(BaseDwsTask):
max_tier_level=max_tier_level
)
# 获取月末的等级信息(用于记录)
month_end = self._get_month_end(month)
level_info = self.get_assistant_level_asof(assistant_id, month_end)
# CHANGE 2026-02-22 | Prompt: 需求 A 任务 7.3 — 多行适配
# 聚合行已按 assistant_level_code 分组,每行自带档位信息,
# 直接使用聚合行的 assistant_level_code/name不再用月末 SCD2 覆盖,
# 避免同一助教多档位行被统一覆盖为月末档位导致 UK 冲突。
# 月度去重客户/台桌从DWD直接去重
unique_info = monthly_unique_index.get((assistant_id, month), {})
@@ -424,8 +429,8 @@ class AssistantMonthlyTask(BaseDwsTask):
'assistant_id': assistant_id,
'assistant_nickname': agg.get('assistant_nickname'),
'stat_month': month,
'assistant_level_code': level_info.get('level_code') if level_info else agg.get('assistant_level_code'),
'assistant_level_name': level_info.get('level_name') if level_info else agg.get('assistant_level_name'),
'assistant_level_code': agg.get('assistant_level_code'),
'assistant_level_name': agg.get('assistant_level_name'),
'hire_date': hire_date,
'is_new_hire': is_new_hire,
'work_days': self.safe_int(agg.get('work_days', 0)),
@@ -536,7 +541,8 @@ class AssistantMonthlyTask(BaseDwsTask):
计算排名(考虑并列)
Top3排名口径按有效业绩总小时数排名
如遇并列则都算比如2个第一则记为2个第一一个第三
如遇并列则都算比如2个第一则记为2个第一一个第三
同一助教不同档位的行各自独立参与排名。
"""
if not records:
return
@@ -548,24 +554,29 @@ class AssistantMonthlyTask(BaseDwsTask):
reverse=True
)
# 计算考虑并列的排名
# CHANGE 2026-02-22 | Prompt: 需求 A 任务 7.3 — 多行排名适配
# 同一助教可能有多个档位行,用 (assistant_id, assistant_level_code) 做唯一标识,
# 避免 rank_map 中同一 assistant_id 的多行互相覆盖。
values = [
(r.get('assistant_id'), r.get('effective_hours', Decimal('0')))
(
(r.get('assistant_id'), r.get('assistant_level_code')),
r.get('effective_hours', Decimal('0'))
)
for r in sorted_records
]
ranked = self.calculate_rank_with_ties(values)
# 创建排名映射
# 创建排名映射key = (assistant_id, assistant_level_code)
rank_map = {
assistant_id: (rank, dense_rank)
for assistant_id, rank, dense_rank in ranked
entity_key: (rank, dense_rank)
for entity_key, rank, dense_rank in ranked
}
# 更新记录
for record in records:
assistant_id = record.get('assistant_id')
if assistant_id in rank_map:
rank, _ = rank_map[assistant_id]
key = (record.get('assistant_id'), record.get('assistant_level_code'))
if key in rank_map:
rank, _ = rank_map[key]
record['rank_by_hours'] = rank
record['rank_with_ties'] = rank # 使用考虑并列的排名

View File

@@ -1,4 +1,13 @@
# -*- coding: utf-8 -*-
# AI_CHANGELOG
# - 2026-02-21 | feature: 新增 GUARANTEE 保底月薪线逻辑
# prompt: "这些种子规则,对应的计算方式,是否也实现?"
# 直接原因: cfg_bonus_rules 新增 GUARANTEE 类型规则_calculate_salary 需要对应处理
# 变更: (1) _calculate_salary 调用 calculate_guarantee() 获取保底金额
# (2) gross_salary = MAX(课时收入+奖金, 保底金额)
# (3) _build_calc_notes 增加保底生效/未触发备注
# 风险: 仅在 salary_month 落入 GUARANTEE 生效期2025-01-01~2026-02-28时触发
# 验证: 保底期外的月份不受影响calculate_guarantee 返回 (0, None)
"""
助教工资计算任务
@@ -64,7 +73,8 @@ class AssistantSalaryTask(BaseDwsTask):
return "dws_assistant_salary_calc"
def get_primary_keys(self) -> List[str]:
return ["site_id", "assistant_id", "salary_month"]
# CHANGE 2026-02-22 | task 7.4: 唯一键加入 assistant_level_code适配档位分段工资
return ["site_id", "assistant_id", "salary_month", "assistant_level_code"]
# ==========================================================================
# ETL主流程
@@ -330,12 +340,29 @@ class AssistantSalaryTask(BaseDwsTask):
# 获取充值提成
recharge_commission = commission_index.get(assistant_id, Decimal('0'))
# 汇总奖金
# CHANGE 2026-02-21 | 保底月薪线逻辑
# prompt: "这些种子规则,对应的计算方式,是否也实现?"
# reason: 2025-01-01~2026-02-28 期间,满足条件的助教享受保底月薪线
# 公式: gross_salary = MAX(课时收入 + 其他奖金, guarantee_amount)
guarantee_amount, guarantee_rule = self.calculate_guarantee(
level_code=level_code,
effective_hours=effective_hours,
bonus_hours=bonus_hours,
effective_date=salary_month,
)
# 汇总奖金(不含保底,保底是月薪线而非额外奖金)
other_bonus = Decimal('0') # 预留其他奖金
total_bonus = sprint_bonus + top_rank_bonus + recharge_commission + other_bonus
# 计算应发工资 = 课时收入 + 奖金
gross_salary = total_course_income + total_bonus
# 计算应发工资
# 无保底或不满足条件gross_salary = 课时收入 + 奖金
# 有保底且满足条件gross_salary = MAX(课时收入 + 奖金, 保底金额)
raw_salary = total_course_income + total_bonus
if guarantee_amount > 0 and raw_salary < guarantee_amount:
gross_salary = guarantee_amount
else:
gross_salary = raw_salary
# 构建记录
return {
@@ -377,7 +404,11 @@ class AssistantSalaryTask(BaseDwsTask):
# 假期
'vacation_days': vacation_days,
'vacation_unlimited': vacation_unlimited,
'calc_notes': self._build_calc_notes(summary, tier, sprint_bonus, top_rank_bonus),
'calc_notes': self._build_calc_notes(
summary, tier, sprint_bonus, top_rank_bonus,
guarantee_amount=guarantee_amount, guarantee_rule=guarantee_rule,
raw_salary=raw_salary,
),
}
def _build_calc_notes(
@@ -385,7 +416,10 @@ class AssistantSalaryTask(BaseDwsTask):
summary: Dict[str, Any],
tier: Optional[Dict[str, Any]],
sprint_bonus: Decimal,
top_rank_bonus: Decimal
top_rank_bonus: Decimal,
guarantee_amount: Decimal = Decimal('0'),
guarantee_rule: Optional[str] = None,
raw_salary: Decimal = Decimal('0'),
) -> Optional[str]:
"""
构建计算备注
@@ -405,6 +439,20 @@ class AssistantSalaryTask(BaseDwsTask):
rank = summary.get('rank_with_ties')
notes.append(f"Top{rank}奖金: {top_rank_bonus}")
# 保底月薪线备注
if guarantee_amount > 0:
if raw_salary < guarantee_amount:
diff = guarantee_amount - raw_salary
notes.append(
f"保底生效({guarantee_rule}): 月薪线{guarantee_amount}, "
f"课时+奖金{raw_salary}, 补差{diff}"
)
else:
notes.append(
f"保底未触发({guarantee_rule}): 月薪线{guarantee_amount}, "
f"实际收入{raw_salary}已超过"
)
return "; ".join(notes) if notes else None
def _delete_by_month(

View File

@@ -824,6 +824,55 @@ class BaseDwsTask(BaseTask):
return Decimal(str(rule.get('bonus_amount', 0)))
return Decimal('0')
# CHANGE 2026-02-21 | 新增保底月薪线计算方法
# prompt: "这些种子规则,对应的计算方式,是否也实现?"
# reason: cfg_bonus_rules 新增 GUARANTEE 类型规则,需要对应的计算入口
def calculate_guarantee(
    self,
    level_code: int,
    effective_hours: Decimal,
    bonus_hours: Decimal,
    effective_date: Optional[date] = None,
    min_bonus_hours: Decimal = Decimal('10'),
) -> Tuple[Decimal, Optional[str]]:
    """
    Look up the guaranteed monthly salary floor (GUARANTEE rule) for a level.

    The bonus rules from the config cache are first narrowed with
    ``_filter_by_effective_date``. The first rule whose ``rule_type`` is
    ``GUARANTEE`` and whose ``rule_code`` equals ``GUAR_LV{level_code}``
    decides the outcome; later rules are never consulted.

    This method only returns the floor amount. How it is combined with
    earned income is left to the caller.

    Args:
        level_code: Assistant level code, interpolated into the rule code.
        effective_hours: Effective performance hours for the period.
        bonus_hours: Bonus-course hours for the period.
        effective_date: Date used to select the rules in force.
        min_bonus_hours: Minimum bonus-course hours required (default 10).

    Returns:
        ``(guarantee_amount, rule_code)`` when a matching rule exists and
        both hour conditions are met, otherwise ``(Decimal('0'), None)``.
    """
    not_applicable = (Decimal('0'), None)

    cache = self.load_config_cache()
    active_rules = self._filter_by_effective_date(cache.bonus_rules, effective_date)
    wanted_code = f"GUAR_LV{level_code}"

    # Only the first GUARANTEE rule carrying the wanted code is evaluated.
    matched = next(
        (
            candidate
            for candidate in active_rules
            if candidate.get('rule_type') == 'GUARANTEE'
            and candidate.get('rule_code') == wanted_code
        ),
        None,
    )
    if matched is None:
        # No GUARANTEE rule for this level is in force on the given date.
        return not_applicable

    hours_floor = Decimal(str(matched.get('threshold_hours', 0)))
    qualifies = effective_hours >= hours_floor and bonus_hours >= min_bonus_hours
    if not qualifies:
        # A rule matched, but the hour conditions are not satisfied.
        return not_applicable

    return (Decimal(str(matched.get('bonus_amount', 0))), wanted_code)
# ==========================================================================
# DWD数据读取方法

View File

@@ -82,21 +82,22 @@ class FinanceBaseTask(BaseDwsTask):
end_date: date,
) -> List[Dict[str, Any]]:
"""充值日汇总(充值订单按日聚合)"""
# CHANGE 2026-02-21 | BUG 8: dwd_recharge_order 无 pay_money/gift_money实际字段为 pay_amount/point_amount
sql = """
SELECT
DATE(pay_time) AS stat_date,
COUNT(*) AS recharge_count,
SUM(pay_money + gift_money) AS recharge_total,
SUM(pay_money) AS recharge_cash,
SUM(gift_money) AS recharge_gift,
SUM(pay_amount + point_amount) AS recharge_total,
SUM(pay_amount) AS recharge_cash,
SUM(point_amount) AS recharge_gift,
COUNT(CASE WHEN is_first = 1 THEN 1 END) AS first_recharge_count,
SUM(CASE WHEN is_first = 1 THEN pay_money + gift_money ELSE 0 END) AS first_recharge_total,
SUM(CASE WHEN is_first = 1 THEN pay_money ELSE 0 END) AS first_recharge_cash,
SUM(CASE WHEN is_first = 1 THEN gift_money ELSE 0 END) AS first_recharge_gift,
SUM(CASE WHEN is_first = 1 THEN pay_amount + point_amount ELSE 0 END) AS first_recharge_total,
SUM(CASE WHEN is_first = 1 THEN pay_amount ELSE 0 END) AS first_recharge_cash,
SUM(CASE WHEN is_first = 1 THEN point_amount ELSE 0 END) AS first_recharge_gift,
COUNT(CASE WHEN is_first = 0 OR is_first IS NULL THEN 1 END) AS renewal_count,
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN pay_money + gift_money ELSE 0 END) AS renewal_total,
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN pay_money ELSE 0 END) AS renewal_cash,
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN gift_money ELSE 0 END) AS renewal_gift,
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN pay_amount + point_amount ELSE 0 END) AS renewal_total,
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN pay_amount ELSE 0 END) AS renewal_cash,
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN point_amount ELSE 0 END) AS renewal_gift,
COUNT(DISTINCT member_id) AS recharge_member_count
FROM dwd.dwd_recharge_order
WHERE site_id = %s

View File

@@ -140,6 +140,8 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
关联dim_table获取区域名称再映射到cfg_area_category
"""
# CHANGE 2026-02-22 | BUG 7 修复 | dim_table 主键是 table_id 而非 site_table_id
# JOIN 条件从 dt.site_table_id → dt.table_id事实表侧 site_table_id 不变)
sql = """
WITH area_orders AS (
SELECT
@@ -150,7 +152,7 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
COALESCE(tfl.ledger_time_seconds, 0) AS duration_seconds
FROM dwd.dwd_table_fee_log tfl
LEFT JOIN dwd.dim_table dt
ON dt.site_table_id = tfl.site_table_id
ON dt.table_id = tfl.site_table_id
WHERE tfl.site_id = %(site_id)s
AND tfl.pay_time >= %(start_date)s
AND tfl.pay_time < %(end_date)s + INTERVAL '1 day'
@@ -166,7 +168,7 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
COALESCE(asl.income_seconds, 0) AS duration_seconds
FROM dwd.dwd_assistant_service_log asl
LEFT JOIN dwd.dim_table dt
ON dt.site_table_id = asl.site_table_id
ON dt.table_id = asl.site_table_id
WHERE asl.site_id = %(site_id)s
AND asl.start_use_time >= %(start_date)s
AND asl.start_use_time < %(end_date)s + INTERVAL '1 day'

View File

@@ -18,7 +18,7 @@
业务规则:
- 首充/续充:通过 is_first 字段区分
- 现金/赠送:通过 pay_money/gift_money 区分
- 现金/赠送:通过 pay_amount/point_amount 区分
- 卡余额:区分储值卡和赠送卡
作者ETL团队
@@ -110,21 +110,22 @@ class FinanceRechargeTask(FinanceBaseTask):
# load() 已移除——使用 BaseDwsTask 默认实现DATE_COL="stat_date"
def _extract_recharge_summary(self, site_id: int, start_date: date, end_date: date) -> List[Dict[str, Any]]:
# CHANGE 2026-02-21 | BUG 8: dwd_recharge_order 无 pay_money/gift_money实际字段为 pay_amount/point_amount
sql = """
SELECT
DATE(pay_time) AS stat_date,
COUNT(*) AS recharge_count,
SUM(pay_money + gift_money) AS recharge_total,
SUM(pay_money) AS recharge_cash,
SUM(gift_money) AS recharge_gift,
SUM(pay_amount + point_amount) AS recharge_total,
SUM(pay_amount) AS recharge_cash,
SUM(point_amount) AS recharge_gift,
COUNT(CASE WHEN is_first = 1 THEN 1 END) AS first_recharge_count,
SUM(CASE WHEN is_first = 1 THEN pay_money ELSE 0 END) AS first_recharge_cash,
SUM(CASE WHEN is_first = 1 THEN gift_money ELSE 0 END) AS first_recharge_gift,
SUM(CASE WHEN is_first = 1 THEN pay_money + gift_money ELSE 0 END) AS first_recharge_total,
SUM(CASE WHEN is_first = 1 THEN pay_amount ELSE 0 END) AS first_recharge_cash,
SUM(CASE WHEN is_first = 1 THEN point_amount ELSE 0 END) AS first_recharge_gift,
SUM(CASE WHEN is_first = 1 THEN pay_amount + point_amount ELSE 0 END) AS first_recharge_total,
COUNT(CASE WHEN is_first != 1 OR is_first IS NULL THEN 1 END) AS renewal_count,
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_money ELSE 0 END) AS renewal_cash,
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN gift_money ELSE 0 END) AS renewal_gift,
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_money + gift_money ELSE 0 END) AS renewal_total,
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_amount ELSE 0 END) AS renewal_cash,
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN point_amount ELSE 0 END) AS renewal_gift,
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_amount + point_amount ELSE 0 END) AS renewal_total,
COUNT(DISTINCT member_id) AS recharge_member_count,
COUNT(DISTINCT CASE WHEN is_first = 1 THEN member_id END) AS new_member_count
FROM dwd.dwd_recharge_order
@@ -138,10 +139,18 @@ class FinanceRechargeTask(FinanceBaseTask):
CASH_CARD_TYPE_ID = 2793249295533893
GIFT_CARD_TYPE_IDS = [2791990152417157, 2793266846533445, 2794699703437125]
# CHANGE 2026-02-21 | dim_member_card_account 无 site_id 字段,改用 register_site_id
# CHANGE 2026-02-22 | 需求 B通过事实表反查支持跨店消费会员
sql = """
SELECT card_type_id, SUM(balance) AS total_balance
FROM dwd.dim_member_card_account
WHERE site_id = %s AND scd2_is_current = 1
WHERE tenant_member_id IN (
SELECT DISTINCT member_id
FROM dwd.dwd_recharge_order
WHERE site_id = %s
AND member_id IS NOT NULL
AND member_id != 0
) AND scd2_is_current = 1
AND COALESCE(is_delete, 0) = 0
GROUP BY card_type_id
"""

View File

@@ -0,0 +1,237 @@
# -*- coding: utf-8 -*-
"""
库存日度汇总任务
功能说明:
"门店+日期+商品"为粒度,汇总每日库存数据
数据来源:
- dwd.dwd_goods_stock_summary库存汇总明细按 fetched_at 日期聚合)
目标表:
dws.dws_goods_stock_daily_summary
更新策略:
- 更新频率:每日更新
- 幂等方式upsertON CONFLICT DO UPDATE
业务规则:
- 按 fetched_at 的日期部分分组,同一天同一商品可能有多条 DWD 记录
- 数值指标取 SUM 聚合(入库/出库/销售等为累计量)
- current_stock 取当日最后一条记录的值(期末快照)
- range_start_stock 取当日第一条记录的值(期初快照)
- range_end_stock 取当日最后一条记录的值(期末快照)
"""
from __future__ import annotations
from datetime import date
from decimal import Decimal
from typing import Any, Dict, List
from .base_dws_task import BaseDwsTask, TaskContext
class GoodsStockDailyTask(BaseDwsTask):
    """
    Daily goods-stock summary task.

    Reads rows from ``dwd.dwd_goods_stock_summary``, folds them into one
    record per (calendar day of ``fetched_at``, ``site_goods_id``) and
    upserts the records into ``dws_goods_stock_daily_summary``.
    """

    DATE_COL = "stat_date"

    def get_task_code(self) -> str:
        """Return the task code of this task."""
        return "DWS_GOODS_STOCK_DAILY"

    def get_target_table(self) -> str:
        """Return the name of the DWS table this task writes to."""
        return "dws_goods_stock_daily_summary"

    def get_primary_keys(self) -> List[str]:
        """Return the columns that identify one summary row."""
        return ["site_id", "stat_date", "site_goods_id"]

    # ======================================================================
    # Extract
    # ======================================================================

    def extract(self, context: TaskContext) -> Dict[str, Any]:
        """
        Fetch the DWD stock rows of one store inside the task window.

        The window bounds are reduced to their date part when they expose a
        ``date()`` method and are used as inclusive limits on
        ``DATE(fetched_at)``. Rows come back ordered by ``fetched_at``.

        Returns:
            Dict with keys ``rows``, ``start_date``, ``end_date``, ``site_id``.
        """
        start_date, end_date = (
            bound.date() if hasattr(bound, "date") else bound
            for bound in (context.window_start, context.window_end)
        )
        site_id = context.store_id

        task_code = self.get_task_code()
        self.logger.info(
            "%s: 提取数据,门店=%s,日期范围 %s ~ %s",
            task_code, site_id, start_date, end_date,
        )

        sql = """
            SELECT
                site_goods_id,
                goods_name,
                goods_unit,
                goods_category_id,
                goods_category_second_id,
                category_name,
                range_start_stock,
                range_end_stock,
                range_in,
                range_out,
                range_sale,
                range_sale_money,
                range_inventory,
                current_stock,
                site_id,
                tenant_id,
                fetched_at
            FROM dwd.dwd_goods_stock_summary
            WHERE site_id = %s
              AND DATE(fetched_at) >= %s
              AND DATE(fetched_at) <= %s
            ORDER BY fetched_at
        """
        params = (site_id, start_date, end_date)
        rows = self.query_dwd(sql, params)

        self.logger.info(
            "%s: 提取到 %d 条 DWD 记录", self.get_task_code(), len(rows),
        )

        extracted: Dict[str, Any] = {"rows": rows}
        extracted["start_date"] = start_date
        extracted["end_date"] = end_date
        extracted["site_id"] = site_id
        return extracted

    # ======================================================================
    # Transform
    # ======================================================================

    def transform(
        self, extracted: Dict[str, Any], context: TaskContext
    ) -> List[Dict[str, Any]]:
        """
        Collapse the extracted rows into one record per day and goods item.

        Rows without ``fetched_at`` or ``site_goods_id`` are ignored. Within a
        group the first row supplies the descriptive columns and the opening
        stock, flow columns are summed over all rows, and the closing and
        current stock come from the last row seen (rows arrive ordered by
        ``fetched_at``).
        """
        source_rows = extracted.get("rows", [])
        site_id = extracted["site_id"]

        if not source_rows:
            self.logger.info("%s: 无数据需要汇总", self.get_task_code())
            return []

        label_fields = (
            "goods_name", "goods_unit", "goods_category_id",
            "goods_category_second_id", "category_name",
        )
        # Listed in the order the keys are placed on the output record.
        numeric_fields = (
            "range_start_stock", "range_end_stock",
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory", "current_stock",
        )
        summed_fields = (
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory",
        )
        closing_fields = ("range_end_stock", "current_stock")

        # (stat_date, site_goods_id) -> record being accumulated
        buckets: Dict[tuple, Dict[str, Any]] = {}

        for row in source_rows:
            fetched_at = row.get("fetched_at")
            goods_id = row.get("site_goods_id")
            if fetched_at is None or goods_id is None:
                continue

            day = fetched_at.date() if hasattr(fetched_at, "date") else fetched_at
            group_key = (day, goods_id)
            bucket = buckets.get(group_key)

            if bucket is None:
                # First row of the group: seeds every column, including the
                # opening stock.
                bucket = {
                    "site_id": site_id,
                    "tenant_id": row.get("tenant_id"),
                    "stat_date": day,
                    "site_goods_id": goods_id,
                }
                for name in label_fields:
                    bucket[name] = row.get(name)
                for name in numeric_fields:
                    bucket[name] = self.safe_decimal(row.get(name))
                bucket["stat_period"] = "daily"
                buckets[group_key] = bucket
                continue

            # Later rows: add up the flow columns ...
            for name in summed_fields:
                bucket[name] += self.safe_decimal(row.get(name))
            # ... and let the most recent row define the closing snapshot.
            for name in closing_fields:
                bucket[name] = self.safe_decimal(row.get(name))

        result = list(buckets.values())
        self.logger.info(
            "%s: 汇总完成,生成 %d 条日度记录",
            self.get_task_code(), len(result),
        )
        return result

    # ======================================================================
    # Load
    # ======================================================================

    def load(
        self, transformed: List[Dict[str, Any]], context: TaskContext
    ) -> Dict[str, Any]:
        """
        Upsert the summary records and report row counts.

        Returns:
            ``{"counts": {...}}`` with ``fetched``, ``inserted``, ``updated``,
            ``skipped`` and ``errors``; all zero when nothing was passed in.
        """
        if not transformed:
            count_keys = ("fetched", "inserted", "updated", "skipped", "errors")
            return {"counts": dict.fromkeys(count_keys, 0)}

        columns = [
            "site_id",
            "tenant_id",
            "stat_date",
            "site_goods_id",
            "goods_name",
            "goods_unit",
            "goods_category_id",
            "goods_category_second_id",
            "category_name",
            "range_start_stock",
            "range_end_stock",
            "range_in",
            "range_out",
            "range_sale",
            "range_sale_money",
            "range_inventory",
            "current_stock",
            "stat_period",
        ]

        inserted, updated = self.upsert(transformed, columns=columns)

        self.logger.info(
            "%s: 写入完成inserted=%d",
            self.get_task_code(), inserted,
        )

        counts = {
            "fetched": len(transformed),
            "inserted": inserted,
            "updated": updated,
            "skipped": 0,
            "errors": 0,
        }
        return {"counts": counts}
__all__ = ["GoodsStockDailyTask"]

View File

@@ -0,0 +1,245 @@
# -*- coding: utf-8 -*-
"""
库存月度汇总任务
功能说明:
"门店+自然月+商品"为粒度,汇总每月库存数据
数据来源:
- dwd.dwd_goods_stock_summary库存汇总明细按 fetched_at 日期聚合)
目标表:
dws.dws_goods_stock_monthly_summary
更新策略:
- 更新频率:每日更新当月数据
- 幂等方式upsertON CONFLICT DO UPDATE
业务规则:
- 按自然月分组stat_date = 该月的第一天(如 2026-01-01 代表 2026 年 1 月)
- 同一月同一商品可能有多条 DWD 记录
- 数值指标取 SUM 聚合(入库/出库/销售等为累计量)
- current_stock 取该月最后一条记录的值(期末快照)
- range_start_stock 取该月第一条记录的值(期初快照)
- range_end_stock 取该月最后一条记录的值(期末快照)
- stat_period = 'monthly'
"""
from __future__ import annotations
from datetime import date
from decimal import Decimal
from typing import Any, Dict, List
from .base_dws_task import BaseDwsTask, TaskContext
def _month_first_day(d: date) -> date:
"""获取给定日期所在自然月的第一天"""
return d.replace(day=1)
class GoodsStockMonthlyTask(BaseDwsTask):
    """
    Monthly goods-stock summary task.

    Reads rows from ``dwd.dwd_goods_stock_summary``, folds them into one
    record per (calendar month, ``site_goods_id``) and upserts the records
    into ``dws_goods_stock_monthly_summary``. ``stat_date`` of a record is
    the first day of its month.
    """

    DATE_COL = "stat_date"

    def get_task_code(self) -> str:
        """Return the task code of this task."""
        return "DWS_GOODS_STOCK_MONTHLY"

    def get_target_table(self) -> str:
        """Return the name of the DWS table this task writes to."""
        return "dws_goods_stock_monthly_summary"

    def get_primary_keys(self) -> List[str]:
        """Return the columns that identify one summary row."""
        return ["site_id", "stat_date", "site_goods_id"]

    # ======================================================================
    # Extract
    # ======================================================================

    def extract(self, context: TaskContext) -> Dict[str, Any]:
        """
        Fetch the DWD stock rows for every calendar month the window touches.

        The task window is widened to whole months before querying: the lower
        bound moves to the first day of its month and the upper bound to the
        last day of its month. ``transform`` emits one record per month and
        ``load`` upserts it, so a record built from only part of a month would
        carry partial sums and a wrong opening stock.
        NOTE(review): this assumes ``upsert`` replaces the stored values on
        conflict, as the module header states -- confirm against BaseDwsTask.

        Returns:
            Dict with ``rows``, ``site_id``, the requested window as
            ``start_date``/``end_date``, and the range actually queried as
            ``query_start_date``/``query_end_date``.
        """
        # Local import: month lengths are only needed in this method.
        import calendar

        start_date = (
            context.window_start.date()
            if hasattr(context.window_start, "date")
            else context.window_start
        )
        end_date = (
            context.window_end.date()
            if hasattr(context.window_end, "date")
            else context.window_end
        )
        site_id = context.store_id

        # Align the query range to month boundaries. Values that are not
        # dates (e.g. None) are passed through untouched, as before.
        query_start, query_end = start_date, end_date
        if isinstance(query_start, date):
            query_start = _month_first_day(query_start)
        if isinstance(query_end, date):
            last_day = calendar.monthrange(query_end.year, query_end.month)[1]
            query_end = query_end.replace(day=last_day)

        self.logger.info(
            "%s: 提取数据,门店=%s,日期范围 %s ~ %s",
            self.get_task_code(), site_id, query_start, query_end,
        )

        sql = """
            SELECT
                site_goods_id,
                goods_name,
                goods_unit,
                goods_category_id,
                goods_category_second_id,
                category_name,
                range_start_stock,
                range_end_stock,
                range_in,
                range_out,
                range_sale,
                range_sale_money,
                range_inventory,
                current_stock,
                site_id,
                tenant_id,
                fetched_at
            FROM dwd.dwd_goods_stock_summary
            WHERE site_id = %s
              AND DATE(fetched_at) >= %s
              AND DATE(fetched_at) <= %s
            ORDER BY fetched_at
        """
        rows = self.query_dwd(sql, (site_id, query_start, query_end))

        self.logger.info(
            "%s: 提取到 %d 条 DWD 记录", self.get_task_code(), len(rows),
        )

        return {
            "rows": rows,
            "start_date": start_date,
            "end_date": end_date,
            "query_start_date": query_start,
            "query_end_date": query_end,
            "site_id": site_id,
        }

    # ======================================================================
    # Transform
    # ======================================================================

    def transform(
        self, extracted: Dict[str, Any], context: TaskContext
    ) -> List[Dict[str, Any]]:
        """
        Collapse the extracted rows into one record per month and goods item.

        Rows without ``fetched_at`` or ``site_goods_id`` are ignored. Within a
        group the first row supplies the descriptive columns and the opening
        stock, flow columns are summed over all rows, and the closing and
        current stock come from the last row seen (rows arrive ordered by
        ``fetched_at``).
        """
        rows = extracted.get("rows", [])
        site_id = extracted["site_id"]

        if not rows:
            self.logger.info("%s: 无数据需要汇总", self.get_task_code())
            return []

        label_fields = (
            "goods_name", "goods_unit", "goods_category_id",
            "goods_category_second_id", "category_name",
        )
        # Listed in the order the keys are placed on the output record.
        numeric_fields = (
            "range_start_stock", "range_end_stock",
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory", "current_stock",
        )
        summed_fields = (
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory",
        )
        closing_fields = ("range_end_stock", "current_stock")

        # (first day of month, site_goods_id) -> record being accumulated
        agg: Dict[tuple, Dict[str, Any]] = {}

        for row in rows:
            fetched_at = row.get("fetched_at")
            if fetched_at is None:
                continue

            row_date = (
                fetched_at.date()
                if hasattr(fetched_at, "date")
                else fetched_at
            )
            # The first day of the calendar month serves as stat_date.
            first_day = _month_first_day(row_date)

            site_goods_id = row.get("site_goods_id")
            if site_goods_id is None:
                continue

            key = (first_day, site_goods_id)
            rec = agg.get(key)

            if rec is None:
                # First row of the group: seeds every column, including the
                # opening stock.
                rec = {
                    "site_id": site_id,
                    "tenant_id": row.get("tenant_id"),
                    "stat_date": first_day,
                    "site_goods_id": site_goods_id,
                }
                for name in label_fields:
                    rec[name] = row.get(name)
                for name in numeric_fields:
                    rec[name] = self.safe_decimal(row.get(name))
                rec["stat_period"] = "monthly"
                agg[key] = rec
                continue

            # Later rows: add up the flow columns ...
            for name in summed_fields:
                rec[name] += self.safe_decimal(row.get(name))
            # ... and let the most recent row define the closing snapshot.
            for name in closing_fields:
                rec[name] = self.safe_decimal(row.get(name))

        result = list(agg.values())
        self.logger.info(
            "%s: 汇总完成,生成 %d 条月度记录",
            self.get_task_code(), len(result),
        )
        return result

    # ======================================================================
    # Load
    # ======================================================================

    def load(
        self, transformed: List[Dict[str, Any]], context: TaskContext
    ) -> Dict[str, Any]:
        """
        Upsert the summary records and report row counts.

        Returns:
            ``{"counts": {...}}`` with ``fetched``, ``inserted``, ``updated``,
            ``skipped`` and ``errors``; all zero when nothing was passed in.
        """
        if not transformed:
            return {
                "counts": {
                    "fetched": 0,
                    "inserted": 0,
                    "updated": 0,
                    "skipped": 0,
                    "errors": 0,
                }
            }

        columns = [
            "site_id", "tenant_id", "stat_date", "site_goods_id",
            "goods_name", "goods_unit", "goods_category_id",
            "goods_category_second_id", "category_name",
            "range_start_stock", "range_end_stock",
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory", "current_stock",
            "stat_period",
        ]

        inserted, updated = self.upsert(transformed, columns=columns)

        self.logger.info(
            "%s: 写入完成inserted=%d",
            self.get_task_code(), inserted,
        )

        return {
            "counts": {
                "fetched": len(transformed),
                "inserted": inserted,
                "updated": updated,
                "skipped": 0,
                "errors": 0,
            }
        }
__all__ = ["GoodsStockMonthlyTask"]

View File

@@ -0,0 +1,246 @@
# -*- coding: utf-8 -*-
"""
库存周度汇总任务
功能说明:
"门店+ISO周+商品"为粒度,汇总每周库存数据
数据来源:
- dwd.dwd_goods_stock_summary库存汇总明细按 fetched_at 日期聚合)
目标表:
dws.dws_goods_stock_weekly_summary
更新策略:
- 更新频率:每周更新
- 幂等方式upsertON CONFLICT DO UPDATE
业务规则:
- 按 ISO 周分组isocalendarstat_date = 该周的周一日期
- 同一周同一商品可能有多条 DWD 记录
- 数值指标取 SUM 聚合(入库/出库/销售等为累计量)
- current_stock 取该周最后一条记录的值(期末快照)
- range_start_stock 取该周第一条记录的值(期初快照)
- range_end_stock 取该周最后一条记录的值(期末快照)
- stat_period = 'weekly'
"""
from __future__ import annotations
from datetime import date, timedelta
from decimal import Decimal
from typing import Any, Dict, List
from .base_dws_task import BaseDwsTask, TaskContext
def _iso_monday(d: date) -> date:
    """Return the Monday that starts the ISO week containing ``d``."""
    # isoweekday() numbers Monday as 1 ... Sunday as 7, so stepping back
    # (isoweekday - 1) days always lands on the Monday of the same week.
    days_since_monday = d.isoweekday() - 1
    return d - timedelta(days=days_since_monday)
class GoodsStockWeeklyTask(BaseDwsTask):
"""
Weekly goods-stock summary task.

Extracts rows from dwd.dwd_goods_stock_summary, aggregates them per ISO
week, and upserts the result into dws.dws_goods_stock_weekly_summary.
"""
# Name of the date column of the target table.
# NOTE(review): presumably read by BaseDwsTask; its use is not visible here.
DATE_COL = "stat_date"
def get_task_code(self) -> str:
"""Return the task code of this task."""
return "DWS_GOODS_STOCK_WEEKLY"
def get_target_table(self) -> str:
"""Return the name of the DWS target table."""
return "dws_goods_stock_weekly_summary"
def get_primary_keys(self) -> List[str]:
"""Return the key columns: site, week start date (stat_date) and goods id.

NOTE(review): presumably used as the upsert conflict key; confirm in BaseDwsTask.
"""
return ["site_id", "stat_date", "site_goods_id"]
# ======================================================================
# Extract
# ======================================================================
def extract(self, context: TaskContext) -> Dict[str, Any]:
    """Fetch DWD stock-summary rows covering whole ISO weeks of the window.

    ``transform`` sums every row of a week and ``load`` upserts the result
    keyed by the week's Monday. If the task window started or ended in the
    middle of a week, only part of that week would be summed and the upsert
    would overwrite a previously complete weekly row with a partial total.
    The query range is therefore widened outward to full ISO weeks
    (Monday through Sunday).

    Args:
        context: Supplies ``window_start`` / ``window_end`` (``date`` or
            ``datetime``) and ``store_id``.

    Returns:
        Dict with:
            ``rows``: the DWD rows, ordered by ``fetched_at``.
            ``start_date`` / ``end_date``: the requested window as dates.
            ``query_start_date`` / ``query_end_date``: the week-aligned
                range actually queried.
            ``site_id``: the store id taken from ``context``.
    """
    def _as_date(value):
        # datetime exposes .date(); a plain date is used unchanged.
        return value.date() if hasattr(value, "date") else value

    start_date = _as_date(context.window_start)
    end_date = _as_date(context.window_end)
    site_id = context.store_id

    self.logger.info(
        "%s: 提取数据，门店=%s，日期范围 %s ~ %s",
        self.get_task_code(), site_id, start_date, end_date,
    )

    # weekday(): 0 = Monday ... 6 = Sunday. Move the start back to Monday
    # and the end forward to Sunday so every touched week is read in full.
    query_start = start_date - timedelta(days=start_date.weekday())
    query_end = end_date + timedelta(days=6 - end_date.weekday())
    if (query_start, query_end) != (start_date, end_date):
        self.logger.info(
            "%s: 查询范围对齐到完整 ISO 周 %s ~ %s",
            self.get_task_code(), query_start, query_end,
        )

    sql = """
        SELECT
            site_goods_id,
            goods_name,
            goods_unit,
            goods_category_id,
            goods_category_second_id,
            category_name,
            range_start_stock,
            range_end_stock,
            range_in,
            range_out,
            range_sale,
            range_sale_money,
            range_inventory,
            current_stock,
            site_id,
            tenant_id,
            fetched_at
        FROM dwd.dwd_goods_stock_summary
        WHERE site_id = %s
          AND DATE(fetched_at) >= %s
          AND DATE(fetched_at) <= %s
        ORDER BY fetched_at
    """
    rows = self.query_dwd(sql, (site_id, query_start, query_end))
    self.logger.info(
        "%s: 提取到 %d 条 DWD 记录", self.get_task_code(), len(rows),
    )
    return {
        "rows": rows,
        "start_date": start_date,
        "end_date": end_date,
        "query_start_date": query_start,
        "query_end_date": query_end,
        "site_id": site_id,
    }
# ======================================================================
# Transform
# ======================================================================
def transform(
    self, extracted: Dict[str, Any], context: TaskContext
) -> List[Dict[str, Any]]:
    """Collapse DWD rows into one record per (ISO-week Monday, goods id).

    For each key the first row seen supplies the descriptive fields and the
    opening stock. Every later row adds to the flow metrics and overwrites
    the closing/current stock, so with rows ordered by ``fetched_at`` the
    last row of the week provides the closing snapshot.

    Args:
        extracted: Output of ``extract``; ``rows`` and ``site_id`` are read.
        context: Task context; not read by this method.

    Returns:
        The aggregated records, in order of first appearance of each key.
    """
    source_rows = extracted.get("rows", [])
    site_id = extracted["site_id"]
    if not source_rows:
        self.logger.info("%s: 无数据需要汇总", self.get_task_code())
        return []

    to_dec = self.safe_decimal
    # Flow metrics that are summed over all rows of the week.
    summed_fields = (
        "range_in", "range_out", "range_sale",
        "range_sale_money", "range_inventory",
    )
    # Snapshot metrics that always take the value of the latest row.
    snapshot_fields = ("range_end_stock", "current_stock")

    weekly: Dict[tuple, Dict[str, Any]] = {}
    for row in source_rows:
        fetched_at = row.get("fetched_at")
        if fetched_at is None:
            continue
        day = fetched_at.date() if hasattr(fetched_at, "date") else fetched_at
        # stat_date is the Monday of the row's ISO week.
        week_start = _iso_monday(day)
        goods_id = row.get("site_goods_id")
        if goods_id is None:
            continue

        bucket_key = (week_start, goods_id)
        record = weekly.get(bucket_key)
        if record is None:
            # First row of this week/goods pair: seed the record.
            weekly[bucket_key] = {
                "site_id": site_id,
                "tenant_id": row.get("tenant_id"),
                "stat_date": week_start,
                "site_goods_id": goods_id,
                "goods_name": row.get("goods_name"),
                "goods_unit": row.get("goods_unit"),
                "goods_category_id": row.get("goods_category_id"),
                "goods_category_second_id": row.get("goods_category_second_id"),
                "category_name": row.get("category_name"),
                "range_start_stock": to_dec(row.get("range_start_stock")),
                "range_end_stock": to_dec(row.get("range_end_stock")),
                "range_in": to_dec(row.get("range_in")),
                "range_out": to_dec(row.get("range_out")),
                "range_sale": to_dec(row.get("range_sale")),
                "range_sale_money": to_dec(row.get("range_sale_money")),
                "range_inventory": to_dec(row.get("range_inventory")),
                "current_stock": to_dec(row.get("current_stock")),
                "stat_period": "weekly",
            }
            continue

        # Later row: accumulate flows, then refresh the closing snapshot.
        for field in summed_fields:
            record[field] += to_dec(row.get(field))
        for field in snapshot_fields:
            record[field] = to_dec(row.get(field))

    result = list(weekly.values())
    self.logger.info(
        "%s: 汇总完成，生成 %d 条周度记录",
        self.get_task_code(), len(result),
    )
    return result
# ======================================================================
# Load
# ======================================================================
def load(
    self, transformed: List[Dict[str, Any]], context: TaskContext
) -> Dict[str, Any]:
    """Upsert the weekly records into the DWS target table.

    Args:
        transformed: Aggregated records produced by ``transform``.
        context: Task context; not read by this method.

    Returns:
        ``{"counts": {...}}`` where ``fetched`` is the number of records
        handed in, ``inserted`` / ``updated`` come from ``self.upsert`` and
        ``skipped`` / ``errors`` are always 0.
    """
    counts = {
        "fetched": 0,
        "inserted": 0,
        "updated": 0,
        "skipped": 0,
        "errors": 0,
    }
    # With nothing to write the all-zero counters are returned and
    # ``self.upsert`` is never called.
    if transformed:
        target_columns = [
            "site_id", "tenant_id", "stat_date", "site_goods_id",
            "goods_name", "goods_unit", "goods_category_id",
            "goods_category_second_id", "category_name",
            "range_start_stock", "range_end_stock",
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory", "current_stock",
            "stat_period",
        ]
        n_inserted, n_updated = self.upsert(transformed, columns=target_columns)
        self.logger.info(
            "%s: 写入完成inserted=%d",
            self.get_task_code(), n_inserted,
        )
        counts.update(
            fetched=len(transformed),
            inserted=n_inserted,
            updated=n_updated,
        )
    return {"counts": counts}
# Public API of this module: the only name exported by a star import.
__all__ = ["GoodsStockWeeklyTask"]

View File

@@ -8,16 +8,19 @@
- NewconvIndexTask: 新客转化指数 (NCI)
- MlManualImportTask: ML 人工台账导入任务
- RelationIndexTask: 关系指数计算任务RS/OS/MS/ML
- SpendingPowerIndexTask: 消费力指数 (SPI)
"""
from .winback_index_task import WinbackIndexTask
from .newconv_index_task import NewconvIndexTask
from .ml_manual_import_task import MlManualImportTask
from .relation_index_task import RelationIndexTask
from .spending_power_index_task import SpendingPowerIndexTask
# Public API of the index package: the task classes imported above.
__all__ = [
'WinbackIndexTask',
'NewconvIndexTask',
'MlManualImportTask',
'RelationIndexTask',
'SpendingPowerIndexTask',
]

View File

@@ -0,0 +1,767 @@
# -*- coding: utf-8 -*-
"""
SPI 消费力指数任务（Spending Power Index）
设计说明:
1. 直接继承 BaseIndexTask，不经过 MemberIndexBaseTask（无需 NEW/OLD/STOP 分群）
2. 子分计算为 @staticmethod 纯函数，便于属性测试直接调用
3. 三个子分：Level（消费水平）、Speed（消费速度）、Stability（消费稳定性）
4. 结果写入 dws.dws_member_spending_power_index，按 site_id delete-before-insert
"""
from __future__ import annotations
import math
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
from .base_index_task import BaseIndexTask
from ..base_dws_task import TaskContext
# =============================================================================
# 数据类定义
# =============================================================================
@dataclass
class SPIMemberFeatures:
    """Per-member features and scores used by the SPI calculation."""

    member_id: int
    site_id: int

    # --- base features ---
    spend_30: float = 0.0             # total spend, last 30 days
    spend_90: float = 0.0             # total spend, last 90 days
    recharge_90: float = 0.0          # total recharge, last 90 days
    orders_30: int = 0                # number of orders, last 30 days
    orders_90: int = 0                # number of orders, last 90 days
    visit_days_30: int = 0            # distinct spending days, last 30 days
    visit_days_90: int = 0            # distinct spending days, last 90 days
    avg_ticket_90: float = 0.0        # average order value over 90 days
    active_weeks_90: int = 0          # weeks with spend, last 90 days
    daily_spend_ewma_90: float = 0.0  # EWMA of daily spend

    # --- raw sub-scores ---
    score_level_raw: float = 0.0
    score_speed_raw: float = 0.0
    score_stability_raw: float = 0.0

    # --- display sub-scores (filled in after normalisation) ---
    score_level_display: float = 0.0
    score_speed_display: float = 0.0
    score_stability_display: float = 0.0

    # --- total score ---
    raw_score: float = 0.0
    display_score: float = 0.0
# =============================================================================
# SPI 任务
# =============================================================================
class SpendingPowerIndexTask(BaseIndexTask):
"""SPI (Spending Power Index) task: one run yields the Level / Speed / Stability sub-scores and the SPI total score."""
# Index type tag returned by get_index_type().
INDEX_TYPE = "SPI"
# Built-in parameter defaults. execute() overlays the values returned by
# load_index_parameters('SPI') on top of this dict.
DEFAULT_PARAMS: Dict[str, float] = {
# Window parameters
'spend_window_short_days': 30,
'spend_window_long_days': 90,
'ewma_alpha_daily_spend': 0.3,
# Amount-compression bases (initial defaults; may be replaced by
# auto-calibration or by configured values)
'amount_base_spend_30': 500.0,
'amount_base_spend_90': 1500.0,
'amount_base_ticket_90': 200.0,
'amount_base_recharge_90': 1000.0,
'amount_base_speed_abs': 100.0,
'amount_base_ewma_90': 50.0,
# Weights of the Level sub-score terms
'w_level_spend_30': 0.30,
'w_level_spend_90': 0.35,
'w_level_ticket_90': 0.20,
'w_level_recharge_90': 0.15,
# Weights of the Speed sub-score terms
'w_speed_abs': 0.50,
'w_speed_rel': 0.30,
'w_speed_ewma': 0.20,
# Weights of the sub-scores in the total score
'weight_level': 0.60,
'weight_speed': 0.30,
'weight_stability': 0.10,
# Stability parameters
'stability_window_days': 90,
'use_stability': 1,
# Display mapping and smoothing
'percentile_lower': 5,
'percentile_upper': 95,
'compression_mode': 1, # 1 maps to "log1p" in _map_compression
'use_smoothing': 1,
'ewma_alpha': 0.2,
# Speed calculation
'speed_epsilon': 1e-6,
}
# =========================================================================
# Abstract method implementations
# =========================================================================
def get_task_code(self) -> str:
"""Return the task code of this task."""
return "DWS_SPENDING_POWER_INDEX"
def get_target_table(self) -> str:
"""Return the name of the DWS target table."""
return "dws_member_spending_power_index"
def get_primary_keys(self) -> List[str]:
"""Return the key columns: one row per site and member."""
return ["site_id", "member_id"]
def get_index_type(self) -> str:
"""Return the index type tag (INDEX_TYPE)."""
return self.INDEX_TYPE
# =========================================================================
# 辅助方法
# =========================================================================
def _get_site_id(self, context: Optional[TaskContext]) -> int:
    """Resolve the site (store) id for this run.

    Lookup order: ``context.store_id``; then the config keys
    ``app.default_site_id`` / ``app.store_id``; then any one ``site_id``
    found in ``dwd.dwd_settlement_head``. If all of these fail, a warning
    is logged and 0 is returned.
    """
    store_id = getattr(context, "store_id", None) if context else None
    if store_id:
        return int(store_id)

    cfg = self.config
    configured = cfg.get("app.default_site_id") or cfg.get("app.store_id")
    if configured is not None:
        return int(configured)

    # Last resort: take whichever site id the settlement data yields first.
    rows = self.db.query(
        "SELECT DISTINCT site_id FROM dwd.dwd_settlement_head WHERE site_id IS NOT NULL LIMIT 1"
    )
    if not rows:
        self.logger.warning("无法确定门店ID使用 0 继续执行")
        return 0
    return int(dict(rows[0]).get("site_id") or 0)
@staticmethod
def _map_compression(params: Dict[str, float]) -> str:
"""将 compression_mode 数值映射为 batch_normalize_to_display 所需的字符串"""
mode = int(params.get('compression_mode', 0))
if mode == 1:
return "log1p"
if mode == 2:
return "asinh"
return "none"
# =========================================================================
# 核心执行流程
# =========================================================================
def execute(self, context: Optional[TaskContext] = None) -> Dict[str, Any]:
    """Run the SPI pipeline: extract -> score -> normalise -> persist.

    Steps:
        1. Resolve the site id.
        2. Load the SPI parameters (configured values over DEFAULT_PARAMS).
        3. Extract spending features, then merge recharge totals and the
           daily-spend EWMA into them.
        4. Return a ``skipped`` result when there are no features.
        5. Calibrate the amount-compression bases.
        6. Compute Level / Speed / Stability and the raw SPI per member.
        7. Normalise the four raw scores into display scores.
        8. Persist with delete-before-insert.
        9. Save the percentile history of the SPI total score.

    Returns:
        ``{'status': 'skipped', 'reason': 'no_data'}`` when no member has
        spending features, otherwise ``{'status': 'success',
        'member_count': ..., 'records_inserted': ...}``.
    """
    self.logger.info("开始计算 SPI 消费力指数")

    # 1-2. Site id and parameters.
    site_id = self._get_site_id(context)
    params = {**self.DEFAULT_PARAMS, **self.load_index_parameters('SPI')}

    # 3. Features. Members that only recharged (no spending features) are
    #    ignored because they have no entry in ``features``.
    features = self._extract_spending_features(site_id, params)
    for member_id, recharge in self._extract_recharge_features(site_id, params).items():
        if member_id in features:
            features[member_id].recharge_90 = recharge

    ewma_by_member = self._compute_daily_spend_ewma_batch(
        site_id, list(features.keys()), params
    )
    for member_id, ewma_value in ewma_by_member.items():
        if member_id in features:
            features[member_id].daily_spend_ewma_90 = ewma_value

    # 4. Nothing to score (Req 9.4).
    if not features:
        self.logger.info("SPI: site_id=%s 无消费数据，跳过计算", site_id)
        return {'status': 'skipped', 'reason': 'no_data'}

    # 5. Calibrate the amount bases.
    params = self._calibrate_amount_bases(features, params)

    # 6. Raw sub-scores and raw total per member.
    for member in features.values():
        member.score_level_raw = self.compute_level(member, params)
        member.score_speed_raw = self.compute_speed(member, params)
        member.score_stability_raw = self.compute_stability(member, params)
        member.raw_score = self.compute_spi_raw(
            member.score_level_raw,
            member.score_speed_raw,
            member.score_stability_raw,
            params,
        )

    # 7. Normalise each of the four raw scores separately.
    pct_low = int(params.get('percentile_lower', 5))
    pct_high = int(params.get('percentile_upper', 95))
    smoothing_on = int(params.get('use_smoothing', 1)) == 1
    compression = self._map_compression(params)
    members = list(features.values())

    def _display_scores(index_type: str, raw_attr: str) -> Dict[int, float]:
        # One normalisation pass; returns {member_id: display score}.
        normalized = self.batch_normalize_to_display(
            raw_scores=[(m.member_id, getattr(m, raw_attr)) for m in members],
            compression=compression,
            percentile_lower=pct_low,
            percentile_upper=pct_high,
            use_smoothing=smoothing_on,
            site_id=site_id,
            index_type=index_type,
        )
        return {mid: display for mid, _, display in normalized}

    spi_display = _display_scores('SPI', 'raw_score')
    level_display = _display_scores('SPI_LEVEL', 'score_level_raw')
    speed_display = _display_scores('SPI_SPEED', 'score_speed_raw')
    stability_display = _display_scores('SPI_STABILITY', 'score_stability_raw')

    # Write the display scores back; members missing from a result get 0.0.
    for member in members:
        mid = member.member_id
        member.display_score = spi_display.get(mid, 0.0)
        member.score_level_display = level_display.get(mid, 0.0)
        member.score_speed_display = speed_display.get(mid, 0.0)
        member.score_stability_display = stability_display.get(mid, 0.0)

    # 8. Persist (Req 9.3).
    records_inserted = self._save_spi_data(members, site_id)

    # 9. Percentile history of the SPI total score (Req 9.5).
    raw_values = [m.raw_score for m in members]
    q_l, q_u = self.calculate_percentiles(raw_values, pct_low, pct_high)
    if smoothing_on:
        smoothed_l, smoothed_u = self._apply_ewma_smoothing(
            site_id=site_id,
            current_p5=q_l,
            current_p95=q_u,
            index_type='SPI',
        )
    else:
        smoothed_l, smoothed_u = q_l, q_u
    self.save_percentile_history(
        site_id=site_id,
        percentile_5=q_l,
        percentile_95=q_u,
        percentile_5_smoothed=smoothed_l,
        percentile_95_smoothed=smoothed_u,
        record_count=len(raw_values),
        min_raw=min(raw_values),
        max_raw=max(raw_values),
        avg_raw=sum(raw_values) / len(raw_values),
        index_type='SPI',
    )

    self.logger.info(
        "SPI 计算完成: site_id=%s, 会员数=%d, 写入记录=%d",
        site_id, len(members), records_inserted,
    )
    return {
        'status': 'success',
        'member_count': len(members),
        'records_inserted': records_inserted,
    }
# =========================================================================
# 数据提取(后续任务实现)
# =========================================================================
def _extract_spending_features(
    self, site_id: int, params: Dict[str, float]
) -> Dict[int, SPIMemberFeatures]:
    """Aggregate spending features per member from dwd_settlement_head.

    Reads consumption settlements (``settle_type IN (1, 3)``) of the long
    window and aggregates them per canonical member id into spend, order
    count and visit-day figures for both windows, plus ``avg_ticket_90``
    and ``active_weeks_90``. The member id is ``member_id`` when it is
    non-zero, otherwise the ``tenant_member_id`` of the joined card account.

    Args:
        site_id: Store to read; passed as a bound SQL parameter.
        params: Supplies ``spend_window_short_days`` (default 30) and
            ``spend_window_long_days`` (default 90).

    Returns:
        ``{member_id: SPIMemberFeatures}`` with the base features filled in.
    """
    short_days = int(params.get('spend_window_short_days', 30))
    long_days = int(params.get('spend_window_long_days', 90))

    # A single statement aggregates both windows. The day counts are
    # interpolated only after int() conversion; site_id is a bound parameter.
    sql = f"""
        WITH consume_source AS (
            SELECT
                COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
                    AS canonical_member_id,
                s.pay_time,
                COALESCE(s.pay_amount, 0) AS pay_amount
            FROM dwd.dwd_settlement_head s
            LEFT JOIN dwd.dim_member_card_account mca
                ON s.member_card_account_id = mca.member_card_id
                AND mca.scd2_is_current = 1
                AND mca.register_site_id = s.site_id
                AND COALESCE(mca.is_delete, 0) = 0
            WHERE s.site_id = %s
                AND s.settle_type IN (1, 3)
                AND s.pay_time >= NOW() - INTERVAL '{long_days} days'
        )
        SELECT
            canonical_member_id AS member_id,
            -- 90 天窗口
            SUM(pay_amount) AS spend_90,
            COUNT(*) AS orders_90,
            COUNT(DISTINCT DATE(pay_time)) AS visit_days_90,
            COUNT(DISTINCT EXTRACT(ISOYEAR FROM pay_time)::int * 100
                + EXTRACT(WEEK FROM pay_time)::int) AS active_weeks_90,
            -- 30 天窗口（子集过滤）
            SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
                THEN pay_amount ELSE 0 END) AS spend_30,
            SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
                THEN 1 ELSE 0 END) AS orders_30,
            COUNT(DISTINCT CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
                THEN DATE(pay_time) END) AS visit_days_30
        FROM consume_source
        WHERE canonical_member_id > 0
        GROUP BY canonical_member_id
    """
    rows = self.db.query(sql, (site_id,))

    by_member: Dict[int, SPIMemberFeatures] = {}
    for raw in rows or []:
        rec = dict(raw)
        member_id = int(rec['member_id'])
        order_count_90 = int(rec['orders_90'] or 0)
        total_90 = float(rec['spend_90'] or 0)
        # Average ticket guards against division by zero (Req 2.4).
        ticket_90 = total_90 / max(order_count_90, 1)
        # Active weeks are capped at 13 (Req 2.5).
        weeks_90 = min(int(rec['active_weeks_90'] or 0), 13)
        by_member[member_id] = SPIMemberFeatures(
            member_id=member_id,
            site_id=site_id,
            spend_30=float(rec['spend_30'] or 0),
            spend_90=total_90,
            orders_30=int(rec['orders_30'] or 0),
            orders_90=order_count_90,
            visit_days_30=int(rec['visit_days_30'] or 0),
            visit_days_90=int(rec['visit_days_90'] or 0),
            avg_ticket_90=ticket_90,
            active_weeks_90=weeks_90,
        )

    self.logger.info(
        "SPI 消费特征提取完成: site_id=%s, 会员数=%d, 窗口=%d/%d",
        site_id, len(by_member), short_days, long_days,
    )
    return by_member
def _extract_recharge_features(
    self, site_id: int, params: Dict[str, float]
) -> Dict[int, float]:
    """Sum recharge amounts per member from dwd_recharge_order.

    Reads recharge orders (``settle_type = 5``) of the long window and sums
    ``pay_amount`` per canonical member id. The member id is ``member_id``
    when it is non-zero, otherwise the ``tenant_member_id`` of the joined
    card account.

    Args:
        site_id: Store to read; passed as a bound SQL parameter.
        params: Supplies ``spend_window_long_days`` (default 90).

    Returns:
        ``{member_id: recharge_90}``.
    """
    long_days = int(params.get('spend_window_long_days', 90))
    sql = f"""
        WITH recharge_source AS (
            SELECT
                COALESCE(NULLIF(r.member_id, 0), mca.tenant_member_id)
                    AS canonical_member_id,
                COALESCE(r.pay_amount, 0) AS pay_amount
            FROM dwd.dwd_recharge_order r
            LEFT JOIN dwd.dim_member_card_account mca
                ON r.tenant_member_card_id = mca.member_card_id
                AND mca.scd2_is_current = 1
                AND mca.register_site_id = r.site_id
                AND COALESCE(mca.is_delete, 0) = 0
            WHERE r.site_id = %s
                AND r.settle_type = 5
                AND r.pay_time >= NOW() - INTERVAL '{long_days} days'
        )
        SELECT
            canonical_member_id AS member_id,
            SUM(pay_amount) AS recharge_90
        FROM recharge_source
        WHERE canonical_member_id > 0
        GROUP BY canonical_member_id
    """
    rows = self.db.query(sql, (site_id,))

    recharge_by_member: Dict[int, float] = {
        int(rec['member_id']): float(rec['recharge_90'] or 0)
        for rec in map(dict, rows or [])
    }

    self.logger.info(
        "SPI 充值特征提取完成: site_id=%s, 有充值会员数=%d, 窗口=%d",
        site_id, len(recharge_by_member), long_days,
    )
    return recharge_by_member
def _compute_daily_spend_ewma(
    self, site_id: int, member_id: int, params: Dict[str, float]
) -> float:
    """Compute the EWMA of one member's daily spend over the long window.

    Daily totals of consumption settlements (``settle_type IN (1, 3)``) are
    read in ascending date order and folded with

        S_0 = X_0,   S_t = alpha * X_t + (1 - alpha) * S_{t-1}

    Only days returned by the query take part in the recursion.

    Args:
        site_id: Store to read.
        member_id: Canonical member id to filter on.
        params: Supplies ``spend_window_long_days`` (default 90) and
            ``ewma_alpha_daily_spend`` (default 0.3).

    Returns:
        The final EWMA value, or 0.0 when the query returns no rows.
    """
    long_days = int(params.get('spend_window_long_days', 90))
    alpha = float(params.get('ewma_alpha_daily_spend', 0.3))
    sql = f"""
        WITH consume_source AS (
            SELECT
                COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
                    AS canonical_member_id,
                DATE(s.pay_time) AS pay_date,
                COALESCE(s.pay_amount, 0) AS pay_amount
            FROM dwd.dwd_settlement_head s
            LEFT JOIN dwd.dim_member_card_account mca
                ON s.member_card_account_id = mca.member_card_id
                AND mca.scd2_is_current = 1
                AND mca.register_site_id = s.site_id
                AND COALESCE(mca.is_delete, 0) = 0
            WHERE s.site_id = %s
                AND s.settle_type IN (1, 3)
                AND s.pay_time >= NOW() - INTERVAL '{long_days} days'
        )
        SELECT pay_date, SUM(pay_amount) AS daily_spend
        FROM consume_source
        WHERE canonical_member_id = %s
        GROUP BY pay_date
        ORDER BY pay_date
    """
    rows = self.db.query(sql, (site_id, member_id))
    if not rows:
        return 0.0

    daily_totals = [float(dict(row)['daily_spend'] or 0) for row in rows]
    # Seed with the first day, then apply the recursion to the rest.
    smoothed = daily_totals[0]
    for spend in daily_totals[1:]:
        smoothed = alpha * spend + (1 - alpha) * smoothed
    return smoothed
def _compute_daily_spend_ewma_batch(
    self, site_id: int, member_ids: List[int], params: Dict[str, float]
) -> Dict[int, float]:
    """Compute the daily-spend EWMA for several members with one query.

    Daily totals are read for the whole site in (member, date) order and
    folded per member with

        S_0 = X_0,   S_t = alpha * X_t + (1 - alpha) * S_{t-1}

    Only members listed in ``member_ids`` are returned. Previously the
    argument was used for the emptiness check only, so the result could
    contain members the caller never asked for.

    Args:
        site_id: Store to read; passed as a bound SQL parameter.
        member_ids: Members whose EWMA is wanted; an empty list
            short-circuits to ``{}`` without querying.
        params: Supplies ``spend_window_long_days`` (default 90) and
            ``ewma_alpha_daily_spend`` (default 0.3).

    Returns:
        ``{member_id: daily_spend_ewma_90}`` for requested members that have
        at least one row. Members without rows are absent; callers should
        treat them as 0.0.
    """
    if not member_ids:
        return {}
    wanted = set(member_ids)
    long_days = int(params.get('spend_window_long_days', 90))
    alpha = float(params.get('ewma_alpha_daily_spend', 0.3))
    sql = f"""
        WITH consume_source AS (
            SELECT
                COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
                    AS canonical_member_id,
                DATE(s.pay_time) AS pay_date,
                COALESCE(s.pay_amount, 0) AS pay_amount
            FROM dwd.dwd_settlement_head s
            LEFT JOIN dwd.dim_member_card_account mca
                ON s.member_card_account_id = mca.member_card_id
                AND mca.scd2_is_current = 1
                AND mca.register_site_id = s.site_id
                AND COALESCE(mca.is_delete, 0) = 0
            WHERE s.site_id = %s
                AND s.settle_type IN (1, 3)
                AND s.pay_time >= NOW() - INTERVAL '{long_days} days'
        )
        SELECT canonical_member_id AS member_id,
            pay_date,
            SUM(pay_amount) AS daily_spend
        FROM consume_source
        WHERE canonical_member_id > 0
        GROUP BY canonical_member_id, pay_date
        ORDER BY canonical_member_id, pay_date
    """
    rows = self.db.query(sql, (site_id,))

    result: Dict[int, float] = {}
    if not rows:
        return result

    # Rows arrive in ascending date order per member, so the running value
    # can be kept directly in the result dict.
    for row in rows:
        rec = dict(row)
        mid = int(rec['member_id'])
        if mid not in wanted:
            continue
        spend = float(rec['daily_spend'] or 0)
        if mid in result:
            result[mid] = alpha * spend + (1 - alpha) * result[mid]
        else:
            result[mid] = spend  # S_0 = X_0

    self.logger.info(
        "SPI 日消费 EWMA 批量计算完成: site_id=%s, 会员数=%d, α=%.2f",
        site_id, len(result), alpha,
    )
    return result
def _calibrate_amount_bases(
    self, features: Dict[int, SPIMemberFeatures], params: Dict[str, float]
) -> Dict[str, float]:
    """Calibrate the amount-compression bases from the store's own data.

    For each ``amount_base_*`` key:
        * a value in ``params`` that differs from ``DEFAULT_PARAMS`` is
          taken to be a configured value and kept;
        * otherwise the median of the matching feature over all members is
          used when it is positive;
        * otherwise the ``DEFAULT_PARAMS`` value is used.

    Args:
        features: Member features keyed by member id.
        params: Current parameters; not modified.

    Returns:
        A copy of ``params`` with the calibrated bases applied.
    """
    # (parameter name, how to read the matching value from a member)
    base_sources = (
        ('amount_base_spend_30', lambda f: f.spend_30),
        ('amount_base_spend_90', lambda f: f.spend_90),
        ('amount_base_ticket_90', lambda f: f.avg_ticket_90),
        ('amount_base_recharge_90', lambda f: f.recharge_90),
        ('amount_base_speed_abs', lambda f: f.spend_30 / max(f.visit_days_30, 1)),
        ('amount_base_ewma_90', lambda f: f.daily_spend_ewma_90),
    )
    defaults = self.DEFAULT_PARAMS
    calibrated = dict(params)

    for base_key, read_value in base_sources:
        if base_key in params and params[base_key] != defaults.get(base_key):
            # A non-default value counts as configured: leave it untouched.
            self.logger.info(
                "SPI 基数校准: %s 使用配置表值 %.2f", base_key, params[base_key],
            )
            continue

        median_val = self.calculate_median(
            [read_value(member) for member in features.values()]
        )
        if median_val > 0:
            calibrated[base_key] = median_val
            self.logger.info(
                "SPI 基数校准: %s 自动校准为中位数 %.2f", base_key, median_val,
            )
            continue

        # A non-positive median cannot serve as a divisor base.
        calibrated[base_key] = defaults[base_key]
        self.logger.warning(
            "SPI 基数校准: %s 中位数 %.2f ≤ 0回退到默认值 %.2f",
            base_key, median_val, defaults[base_key],
        )
    return calibrated
# =========================================================================
# 子分计算(纯函数,后续任务实现具体逻辑)
# =========================================================================
@staticmethod
def compute_level(features: SPIMemberFeatures, params: Dict[str, float]) -> float:
"""Level 子分:消费水平
L = w_s30 × ln(1 + spend_30/M30)
+ w_s90 × ln(1 + spend_90/M90)
+ w_ticket × ln(1 + avg_ticket_90/T0)
+ w_r90 × ln(1 + recharge_90/R90)
"""
return (
params['w_level_spend_30'] * math.log1p(features.spend_30 / params['amount_base_spend_30'])
+ params['w_level_spend_90'] * math.log1p(features.spend_90 / params['amount_base_spend_90'])
+ params['w_level_ticket_90'] * math.log1p(features.avg_ticket_90 / params['amount_base_ticket_90'])
+ params['w_level_recharge_90'] * math.log1p(features.recharge_90 / params['amount_base_recharge_90'])
)
@staticmethod
def compute_speed(features: SPIMemberFeatures, params: Dict[str, float]) -> float:
"""Speed 子分:消费速度
V_abs = ln(1 + spend_30 / (max(visit_days_30, 1) × V0))
V_rel = ln((v_30 + ε) / (v_90 + ε)), v_30=spend_30/30, v_90=spend_90/90
V_ewma = ln(1 + daily_spend_ewma_90 / E0)
S = w_abs × V_abs + w_rel × max(0, V_rel) + w_ewma × V_ewma
仅对加速V_rel > 0加分不对减速扣分Req 4.5)。
"""
eps = params.get('speed_epsilon', 1e-6)
# 绝对速度Req 4.1
v_abs = math.log1p(
features.spend_30
/ (max(features.visit_days_30, 1) * params['amount_base_speed_abs'])
)
# 相对速度Req 4.2)——仅加速加分
v_30 = features.spend_30 / 30.0
v_90 = features.spend_90 / 90.0
v_rel = math.log((v_30 + eps) / (v_90 + eps))
# EWMA 速度Req 4.3
v_ewma = math.log1p(
features.daily_spend_ewma_90 / params['amount_base_ewma_90']
)
# 加权合成Req 4.4, 4.5
return (
params['w_speed_abs'] * v_abs
+ params['w_speed_rel'] * max(0.0, v_rel)
+ params['w_speed_ewma'] * v_ewma
)
@staticmethod
def compute_stability(features: SPIMemberFeatures, params: Dict[str, float]) -> float:
"""Stability 子分:消费稳定性
P = active_weeks_90 / 13周覆盖率
use_stability=0 时返回 0.0Req 5.3
取值范围 [0, 1]Req 5.4
"""
# 配置关闭稳定性时直接返回 0Req 5.3
if params.get('use_stability', 1) == 0:
return 0.0
# 周覆盖率Req 5.1, 5.2
return features.active_weeks_90 / 13.0
@staticmethod
def compute_spi_raw(
level: float, speed: float, stability: float, params: Dict[str, float]
) -> float:
"""SPI 总分合成
SPI_raw = w_L × L + w_S × S + w_P × P
默认权重 w_L=0.60, w_S=0.30, w_P=0.10Req 6.1
"""
w_l = params.get('weight_level', 0.60)
w_s = params.get('weight_speed', 0.30)
w_p = params.get('weight_stability', 0.10)
return w_l * level + w_s * speed + w_p * stability
# =========================================================================
# 持久化(后续任务实现)
# =========================================================================
def _save_spi_data(
    self, data_list: List[SPIMemberFeatures], site_id: int
) -> int:
    """Replace the site's rows in dws_member_spending_power_index.

    All existing rows of ``site_id`` are deleted and ``data_list`` is
    inserted in the same transaction (delete-before-insert, Req 9.3).
    When any statement fails the transaction is rolled back before the
    exception is re-raised, so the connection is not left inside a failed
    transaction.

    Args:
        data_list: Member features and scores to persist.
        site_id: Store whose rows are replaced.

    Returns:
        The number of inserted rows (sum of the cursor row counts), or 0
        when ``data_list`` is empty.

    Raises:
        Exception: Whatever the database driver raised, after the rollback.
    """
    conn = self.db.conn
    insert_sql = """
        INSERT INTO dws.dws_member_spending_power_index (
            site_id, member_id,
            spend_30, spend_90, recharge_90,
            orders_30, orders_90,
            visit_days_30, visit_days_90,
            avg_ticket_90, active_weeks_90, daily_spend_ewma_90,
            score_level_raw, score_speed_raw, score_stability_raw,
            score_level_display, score_speed_display, score_stability_display,
            raw_score, display_score,
            calc_time, created_at, updated_at
        ) VALUES (
            %s, %s,
            %s, %s, %s,
            %s, %s,
            %s, %s,
            %s, %s, %s,
            %s, %s, %s,
            %s, %s, %s,
            %s, %s,
            NOW(), NOW(), NOW()
        )
    """
    try:
        with conn.cursor() as cur:
            # Remove the site's previous rows first (Req 9.3).
            cur.execute(
                "DELETE FROM dws.dws_member_spending_power_index WHERE site_id = %s",
                (site_id,),
            )
            if not data_list:
                conn.commit()
                return 0

            inserted = 0
            for f in data_list:
                cur.execute(insert_sql, (
                    f.site_id, f.member_id,
                    f.spend_30, f.spend_90, f.recharge_90,
                    f.orders_30, f.orders_90,
                    f.visit_days_30, f.visit_days_90,
                    f.avg_ticket_90, f.active_weeks_90, f.daily_spend_ewma_90,
                    f.score_level_raw, f.score_speed_raw, f.score_stability_raw,
                    f.score_level_display, f.score_speed_display, f.score_stability_display,
                    f.raw_score, f.display_score,
                ))
                # A driver may report -1 for an unknown row count.
                inserted += max(cur.rowcount, 0)
            conn.commit()
    except Exception:
        # Undo the DELETE and any partial inserts, then let the caller see
        # the original error.
        conn.rollback()
        raise

    self.logger.info(
        "SPI 数据写入完成: site_id=%s, 插入记录=%d", site_id, inserted,
    )
    return inserted

View File

@@ -233,21 +233,67 @@ class MemberConsumptionTask(BaseDwsTask):
def _extract_member_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
"""
提取会员信息
生日优先级手动补录fdw_app.member_birthday_manual> API 来源dim_member.birthday
FDW 连接失败时降级为仅使用 dim_member.birthday
"""
sql = """
# CHANGE 2026-02-21 | dim_member 无 site_id 字段,改用 register_site_id
# CHANGE 2026-02-22 | 恢复 birthday 字段C1 迁移已加列),供后续 C2 COALESCE 使用
# CHANGE 2026-02-22 | 需求 B通过事实表反查支持跨店消费会员
# CHANGE 2026-02-22 | 需求 C2COALESCE 优先手动补录生日FDW 失败时降级
sql_with_fdw = """
SELECT
m.member_id,
m.nickname,
m.mobile,
m.member_card_grade_name,
DATE(m.create_time) AS register_date,
m.recharge_money_sum,
COALESCE(
(SELECT birthday_value
FROM fdw_app.member_birthday_manual
WHERE member_id = m.member_id
ORDER BY recorded_at ASC
LIMIT 1),
m.birthday
) AS birthday
FROM dwd.dim_member m
WHERE m.member_id IN (
SELECT DISTINCT tenant_member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
) AND m.scd2_is_current = 1
"""
sql_fallback = """
SELECT
member_id,
nickname,
mobile,
member_card_grade_name,
DATE(create_time) AS register_date,
recharge_money_sum
recharge_money_sum,
birthday
FROM dwd.dim_member
WHERE site_id = %s
AND scd2_is_current = 1
WHERE member_id IN (
SELECT DISTINCT tenant_member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
) AND scd2_is_current = 1
"""
rows = self.db.query(sql, (site_id,))
try:
rows = self.db.query(sql_with_fdw, (site_id,))
except Exception as exc:
# FDW 连接失败,降级为仅使用 dim_member.birthday
self.logger.warning(
"%s: FDW 读取 member_birthday_manual 失败,降级为 dim_member.birthday — %s",
self.get_task_code(), exc,
)
rows = self.db.query(sql_fallback, (site_id,))
result = {}
for row in (rows or []):
row_dict = dict(row)
@@ -262,14 +308,21 @@ class MemberConsumptionTask(BaseDwsTask):
CASH_CARD_TYPE_ID = 2793249295533893
GIFT_CARD_TYPE_IDS = [2791990152417157, 2793266846533445, 2794699703437125]
# CHANGE 2026-02-21 | dim_member_card_account 无 site_id 字段,改用 register_site_id
# CHANGE 2026-02-22 | 需求 B通过事实表反查支持跨店消费会员
sql = """
SELECT
tenant_member_id AS member_id,
card_type_id,
balance
FROM dwd.dim_member_card_account
WHERE site_id = %s
AND scd2_is_current = 1
WHERE tenant_member_id IN (
SELECT DISTINCT tenant_member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
) AND scd2_is_current = 1
AND COALESCE(is_delete, 0) = 0
"""
rows = self.db.query(sql, (site_id,))

View File

@@ -175,6 +175,7 @@ class MemberVisitTask(BaseDwsTask):
# 会员信息
'member_nickname': memb_info.get('nickname'),
'member_mobile': self._mask_mobile(memb_info.get('mobile')),
# CHANGE 2026-02-22 | 恢复从 dim_member.birthday 读取
'member_birthday': memb_info.get('birthday'),
# 台桌信息
'table_id': table_id,
@@ -302,28 +303,73 @@ class MemberVisitTask(BaseDwsTask):
def _extract_member_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
"""
提取会员信息
生日优先级手动补录fdw_app.member_birthday_manual> API 来源dim_member.birthday
FDW 连接失败时降级为仅使用 dim_member.birthday
"""
sql = """
# CHANGE 2026-02-21 | dim_member 无 site_id 字段,改用 register_site_id
# CHANGE 2026-02-22 | 恢复 birthday 字段C1 迁移已加列)
# CHANGE 2026-02-22 | 需求 B通过事实表反查支持跨店消费会员
# CHANGE 2026-02-22 | 需求 C2COALESCE 优先手动补录生日FDW 失败时降级
sql_with_fdw = """
SELECT
m.member_id,
m.nickname,
m.mobile,
COALESCE(
(SELECT birthday_value
FROM fdw_app.member_birthday_manual
WHERE member_id = m.member_id
ORDER BY recorded_at ASC
LIMIT 1),
m.birthday
) AS birthday
FROM dwd.dim_member m
WHERE m.member_id IN (
SELECT DISTINCT tenant_member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
) AND m.scd2_is_current = 1
"""
sql_fallback = """
SELECT
member_id,
nickname,
mobile,
birthday
FROM dwd.dim_member
WHERE site_id = %s
AND scd2_is_current = 1
WHERE member_id IN (
SELECT DISTINCT tenant_member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
) AND scd2_is_current = 1
"""
rows = self.db.query(sql, (site_id,))
try:
rows = self.db.query(sql_with_fdw, (site_id,))
except Exception as exc:
# FDW 连接失败,降级为仅使用 dim_member.birthday
self.logger.warning(
"%s: FDW 读取 member_birthday_manual 失败,降级为 dim_member.birthday — %s",
self.get_task_code(), exc,
)
rows = self.db.query(sql_fallback, (site_id,))
return {r['member_id']: dict(r) for r in (rows or [])}
def _extract_table_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
"""
提取台桌信息
"""
# CHANGE 2026-02-22 | BUG 6 修复 | dim_table 无 site_table_id/site_table_name
# 正确字段为 table_id/table_name参考 dwd.sql DDL
sql = """
SELECT
site_table_id AS table_id,
site_table_name AS table_name,
table_id AS table_id,
table_name AS table_name,
site_table_area_name AS area_name
FROM dwd.dim_table
WHERE site_id = %s