feat: 累积功能变更 — 聊天集成、租户管理、小程序更新、ETL 增强、迁移脚本

包含多个会话的累积代码变更:
- backend: AI 聊天服务、触发器调度、认证增强、WebSocket、调度器最小间隔
- admin-web: ETL 状态页、任务管理、调度配置、登录优化
- miniprogram: 看板页面、聊天集成、UI 组件、导航更新
- etl: DWS 新任务(finance_area_daily/board_cache)、连接器增强
- tenant-admin: 项目初始化
- db: 19 个迁移脚本(etl_feiqiu 11 + zqyy_app 8)
- packages/shared: 枚举和工具函数更新
- tools: 数据库工具、报表生成、健康检查
- docs: PRD/架构/部署/合约文档更新

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Neo
2026-04-06 00:03:48 +08:00
parent 70324d8542
commit 6f8f12314f
515 changed files with 76604 additions and 7456 deletions

View File

@@ -26,6 +26,9 @@ from .finance_daily_task import FinanceDailyTask
from .finance_recharge_task import FinanceRechargeTask
from .finance_income_task import FinanceIncomeStructureTask
from .finance_discount_task import FinanceDiscountDetailTask
from .finance_area_daily import FinanceAreaDailyTask
from .finance_board_cache import FinanceBoardCacheTask
from .coach_area_hours_task import CoachAreaHoursTask
from .finance_base_task import FinanceBaseTask
from .maintenance_task import DwsMaintenanceTask
from .goods_stock_daily_task import GoodsStockDailyTask
@@ -63,9 +66,12 @@ __all__ = [
# 财务维度
"FinanceBaseTask",
"FinanceDailyTask",
"FinanceAreaDailyTask",
"FinanceBoardCacheTask",
"FinanceRechargeTask",
"FinanceIncomeStructureTask",
"FinanceDiscountDetailTask",
"CoachAreaHoursTask",
"DwsMaintenanceTask",
# 库存维度
"GoodsStockDailyTask",

View File

@@ -155,6 +155,11 @@ class BaseDwsTask(BaseTask):
# 子类声明日期列名,用于 extract 时间过滤和 load 幂等删除
# 未声明时 load() 回退到 "stat_date"
DATE_COL: str | None = None
# CHANGE 2026-03-22 | P14: AI 触发集成
# 子类设置此属性以在任务成功后触发 AI 事件
# 值为事件类型字符串(如 "dws_completed" / "consumption"None 表示不触发
AI_TRIGGER_EVENT: str | None = None
# ==========================================================================
# 抽象方法(子类必须实现)
@@ -239,6 +244,38 @@ class BaseDwsTask(BaseTask):
"extra": {"deleted": deleted},
}
# ==========================================================================
# 主流程覆盖 — AI 触发集成
# ==========================================================================
def execute(self, cursor_data: dict | None = None) -> dict:
"""覆盖 BaseTask.execute(),成功后触发 AI 事件(如已配置)。"""
result = super().execute(cursor_data)
# CHANGE 2026-03-22 | P14: DWS 任务完成后触发 AI 事件
if self.AI_TRIGGER_EVENT and result.get("status") == "SUCCESS":
self._fire_ai_trigger()
return result
def _fire_ai_trigger(self) -> None:
"""发送 AI 触发事件,失败不中断任务流程。"""
try:
from utils.ai_trigger import trigger_ai_event
site_id = self.config.get("app.store_id")
trigger_ai_event(
event_type=self.AI_TRIGGER_EVENT,
site_id=site_id,
payload={"task_code": self.get_task_code()},
)
except Exception:
self.logger.warning(
"%s: AI 触发失败,不影响任务结果",
self.get_task_code(),
exc_info=True,
)
# ==========================================================================
# 时间计算方法
# ==========================================================================

View File

@@ -0,0 +1,244 @@
# -*- coding: utf-8 -*-
"""
DWS_COACH_AREA_HOURS — 助教区域课时汇总表
按 (site_id, stat_month, assistant_id, area_code) 粒度聚合助教课时,
为财务看板助教分析板块的区域过滤提供数据支撑。
数据来源dwd_assistant_service_log + dwd_assistant_service_log_ex + dim_table
更新策略delete-before-insert按 site_id + stat_month 删除后插入)
CHANGE 2026-03-29 | Prompt: 助教分析按区域细化 | 新建 ETL 任务
"""
from __future__ import annotations
import logging
from datetime import date
from decimal import Decimal
from typing import Any, Dict, List
from neozqyy_shared.area_mapping import (
ALL_AREA_CODES,
SPECIFIC_AREA_CODES,
resolve_area_code,
)
from tasks.dws.base_dws_task import BaseDwsTask, TaskContext
logger = logging.getLogger(__name__)
_ZERO = Decimal("0")
# 课程类型映射skill_name → 字段前缀)
_SKILL_MAP = {
"基础课": "base",
"附加课": "bonus",
"包厢课": "room",
}
class CoachAreaHoursTask(BaseDwsTask):
"""助教区域课时汇总 ETL 任务"""
def get_task_code(self) -> str:
return "DWS_COACH_AREA_HOURS"
def get_target_table(self) -> str:
return "dws.dws_coach_area_hours"
def get_primary_keys(self) -> List[str]:
return ["site_id", "stat_month", "assistant_id", "area_code"]
def extract(self, context: TaskContext) -> Dict[str, Any]:
end_date = (
context.window_end.date()
if hasattr(context.window_end, "date")
else context.window_end
)
site_id = context.store_id
# 取当月第一天
stat_month = end_date.replace(day=1)
sql = """
SELECT
s.site_assistant_id AS assistant_id,
dt.site_table_area_name AS area_name,
s.skill_name,
s.income_seconds,
COALESCE(ex.is_trash, 0) AS is_trash
FROM dwd.dwd_assistant_service_log s
LEFT JOIN dwd.dwd_assistant_service_log_ex ex
ON s.assistant_service_id = ex.assistant_service_id
LEFT JOIN dwd.dim_table dt
ON s.site_table_id = dt.table_id
AND dt.scd2_is_current = 1
WHERE s.site_id = %s
AND s.is_delete = 0
AND DATE(s.start_use_time) >= %s
AND DATE(s.start_use_time) < %s
"""
# 下月第一天
if stat_month.month == 12:
next_month = stat_month.replace(year=stat_month.year + 1, month=1)
else:
next_month = stat_month.replace(month=stat_month.month + 1)
rows = self.db.query(sql, (site_id, stat_month, next_month))
records = [dict(r) for r in rows] if rows else []
self.logger.info(
"%s: 提取 %d 条服务记录,月份 %s",
self.get_task_code(), len(records), stat_month,
)
return {
"records": records,
"stat_month": stat_month,
"site_id": site_id,
"tenant_id": self.config.get("app.tenant_id", site_id),
}
def transform(
self, extracted: Dict[str, Any], context: TaskContext
) -> List[Dict[str, Any]]:
return transform_coach_area_hours(
records=extracted["records"],
stat_month=extracted["stat_month"],
site_id=extracted["site_id"],
tenant_id=extracted["tenant_id"],
)
def load(
self, transformed: List[Dict[str, Any]], context: TaskContext
) -> dict:
if not transformed:
return {"inserted": 0}
site_id = transformed[0]["site_id"]
stat_month = transformed[0]["stat_month"]
# delete-before-insert
self.db.execute(
"DELETE FROM dws.dws_coach_area_hours WHERE site_id = %s AND stat_month = %s",
(site_id, stat_month),
)
cols = [
"site_id", "tenant_id", "stat_month", "assistant_id", "area_code",
"base_hours", "bonus_hours", "room_hours",
"effective_hours", "trashed_hours",
"base_service_count", "bonus_service_count", "room_service_count",
]
placeholders = ", ".join(["%s"] * len(cols))
insert_sql = f"INSERT INTO dws.dws_coach_area_hours ({', '.join(cols)}) VALUES ({placeholders})"
for row in transformed:
self.db.execute(insert_sql, tuple(row[c] for c in cols))
self.logger.info(
"%s: 写入 %dsite_id=%s, month=%s",
self.get_task_code(), len(transformed), site_id, stat_month,
)
return {"inserted": len(transformed)}
# ── 纯函数(可被回填脚本和属性测试直接调用) ─────────────────────────
def transform_coach_area_hours(
records: List[Dict[str, Any]],
stat_month: date,
site_id: int,
tenant_id: int,
) -> List[Dict[str, Any]]:
"""
将 DWD 服务记录按 (assistant_id, area_code) 聚合为区域课时。
返回列表,每个元素对应一行 dws_coach_area_hours。
包含 9 个 area_codehallA~ktv + hall + all× N 个助教。
"""
# 按 (assistant_id, area_code) 聚合
# key: (assistant_id, area_code) → {base_hours, bonus_hours, ...}
agg: Dict[tuple, Dict[str, Any]] = {}
for rec in records:
assistant_id = rec["assistant_id"]
area_name = rec.get("area_name")
skill_name = rec.get("skill_name")
income_seconds = rec.get("income_seconds") or 0
is_trash = rec.get("is_trash", 0)
hours = Decimal(str(income_seconds)) / Decimal("3600")
skill_prefix = _SKILL_MAP.get(skill_name) # base/bonus/room/None
# 解析区域
area_code = resolve_area_code(area_name) if area_name else None
if area_code is None:
# 未知区域或 NULL table_id → 不计入具体区域,只计入 all
target_areas = []
else:
target_areas = [area_code]
# 更新具体区域
for ac in target_areas:
_accumulate(agg, assistant_id, ac, skill_prefix, hours, is_trash)
# CHANGE 2026-03-29 | hall = 台球大厅hallA+hallB+hallC不含 vip/snooker/mahjong/ktv
if area_code in ("hallA", "hallB", "hallC"):
_accumulate(agg, assistant_id, "hall", skill_prefix, hours, is_trash)
# all 始终累加
_accumulate(agg, assistant_id, "all", skill_prefix, hours, is_trash)
# 构建输出行
result = []
for (aid, ac), data in sorted(agg.items()):
total_hours = data["base_hours"] + data["bonus_hours"] + data["room_hours"]
effective = total_hours - data["trashed_hours"]
result.append({
"site_id": site_id,
"tenant_id": tenant_id,
"stat_month": stat_month,
"assistant_id": aid,
"area_code": ac,
"base_hours": data["base_hours"],
"bonus_hours": data["bonus_hours"],
"room_hours": data["room_hours"],
"effective_hours": max(effective, _ZERO),
"trashed_hours": data["trashed_hours"],
"base_service_count": data["base_service_count"],
"bonus_service_count": data["bonus_service_count"],
"room_service_count": data["room_service_count"],
})
return result
def _accumulate(
agg: Dict[tuple, Dict[str, Any]],
assistant_id: int,
area_code: str,
skill_prefix: str | None,
hours: Decimal,
is_trash: int,
) -> None:
"""累加单条服务记录到聚合字典"""
key = (assistant_id, area_code)
if key not in agg:
agg[key] = {
"base_hours": _ZERO,
"bonus_hours": _ZERO,
"room_hours": _ZERO,
"trashed_hours": _ZERO,
"base_service_count": 0,
"bonus_service_count": 0,
"room_service_count": 0,
}
bucket = agg[key]
if is_trash == 1:
bucket["trashed_hours"] += hours
return
if skill_prefix:
bucket[f"{skill_prefix}_hours"] += hours
bucket[f"{skill_prefix}_service_count"] += 1
# skill_name 为 NULL 的记录:不计入任何课程类型的 hours但已计入 hall/all

View File

@@ -0,0 +1,586 @@
# -*- coding: utf-8 -*-
"""
财务区域日粒度汇总任务
功能说明:
以 (site_id, stat_date, area_code) 为粒度,按桌台区域聚合当日财务数据。
输出 9 行7 个具体区域 (hallA~ktv) + hall历史兼容+ all。
数据来源:
- dwd_settlement_head + dim_table (scd2_is_current=1):结算单 + 桌台区域
- dws_finance_daily_summary全局现金流/充值/卡消费(仅 all 行使用)
目标表:
dws.dws_finance_area_daily
更新策略:
- 更新频率:每小时更新当日数据
- 幂等方式delete-before-insert按 site_id + stat_date 删除后插入 9 行)
业务规则:
- 收入恒等式gross_amount = table_fee + goods + assistant_pd + assistant_cx
- 优惠恒等式discount_total = groupbuy + vip + manual + gift_card + rounding + other
- 确认收入confirmed_income = gross_amount - discount_total
- 非 all 行现金流/卡消费/充值字段 = 0
- hall 行 = 各具体区域之和(历史兼容)
- all 行 = 各具体区域之和(收入/优惠),现金流/充值/卡消费来自 dws_finance_daily_summary
- settle_type IN (1, 3) 过滤
- discount_gift_card 使用赠送卡消费金额口径
Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 8.1, 8.2
"""
from __future__ import annotations
import logging
from collections import defaultdict
from datetime import date
from decimal import Decimal
from typing import Any, Dict, List
from neozqyy_shared.area_mapping import (
ALL_AREA_CODES,
SPECIFIC_AREA_CODES,
resolve_area_code,
)
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import TaskContext
from .finance_base_task import FinanceBaseTask
logger = logging.getLogger(__name__)
# 收入字段(按区域聚合)
_REVENUE_FIELDS = [
"table_fee_amount",
"goods_amount",
"assistant_pd_amount",
"assistant_cx_amount",
"gross_amount",
]
# 优惠字段(按区域聚合)
_DISCOUNT_FIELDS = [
"discount_groupbuy",
"discount_vip",
"discount_manual",
"discount_gift_card",
"discount_rounding",
"discount_other",
"discount_total",
]
# 现金流字段(仅 all 行有值)
_CASHFLOW_FIELDS = [
"cash_pay_amount",
"cash_paper_amount",
"scan_pay_amount",
"groupbuy_pay_amount",
"recharge_cash_inflow",
"cash_inflow_total",
"cash_outflow_total",
"cash_balance_change",
]
# 卡消费字段(仅 all 行有值)
_CARD_FIELDS = [
"card_consume_total",
"recharge_card_consume",
"gift_card_consume",
]
# 充值字段(仅 all 行有值)
_RECHARGE_FIELDS = [
"recharge_cash",
"first_recharge_cash",
"renewal_cash",
]
# 所有仅 all 行有值的字段
_ALL_ONLY_FIELDS = _CASHFLOW_FIELDS + _CARD_FIELDS + _RECHARGE_FIELDS
# 按区域聚合的字段(收入 + 优惠 + order_count
_AREA_AGG_FIELDS = _REVENUE_FIELDS + _DISCOUNT_FIELDS + ["confirmed_income", "order_count"]
_ZERO = Decimal("0")
class FinanceAreaDailyTask(FinanceBaseTask):
"""
财务区域日粒度汇总任务
按 (site_id, stat_date, area_code) 粒度预计算收入、优惠、现金流等数据。
每次写入 9 行hallA~ktv + hall + all
"""
DATE_COL = "stat_date"
def get_task_code(self) -> str:
return "DWS_FINANCE_AREA_DAILY"
def get_target_table(self) -> str:
return "dws.dws_finance_area_daily"
def get_primary_keys(self) -> List[str]:
return ["site_id", "stat_date", "area_code"]
# ======================================================================
# Extract
# ======================================================================
def extract(self, context: TaskContext) -> Dict[str, Any]:
"""提取结算单(含区域)和全局现金流/充值/卡消费数据。"""
start_date = (
context.window_start.date()
if hasattr(context.window_start, "date")
else context.window_start
)
end_date = (
context.window_end.date()
if hasattr(context.window_end, "date")
else context.window_end
)
site_id = context.store_id
self.logger.info(
"%s: 提取数据,日期范围 %s ~ %s",
self.get_task_code(),
start_date,
end_date,
)
# 1. 结算单 + dim_table 区域映射
settlement_rows = self._extract_settlement_with_area(
site_id, start_date, end_date
)
# 2. 全局现金流/充值/卡消费(从现有 dws_finance_daily_summary
global_summary = self._extract_global_summary(site_id, start_date, end_date)
return {
"settlement_rows": settlement_rows,
"global_summary": global_summary,
"start_date": start_date,
"end_date": end_date,
"site_id": site_id,
}
def _extract_settlement_with_area(
self, site_id: int, start_date: date, end_date: date
) -> List[Dict[str, Any]]:
"""从 dwd_settlement_head + dim_table 提取结算单(含区域名称)。"""
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("sh.pay_time", cutoff)
sql = f"""
SELECT
{biz_expr} AS stat_date,
dt.site_table_area_name AS area_name,
sh.settle_type,
-- 收入
sh.table_charge_money AS table_fee_amount,
sh.goods_money AS goods_amount,
sh.assistant_pd_money AS assistant_pd_amount,
sh.assistant_cx_money AS assistant_cx_amount,
(sh.table_charge_money + sh.goods_money
+ sh.assistant_pd_money + sh.assistant_cx_money)
AS gross_amount,
-- 优惠原始字段
sh.coupon_amount,
sh.pl_coupon_sale_amount,
sh.adjust_amount,
sh.member_discount_amount,
sh.rounding_amount,
sh.gift_card_amount
FROM dwd.dwd_settlement_head sh
LEFT JOIN dwd.dim_table dt
ON dt.table_id = sh.table_id
AND dt.site_id = sh.site_id
AND dt.scd2_is_current = 1
WHERE sh.site_id = %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND sh.settle_type IN (1, 3)
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
def _extract_global_summary(
self, site_id: int, start_date: date, end_date: date
) -> List[Dict[str, Any]]:
"""从 dws_finance_daily_summary 提取全局现金流/充值/卡消费字段。"""
sql = """
SELECT
stat_date,
cash_pay_amount,
cash_paper_amount,
scan_pay_amount,
groupbuy_pay_amount,
recharge_cash_inflow,
cash_inflow_total,
cash_outflow_total,
cash_balance_change,
card_consume_total,
recharge_card_consume,
gift_card_consume,
recharge_cash,
first_recharge_amount AS first_recharge_cash,
renewal_amount AS renewal_cash
FROM dws.dws_finance_daily_summary
WHERE site_id = %s
AND stat_date >= %s
AND stat_date <= %s
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
# ======================================================================
# Transform纯函数不依赖数据库
# ======================================================================
def transform(
self, extracted: Dict[str, Any], context: TaskContext
) -> List[Dict[str, Any]]:
"""按区域聚合收入和优惠,构建 9 行输出。"""
settlement_rows = extracted["settlement_rows"]
global_summary = extracted["global_summary"]
site_id = extracted["site_id"]
return transform_area_daily(
settlement_rows=settlement_rows,
global_summary=global_summary,
site_id=site_id,
tenant_id=self.config.get("app.tenant_id", site_id),
safe_decimal_fn=self.safe_decimal,
logger=self.logger,
)
# ======================================================================
# Load覆盖基类按 site_id + stat_date 删除,而非仅按 DATE_COL
# ======================================================================
def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> dict:
"""delete-before-insert按 site_id + stat_date 删除后插入 9 行。"""
if not transformed:
return {
"counts": {
"fetched": 0,
"inserted": 0,
"updated": 0,
"skipped": 0,
"errors": 0,
}
}
# 目标表已含 schema 前缀
target_table = self.get_target_table()
start_date = (
context.window_start.date()
if hasattr(context.window_start, "date")
else context.window_start
)
end_date = (
context.window_end.date()
if hasattr(context.window_end, "date")
else context.window_end
)
delete_sql = f"""
DELETE FROM {target_table}
WHERE site_id = %s
AND stat_date >= %s
AND stat_date <= %s
"""
columns = list(transformed[0].keys())
cols_str = ", ".join(columns)
placeholders = ", ".join(["%s"] * len(columns))
insert_sql = f"INSERT INTO {target_table} ({cols_str}) VALUES ({placeholders})"
with self.db.conn.cursor() as cur:
cur.execute(delete_sql, (context.store_id, start_date, end_date))
deleted = cur.rowcount
inserted = 0
for row in transformed:
values = [row.get(col) for col in columns]
cur.execute(insert_sql, values)
inserted += cur.rowcount
self.logger.info(
"%s: 删除 %d 行,插入 %d",
self.get_task_code(),
deleted,
inserted,
)
return {
"counts": {
"fetched": len(transformed),
"inserted": inserted,
"updated": 0,
"skipped": 0,
"errors": 0,
},
"extra": {"deleted": deleted},
}
# ======================================================================
# 纯函数transform 核心逻辑(方便属性测试直接调用)
# ======================================================================
def transform_area_daily(
settlement_rows: List[Dict[str, Any]],
global_summary: List[Dict[str, Any]],
site_id: int,
tenant_id: int,
safe_decimal_fn=None,
logger=None,
) -> List[Dict[str, Any]]:
"""按区域聚合结算单,构建 9 行输出。
这是一个纯函数(不依赖数据库),方便属性测试直接调用。
Args:
settlement_rows: 结算单行(含 area_name、settle_type 等)
global_summary: 全局现金流/充值/卡消费(按 stat_date 索引)
site_id: 门店 ID
tenant_id: 租户 ID
safe_decimal_fn: Decimal 安全转换函数(默认使用内置实现)
logger: 日志记录器(可选)
Returns:
9 × N 行字典列表N = 涉及的日期数)
"""
if safe_decimal_fn is None:
safe_decimal_fn = _safe_decimal
if logger is None:
logger = logging.getLogger(__name__)
# 全局汇总按日期索引
global_index: Dict[date, Dict[str, Any]] = {}
for row in global_summary:
sd = row.get("stat_date")
if sd is not None:
global_index[sd] = row
# ── 第一步:按 (stat_date, area_code) 聚合结算单 ──────────────
# area_agg[stat_date][area_code] = {field: Decimal, ...}
area_agg: Dict[date, Dict[str, Dict[str, Decimal]]] = defaultdict(
lambda: defaultdict(lambda: defaultdict(lambda: _ZERO))
)
# 收集所有涉及的日期
all_dates: set[date] = set()
for row in settlement_rows:
sd = row.get("stat_date")
if sd is None:
continue
all_dates.add(sd)
# settle_type 过滤extract 已过滤transform 层再次防御)
settle_type = row.get("settle_type")
if settle_type not in (1, 3):
continue
area_name = row.get("area_name")
area_code = resolve_area_code(area_name)
if area_code is None:
# 未知区域:记录警告,不计入具体区域行,但仍计入 all 行
logger.warning(
"DWS_FINANCE_AREA_DAILY: 未知区域名称 '%s',不计入具体区域",
area_name,
)
# 提取金额
table_fee = safe_decimal_fn(row.get("table_fee_amount", 0))
goods = safe_decimal_fn(row.get("goods_amount", 0))
pd_amount = safe_decimal_fn(row.get("assistant_pd_amount", 0))
cx_amount = safe_decimal_fn(row.get("assistant_cx_amount", 0))
gross = table_fee + goods + pd_amount + cx_amount
coupon = safe_decimal_fn(row.get("coupon_amount", 0))
pl_coupon_sale = safe_decimal_fn(row.get("pl_coupon_sale_amount", 0))
adjust = safe_decimal_fn(row.get("adjust_amount", 0))
member_discount = safe_decimal_fn(row.get("member_discount_amount", 0))
rounding = safe_decimal_fn(row.get("rounding_amount", 0))
gift_card_amount = safe_decimal_fn(row.get("gift_card_amount", 0))
# 团购优惠 = coupon_amount - 团购支付金额
groupbuy_pay = pl_coupon_sale if pl_coupon_sale > 0 else _ZERO
discount_groupbuy = max(coupon - groupbuy_pay, _ZERO)
# 赠送卡消费金额口径(与现有 ETL 一致)
discount_gift_card = gift_card_amount
# 手动调整 = adjust_amount简化不拆分大客户区域级无法区分
discount_manual = adjust
discount_total = (
discount_groupbuy
+ member_discount
+ discount_manual
+ discount_gift_card
+ rounding
)
# discount_other 在区域级暂为 0adjust 全部归入 manual
discount_other = _ZERO
# 重新计算 discount_total 确保恒等式
discount_total = (
discount_groupbuy
+ member_discount
+ discount_manual
+ discount_gift_card
+ rounding
+ discount_other
)
confirmed_income = gross - discount_total
fields = {
"table_fee_amount": table_fee,
"goods_amount": goods,
"assistant_pd_amount": pd_amount,
"assistant_cx_amount": cx_amount,
"gross_amount": gross,
"discount_groupbuy": discount_groupbuy,
"discount_vip": member_discount,
"discount_manual": discount_manual,
"discount_gift_card": discount_gift_card,
"discount_rounding": rounding,
"discount_other": discount_other,
"discount_total": discount_total,
"confirmed_income": confirmed_income,
"order_count": 1,
}
# 累加到具体区域
if area_code is not None:
bucket = area_agg[sd][area_code]
for k, v in fields.items():
bucket[k] = bucket[k] + v
# 也收集 global_summary 中的日期
for sd in global_index:
all_dates.add(sd)
# ── 第二步:构建 9 行输出 ─────────────────────────────────────
results: List[Dict[str, Any]] = []
for sd in sorted(all_dates):
day_agg = area_agg.get(sd, {})
# 计算各具体区域行
specific_rows: Dict[str, Dict[str, Any]] = {}
for ac in SPECIFIC_AREA_CODES:
bucket = day_agg.get(ac, {})
row_data = _build_area_row(
site_id=site_id,
tenant_id=tenant_id,
stat_date=sd,
area_code=ac,
agg=bucket,
)
specific_rows[ac] = row_data
# CHANGE 2026-03-29 | hall = 台球大厅hallA+hallB+hallC不含 vip/snooker/mahjong/ktv
_HALL_CODES = ("hallA", "hallB", "hallC")
hall_source = [specific_rows[ac] for ac in _HALL_CODES if ac in specific_rows]
hall_row = _build_sum_row(
site_id=site_id,
tenant_id=tenant_id,
stat_date=sd,
area_code="hall",
source_rows=hall_source,
)
# all 行 = 各具体区域之和(收入/优惠/order_count+ 全局现金流/充值/卡消费
all_row = _build_sum_row(
site_id=site_id,
tenant_id=tenant_id,
stat_date=sd,
area_code="all",
source_rows=list(specific_rows.values()),
)
# 填充全局现金流/充值/卡消费
gs = global_index.get(sd, {})
for field in _ALL_ONLY_FIELDS:
all_row[field] = safe_decimal_fn(gs.get(field, 0))
# 输出顺序all, hall, hallA, hallB, hallC, vip, snooker, mahjong, ktv
results.append(all_row)
results.append(hall_row)
for ac in SPECIFIC_AREA_CODES:
results.append(specific_rows[ac])
return results
def _build_area_row(
site_id: int,
tenant_id: int,
stat_date: date,
area_code: str,
agg: Dict[str, Decimal],
) -> Dict[str, Any]:
"""构建单个区域行。"""
row: Dict[str, Any] = {
"site_id": site_id,
"tenant_id": tenant_id,
"stat_date": stat_date,
"area_code": area_code,
}
# 收入 + 优惠 + confirmed_income + order_count
for field in _AREA_AGG_FIELDS:
if field == "order_count":
row[field] = int(agg.get(field, 0))
else:
row[field] = agg.get(field, _ZERO)
# 非 all 行:现金流/卡消费/充值 = 0
for field in _ALL_ONLY_FIELDS:
row[field] = _ZERO
return row
def _build_sum_row(
site_id: int,
tenant_id: int,
stat_date: date,
area_code: str,
source_rows: List[Dict[str, Any]],
) -> Dict[str, Any]:
"""构建 hall/all 汇总行(收入/优惠 = 各具体区域之和)。"""
row: Dict[str, Any] = {
"site_id": site_id,
"tenant_id": tenant_id,
"stat_date": stat_date,
"area_code": area_code,
}
for field in _AREA_AGG_FIELDS:
if field == "order_count":
row[field] = sum(int(r.get(field, 0)) for r in source_rows)
else:
row[field] = sum(
(r.get(field, _ZERO) for r in source_rows), _ZERO
)
# 非 all 行:现金流/卡消费/充值 = 0all 行由调用方覆盖)
for field in _ALL_ONLY_FIELDS:
row[field] = _ZERO
return row
def _safe_decimal(value: Any, default: Decimal = _ZERO) -> Decimal:
"""内置的安全 Decimal 转换(供纯函数使用)。"""
if value is None:
return default
try:
return Decimal(str(value))
except Exception:
return default
__all__ = ["FinanceAreaDailyTask", "transform_area_daily"]

View File

@@ -118,6 +118,35 @@ class FinanceBaseTask(BaseDwsTask):
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
# ------------------------------------------------------------------
# 支付方式拆分
# ------------------------------------------------------------------
# CHANGE 2026-03-27 | board-finance-integration T1.1 | 新增支付方式拆分
def _extract_payment_split(
self,
site_id: int,
start_date: date,
end_date: date,
) -> List[Dict[str, Any]]:
"""支付方式拆分日汇总(纸币现金 vs 扫码/离线支付)"""
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("p.pay_time", cutoff)
sql = f"""
SELECT
{biz_expr} AS stat_date,
SUM(CASE WHEN p.payment_method = 2 THEN p.pay_amount ELSE 0 END) AS cash_paper_amount,
SUM(CASE WHEN p.payment_method = 4 THEN p.pay_amount ELSE 0 END) AS scan_pay_amount
FROM dwd.dwd_payment p
WHERE p.site_id = %s
AND p.pay_status = 2
AND p.relate_type = 2
AND {biz_expr} >= %s
AND {biz_expr} <= %s
GROUP BY {biz_expr}
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
# ------------------------------------------------------------------
# 团购核销汇总
# ------------------------------------------------------------------

View File

@@ -0,0 +1,525 @@
# -*- coding: utf-8 -*-
"""
财务看板缓存层任务
功能说明:
基于 dws_finance_area_daily 日粒度数据为已完成周期lastMonth/lastWeek/
lastQuarter/quarter3/half6× 9 个区域 = 45 组合维护缓存。
通过数据指纹MD5检测源数据变化仅对指纹不一致的组合重算并 upsert。
数据来源:
- dws.dws_finance_area_daily日粒度原子层
- dws.dws_finance_board_cache缓存层读取已有指纹做对比
目标表:
dws.dws_finance_board_cache
更新策略:
- 更新频率:每天一次(营业日切点后)
- 幂等方式ON CONFLICT (site_id, time_range, area_code) DO UPDATE
- 当期周期month/week/quarter不写入缓存
业务规则:
- 指纹 = MD5(sorted [(stat_date, gross_amount, discount_total), ...])
- 指纹一致 → 跳过;指纹不一致 → 从日粒度表 SUM 重算后 upsert
- overview 8 项occurrence/discount/discount_rate/confirmed_revenue/
cash_in/cash_out/cash_balance/balance_rate
Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7
"""
from __future__ import annotations
import hashlib
import json
import logging
from datetime import date, timedelta
from decimal import Decimal
from typing import Any, Dict, List, Optional, Tuple
from neozqyy_shared.area_mapping import ALL_AREA_CODES
from .base_dws_task import BaseDwsTask, TaskContext
logger = logging.getLogger(__name__)
# 已完成周期(当期 month/week/quarter 不缓存)
COMPLETED_PERIODS: List[str] = [
"lastMonth",
"lastWeek",
"lastQuarter",
"quarter3",
"half6",
]
# 当期周期(不写入缓存)
CURRENT_PERIODS: set[str] = {"month", "week", "quarter"}
_ZERO = Decimal("0")
# ======================================================================
# 纯函数:指纹计算(导出供属性测试直接调用)
# ======================================================================
def compute_fingerprint(rows: list[dict]) -> str:
"""对 (stat_date, gross_amount, discount_total) 排序后计算 MD5 指纹。
这是一个纯函数,不依赖数据库,方便属性测试直接调用。
Args:
rows: 日粒度行列表,每行至少包含 stat_date/gross_amount/discount_total
Returns:
MD5 十六进制字符串
"""
sorted_rows = sorted(rows, key=lambda r: str(r["stat_date"]))
payload = json.dumps(
[
(str(r["stat_date"]), str(r["gross_amount"]), str(r["discount_total"]))
for r in sorted_rows
]
)
return hashlib.md5(payload.encode()).hexdigest()
def is_current_period(time_range: str) -> bool:
"""判断 time_range 是否为当期周期(不应写入缓存)。
纯函数,导出供属性测试直接调用。
"""
return time_range in CURRENT_PERIODS
# ======================================================================
# 日期范围计算(复用 board_service 的逻辑)
# ======================================================================
def _calc_period_date_range(
time_range: str, ref_date: date | None = None
) -> Tuple[date, date]:
"""计算已完成周期的日期范围。
与 board_service._calc_date_range 保持一致。
"""
today = ref_date or date.today()
if time_range == "lastMonth":
first_of_this_month = today.replace(day=1)
last_month_end = first_of_this_month - timedelta(days=1)
last_month_start = last_month_end.replace(day=1)
return last_month_start, last_month_end
if time_range == "lastWeek":
this_monday = today - timedelta(days=today.weekday())
last_sunday = this_monday - timedelta(days=1)
last_monday = last_sunday - timedelta(days=6)
return last_monday, last_sunday
if time_range == "lastQuarter":
q_start_month = (today.month - 1) // 3 * 3 + 1
this_q_start = date(today.year, q_start_month, 1)
prev_q_end = this_q_start - timedelta(days=1)
prev_q_start_month = (prev_q_end.month - 1) // 3 * 3 + 1
prev_q_start = date(prev_q_end.year, prev_q_start_month, 1)
return prev_q_start, prev_q_end
if time_range == "quarter3":
first_of_this_month = today.replace(day=1)
end = first_of_this_month - timedelta(days=1)
start = _month_offset(first_of_this_month, -3)
return start, end
if time_range == "half6":
first_of_this_month = today.replace(day=1)
end = first_of_this_month - timedelta(days=1)
start = _month_offset(first_of_this_month, -6)
return start, end
raise ValueError(f"不支持的已完成周期: {time_range}")
def _month_offset(base_date: date, months: int) -> date:
"""按月偏移日期(保持日不越界)。"""
import calendar
total_months = base_date.year * 12 + (base_date.month - 1) + months
year = total_months // 12
month = total_months % 12 + 1
last_day = calendar.monthrange(year, month)[1]
day = min(base_date.day, last_day)
return date(year, month, day)
# ======================================================================
# ETL 任务类
# ======================================================================
class FinanceBoardCacheTask(BaseDwsTask):
"""
财务看板缓存层任务
遍历 5 个已完成周期 × 9 个区域 = 45 组合,
通过数据指纹检测变化,仅对需要重算的组合 upsert 缓存。
"""
DATE_COL = None # 缓存表无 stat_date 列
def get_task_code(self) -> str:
return "DWS_FINANCE_BOARD_CACHE"
def get_target_table(self) -> str:
return "dws.dws_finance_board_cache"
def get_primary_keys(self) -> List[str]:
return ["site_id", "time_range", "area_code"]
# ======================================================================
# Extract
# ======================================================================
def extract(self, context: TaskContext) -> Dict[str, Any]:
"""遍历 45 组合,从日粒度表读取源数据行 + 从缓存表读取已有指纹。"""
site_id = context.store_id
self.logger.info(
"%s: 开始提取,站点 %s",
self.get_task_code(),
site_id,
)
combinations: List[Dict[str, Any]] = []
for time_range in COMPLETED_PERIODS:
start_date, end_date = _calc_period_date_range(time_range)
for area_code in ALL_AREA_CODES:
# 从日粒度表读取源数据行
daily_rows = self._read_daily_rows(
site_id, start_date, end_date, area_code
)
# 从缓存表读取已有指纹
existing_fp = self._read_existing_fingerprint(
site_id, time_range, area_code
)
combinations.append(
{
"time_range": time_range,
"area_code": area_code,
"start_date": start_date,
"end_date": end_date,
"daily_rows": daily_rows,
"existing_fingerprint": existing_fp,
}
)
return {
"combinations": combinations,
"site_id": site_id,
}
def _read_daily_rows(
self,
site_id: int,
start_date: date,
end_date: date,
area_code: str,
) -> List[Dict[str, Any]]:
"""从 dws_finance_area_daily 读取日粒度行。"""
sql = """
SELECT stat_date, gross_amount, discount_total,
confirmed_income,
cash_inflow_total, cash_outflow_total, cash_balance_change
FROM dws.dws_finance_area_daily
WHERE site_id = %s
AND stat_date >= %s
AND stat_date <= %s
AND area_code = %s
"""
rows = self.db.query(sql, (site_id, start_date, end_date, area_code))
return [dict(row) for row in rows] if rows else []
def _read_existing_fingerprint(
self,
site_id: int,
time_range: str,
area_code: str,
) -> Optional[str]:
"""从缓存表读取已有指纹。"""
sql = """
SELECT data_fingerprint
FROM dws.dws_finance_board_cache
WHERE site_id = %s
AND time_range = %s
AND area_code = %s
"""
rows = self.db.query(sql, (site_id, time_range, area_code))
if rows:
return dict(rows[0]).get("data_fingerprint")
return None
# ======================================================================
# Transform纯函数逻辑标记需重算的组合
# ======================================================================
def transform(
self, extracted: Dict[str, Any], context: TaskContext
) -> List[Dict[str, Any]]:
"""计算指纹,与已有指纹对比,标记需重算的组合。"""
site_id = extracted["site_id"]
combinations = extracted["combinations"]
return transform_cache_combinations(
combinations=combinations,
site_id=site_id,
logger=self.logger,
)
# ======================================================================
# Loadupsert 需重算的组合)
# ======================================================================
def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> dict:
"""对需重算的组合 upsert 到缓存表。"""
if not transformed:
return {
"counts": {
"fetched": 0,
"inserted": 0,
"updated": 0,
"skipped": 0,
"errors": 0,
}
}
site_id = context.store_id
upserted = 0
skipped = 0
for combo in transformed:
if not combo.get("needs_recompute"):
skipped += 1
continue
# 从日粒度表 SUM 计算 overview 指标
overview = self._sum_overview(
site_id,
combo["start_date"],
combo["end_date"],
combo["area_code"],
)
self._upsert_cache(
site_id=site_id,
time_range=combo["time_range"],
area_code=combo["area_code"],
start_date=combo["start_date"],
end_date=combo["end_date"],
overview=overview,
fingerprint=combo["new_fingerprint"],
)
upserted += 1
self.logger.info(
"%s: upsert %d 组合,跳过 %d 组合",
self.get_task_code(),
upserted,
skipped,
)
return {
"counts": {
"fetched": len(transformed),
"inserted": upserted,
"updated": 0,
"skipped": skipped,
"errors": 0,
}
}
def _sum_overview(
self,
site_id: int,
start_date: date,
end_date: date,
area_code: str,
) -> Dict[str, Any]:
"""从日粒度表 SUM 计算 overview 8 项指标。"""
sql = """
SELECT
COALESCE(SUM(gross_amount), 0) AS occurrence,
COALESCE(SUM(discount_total), 0) AS discount,
COALESCE(SUM(confirmed_income), 0) AS confirmed_revenue,
COALESCE(SUM(cash_inflow_total), 0) AS cash_in,
COALESCE(SUM(cash_outflow_total), 0) AS cash_out,
COALESCE(SUM(cash_balance_change), 0) AS cash_balance
FROM dws.dws_finance_area_daily
WHERE site_id = %s
AND stat_date >= %s
AND stat_date <= %s
AND area_code = %s
"""
rows = self.db.query(sql, (site_id, start_date, end_date, area_code))
if rows:
row = dict(rows[0])
occurrence = Decimal(str(row.get("occurrence", 0)))
discount = Decimal(str(row.get("discount", 0)))
confirmed_revenue = Decimal(str(row.get("confirmed_revenue", 0)))
cash_in = Decimal(str(row.get("cash_in", 0)))
cash_out = Decimal(str(row.get("cash_out", 0)))
cash_balance = Decimal(str(row.get("cash_balance", 0)))
# discount_rate = discount / occurrence避免除零
discount_rate = (
(discount / occurrence) if occurrence != 0 else _ZERO
)
# balance_rate = cash_balance / cash_in避免除零
balance_rate = (
(cash_balance / cash_in) if cash_in != 0 else _ZERO
)
return {
"occurrence": occurrence,
"discount": discount,
"discount_rate": discount_rate,
"confirmed_revenue": confirmed_revenue,
"cash_in": cash_in,
"cash_out": cash_out,
"cash_balance": cash_balance,
"balance_rate": balance_rate,
}
return {k: _ZERO for k in [
"occurrence", "discount", "discount_rate", "confirmed_revenue",
"cash_in", "cash_out", "cash_balance", "balance_rate",
]}
def _upsert_cache(
self,
site_id: int,
time_range: str,
area_code: str,
start_date: date,
end_date: date,
overview: Dict[str, Any],
fingerprint: str,
) -> None:
"""ON CONFLICT DO UPDATE 写入缓存表。"""
sql = """
INSERT INTO dws.dws_finance_board_cache (
site_id, time_range, area_code,
start_date, end_date,
occurrence, discount, discount_rate, confirmed_revenue,
cash_in, cash_out, cash_balance, balance_rate,
data_fingerprint, computed_at, updated_at
) VALUES (
%s, %s, %s,
%s, %s,
%s, %s, %s, %s,
%s, %s, %s, %s,
%s, NOW(), NOW()
)
ON CONFLICT (site_id, time_range, area_code) DO UPDATE SET
start_date = EXCLUDED.start_date,
end_date = EXCLUDED.end_date,
occurrence = EXCLUDED.occurrence,
discount = EXCLUDED.discount,
discount_rate = EXCLUDED.discount_rate,
confirmed_revenue = EXCLUDED.confirmed_revenue,
cash_in = EXCLUDED.cash_in,
cash_out = EXCLUDED.cash_out,
cash_balance = EXCLUDED.cash_balance,
balance_rate = EXCLUDED.balance_rate,
data_fingerprint = EXCLUDED.data_fingerprint,
computed_at = NOW(),
updated_at = NOW()
"""
with self.db.conn.cursor() as cur:
cur.execute(
sql,
(
site_id,
time_range,
area_code,
start_date,
end_date,
overview["occurrence"],
overview["discount"],
overview["discount_rate"],
overview["confirmed_revenue"],
overview["cash_in"],
overview["cash_out"],
overview["cash_balance"],
overview["balance_rate"],
fingerprint,
),
)
# ======================================================================
# 纯函数transform 核心逻辑(方便属性测试直接调用)
# ======================================================================
def transform_cache_combinations(
combinations: List[Dict[str, Any]],
site_id: int,
logger: Any = None,
) -> List[Dict[str, Any]]:
"""对每个组合计算指纹并与已有指纹对比,标记需重算的组合。
纯函数(不依赖数据库),方便属性测试直接调用。
Args:
combinations: extract 输出的组合列表
site_id: 门店 ID
logger: 日志记录器(可选)
Returns:
带 needs_recompute/new_fingerprint 标记的组合列表
"""
if logger is None:
logger = logging.getLogger(__name__)
results: List[Dict[str, Any]] = []
for combo in combinations:
daily_rows = combo.get("daily_rows", [])
existing_fp = combo.get("existing_fingerprint")
new_fp = compute_fingerprint(daily_rows)
needs_recompute = new_fp != existing_fp
if needs_recompute:
logger.info(
"DWS_FINANCE_BOARD_CACHE: %s/%s 指纹变化,需重算",
combo["time_range"],
combo["area_code"],
)
results.append(
{
**combo,
"site_id": site_id,
"new_fingerprint": new_fp,
"needs_recompute": needs_recompute,
}
)
return results
__all__ = [
"FinanceBoardCacheTask",
"compute_fingerprint",
"is_current_period",
"transform_cache_combinations",
"COMPLETED_PERIODS",
"CURRENT_PERIODS",
]

View File

@@ -55,6 +55,9 @@ class FinanceDailyTask(FinanceBaseTask):
# CHANGE 2025-07-15 | task 1.3: 声明日期列,供基类默认 load() 使用
DATE_COL = "stat_date"
# CHANGE 2026-03-22 | P14: 财务日度任务完成后触发 AI App2 预生成
AI_TRIGGER_EVENT = "dws_completed"
def get_task_code(self) -> str:
return "DWS_FINANCE_DAILY"
@@ -93,6 +96,10 @@ class FinanceDailyTask(FinanceBaseTask):
# 3.1 获取赠送卡消费汇总(余额变动)
gift_card_summary = self._extract_gift_card_consume_summary(site_id, start_date, end_date)
# 3.2 获取支付方式拆分
# CHANGE 2026-03-27 | board-finance-integration T1.1 | 新增支付方式拆分
payment_split = self._extract_payment_split(site_id, start_date, end_date)
# 4. 获取支出汇总(来自导入表)
expense_summary = self._extract_expense_summary(site_id, start_date, end_date)
@@ -108,6 +115,7 @@ class FinanceDailyTask(FinanceBaseTask):
'groupbuy_summary': groupbuy_summary,
'recharge_summary': recharge_summary,
'gift_card_summary': gift_card_summary,
'payment_split': payment_split,
'expense_summary': expense_summary,
'platform_summary': platform_summary,
'big_customer_summary': big_customer_summary,
@@ -127,6 +135,7 @@ class FinanceDailyTask(FinanceBaseTask):
expense_summary = extracted['expense_summary']
platform_summary = extracted['platform_summary']
big_customer_summary = extracted['big_customer_summary']
payment_split = extracted['payment_split']
site_id = extracted['site_id']
self.logger.info(
@@ -149,6 +158,7 @@ class FinanceDailyTask(FinanceBaseTask):
expense_index = {e['stat_date']: e for e in expense_summary}
platform_index = {p['stat_date']: p for p in platform_summary}
big_customer_index = {b['stat_date']: b for b in big_customer_summary}
payment_split_index = {ps['stat_date']: ps for ps in payment_split}
results = []
for stat_date in sorted(dates):
@@ -159,9 +169,10 @@ class FinanceDailyTask(FinanceBaseTask):
expense = expense_index.get(stat_date, {})
platform = platform_index.get(stat_date, {})
big_customer = big_customer_index.get(stat_date, {})
ps = payment_split_index.get(stat_date, {})
record = self._build_daily_record(
stat_date, settle, groupbuy, recharge, gift_card, expense, platform, big_customer, site_id
stat_date, settle, groupbuy, recharge, gift_card, expense, platform, big_customer, site_id, ps
)
results.append(record)
@@ -185,7 +196,8 @@ class FinanceDailyTask(FinanceBaseTask):
expense: Dict[str, Any],
platform: Dict[str, Any],
big_customer: Dict[str, Any],
site_id: int
site_id: int,
payment_split: Dict[str, Any],
) -> Dict[str, Any]:
"""
构建日度财务记录
@@ -237,6 +249,11 @@ class FinanceDailyTask(FinanceBaseTask):
# 确认收入
confirmed_income = gross_amount - discount_total
# 支付方式拆分
# CHANGE 2026-03-27 | board-finance-integration T1.1 | 新增支付方式拆分
cash_paper_amount = self.safe_decimal(payment_split.get('cash_paper_amount', 0))
scan_pay_amount = self.safe_decimal(payment_split.get('scan_pay_amount', 0))
# 现金流
platform_settlement_amount = self.safe_decimal(platform.get('settlement_amount', 0))
platform_fee_amount = (
@@ -298,6 +315,8 @@ class FinanceDailyTask(FinanceBaseTask):
# 现金流
'cash_inflow_total': cash_inflow_total,
'cash_pay_amount': cash_pay_amount,
'cash_paper_amount': cash_paper_amount,
'scan_pay_amount': scan_pay_amount,
'groupbuy_pay_amount': groupbuy_pay_amount,
'platform_settlement_amount': platform_settlement_amount,
'platform_fee_amount': platform_fee_amount,

View File

@@ -83,7 +83,7 @@ class BaseIndexTask(BaseDwsTask):
self._index_params_cache_by_type: Dict[str, IndexParameters] = {}
# 默认参数
DEFAULT_LOOKBACK_DAYS = 60
DEFAULT_LOOKBACK_DAYS = 90
DEFAULT_PERCENTILE_LOWER = 5
DEFAULT_PERCENTILE_UPPER = 95
DEFAULT_EWMA_ALPHA = 0.2

View File

@@ -86,7 +86,9 @@ class MemberIndexBaseTask(BaseIndexTask):
tenant_id = self._get_tenant_id()
params = self._load_params()
activities = self._build_member_activity(site_id, tenant_id, params)
# P19: 回测模式用 as_of_date 替代 datetime.now()
as_of = (context.as_of_date if context and context.as_of_date else None) or datetime.now(self.tz)
activities = self._build_member_activity(site_id, tenant_id, params, as_of=as_of)
if not activities:
self.logger.warning("No member activity data available; skip calculation")
return {'status': 'skipped', 'reason': 'no_data'}
@@ -402,9 +404,12 @@ class MemberIndexBaseTask(BaseIndexTask):
site_id: int,
tenant_id: int,
params: Dict[str, float],
*,
as_of: Optional[datetime] = None,
) -> Dict[int, MemberActivityData]:
"""构建会员活动特征"""
now = datetime.now(self.tz)
# P19: 回测模式用 as_of 替代 datetime.now()
now = as_of or datetime.now(self.tz)
base_date = now.date()
visit_lookback_days = int(params.get('visit_lookback_days', self.DEFAULT_VISIT_LOOKBACK_DAYS))

View File

@@ -202,7 +202,9 @@ class NewconvIndexTask(MemberIndexBaseTask):
avg_raw=sum(all_raw) / len(all_raw)
)
inserted = self._save_newconv_data(newconv_list)
# P19: 回测模式传入 calc_time
calc_time = (context.as_of_date if context and context.as_of_date else None)
inserted = self._save_newconv_data(newconv_list, calc_time=calc_time)
self.logger.info("NCI calculation finished, inserted %d rows", inserted)
return {
@@ -286,21 +288,30 @@ class NewconvIndexTask(MemberIndexBaseTask):
if data.raw_score < 0:
data.raw_score = 0.0
def _save_newconv_data(self, data_list: List[MemberNewconvData]) -> int:
def _save_newconv_data(self, data_list: List[MemberNewconvData], *, calc_time=None) -> int:
"""保存 NCI 数据"""
if not data_list:
return 0
site_id = data_list[0].activity.site_id
# 按门店全量刷新,避免因分群变化导致过期数据残留。
delete_sql = """
DELETE FROM dws.dws_member_newconv_index
WHERE site_id = %s
"""
# P19: 回测模式按 calc_time 删除(保留其他快照),正常模式按 site_id 全量刷新
use_param_time = calc_time is not None
with self.db.conn.cursor() as cur:
cur.execute(delete_sql, (site_id,))
if use_param_time:
cur.execute(
"DELETE FROM dws.dws_member_newconv_index WHERE site_id = %s AND calc_time = %s",
(site_id, calc_time),
)
else:
cur.execute(
"DELETE FROM dws.dws_member_newconv_index WHERE site_id = %s",
(site_id,),
)
insert_sql = """
# P19: 回测模式传入 calc_time正常模式用 NOW()
use_param_time = calc_time is not None
time_placeholder = "%s, %s, %s" if use_param_time else "NOW(), NOW(), NOW()"
insert_sql = f"""
INSERT INTO dws.dws_member_newconv_index (
site_id, tenant_id, member_id,
status, segment,
@@ -328,7 +339,7 @@ class NewconvIndexTask(MemberIndexBaseTask):
%s, %s, %s,
%s, %s, %s,
%s,
NOW(), NOW(), NOW()
{time_placeholder}
)
"""
@@ -336,7 +347,7 @@ class NewconvIndexTask(MemberIndexBaseTask):
with self.db.conn.cursor() as cur:
for data in data_list:
activity = data.activity
cur.execute(insert_sql, (
params = (
activity.site_id, activity.tenant_id, activity.member_id,
data.status, data.segment,
activity.member_create_time, activity.first_visit_time, activity.last_visit_time, activity.last_recharge_time,
@@ -349,7 +360,10 @@ class NewconvIndexTask(MemberIndexBaseTask):
data.raw_score_welcome, data.raw_score_convert, data.raw_score,
data.display_score_welcome, data.display_score_convert, data.display_score,
None,
))
)
if use_param_time:
params = params + (calc_time, calc_time, calc_time)
cur.execute(insert_sql, params)
inserted += cur.rowcount
self.db.conn.commit()

View File

@@ -78,7 +78,7 @@ class RelationIndexTask(BaseIndexTask):
INDEX_TYPE = "RS"
DEFAULT_PARAMS_RS: Dict[str, float] = {
"lookback_days": 60,
"lookback_days": 90,
"session_merge_hours": 4,
"incentive_weight": 1.5,
"halflife_session": 14.0,
@@ -93,15 +93,13 @@ class RelationIndexTask(BaseIndexTask):
"ewma_alpha": 0.2,
}
DEFAULT_PARAMS_OS: Dict[str, float] = {
"min_rs_raw_for_ownership": 0.05,
"min_total_rs_raw": 0.10,
"ownership_main_threshold": 0.60,
"ownership_comanage_threshold": 0.35,
"ownership_gap_threshold": 0.15,
"min_rs_for_label": 0.10,
"ownership_main_ratio": 0.70,
"ownership_comanage_ratio": 0.30,
"eps": 1e-6,
}
DEFAULT_PARAMS_MS: Dict[str, float] = {
"lookback_days": 60,
"lookback_days": 90,
"session_merge_hours": 4,
"incentive_weight": 1.5,
"halflife_short": 7.0,
@@ -115,7 +113,7 @@ class RelationIndexTask(BaseIndexTask):
}
# CHANGE 2026-02-13 | intent: ML 仅使用人工台账,移除 source_mode / recharge_attribute_hours
DEFAULT_PARAMS_ML: Dict[str, float] = {
"lookback_days": 60,
"lookback_days": 90,
"amount_base": 500.0,
"halflife_recharge": 21.0,
"percentile_lower": 5.0,
@@ -143,7 +141,8 @@ class RelationIndexTask(BaseIndexTask):
site_id = self._get_site_id(context)
tenant_id = self._get_tenant_id()
now = datetime.now(self.tz)
# P19: 回测模式用 as_of_date 替代 datetime.now()
now = (context.as_of_date if context and context.as_of_date else None) or datetime.now(self.tz)
params_rs = self._load_params("RS", self.DEFAULT_PARAMS_RS)
params_os = self._load_params("OS", self.DEFAULT_PARAMS_OS)
@@ -151,8 +150,8 @@ class RelationIndexTask(BaseIndexTask):
params_ml = self._load_params("ML", self.DEFAULT_PARAMS_ML)
service_lookback_days = max(
int(params_rs.get("lookback_days", 60)),
int(params_ms.get("lookback_days", 60)),
int(params_rs.get("lookback_days", 90)),
int(params_ms.get("lookback_days", 90)),
)
service_start = now - timedelta(days=service_lookback_days)
merge_hours = max(
@@ -181,7 +180,9 @@ class RelationIndexTask(BaseIndexTask):
self._apply_display_scores(pair_map, params_rs, params_ms, params_ml, site_id)
inserted = self._save_relation_rows(site_id, list(pair_map.values()))
# P19: 仅回测模式传 calc_time按 calc_time 删除保留其他快照),正常模式传 None按 site_id 全量刷新)
backtest_calc_time = now if (context and context.as_of_date) else None
inserted = self._save_relation_rows(site_id, list(pair_map.values()), calc_time=backtest_calc_time)
self.logger.info("关系指数计算完成,写入 %d 条记录", inserted)
return {
@@ -313,7 +314,7 @@ class RelationIndexTask(BaseIndexTask):
params: Dict[str, float],
now: datetime,
) -> None:
lookback_days = int(params.get("lookback_days", 60))
lookback_days = int(params.get("lookback_days", 90))
halflife_session = float(params.get("halflife_session", 14.0))
halflife_last = float(params.get("halflife_last", 10.0))
weight_f = float(params.get("weight_f", 1.0))
@@ -355,7 +356,7 @@ class RelationIndexTask(BaseIndexTask):
params: Dict[str, float],
now: datetime,
) -> None:
lookback_days = int(params.get("lookback_days", 60))
lookback_days = int(params.get("lookback_days", 90))
halflife_short = float(params.get("halflife_short", 7.0))
halflife_long = float(params.get("halflife_long", 30.0))
eps = float(params.get("eps", 1e-6))
@@ -382,7 +383,7 @@ class RelationIndexTask(BaseIndexTask):
site_id: int,
now: datetime,
) -> None:
lookback_days = int(params.get("lookback_days", 60))
lookback_days = int(params.get("lookback_days", 90))
amount_base = float(params.get("amount_base", 500.0))
halflife_recharge = float(params.get("halflife_recharge", 21.0))
start_time = now - timedelta(days=lookback_days)
@@ -439,68 +440,53 @@ class RelationIndexTask(BaseIndexTask):
pair_map: Dict[Tuple[int, int], RelationPairMetrics],
params: Dict[str, float],
) -> None:
min_rs = float(params.get("min_rs_raw_for_ownership", 0.05))
min_total = float(params.get("min_total_rs_raw", 0.10))
main_threshold = float(params.get("ownership_main_threshold", 0.60))
comanage_threshold = float(params.get("ownership_comanage_threshold", 0.35))
gap_threshold = float(params.get("ownership_gap_threshold", 0.15))
"""CHANGE 2026-03-31 | 新 OS 方案:基于第一名分值比例。
规则:
- 第一名一定为 MAINrs_raw ≥ min_rs_for_label
- 在第一名分值的 main_ratio_threshold 以内都为 MAIN
- 在第一名分值的 comanage_ratio_threshold 以内都为 COMANAGE
- 其余为 POOL
- MAIN/COMANAGE 前提rs_raw ≥ min_rs_for_label
"""
min_rs_for_label = float(params.get("min_rs_for_label", 0.1))
main_ratio = float(params.get("ownership_main_ratio", 0.70))
comanage_ratio = float(params.get("ownership_comanage_ratio", 0.30))
member_groups: Dict[int, List[RelationPairMetrics]] = {}
for metrics in pair_map.values():
member_groups.setdefault(metrics.member_id, []).append(metrics)
for _, rows in member_groups.items():
eligible = [row for row in rows if row.rs_raw >= min_rs]
sum_rs = sum(row.rs_raw for row in eligible)
if sum_rs < min_total:
for row in rows:
row.os_share = 0.0
row.os_label = "UNASSIGNED"
row.os_rank = None
continue
for row in rows:
if row.rs_raw >= min_rs:
row.os_share = row.rs_raw / sum_rs
else:
row.os_share = 0.0
sorted_eligible = sorted(
eligible,
# 按 rs_raw 降序排列
sorted_rows = sorted(
rows,
key=lambda item: (
-item.os_share,
-item.rs_raw,
item.days_since_last_session if item.days_since_last_session is not None else 10**9,
item.assistant_id,
),
)
for idx, row in enumerate(sorted_eligible, start=1):
row.os_rank = idx
top1 = sorted_eligible[0]
top2_share = sorted_eligible[1].os_share if len(sorted_eligible) > 1 else 0.0
gap = top1.os_share - top2_share
has_main = top1.os_share >= main_threshold and gap >= gap_threshold
top_rs = sorted_rows[0].rs_raw if sorted_rows else 0.0
if has_main:
for row in rows:
if row is top1:
row.os_label = "MAIN"
elif row.os_share >= comanage_threshold:
row.os_label = "COMANAGE"
else:
row.os_label = "POOL"
else:
for row in rows:
if row.os_share >= comanage_threshold and row.rs_raw >= min_rs:
row.os_label = "COMANAGE"
else:
row.os_label = "POOL"
for idx, row in enumerate(sorted_rows):
row.os_rank = idx + 1
# 计算份额(保留兼容性,用于前端展示)
sum_rs = sum(r.rs_raw for r in rows if r.rs_raw > 0)
row.os_share = row.rs_raw / sum_rs if sum_rs > 0 else 0.0
# 非 eligible 不赋 rank
for row in rows:
if row.rs_raw < min_rs:
row.os_rank = None
if top_rs <= 0 or row.rs_raw < min_rs_for_label:
row.os_label = "POOL"
continue
ratio = row.rs_raw / top_rs
if ratio >= main_ratio or row is sorted_rows[0]:
row.os_label = "MAIN"
elif ratio >= comanage_ratio:
row.os_label = "COMANAGE"
else:
row.os_label = "POOL"
def _apply_display_scores(
self,
@@ -599,18 +585,27 @@ class RelationIndexTask(BaseIndexTask):
return "asinh"
return "none"
def _save_relation_rows(self, site_id: int, rows: List[RelationPairMetrics]) -> int:
def _save_relation_rows(self, site_id: int, rows: List[RelationPairMetrics], *, calc_time: Optional[datetime] = None) -> int:
# P19: 回测模式传入 calc_time正常模式用 NOW()
use_param_time = calc_time is not None
with self.db.conn.cursor() as cur:
cur.execute(
"DELETE FROM dws.dws_member_assistant_relation_index WHERE site_id = %s",
(site_id,),
)
# P19: 回测模式按 calc_time 删除(保留其他快照),正常模式按 site_id 全量刷新
if use_param_time:
cur.execute(
"DELETE FROM dws.dws_member_assistant_relation_index WHERE site_id = %s AND calc_time = %s",
(site_id, calc_time),
)
else:
cur.execute(
"DELETE FROM dws.dws_member_assistant_relation_index WHERE site_id = %s",
(site_id,),
)
if not rows:
self.db.conn.commit()
return 0
insert_sql = """
insert_sql = f"""
INSERT INTO dws.dws_member_assistant_relation_index (
site_id, tenant_id, member_id, assistant_id,
session_count, total_duration_minutes, basic_session_count, incentive_session_count,
@@ -628,41 +623,41 @@ class RelationIndexTask(BaseIndexTask):
%s, %s, %s,
%s, %s, %s, %s,
%s, %s, %s, %s,
NOW(), NOW(), NOW()
{('%s, %s, %s' if use_param_time else 'NOW(), NOW(), NOW()')}
)
"""
inserted = 0
for row in rows:
cur.execute(
insert_sql,
(
row.site_id,
row.tenant_id,
row.member_id,
row.assistant_id,
row.session_count,
row.total_duration_minutes,
row.basic_session_count,
row.incentive_session_count,
row.days_since_last_session,
row.rs_f,
row.rs_d,
row.rs_r,
row.rs_raw,
row.rs_display,
row.os_share,
row.os_label,
row.os_rank,
row.ms_f_short,
row.ms_f_long,
row.ms_raw,
row.ms_display,
row.ml_order_count,
row.ml_allocated_amount,
row.ml_raw,
row.ml_display,
),
params = (
row.site_id,
row.tenant_id,
row.member_id,
row.assistant_id,
row.session_count,
row.total_duration_minutes,
row.basic_session_count,
row.incentive_session_count,
row.days_since_last_session,
row.rs_f,
row.rs_d,
row.rs_r,
row.rs_raw,
row.rs_display,
row.os_share,
row.os_label,
row.os_rank,
row.ms_f_short,
row.ms_f_long,
row.ms_raw,
row.ms_display,
row.ml_order_count,
row.ml_allocated_amount,
row.ml_raw,
row.ml_display,
)
if use_param_time:
params = params + (calc_time, calc_time, calc_time)
cur.execute(insert_sql, params)
inserted += max(cur.rowcount, 0)
self.db.conn.commit()
return inserted

View File

@@ -173,13 +173,17 @@ class SpendingPowerIndexTask(BaseIndexTask):
# 1. 获取 site_id
site_id = self._get_site_id(context)
# P19: 回测模式用 as_of_date 替代 NOW()
from datetime import datetime as _dt
as_of = (context.as_of_date if context and context.as_of_date else None) or _dt.now(self.tz)
# 2. 加载参数(配置表 + 默认值合并)
db_params = self.load_index_parameters('SPI')
params = {**self.DEFAULT_PARAMS, **db_params}
# 3. 提取特征
features = self._extract_spending_features(site_id, params)
recharge_map = self._extract_recharge_features(site_id, params)
features = self._extract_spending_features(site_id, params, as_of=as_of)
recharge_map = self._extract_recharge_features(site_id, params, as_of=as_of)
# 合并充值特征
for mid, recharge_90 in recharge_map.items():
@@ -189,7 +193,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
# 批量计算日消费 EWMA 并合并
member_ids = list(features.keys())
ewma_map = self._compute_daily_spend_ewma_batch(site_id, member_ids, params)
ewma_map = self._compute_daily_spend_ewma_batch(site_id, member_ids, params, as_of=as_of)
for mid, ewma_val in ewma_map.items():
if mid in features:
features[mid].daily_spend_ewma_90 = ewma_val
@@ -279,7 +283,9 @@ class SpendingPowerIndexTask(BaseIndexTask):
feat.score_stability_display = stability_display_map.get(mid, 0.0)
# 8. delete-before-insert 持久化Req 9.3
records_inserted = self._save_spi_data(feat_list, site_id)
# P19: 仅回测模式传 calc_time正常模式传 Nonesite_id 全量刷新)
backtest_calc_time = as_of if (context and context.as_of_date) else None
records_inserted = self._save_spi_data(feat_list, site_id, calc_time=backtest_calc_time)
# 9. 保存分位点历史Req 9.5——SPI 总分
raw_values = [f.raw_score for f in feat_list]
@@ -323,7 +329,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
# =========================================================================
def _extract_spending_features(
self, site_id: int, params: Dict[str, float]
self, site_id: int, params: Dict[str, float], *, as_of=None
) -> Dict[int, SPIMemberFeatures]:
"""从 dwd_settlement_head 提取消费特征,按 member_id 聚合。
@@ -339,8 +345,8 @@ class SpendingPowerIndexTask(BaseIndexTask):
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
# 单条 SQL 同时聚合 30 天和 90 天窗口,避免两次扫描
# INTERVAL 天数通过 f-string 内嵌整数安全site_id 走参数化
# P19: 回测模式用 as_of 参数替代 NOW()
# 时间基准用 %s 参数化,正常模式传 NOW() 等效的 as_of
sql = f"""
WITH consume_source AS (
SELECT
@@ -356,7 +362,8 @@ class SpendingPowerIndexTask(BaseIndexTask):
AND COALESCE(mca.is_delete, 0) = 0
WHERE s.site_id = %s
AND s.settle_type IN (1, 3)
AND s.pay_time >= NOW() - INTERVAL '{long_days} days'
AND s.pay_time >= %s - INTERVAL '{long_days} days'
AND s.pay_time < %s
)
SELECT
canonical_member_id AS member_id,
@@ -367,17 +374,17 @@ class SpendingPowerIndexTask(BaseIndexTask):
COUNT(DISTINCT EXTRACT(ISOYEAR FROM pay_time)::int * 100
+ EXTRACT(WEEK FROM pay_time)::int) AS active_weeks_90,
-- 30 天窗口(子集过滤)
SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
SUM(CASE WHEN pay_time >= %s - INTERVAL '{short_days} days'
THEN pay_amount ELSE 0 END) AS spend_30,
SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
SUM(CASE WHEN pay_time >= %s - INTERVAL '{short_days} days'
THEN 1 ELSE 0 END) AS orders_30,
COUNT(DISTINCT CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
COUNT(DISTINCT CASE WHEN pay_time >= %s - INTERVAL '{short_days} days'
THEN {biz_expr} END) AS visit_days_30
FROM consume_source
WHERE canonical_member_id > 0
GROUP BY canonical_member_id
"""
rows = self.db.query(sql, (site_id,))
rows = self.db.query(sql, (site_id, as_of, as_of, as_of, as_of, as_of))
result: Dict[int, SPIMemberFeatures] = {}
for row in (rows or []):
@@ -412,7 +419,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
def _extract_recharge_features(
self, site_id: int, params: Dict[str, float]
self, site_id: int, params: Dict[str, float], *, as_of=None
) -> Dict[int, float]:
"""从 dwd_recharge_order 提取充值特征,返回 {member_id: recharge_90}。
@@ -421,6 +428,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
"""
long_days = int(params.get('spend_window_long_days', 90))
# P19: 回测模式用 as_of 参数替代 NOW()
sql = f"""
WITH recharge_source AS (
SELECT
@@ -435,7 +443,8 @@ class SpendingPowerIndexTask(BaseIndexTask):
AND COALESCE(mca.is_delete, 0) = 0
WHERE r.site_id = %s
AND r.settle_type = 5
AND r.pay_time >= NOW() - INTERVAL '{long_days} days'
AND r.pay_time >= %s - INTERVAL '{long_days} days'
AND r.pay_time < %s
)
SELECT
canonical_member_id AS member_id,
@@ -444,7 +453,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
WHERE canonical_member_id > 0
GROUP BY canonical_member_id
"""
rows = self.db.query(sql, (site_id,))
rows = self.db.query(sql, (site_id, as_of, as_of))
result: Dict[int, float] = {}
for row in (rows or []):
@@ -513,7 +522,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
return ewma
def _compute_daily_spend_ewma_batch(
self, site_id: int, member_ids: List[int], params: Dict[str, float]
self, site_id: int, member_ids: List[int], params: Dict[str, float], *, as_of=None
) -> Dict[int, float]:
"""批量计算多个会员的日消费 EWMA单次 SQL 查询避免 N+1。
@@ -528,6 +537,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr_s = biz_date_sql_expr("s.pay_time", cutoff)
# P19: 回测模式用 as_of 参数替代 NOW()
sql = f"""
WITH consume_source AS (
SELECT
@@ -543,7 +553,8 @@ class SpendingPowerIndexTask(BaseIndexTask):
AND COALESCE(mca.is_delete, 0) = 0
WHERE s.site_id = %s
AND s.settle_type IN (1, 3)
AND s.pay_time >= NOW() - INTERVAL '{long_days} days'
AND s.pay_time >= %s - INTERVAL '{long_days} days'
AND s.pay_time < %s
)
SELECT canonical_member_id AS member_id,
pay_date,
@@ -553,7 +564,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
GROUP BY canonical_member_id, pay_date
ORDER BY canonical_member_id, pay_date
"""
rows = self.db.query(sql, (site_id,))
rows = self.db.query(sql, (site_id, as_of, as_of))
# 按 member_id 分组,逐组计算 EWMA
result: Dict[int, float] = {}
@@ -727,21 +738,31 @@ class SpendingPowerIndexTask(BaseIndexTask):
# =========================================================================
def _save_spi_data(
self, data_list: List[SPIMemberFeatures], site_id: int
self, data_list: List[SPIMemberFeatures], site_id: int, *, calc_time=None
) -> int:
"""delete-before-insert 写入 dws_member_spending_power_index"""
with self.db.conn.cursor() as cur:
# 先删除该门店旧记录Req 9.3
cur.execute(
"DELETE FROM dws.dws_member_spending_power_index WHERE site_id = %s",
(site_id,),
)
# P19: 回测模式按 calc_time 删除(保留其他快照),正常模式按 site_id 全量刷新
use_param_time = calc_time is not None
if use_param_time:
cur.execute(
"DELETE FROM dws.dws_member_spending_power_index WHERE site_id = %s AND calc_time = %s",
(site_id, calc_time),
)
else:
cur.execute(
"DELETE FROM dws.dws_member_spending_power_index WHERE site_id = %s",
(site_id,),
)
if not data_list:
self.db.conn.commit()
return 0
insert_sql = """
# P19: 回测模式传入 calc_time正常模式用 NOW()
use_param_time = calc_time is not None
time_placeholder = "%s, %s, %s" if use_param_time else "NOW(), NOW(), NOW()"
insert_sql = f"""
INSERT INTO dws.dws_member_spending_power_index (
site_id, member_id,
spend_30, spend_90, recharge_90,
@@ -761,7 +782,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
%s, %s, %s,
%s, %s, %s,
%s, %s,
NOW(), NOW(), NOW()
{time_placeholder}
)
"""
inserted = 0
@@ -773,7 +794,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
return max(lo, min(hi, v))
for f in data_list:
cur.execute(insert_sql, (
params_tuple = (
f.site_id, f.member_id,
f.spend_30, f.spend_90, f.recharge_90,
f.orders_30, f.orders_90,
@@ -787,7 +808,10 @@ class SpendingPowerIndexTask(BaseIndexTask):
_clamp(f.score_stability_display, 0, DISP_MAX),
_clamp(f.raw_score, -RAW_MAX, RAW_MAX),
_clamp(f.display_score, 0, DISP_MAX),
))
)
if use_param_time:
params_tuple = params_tuple + (calc_time, calc_time, calc_time)
cur.execute(insert_sql, params_tuple)
inserted += max(cur.rowcount, 0)
self.db.conn.commit()

View File

@@ -7,7 +7,7 @@ from __future__ import annotations
import math
from dataclasses import dataclass
from datetime import date, timedelta
from datetime import date, datetime, timedelta
from typing import Any, Dict, List, Optional, Tuple
from .member_index_base import MemberActivityData, MemberIndexBaseTask
@@ -178,7 +178,9 @@ class WinbackIndexTask(MemberIndexBaseTask):
avg_raw=sum(all_raw) / len(all_raw)
)
inserted = self._save_winback_data(winback_list)
# P19: 回测模式传入 calc_time
calc_time = (context.as_of_date if context and context.as_of_date else None)
inserted = self._save_winback_data(winback_list, calc_time=calc_time)
self.logger.info("WBI calculation finished, inserted %d rows", inserted)
return {
@@ -339,21 +341,29 @@ class WinbackIndexTask(MemberIndexBaseTask):
if data.raw_score < 0:
data.raw_score = 0.0
def _save_winback_data(self, data_list: List[MemberWinbackData]) -> int:
def _save_winback_data(self, data_list: List[MemberWinbackData], *, calc_time: Optional[datetime] = None) -> int:
"""保存 WBI 数据"""
if not data_list:
return 0
site_id = data_list[0].activity.site_id
# 按门店全量刷新,避免因分群变化导致过期数据残留。
delete_sql = """
DELETE FROM dws.dws_member_winback_index
WHERE site_id = %s
"""
# P19: 回测模式传入 calc_time正常模式用 NOW()
use_param_time = calc_time is not None
# P19: 回测模式按 calc_time 删除(保留其他快照),正常模式按 site_id 全量刷新
with self.db.conn.cursor() as cur:
cur.execute(delete_sql, (site_id,))
if use_param_time:
cur.execute(
"DELETE FROM dws.dws_member_winback_index WHERE site_id = %s AND calc_time = %s",
(site_id, calc_time),
)
else:
cur.execute(
"DELETE FROM dws.dws_member_winback_index WHERE site_id = %s",
(site_id,),
)
insert_sql = """
time_placeholder = "%s, %s, %s" if use_param_time else "NOW(), NOW(), NOW()"
insert_sql = f"""
INSERT INTO dws.dws_member_winback_index (
site_id, tenant_id, member_id,
status, segment,
@@ -379,7 +389,7 @@ class WinbackIndexTask(MemberIndexBaseTask):
%s, %s,
%s, %s,
%s,
NOW(), NOW(), NOW()
{time_placeholder}
)
"""
@@ -387,7 +397,7 @@ class WinbackIndexTask(MemberIndexBaseTask):
with self.db.conn.cursor() as cur:
for data in data_list:
activity = data.activity
cur.execute(insert_sql, (
params = (
activity.site_id, activity.tenant_id, activity.member_id,
data.status, data.segment,
activity.member_create_time, activity.first_visit_time, activity.last_visit_time, activity.last_recharge_time,
@@ -399,7 +409,10 @@ class WinbackIndexTask(MemberIndexBaseTask):
data.ideal_interval_days, data.ideal_next_visit_date,
data.raw_score, data.display_score,
None,
))
)
if use_param_time:
params = params + (calc_time, calc_time, calc_time)
cur.execute(insert_sql, params)
inserted += cur.rowcount
self.db.conn.commit()

View File

@@ -53,6 +53,9 @@ class MemberConsumptionTask(BaseDwsTask):
# CHANGE 2025-07-15 | task 1.3: 声明日期列,供基类默认 load() 使用
DATE_COL = "stat_date"
# CHANGE 2026-03-22 | P14: 消费汇总完成后触发 AI 消费事件链
AI_TRIGGER_EVENT = "consumption"
def get_task_code(self) -> str:
return "DWS_MEMBER_CONSUMPTION"

View File

@@ -0,0 +1,124 @@
# AI_CHANGELOG
# - 2026-03-29 | Prompt: DWS_TASK_ENGINE ETL 任务 | 新建文件。
# 编排任务引擎全流程:完成检查 → 过期检查 → 任务生成。
# 通过 HTTP 调用后端 POST /api/internal/run-job 按 job_name 执行。
# -*- coding: utf-8 -*-
"""
DWS 任务引擎编排任务DWS_TASK_ENGINE
在 DWS 指数计算完成后执行,按顺序调用后端任务引擎的各个步骤:
1. recall_completion_check — 检测召回是否完成,生成回访任务
2. task_expiry_check — 标记超时未处理的任务
3. task_generator — 根据 WBI/NCI/RS 指数生成/替换任务
通过 HTTP 调用后端 POST /api/internal/run-jobInternal-Token 认证),
每步失败仅记录日志,不中断后续步骤。
"""
from __future__ import annotations
import logging
import os
from pathlib import Path
from typing import Any
import requests
from dotenv import load_dotenv
from ..base_task import BaseTask, TaskContext
# 加载根 .envBACKEND_API_URL / INTERNAL_API_TOKEN 不在 AppConfig 映射中)
# task_engine.py → dws/ → tasks/ → feiqiu/ → connectors/ → etl/ → apps/ → root
_REPO_ROOT = Path(__file__).resolve().parents[6]
load_dotenv(_REPO_ROOT / ".env", override=False)
logger = logging.getLogger(__name__)
_TIMEOUT = (5, 30) # 连接 5s读取 30s任务执行可能较慢
# 按顺序执行的后端任务列表
_JOB_SEQUENCE = [
"recall_completion_check",
"task_expiry_check",
"task_generator",
]
def _run_backend_job(backend_url: str, token: str, job_name: str) -> dict:
"""调用后端 POST /api/internal/run-job 执行指定任务。
Returns:
{"success": bool, "message": str} 或 {"success": False, "message": error}
"""
url = f"{backend_url}/api/internal/run-job"
headers = {
"Authorization": f"Internal-Token {token}",
"Content-Type": "application/json",
}
body = {"job_name": job_name}
try:
resp = requests.post(url, json=body, headers=headers, timeout=_TIMEOUT)
if resp.status_code == 200:
data = resp.json()
# 后端 ResponseWrapperMiddleware 包装:{"code": 0, "data": {...}}
inner = data.get("data", data)
return {
"success": inner.get("success", False),
"message": inner.get("message", ""),
}
else:
return {
"success": False,
"message": f"HTTP {resp.status_code}: {resp.text[:200]}",
}
except requests.RequestException as exc:
return {"success": False, "message": str(exc)}
class DwsTaskEngineTask(BaseTask):
"""DWS 任务引擎编排任务。
不读写 DWS 表,仅通过 HTTP 调用后端执行任务引擎步骤。
继承 BaseTask 而非 BaseDwsTask因为不需要 DWS 层的数据操作方法。
"""
def get_task_code(self) -> str:
return "DWS_TASK_ENGINE"
def extract(self, context: TaskContext) -> dict[str, Any]:
"""无需提取数据,返回空上下文。"""
return {}
def load(self, extracted: dict[str, Any], context: TaskContext) -> dict[str, Any]:
"""按顺序调用后端任务引擎的各个步骤。"""
backend_url = os.environ.get("BACKEND_API_URL", "").rstrip("/")
token = os.environ.get("INTERNAL_API_TOKEN", "")
if not backend_url:
self.logger.error("DWS_TASK_ENGINE 跳过BACKEND_API_URL 未配置")
return {"skipped": True, "reason": "BACKEND_API_URL 未配置"}
if not token:
self.logger.error("DWS_TASK_ENGINE 跳过INTERNAL_API_TOKEN 未配置")
return {"skipped": True, "reason": "INTERNAL_API_TOKEN 未配置"}
results: dict[str, Any] = {}
for job_name in _JOB_SEQUENCE:
self.logger.info("DWS_TASK_ENGINE: 执行 %s ...", job_name)
result = _run_backend_job(backend_url, token, job_name)
success = result.get("success", False)
message = result.get("message", "")
results[job_name] = {"success": success, "message": message}
if success:
self.logger.info(
"DWS_TASK_ENGINE: %s 成功 — %s", job_name, message
)
else:
self.logger.warning(
"DWS_TASK_ENGINE: %s 失败 — %s", job_name, message
)
return results