feat: 累积功能变更 — 聊天集成、租户管理、小程序更新、ETL 增强、迁移脚本
包含多个会话的累积代码变更: - backend: AI 聊天服务、触发器调度、认证增强、WebSocket、调度器最小间隔 - admin-web: ETL 状态页、任务管理、调度配置、登录优化 - miniprogram: 看板页面、聊天集成、UI 组件、导航更新 - etl: DWS 新任务(finance_area_daily/board_cache)、连接器增强 - tenant-admin: 项目初始化 - db: 19 个迁移脚本(etl_feiqiu 11 + zqyy_app 8) - packages/shared: 枚举和工具函数更新 - tools: 数据库工具、报表生成、健康检查 - docs: PRD/架构/部署/合约文档更新 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -20,6 +20,8 @@ class TaskContext:
|
||||
window_end: datetime
|
||||
window_minutes: int
|
||||
cursor: dict | None = None
|
||||
# P19: 回测模式 — "假装今天是 X 日",指数任务用此替代 datetime.now()
|
||||
as_of_date: datetime | None = None
|
||||
|
||||
|
||||
class BaseTask:
|
||||
|
||||
@@ -223,6 +223,10 @@ class DwdLoadTask(BaseTask):
|
||||
("person_tenant_org_id", "person_tenant_org_id", None),
|
||||
("person_tenant_org_name", "person_tenant_org_name", None),
|
||||
("register_source", "register_source", None),
|
||||
("other_pay_money_sum", "payload->>'otherPayMoneySum'", "numeric"), # CHANGE 2026-03-26: 其他支付金额汇总(从 payload 提取,ODS 列可能为空)
|
||||
("last_consume_time", "payload->>'lastConsumeTime'", "timestamptz"), # CHANGE 2026-03-26: 最后消费时间
|
||||
("non_consume_day_num", "payload->>'nonConsumeDayNum'", "integer"), # CHANGE 2026-03-26: 未消费天数
|
||||
("first_consumption", "payload->>'firstConsumption'", "integer"), # CHANGE 2026-03-26: 是否首次消费(1=是, 2=否)
|
||||
],
|
||||
"dwd.dim_member_card_account": [
|
||||
("member_card_id", "id", None),
|
||||
@@ -243,6 +247,8 @@ class DwdLoadTask(BaseTask):
|
||||
("electricity_discount", "electricity_discount", None),
|
||||
("electricity_card_deduct", "electricitycarddeduct", "boolean"),
|
||||
("recharge_freeze_balance", "rechargefreezebalance", None),
|
||||
("pdassisnatlevel", "pdassisnatlevel", None), # CHANGE 2026-03-26: 陪打助教等级限制
|
||||
("cxassisnatlevel", "cxassisnatlevel", None), # CHANGE 2026-03-26: 促销助教等级限制
|
||||
],
|
||||
"dwd.dim_tenant_goods": [
|
||||
("tenant_goods_id", "id", None),
|
||||
@@ -352,6 +358,7 @@ class DwdLoadTask(BaseTask):
|
||||
("table_fee_log_id", "id", None),
|
||||
("salesman_name", "salesman_name", None),
|
||||
("order_consumption_type", "order_consumption_type", None),
|
||||
("order_from", "order_from", None), # CHANGE 2026-03-26: 订单来源
|
||||
],
|
||||
"dwd.dwd_table_fee_adjust": [
|
||||
("table_fee_adjust_id", "id", None),
|
||||
@@ -389,6 +396,9 @@ class DwdLoadTask(BaseTask):
|
||||
("legacy_order_goods_id", "ordergoodsid", None),
|
||||
("site_name", "sitename", None),
|
||||
("legacy_site_id", "siteid", None),
|
||||
("activity_amount", "activity_amount", None), # CHANGE 2026-03-26: 活动金额
|
||||
("activity_id", "activity_id", None), # CHANGE 2026-03-26: 活动 ID
|
||||
("order_from", "order_from", None), # CHANGE 2026-03-26: 订单来源
|
||||
],
|
||||
"dwd.dwd_assistant_service_log": [
|
||||
("assistant_service_id", "id", None),
|
||||
@@ -411,6 +421,8 @@ class DwdLoadTask(BaseTask):
|
||||
("assistant_team_name", "assistantteamname", None),
|
||||
("operator_id", "operator_id", None), # 操作员 ID
|
||||
("operator_name", "operator_name", None), # 操作员姓名
|
||||
("deduct_leave_seconds", "deduct_leave_seconds", None), # CHANGE 2026-03-26: 扣除请假秒数
|
||||
("order_from", "order_from", None), # CHANGE 2026-03-26: 订单来源(1=线下, 4=小程序)
|
||||
],
|
||||
"dwd.dwd_member_balance_change": [
|
||||
("balance_change_id", "id", None),
|
||||
@@ -543,6 +555,7 @@ class DwdLoadTask(BaseTask):
|
||||
("order_remark", "orderremark", None),
|
||||
("operator_id", "operatorid", None),
|
||||
("salesman_user_id", "salesmanuserid", None),
|
||||
("order_from", "orderfrom", None), # CHANGE 2026-03-26: 订单来源(来自 settleList.orderFrom)
|
||||
# CHANGE: intent=删除 settle_list 映射,该列已从 DWD 表中移除(与 ODS payload 冗余)
|
||||
# assumptions=结算明细可随时从 ODS payload->'settleList' 按需提取
|
||||
# prompt=P20260214-040000
|
||||
@@ -639,6 +652,7 @@ class DwdLoadTask(BaseTask):
|
||||
("range_inventory", '"rangeinventory"', "numeric"), # 盘点调整量
|
||||
("current_stock", '"currentstock"', "numeric"), # 当前库存
|
||||
("site_id", '"siteid"', "bigint"), # 门店 ID(ODS 入库时注入)
|
||||
("create_time", '"createtime"', "timestamptz"), # CHANGE 2026-03-26: 库存记录创建时间
|
||||
],
|
||||
# 库存变动流水:goods_stock_movements(ODS 列名全小写)
|
||||
# CHANGE 2026-02-21: BUG 10 fix — ODS 列名是小写,不是驼峰
|
||||
|
||||
@@ -26,6 +26,9 @@ from .finance_daily_task import FinanceDailyTask
|
||||
from .finance_recharge_task import FinanceRechargeTask
|
||||
from .finance_income_task import FinanceIncomeStructureTask
|
||||
from .finance_discount_task import FinanceDiscountDetailTask
|
||||
from .finance_area_daily import FinanceAreaDailyTask
|
||||
from .finance_board_cache import FinanceBoardCacheTask
|
||||
from .coach_area_hours_task import CoachAreaHoursTask
|
||||
from .finance_base_task import FinanceBaseTask
|
||||
from .maintenance_task import DwsMaintenanceTask
|
||||
from .goods_stock_daily_task import GoodsStockDailyTask
|
||||
@@ -63,9 +66,12 @@ __all__ = [
|
||||
# 财务维度
|
||||
"FinanceBaseTask",
|
||||
"FinanceDailyTask",
|
||||
"FinanceAreaDailyTask",
|
||||
"FinanceBoardCacheTask",
|
||||
"FinanceRechargeTask",
|
||||
"FinanceIncomeStructureTask",
|
||||
"FinanceDiscountDetailTask",
|
||||
"CoachAreaHoursTask",
|
||||
"DwsMaintenanceTask",
|
||||
# 库存维度
|
||||
"GoodsStockDailyTask",
|
||||
|
||||
@@ -155,6 +155,11 @@ class BaseDwsTask(BaseTask):
|
||||
# 子类声明日期列名,用于 extract 时间过滤和 load 幂等删除
|
||||
# 未声明时 load() 回退到 "stat_date"
|
||||
DATE_COL: str | None = None
|
||||
|
||||
# CHANGE 2026-03-22 | P14: AI 触发集成
|
||||
# 子类设置此属性以在任务成功后触发 AI 事件
|
||||
# 值为事件类型字符串(如 "dws_completed" / "consumption"),None 表示不触发
|
||||
AI_TRIGGER_EVENT: str | None = None
|
||||
|
||||
# ==========================================================================
|
||||
# 抽象方法(子类必须实现)
|
||||
@@ -239,6 +244,38 @@ class BaseDwsTask(BaseTask):
|
||||
"extra": {"deleted": deleted},
|
||||
}
|
||||
|
||||
# ==========================================================================
|
||||
# 主流程覆盖 — AI 触发集成
|
||||
# ==========================================================================
|
||||
|
||||
def execute(self, cursor_data: dict | None = None) -> dict:
|
||||
"""覆盖 BaseTask.execute(),成功后触发 AI 事件(如已配置)。"""
|
||||
result = super().execute(cursor_data)
|
||||
|
||||
# CHANGE 2026-03-22 | P14: DWS 任务完成后触发 AI 事件
|
||||
if self.AI_TRIGGER_EVENT and result.get("status") == "SUCCESS":
|
||||
self._fire_ai_trigger()
|
||||
|
||||
return result
|
||||
|
||||
def _fire_ai_trigger(self) -> None:
|
||||
"""发送 AI 触发事件,失败不中断任务流程。"""
|
||||
try:
|
||||
from utils.ai_trigger import trigger_ai_event
|
||||
|
||||
site_id = self.config.get("app.store_id")
|
||||
trigger_ai_event(
|
||||
event_type=self.AI_TRIGGER_EVENT,
|
||||
site_id=site_id,
|
||||
payload={"task_code": self.get_task_code()},
|
||||
)
|
||||
except Exception:
|
||||
self.logger.warning(
|
||||
"%s: AI 触发失败,不影响任务结果",
|
||||
self.get_task_code(),
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# ==========================================================================
|
||||
# 时间计算方法
|
||||
# ==========================================================================
|
||||
|
||||
244
apps/etl/connectors/feiqiu/tasks/dws/coach_area_hours_task.py
Normal file
244
apps/etl/connectors/feiqiu/tasks/dws/coach_area_hours_task.py
Normal file
@@ -0,0 +1,244 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
DWS_COACH_AREA_HOURS — 助教区域课时汇总表
|
||||
|
||||
按 (site_id, stat_month, assistant_id, area_code) 粒度聚合助教课时,
|
||||
为财务看板助教分析板块的区域过滤提供数据支撑。
|
||||
|
||||
数据来源:dwd_assistant_service_log + dwd_assistant_service_log_ex + dim_table
|
||||
更新策略:delete-before-insert(按 site_id + stat_month 删除后插入)
|
||||
|
||||
CHANGE 2026-03-29 | Prompt: 助教分析按区域细化 | 新建 ETL 任务
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from neozqyy_shared.area_mapping import (
|
||||
ALL_AREA_CODES,
|
||||
SPECIFIC_AREA_CODES,
|
||||
resolve_area_code,
|
||||
)
|
||||
|
||||
from tasks.dws.base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_ZERO = Decimal("0")
|
||||
|
||||
# 课程类型映射(skill_name → 字段前缀)
|
||||
_SKILL_MAP = {
|
||||
"基础课": "base",
|
||||
"附加课": "bonus",
|
||||
"包厢课": "room",
|
||||
}
|
||||
|
||||
|
||||
class CoachAreaHoursTask(BaseDwsTask):
|
||||
"""助教区域课时汇总 ETL 任务"""
|
||||
|
||||
def get_task_code(self) -> str:
|
||||
return "DWS_COACH_AREA_HOURS"
|
||||
|
||||
def get_target_table(self) -> str:
|
||||
return "dws.dws_coach_area_hours"
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
|
||||
return ["site_id", "stat_month", "assistant_id", "area_code"]
|
||||
|
||||
def extract(self, context: TaskContext) -> Dict[str, Any]:
|
||||
end_date = (
|
||||
context.window_end.date()
|
||||
if hasattr(context.window_end, "date")
|
||||
else context.window_end
|
||||
)
|
||||
site_id = context.store_id
|
||||
# 取当月第一天
|
||||
stat_month = end_date.replace(day=1)
|
||||
|
||||
sql = """
|
||||
SELECT
|
||||
s.site_assistant_id AS assistant_id,
|
||||
dt.site_table_area_name AS area_name,
|
||||
s.skill_name,
|
||||
s.income_seconds,
|
||||
COALESCE(ex.is_trash, 0) AS is_trash
|
||||
FROM dwd.dwd_assistant_service_log s
|
||||
LEFT JOIN dwd.dwd_assistant_service_log_ex ex
|
||||
ON s.assistant_service_id = ex.assistant_service_id
|
||||
LEFT JOIN dwd.dim_table dt
|
||||
ON s.site_table_id = dt.table_id
|
||||
AND dt.scd2_is_current = 1
|
||||
WHERE s.site_id = %s
|
||||
AND s.is_delete = 0
|
||||
AND DATE(s.start_use_time) >= %s
|
||||
AND DATE(s.start_use_time) < %s
|
||||
"""
|
||||
# 下月第一天
|
||||
if stat_month.month == 12:
|
||||
next_month = stat_month.replace(year=stat_month.year + 1, month=1)
|
||||
else:
|
||||
next_month = stat_month.replace(month=stat_month.month + 1)
|
||||
|
||||
rows = self.db.query(sql, (site_id, stat_month, next_month))
|
||||
records = [dict(r) for r in rows] if rows else []
|
||||
|
||||
self.logger.info(
|
||||
"%s: 提取 %d 条服务记录,月份 %s",
|
||||
self.get_task_code(), len(records), stat_month,
|
||||
)
|
||||
return {
|
||||
"records": records,
|
||||
"stat_month": stat_month,
|
||||
"site_id": site_id,
|
||||
"tenant_id": self.config.get("app.tenant_id", site_id),
|
||||
}
|
||||
|
||||
def transform(
|
||||
self, extracted: Dict[str, Any], context: TaskContext
|
||||
) -> List[Dict[str, Any]]:
|
||||
return transform_coach_area_hours(
|
||||
records=extracted["records"],
|
||||
stat_month=extracted["stat_month"],
|
||||
site_id=extracted["site_id"],
|
||||
tenant_id=extracted["tenant_id"],
|
||||
)
|
||||
|
||||
def load(
|
||||
self, transformed: List[Dict[str, Any]], context: TaskContext
|
||||
) -> dict:
|
||||
if not transformed:
|
||||
return {"inserted": 0}
|
||||
|
||||
site_id = transformed[0]["site_id"]
|
||||
stat_month = transformed[0]["stat_month"]
|
||||
|
||||
# delete-before-insert
|
||||
self.db.execute(
|
||||
"DELETE FROM dws.dws_coach_area_hours WHERE site_id = %s AND stat_month = %s",
|
||||
(site_id, stat_month),
|
||||
)
|
||||
|
||||
cols = [
|
||||
"site_id", "tenant_id", "stat_month", "assistant_id", "area_code",
|
||||
"base_hours", "bonus_hours", "room_hours",
|
||||
"effective_hours", "trashed_hours",
|
||||
"base_service_count", "bonus_service_count", "room_service_count",
|
||||
]
|
||||
placeholders = ", ".join(["%s"] * len(cols))
|
||||
insert_sql = f"INSERT INTO dws.dws_coach_area_hours ({', '.join(cols)}) VALUES ({placeholders})"
|
||||
|
||||
for row in transformed:
|
||||
self.db.execute(insert_sql, tuple(row[c] for c in cols))
|
||||
|
||||
self.logger.info(
|
||||
"%s: 写入 %d 行(site_id=%s, month=%s)",
|
||||
self.get_task_code(), len(transformed), site_id, stat_month,
|
||||
)
|
||||
return {"inserted": len(transformed)}
|
||||
|
||||
|
||||
# ── 纯函数(可被回填脚本和属性测试直接调用) ─────────────────────────
|
||||
|
||||
|
||||
def transform_coach_area_hours(
|
||||
records: List[Dict[str, Any]],
|
||||
stat_month: date,
|
||||
site_id: int,
|
||||
tenant_id: int,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
将 DWD 服务记录按 (assistant_id, area_code) 聚合为区域课时。
|
||||
|
||||
返回列表,每个元素对应一行 dws_coach_area_hours。
|
||||
包含 9 个 area_code(hallA~ktv + hall + all)× N 个助教。
|
||||
"""
|
||||
# 按 (assistant_id, area_code) 聚合
|
||||
# key: (assistant_id, area_code) → {base_hours, bonus_hours, ...}
|
||||
agg: Dict[tuple, Dict[str, Any]] = {}
|
||||
|
||||
for rec in records:
|
||||
assistant_id = rec["assistant_id"]
|
||||
area_name = rec.get("area_name")
|
||||
skill_name = rec.get("skill_name")
|
||||
income_seconds = rec.get("income_seconds") or 0
|
||||
is_trash = rec.get("is_trash", 0)
|
||||
|
||||
hours = Decimal(str(income_seconds)) / Decimal("3600")
|
||||
skill_prefix = _SKILL_MAP.get(skill_name) # base/bonus/room/None
|
||||
|
||||
# 解析区域
|
||||
area_code = resolve_area_code(area_name) if area_name else None
|
||||
if area_code is None:
|
||||
# 未知区域或 NULL table_id → 不计入具体区域,只计入 all
|
||||
target_areas = []
|
||||
else:
|
||||
target_areas = [area_code]
|
||||
|
||||
# 更新具体区域
|
||||
for ac in target_areas:
|
||||
_accumulate(agg, assistant_id, ac, skill_prefix, hours, is_trash)
|
||||
|
||||
# CHANGE 2026-03-29 | hall = 台球大厅(hallA+hallB+hallC),不含 vip/snooker/mahjong/ktv
|
||||
if area_code in ("hallA", "hallB", "hallC"):
|
||||
_accumulate(agg, assistant_id, "hall", skill_prefix, hours, is_trash)
|
||||
# all 始终累加
|
||||
_accumulate(agg, assistant_id, "all", skill_prefix, hours, is_trash)
|
||||
|
||||
# 构建输出行
|
||||
result = []
|
||||
for (aid, ac), data in sorted(agg.items()):
|
||||
total_hours = data["base_hours"] + data["bonus_hours"] + data["room_hours"]
|
||||
effective = total_hours - data["trashed_hours"]
|
||||
result.append({
|
||||
"site_id": site_id,
|
||||
"tenant_id": tenant_id,
|
||||
"stat_month": stat_month,
|
||||
"assistant_id": aid,
|
||||
"area_code": ac,
|
||||
"base_hours": data["base_hours"],
|
||||
"bonus_hours": data["bonus_hours"],
|
||||
"room_hours": data["room_hours"],
|
||||
"effective_hours": max(effective, _ZERO),
|
||||
"trashed_hours": data["trashed_hours"],
|
||||
"base_service_count": data["base_service_count"],
|
||||
"bonus_service_count": data["bonus_service_count"],
|
||||
"room_service_count": data["room_service_count"],
|
||||
})
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def _accumulate(
|
||||
agg: Dict[tuple, Dict[str, Any]],
|
||||
assistant_id: int,
|
||||
area_code: str,
|
||||
skill_prefix: str | None,
|
||||
hours: Decimal,
|
||||
is_trash: int,
|
||||
) -> None:
|
||||
"""累加单条服务记录到聚合字典"""
|
||||
key = (assistant_id, area_code)
|
||||
if key not in agg:
|
||||
agg[key] = {
|
||||
"base_hours": _ZERO,
|
||||
"bonus_hours": _ZERO,
|
||||
"room_hours": _ZERO,
|
||||
"trashed_hours": _ZERO,
|
||||
"base_service_count": 0,
|
||||
"bonus_service_count": 0,
|
||||
"room_service_count": 0,
|
||||
}
|
||||
bucket = agg[key]
|
||||
|
||||
if is_trash == 1:
|
||||
bucket["trashed_hours"] += hours
|
||||
return
|
||||
|
||||
if skill_prefix:
|
||||
bucket[f"{skill_prefix}_hours"] += hours
|
||||
bucket[f"{skill_prefix}_service_count"] += 1
|
||||
# skill_name 为 NULL 的记录:不计入任何课程类型的 hours,但已计入 hall/all
|
||||
586
apps/etl/connectors/feiqiu/tasks/dws/finance_area_daily.py
Normal file
586
apps/etl/connectors/feiqiu/tasks/dws/finance_area_daily.py
Normal file
@@ -0,0 +1,586 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
财务区域日粒度汇总任务
|
||||
|
||||
功能说明:
|
||||
以 (site_id, stat_date, area_code) 为粒度,按桌台区域聚合当日财务数据。
|
||||
输出 9 行:7 个具体区域 (hallA~ktv) + hall(历史兼容)+ all。
|
||||
|
||||
数据来源:
|
||||
- dwd_settlement_head + dim_table (scd2_is_current=1):结算单 + 桌台区域
|
||||
- dws_finance_daily_summary:全局现金流/充值/卡消费(仅 all 行使用)
|
||||
|
||||
目标表:
|
||||
dws.dws_finance_area_daily
|
||||
|
||||
更新策略:
|
||||
- 更新频率:每小时更新当日数据
|
||||
- 幂等方式:delete-before-insert(按 site_id + stat_date 删除后插入 9 行)
|
||||
|
||||
业务规则:
|
||||
- 收入恒等式:gross_amount = table_fee + goods + assistant_pd + assistant_cx
|
||||
- 优惠恒等式:discount_total = groupbuy + vip + manual + gift_card + rounding + other
|
||||
- 确认收入:confirmed_income = gross_amount - discount_total
|
||||
- 非 all 行现金流/卡消费/充值字段 = 0
|
||||
- hall 行 = 各具体区域之和(历史兼容)
|
||||
- all 行 = 各具体区域之和(收入/优惠),现金流/充值/卡消费来自 dws_finance_daily_summary
|
||||
- settle_type IN (1, 3) 过滤
|
||||
- discount_gift_card 使用赠送卡消费金额口径
|
||||
|
||||
Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 8.1, 8.2
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from neozqyy_shared.area_mapping import (
|
||||
ALL_AREA_CODES,
|
||||
SPECIFIC_AREA_CODES,
|
||||
resolve_area_code,
|
||||
)
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import TaskContext
|
||||
from .finance_base_task import FinanceBaseTask
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 收入字段(按区域聚合)
|
||||
_REVENUE_FIELDS = [
|
||||
"table_fee_amount",
|
||||
"goods_amount",
|
||||
"assistant_pd_amount",
|
||||
"assistant_cx_amount",
|
||||
"gross_amount",
|
||||
]
|
||||
|
||||
# 优惠字段(按区域聚合)
|
||||
_DISCOUNT_FIELDS = [
|
||||
"discount_groupbuy",
|
||||
"discount_vip",
|
||||
"discount_manual",
|
||||
"discount_gift_card",
|
||||
"discount_rounding",
|
||||
"discount_other",
|
||||
"discount_total",
|
||||
]
|
||||
|
||||
# 现金流字段(仅 all 行有值)
|
||||
_CASHFLOW_FIELDS = [
|
||||
"cash_pay_amount",
|
||||
"cash_paper_amount",
|
||||
"scan_pay_amount",
|
||||
"groupbuy_pay_amount",
|
||||
"recharge_cash_inflow",
|
||||
"cash_inflow_total",
|
||||
"cash_outflow_total",
|
||||
"cash_balance_change",
|
||||
]
|
||||
|
||||
# 卡消费字段(仅 all 行有值)
|
||||
_CARD_FIELDS = [
|
||||
"card_consume_total",
|
||||
"recharge_card_consume",
|
||||
"gift_card_consume",
|
||||
]
|
||||
|
||||
# 充值字段(仅 all 行有值)
|
||||
_RECHARGE_FIELDS = [
|
||||
"recharge_cash",
|
||||
"first_recharge_cash",
|
||||
"renewal_cash",
|
||||
]
|
||||
|
||||
# 所有仅 all 行有值的字段
|
||||
_ALL_ONLY_FIELDS = _CASHFLOW_FIELDS + _CARD_FIELDS + _RECHARGE_FIELDS
|
||||
|
||||
# 按区域聚合的字段(收入 + 优惠 + order_count)
|
||||
_AREA_AGG_FIELDS = _REVENUE_FIELDS + _DISCOUNT_FIELDS + ["confirmed_income", "order_count"]
|
||||
|
||||
_ZERO = Decimal("0")
|
||||
|
||||
|
||||
class FinanceAreaDailyTask(FinanceBaseTask):
|
||||
"""
|
||||
财务区域日粒度汇总任务
|
||||
|
||||
按 (site_id, stat_date, area_code) 粒度预计算收入、优惠、现金流等数据。
|
||||
每次写入 9 行(hallA~ktv + hall + all)。
|
||||
"""
|
||||
|
||||
DATE_COL = "stat_date"
|
||||
|
||||
def get_task_code(self) -> str:
|
||||
return "DWS_FINANCE_AREA_DAILY"
|
||||
|
||||
def get_target_table(self) -> str:
|
||||
return "dws.dws_finance_area_daily"
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
|
||||
return ["site_id", "stat_date", "area_code"]
|
||||
|
||||
# ======================================================================
|
||||
# Extract
|
||||
# ======================================================================
|
||||
|
||||
def extract(self, context: TaskContext) -> Dict[str, Any]:
|
||||
"""提取结算单(含区域)和全局现金流/充值/卡消费数据。"""
|
||||
start_date = (
|
||||
context.window_start.date()
|
||||
if hasattr(context.window_start, "date")
|
||||
else context.window_start
|
||||
)
|
||||
end_date = (
|
||||
context.window_end.date()
|
||||
if hasattr(context.window_end, "date")
|
||||
else context.window_end
|
||||
)
|
||||
site_id = context.store_id
|
||||
|
||||
self.logger.info(
|
||||
"%s: 提取数据,日期范围 %s ~ %s",
|
||||
self.get_task_code(),
|
||||
start_date,
|
||||
end_date,
|
||||
)
|
||||
|
||||
# 1. 结算单 + dim_table 区域映射
|
||||
settlement_rows = self._extract_settlement_with_area(
|
||||
site_id, start_date, end_date
|
||||
)
|
||||
|
||||
# 2. 全局现金流/充值/卡消费(从现有 dws_finance_daily_summary)
|
||||
global_summary = self._extract_global_summary(site_id, start_date, end_date)
|
||||
|
||||
return {
|
||||
"settlement_rows": settlement_rows,
|
||||
"global_summary": global_summary,
|
||||
"start_date": start_date,
|
||||
"end_date": end_date,
|
||||
"site_id": site_id,
|
||||
}
|
||||
|
||||
def _extract_settlement_with_area(
|
||||
self, site_id: int, start_date: date, end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""从 dwd_settlement_head + dim_table 提取结算单(含区域名称)。"""
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("sh.pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
{biz_expr} AS stat_date,
|
||||
dt.site_table_area_name AS area_name,
|
||||
sh.settle_type,
|
||||
-- 收入
|
||||
sh.table_charge_money AS table_fee_amount,
|
||||
sh.goods_money AS goods_amount,
|
||||
sh.assistant_pd_money AS assistant_pd_amount,
|
||||
sh.assistant_cx_money AS assistant_cx_amount,
|
||||
(sh.table_charge_money + sh.goods_money
|
||||
+ sh.assistant_pd_money + sh.assistant_cx_money)
|
||||
AS gross_amount,
|
||||
-- 优惠原始字段
|
||||
sh.coupon_amount,
|
||||
sh.pl_coupon_sale_amount,
|
||||
sh.adjust_amount,
|
||||
sh.member_discount_amount,
|
||||
sh.rounding_amount,
|
||||
sh.gift_card_amount
|
||||
FROM dwd.dwd_settlement_head sh
|
||||
LEFT JOIN dwd.dim_table dt
|
||||
ON dt.table_id = sh.table_id
|
||||
AND dt.site_id = sh.site_id
|
||||
AND dt.scd2_is_current = 1
|
||||
WHERE sh.site_id = %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND sh.settle_type IN (1, 3)
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _extract_global_summary(
|
||||
self, site_id: int, start_date: date, end_date: date
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""从 dws_finance_daily_summary 提取全局现金流/充值/卡消费字段。"""
|
||||
sql = """
|
||||
SELECT
|
||||
stat_date,
|
||||
cash_pay_amount,
|
||||
cash_paper_amount,
|
||||
scan_pay_amount,
|
||||
groupbuy_pay_amount,
|
||||
recharge_cash_inflow,
|
||||
cash_inflow_total,
|
||||
cash_outflow_total,
|
||||
cash_balance_change,
|
||||
card_consume_total,
|
||||
recharge_card_consume,
|
||||
gift_card_consume,
|
||||
recharge_cash,
|
||||
first_recharge_amount AS first_recharge_cash,
|
||||
renewal_amount AS renewal_cash
|
||||
FROM dws.dws_finance_daily_summary
|
||||
WHERE site_id = %s
|
||||
AND stat_date >= %s
|
||||
AND stat_date <= %s
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
# ======================================================================
|
||||
# Transform(纯函数,不依赖数据库)
|
||||
# ======================================================================
|
||||
|
||||
def transform(
|
||||
self, extracted: Dict[str, Any], context: TaskContext
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""按区域聚合收入和优惠,构建 9 行输出。"""
|
||||
settlement_rows = extracted["settlement_rows"]
|
||||
global_summary = extracted["global_summary"]
|
||||
site_id = extracted["site_id"]
|
||||
|
||||
return transform_area_daily(
|
||||
settlement_rows=settlement_rows,
|
||||
global_summary=global_summary,
|
||||
site_id=site_id,
|
||||
tenant_id=self.config.get("app.tenant_id", site_id),
|
||||
safe_decimal_fn=self.safe_decimal,
|
||||
logger=self.logger,
|
||||
)
|
||||
|
||||
# ======================================================================
|
||||
# Load(覆盖基类:按 site_id + stat_date 删除,而非仅按 DATE_COL)
|
||||
# ======================================================================
|
||||
|
||||
def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> dict:
|
||||
"""delete-before-insert:按 site_id + stat_date 删除后插入 9 行。"""
|
||||
if not transformed:
|
||||
return {
|
||||
"counts": {
|
||||
"fetched": 0,
|
||||
"inserted": 0,
|
||||
"updated": 0,
|
||||
"skipped": 0,
|
||||
"errors": 0,
|
||||
}
|
||||
}
|
||||
|
||||
# 目标表已含 schema 前缀
|
||||
target_table = self.get_target_table()
|
||||
|
||||
start_date = (
|
||||
context.window_start.date()
|
||||
if hasattr(context.window_start, "date")
|
||||
else context.window_start
|
||||
)
|
||||
end_date = (
|
||||
context.window_end.date()
|
||||
if hasattr(context.window_end, "date")
|
||||
else context.window_end
|
||||
)
|
||||
|
||||
delete_sql = f"""
|
||||
DELETE FROM {target_table}
|
||||
WHERE site_id = %s
|
||||
AND stat_date >= %s
|
||||
AND stat_date <= %s
|
||||
"""
|
||||
|
||||
columns = list(transformed[0].keys())
|
||||
cols_str = ", ".join(columns)
|
||||
placeholders = ", ".join(["%s"] * len(columns))
|
||||
insert_sql = f"INSERT INTO {target_table} ({cols_str}) VALUES ({placeholders})"
|
||||
|
||||
with self.db.conn.cursor() as cur:
|
||||
cur.execute(delete_sql, (context.store_id, start_date, end_date))
|
||||
deleted = cur.rowcount
|
||||
|
||||
inserted = 0
|
||||
for row in transformed:
|
||||
values = [row.get(col) for col in columns]
|
||||
cur.execute(insert_sql, values)
|
||||
inserted += cur.rowcount
|
||||
|
||||
self.logger.info(
|
||||
"%s: 删除 %d 行,插入 %d 行",
|
||||
self.get_task_code(),
|
||||
deleted,
|
||||
inserted,
|
||||
)
|
||||
|
||||
return {
|
||||
"counts": {
|
||||
"fetched": len(transformed),
|
||||
"inserted": inserted,
|
||||
"updated": 0,
|
||||
"skipped": 0,
|
||||
"errors": 0,
|
||||
},
|
||||
"extra": {"deleted": deleted},
|
||||
}
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# 纯函数:transform 核心逻辑(方便属性测试直接调用)
|
||||
# ======================================================================
|
||||
|
||||
|
||||
def transform_area_daily(
|
||||
settlement_rows: List[Dict[str, Any]],
|
||||
global_summary: List[Dict[str, Any]],
|
||||
site_id: int,
|
||||
tenant_id: int,
|
||||
safe_decimal_fn=None,
|
||||
logger=None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""按区域聚合结算单,构建 9 行输出。
|
||||
|
||||
这是一个纯函数(不依赖数据库),方便属性测试直接调用。
|
||||
|
||||
Args:
|
||||
settlement_rows: 结算单行(含 area_name、settle_type 等)
|
||||
global_summary: 全局现金流/充值/卡消费(按 stat_date 索引)
|
||||
site_id: 门店 ID
|
||||
tenant_id: 租户 ID
|
||||
safe_decimal_fn: Decimal 安全转换函数(默认使用内置实现)
|
||||
logger: 日志记录器(可选)
|
||||
|
||||
Returns:
|
||||
9 × N 行字典列表(N = 涉及的日期数)
|
||||
"""
|
||||
if safe_decimal_fn is None:
|
||||
safe_decimal_fn = _safe_decimal
|
||||
if logger is None:
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 全局汇总按日期索引
|
||||
global_index: Dict[date, Dict[str, Any]] = {}
|
||||
for row in global_summary:
|
||||
sd = row.get("stat_date")
|
||||
if sd is not None:
|
||||
global_index[sd] = row
|
||||
|
||||
# ── 第一步:按 (stat_date, area_code) 聚合结算单 ──────────────
|
||||
# area_agg[stat_date][area_code] = {field: Decimal, ...}
|
||||
area_agg: Dict[date, Dict[str, Dict[str, Decimal]]] = defaultdict(
|
||||
lambda: defaultdict(lambda: defaultdict(lambda: _ZERO))
|
||||
)
|
||||
# 收集所有涉及的日期
|
||||
all_dates: set[date] = set()
|
||||
|
||||
for row in settlement_rows:
|
||||
sd = row.get("stat_date")
|
||||
if sd is None:
|
||||
continue
|
||||
all_dates.add(sd)
|
||||
|
||||
# settle_type 过滤(extract 已过滤,transform 层再次防御)
|
||||
settle_type = row.get("settle_type")
|
||||
if settle_type not in (1, 3):
|
||||
continue
|
||||
|
||||
area_name = row.get("area_name")
|
||||
area_code = resolve_area_code(area_name)
|
||||
|
||||
if area_code is None:
|
||||
# 未知区域:记录警告,不计入具体区域行,但仍计入 all 行
|
||||
logger.warning(
|
||||
"DWS_FINANCE_AREA_DAILY: 未知区域名称 '%s',不计入具体区域",
|
||||
area_name,
|
||||
)
|
||||
|
||||
# 提取金额
|
||||
table_fee = safe_decimal_fn(row.get("table_fee_amount", 0))
|
||||
goods = safe_decimal_fn(row.get("goods_amount", 0))
|
||||
pd_amount = safe_decimal_fn(row.get("assistant_pd_amount", 0))
|
||||
cx_amount = safe_decimal_fn(row.get("assistant_cx_amount", 0))
|
||||
gross = table_fee + goods + pd_amount + cx_amount
|
||||
|
||||
coupon = safe_decimal_fn(row.get("coupon_amount", 0))
|
||||
pl_coupon_sale = safe_decimal_fn(row.get("pl_coupon_sale_amount", 0))
|
||||
adjust = safe_decimal_fn(row.get("adjust_amount", 0))
|
||||
member_discount = safe_decimal_fn(row.get("member_discount_amount", 0))
|
||||
rounding = safe_decimal_fn(row.get("rounding_amount", 0))
|
||||
gift_card_amount = safe_decimal_fn(row.get("gift_card_amount", 0))
|
||||
|
||||
# 团购优惠 = coupon_amount - 团购支付金额
|
||||
groupbuy_pay = pl_coupon_sale if pl_coupon_sale > 0 else _ZERO
|
||||
discount_groupbuy = max(coupon - groupbuy_pay, _ZERO)
|
||||
|
||||
# 赠送卡消费金额口径(与现有 ETL 一致)
|
||||
discount_gift_card = gift_card_amount
|
||||
|
||||
# 手动调整 = adjust_amount(简化:不拆分大客户,区域级无法区分)
|
||||
discount_manual = adjust
|
||||
|
||||
discount_total = (
|
||||
discount_groupbuy
|
||||
+ member_discount
|
||||
+ discount_manual
|
||||
+ discount_gift_card
|
||||
+ rounding
|
||||
)
|
||||
# discount_other 在区域级暂为 0(adjust 全部归入 manual)
|
||||
discount_other = _ZERO
|
||||
|
||||
# 重新计算 discount_total 确保恒等式
|
||||
discount_total = (
|
||||
discount_groupbuy
|
||||
+ member_discount
|
||||
+ discount_manual
|
||||
+ discount_gift_card
|
||||
+ rounding
|
||||
+ discount_other
|
||||
)
|
||||
|
||||
confirmed_income = gross - discount_total
|
||||
|
||||
fields = {
|
||||
"table_fee_amount": table_fee,
|
||||
"goods_amount": goods,
|
||||
"assistant_pd_amount": pd_amount,
|
||||
"assistant_cx_amount": cx_amount,
|
||||
"gross_amount": gross,
|
||||
"discount_groupbuy": discount_groupbuy,
|
||||
"discount_vip": member_discount,
|
||||
"discount_manual": discount_manual,
|
||||
"discount_gift_card": discount_gift_card,
|
||||
"discount_rounding": rounding,
|
||||
"discount_other": discount_other,
|
||||
"discount_total": discount_total,
|
||||
"confirmed_income": confirmed_income,
|
||||
"order_count": 1,
|
||||
}
|
||||
|
||||
# 累加到具体区域
|
||||
if area_code is not None:
|
||||
bucket = area_agg[sd][area_code]
|
||||
for k, v in fields.items():
|
||||
bucket[k] = bucket[k] + v
|
||||
|
||||
# 也收集 global_summary 中的日期
|
||||
for sd in global_index:
|
||||
all_dates.add(sd)
|
||||
|
||||
# ── 第二步:构建 9 行输出 ─────────────────────────────────────
|
||||
results: List[Dict[str, Any]] = []
|
||||
|
||||
for sd in sorted(all_dates):
|
||||
day_agg = area_agg.get(sd, {})
|
||||
|
||||
# 计算各具体区域行
|
||||
specific_rows: Dict[str, Dict[str, Any]] = {}
|
||||
for ac in SPECIFIC_AREA_CODES:
|
||||
bucket = day_agg.get(ac, {})
|
||||
row_data = _build_area_row(
|
||||
site_id=site_id,
|
||||
tenant_id=tenant_id,
|
||||
stat_date=sd,
|
||||
area_code=ac,
|
||||
agg=bucket,
|
||||
)
|
||||
specific_rows[ac] = row_data
|
||||
|
||||
# CHANGE 2026-03-29 | hall = 台球大厅(hallA+hallB+hallC),不含 vip/snooker/mahjong/ktv
|
||||
_HALL_CODES = ("hallA", "hallB", "hallC")
|
||||
hall_source = [specific_rows[ac] for ac in _HALL_CODES if ac in specific_rows]
|
||||
hall_row = _build_sum_row(
|
||||
site_id=site_id,
|
||||
tenant_id=tenant_id,
|
||||
stat_date=sd,
|
||||
area_code="hall",
|
||||
source_rows=hall_source,
|
||||
)
|
||||
|
||||
# all 行 = 各具体区域之和(收入/优惠/order_count)+ 全局现金流/充值/卡消费
|
||||
all_row = _build_sum_row(
|
||||
site_id=site_id,
|
||||
tenant_id=tenant_id,
|
||||
stat_date=sd,
|
||||
area_code="all",
|
||||
source_rows=list(specific_rows.values()),
|
||||
)
|
||||
# 填充全局现金流/充值/卡消费
|
||||
gs = global_index.get(sd, {})
|
||||
for field in _ALL_ONLY_FIELDS:
|
||||
all_row[field] = safe_decimal_fn(gs.get(field, 0))
|
||||
|
||||
# 输出顺序:all, hall, hallA, hallB, hallC, vip, snooker, mahjong, ktv
|
||||
results.append(all_row)
|
||||
results.append(hall_row)
|
||||
for ac in SPECIFIC_AREA_CODES:
|
||||
results.append(specific_rows[ac])
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def _build_area_row(
|
||||
site_id: int,
|
||||
tenant_id: int,
|
||||
stat_date: date,
|
||||
area_code: str,
|
||||
agg: Dict[str, Decimal],
|
||||
) -> Dict[str, Any]:
|
||||
"""构建单个区域行。"""
|
||||
row: Dict[str, Any] = {
|
||||
"site_id": site_id,
|
||||
"tenant_id": tenant_id,
|
||||
"stat_date": stat_date,
|
||||
"area_code": area_code,
|
||||
}
|
||||
# 收入 + 优惠 + confirmed_income + order_count
|
||||
for field in _AREA_AGG_FIELDS:
|
||||
if field == "order_count":
|
||||
row[field] = int(agg.get(field, 0))
|
||||
else:
|
||||
row[field] = agg.get(field, _ZERO)
|
||||
# 非 all 行:现金流/卡消费/充值 = 0
|
||||
for field in _ALL_ONLY_FIELDS:
|
||||
row[field] = _ZERO
|
||||
return row
|
||||
|
||||
|
||||
def _build_sum_row(
|
||||
site_id: int,
|
||||
tenant_id: int,
|
||||
stat_date: date,
|
||||
area_code: str,
|
||||
source_rows: List[Dict[str, Any]],
|
||||
) -> Dict[str, Any]:
|
||||
"""构建 hall/all 汇总行(收入/优惠 = 各具体区域之和)。"""
|
||||
row: Dict[str, Any] = {
|
||||
"site_id": site_id,
|
||||
"tenant_id": tenant_id,
|
||||
"stat_date": stat_date,
|
||||
"area_code": area_code,
|
||||
}
|
||||
for field in _AREA_AGG_FIELDS:
|
||||
if field == "order_count":
|
||||
row[field] = sum(int(r.get(field, 0)) for r in source_rows)
|
||||
else:
|
||||
row[field] = sum(
|
||||
(r.get(field, _ZERO) for r in source_rows), _ZERO
|
||||
)
|
||||
# 非 all 行:现金流/卡消费/充值 = 0(all 行由调用方覆盖)
|
||||
for field in _ALL_ONLY_FIELDS:
|
||||
row[field] = _ZERO
|
||||
return row
|
||||
|
||||
|
||||
def _safe_decimal(value: Any, default: Decimal = _ZERO) -> Decimal:
|
||||
"""内置的安全 Decimal 转换(供纯函数使用)。"""
|
||||
if value is None:
|
||||
return default
|
||||
try:
|
||||
return Decimal(str(value))
|
||||
except Exception:
|
||||
return default
|
||||
|
||||
|
||||
__all__ = ["FinanceAreaDailyTask", "transform_area_daily"]
|
||||
@@ -118,6 +118,35 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 支付方式拆分
|
||||
# ------------------------------------------------------------------
|
||||
# CHANGE 2026-03-27 | board-finance-integration T1.1 | 新增支付方式拆分
|
||||
def _extract_payment_split(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""支付方式拆分日汇总(纸币现金 vs 扫码/离线支付)"""
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("p.pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
{biz_expr} AS stat_date,
|
||||
SUM(CASE WHEN p.payment_method = 2 THEN p.pay_amount ELSE 0 END) AS cash_paper_amount,
|
||||
SUM(CASE WHEN p.payment_method = 4 THEN p.pay_amount ELSE 0 END) AS scan_pay_amount
|
||||
FROM dwd.dwd_payment p
|
||||
WHERE p.site_id = %s
|
||||
AND p.pay_status = 2
|
||||
AND p.relate_type = 2
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
GROUP BY {biz_expr}
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 团购核销汇总
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
525
apps/etl/connectors/feiqiu/tasks/dws/finance_board_cache.py
Normal file
525
apps/etl/connectors/feiqiu/tasks/dws/finance_board_cache.py
Normal file
@@ -0,0 +1,525 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
财务看板缓存层任务
|
||||
|
||||
功能说明:
|
||||
基于 dws_finance_area_daily 日粒度数据,为已完成周期(lastMonth/lastWeek/
|
||||
lastQuarter/quarter3/half6)× 9 个区域 = 45 组合维护缓存。
|
||||
通过数据指纹(MD5)检测源数据变化,仅对指纹不一致的组合重算并 upsert。
|
||||
|
||||
数据来源:
|
||||
- dws.dws_finance_area_daily:日粒度原子层
|
||||
- dws.dws_finance_board_cache:缓存层(读取已有指纹做对比)
|
||||
|
||||
目标表:
|
||||
dws.dws_finance_board_cache
|
||||
|
||||
更新策略:
|
||||
- 更新频率:每天一次(营业日切点后)
|
||||
- 幂等方式:ON CONFLICT (site_id, time_range, area_code) DO UPDATE
|
||||
- 当期周期(month/week/quarter)不写入缓存
|
||||
|
||||
业务规则:
|
||||
- 指纹 = MD5(sorted [(stat_date, gross_amount, discount_total), ...])
|
||||
- 指纹一致 → 跳过;指纹不一致 → 从日粒度表 SUM 重算后 upsert
|
||||
- overview 8 项:occurrence/discount/discount_rate/confirmed_revenue/
|
||||
cash_in/cash_out/cash_balance/balance_rate
|
||||
|
||||
Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
from datetime import date, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from neozqyy_shared.area_mapping import ALL_AREA_CODES
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 已完成周期(当期 month/week/quarter 不缓存)
|
||||
COMPLETED_PERIODS: List[str] = [
|
||||
"lastMonth",
|
||||
"lastWeek",
|
||||
"lastQuarter",
|
||||
"quarter3",
|
||||
"half6",
|
||||
]
|
||||
|
||||
# 当期周期(不写入缓存)
|
||||
CURRENT_PERIODS: set[str] = {"month", "week", "quarter"}
|
||||
|
||||
_ZERO = Decimal("0")
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# 纯函数:指纹计算(导出供属性测试直接调用)
|
||||
# ======================================================================
|
||||
|
||||
|
||||
def compute_fingerprint(rows: list[dict]) -> str:
|
||||
"""对 (stat_date, gross_amount, discount_total) 排序后计算 MD5 指纹。
|
||||
|
||||
这是一个纯函数,不依赖数据库,方便属性测试直接调用。
|
||||
|
||||
Args:
|
||||
rows: 日粒度行列表,每行至少包含 stat_date/gross_amount/discount_total
|
||||
|
||||
Returns:
|
||||
MD5 十六进制字符串
|
||||
"""
|
||||
sorted_rows = sorted(rows, key=lambda r: str(r["stat_date"]))
|
||||
payload = json.dumps(
|
||||
[
|
||||
(str(r["stat_date"]), str(r["gross_amount"]), str(r["discount_total"]))
|
||||
for r in sorted_rows
|
||||
]
|
||||
)
|
||||
return hashlib.md5(payload.encode()).hexdigest()
|
||||
|
||||
|
||||
def is_current_period(time_range: str) -> bool:
|
||||
"""判断 time_range 是否为当期周期(不应写入缓存)。
|
||||
|
||||
纯函数,导出供属性测试直接调用。
|
||||
"""
|
||||
return time_range in CURRENT_PERIODS
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# 日期范围计算(复用 board_service 的逻辑)
|
||||
# ======================================================================
|
||||
|
||||
|
||||
def _calc_period_date_range(
|
||||
time_range: str, ref_date: date | None = None
|
||||
) -> Tuple[date, date]:
|
||||
"""计算已完成周期的日期范围。
|
||||
|
||||
与 board_service._calc_date_range 保持一致。
|
||||
"""
|
||||
today = ref_date or date.today()
|
||||
|
||||
if time_range == "lastMonth":
|
||||
first_of_this_month = today.replace(day=1)
|
||||
last_month_end = first_of_this_month - timedelta(days=1)
|
||||
last_month_start = last_month_end.replace(day=1)
|
||||
return last_month_start, last_month_end
|
||||
|
||||
if time_range == "lastWeek":
|
||||
this_monday = today - timedelta(days=today.weekday())
|
||||
last_sunday = this_monday - timedelta(days=1)
|
||||
last_monday = last_sunday - timedelta(days=6)
|
||||
return last_monday, last_sunday
|
||||
|
||||
if time_range == "lastQuarter":
|
||||
q_start_month = (today.month - 1) // 3 * 3 + 1
|
||||
this_q_start = date(today.year, q_start_month, 1)
|
||||
prev_q_end = this_q_start - timedelta(days=1)
|
||||
prev_q_start_month = (prev_q_end.month - 1) // 3 * 3 + 1
|
||||
prev_q_start = date(prev_q_end.year, prev_q_start_month, 1)
|
||||
return prev_q_start, prev_q_end
|
||||
|
||||
if time_range == "quarter3":
|
||||
first_of_this_month = today.replace(day=1)
|
||||
end = first_of_this_month - timedelta(days=1)
|
||||
start = _month_offset(first_of_this_month, -3)
|
||||
return start, end
|
||||
|
||||
if time_range == "half6":
|
||||
first_of_this_month = today.replace(day=1)
|
||||
end = first_of_this_month - timedelta(days=1)
|
||||
start = _month_offset(first_of_this_month, -6)
|
||||
return start, end
|
||||
|
||||
raise ValueError(f"不支持的已完成周期: {time_range}")
|
||||
|
||||
|
||||
def _month_offset(base_date: date, months: int) -> date:
|
||||
"""按月偏移日期(保持日不越界)。"""
|
||||
import calendar
|
||||
|
||||
total_months = base_date.year * 12 + (base_date.month - 1) + months
|
||||
year = total_months // 12
|
||||
month = total_months % 12 + 1
|
||||
last_day = calendar.monthrange(year, month)[1]
|
||||
day = min(base_date.day, last_day)
|
||||
return date(year, month, day)
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# ETL 任务类
|
||||
# ======================================================================
|
||||
|
||||
|
||||
class FinanceBoardCacheTask(BaseDwsTask):
|
||||
"""
|
||||
财务看板缓存层任务
|
||||
|
||||
遍历 5 个已完成周期 × 9 个区域 = 45 组合,
|
||||
通过数据指纹检测变化,仅对需要重算的组合 upsert 缓存。
|
||||
"""
|
||||
|
||||
DATE_COL = None # 缓存表无 stat_date 列
|
||||
|
||||
def get_task_code(self) -> str:
|
||||
return "DWS_FINANCE_BOARD_CACHE"
|
||||
|
||||
def get_target_table(self) -> str:
|
||||
return "dws.dws_finance_board_cache"
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
|
||||
return ["site_id", "time_range", "area_code"]
|
||||
|
||||
# ======================================================================
|
||||
# Extract
|
||||
# ======================================================================
|
||||
|
||||
def extract(self, context: TaskContext) -> Dict[str, Any]:
|
||||
"""遍历 45 组合,从日粒度表读取源数据行 + 从缓存表读取已有指纹。"""
|
||||
site_id = context.store_id
|
||||
|
||||
self.logger.info(
|
||||
"%s: 开始提取,站点 %s",
|
||||
self.get_task_code(),
|
||||
site_id,
|
||||
)
|
||||
|
||||
combinations: List[Dict[str, Any]] = []
|
||||
|
||||
for time_range in COMPLETED_PERIODS:
|
||||
start_date, end_date = _calc_period_date_range(time_range)
|
||||
|
||||
for area_code in ALL_AREA_CODES:
|
||||
# 从日粒度表读取源数据行
|
||||
daily_rows = self._read_daily_rows(
|
||||
site_id, start_date, end_date, area_code
|
||||
)
|
||||
|
||||
# 从缓存表读取已有指纹
|
||||
existing_fp = self._read_existing_fingerprint(
|
||||
site_id, time_range, area_code
|
||||
)
|
||||
|
||||
combinations.append(
|
||||
{
|
||||
"time_range": time_range,
|
||||
"area_code": area_code,
|
||||
"start_date": start_date,
|
||||
"end_date": end_date,
|
||||
"daily_rows": daily_rows,
|
||||
"existing_fingerprint": existing_fp,
|
||||
}
|
||||
)
|
||||
|
||||
return {
|
||||
"combinations": combinations,
|
||||
"site_id": site_id,
|
||||
}
|
||||
|
||||
def _read_daily_rows(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date,
|
||||
area_code: str,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""从 dws_finance_area_daily 读取日粒度行。"""
|
||||
sql = """
|
||||
SELECT stat_date, gross_amount, discount_total,
|
||||
confirmed_income,
|
||||
cash_inflow_total, cash_outflow_total, cash_balance_change
|
||||
FROM dws.dws_finance_area_daily
|
||||
WHERE site_id = %s
|
||||
AND stat_date >= %s
|
||||
AND stat_date <= %s
|
||||
AND area_code = %s
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date, area_code))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _read_existing_fingerprint(
|
||||
self,
|
||||
site_id: int,
|
||||
time_range: str,
|
||||
area_code: str,
|
||||
) -> Optional[str]:
|
||||
"""从缓存表读取已有指纹。"""
|
||||
sql = """
|
||||
SELECT data_fingerprint
|
||||
FROM dws.dws_finance_board_cache
|
||||
WHERE site_id = %s
|
||||
AND time_range = %s
|
||||
AND area_code = %s
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, time_range, area_code))
|
||||
if rows:
|
||||
return dict(rows[0]).get("data_fingerprint")
|
||||
return None
|
||||
|
||||
# ======================================================================
|
||||
# Transform(纯函数逻辑,标记需重算的组合)
|
||||
# ======================================================================
|
||||
|
||||
def transform(
|
||||
self, extracted: Dict[str, Any], context: TaskContext
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""计算指纹,与已有指纹对比,标记需重算的组合。"""
|
||||
site_id = extracted["site_id"]
|
||||
combinations = extracted["combinations"]
|
||||
|
||||
return transform_cache_combinations(
|
||||
combinations=combinations,
|
||||
site_id=site_id,
|
||||
logger=self.logger,
|
||||
)
|
||||
|
||||
# ======================================================================
|
||||
# Load(upsert 需重算的组合)
|
||||
# ======================================================================
|
||||
|
||||
def load(self, transformed: List[Dict[str, Any]], context: TaskContext) -> dict:
|
||||
"""对需重算的组合 upsert 到缓存表。"""
|
||||
if not transformed:
|
||||
return {
|
||||
"counts": {
|
||||
"fetched": 0,
|
||||
"inserted": 0,
|
||||
"updated": 0,
|
||||
"skipped": 0,
|
||||
"errors": 0,
|
||||
}
|
||||
}
|
||||
|
||||
site_id = context.store_id
|
||||
upserted = 0
|
||||
skipped = 0
|
||||
|
||||
for combo in transformed:
|
||||
if not combo.get("needs_recompute"):
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
# 从日粒度表 SUM 计算 overview 指标
|
||||
overview = self._sum_overview(
|
||||
site_id,
|
||||
combo["start_date"],
|
||||
combo["end_date"],
|
||||
combo["area_code"],
|
||||
)
|
||||
|
||||
self._upsert_cache(
|
||||
site_id=site_id,
|
||||
time_range=combo["time_range"],
|
||||
area_code=combo["area_code"],
|
||||
start_date=combo["start_date"],
|
||||
end_date=combo["end_date"],
|
||||
overview=overview,
|
||||
fingerprint=combo["new_fingerprint"],
|
||||
)
|
||||
upserted += 1
|
||||
|
||||
self.logger.info(
|
||||
"%s: upsert %d 组合,跳过 %d 组合",
|
||||
self.get_task_code(),
|
||||
upserted,
|
||||
skipped,
|
||||
)
|
||||
|
||||
return {
|
||||
"counts": {
|
||||
"fetched": len(transformed),
|
||||
"inserted": upserted,
|
||||
"updated": 0,
|
||||
"skipped": skipped,
|
||||
"errors": 0,
|
||||
}
|
||||
}
|
||||
|
||||
def _sum_overview(
|
||||
self,
|
||||
site_id: int,
|
||||
start_date: date,
|
||||
end_date: date,
|
||||
area_code: str,
|
||||
) -> Dict[str, Any]:
|
||||
"""从日粒度表 SUM 计算 overview 8 项指标。"""
|
||||
sql = """
|
||||
SELECT
|
||||
COALESCE(SUM(gross_amount), 0) AS occurrence,
|
||||
COALESCE(SUM(discount_total), 0) AS discount,
|
||||
COALESCE(SUM(confirmed_income), 0) AS confirmed_revenue,
|
||||
COALESCE(SUM(cash_inflow_total), 0) AS cash_in,
|
||||
COALESCE(SUM(cash_outflow_total), 0) AS cash_out,
|
||||
COALESCE(SUM(cash_balance_change), 0) AS cash_balance
|
||||
FROM dws.dws_finance_area_daily
|
||||
WHERE site_id = %s
|
||||
AND stat_date >= %s
|
||||
AND stat_date <= %s
|
||||
AND area_code = %s
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date, area_code))
|
||||
if rows:
|
||||
row = dict(rows[0])
|
||||
occurrence = Decimal(str(row.get("occurrence", 0)))
|
||||
discount = Decimal(str(row.get("discount", 0)))
|
||||
confirmed_revenue = Decimal(str(row.get("confirmed_revenue", 0)))
|
||||
cash_in = Decimal(str(row.get("cash_in", 0)))
|
||||
cash_out = Decimal(str(row.get("cash_out", 0)))
|
||||
cash_balance = Decimal(str(row.get("cash_balance", 0)))
|
||||
|
||||
# discount_rate = discount / occurrence(避免除零)
|
||||
discount_rate = (
|
||||
(discount / occurrence) if occurrence != 0 else _ZERO
|
||||
)
|
||||
# balance_rate = cash_balance / cash_in(避免除零)
|
||||
balance_rate = (
|
||||
(cash_balance / cash_in) if cash_in != 0 else _ZERO
|
||||
)
|
||||
|
||||
return {
|
||||
"occurrence": occurrence,
|
||||
"discount": discount,
|
||||
"discount_rate": discount_rate,
|
||||
"confirmed_revenue": confirmed_revenue,
|
||||
"cash_in": cash_in,
|
||||
"cash_out": cash_out,
|
||||
"cash_balance": cash_balance,
|
||||
"balance_rate": balance_rate,
|
||||
}
|
||||
|
||||
return {k: _ZERO for k in [
|
||||
"occurrence", "discount", "discount_rate", "confirmed_revenue",
|
||||
"cash_in", "cash_out", "cash_balance", "balance_rate",
|
||||
]}
|
||||
|
||||
def _upsert_cache(
|
||||
self,
|
||||
site_id: int,
|
||||
time_range: str,
|
||||
area_code: str,
|
||||
start_date: date,
|
||||
end_date: date,
|
||||
overview: Dict[str, Any],
|
||||
fingerprint: str,
|
||||
) -> None:
|
||||
"""ON CONFLICT DO UPDATE 写入缓存表。"""
|
||||
sql = """
|
||||
INSERT INTO dws.dws_finance_board_cache (
|
||||
site_id, time_range, area_code,
|
||||
start_date, end_date,
|
||||
occurrence, discount, discount_rate, confirmed_revenue,
|
||||
cash_in, cash_out, cash_balance, balance_rate,
|
||||
data_fingerprint, computed_at, updated_at
|
||||
) VALUES (
|
||||
%s, %s, %s,
|
||||
%s, %s,
|
||||
%s, %s, %s, %s,
|
||||
%s, %s, %s, %s,
|
||||
%s, NOW(), NOW()
|
||||
)
|
||||
ON CONFLICT (site_id, time_range, area_code) DO UPDATE SET
|
||||
start_date = EXCLUDED.start_date,
|
||||
end_date = EXCLUDED.end_date,
|
||||
occurrence = EXCLUDED.occurrence,
|
||||
discount = EXCLUDED.discount,
|
||||
discount_rate = EXCLUDED.discount_rate,
|
||||
confirmed_revenue = EXCLUDED.confirmed_revenue,
|
||||
cash_in = EXCLUDED.cash_in,
|
||||
cash_out = EXCLUDED.cash_out,
|
||||
cash_balance = EXCLUDED.cash_balance,
|
||||
balance_rate = EXCLUDED.balance_rate,
|
||||
data_fingerprint = EXCLUDED.data_fingerprint,
|
||||
computed_at = NOW(),
|
||||
updated_at = NOW()
|
||||
"""
|
||||
with self.db.conn.cursor() as cur:
|
||||
cur.execute(
|
||||
sql,
|
||||
(
|
||||
site_id,
|
||||
time_range,
|
||||
area_code,
|
||||
start_date,
|
||||
end_date,
|
||||
overview["occurrence"],
|
||||
overview["discount"],
|
||||
overview["discount_rate"],
|
||||
overview["confirmed_revenue"],
|
||||
overview["cash_in"],
|
||||
overview["cash_out"],
|
||||
overview["cash_balance"],
|
||||
overview["balance_rate"],
|
||||
fingerprint,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# 纯函数:transform 核心逻辑(方便属性测试直接调用)
|
||||
# ======================================================================
|
||||
|
||||
|
||||
def transform_cache_combinations(
|
||||
combinations: List[Dict[str, Any]],
|
||||
site_id: int,
|
||||
logger: Any = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""对每个组合计算指纹并与已有指纹对比,标记需重算的组合。
|
||||
|
||||
纯函数(不依赖数据库),方便属性测试直接调用。
|
||||
|
||||
Args:
|
||||
combinations: extract 输出的组合列表
|
||||
site_id: 门店 ID
|
||||
logger: 日志记录器(可选)
|
||||
|
||||
Returns:
|
||||
带 needs_recompute/new_fingerprint 标记的组合列表
|
||||
"""
|
||||
if logger is None:
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
results: List[Dict[str, Any]] = []
|
||||
|
||||
for combo in combinations:
|
||||
daily_rows = combo.get("daily_rows", [])
|
||||
existing_fp = combo.get("existing_fingerprint")
|
||||
|
||||
new_fp = compute_fingerprint(daily_rows)
|
||||
|
||||
needs_recompute = new_fp != existing_fp
|
||||
|
||||
if needs_recompute:
|
||||
logger.info(
|
||||
"DWS_FINANCE_BOARD_CACHE: %s/%s 指纹变化,需重算",
|
||||
combo["time_range"],
|
||||
combo["area_code"],
|
||||
)
|
||||
|
||||
results.append(
|
||||
{
|
||||
**combo,
|
||||
"site_id": site_id,
|
||||
"new_fingerprint": new_fp,
|
||||
"needs_recompute": needs_recompute,
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
__all__ = [
|
||||
"FinanceBoardCacheTask",
|
||||
"compute_fingerprint",
|
||||
"is_current_period",
|
||||
"transform_cache_combinations",
|
||||
"COMPLETED_PERIODS",
|
||||
"CURRENT_PERIODS",
|
||||
]
|
||||
@@ -55,6 +55,9 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
|
||||
# CHANGE 2025-07-15 | task 1.3: 声明日期列,供基类默认 load() 使用
|
||||
DATE_COL = "stat_date"
|
||||
|
||||
# CHANGE 2026-03-22 | P14: 财务日度任务完成后触发 AI App2 预生成
|
||||
AI_TRIGGER_EVENT = "dws_completed"
|
||||
|
||||
def get_task_code(self) -> str:
|
||||
return "DWS_FINANCE_DAILY"
|
||||
@@ -93,6 +96,10 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
|
||||
# 3.1 获取赠送卡消费汇总(余额变动)
|
||||
gift_card_summary = self._extract_gift_card_consume_summary(site_id, start_date, end_date)
|
||||
|
||||
# 3.2 获取支付方式拆分
|
||||
# CHANGE 2026-03-27 | board-finance-integration T1.1 | 新增支付方式拆分
|
||||
payment_split = self._extract_payment_split(site_id, start_date, end_date)
|
||||
|
||||
# 4. 获取支出汇总(来自导入表)
|
||||
expense_summary = self._extract_expense_summary(site_id, start_date, end_date)
|
||||
@@ -108,6 +115,7 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
'groupbuy_summary': groupbuy_summary,
|
||||
'recharge_summary': recharge_summary,
|
||||
'gift_card_summary': gift_card_summary,
|
||||
'payment_split': payment_split,
|
||||
'expense_summary': expense_summary,
|
||||
'platform_summary': platform_summary,
|
||||
'big_customer_summary': big_customer_summary,
|
||||
@@ -127,6 +135,7 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
expense_summary = extracted['expense_summary']
|
||||
platform_summary = extracted['platform_summary']
|
||||
big_customer_summary = extracted['big_customer_summary']
|
||||
payment_split = extracted['payment_split']
|
||||
site_id = extracted['site_id']
|
||||
|
||||
self.logger.info(
|
||||
@@ -149,6 +158,7 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
expense_index = {e['stat_date']: e for e in expense_summary}
|
||||
platform_index = {p['stat_date']: p for p in platform_summary}
|
||||
big_customer_index = {b['stat_date']: b for b in big_customer_summary}
|
||||
payment_split_index = {ps['stat_date']: ps for ps in payment_split}
|
||||
|
||||
results = []
|
||||
for stat_date in sorted(dates):
|
||||
@@ -159,9 +169,10 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
expense = expense_index.get(stat_date, {})
|
||||
platform = platform_index.get(stat_date, {})
|
||||
big_customer = big_customer_index.get(stat_date, {})
|
||||
ps = payment_split_index.get(stat_date, {})
|
||||
|
||||
record = self._build_daily_record(
|
||||
stat_date, settle, groupbuy, recharge, gift_card, expense, platform, big_customer, site_id
|
||||
stat_date, settle, groupbuy, recharge, gift_card, expense, platform, big_customer, site_id, ps
|
||||
)
|
||||
results.append(record)
|
||||
|
||||
@@ -185,7 +196,8 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
expense: Dict[str, Any],
|
||||
platform: Dict[str, Any],
|
||||
big_customer: Dict[str, Any],
|
||||
site_id: int
|
||||
site_id: int,
|
||||
payment_split: Dict[str, Any],
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
构建日度财务记录
|
||||
@@ -237,6 +249,11 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
# 确认收入
|
||||
confirmed_income = gross_amount - discount_total
|
||||
|
||||
# 支付方式拆分
|
||||
# CHANGE 2026-03-27 | board-finance-integration T1.1 | 新增支付方式拆分
|
||||
cash_paper_amount = self.safe_decimal(payment_split.get('cash_paper_amount', 0))
|
||||
scan_pay_amount = self.safe_decimal(payment_split.get('scan_pay_amount', 0))
|
||||
|
||||
# 现金流
|
||||
platform_settlement_amount = self.safe_decimal(platform.get('settlement_amount', 0))
|
||||
platform_fee_amount = (
|
||||
@@ -298,6 +315,8 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
# 现金流
|
||||
'cash_inflow_total': cash_inflow_total,
|
||||
'cash_pay_amount': cash_pay_amount,
|
||||
'cash_paper_amount': cash_paper_amount,
|
||||
'scan_pay_amount': scan_pay_amount,
|
||||
'groupbuy_pay_amount': groupbuy_pay_amount,
|
||||
'platform_settlement_amount': platform_settlement_amount,
|
||||
'platform_fee_amount': platform_fee_amount,
|
||||
|
||||
@@ -83,7 +83,7 @@ class BaseIndexTask(BaseDwsTask):
|
||||
self._index_params_cache_by_type: Dict[str, IndexParameters] = {}
|
||||
|
||||
# 默认参数
|
||||
DEFAULT_LOOKBACK_DAYS = 60
|
||||
DEFAULT_LOOKBACK_DAYS = 90
|
||||
DEFAULT_PERCENTILE_LOWER = 5
|
||||
DEFAULT_PERCENTILE_UPPER = 95
|
||||
DEFAULT_EWMA_ALPHA = 0.2
|
||||
|
||||
@@ -86,7 +86,9 @@ class MemberIndexBaseTask(BaseIndexTask):
|
||||
tenant_id = self._get_tenant_id()
|
||||
params = self._load_params()
|
||||
|
||||
activities = self._build_member_activity(site_id, tenant_id, params)
|
||||
# P19: 回测模式用 as_of_date 替代 datetime.now()
|
||||
as_of = (context.as_of_date if context and context.as_of_date else None) or datetime.now(self.tz)
|
||||
activities = self._build_member_activity(site_id, tenant_id, params, as_of=as_of)
|
||||
if not activities:
|
||||
self.logger.warning("No member activity data available; skip calculation")
|
||||
return {'status': 'skipped', 'reason': 'no_data'}
|
||||
@@ -402,9 +404,12 @@ class MemberIndexBaseTask(BaseIndexTask):
|
||||
site_id: int,
|
||||
tenant_id: int,
|
||||
params: Dict[str, float],
|
||||
*,
|
||||
as_of: Optional[datetime] = None,
|
||||
) -> Dict[int, MemberActivityData]:
|
||||
"""构建会员活动特征"""
|
||||
now = datetime.now(self.tz)
|
||||
# P19: 回测模式用 as_of 替代 datetime.now()
|
||||
now = as_of or datetime.now(self.tz)
|
||||
base_date = now.date()
|
||||
|
||||
visit_lookback_days = int(params.get('visit_lookback_days', self.DEFAULT_VISIT_LOOKBACK_DAYS))
|
||||
|
||||
@@ -202,7 +202,9 @@ class NewconvIndexTask(MemberIndexBaseTask):
|
||||
avg_raw=sum(all_raw) / len(all_raw)
|
||||
)
|
||||
|
||||
inserted = self._save_newconv_data(newconv_list)
|
||||
# P19: 回测模式传入 calc_time
|
||||
calc_time = (context.as_of_date if context and context.as_of_date else None)
|
||||
inserted = self._save_newconv_data(newconv_list, calc_time=calc_time)
|
||||
self.logger.info("NCI calculation finished, inserted %d rows", inserted)
|
||||
|
||||
return {
|
||||
@@ -286,21 +288,30 @@ class NewconvIndexTask(MemberIndexBaseTask):
|
||||
if data.raw_score < 0:
|
||||
data.raw_score = 0.0
|
||||
|
||||
def _save_newconv_data(self, data_list: List[MemberNewconvData]) -> int:
|
||||
def _save_newconv_data(self, data_list: List[MemberNewconvData], *, calc_time=None) -> int:
|
||||
"""保存 NCI 数据"""
|
||||
if not data_list:
|
||||
return 0
|
||||
|
||||
site_id = data_list[0].activity.site_id
|
||||
# 按门店全量刷新,避免因分群变化导致过期数据残留。
|
||||
delete_sql = """
|
||||
DELETE FROM dws.dws_member_newconv_index
|
||||
WHERE site_id = %s
|
||||
"""
|
||||
# P19: 回测模式按 calc_time 删除(保留其他快照),正常模式按 site_id 全量刷新
|
||||
use_param_time = calc_time is not None
|
||||
with self.db.conn.cursor() as cur:
|
||||
cur.execute(delete_sql, (site_id,))
|
||||
if use_param_time:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_newconv_index WHERE site_id = %s AND calc_time = %s",
|
||||
(site_id, calc_time),
|
||||
)
|
||||
else:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_newconv_index WHERE site_id = %s",
|
||||
(site_id,),
|
||||
)
|
||||
|
||||
insert_sql = """
|
||||
# P19: 回测模式传入 calc_time,正常模式用 NOW()
|
||||
use_param_time = calc_time is not None
|
||||
time_placeholder = "%s, %s, %s" if use_param_time else "NOW(), NOW(), NOW()"
|
||||
insert_sql = f"""
|
||||
INSERT INTO dws.dws_member_newconv_index (
|
||||
site_id, tenant_id, member_id,
|
||||
status, segment,
|
||||
@@ -328,7 +339,7 @@ class NewconvIndexTask(MemberIndexBaseTask):
|
||||
%s, %s, %s,
|
||||
%s, %s, %s,
|
||||
%s,
|
||||
NOW(), NOW(), NOW()
|
||||
{time_placeholder}
|
||||
)
|
||||
"""
|
||||
|
||||
@@ -336,7 +347,7 @@ class NewconvIndexTask(MemberIndexBaseTask):
|
||||
with self.db.conn.cursor() as cur:
|
||||
for data in data_list:
|
||||
activity = data.activity
|
||||
cur.execute(insert_sql, (
|
||||
params = (
|
||||
activity.site_id, activity.tenant_id, activity.member_id,
|
||||
data.status, data.segment,
|
||||
activity.member_create_time, activity.first_visit_time, activity.last_visit_time, activity.last_recharge_time,
|
||||
@@ -349,7 +360,10 @@ class NewconvIndexTask(MemberIndexBaseTask):
|
||||
data.raw_score_welcome, data.raw_score_convert, data.raw_score,
|
||||
data.display_score_welcome, data.display_score_convert, data.display_score,
|
||||
None,
|
||||
))
|
||||
)
|
||||
if use_param_time:
|
||||
params = params + (calc_time, calc_time, calc_time)
|
||||
cur.execute(insert_sql, params)
|
||||
inserted += cur.rowcount
|
||||
|
||||
self.db.conn.commit()
|
||||
|
||||
@@ -78,7 +78,7 @@ class RelationIndexTask(BaseIndexTask):
|
||||
INDEX_TYPE = "RS"
|
||||
|
||||
DEFAULT_PARAMS_RS: Dict[str, float] = {
|
||||
"lookback_days": 60,
|
||||
"lookback_days": 90,
|
||||
"session_merge_hours": 4,
|
||||
"incentive_weight": 1.5,
|
||||
"halflife_session": 14.0,
|
||||
@@ -93,15 +93,13 @@ class RelationIndexTask(BaseIndexTask):
|
||||
"ewma_alpha": 0.2,
|
||||
}
|
||||
DEFAULT_PARAMS_OS: Dict[str, float] = {
|
||||
"min_rs_raw_for_ownership": 0.05,
|
||||
"min_total_rs_raw": 0.10,
|
||||
"ownership_main_threshold": 0.60,
|
||||
"ownership_comanage_threshold": 0.35,
|
||||
"ownership_gap_threshold": 0.15,
|
||||
"min_rs_for_label": 0.10,
|
||||
"ownership_main_ratio": 0.70,
|
||||
"ownership_comanage_ratio": 0.30,
|
||||
"eps": 1e-6,
|
||||
}
|
||||
DEFAULT_PARAMS_MS: Dict[str, float] = {
|
||||
"lookback_days": 60,
|
||||
"lookback_days": 90,
|
||||
"session_merge_hours": 4,
|
||||
"incentive_weight": 1.5,
|
||||
"halflife_short": 7.0,
|
||||
@@ -115,7 +113,7 @@ class RelationIndexTask(BaseIndexTask):
|
||||
}
|
||||
# CHANGE 2026-02-13 | intent: ML 仅使用人工台账,移除 source_mode / recharge_attribute_hours
|
||||
DEFAULT_PARAMS_ML: Dict[str, float] = {
|
||||
"lookback_days": 60,
|
||||
"lookback_days": 90,
|
||||
"amount_base": 500.0,
|
||||
"halflife_recharge": 21.0,
|
||||
"percentile_lower": 5.0,
|
||||
@@ -143,7 +141,8 @@ class RelationIndexTask(BaseIndexTask):
|
||||
|
||||
site_id = self._get_site_id(context)
|
||||
tenant_id = self._get_tenant_id()
|
||||
now = datetime.now(self.tz)
|
||||
# P19: 回测模式用 as_of_date 替代 datetime.now()
|
||||
now = (context.as_of_date if context and context.as_of_date else None) or datetime.now(self.tz)
|
||||
|
||||
params_rs = self._load_params("RS", self.DEFAULT_PARAMS_RS)
|
||||
params_os = self._load_params("OS", self.DEFAULT_PARAMS_OS)
|
||||
@@ -151,8 +150,8 @@ class RelationIndexTask(BaseIndexTask):
|
||||
params_ml = self._load_params("ML", self.DEFAULT_PARAMS_ML)
|
||||
|
||||
service_lookback_days = max(
|
||||
int(params_rs.get("lookback_days", 60)),
|
||||
int(params_ms.get("lookback_days", 60)),
|
||||
int(params_rs.get("lookback_days", 90)),
|
||||
int(params_ms.get("lookback_days", 90)),
|
||||
)
|
||||
service_start = now - timedelta(days=service_lookback_days)
|
||||
merge_hours = max(
|
||||
@@ -181,7 +180,9 @@ class RelationIndexTask(BaseIndexTask):
|
||||
|
||||
self._apply_display_scores(pair_map, params_rs, params_ms, params_ml, site_id)
|
||||
|
||||
inserted = self._save_relation_rows(site_id, list(pair_map.values()))
|
||||
# P19: 仅回测模式传 calc_time(按 calc_time 删除保留其他快照),正常模式传 None(按 site_id 全量刷新)
|
||||
backtest_calc_time = now if (context and context.as_of_date) else None
|
||||
inserted = self._save_relation_rows(site_id, list(pair_map.values()), calc_time=backtest_calc_time)
|
||||
self.logger.info("关系指数计算完成,写入 %d 条记录", inserted)
|
||||
|
||||
return {
|
||||
@@ -313,7 +314,7 @@ class RelationIndexTask(BaseIndexTask):
|
||||
params: Dict[str, float],
|
||||
now: datetime,
|
||||
) -> None:
|
||||
lookback_days = int(params.get("lookback_days", 60))
|
||||
lookback_days = int(params.get("lookback_days", 90))
|
||||
halflife_session = float(params.get("halflife_session", 14.0))
|
||||
halflife_last = float(params.get("halflife_last", 10.0))
|
||||
weight_f = float(params.get("weight_f", 1.0))
|
||||
@@ -355,7 +356,7 @@ class RelationIndexTask(BaseIndexTask):
|
||||
params: Dict[str, float],
|
||||
now: datetime,
|
||||
) -> None:
|
||||
lookback_days = int(params.get("lookback_days", 60))
|
||||
lookback_days = int(params.get("lookback_days", 90))
|
||||
halflife_short = float(params.get("halflife_short", 7.0))
|
||||
halflife_long = float(params.get("halflife_long", 30.0))
|
||||
eps = float(params.get("eps", 1e-6))
|
||||
@@ -382,7 +383,7 @@ class RelationIndexTask(BaseIndexTask):
|
||||
site_id: int,
|
||||
now: datetime,
|
||||
) -> None:
|
||||
lookback_days = int(params.get("lookback_days", 60))
|
||||
lookback_days = int(params.get("lookback_days", 90))
|
||||
amount_base = float(params.get("amount_base", 500.0))
|
||||
halflife_recharge = float(params.get("halflife_recharge", 21.0))
|
||||
start_time = now - timedelta(days=lookback_days)
|
||||
@@ -439,68 +440,53 @@ class RelationIndexTask(BaseIndexTask):
|
||||
pair_map: Dict[Tuple[int, int], RelationPairMetrics],
|
||||
params: Dict[str, float],
|
||||
) -> None:
|
||||
min_rs = float(params.get("min_rs_raw_for_ownership", 0.05))
|
||||
min_total = float(params.get("min_total_rs_raw", 0.10))
|
||||
main_threshold = float(params.get("ownership_main_threshold", 0.60))
|
||||
comanage_threshold = float(params.get("ownership_comanage_threshold", 0.35))
|
||||
gap_threshold = float(params.get("ownership_gap_threshold", 0.15))
|
||||
"""CHANGE 2026-03-31 | 新 OS 方案:基于第一名分值比例。
|
||||
|
||||
规则:
|
||||
- 第一名一定为 MAIN(rs_raw ≥ min_rs_for_label)
|
||||
- 在第一名分值的 main_ratio_threshold 以内都为 MAIN
|
||||
- 在第一名分值的 comanage_ratio_threshold 以内都为 COMANAGE
|
||||
- 其余为 POOL
|
||||
- MAIN/COMANAGE 前提:rs_raw ≥ min_rs_for_label
|
||||
"""
|
||||
min_rs_for_label = float(params.get("min_rs_for_label", 0.1))
|
||||
main_ratio = float(params.get("ownership_main_ratio", 0.70))
|
||||
comanage_ratio = float(params.get("ownership_comanage_ratio", 0.30))
|
||||
|
||||
member_groups: Dict[int, List[RelationPairMetrics]] = {}
|
||||
for metrics in pair_map.values():
|
||||
member_groups.setdefault(metrics.member_id, []).append(metrics)
|
||||
|
||||
for _, rows in member_groups.items():
|
||||
eligible = [row for row in rows if row.rs_raw >= min_rs]
|
||||
sum_rs = sum(row.rs_raw for row in eligible)
|
||||
if sum_rs < min_total:
|
||||
for row in rows:
|
||||
row.os_share = 0.0
|
||||
row.os_label = "UNASSIGNED"
|
||||
row.os_rank = None
|
||||
continue
|
||||
|
||||
for row in rows:
|
||||
if row.rs_raw >= min_rs:
|
||||
row.os_share = row.rs_raw / sum_rs
|
||||
else:
|
||||
row.os_share = 0.0
|
||||
|
||||
sorted_eligible = sorted(
|
||||
eligible,
|
||||
# 按 rs_raw 降序排列
|
||||
sorted_rows = sorted(
|
||||
rows,
|
||||
key=lambda item: (
|
||||
-item.os_share,
|
||||
-item.rs_raw,
|
||||
item.days_since_last_session if item.days_since_last_session is not None else 10**9,
|
||||
item.assistant_id,
|
||||
),
|
||||
)
|
||||
for idx, row in enumerate(sorted_eligible, start=1):
|
||||
row.os_rank = idx
|
||||
|
||||
top1 = sorted_eligible[0]
|
||||
top2_share = sorted_eligible[1].os_share if len(sorted_eligible) > 1 else 0.0
|
||||
gap = top1.os_share - top2_share
|
||||
has_main = top1.os_share >= main_threshold and gap >= gap_threshold
|
||||
top_rs = sorted_rows[0].rs_raw if sorted_rows else 0.0
|
||||
|
||||
if has_main:
|
||||
for row in rows:
|
||||
if row is top1:
|
||||
row.os_label = "MAIN"
|
||||
elif row.os_share >= comanage_threshold:
|
||||
row.os_label = "COMANAGE"
|
||||
else:
|
||||
row.os_label = "POOL"
|
||||
else:
|
||||
for row in rows:
|
||||
if row.os_share >= comanage_threshold and row.rs_raw >= min_rs:
|
||||
row.os_label = "COMANAGE"
|
||||
else:
|
||||
row.os_label = "POOL"
|
||||
for idx, row in enumerate(sorted_rows):
|
||||
row.os_rank = idx + 1
|
||||
# 计算份额(保留兼容性,用于前端展示)
|
||||
sum_rs = sum(r.rs_raw for r in rows if r.rs_raw > 0)
|
||||
row.os_share = row.rs_raw / sum_rs if sum_rs > 0 else 0.0
|
||||
|
||||
# 非 eligible 不赋 rank
|
||||
for row in rows:
|
||||
if row.rs_raw < min_rs:
|
||||
row.os_rank = None
|
||||
if top_rs <= 0 or row.rs_raw < min_rs_for_label:
|
||||
row.os_label = "POOL"
|
||||
continue
|
||||
|
||||
ratio = row.rs_raw / top_rs
|
||||
if ratio >= main_ratio or row is sorted_rows[0]:
|
||||
row.os_label = "MAIN"
|
||||
elif ratio >= comanage_ratio:
|
||||
row.os_label = "COMANAGE"
|
||||
else:
|
||||
row.os_label = "POOL"
|
||||
|
||||
def _apply_display_scores(
|
||||
self,
|
||||
@@ -599,18 +585,27 @@ class RelationIndexTask(BaseIndexTask):
|
||||
return "asinh"
|
||||
return "none"
|
||||
|
||||
def _save_relation_rows(self, site_id: int, rows: List[RelationPairMetrics]) -> int:
|
||||
def _save_relation_rows(self, site_id: int, rows: List[RelationPairMetrics], *, calc_time: Optional[datetime] = None) -> int:
|
||||
# P19: 回测模式传入 calc_time,正常模式用 NOW()
|
||||
use_param_time = calc_time is not None
|
||||
with self.db.conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_assistant_relation_index WHERE site_id = %s",
|
||||
(site_id,),
|
||||
)
|
||||
# P19: 回测模式按 calc_time 删除(保留其他快照),正常模式按 site_id 全量刷新
|
||||
if use_param_time:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_assistant_relation_index WHERE site_id = %s AND calc_time = %s",
|
||||
(site_id, calc_time),
|
||||
)
|
||||
else:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_assistant_relation_index WHERE site_id = %s",
|
||||
(site_id,),
|
||||
)
|
||||
|
||||
if not rows:
|
||||
self.db.conn.commit()
|
||||
return 0
|
||||
|
||||
insert_sql = """
|
||||
insert_sql = f"""
|
||||
INSERT INTO dws.dws_member_assistant_relation_index (
|
||||
site_id, tenant_id, member_id, assistant_id,
|
||||
session_count, total_duration_minutes, basic_session_count, incentive_session_count,
|
||||
@@ -628,41 +623,41 @@ class RelationIndexTask(BaseIndexTask):
|
||||
%s, %s, %s,
|
||||
%s, %s, %s, %s,
|
||||
%s, %s, %s, %s,
|
||||
NOW(), NOW(), NOW()
|
||||
{('%s, %s, %s' if use_param_time else 'NOW(), NOW(), NOW()')}
|
||||
)
|
||||
"""
|
||||
inserted = 0
|
||||
for row in rows:
|
||||
cur.execute(
|
||||
insert_sql,
|
||||
(
|
||||
row.site_id,
|
||||
row.tenant_id,
|
||||
row.member_id,
|
||||
row.assistant_id,
|
||||
row.session_count,
|
||||
row.total_duration_minutes,
|
||||
row.basic_session_count,
|
||||
row.incentive_session_count,
|
||||
row.days_since_last_session,
|
||||
row.rs_f,
|
||||
row.rs_d,
|
||||
row.rs_r,
|
||||
row.rs_raw,
|
||||
row.rs_display,
|
||||
row.os_share,
|
||||
row.os_label,
|
||||
row.os_rank,
|
||||
row.ms_f_short,
|
||||
row.ms_f_long,
|
||||
row.ms_raw,
|
||||
row.ms_display,
|
||||
row.ml_order_count,
|
||||
row.ml_allocated_amount,
|
||||
row.ml_raw,
|
||||
row.ml_display,
|
||||
),
|
||||
params = (
|
||||
row.site_id,
|
||||
row.tenant_id,
|
||||
row.member_id,
|
||||
row.assistant_id,
|
||||
row.session_count,
|
||||
row.total_duration_minutes,
|
||||
row.basic_session_count,
|
||||
row.incentive_session_count,
|
||||
row.days_since_last_session,
|
||||
row.rs_f,
|
||||
row.rs_d,
|
||||
row.rs_r,
|
||||
row.rs_raw,
|
||||
row.rs_display,
|
||||
row.os_share,
|
||||
row.os_label,
|
||||
row.os_rank,
|
||||
row.ms_f_short,
|
||||
row.ms_f_long,
|
||||
row.ms_raw,
|
||||
row.ms_display,
|
||||
row.ml_order_count,
|
||||
row.ml_allocated_amount,
|
||||
row.ml_raw,
|
||||
row.ml_display,
|
||||
)
|
||||
if use_param_time:
|
||||
params = params + (calc_time, calc_time, calc_time)
|
||||
cur.execute(insert_sql, params)
|
||||
inserted += max(cur.rowcount, 0)
|
||||
self.db.conn.commit()
|
||||
return inserted
|
||||
|
||||
@@ -173,13 +173,17 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
# 1. 获取 site_id
|
||||
site_id = self._get_site_id(context)
|
||||
|
||||
# P19: 回测模式用 as_of_date 替代 NOW()
|
||||
from datetime import datetime as _dt
|
||||
as_of = (context.as_of_date if context and context.as_of_date else None) or _dt.now(self.tz)
|
||||
|
||||
# 2. 加载参数(配置表 + 默认值合并)
|
||||
db_params = self.load_index_parameters('SPI')
|
||||
params = {**self.DEFAULT_PARAMS, **db_params}
|
||||
|
||||
# 3. 提取特征
|
||||
features = self._extract_spending_features(site_id, params)
|
||||
recharge_map = self._extract_recharge_features(site_id, params)
|
||||
features = self._extract_spending_features(site_id, params, as_of=as_of)
|
||||
recharge_map = self._extract_recharge_features(site_id, params, as_of=as_of)
|
||||
|
||||
# 合并充值特征
|
||||
for mid, recharge_90 in recharge_map.items():
|
||||
@@ -189,7 +193,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
|
||||
# 批量计算日消费 EWMA 并合并
|
||||
member_ids = list(features.keys())
|
||||
ewma_map = self._compute_daily_spend_ewma_batch(site_id, member_ids, params)
|
||||
ewma_map = self._compute_daily_spend_ewma_batch(site_id, member_ids, params, as_of=as_of)
|
||||
for mid, ewma_val in ewma_map.items():
|
||||
if mid in features:
|
||||
features[mid].daily_spend_ewma_90 = ewma_val
|
||||
@@ -279,7 +283,9 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
feat.score_stability_display = stability_display_map.get(mid, 0.0)
|
||||
|
||||
# 8. delete-before-insert 持久化(Req 9.3)
|
||||
records_inserted = self._save_spi_data(feat_list, site_id)
|
||||
# P19: 仅回测模式传 calc_time,正常模式传 None(按 site_id 全量刷新)
|
||||
backtest_calc_time = as_of if (context and context.as_of_date) else None
|
||||
records_inserted = self._save_spi_data(feat_list, site_id, calc_time=backtest_calc_time)
|
||||
|
||||
# 9. 保存分位点历史(Req 9.5)——SPI 总分
|
||||
raw_values = [f.raw_score for f in feat_list]
|
||||
@@ -323,7 +329,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
# =========================================================================
|
||||
|
||||
def _extract_spending_features(
|
||||
self, site_id: int, params: Dict[str, float]
|
||||
self, site_id: int, params: Dict[str, float], *, as_of=None
|
||||
) -> Dict[int, SPIMemberFeatures]:
|
||||
"""从 dwd_settlement_head 提取消费特征,按 member_id 聚合。
|
||||
|
||||
@@ -339,8 +345,8 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
|
||||
# 单条 SQL 同时聚合 30 天和 90 天窗口,避免两次扫描
|
||||
# INTERVAL 天数通过 f-string 内嵌(整数,安全);site_id 走参数化
|
||||
# P19: 回测模式用 as_of 参数替代 NOW()
|
||||
# 时间基准用 %s 参数化,正常模式传 NOW() 等效的 as_of
|
||||
sql = f"""
|
||||
WITH consume_source AS (
|
||||
SELECT
|
||||
@@ -356,7 +362,8 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
AND COALESCE(mca.is_delete, 0) = 0
|
||||
WHERE s.site_id = %s
|
||||
AND s.settle_type IN (1, 3)
|
||||
AND s.pay_time >= NOW() - INTERVAL '{long_days} days'
|
||||
AND s.pay_time >= %s - INTERVAL '{long_days} days'
|
||||
AND s.pay_time < %s
|
||||
)
|
||||
SELECT
|
||||
canonical_member_id AS member_id,
|
||||
@@ -367,17 +374,17 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
COUNT(DISTINCT EXTRACT(ISOYEAR FROM pay_time)::int * 100
|
||||
+ EXTRACT(WEEK FROM pay_time)::int) AS active_weeks_90,
|
||||
-- 30 天窗口(子集过滤)
|
||||
SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
|
||||
SUM(CASE WHEN pay_time >= %s - INTERVAL '{short_days} days'
|
||||
THEN pay_amount ELSE 0 END) AS spend_30,
|
||||
SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
|
||||
SUM(CASE WHEN pay_time >= %s - INTERVAL '{short_days} days'
|
||||
THEN 1 ELSE 0 END) AS orders_30,
|
||||
COUNT(DISTINCT CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
|
||||
COUNT(DISTINCT CASE WHEN pay_time >= %s - INTERVAL '{short_days} days'
|
||||
THEN {biz_expr} END) AS visit_days_30
|
||||
FROM consume_source
|
||||
WHERE canonical_member_id > 0
|
||||
GROUP BY canonical_member_id
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
rows = self.db.query(sql, (site_id, as_of, as_of, as_of, as_of, as_of))
|
||||
|
||||
result: Dict[int, SPIMemberFeatures] = {}
|
||||
for row in (rows or []):
|
||||
@@ -412,7 +419,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
|
||||
|
||||
def _extract_recharge_features(
|
||||
self, site_id: int, params: Dict[str, float]
|
||||
self, site_id: int, params: Dict[str, float], *, as_of=None
|
||||
) -> Dict[int, float]:
|
||||
"""从 dwd_recharge_order 提取充值特征,返回 {member_id: recharge_90}。
|
||||
|
||||
@@ -421,6 +428,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
"""
|
||||
long_days = int(params.get('spend_window_long_days', 90))
|
||||
|
||||
# P19: 回测模式用 as_of 参数替代 NOW()
|
||||
sql = f"""
|
||||
WITH recharge_source AS (
|
||||
SELECT
|
||||
@@ -435,7 +443,8 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
AND COALESCE(mca.is_delete, 0) = 0
|
||||
WHERE r.site_id = %s
|
||||
AND r.settle_type = 5
|
||||
AND r.pay_time >= NOW() - INTERVAL '{long_days} days'
|
||||
AND r.pay_time >= %s - INTERVAL '{long_days} days'
|
||||
AND r.pay_time < %s
|
||||
)
|
||||
SELECT
|
||||
canonical_member_id AS member_id,
|
||||
@@ -444,7 +453,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
WHERE canonical_member_id > 0
|
||||
GROUP BY canonical_member_id
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
rows = self.db.query(sql, (site_id, as_of, as_of))
|
||||
|
||||
result: Dict[int, float] = {}
|
||||
for row in (rows or []):
|
||||
@@ -513,7 +522,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
return ewma
|
||||
|
||||
def _compute_daily_spend_ewma_batch(
|
||||
self, site_id: int, member_ids: List[int], params: Dict[str, float]
|
||||
self, site_id: int, member_ids: List[int], params: Dict[str, float], *, as_of=None
|
||||
) -> Dict[int, float]:
|
||||
"""批量计算多个会员的日消费 EWMA,单次 SQL 查询避免 N+1。
|
||||
|
||||
@@ -528,6 +537,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr_s = biz_date_sql_expr("s.pay_time", cutoff)
|
||||
|
||||
# P19: 回测模式用 as_of 参数替代 NOW()
|
||||
sql = f"""
|
||||
WITH consume_source AS (
|
||||
SELECT
|
||||
@@ -543,7 +553,8 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
AND COALESCE(mca.is_delete, 0) = 0
|
||||
WHERE s.site_id = %s
|
||||
AND s.settle_type IN (1, 3)
|
||||
AND s.pay_time >= NOW() - INTERVAL '{long_days} days'
|
||||
AND s.pay_time >= %s - INTERVAL '{long_days} days'
|
||||
AND s.pay_time < %s
|
||||
)
|
||||
SELECT canonical_member_id AS member_id,
|
||||
pay_date,
|
||||
@@ -553,7 +564,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
GROUP BY canonical_member_id, pay_date
|
||||
ORDER BY canonical_member_id, pay_date
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
rows = self.db.query(sql, (site_id, as_of, as_of))
|
||||
|
||||
# 按 member_id 分组,逐组计算 EWMA
|
||||
result: Dict[int, float] = {}
|
||||
@@ -727,21 +738,31 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
# =========================================================================
|
||||
|
||||
def _save_spi_data(
|
||||
self, data_list: List[SPIMemberFeatures], site_id: int
|
||||
self, data_list: List[SPIMemberFeatures], site_id: int, *, calc_time=None
|
||||
) -> int:
|
||||
"""delete-before-insert 写入 dws_member_spending_power_index"""
|
||||
with self.db.conn.cursor() as cur:
|
||||
# 先删除该门店旧记录(Req 9.3)
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_spending_power_index WHERE site_id = %s",
|
||||
(site_id,),
|
||||
)
|
||||
# P19: 回测模式按 calc_time 删除(保留其他快照),正常模式按 site_id 全量刷新
|
||||
use_param_time = calc_time is not None
|
||||
if use_param_time:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_spending_power_index WHERE site_id = %s AND calc_time = %s",
|
||||
(site_id, calc_time),
|
||||
)
|
||||
else:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_spending_power_index WHERE site_id = %s",
|
||||
(site_id,),
|
||||
)
|
||||
|
||||
if not data_list:
|
||||
self.db.conn.commit()
|
||||
return 0
|
||||
|
||||
insert_sql = """
|
||||
# P19: 回测模式传入 calc_time,正常模式用 NOW()
|
||||
use_param_time = calc_time is not None
|
||||
time_placeholder = "%s, %s, %s" if use_param_time else "NOW(), NOW(), NOW()"
|
||||
insert_sql = f"""
|
||||
INSERT INTO dws.dws_member_spending_power_index (
|
||||
site_id, member_id,
|
||||
spend_30, spend_90, recharge_90,
|
||||
@@ -761,7 +782,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
%s, %s, %s,
|
||||
%s, %s, %s,
|
||||
%s, %s,
|
||||
NOW(), NOW(), NOW()
|
||||
{time_placeholder}
|
||||
)
|
||||
"""
|
||||
inserted = 0
|
||||
@@ -773,7 +794,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
return max(lo, min(hi, v))
|
||||
|
||||
for f in data_list:
|
||||
cur.execute(insert_sql, (
|
||||
params_tuple = (
|
||||
f.site_id, f.member_id,
|
||||
f.spend_30, f.spend_90, f.recharge_90,
|
||||
f.orders_30, f.orders_90,
|
||||
@@ -787,7 +808,10 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
_clamp(f.score_stability_display, 0, DISP_MAX),
|
||||
_clamp(f.raw_score, -RAW_MAX, RAW_MAX),
|
||||
_clamp(f.display_score, 0, DISP_MAX),
|
||||
))
|
||||
)
|
||||
if use_param_time:
|
||||
params_tuple = params_tuple + (calc_time, calc_time, calc_time)
|
||||
cur.execute(insert_sql, params_tuple)
|
||||
inserted += max(cur.rowcount, 0)
|
||||
|
||||
self.db.conn.commit()
|
||||
|
||||
@@ -7,7 +7,7 @@ from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, timedelta
|
||||
from datetime import date, datetime, timedelta
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from .member_index_base import MemberActivityData, MemberIndexBaseTask
|
||||
@@ -178,7 +178,9 @@ class WinbackIndexTask(MemberIndexBaseTask):
|
||||
avg_raw=sum(all_raw) / len(all_raw)
|
||||
)
|
||||
|
||||
inserted = self._save_winback_data(winback_list)
|
||||
# P19: 回测模式传入 calc_time
|
||||
calc_time = (context.as_of_date if context and context.as_of_date else None)
|
||||
inserted = self._save_winback_data(winback_list, calc_time=calc_time)
|
||||
self.logger.info("WBI calculation finished, inserted %d rows", inserted)
|
||||
|
||||
return {
|
||||
@@ -339,21 +341,29 @@ class WinbackIndexTask(MemberIndexBaseTask):
|
||||
if data.raw_score < 0:
|
||||
data.raw_score = 0.0
|
||||
|
||||
def _save_winback_data(self, data_list: List[MemberWinbackData]) -> int:
|
||||
def _save_winback_data(self, data_list: List[MemberWinbackData], *, calc_time: Optional[datetime] = None) -> int:
|
||||
"""保存 WBI 数据"""
|
||||
if not data_list:
|
||||
return 0
|
||||
|
||||
site_id = data_list[0].activity.site_id
|
||||
# 按门店全量刷新,避免因分群变化导致过期数据残留。
|
||||
delete_sql = """
|
||||
DELETE FROM dws.dws_member_winback_index
|
||||
WHERE site_id = %s
|
||||
"""
|
||||
# P19: 回测模式传入 calc_time,正常模式用 NOW()
|
||||
use_param_time = calc_time is not None
|
||||
# P19: 回测模式按 calc_time 删除(保留其他快照),正常模式按 site_id 全量刷新
|
||||
with self.db.conn.cursor() as cur:
|
||||
cur.execute(delete_sql, (site_id,))
|
||||
if use_param_time:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_winback_index WHERE site_id = %s AND calc_time = %s",
|
||||
(site_id, calc_time),
|
||||
)
|
||||
else:
|
||||
cur.execute(
|
||||
"DELETE FROM dws.dws_member_winback_index WHERE site_id = %s",
|
||||
(site_id,),
|
||||
)
|
||||
|
||||
insert_sql = """
|
||||
time_placeholder = "%s, %s, %s" if use_param_time else "NOW(), NOW(), NOW()"
|
||||
insert_sql = f"""
|
||||
INSERT INTO dws.dws_member_winback_index (
|
||||
site_id, tenant_id, member_id,
|
||||
status, segment,
|
||||
@@ -379,7 +389,7 @@ class WinbackIndexTask(MemberIndexBaseTask):
|
||||
%s, %s,
|
||||
%s, %s,
|
||||
%s,
|
||||
NOW(), NOW(), NOW()
|
||||
{time_placeholder}
|
||||
)
|
||||
"""
|
||||
|
||||
@@ -387,7 +397,7 @@ class WinbackIndexTask(MemberIndexBaseTask):
|
||||
with self.db.conn.cursor() as cur:
|
||||
for data in data_list:
|
||||
activity = data.activity
|
||||
cur.execute(insert_sql, (
|
||||
params = (
|
||||
activity.site_id, activity.tenant_id, activity.member_id,
|
||||
data.status, data.segment,
|
||||
activity.member_create_time, activity.first_visit_time, activity.last_visit_time, activity.last_recharge_time,
|
||||
@@ -399,7 +409,10 @@ class WinbackIndexTask(MemberIndexBaseTask):
|
||||
data.ideal_interval_days, data.ideal_next_visit_date,
|
||||
data.raw_score, data.display_score,
|
||||
None,
|
||||
))
|
||||
)
|
||||
if use_param_time:
|
||||
params = params + (calc_time, calc_time, calc_time)
|
||||
cur.execute(insert_sql, params)
|
||||
inserted += cur.rowcount
|
||||
|
||||
self.db.conn.commit()
|
||||
|
||||
@@ -53,6 +53,9 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
|
||||
# CHANGE 2025-07-15 | task 1.3: 声明日期列,供基类默认 load() 使用
|
||||
DATE_COL = "stat_date"
|
||||
|
||||
# CHANGE 2026-03-22 | P14: 消费汇总完成后触发 AI 消费事件链
|
||||
AI_TRIGGER_EVENT = "consumption"
|
||||
|
||||
def get_task_code(self) -> str:
|
||||
return "DWS_MEMBER_CONSUMPTION"
|
||||
|
||||
124
apps/etl/connectors/feiqiu/tasks/dws/task_engine.py
Normal file
124
apps/etl/connectors/feiqiu/tasks/dws/task_engine.py
Normal file
@@ -0,0 +1,124 @@
|
||||
# AI_CHANGELOG
|
||||
# - 2026-03-29 | Prompt: DWS_TASK_ENGINE ETL 任务 | 新建文件。
|
||||
# 编排任务引擎全流程:完成检查 → 过期检查 → 任务生成。
|
||||
# 通过 HTTP 调用后端 POST /api/internal/run-job 按 job_name 执行。
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
DWS 任务引擎编排任务(DWS_TASK_ENGINE)
|
||||
|
||||
在 DWS 指数计算完成后执行,按顺序调用后端任务引擎的各个步骤:
|
||||
1. recall_completion_check — 检测召回是否完成,生成回访任务
|
||||
2. task_expiry_check — 标记超时未处理的任务
|
||||
3. task_generator — 根据 WBI/NCI/RS 指数生成/替换任务
|
||||
|
||||
通过 HTTP 调用后端 POST /api/internal/run-job(Internal-Token 认证),
|
||||
每步失败仅记录日志,不中断后续步骤。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from ..base_task import BaseTask, TaskContext
|
||||
|
||||
# 加载根 .env(BACKEND_API_URL / INTERNAL_API_TOKEN 不在 AppConfig 映射中)
|
||||
# task_engine.py → dws/ → tasks/ → feiqiu/ → connectors/ → etl/ → apps/ → root
|
||||
_REPO_ROOT = Path(__file__).resolve().parents[6]
|
||||
load_dotenv(_REPO_ROOT / ".env", override=False)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TIMEOUT = (5, 30) # 连接 5s,读取 30s(任务执行可能较慢)
|
||||
|
||||
# 按顺序执行的后端任务列表
|
||||
_JOB_SEQUENCE = [
|
||||
"recall_completion_check",
|
||||
"task_expiry_check",
|
||||
"task_generator",
|
||||
]
|
||||
|
||||
|
||||
def _run_backend_job(backend_url: str, token: str, job_name: str) -> dict:
|
||||
"""调用后端 POST /api/internal/run-job 执行指定任务。
|
||||
|
||||
Returns:
|
||||
{"success": bool, "message": str} 或 {"success": False, "message": error}
|
||||
"""
|
||||
url = f"{backend_url}/api/internal/run-job"
|
||||
headers = {
|
||||
"Authorization": f"Internal-Token {token}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
body = {"job_name": job_name}
|
||||
|
||||
try:
|
||||
resp = requests.post(url, json=body, headers=headers, timeout=_TIMEOUT)
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
# 后端 ResponseWrapperMiddleware 包装:{"code": 0, "data": {...}}
|
||||
inner = data.get("data", data)
|
||||
return {
|
||||
"success": inner.get("success", False),
|
||||
"message": inner.get("message", ""),
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"success": False,
|
||||
"message": f"HTTP {resp.status_code}: {resp.text[:200]}",
|
||||
}
|
||||
except requests.RequestException as exc:
|
||||
return {"success": False, "message": str(exc)}
|
||||
|
||||
|
||||
class DwsTaskEngineTask(BaseTask):
|
||||
"""DWS 任务引擎编排任务。
|
||||
|
||||
不读写 DWS 表,仅通过 HTTP 调用后端执行任务引擎步骤。
|
||||
继承 BaseTask 而非 BaseDwsTask,因为不需要 DWS 层的数据操作方法。
|
||||
"""
|
||||
|
||||
def get_task_code(self) -> str:
|
||||
return "DWS_TASK_ENGINE"
|
||||
|
||||
def extract(self, context: TaskContext) -> dict[str, Any]:
|
||||
"""无需提取数据,返回空上下文。"""
|
||||
return {}
|
||||
|
||||
def load(self, extracted: dict[str, Any], context: TaskContext) -> dict[str, Any]:
|
||||
"""按顺序调用后端任务引擎的各个步骤。"""
|
||||
backend_url = os.environ.get("BACKEND_API_URL", "").rstrip("/")
|
||||
token = os.environ.get("INTERNAL_API_TOKEN", "")
|
||||
|
||||
if not backend_url:
|
||||
self.logger.error("DWS_TASK_ENGINE 跳过:BACKEND_API_URL 未配置")
|
||||
return {"skipped": True, "reason": "BACKEND_API_URL 未配置"}
|
||||
if not token:
|
||||
self.logger.error("DWS_TASK_ENGINE 跳过:INTERNAL_API_TOKEN 未配置")
|
||||
return {"skipped": True, "reason": "INTERNAL_API_TOKEN 未配置"}
|
||||
|
||||
results: dict[str, Any] = {}
|
||||
|
||||
for job_name in _JOB_SEQUENCE:
|
||||
self.logger.info("DWS_TASK_ENGINE: 执行 %s ...", job_name)
|
||||
result = _run_backend_job(backend_url, token, job_name)
|
||||
success = result.get("success", False)
|
||||
message = result.get("message", "")
|
||||
|
||||
results[job_name] = {"success": success, "message": message}
|
||||
|
||||
if success:
|
||||
self.logger.info(
|
||||
"DWS_TASK_ENGINE: %s 成功 — %s", job_name, message
|
||||
)
|
||||
else:
|
||||
self.logger.warning(
|
||||
"DWS_TASK_ENGINE: %s 失败 — %s", job_name, message
|
||||
)
|
||||
|
||||
return results
|
||||
@@ -16,7 +16,7 @@ class IndexVerifier(BaseVerifier):
|
||||
self,
|
||||
db_connection: Any,
|
||||
logger: Optional[logging.Logger] = None,
|
||||
lookback_days: int = 60,
|
||||
lookback_days: int = 90,
|
||||
config: Any = None,
|
||||
):
|
||||
super().__init__(db_connection, logger)
|
||||
|
||||
Reference in New Issue
Block a user