在前后端开发联调前 的提交20260223

This commit is contained in:
Neo
2026-02-23 23:02:20 +08:00
parent 254ccb1e77
commit fafc95e64c
1142 changed files with 10366960 additions and 36957 deletions

View File

@@ -166,10 +166,14 @@ class BaseTask:
for key, value in (current or {}).items():
if isinstance(value, (int, float)):
total[key] = (total.get(key) or 0) + value
elif isinstance(value, list):
# 防御层list 类型转为 len() 累加
total[key] = (total.get(key) or 0) + len(value)
else:
total.setdefault(key, value)
return total
def _get_time_window(self, cursor_data: dict = None) -> tuple:
"""计算时间窗口"""
now = datetime.now(self.tz)

View File

@@ -38,6 +38,8 @@ class DwdLoadTask(BaseTask):
"dwd.dim_goods_category": "ods.stock_goods_category_tree",
"dwd.dim_groupbuy_package": "ods.group_buy_packages",
"dwd.dim_groupbuy_package_ex": "ods.group_buy_packages",
"dwd.dim_staff": "ods.staff_info_master",
"dwd.dim_staff_ex": "ods.staff_info_master",
# 事实
"dwd.dwd_settlement_head": "ods.settlement_records",
"dwd.dwd_settlement_head_ex": "ods.settlement_records",
@@ -49,8 +51,6 @@ class DwdLoadTask(BaseTask):
"dwd.dwd_store_goods_sale_ex": "ods.store_goods_sales_records",
"dwd.dwd_assistant_service_log": "ods.assistant_service_records",
"dwd.dwd_assistant_service_log_ex": "ods.assistant_service_records",
"dwd.dwd_assistant_trash_event": "ods.assistant_cancellation_records",
"dwd.dwd_assistant_trash_event_ex": "ods.assistant_cancellation_records",
"dwd.dwd_member_balance_change": "ods.member_balance_changes",
"dwd.dwd_member_balance_change_ex": "ods.member_balance_changes",
"dwd.dwd_groupbuy_redemption": "ods.group_buy_redemption_records",
@@ -62,6 +62,8 @@ class DwdLoadTask(BaseTask):
"dwd.dwd_payment": "ods.payment_transactions",
"dwd.dwd_refund": "ods.refund_transactions",
"dwd.dwd_refund_ex": "ods.refund_transactions",
"dwd.dwd_goods_stock_summary": "ods.goods_stock_summary",
"dwd.dwd_goods_stock_movement": "ods.goods_stock_movements",
}
SCD_COLS = {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}
@@ -90,6 +92,17 @@ class DwdLoadTask(BaseTask):
except Exception as exc: # noqa: BLE001
self.logger.warning("检查 fetched_at 为空记录失败:%s, err=%s", ods_table, exc)
@staticmethod
def _pick_snapshot_order_column(cols: Sequence[str]) -> str | None:
"""从 ODS 列中选取用于快照排序的列fetched_at > update_time > create_time
CHANGE 2026-02-21: BUG 9 fix — 从 integrity_checker 移入本类,解决 AttributeError。
"""
lower = {c.lower() for c in cols}
for candidate in ("fetched_at", "update_time", "create_time"):
if candidate in lower:
return candidate
return None
@staticmethod
def _latest_snapshot_select_sql(
select_cols_sql: str,
@@ -157,6 +170,20 @@ class DwdLoadTask(BaseTask):
"dwd.dim_table_ex": [
("table_id", "id", None),
("table_cloth_use_time", "table_cloth_use_time", None),
("create_time", "create_time", None), # 台桌创建时间
("light_status", "light_status", None), # 台灯状态
("tablestatusname", "tablestatusname", None), # 台桌状态中文名ODS 驼峰 → PG 小写)
("sitename", "sitename", None), # 门店名称快照ODS 驼峰 → PG 小写)
("applet_qr_code_url", '"appletQrCodeUrl"', None), # 小程序二维码 URLODS 带引号保留大小写)
("audit_status", "audit_status", None), # 审核状态
("charge_free", "charge_free", None), # 是否免费台
("delay_lights_time", "delay_lights_time", None), # 台灯熄灭延迟时间
("is_rest_area", "is_rest_area", None), # 是否休息区
("only_allow_groupon", "only_allow_groupon", None), # 是否仅允许团购
("order_delay_time", "order_delay_time", None), # 订单自动延时时长
("self_table", "self_table", None), # 是否自有台桌
("temporary_light_second", "temporary_light_second", None), # 临时开灯秒数
("virtual_table", "virtual_table", None), # 是否虚拟台桌
],
"dwd.dim_assistant": [("assistant_id", "id", None), ("user_id", "user_id", None)],
"dwd.dim_assistant_ex": [
@@ -164,11 +191,16 @@ class DwdLoadTask(BaseTask):
("introduce", "introduce", None),
("group_name", "group_name", None),
("light_equipment_id", "light_equipment_id", None),
("system_role_id", "system_role_id", None), # 系统角色 ID
("job_num", "job_num", None), # 工号
("cx_unit_price", "cx_unit_price", None), # 促销单价
("pd_unit_price", "pd_unit_price", None), # 陪打单价
],
"dwd.dim_member": [
("member_id", "id", None),
("pay_money_sum", "pay_money_sum", None),
("recharge_money_sum", "recharge_money_sum", None),
# update_time: 上游 API 不提供此字段,保持 NULL已在一致性检查白名单中标记
],
"dwd.dim_member_ex": [
("member_id", "id", None),
@@ -206,7 +238,7 @@ class DwdLoadTask(BaseTask):
("tenant_goods_id", "id", None),
("remark_name", "remark_name", None),
("goods_bar_code", "goods_bar_code", None),
("commodity_code_list", "commodity_code", None),
("commodity_code_list", "commoditycode", "TEXT[]"), # CHANGE 2026-02-21: 从 commoditycodePG 数组格式 {xxx}映射CAST 为 TEXT[]
("is_in_site", "isinsite", "boolean"),
],
"dwd.dim_store_goods": [
@@ -216,7 +248,7 @@ class DwdLoadTask(BaseTask):
("created_at", "create_time", None),
("updated_at", "update_time", None),
("avg_monthly_sales", "average_monthly_sales", None),
("batch_stock_qty", "stock", None),
("batch_stock_qty", "batch_stock_quantity", None), # CHANGE 2026-02-20: 修正映射源 stock→batch_stock_quantity批次库存非当前库存
("sale_qty", "sale_num", None),
("total_sales_qty", "total_sales", None),
("commodity_code", "commodity_code", None),
@@ -230,12 +262,13 @@ class DwdLoadTask(BaseTask):
("safety_stock_qty", "safe_stock", None),
("site_name", "sitename", None),
("goods_cover_url", "goods_cover", None),
("provisional_total_cost", "total_purchase_cost", None),
("provisional_total_cost", "provisional_total_cost", None), # CHANGE 2026-02-20: 修正映射源 total_purchase_cost→provisional_total_cost暂估成本非实际采购成本
("is_discountable", "able_discount", None),
("freeze_status", "freeze", None),
("remark", "remark", None),
("days_on_shelf", "days_available", None),
("sort_order", "sort", None),
("time_slot_sale", "time_slot_sale", None), # CHANGE 2026-02-21: 新增分时段销售标记
],
"dwd.dim_goods_category": [
("category_id", "id", None),
@@ -268,6 +301,24 @@ class DwdLoadTask(BaseTask):
("package_type", "type", None),
("tenant_coupon_sale_order_item_id", "tenantcouponsaleorderitemid", None),
],
"dwd.dim_staff": [
("staff_id", "id", None),
("entry_time", "entry_time", "timestamptz"),
("resign_time", "resign_time", "timestamptz"),
],
"dwd.dim_staff_ex": [
("staff_id", "id", None),
("rank_name", "rankname", None),
("cashier_point_id", "cashierpointid", "bigint"),
("cashier_point_name", "cashierpointname", None),
("group_id", "groupid", "bigint"),
("group_name", "groupname", None),
("system_user_id", "systemuserid", "bigint"),
("tenant_org_id", "tenantorgid", "bigint"),
("auth_code_create", "auth_code_create", "timestamptz"),
("create_time", "create_time", "timestamptz"),
("user_roles", "userroles", "jsonb"),
],
# 事实表主键及关键差异列
"dwd.dwd_table_fee_log": [
("table_fee_log_id", "id", None),
@@ -300,7 +351,10 @@ class DwdLoadTask(BaseTask):
],
"dwd.dwd_store_goods_sale": [
("store_goods_sale_id", "id", None),
("discount_price", "discount_money", None),
# CHANGE 2026-02-20: 修正列名误导——原 DWD discount_price 实际映射自 ODS discount_money折扣金额
# 现重命名为 discount_money 以反映真实语义;新增 discount_price 映射 ODS 真正的折后单价。
("discount_money", "discount_money", None),
("discount_price", "discount_price", None),
("coupon_share_money", "coupon_share_money", None),
],
"dwd.dwd_store_goods_sale_ex": [
@@ -316,7 +370,9 @@ class DwdLoadTask(BaseTask):
"dwd.dwd_assistant_service_log": [
("assistant_service_id", "id", None),
("assistant_no", "assistantno", None),
("site_assistant_id", "order_assistant_id", None),
# CHANGE 2026-02-20: 修正映射源——site_assistant_id 应来自 ODS site_assistant_id助教档案 ID
# 而非 order_assistant_id订单级助教明细 ID。order_assistant_id 由同名列自动映射。
("site_assistant_id", "site_assistant_id", None),
("level_name", "levelname", None),
("skill_name", "skillname", None),
("real_service_money", "real_service_money", None),
@@ -330,24 +386,8 @@ class DwdLoadTask(BaseTask):
("salesman_name", "salesman_name", None),
("table_name", "tablename", None),
("assistant_team_name", "assistantteamname", None),
],
"dwd.dwd_assistant_trash_event": [
("assistant_trash_event_id", "id", None),
("assistant_no", "assistantname", None),
("abolish_amount", "assistantabolishamount", None),
("charge_minutes_raw", "pdchargeminutes", None),
("site_id", "siteid", None),
("table_id", "tableid", None),
("table_area_id", "tableareaid", None),
("assistant_name", "assistantname", None),
("trash_reason", "trashreason", None),
("create_time", "createtime", None),
("tenant_id", "tenant_id", None),
],
"dwd.dwd_assistant_trash_event_ex": [
("assistant_trash_event_id", "id", None),
("table_area_name", "tablearea", None),
("table_name", "tablename", None),
("operator_id", "operator_id", None), # 操作员 ID
("operator_name", "operator_name", None), # 操作员姓名
],
"dwd.dwd_member_balance_change": [
("balance_change_id", "id", None),
@@ -360,12 +400,15 @@ class DwdLoadTask(BaseTask):
("member_mobile", "membermobile", None),
("principal_before", "principal_before", None),
("principal_after", "principal_after", None),
# CHANGE [2026-02-20] intent: 补齐 principal_change_amount由 principal_after - principal_before 计算
("principal_change_amount", "COALESCE(CAST(principal_after AS numeric),0) - COALESCE(CAST(principal_before AS numeric),0)", "numeric"),
],
"dwd.dwd_member_balance_change_ex": [
("balance_change_id", "id", None),
("pay_site_name", "paysitename", None),
("register_site_name", "registersitename", None),
("principal_data", "principal_data", None),
("relate_id", "relate_id", None), # 关联业务单据 ID
],
"dwd.dwd_groupbuy_redemption": [
("redemption_id", "id", None),
@@ -502,13 +545,16 @@ class DwdLoadTask(BaseTask):
("payment_method", "paymentmethod", None),
("create_time", "createtime", None),
("pay_time", "paytime", None),
("pl_coupon_sale_amount", "plcouponsaleamount", None), # 平台券销售金额
("mervou_sales_amount", "mervousalesamount", None), # 储值券销售金额
("electricity_money", "electricitymoney", None), # 电费金额
("real_electricity_money", "realelectricitymoney", None), # 实际电费金额
("electricity_adjust_money", "electricityadjustmoney", None), # 电费调整金额
],
# CHANGE 2026-02-20: 移除 salesman_name/order_remark/revoke_order_name 的重复条目(原列表首尾各出现一次)
"dwd.dwd_recharge_order_ex": [
("recharge_order_id", "id", None),
("site_name_snapshot", "sitename", None),
("salesman_name", "salesmanname", None),
("order_remark", "orderremark", None),
("revoke_order_name", "revokeordername", None),
("settle_status", "settlestatus", None),
("is_bind_member", "isbindmember", "boolean"),
("is_activity", "isactivity", "boolean"),
@@ -551,6 +597,47 @@ class DwdLoadTask(BaseTask):
("revoke_order_name", "revokeordername", None),
("revoke_time", "revoketime", None),
],
# 库存汇总goods_stock_summaryODS 列名全小写)
# CHANGE 2026-02-21: BUG 10 fix — ODS 列名是小写sitegoodsid不是驼峰
"dwd.dwd_goods_stock_summary": [
("site_goods_id", '"sitegoodsid"', "bigint"), # 门店商品 IDPK
("goods_name", '"goodsname"', None), # 商品名称
("goods_unit", '"goodsunit"', None), # 计量单位
("goods_category_id", '"goodscategoryid"', "bigint"), # 一级分类 ID
("goods_category_second_id", '"goodscategorysecondid"', "bigint"), # 二级分类 ID
("category_name", '"categoryname"', None), # 分类名称
("range_start_stock", '"rangestartstock"', "numeric"), # 期初库存
("range_end_stock", '"rangeendstock"', "numeric"), # 期末库存
("range_in", '"rangein"', "numeric"), # 入库数量
("range_out", '"rangeout"', "numeric"), # 出库数量
("range_sale", '"rangesale"', "numeric"), # 销售数量
("range_sale_money", '"rangesalemoney"', "numeric"), # 销售金额
("range_inventory", '"rangeinventory"', "numeric"), # 盘点调整量
("current_stock", '"currentstock"', "numeric"), # 当前库存
],
# 库存变动流水goods_stock_movementsODS 列名全小写)
# CHANGE 2026-02-21: BUG 10 fix — ODS 列名是小写,不是驼峰
"dwd.dwd_goods_stock_movement": [
("site_goods_stock_id", '"sitegoodsstockid"', "bigint"), # 库存变动记录 IDPK
("tenant_id", '"tenantid"', "bigint"), # 租户 ID
("site_id", '"siteid"', "bigint"), # 门店 ID
("site_goods_id", '"sitegoodsid"', "bigint"), # 门店商品 ID
("goods_name", '"goodsname"', None), # 商品名称
("goods_category_id", '"goodscategoryid"', "bigint"), # 一级分类 ID
("goods_second_category_id", '"goodssecondcategoryid"', "bigint"), # 二级分类 ID
("unit", "unit", None), # 计量单位ODS 已是小写)
("price", "price", "numeric"), # 商品单价ODS 已是小写)
("stock_type", '"stocktype"', "integer"), # 库存变动类型
("change_num", '"changenum"', "numeric"), # 变动数量
("start_num", '"startnum"', "numeric"), # 变动前库存
("end_num", '"endnum"', "numeric"), # 变动后库存
("change_num_a", '"changenuma"', "numeric"), # 辅助单位变动量
("start_num_a", '"startnuma"', "numeric"), # 辅助单位变动前库存
("end_num_a", '"endnuma"', "numeric"), # 辅助单位变动后库存
("remark", "remark", None), # 备注ODS 已是小写)
("operator_name", '"operatorname"', None), # 操作人
("create_time", '"createtime"', "timestamptz"), # 变动时间
],
}
def get_task_code(self) -> str:
@@ -624,7 +711,7 @@ class DwdLoadTask(BaseTask):
errors.append({"table": dwd_table, "error": str(exc)})
continue
return {"tables": summary, "errors": errors}
return {"tables": summary, "errors": len(errors), "error_details": errors}
# ---------------------- 辅助方法 ----------------------
def _get_columns(self, cur, table: str) -> List[str]:
@@ -777,6 +864,10 @@ class DwdLoadTask(BaseTask):
self.logger.error("跳过 %sODS 表 %s 缺少 fetched_at 列", dwd_table, ods_table)
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
self._log_missing_fetched_at(cur, ods_table)
# CHANGE 2026-02-22: BUG 12 — 获取列类型,用于哨兵日期过滤
dwd_types = self._get_column_types(cur, dwd_table, "dwd")
ods_types = self._get_column_types(cur, ods_table, "ods")
ts_types = {"timestamp without time zone", "timestamp with time zone"}
table_sql = self._format_table(ods_table, "ods")
# 构造 SELECT 表达式,支持 JSON/expression 映射
select_exprs: list[str] = []
@@ -790,7 +881,14 @@ class DwdLoadTask(BaseTask):
select_exprs.append(f"{self._cast_expr(src, cast_type)} AS \"{lc}\"")
added.add(lc)
elif lc in ods_set:
select_exprs.append(f'"{lc}" AS "{lc}"')
# CHANGE 2026-02-22: BUG 12 — 同名列如果是时间类型,加哨兵值过滤
if dwd_types.get(lc) in ts_types and ods_types.get(lc) in ts_types:
select_exprs.append(
f'CASE WHEN "{lc}" >= \'{self._SENTINEL_DATE_THRESHOLD}\'::timestamp '
f'THEN "{lc}" ELSE NULL END AS "{lc}"'
)
else:
select_exprs.append(f'"{lc}" AS "{lc}"')
added.add(lc)
# 分类维度需要额外读取 categoryboxes 以展开子类
if dwd_table == "dwd.dim_goods_category" and "categoryboxes" not in added and "categoryboxes" in ods_set:
@@ -810,7 +908,7 @@ class DwdLoadTask(BaseTask):
if not select_exprs:
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
order_col = self._pick_snapshot_order_column(ods_cols)
order_col = self._pick_snapshot_order_column(ods_cols) # CHANGE 2026-02-21: BUG 9 fix — 方法从 integrity_checker 移入本类
key_exprs: list[str] = []
for key in business_keys:
lc = key.lower()
@@ -860,9 +958,22 @@ class DwdLoadTask(BaseTask):
return {"processed": 0, "inserted": 0, "updated": 0, "skipped": 0}
# 预加载当前版本scd2_is_current=1避免逐行 SELECT 造成大量 round-trip
# CHANGE 2026-02-22: BUG 12 — 用显式列列表替代 SELECT *,对 timestamptz 列做哨兵值过滤
# 防止 BC 日期导致 psycopg2 fetchall() 抛出 ValueError
table_sql_dwd = self._format_table(dwd_table, "dwd")
where_current = " AND ".join([f"COALESCE(scd2_is_current,1)=1"])
cur.execute(f"SELECT * FROM {table_sql_dwd} WHERE {where_current}")
dwd_select_exprs: list[str] = []
for col in dwd_cols:
lc = col.lower()
if dwd_types.get(lc) in ts_types:
dwd_select_exprs.append(
f'CASE WHEN "{lc}" >= \'{self._SENTINEL_DATE_THRESHOLD}\'::timestamptz '
f'THEN "{lc}" ELSE NULL END AS "{lc}"'
)
else:
dwd_select_exprs.append(f'"{lc}"')
dwd_select_sql = ", ".join(dwd_select_exprs)
where_current = "COALESCE(scd2_is_current,1)=1"
cur.execute(f"SELECT {dwd_select_sql} FROM {table_sql_dwd} WHERE {where_current}")
current_rows = cur.fetchall() or []
current_by_pk: dict[tuple[Any, ...], Dict[str, Any]] = {}
for r in current_rows:
@@ -1245,12 +1356,19 @@ class DwdLoadTask(BaseTask):
"""构造事实表 SELECT 列表,需要时做类型转换。"""
numeric_types = {"integer", "bigint", "smallint", "numeric", "double precision", "real", "decimal"}
text_types = {"text", "character varying", "varchar"}
ts_types = {"timestamp without time zone", "timestamp with time zone"}
exprs = []
for col in insert_cols:
d_type = dwd_types.get(col)
o_type = ods_types.get(col)
if d_type in numeric_types and o_type in text_types:
exprs.append(f"CAST(NULLIF(CAST(\"{col}\" AS text), '') AS numeric):: {d_type}")
elif d_type in ts_types and o_type in ts_types:
# CHANGE 2026-02-22: BUG 12 — 哨兵日期过滤,防止 0001-01-01 转 timestamptz 变 BC
exprs.append(
f'CASE WHEN "{col}" >= \'{self._SENTINEL_DATE_THRESHOLD}\'::timestamp '
f'THEN "{col}" ELSE NULL END'
)
else:
exprs.append(f'"{col}"')
return exprs
@@ -1271,8 +1389,15 @@ class DwdLoadTask(BaseTask):
schema, table = self._split_table_name(name, default_schema)
return f'"{schema}"."{table}"'
# CHANGE 2026-02-22: BUG 12 fix — 哨兵日期阈值,上游 API 用 0001-01-01 表示"未设置"
_SENTINEL_DATE_THRESHOLD = "0002-01-01"
def _cast_expr(self, col: str, cast_type: str | None) -> str:
"""构造带可选 CAST 的列表达式。"""
"""构造带可选 CAST 的列表达式。
对 timestamptz 转换额外包裹哨兵值过滤ODS 中 0001-01-01 在
Asia/Shanghai 时区下会变成 BC 日期psycopg2 无法解析。
"""
if col.upper() == "NULL":
base = "NULL"
else:
@@ -1282,8 +1407,11 @@ class DwdLoadTask(BaseTask):
cast_lower = cast_type.lower()
if cast_lower in {"bigint", "integer", "numeric", "decimal"}:
return f"CAST(NULLIF(CAST({base} AS text), '') AS numeric):: {cast_type}"
if cast_lower == "timestamptz":
return f"({base})::timestamptz"
if cast_lower in {"timestamptz", "timestamp with time zone"}:
# 哨兵值过滤:< 0002-01-01 的值置为 NULL
# base 可能是 textJSONB ->> 提取),需先 CAST 为 timestamp 再比较
return (f"CASE WHEN ({base})::timestamp >= '{self._SENTINEL_DATE_THRESHOLD}'::timestamp "
f"THEN ({base})::timestamptz ELSE NULL END")
return f"{base}::{cast_type}"
return base

View File

@@ -3,6 +3,7 @@
from __future__ import annotations
import json
import os
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Iterable, List, Sequence, Tuple
@@ -16,7 +17,9 @@ from tasks.dwd.dwd_load_task import DwdLoadTask
class DwdQualityTask(BaseTask):
"""对 ODS 与 DWD 进行行数、金额对照核查,生成 JSON 报表。"""
REPORT_PATH = Path("reports/dwd_quality_report.json")
# 从 .env 读取 ETL_REPORT_ROOT必须配置
_report_root = os.environ.get("ETL_REPORT_ROOT")
REPORT_PATH = Path(_report_root) / "dwd_quality_report.json" if _report_root else None
AMOUNT_KEYWORDS = ("amount", "money", "fee", "balance")
def get_task_code(self) -> str:
@@ -29,6 +32,11 @@ class DwdQualityTask(BaseTask):
def load(self, extracted: dict[str, Any], context: TaskContext) -> dict[str, Any]:
"""输出行数/金额差异报表到本地文件。"""
if self.REPORT_PATH is None:
raise RuntimeError(
"环境变量 ETL_REPORT_ROOT 未定义,无法生成质检报表。"
"请在根 .env 中配置,参考 docs/deployment/EXPORT-PATHS.md"
)
report: Dict[str, Any] = {
"generated_at": extracted["now"].isoformat(),
"tables": [],

View File

@@ -25,6 +25,9 @@ from .finance_income_task import FinanceIncomeStructureTask
from .finance_discount_task import FinanceDiscountDetailTask
from .finance_base_task import FinanceBaseTask
from .maintenance_task import DwsMaintenanceTask
from .goods_stock_daily_task import GoodsStockDailyTask
from .goods_stock_weekly_task import GoodsStockWeeklyTask
from .goods_stock_monthly_task import GoodsStockMonthlyTask
# 指数算法任务
from .index import (
@@ -32,6 +35,7 @@ from .index import (
NewconvIndexTask,
MlManualImportTask,
RelationIndexTask,
SpendingPowerIndexTask,
)
__all__ = [
@@ -57,9 +61,14 @@ __all__ = [
"FinanceIncomeStructureTask",
"FinanceDiscountDetailTask",
"DwsMaintenanceTask",
# 库存维度
"GoodsStockDailyTask",
"GoodsStockWeeklyTask",
"GoodsStockMonthlyTask",
# 指数算法
"WinbackIndexTask",
"NewconvIndexTask",
"MlManualImportTask",
"RelationIndexTask",
"SpendingPowerIndexTask",
]

View File

@@ -198,7 +198,7 @@ class AssistantCustomerTask(BaseDwsTask):
)
SELECT
assistant_id,
MAX(assistant_nickname) AS assistant_nickname,
(ARRAY_AGG(assistant_nickname ORDER BY service_date DESC))[1] AS assistant_nickname,
member_id,
MIN(service_date) AS first_service_date,
MAX(service_date) AS last_service_date,
@@ -247,21 +247,31 @@ class AssistantCustomerTask(BaseDwsTask):
"""
提取会员信息
"""
# CHANGE 2026-02-21 | dim_member 无 site_id 字段,改用 register_site_id
# 加 scd2_is_current=1 只取当前有效版本
# CHANGE 2026-02-22 | 需求 B通过事实表反查支持跨店消费会员
sql = """
SELECT
member_id,
nickname,
mobile
FROM dwd.dim_member
WHERE site_id = %s
WHERE member_id IN (
SELECT DISTINCT tenant_member_id
FROM dwd.dwd_assistant_service_log
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
) AND scd2_is_current = 1
"""
rows = self.db.query(sql, (site_id,))
result = {}
for row in (rows or []):
row_dict = dict(row)
result[row_dict['member_id']] = row_dict
return result
def _extract_assistant_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
"""

View File

@@ -7,7 +7,7 @@
数据来源:
- dwd_assistant_service_log: 助教服务流水
- dwd_assistant_trash_event: 废除记录(排除
- dwd_assistant_service_log_ex: 扩展表(提供 is_trash 废除标记
- dim_assistant: 助教维度SCD2获取当日等级
- cfg_skill_type: 技能→课程类型映射
@@ -19,7 +19,7 @@
- 幂等方式delete-before-insert按日期窗口
业务规则:
- 有效业绩:需排除dwd_assistant_trash_event中的废除记录
- 有效业绩:通过 dwd_assistant_service_log_ex.is_trash 字段判断是否被废除
- 助教等级使用SCD2 as-of取值获取统计日当日生效的等级
- 课程类型通过skill_id映射分为基础课和附加课
@@ -78,18 +78,14 @@ class AssistantDailyTask(BaseDwsTask):
self.get_task_code(), start_date, end_date
)
# 1. 获取助教服务记录
# 1. 获取助教服务记录(含 is_trash 标记,来自 _ex 表 JOIN
service_records = self._extract_service_records(site_id, start_date, end_date)
# 2. 获取废除记录
trash_records = self._extract_trash_records(site_id, start_date, end_date)
# 3. 加载配置缓存
# 2. 加载配置缓存
self.load_config_cache()
return {
'service_records': service_records,
'trash_records': trash_records,
'start_date': start_date,
'end_date': end_date,
'site_id': site_id
@@ -100,21 +96,16 @@ class AssistantDailyTask(BaseDwsTask):
转换数据:按助教+日期聚合
"""
service_records = extracted['service_records']
trash_records = extracted['trash_records']
site_id = extracted['site_id']
self.logger.info(
"%s: 转换数据,服务记录 %d 条,废除记录 %d",
self.get_task_code(), len(service_records), len(trash_records)
"%s: 转换数据,服务记录 %d",
self.get_task_code(), len(service_records)
)
# 构建废除记录索引assistant_service_id -> trash_info
trash_index = self._build_trash_index(trash_records)
# 按助教+日期聚合
aggregated = self._aggregate_by_assistant_date(
service_records,
trash_index,
site_id
)
@@ -134,6 +125,8 @@ class AssistantDailyTask(BaseDwsTask):
) -> List[Dict[str, Any]]:
"""
提取助教服务记录
JOIN _ex 表取 is_trash 字段,用于直接判断服务是否被废除。
"""
sql = """
SELECT
@@ -150,8 +143,11 @@ class AssistantDailyTask(BaseDwsTask):
asl.real_use_seconds,
asl.ledger_amount,
asl.ledger_unit_price,
DATE(asl.start_use_time) AS service_date
DATE(asl.start_use_time) AS service_date,
COALESCE(ex.is_trash, 0) AS is_trash
FROM dwd.dwd_assistant_service_log asl
LEFT JOIN dwd.dwd_assistant_service_log_ex ex
ON asl.assistant_service_id = ex.assistant_service_id
WHERE asl.site_id = %s
AND DATE(asl.start_use_time) >= %s
AND DATE(asl.start_use_time) <= %s
@@ -160,53 +156,14 @@ class AssistantDailyTask(BaseDwsTask):
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
def _extract_trash_records(
self,
site_id: int,
start_date: date,
end_date: date
) -> List[Dict[str, Any]]:
"""
Fetch trash (voided-service) records for one site within a date window.

Queries ``dwd.dwd_assistant_trash_event`` for rows whose ``site_id``
matches and whose ``DATE(trash_time)`` lies between ``start_date`` and
``end_date``, both bounds inclusive.

Original author's note: the exclusion rule for effective performance
only applies to records found in the "assistant trash table".

Returns:
    One dict per row with the selected columns ``assistant_service_id``,
    ``trash_seconds``, ``trash_reason`` and ``trash_time``; an empty list
    when the query yields nothing.
"""
sql = """
SELECT
assistant_service_id,
trash_seconds,
trash_reason,
trash_time
FROM dwd.dwd_assistant_trash_event
WHERE site_id = %s
AND DATE(trash_time) >= %s
AND DATE(trash_time) <= %s
"""
# Values are bound as query parameters, in the order of the %s placeholders.
rows = self.db.query(sql, (site_id, start_date, end_date))
# A falsy result (None or empty) is normalised to an empty list.
return [dict(row) for row in rows] if rows else []
# ==========================================================================
# 数据转换方法
# ==========================================================================
def _build_trash_index(
    self,
    trash_records: List[Dict[str, Any]]
) -> Dict[int, Dict[str, Any]]:
    """
    Index trash records by their ``assistant_service_id``.

    Records whose ``assistant_service_id`` is missing or falsy (``None``,
    ``0``) are left out. When several records share the same id, the one
    appearing last in ``trash_records`` is kept.
    """
    keyed = ((rec.get('assistant_service_id'), rec) for rec in trash_records)
    return {sid: rec for sid, rec in keyed if sid}
def _aggregate_by_assistant_date(
self,
service_records: List[Dict[str, Any]],
trash_index: Dict[int, Dict[str, Any]],
site_id: int
) -> List[Dict[str, Any]]:
"""
@@ -275,14 +232,12 @@ class AssistantDailyTask(BaseDwsTask):
is_bonus = course_type == CourseType.BONUS
is_room = course_type == CourseType.ROOM
# 检查是否被废除
is_trashed = service_id in trash_index
# 检查是否被废除(使用 _ex 表的 is_trash 标记)
is_trashed = bool(record.get('is_trash', 0))
if is_trashed:
# 废除记录单独统计
trash_info = trash_index[service_id]
trash_seconds = self.safe_int(trash_info.get('trash_seconds', income_seconds))
agg['trashed_seconds'] += trash_seconds
# 废除记录:直接用服务记录的 income_seconds 作为废除时长
agg['trashed_seconds'] += income_seconds
agg['trashed_count'] += 1
else:
# 正常记录累加

View File

@@ -129,7 +129,7 @@ class AssistantFinanceTask(BaseDwsTask):
SELECT
DATE(s.start_use_time) AS stat_date,
s.site_assistant_id AS assistant_id,
MAX(s.nickname) AS assistant_nickname,
(ARRAY_AGG(s.nickname ORDER BY s.start_use_time DESC))[1] AS assistant_nickname,
COUNT(*) AS service_count,
SUM(s.income_seconds) / 3600.0 AS service_hours,
SUM(s.ledger_amount) AS revenue_total,

View File

@@ -261,12 +261,16 @@ class AssistantMonthlyTask(BaseDwsTask):
month_where = " OR ".join(month_conditions)
# CHANGE 2026-02-22 | Prompt: 需求 A — 按档位分段统计
# GROUP BY 加入 assistant_level_code/name使同一助教月内不同档位各自聚合
# nickname 改用 ARRAY_AGG 按时间倒序取最新值,替代 MAX() 的字典序取值。
# 唯一约束已同步变更为 (site_id, assistant_id, stat_month, assistant_level_code)
sql = f"""
SELECT
assistant_id,
assistant_nickname,
assistant_level_code,
assistant_level_name,
(ARRAY_AGG(assistant_nickname ORDER BY stat_date DESC))[1] AS assistant_nickname,
DATE_TRUNC('month', stat_date)::DATE AS stat_month,
COUNT(DISTINCT stat_date) AS work_days,
SUM(total_service_count) AS total_service_count,
@@ -287,7 +291,7 @@ class AssistantMonthlyTask(BaseDwsTask):
SUM(trashed_count) AS trashed_count
FROM dws.dws_assistant_daily_detail
WHERE site_id = %s AND ({month_where})
GROUP BY assistant_id, assistant_nickname, assistant_level_code, assistant_level_name,
GROUP BY assistant_id, assistant_level_code, assistant_level_name,
DATE_TRUNC('month', stat_date)
"""
@@ -405,9 +409,10 @@ class AssistantMonthlyTask(BaseDwsTask):
max_tier_level=max_tier_level
)
# 获取月末的等级信息(用于记录)
month_end = self._get_month_end(month)
level_info = self.get_assistant_level_asof(assistant_id, month_end)
# CHANGE 2026-02-22 | Prompt: 需求 A 任务 7.3 — 多行适配
# 聚合行已按 assistant_level_code 分组,每行自带档位信息,
# 直接使用聚合行的 assistant_level_code/name不再用月末 SCD2 覆盖,
# 避免同一助教多档位行被统一覆盖为月末档位导致 UK 冲突。
# 月度去重客户/台桌从DWD直接去重
unique_info = monthly_unique_index.get((assistant_id, month), {})
@@ -424,8 +429,8 @@ class AssistantMonthlyTask(BaseDwsTask):
'assistant_id': assistant_id,
'assistant_nickname': agg.get('assistant_nickname'),
'stat_month': month,
'assistant_level_code': level_info.get('level_code') if level_info else agg.get('assistant_level_code'),
'assistant_level_name': level_info.get('level_name') if level_info else agg.get('assistant_level_name'),
'assistant_level_code': agg.get('assistant_level_code'),
'assistant_level_name': agg.get('assistant_level_name'),
'hire_date': hire_date,
'is_new_hire': is_new_hire,
'work_days': self.safe_int(agg.get('work_days', 0)),
@@ -536,7 +541,8 @@ class AssistantMonthlyTask(BaseDwsTask):
计算排名(考虑并列)
Top3排名口径按有效业绩总小时数排名
如遇并列则都算比如2个第一则记为2个第一一个第三
如遇并列则都算比如2个第一则记为2个第一一个第三
同一助教不同档位的行各自独立参与排名。
"""
if not records:
return
@@ -548,24 +554,29 @@ class AssistantMonthlyTask(BaseDwsTask):
reverse=True
)
# 计算考虑并列的排名
# CHANGE 2026-02-22 | Prompt: 需求 A 任务 7.3 — 多行排名适配
# 同一助教可能有多个档位行,用 (assistant_id, assistant_level_code) 做唯一标识,
# 避免 rank_map 中同一 assistant_id 的多行互相覆盖。
values = [
(r.get('assistant_id'), r.get('effective_hours', Decimal('0')))
(
(r.get('assistant_id'), r.get('assistant_level_code')),
r.get('effective_hours', Decimal('0'))
)
for r in sorted_records
]
ranked = self.calculate_rank_with_ties(values)
# 创建排名映射
# 创建排名映射key = (assistant_id, assistant_level_code)
rank_map = {
assistant_id: (rank, dense_rank)
for assistant_id, rank, dense_rank in ranked
entity_key: (rank, dense_rank)
for entity_key, rank, dense_rank in ranked
}
# 更新记录
for record in records:
assistant_id = record.get('assistant_id')
if assistant_id in rank_map:
rank, _ = rank_map[assistant_id]
key = (record.get('assistant_id'), record.get('assistant_level_code'))
if key in rank_map:
rank, _ = rank_map[key]
record['rank_by_hours'] = rank
record['rank_with_ties'] = rank # 使用考虑并列的排名

View File

@@ -1,4 +1,13 @@
# -*- coding: utf-8 -*-
# AI_CHANGELOG
# - 2026-02-21 | feature: 新增 GUARANTEE 保底月薪线逻辑
# prompt: "这些种子规则,对应的计算方式,是否也实现?"
# 直接原因: cfg_bonus_rules 新增 GUARANTEE 类型规则，_calculate_salary 需要对应处理
# 变更: (1) _calculate_salary 调用 calculate_guarantee() 获取保底金额
# (2) gross_salary = MAX(课时收入+奖金, 保底金额)
# (3) _build_calc_notes 增加保底生效/未触发备注
# 风险: 仅在 salary_month 落入 GUARANTEE 生效期（2025-01-01~2026-02-28）时触发
# 验证: 保底期外的月份不受影响（calculate_guarantee 返回 (0, None)）
"""
助教工资计算任务
@@ -64,7 +73,8 @@ class AssistantSalaryTask(BaseDwsTask):
return "dws_assistant_salary_calc"
def get_primary_keys(self) -> List[str]:
return ["site_id", "assistant_id", "salary_month"]
# CHANGE 2026-02-22 | task 7.4: 唯一键加入 assistant_level_code适配档位分段工资
return ["site_id", "assistant_id", "salary_month", "assistant_level_code"]
# ==========================================================================
# ETL主流程
@@ -330,12 +340,29 @@ class AssistantSalaryTask(BaseDwsTask):
# 获取充值提成
recharge_commission = commission_index.get(assistant_id, Decimal('0'))
# 汇总奖金
# CHANGE 2026-02-21 | 保底月薪线逻辑
# prompt: "这些种子规则,对应的计算方式,是否也实现?"
# reason: 2025-01-01~2026-02-28 期间,满足条件的助教享受保底月薪线
# 公式: gross_salary = MAX(课时收入 + 其他奖金, guarantee_amount)
guarantee_amount, guarantee_rule = self.calculate_guarantee(
level_code=level_code,
effective_hours=effective_hours,
bonus_hours=bonus_hours,
effective_date=salary_month,
)
# 汇总奖金(不含保底,保底是月薪线而非额外奖金)
other_bonus = Decimal('0') # 预留其他奖金
total_bonus = sprint_bonus + top_rank_bonus + recharge_commission + other_bonus
# 计算应发工资 = 课时收入 + 奖金
gross_salary = total_course_income + total_bonus
# 计算应发工资
# 无保底或不满足条件gross_salary = 课时收入 + 奖金
# 有保底且满足条件gross_salary = MAX(课时收入 + 奖金, 保底金额)
raw_salary = total_course_income + total_bonus
if guarantee_amount > 0 and raw_salary < guarantee_amount:
gross_salary = guarantee_amount
else:
gross_salary = raw_salary
# 构建记录
return {
@@ -377,7 +404,11 @@ class AssistantSalaryTask(BaseDwsTask):
# 假期
'vacation_days': vacation_days,
'vacation_unlimited': vacation_unlimited,
'calc_notes': self._build_calc_notes(summary, tier, sprint_bonus, top_rank_bonus),
'calc_notes': self._build_calc_notes(
summary, tier, sprint_bonus, top_rank_bonus,
guarantee_amount=guarantee_amount, guarantee_rule=guarantee_rule,
raw_salary=raw_salary,
),
}
def _build_calc_notes(
@@ -385,7 +416,10 @@ class AssistantSalaryTask(BaseDwsTask):
summary: Dict[str, Any],
tier: Optional[Dict[str, Any]],
sprint_bonus: Decimal,
top_rank_bonus: Decimal
top_rank_bonus: Decimal,
guarantee_amount: Decimal = Decimal('0'),
guarantee_rule: Optional[str] = None,
raw_salary: Decimal = Decimal('0'),
) -> Optional[str]:
"""
构建计算备注
@@ -405,6 +439,20 @@ class AssistantSalaryTask(BaseDwsTask):
rank = summary.get('rank_with_ties')
notes.append(f"Top{rank}奖金: {top_rank_bonus}")
# 保底月薪线备注
if guarantee_amount > 0:
if raw_salary < guarantee_amount:
diff = guarantee_amount - raw_salary
notes.append(
f"保底生效({guarantee_rule}): 月薪线{guarantee_amount}, "
f"课时+奖金{raw_salary}, 补差{diff}"
)
else:
notes.append(
f"保底未触发({guarantee_rule}): 月薪线{guarantee_amount}, "
f"实际收入{raw_salary}已超过"
)
return "; ".join(notes) if notes else None
def _delete_by_month(

View File

@@ -824,6 +824,55 @@ class BaseDwsTask(BaseTask):
return Decimal(str(rule.get('bonus_amount', 0)))
return Decimal('0')
# CHANGE 2026-02-21 | 新增保底月薪线计算方法
# prompt: "这些种子规则,对应的计算方式,是否也实现?"
# reason: cfg_bonus_rules 新增 GUARANTEE 类型规则,需要对应的计算入口
def calculate_guarantee(
    self,
    level_code: int,
    effective_hours: Decimal,
    bonus_hours: Decimal,
    effective_date: Optional[date] = None,
    min_bonus_hours: Decimal = Decimal('10'),
) -> Tuple[Decimal, Optional[str]]:
    """
    Look up the guaranteed monthly salary floor (GUARANTEE rule) for an assistant.

    The rule is selected from the bonus rules effective on ``effective_date``
    by ``rule_type == 'GUARANTEE'`` and ``rule_code == f"GUAR_LV{level_code}"``.
    The floor applies only when both conditions hold:
    ``effective_hours >= rule['threshold_hours']`` and
    ``bonus_hours >= min_bonus_hours``.

    The returned amount is a salary floor, not an additional bonus; the
    caller is expected to compare it against the computed salary itself.

    Args:
        level_code: Assistant level code used to build the rule code.
        effective_hours: Effective performance hours for the month.
        bonus_hours: Bonus-course hours for the month.
        effective_date: Date used to filter rules by validity period.
        min_bonus_hours: Minimum bonus-course hours required (default 10).

    Returns:
        ``(guarantee_amount, rule_code)`` when the rule matches and its
        conditions are met, otherwise ``(Decimal('0'), None)``.
    """
    config = self.load_config_cache()
    rules = self._filter_by_effective_date(config.bonus_rules, effective_date)
    target_code = f"GUAR_LV{level_code}"
    not_applicable = (Decimal('0'), None)

    for rule in rules:
        if rule.get('rule_type') != 'GUARANTEE':
            continue
        if rule.get('rule_code') != target_code:
            continue

        # `or 0` also covers a key that is present with a None value (e.g. a
        # NULL column); Decimal(str(None)) would raise InvalidOperation.
        threshold = Decimal(str(rule.get('threshold_hours') or 0))
        if effective_hours >= threshold and bonus_hours >= min_bonus_hours:
            return (Decimal(str(rule.get('bonus_amount') or 0)), target_code)

        # Only the first rule matching the level is evaluated; if its
        # conditions are not met there is no guarantee.
        return not_applicable

    # No GUARANTEE rule for this level is effective on the given date.
    return not_applicable
# ==========================================================================
# DWD数据读取方法

View File

@@ -82,21 +82,22 @@ class FinanceBaseTask(BaseDwsTask):
end_date: date,
) -> List[Dict[str, Any]]:
"""充值日汇总(充值订单按日聚合)"""
# CHANGE 2026-02-21 | BUG 8: dwd_recharge_order 无 pay_money/gift_money实际字段为 pay_amount/point_amount
sql = """
SELECT
DATE(pay_time) AS stat_date,
COUNT(*) AS recharge_count,
SUM(pay_money + gift_money) AS recharge_total,
SUM(pay_money) AS recharge_cash,
SUM(gift_money) AS recharge_gift,
SUM(pay_amount + point_amount) AS recharge_total,
SUM(pay_amount) AS recharge_cash,
SUM(point_amount) AS recharge_gift,
COUNT(CASE WHEN is_first = 1 THEN 1 END) AS first_recharge_count,
SUM(CASE WHEN is_first = 1 THEN pay_money + gift_money ELSE 0 END) AS first_recharge_total,
SUM(CASE WHEN is_first = 1 THEN pay_money ELSE 0 END) AS first_recharge_cash,
SUM(CASE WHEN is_first = 1 THEN gift_money ELSE 0 END) AS first_recharge_gift,
SUM(CASE WHEN is_first = 1 THEN pay_amount + point_amount ELSE 0 END) AS first_recharge_total,
SUM(CASE WHEN is_first = 1 THEN pay_amount ELSE 0 END) AS first_recharge_cash,
SUM(CASE WHEN is_first = 1 THEN point_amount ELSE 0 END) AS first_recharge_gift,
COUNT(CASE WHEN is_first = 0 OR is_first IS NULL THEN 1 END) AS renewal_count,
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN pay_money + gift_money ELSE 0 END) AS renewal_total,
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN pay_money ELSE 0 END) AS renewal_cash,
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN gift_money ELSE 0 END) AS renewal_gift,
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN pay_amount + point_amount ELSE 0 END) AS renewal_total,
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN pay_amount ELSE 0 END) AS renewal_cash,
SUM(CASE WHEN is_first = 0 OR is_first IS NULL THEN point_amount ELSE 0 END) AS renewal_gift,
COUNT(DISTINCT member_id) AS recharge_member_count
FROM dwd.dwd_recharge_order
WHERE site_id = %s

View File

@@ -140,6 +140,8 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
关联dim_table获取区域名称再映射到cfg_area_category
"""
# CHANGE 2026-02-22 | BUG 7 修复 | dim_table 主键是 table_id 而非 site_table_id
# JOIN 条件从 dt.site_table_id → dt.table_id事实表侧 site_table_id 不变)
sql = """
WITH area_orders AS (
SELECT
@@ -150,7 +152,7 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
COALESCE(tfl.ledger_time_seconds, 0) AS duration_seconds
FROM dwd.dwd_table_fee_log tfl
LEFT JOIN dwd.dim_table dt
ON dt.site_table_id = tfl.site_table_id
ON dt.table_id = tfl.site_table_id
WHERE tfl.site_id = %(site_id)s
AND tfl.pay_time >= %(start_date)s
AND tfl.pay_time < %(end_date)s + INTERVAL '1 day'
@@ -166,7 +168,7 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
COALESCE(asl.income_seconds, 0) AS duration_seconds
FROM dwd.dwd_assistant_service_log asl
LEFT JOIN dwd.dim_table dt
ON dt.site_table_id = asl.site_table_id
ON dt.table_id = asl.site_table_id
WHERE asl.site_id = %(site_id)s
AND asl.start_use_time >= %(start_date)s
AND asl.start_use_time < %(end_date)s + INTERVAL '1 day'

View File

@@ -18,7 +18,7 @@
业务规则:
- 首充/续充:通过 is_first 字段区分
- 现金/赠送:通过 pay_money/gift_money 区分
- 现金/赠送:通过 pay_amount/point_amount 区分
- 卡余额:区分储值卡和赠送卡
作者ETL团队
@@ -110,21 +110,22 @@ class FinanceRechargeTask(FinanceBaseTask):
# load() 已移除——使用 BaseDwsTask 默认实现DATE_COL="stat_date"
def _extract_recharge_summary(self, site_id: int, start_date: date, end_date: date) -> List[Dict[str, Any]]:
# CHANGE 2026-02-21 | BUG 8: dwd_recharge_order 无 pay_money/gift_money实际字段为 pay_amount/point_amount
sql = """
SELECT
DATE(pay_time) AS stat_date,
COUNT(*) AS recharge_count,
SUM(pay_money + gift_money) AS recharge_total,
SUM(pay_money) AS recharge_cash,
SUM(gift_money) AS recharge_gift,
SUM(pay_amount + point_amount) AS recharge_total,
SUM(pay_amount) AS recharge_cash,
SUM(point_amount) AS recharge_gift,
COUNT(CASE WHEN is_first = 1 THEN 1 END) AS first_recharge_count,
SUM(CASE WHEN is_first = 1 THEN pay_money ELSE 0 END) AS first_recharge_cash,
SUM(CASE WHEN is_first = 1 THEN gift_money ELSE 0 END) AS first_recharge_gift,
SUM(CASE WHEN is_first = 1 THEN pay_money + gift_money ELSE 0 END) AS first_recharge_total,
SUM(CASE WHEN is_first = 1 THEN pay_amount ELSE 0 END) AS first_recharge_cash,
SUM(CASE WHEN is_first = 1 THEN point_amount ELSE 0 END) AS first_recharge_gift,
SUM(CASE WHEN is_first = 1 THEN pay_amount + point_amount ELSE 0 END) AS first_recharge_total,
COUNT(CASE WHEN is_first != 1 OR is_first IS NULL THEN 1 END) AS renewal_count,
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_money ELSE 0 END) AS renewal_cash,
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN gift_money ELSE 0 END) AS renewal_gift,
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_money + gift_money ELSE 0 END) AS renewal_total,
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_amount ELSE 0 END) AS renewal_cash,
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN point_amount ELSE 0 END) AS renewal_gift,
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_amount + point_amount ELSE 0 END) AS renewal_total,
COUNT(DISTINCT member_id) AS recharge_member_count,
COUNT(DISTINCT CASE WHEN is_first = 1 THEN member_id END) AS new_member_count
FROM dwd.dwd_recharge_order
@@ -138,10 +139,18 @@ class FinanceRechargeTask(FinanceBaseTask):
CASH_CARD_TYPE_ID = 2793249295533893
GIFT_CARD_TYPE_IDS = [2791990152417157, 2793266846533445, 2794699703437125]
# CHANGE 2026-02-21 | dim_member_card_account 无 site_id 字段,改用 register_site_id
# CHANGE 2026-02-22 | 需求 B通过事实表反查支持跨店消费会员
sql = """
SELECT card_type_id, SUM(balance) AS total_balance
FROM dwd.dim_member_card_account
WHERE site_id = %s AND scd2_is_current = 1
WHERE tenant_member_id IN (
SELECT DISTINCT member_id
FROM dwd.dwd_recharge_order
WHERE site_id = %s
AND member_id IS NOT NULL
AND member_id != 0
) AND scd2_is_current = 1
AND COALESCE(is_delete, 0) = 0
GROUP BY card_type_id
"""

View File

@@ -0,0 +1,237 @@
# -*- coding: utf-8 -*-
"""
库存日度汇总任务
功能说明:
"门店+日期+商品"为粒度,汇总每日库存数据
数据来源:
- dwd.dwd_goods_stock_summary:库存汇总明细(按 fetched_at 日期聚合)
目标表:
dws.dws_goods_stock_daily_summary
更新策略:
- 更新频率:每日更新
- 幂等方式:upsert(ON CONFLICT DO UPDATE)
业务规则:
- 按 fetched_at 的日期部分分组,同一天同一商品可能有多条 DWD 记录
- 数值指标取 SUM 聚合(入库/出库/销售等为累计量)
- current_stock 取当日最后一条记录的值(期末快照)
- range_start_stock 取当日第一条记录的值(期初快照)
- range_end_stock 取当日最后一条记录的值(期末快照)
"""
from __future__ import annotations
from datetime import date
from decimal import Decimal
from typing import Any, Dict, List
from .base_dws_task import BaseDwsTask, TaskContext
class GoodsStockDailyTask(BaseDwsTask):
    """
    Daily inventory roll-up at (site, date, goods) granularity.

    Rows are read from dwd.dwd_goods_stock_summary, folded into one record
    per calendar day and goods item, and upserted into the table named by
    get_target_table().
    """

    DATE_COL = "stat_date"

    def get_task_code(self) -> str:
        """Return the task code of this job."""
        return "DWS_GOODS_STOCK_DAILY"

    def get_target_table(self) -> str:
        """Return the name of the DWS target table."""
        return "dws_goods_stock_daily_summary"

    def get_primary_keys(self) -> List[str]:
        """Return the columns that identify one daily summary row."""
        return ["site_id", "stat_date", "site_goods_id"]

    # ======================================================================
    # Extract
    # ======================================================================

    def extract(self, context: TaskContext) -> Dict[str, Any]:
        """Read DWD stock-summary rows whose fetched_at date is in the window.

        Returns a dict with the fetched ``rows`` plus the ``start_date``,
        ``end_date`` and ``site_id`` that were used for the query.
        """
        window_start = context.window_start
        window_end = context.window_end
        # Datetime-like bounds are reduced to their date part; plain dates
        # are used unchanged.
        start_date = window_start.date() if hasattr(window_start, "date") else window_start
        end_date = window_end.date() if hasattr(window_end, "date") else window_end
        site_id = context.store_id

        task_code = self.get_task_code()
        self.logger.info(
            "%s: 提取数据,门店=%s,日期范围 %s ~ %s",
            task_code, site_id, start_date, end_date,
        )

        # Ordering by fetched_at lets transform() treat the first row of a
        # group as the opening snapshot and the last as the closing one.
        sql = """
            SELECT
                site_goods_id,
                goods_name,
                goods_unit,
                goods_category_id,
                goods_category_second_id,
                category_name,
                range_start_stock,
                range_end_stock,
                range_in,
                range_out,
                range_sale,
                range_sale_money,
                range_inventory,
                current_stock,
                site_id,
                tenant_id,
                fetched_at
            FROM dwd.dwd_goods_stock_summary
            WHERE site_id = %s
              AND DATE(fetched_at) >= %s
              AND DATE(fetched_at) <= %s
            ORDER BY fetched_at
        """
        rows = self.query_dwd(sql, (site_id, start_date, end_date))
        self.logger.info(
            "%s: 提取到 %d 条 DWD 记录", self.get_task_code(), len(rows),
        )
        return {
            "rows": rows,
            "start_date": start_date,
            "end_date": end_date,
            "site_id": site_id,
        }

    # ======================================================================
    # Transform
    # ======================================================================

    def transform(
        self, extracted: Dict[str, Any], context: TaskContext
    ) -> List[Dict[str, Any]]:
        """Fold the extracted rows into one record per (date, goods item).

        Flow metrics are summed over the rows of a group; the opening stock
        comes from the group's first row and the closing/current stock from
        its last row (rows arrive ordered by fetched_at).
        """
        source_rows = extracted.get("rows", [])
        site_id = extracted["site_id"]
        if not source_rows:
            self.logger.info("%s: 无数据需要汇总", self.get_task_code())
            return []

        descriptive_cols = (
            "goods_name", "goods_unit", "goods_category_id",
            "goods_category_second_id", "category_name",
        )
        numeric_cols = (
            "range_start_stock", "range_end_stock",
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory", "current_stock",
        )
        summed_cols = (
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory",
        )
        closing_cols = ("range_end_stock", "current_stock")
        to_decimal = self.safe_decimal

        # (stat_date, site_goods_id) -> aggregated record
        buckets: Dict[tuple, Dict[str, Any]] = {}
        for row in source_rows:
            fetched_at = row.get("fetched_at")
            if fetched_at is None:
                continue
            stat_date = fetched_at.date() if hasattr(fetched_at, "date") else fetched_at

            goods_id = row.get("site_goods_id")
            if goods_id is None:
                continue

            bucket_key = (stat_date, goods_id)
            record = buckets.get(bucket_key)
            if record is None:
                # First row of the group: seed every column from it, which
                # fixes the opening snapshot.
                record = {
                    "site_id": site_id,
                    "tenant_id": row.get("tenant_id"),
                    "stat_date": stat_date,
                    "site_goods_id": goods_id,
                }
                record.update({name: row.get(name) for name in descriptive_cols})
                record.update({name: to_decimal(row.get(name)) for name in numeric_cols})
                record["stat_period"] = "daily"
                buckets[bucket_key] = record
                continue

            # Later rows: accumulate the flows and move the closing snapshot.
            for name in summed_cols:
                record[name] += to_decimal(row.get(name))
            for name in closing_cols:
                record[name] = to_decimal(row.get(name))

        result = list(buckets.values())
        self.logger.info(
            "%s: 汇总完成,生成 %d 条日度记录",
            self.get_task_code(), len(result),
        )
        return result

    # ======================================================================
    # Load
    # ======================================================================

    def load(
        self, transformed: List[Dict[str, Any]], context: TaskContext
    ) -> Dict[str, Any]:
        """Upsert the aggregated records and report row counts."""
        counts = {
            "fetched": 0,
            "inserted": 0,
            "updated": 0,
            "skipped": 0,
            "errors": 0,
        }
        if not transformed:
            return {"counts": counts}

        columns = [
            "site_id", "tenant_id", "stat_date", "site_goods_id",
            "goods_name", "goods_unit", "goods_category_id",
            "goods_category_second_id", "category_name",
            "range_start_stock", "range_end_stock",
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory", "current_stock",
            "stat_period",
        ]
        inserted, updated = self.upsert(transformed, columns=columns)
        self.logger.info(
            "%s: 写入完成inserted=%d",
            self.get_task_code(), inserted,
        )
        counts.update(fetched=len(transformed), inserted=inserted, updated=updated)
        return {"counts": counts}
__all__ = ["GoodsStockDailyTask"]

View File

@@ -0,0 +1,245 @@
# -*- coding: utf-8 -*-
"""
库存月度汇总任务
功能说明:
"门店+自然月+商品"为粒度,汇总每月库存数据
数据来源:
- dwd.dwd_goods_stock_summary:库存汇总明细(按 fetched_at 日期聚合)
目标表:
dws.dws_goods_stock_monthly_summary
更新策略:
- 更新频率:每日更新当月数据
- 幂等方式:upsert(ON CONFLICT DO UPDATE)
业务规则:
- 按自然月分组,stat_date = 该月的第一天(如 2026-01-01 代表 2026 年 1 月)
- 同一月同一商品可能有多条 DWD 记录
- 数值指标取 SUM 聚合(入库/出库/销售等为累计量)
- current_stock 取该月最后一条记录的值(期末快照)
- range_start_stock 取该月第一条记录的值(期初快照)
- range_end_stock 取该月最后一条记录的值(期末快照)
- stat_period = 'monthly'
"""
from __future__ import annotations
from datetime import date
from decimal import Decimal
from typing import Any, Dict, List
from .base_dws_task import BaseDwsTask, TaskContext
def _month_first_day(d: date) -> date:
    """Return *d* moved to day 1 of its own calendar month."""
    first_of_month = d.replace(day=1)
    return first_of_month
class GoodsStockMonthlyTask(BaseDwsTask):
    """
    Monthly inventory roll-up at (site, calendar month, goods) granularity.

    Rows are read from dwd.dwd_goods_stock_summary, folded into one record
    per calendar month and goods item (``stat_date`` is the first day of the
    month), and upserted into the table named by get_target_table().
    """

    DATE_COL = "stat_date"

    def get_task_code(self) -> str:
        """Return the task code of this job."""
        return "DWS_GOODS_STOCK_MONTHLY"

    def get_target_table(self) -> str:
        """Return the name of the DWS target table."""
        return "dws_goods_stock_monthly_summary"

    def get_primary_keys(self) -> List[str]:
        """Return the columns that identify one monthly summary row."""
        return ["site_id", "stat_date", "site_goods_id"]

    @staticmethod
    def _month_last_day(d: date) -> date:
        """Return the last day of the calendar month containing *d*."""
        first = _month_first_day(d)
        if first.month == 12:
            next_first = date(first.year + 1, 1, 1)
        else:
            next_first = date(first.year, first.month + 1, 1)
        # One ordinal before the next month's first day is this month's end.
        return date.fromordinal(next_first.toordinal() - 1)

    # ======================================================================
    # Extract
    # ======================================================================

    def extract(self, context: TaskContext) -> Dict[str, Any]:
        """Read DWD stock-summary rows for every month touched by the window.

        The run window is widened to whole calendar months before querying.
        transform() emits one row per month and load() upserts it on
        (site_id, stat_date, site_goods_id); with a window covering only part
        of a month the stored monthly row would be replaced by partial sums
        and a wrong opening stock (assuming upsert overwrites on conflict, as
        the module docstring states).

        Returns a dict with the fetched ``rows``, the month-aligned
        ``start_date``/``end_date`` actually queried, and ``site_id``.
        """
        window_start = context.window_start
        window_end = context.window_end
        start_date = window_start.date() if hasattr(window_start, "date") else window_start
        end_date = window_end.date() if hasattr(window_end, "date") else window_end

        # Align only real dates so any other value reaches the query exactly
        # as it did before.
        if isinstance(start_date, date):
            start_date = _month_first_day(start_date)
        if isinstance(end_date, date):
            end_date = self._month_last_day(end_date)

        site_id = context.store_id
        self.logger.info(
            "%s: 提取数据,门店=%s,日期范围 %s ~ %s",
            self.get_task_code(), site_id, start_date, end_date,
        )

        # Ordering by fetched_at lets transform() treat the first row of a
        # group as the opening snapshot and the last as the closing one.
        sql = """
            SELECT
                site_goods_id,
                goods_name,
                goods_unit,
                goods_category_id,
                goods_category_second_id,
                category_name,
                range_start_stock,
                range_end_stock,
                range_in,
                range_out,
                range_sale,
                range_sale_money,
                range_inventory,
                current_stock,
                site_id,
                tenant_id,
                fetched_at
            FROM dwd.dwd_goods_stock_summary
            WHERE site_id = %s
              AND DATE(fetched_at) >= %s
              AND DATE(fetched_at) <= %s
            ORDER BY fetched_at
        """
        rows = self.query_dwd(sql, (site_id, start_date, end_date))
        self.logger.info(
            "%s: 提取到 %d 条 DWD 记录", self.get_task_code(), len(rows),
        )
        return {
            "rows": rows,
            "start_date": start_date,
            "end_date": end_date,
            "site_id": site_id,
        }

    # ======================================================================
    # Transform
    # ======================================================================

    def transform(
        self, extracted: Dict[str, Any], context: TaskContext
    ) -> List[Dict[str, Any]]:
        """Fold the extracted rows into one record per (month, goods item).

        Flow metrics are summed over the rows of a group; the opening stock
        comes from the group's first row and the closing/current stock from
        its last row (rows arrive ordered by fetched_at).
        """
        source_rows = extracted.get("rows", [])
        site_id = extracted["site_id"]
        if not source_rows:
            self.logger.info("%s: 无数据需要汇总", self.get_task_code())
            return []

        descriptive_cols = (
            "goods_name", "goods_unit", "goods_category_id",
            "goods_category_second_id", "category_name",
        )
        numeric_cols = (
            "range_start_stock", "range_end_stock",
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory", "current_stock",
        )
        summed_cols = (
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory",
        )
        closing_cols = ("range_end_stock", "current_stock")
        to_decimal = self.safe_decimal

        # (first day of month, site_goods_id) -> aggregated record
        buckets: Dict[tuple, Dict[str, Any]] = {}
        for row in source_rows:
            fetched_at = row.get("fetched_at")
            if fetched_at is None:
                continue
            row_date = fetched_at.date() if hasattr(fetched_at, "date") else fetched_at
            # The first day of the calendar month is the stat_date.
            first_day = _month_first_day(row_date)

            goods_id = row.get("site_goods_id")
            if goods_id is None:
                continue

            bucket_key = (first_day, goods_id)
            record = buckets.get(bucket_key)
            if record is None:
                # First row of the group: seed every column from it, which
                # fixes the opening snapshot.
                record = {
                    "site_id": site_id,
                    "tenant_id": row.get("tenant_id"),
                    "stat_date": first_day,
                    "site_goods_id": goods_id,
                }
                record.update({name: row.get(name) for name in descriptive_cols})
                record.update({name: to_decimal(row.get(name)) for name in numeric_cols})
                record["stat_period"] = "monthly"
                buckets[bucket_key] = record
                continue

            # Later rows: accumulate the flows and move the closing snapshot.
            for name in summed_cols:
                record[name] += to_decimal(row.get(name))
            for name in closing_cols:
                record[name] = to_decimal(row.get(name))

        result = list(buckets.values())
        self.logger.info(
            "%s: 汇总完成,生成 %d 条月度记录",
            self.get_task_code(), len(result),
        )
        return result

    # ======================================================================
    # Load
    # ======================================================================

    def load(
        self, transformed: List[Dict[str, Any]], context: TaskContext
    ) -> Dict[str, Any]:
        """Upsert the aggregated records and report row counts."""
        counts = {
            "fetched": 0,
            "inserted": 0,
            "updated": 0,
            "skipped": 0,
            "errors": 0,
        }
        if not transformed:
            return {"counts": counts}

        columns = [
            "site_id", "tenant_id", "stat_date", "site_goods_id",
            "goods_name", "goods_unit", "goods_category_id",
            "goods_category_second_id", "category_name",
            "range_start_stock", "range_end_stock",
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory", "current_stock",
            "stat_period",
        ]
        inserted, updated = self.upsert(transformed, columns=columns)
        self.logger.info(
            "%s: 写入完成inserted=%d",
            self.get_task_code(), inserted,
        )
        counts.update(fetched=len(transformed), inserted=inserted, updated=updated)
        return {"counts": counts}
__all__ = ["GoodsStockMonthlyTask"]

View File

@@ -0,0 +1,246 @@
# -*- coding: utf-8 -*-
"""
库存周度汇总任务
功能说明:
"门店+ISO周+商品"为粒度,汇总每周库存数据
数据来源:
- dwd.dwd_goods_stock_summary:库存汇总明细(按 fetched_at 日期聚合)
目标表:
dws.dws_goods_stock_weekly_summary
更新策略:
- 更新频率:每周更新
- 幂等方式:upsert(ON CONFLICT DO UPDATE)
业务规则:
- 按 ISO 周分组(isocalendar),stat_date = 该周的周一日期
- 同一周同一商品可能有多条 DWD 记录
- 数值指标取 SUM 聚合(入库/出库/销售等为累计量)
- current_stock 取该周最后一条记录的值(期末快照)
- range_start_stock 取该周第一条记录的值(期初快照)
- range_end_stock 取该周最后一条记录的值(期末快照)
- stat_period = 'weekly'
"""
from __future__ import annotations
from datetime import date, timedelta
from decimal import Decimal
from typing import Any, Dict, List
from .base_dws_task import BaseDwsTask, TaskContext
def _iso_monday(d: date) -> date:
    """Return the Monday that starts the ISO week containing *d*."""
    # isoweekday(): 1 = Monday ... 7 = Sunday, so Monday itself shifts by 0.
    days_since_monday = d.isoweekday() - 1
    return d - timedelta(days=days_since_monday)
class GoodsStockWeeklyTask(BaseDwsTask):
    """
    Weekly inventory roll-up at (site, ISO week, goods) granularity.

    Rows are read from dwd.dwd_goods_stock_summary, folded into one record
    per ISO week and goods item (``stat_date`` is the Monday of the week),
    and upserted into the table named by get_target_table().
    """

    DATE_COL = "stat_date"

    def get_task_code(self) -> str:
        """Return the task code of this job."""
        return "DWS_GOODS_STOCK_WEEKLY"

    def get_target_table(self) -> str:
        """Return the name of the DWS target table."""
        return "dws_goods_stock_weekly_summary"

    def get_primary_keys(self) -> List[str]:
        """Return the columns that identify one weekly summary row."""
        return ["site_id", "stat_date", "site_goods_id"]

    # ======================================================================
    # Extract
    # ======================================================================

    def extract(self, context: TaskContext) -> Dict[str, Any]:
        """Read DWD stock-summary rows for every ISO week touched by the window.

        The run window is widened to whole Monday..Sunday weeks before
        querying. transform() emits one row per week and load() upserts it on
        (site_id, stat_date, site_goods_id); with a window covering only part
        of a week the stored weekly row would be replaced by partial sums and
        a wrong opening stock (assuming upsert overwrites on conflict, as the
        module docstring states).

        Returns a dict with the fetched ``rows``, the week-aligned
        ``start_date``/``end_date`` actually queried, and ``site_id``.
        """
        window_start = context.window_start
        window_end = context.window_end
        start_date = window_start.date() if hasattr(window_start, "date") else window_start
        end_date = window_end.date() if hasattr(window_end, "date") else window_end

        # Align only real dates so any other value reaches the query exactly
        # as it did before.
        if isinstance(start_date, date):
            start_date = _iso_monday(start_date)
        if isinstance(end_date, date):
            end_date = _iso_monday(end_date) + timedelta(days=6)

        site_id = context.store_id
        self.logger.info(
            "%s: 提取数据,门店=%s,日期范围 %s ~ %s",
            self.get_task_code(), site_id, start_date, end_date,
        )

        # Ordering by fetched_at lets transform() treat the first row of a
        # group as the opening snapshot and the last as the closing one.
        sql = """
            SELECT
                site_goods_id,
                goods_name,
                goods_unit,
                goods_category_id,
                goods_category_second_id,
                category_name,
                range_start_stock,
                range_end_stock,
                range_in,
                range_out,
                range_sale,
                range_sale_money,
                range_inventory,
                current_stock,
                site_id,
                tenant_id,
                fetched_at
            FROM dwd.dwd_goods_stock_summary
            WHERE site_id = %s
              AND DATE(fetched_at) >= %s
              AND DATE(fetched_at) <= %s
            ORDER BY fetched_at
        """
        rows = self.query_dwd(sql, (site_id, start_date, end_date))
        self.logger.info(
            "%s: 提取到 %d 条 DWD 记录", self.get_task_code(), len(rows),
        )
        return {
            "rows": rows,
            "start_date": start_date,
            "end_date": end_date,
            "site_id": site_id,
        }

    # ======================================================================
    # Transform
    # ======================================================================

    def transform(
        self, extracted: Dict[str, Any], context: TaskContext
    ) -> List[Dict[str, Any]]:
        """Fold the extracted rows into one record per (ISO week, goods item).

        Flow metrics are summed over the rows of a group; the opening stock
        comes from the group's first row and the closing/current stock from
        its last row (rows arrive ordered by fetched_at).
        """
        source_rows = extracted.get("rows", [])
        site_id = extracted["site_id"]
        if not source_rows:
            self.logger.info("%s: 无数据需要汇总", self.get_task_code())
            return []

        descriptive_cols = (
            "goods_name", "goods_unit", "goods_category_id",
            "goods_category_second_id", "category_name",
        )
        numeric_cols = (
            "range_start_stock", "range_end_stock",
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory", "current_stock",
        )
        summed_cols = (
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory",
        )
        closing_cols = ("range_end_stock", "current_stock")
        to_decimal = self.safe_decimal

        # (Monday of ISO week, site_goods_id) -> aggregated record
        buckets: Dict[tuple, Dict[str, Any]] = {}
        for row in source_rows:
            fetched_at = row.get("fetched_at")
            if fetched_at is None:
                continue
            row_date = fetched_at.date() if hasattr(fetched_at, "date") else fetched_at
            # The Monday of the ISO week is the stat_date.
            monday = _iso_monday(row_date)

            goods_id = row.get("site_goods_id")
            if goods_id is None:
                continue

            bucket_key = (monday, goods_id)
            record = buckets.get(bucket_key)
            if record is None:
                # First row of the group: seed every column from it, which
                # fixes the opening snapshot.
                record = {
                    "site_id": site_id,
                    "tenant_id": row.get("tenant_id"),
                    "stat_date": monday,
                    "site_goods_id": goods_id,
                }
                record.update({name: row.get(name) for name in descriptive_cols})
                record.update({name: to_decimal(row.get(name)) for name in numeric_cols})
                record["stat_period"] = "weekly"
                buckets[bucket_key] = record
                continue

            # Later rows: accumulate the flows and move the closing snapshot.
            for name in summed_cols:
                record[name] += to_decimal(row.get(name))
            for name in closing_cols:
                record[name] = to_decimal(row.get(name))

        result = list(buckets.values())
        self.logger.info(
            "%s: 汇总完成,生成 %d 条周度记录",
            self.get_task_code(), len(result),
        )
        return result

    # ======================================================================
    # Load
    # ======================================================================

    def load(
        self, transformed: List[Dict[str, Any]], context: TaskContext
    ) -> Dict[str, Any]:
        """Upsert the aggregated records and report row counts."""
        counts = {
            "fetched": 0,
            "inserted": 0,
            "updated": 0,
            "skipped": 0,
            "errors": 0,
        }
        if not transformed:
            return {"counts": counts}

        columns = [
            "site_id", "tenant_id", "stat_date", "site_goods_id",
            "goods_name", "goods_unit", "goods_category_id",
            "goods_category_second_id", "category_name",
            "range_start_stock", "range_end_stock",
            "range_in", "range_out", "range_sale",
            "range_sale_money", "range_inventory", "current_stock",
            "stat_period",
        ]
        inserted, updated = self.upsert(transformed, columns=columns)
        self.logger.info(
            "%s: 写入完成inserted=%d",
            self.get_task_code(), inserted,
        )
        counts.update(fetched=len(transformed), inserted=inserted, updated=updated)
        return {"counts": counts}
__all__ = ["GoodsStockWeeklyTask"]

View File

@@ -8,16 +8,19 @@
- NewconvIndexTask: 新客转化指数 (NCI)
- MlManualImportTask: ML 人工台账导入任务
- RelationIndexTask: 关系指数计算任务RS/OS/MS/ML
- SpendingPowerIndexTask: 消费力指数 (SPI)
"""
from .winback_index_task import WinbackIndexTask
from .newconv_index_task import NewconvIndexTask
from .ml_manual_import_task import MlManualImportTask
from .relation_index_task import RelationIndexTask
from .spending_power_index_task import SpendingPowerIndexTask
__all__ = [
'WinbackIndexTask',
'NewconvIndexTask',
'MlManualImportTask',
'RelationIndexTask',
'SpendingPowerIndexTask',
]

View File

@@ -0,0 +1,767 @@
# -*- coding: utf-8 -*-
"""
SPI 消费力指数任务Spending Power Index
设计说明:
1. 直接继承 BaseIndexTask不经过 MemberIndexBaseTask无需 NEW/OLD/STOP 分群)
2. 子分计算为 @staticmethod 纯函数,便于属性测试直接调用
3. 三个子分Level消费水平、Speed消费速度、Stability消费稳定性
4. 结果写入 dws.dws_member_spending_power_index按 site_id delete-before-insert
"""
from __future__ import annotations
import math
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
from .base_index_task import BaseIndexTask
from ..base_dws_task import TaskContext
# =============================================================================
# 数据类定义
# =============================================================================
@dataclass
class SPIMemberFeatures:
    """Per-member inputs and outputs of the SPI (Spending Power Index) calculation."""

    member_id: int
    site_id: int

    # --- base features -----------------------------------------------------
    spend_30: float = 0.0              # total spend, last 30 days
    spend_90: float = 0.0              # total spend, last 90 days
    recharge_90: float = 0.0           # total recharge, last 90 days
    orders_30: int = 0                 # number of orders, last 30 days
    orders_90: int = 0                 # number of orders, last 90 days
    visit_days_30: int = 0             # distinct spending days, last 30 days
    visit_days_90: int = 0             # distinct spending days, last 90 days
    avg_ticket_90: float = 0.0         # average order value over 90 days
    active_weeks_90: int = 0           # calendar weeks with spend, last 90 days
    daily_spend_ewma_90: float = 0.0   # EWMA of daily spend

    # --- raw sub-scores ----------------------------------------------------
    score_level_raw: float = 0.0
    score_speed_raw: float = 0.0
    score_stability_raw: float = 0.0

    # --- display sub-scores (filled in after normalisation) ----------------
    score_level_display: float = 0.0
    score_speed_display: float = 0.0
    score_stability_display: float = 0.0

    # --- overall score -----------------------------------------------------
    raw_score: float = 0.0
    display_score: float = 0.0
# =============================================================================
# SPI 任务
# =============================================================================
class SpendingPowerIndexTask(BaseIndexTask):
"""SPI 消费力指数:单任务产出 Level / Speed / Stability 子分及 SPI 总分。"""
INDEX_TYPE = "SPI"
DEFAULT_PARAMS: Dict[str, float] = {
# Fallback SPI parameters; execute() overlays the values loaded via
# load_index_parameters('SPI') on top of these.
# NOTE(review): several values are ints although the annotation says float.
# Window parameters
'spend_window_short_days': 30,
'spend_window_long_days': 90,
'ewma_alpha_daily_spend': 0.3,
# Amount-compression bases (initial defaults; per the original note they can
# be overridden by auto-calibration or by the configuration table)
'amount_base_spend_30': 500.0,
'amount_base_spend_90': 1500.0,
'amount_base_ticket_90': 200.0,
'amount_base_recharge_90': 1000.0,
'amount_base_speed_abs': 100.0,
'amount_base_ewma_90': 50.0,
# Weights of the Level sub-score
'w_level_spend_30': 0.30,
'w_level_spend_90': 0.35,
'w_level_ticket_90': 0.20,
'w_level_recharge_90': 0.15,
# Weights of the Speed sub-score
'w_speed_abs': 0.50,
'w_speed_rel': 0.30,
'w_speed_ewma': 0.20,
# Weights of the overall score
'weight_level': 0.60,
'weight_speed': 0.30,
'weight_stability': 0.10,
# Stability parameters
'stability_window_days': 90,
'use_stability': 1,
# Display mapping and smoothing
'percentile_lower': 5,
'percentile_upper': 95,
'compression_mode': 1, # 1 selects log1p (see _map_compression)
'use_smoothing': 1,
'ewma_alpha': 0.2,
# Speed calculation
'speed_epsilon': 1e-6,
}
# =========================================================================
# 抽象方法实现
# =========================================================================
def get_task_code(self) -> str:
    """Return the task code of the SPI job."""
    task_code = "DWS_SPENDING_POWER_INDEX"
    return task_code
def get_target_table(self) -> str:
    """Return the name of the table the SPI results are written to."""
    target_table = "dws_member_spending_power_index"
    return target_table
def get_primary_keys(self) -> List[str]:
    """Return the columns that identify one SPI row (a fresh list per call)."""
    primary_keys = ["site_id", "member_id"]
    return primary_keys
def get_index_type(self) -> str:
    """Return the index type tag, read from the INDEX_TYPE attribute."""
    index_type = self.INDEX_TYPE
    return index_type
# =========================================================================
# 辅助方法
# =========================================================================
def _get_site_id(self, context: Optional[TaskContext]) -> int:
    """Resolve the site (store) ID for this run.

    Sources are tried in order: ``context.store_id``, the config keys
    ``app.default_site_id`` / ``app.store_id``, then any non-null site_id
    found in dwd.dwd_settlement_head. If none yields a value, a warning is
    logged and 0 is returned.
    """
    store_id = getattr(context, "store_id", None) if context else None
    if store_id:
        return int(store_id)

    configured = self.config.get("app.default_site_id") or self.config.get("app.store_id")
    if configured is not None:
        return int(configured)

    # Last resort: take a site_id from the settlement data.
    sql = "SELECT DISTINCT site_id FROM dwd.dwd_settlement_head WHERE site_id IS NOT NULL LIMIT 1"
    rows = self.db.query(sql)
    if not rows:
        self.logger.warning("无法确定门店ID使用 0 继续执行")
        return 0
    first_row = dict(rows[0])
    return int(first_row.get("site_id") or 0)
@staticmethod
def _map_compression(params: Dict[str, float]) -> str:
"""将 compression_mode 数值映射为 batch_normalize_to_display 所需的字符串"""
mode = int(params.get('compression_mode', 0))
if mode == 1:
return "log1p"
if mode == 2:
return "asinh"
return "none"
# =========================================================================
# 核心执行流程
# =========================================================================
def execute(self, context: Optional[TaskContext] = None) -> Dict[str, Any]:
    """Run the full SPI pipeline: extract -> score -> normalise -> persist.

    Steps:
        1. Resolve the site ID.
        2. Load SPI parameters (loaded values overlay DEFAULT_PARAMS).
        3. Extract spending features; return ``skipped`` immediately when
           there are none, before any further query is issued.
        4. Merge recharge totals and daily-spend EWMA into the features.
        5. Calibrate the amount-compression bases.
        6. Compute Level / Speed / Stability and the raw SPI per member.
        7. Normalise the four raw scores into display scores.
        8. Persist the member rows.
        9. Save the percentile history of the overall SPI score.

    Returns:
        ``{'status': 'skipped', 'reason': 'no_data'}`` when no member has
        spending features, otherwise ``{'status': 'success',
        'member_count': ..., 'records_inserted': ...}``.
    """
    self.logger.info("开始计算 SPI 消费力指数")

    # 1. Site ID
    site_id = self._get_site_id(context)

    # 2. Parameters: configured values win over the defaults.
    db_params = self.load_index_parameters('SPI')
    params = {**self.DEFAULT_PARAMS, **db_params}

    # 3. Spending features drive everything else; without them there is
    #    nothing to score, so stop before the recharge / EWMA helpers are
    #    called with an empty member list.
    features = self._extract_spending_features(site_id, params)
    if not features:
        self.logger.info("SPI: site_id=%s 无消费数据,跳过计算", site_id)
        return {'status': 'skipped', 'reason': 'no_data'}

    # 4. Merge recharge totals. Members with recharge but no spending are
    #    ignored: they have no base features to score.
    recharge_map = self._extract_recharge_features(site_id, params)
    for mid, recharge_90 in recharge_map.items():
        if mid in features:
            features[mid].recharge_90 = recharge_90

    # Daily-spend EWMA, computed in one batch for all members.
    member_ids = list(features.keys())
    ewma_map = self._compute_daily_spend_ewma_batch(site_id, member_ids, params)
    for mid, ewma_val in ewma_map.items():
        if mid in features:
            features[mid].daily_spend_ewma_90 = ewma_val

    # 5. Calibrate the amount-compression bases.
    params = self._calibrate_amount_bases(features, params)

    # 6. Sub-scores and raw total per member.
    for feat in features.values():
        feat.score_level_raw = self.compute_level(feat, params)
        feat.score_speed_raw = self.compute_speed(feat, params)
        feat.score_stability_raw = self.compute_stability(feat, params)
        feat.raw_score = self.compute_spi_raw(
            feat.score_level_raw,
            feat.score_speed_raw,
            feat.score_stability_raw,
            params,
        )

    # 7. Normalise each raw score into its display score.
    percentile_lower = int(params.get('percentile_lower', 5))
    percentile_upper = int(params.get('percentile_upper', 95))
    use_smoothing = int(params.get('use_smoothing', 1)) == 1
    compression = self._map_compression(params)

    feat_list = list(features.values())

    # (index_type, raw attribute, display attribute), in the order the
    # normalisation calls are issued.
    score_channels = (
        ('SPI', 'raw_score', 'display_score'),
        ('SPI_LEVEL', 'score_level_raw', 'score_level_display'),
        ('SPI_SPEED', 'score_speed_raw', 'score_speed_display'),
        ('SPI_STABILITY', 'score_stability_raw', 'score_stability_display'),
    )
    for index_type, raw_attr, display_attr in score_channels:
        normalized = self.batch_normalize_to_display(
            raw_scores=[(f.member_id, getattr(f, raw_attr)) for f in feat_list],
            compression=compression,
            percentile_lower=percentile_lower,
            percentile_upper=percentile_upper,
            use_smoothing=use_smoothing,
            site_id=site_id,
            index_type=index_type,
        )
        display_map = {mid: display for mid, _, display in normalized}
        # Members missing from the normalised output get 0.0.
        for feat in feat_list:
            setattr(feat, display_attr, display_map.get(feat.member_id, 0.0))

    # 8. Persist the member rows.
    records_inserted = self._save_spi_data(feat_list, site_id)

    # 9. Percentile history of the overall SPI score.
    raw_values = [f.raw_score for f in feat_list]
    q_l, q_u = self.calculate_percentiles(raw_values, percentile_lower, percentile_upper)
    if use_smoothing:
        smoothed_l, smoothed_u = self._apply_ewma_smoothing(
            site_id=site_id,
            current_p5=q_l,
            current_p95=q_u,
            index_type='SPI',
        )
    else:
        smoothed_l, smoothed_u = q_l, q_u
    self.save_percentile_history(
        site_id=site_id,
        percentile_5=q_l,
        percentile_95=q_u,
        percentile_5_smoothed=smoothed_l,
        percentile_95_smoothed=smoothed_u,
        record_count=len(raw_values),
        min_raw=min(raw_values),
        max_raw=max(raw_values),
        avg_raw=sum(raw_values) / len(raw_values),
        index_type='SPI',
    )

    self.logger.info(
        "SPI 计算完成: site_id=%s, 会员数=%d, 写入记录=%d",
        site_id, len(feat_list), records_inserted,
    )
    return {
        'status': 'success',
        'member_count': len(feat_list),
        'records_inserted': records_inserted,
    }
# =========================================================================
# 数据提取(后续任务实现)
# =========================================================================
def _extract_spending_features(
    self, site_id: int, params: Dict[str, float]
) -> Dict[int, SPIMemberFeatures]:
    """Build per-member spending features from ``dwd.dwd_settlement_head``.

    One query aggregates settlements with ``settle_type IN (1, 3)`` paid
    within the long window (``spend_window_long_days``, default 90) and, as
    a filtered subset of the same rows, within the short window
    (``spend_window_short_days``, default 30). The member identity is
    ``member_id`` when it is non-zero, otherwise the ``tenant_member_id`` of
    the current, non-deleted card account registered at the same site; rows
    whose resolved id is not positive are dropped.

    Args:
        site_id: Site whose settlements are aggregated.
        params: Index parameters; only the two window lengths are read here.

    Returns:
        Mapping of member id to ``SPIMemberFeatures`` carrying spend, order
        and visit-day counts for both windows, ``avg_ticket_90`` (spend per
        order, divisor floored at 1) and ``active_weeks_90`` (distinct ISO
        year/week pairs, capped at 13).
    """
    short_days = int(params.get('spend_window_short_days', 30))
    long_days = int(params.get('spend_window_long_days', 90))

    # Both windows are aggregated in one scan. The day counts are forced to
    # int above before being interpolated; site_id is bound as a parameter.
    sql = f"""
        WITH consume_source AS (
        SELECT
        COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
        AS canonical_member_id,
        s.pay_time,
        COALESCE(s.pay_amount, 0) AS pay_amount
        FROM dwd.dwd_settlement_head s
        LEFT JOIN dwd.dim_member_card_account mca
        ON s.member_card_account_id = mca.member_card_id
        AND mca.scd2_is_current = 1
        AND mca.register_site_id = s.site_id
        AND COALESCE(mca.is_delete, 0) = 0
        WHERE s.site_id = %s
        AND s.settle_type IN (1, 3)
        AND s.pay_time >= NOW() - INTERVAL '{long_days} days'
        )
        SELECT
        canonical_member_id AS member_id,
        -- 90 天窗口
        SUM(pay_amount) AS spend_90,
        COUNT(*) AS orders_90,
        COUNT(DISTINCT DATE(pay_time)) AS visit_days_90,
        COUNT(DISTINCT EXTRACT(ISOYEAR FROM pay_time)::int * 100
        + EXTRACT(WEEK FROM pay_time)::int) AS active_weeks_90,
        -- 30 天窗口(子集过滤)
        SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
        THEN pay_amount ELSE 0 END) AS spend_30,
        SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
        THEN 1 ELSE 0 END) AS orders_30,
        COUNT(DISTINCT CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
        THEN DATE(pay_time) END) AS visit_days_30
        FROM consume_source
        WHERE canonical_member_id > 0
        GROUP BY canonical_member_id
    """
    fetched = self.db.query(sql, (site_id,))

    features_by_member: Dict[int, SPIMemberFeatures] = {}
    for record in map(dict, fetched or []):
        member_key = int(record['member_id'])
        order_count = int(record['orders_90'] or 0)
        total_spend = float(record['spend_90'] or 0)
        # Average ticket: the divisor is floored at 1 so members without
        # orders yield 0 instead of dividing by zero (Req 2.4).
        ticket = total_spend / max(order_count, 1)
        # Active weeks are capped at 13 (Req 2.5).
        weeks = min(int(record['active_weeks_90'] or 0), 13)
        features_by_member[member_key] = SPIMemberFeatures(
            member_id=member_key,
            site_id=site_id,
            spend_30=float(record['spend_30'] or 0),
            spend_90=total_spend,
            orders_30=int(record['orders_30'] or 0),
            orders_90=order_count,
            visit_days_30=int(record['visit_days_30'] or 0),
            visit_days_90=int(record['visit_days_90'] or 0),
            avg_ticket_90=ticket,
            active_weeks_90=weeks,
        )

    self.logger.info(
        "SPI 消费特征提取完成: site_id=%s, 会员数=%d, 窗口=%d/%d",
        site_id, len(features_by_member), short_days, long_days,
    )
    return features_by_member
def _extract_recharge_features(
    self, site_id: int, params: Dict[str, float]
) -> Dict[int, float]:
    """Sum each member's recharge amount over the long window.

    Reads ``dwd.dwd_recharge_order`` rows with ``settle_type = 5`` paid within
    ``spend_window_long_days`` (default 90) for the given site. The member
    identity is resolved the same way as in the spending query: non-zero
    ``member_id`` first, otherwise the ``tenant_member_id`` of the current,
    non-deleted card account registered at the same site. Rows whose resolved
    id is not positive are dropped.

    Args:
        site_id: Site whose recharge orders are aggregated.
        params: Index parameters; only ``spend_window_long_days`` is read.

    Returns:
        ``{member_id: recharge_90}``; a NULL sum is reported as ``0.0``.
    """
    window_days = int(params.get('spend_window_long_days', 90))

    # window_days is an int by construction; site_id is bound as a parameter.
    sql = f"""
        WITH recharge_source AS (
        SELECT
        COALESCE(NULLIF(r.member_id, 0), mca.tenant_member_id)
        AS canonical_member_id,
        COALESCE(r.pay_amount, 0) AS pay_amount
        FROM dwd.dwd_recharge_order r
        LEFT JOIN dwd.dim_member_card_account mca
        ON r.tenant_member_card_id = mca.member_card_id
        AND mca.scd2_is_current = 1
        AND mca.register_site_id = r.site_id
        AND COALESCE(mca.is_delete, 0) = 0
        WHERE r.site_id = %s
        AND r.settle_type = 5
        AND r.pay_time >= NOW() - INTERVAL '{window_days} days'
        )
        SELECT
        canonical_member_id AS member_id,
        SUM(pay_amount) AS recharge_90
        FROM recharge_source
        WHERE canonical_member_id > 0
        GROUP BY canonical_member_id
    """
    fetched = self.db.query(sql, (site_id,))

    recharge_by_member: Dict[int, float] = {
        int(record['member_id']): float(record['recharge_90'] or 0)
        for record in map(dict, fetched or [])
    }

    self.logger.info(
        "SPI 充值特征提取完成: site_id=%s, 有充值会员数=%d, 窗口=%d",
        site_id, len(recharge_by_member), window_days,
    )
    return recharge_by_member
def _compute_daily_spend_ewma(
    self, site_id: int, member_id: int, params: Dict[str, float]
) -> float:
    """Return the EWMA of one member's daily spend over the long window.

    Queries ``dwd.dwd_settlement_head`` (``settle_type IN (1, 3)``, paid within
    ``spend_window_long_days``, default 90), sums the spend per calendar day
    for the resolved member and folds the days in ascending date order:

        S_0 = X_0
        S_t = alpha * X_t + (1 - alpha) * S_{t-1}

    Only days that have at least one settlement appear in the series.

    Args:
        site_id: Site whose settlements are read.
        member_id: Resolved (canonical) member id to filter on.
        params: Index parameters; reads ``spend_window_long_days`` and
            ``ewma_alpha_daily_spend`` (default 0.3).

    Returns:
        The final EWMA value, or ``0.0`` when the member has no rows.
    """
    window_days = int(params.get('spend_window_long_days', 90))
    smoothing = float(params.get('ewma_alpha_daily_spend', 0.3))

    sql = f"""
        WITH consume_source AS (
        SELECT
        COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
        AS canonical_member_id,
        DATE(s.pay_time) AS pay_date,
        COALESCE(s.pay_amount, 0) AS pay_amount
        FROM dwd.dwd_settlement_head s
        LEFT JOIN dwd.dim_member_card_account mca
        ON s.member_card_account_id = mca.member_card_id
        AND mca.scd2_is_current = 1
        AND mca.register_site_id = s.site_id
        AND COALESCE(mca.is_delete, 0) = 0
        WHERE s.site_id = %s
        AND s.settle_type IN (1, 3)
        AND s.pay_time >= NOW() - INTERVAL '{window_days} days'
        )
        SELECT pay_date, SUM(pay_amount) AS daily_spend
        FROM consume_source
        WHERE canonical_member_id = %s
        GROUP BY pay_date
        ORDER BY pay_date
    """
    daily_rows = self.db.query(sql, (site_id, member_id))
    if not daily_rows:
        return 0.0

    # The first day seeds the recursion; every later day is blended in.
    spend_series = (float(dict(item)['daily_spend'] or 0) for item in daily_rows)
    smoothed = next(spend_series)
    carry_weight = 1 - smoothing
    for day_spend in spend_series:
        smoothed = smoothing * day_spend + carry_weight * smoothed
    return smoothed
def _compute_daily_spend_ewma_batch(
    self, site_id: int, member_ids: List[int], params: Dict[str, float]
) -> Dict[int, float]:
    """Compute the daily-spend EWMA for many members with a single query.

    Runs one site-wide query over ``dwd.dwd_settlement_head``
    (``settle_type IN (1, 3)``, paid within ``spend_window_long_days``,
    default 90) instead of one query per member, then folds each member's
    per-day spend in ascending date order:

        S_0 = X_0
        S_t = alpha * X_t + (1 - alpha) * S_{t-1}

    Only days that have at least one settlement appear in a member's series.

    Args:
        site_id: Site whose settlements are read.
        member_ids: Members to report on. An empty list short-circuits
            without touching the database.
        params: Index parameters; reads ``spend_window_long_days`` and
            ``ewma_alpha_daily_spend`` (default 0.3).

    Returns:
        ``{member_id: daily_spend_ewma_90}`` with exactly one entry per
        requested member; members without any row in the window map to
        ``0.0``. Members returned by the query but not requested are ignored.
    """
    if not member_ids:
        return {}

    long_days = int(params.get('spend_window_long_days', 90))
    alpha = float(params.get('ewma_alpha_daily_spend', 0.3))

    sql = f"""
        WITH consume_source AS (
        SELECT
        COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
        AS canonical_member_id,
        DATE(s.pay_time) AS pay_date,
        COALESCE(s.pay_amount, 0) AS pay_amount
        FROM dwd.dwd_settlement_head s
        LEFT JOIN dwd.dim_member_card_account mca
        ON s.member_card_account_id = mca.member_card_id
        AND mca.scd2_is_current = 1
        AND mca.register_site_id = s.site_id
        AND COALESCE(mca.is_delete, 0) = 0
        WHERE s.site_id = %s
        AND s.settle_type IN (1, 3)
        AND s.pay_time >= NOW() - INTERVAL '{long_days} days'
        )
        SELECT canonical_member_id AS member_id,
        pay_date,
        SUM(pay_amount) AS daily_spend
        FROM consume_source
        WHERE canonical_member_id > 0
        GROUP BY canonical_member_id, pay_date
        ORDER BY canonical_member_id, pay_date
    """
    rows = self.db.query(sql, (site_id,))

    # Seed every requested member with 0.0 so the documented default holds
    # even when the member has no settlement in the window.
    result: Dict[int, float] = dict.fromkeys(member_ids, 0.0)
    if not rows:
        return result

    # Members whose first day has already been consumed. Rows arrive ordered
    # by (member, date), so the first row seen for a member is S_0.
    seeded = set()
    for row in rows:
        r = dict(row)
        mid = int(r['member_id'])
        if mid not in result:
            # The query is site-wide; skip members the caller did not ask for.
            continue
        x = float(r['daily_spend'] or 0)
        if mid in seeded:
            result[mid] = alpha * x + (1 - alpha) * result[mid]
        else:
            result[mid] = x  # S_0 = X_0
            seeded.add(mid)

    self.logger.info(
        "SPI 日消费 EWMA 批量计算完成: site_id=%s, 会员数=%d, α=%.2f",
        site_id, len(result), alpha,
    )
    return result
def _calibrate_amount_bases(
    self, features: Dict[int, SPIMemberFeatures], params: Dict[str, float]
) -> Dict[str, float]:
    """Derive the amount-compression bases from the site's own feature data.

    For each ``amount_base_*`` key the value is chosen as follows:

    1. If ``params`` holds a value that differs from ``DEFAULT_PARAMS``, it is
       treated as an explicit configuration and kept.
    2. Otherwise the median of the matching feature across all members
       (via ``self.calculate_median``) is used when it is positive.
    3. Otherwise the ``DEFAULT_PARAMS`` value is used.

    Args:
        features: Per-member features to take medians from.
        params: Current parameter set; it is copied, not mutated.

    Returns:
        A copy of ``params`` with the six base keys resolved as above.
    """
    # (parameter key, how to read the matching amount from one member)
    base_sources = (
        ('amount_base_spend_30', lambda feat: feat.spend_30),
        ('amount_base_spend_90', lambda feat: feat.spend_90),
        ('amount_base_ticket_90', lambda feat: feat.avg_ticket_90),
        ('amount_base_recharge_90', lambda feat: feat.recharge_90),
        ('amount_base_speed_abs', lambda feat: feat.spend_30 / max(feat.visit_days_30, 1)),
        ('amount_base_ewma_90', lambda feat: feat.daily_spend_ewma_90),
    )

    resolved = dict(params)
    for key, read_amount in base_sources:
        # A value that differs from the default counts as configured.
        if key in params and params[key] != self.DEFAULT_PARAMS.get(key):
            self.logger.info(
                "SPI 基数校准: %s 使用配置表值 %.2f", key, params[key],
            )
            continue

        median = self.calculate_median(
            [read_amount(feat) for feat in features.values()]
        )
        if median > 0:
            resolved[key] = median
            self.logger.info(
                "SPI 基数校准: %s 自动校准为中位数 %.2f", key, median,
            )
            continue

        # A non-positive median cannot serve as a divisor: use the default.
        resolved[key] = self.DEFAULT_PARAMS[key]
        self.logger.warning(
            "SPI 基数校准: %s 中位数 %.2f ≤ 0回退到默认值 %.2f",
            key, median, self.DEFAULT_PARAMS[key],
        )
    return resolved
# =========================================================================
# Sub-score computation (pure functions)
# =========================================================================
@staticmethod
def compute_level(features: SPIMemberFeatures, params: Dict[str, float]) -> float:
    """Level sub-score: how much the member spends.

        L = w_s30    * ln(1 + spend_30      / M30)
          + w_s90    * ln(1 + spend_90      / M90)
          + w_ticket * ln(1 + avg_ticket_90 / T0)
          + w_r90    * ln(1 + recharge_90   / R90)

    Args:
        features: Member features; reads ``spend_30``, ``spend_90``,
            ``avg_ticket_90`` and ``recharge_90``.
        params: Must contain the four ``w_level_*`` weights and the four
            matching ``amount_base_*`` divisors.

    Returns:
        The weighted sum of the four log-compressed amounts.
    """
    spend_30_part = params['w_level_spend_30'] * math.log1p(
        features.spend_30 / params['amount_base_spend_30']
    )
    spend_90_part = params['w_level_spend_90'] * math.log1p(
        features.spend_90 / params['amount_base_spend_90']
    )
    ticket_part = params['w_level_ticket_90'] * math.log1p(
        features.avg_ticket_90 / params['amount_base_ticket_90']
    )
    recharge_part = params['w_level_recharge_90'] * math.log1p(
        features.recharge_90 / params['amount_base_recharge_90']
    )
    return spend_30_part + spend_90_part + ticket_part + recharge_part
@staticmethod
def compute_speed(features: SPIMemberFeatures, params: Dict[str, float]) -> float:
    """Speed sub-score: how fast the member is spending.

        V_abs  = ln(1 + spend_30 / (max(visit_days_30, 1) * V0))
        V_rel  = ln((v_30 + eps) / (v_90 + eps)),  v_30 = spend_30 / 30,
                                                   v_90 = spend_90 / 90
        V_ewma = ln(1 + daily_spend_ewma_90 / E0)
        S      = w_abs * V_abs + w_rel * max(0, V_rel) + w_ewma * V_ewma

    Only acceleration (``V_rel > 0``) earns points; a slowdown is not
    penalised (Req 4.5).

    Args:
        features: Member features; reads ``spend_30``, ``spend_90``,
            ``visit_days_30`` and ``daily_spend_ewma_90``.
        params: Must contain ``w_speed_abs``, ``w_speed_rel``, ``w_speed_ewma``,
            ``amount_base_speed_abs`` and ``amount_base_ewma_90``;
            ``speed_epsilon`` is optional (default ``1e-6``).

    Returns:
        The weighted speed score.
    """
    epsilon = params.get('speed_epsilon', 1e-6)

    # Absolute speed: spend per visit day, scaled by its base (Req 4.1).
    visit_days = max(features.visit_days_30, 1)
    absolute = math.log1p(
        features.spend_30 / (visit_days * params['amount_base_speed_abs'])
    )

    # Relative speed: short-window daily rate against the long-window rate
    # (Req 4.2).
    rate_short = features.spend_30 / 30.0
    rate_long = features.spend_90 / 90.0
    relative = math.log((rate_short + epsilon) / (rate_long + epsilon))
    acceleration = relative if relative > 0.0 else 0.0

    # EWMA speed (Req 4.3).
    smoothed = math.log1p(
        features.daily_spend_ewma_90 / params['amount_base_ewma_90']
    )

    # Weighted blend (Req 4.4, 4.5).
    return (
        params['w_speed_abs'] * absolute
        + params['w_speed_rel'] * acceleration
        + params['w_speed_ewma'] * smoothed
    )
@staticmethod
def compute_stability(features: SPIMemberFeatures, params: Dict[str, float]) -> float:
    """Stability sub-score: share of weeks in which the member was active.

        P = active_weeks_90 / 13

    Args:
        features: Member features; reads ``active_weeks_90``.
        params: ``use_stability`` (default 1) switches the sub-score off
            when it equals 0 (Req 5.3).

    Returns:
        ``0.0`` when stability is disabled, otherwise the week-coverage ratio
        (Req 5.1, 5.2). No clamping is applied here.
    """
    stability_enabled = params.get('use_stability', 1) != 0
    return features.active_weeks_90 / 13.0 if stability_enabled else 0.0
@staticmethod
def compute_spi_raw(
    level: float, speed: float, stability: float, params: Dict[str, float]
) -> float:
    """Blend the three sub-scores into the raw SPI.

        SPI_raw = w_L * L + w_S * S + w_P * P

    Args:
        level: Level sub-score.
        speed: Speed sub-score.
        stability: Stability sub-score.
        params: Optional weights ``weight_level`` (default 0.60),
            ``weight_speed`` (default 0.30) and ``weight_stability``
            (default 0.10) (Req 6.1).

    Returns:
        The weighted sum of the three sub-scores.
    """
    level_part = params.get('weight_level', 0.60) * level
    speed_part = params.get('weight_speed', 0.30) * speed
    stability_part = params.get('weight_stability', 0.10) * stability
    return level_part + speed_part + stability_part
# =========================================================================
# Persistence
# =========================================================================
def _save_spi_data(
    self, data_list: List[SPIMemberFeatures], site_id: int
) -> int:
    """Replace the site's rows in ``dws.dws_member_spending_power_index``.

    Delete-before-insert (Req 9.3): all existing rows for ``site_id`` are
    deleted, then one row per feature object is inserted. The delete and the
    inserts run in one transaction that is committed once at the end; if any
    statement fails the transaction is rolled back and the exception is
    re-raised, so a failure cannot leave the table emptied or half-filled
    for a later commit on the same connection to persist.

    Args:
        data_list: Feature objects to persist. An empty (or ``None``) list
            only clears the site's rows.
        site_id: Site whose rows are replaced.

    Returns:
        Number of rows inserted, summed from the cursor's ``rowcount``
        (negative values count as 0).

    Raises:
        Exception: Whatever the database driver raised, after rollback.
    """
    insert_sql = """
        INSERT INTO dws.dws_member_spending_power_index (
        site_id, member_id,
        spend_30, spend_90, recharge_90,
        orders_30, orders_90,
        visit_days_30, visit_days_90,
        avg_ticket_90, active_weeks_90, daily_spend_ewma_90,
        score_level_raw, score_speed_raw, score_stability_raw,
        score_level_display, score_speed_display, score_stability_display,
        raw_score, display_score,
        calc_time, created_at, updated_at
        ) VALUES (
        %s, %s,
        %s, %s, %s,
        %s, %s,
        %s, %s,
        %s, %s, %s,
        %s, %s, %s,
        %s, %s, %s,
        %s, %s,
        NOW(), NOW(), NOW()
        )
    """
    conn = self.db.conn
    inserted = 0
    try:
        with conn.cursor() as cur:
            # Clear the site's previous rows first (Req 9.3).
            cur.execute(
                "DELETE FROM dws.dws_member_spending_power_index WHERE site_id = %s",
                (site_id,),
            )
            for f in data_list or []:
                cur.execute(insert_sql, (
                    f.site_id, f.member_id,
                    f.spend_30, f.spend_90, f.recharge_90,
                    f.orders_30, f.orders_90,
                    f.visit_days_30, f.visit_days_90,
                    f.avg_ticket_90, f.active_weeks_90, f.daily_spend_ewma_90,
                    f.score_level_raw, f.score_speed_raw, f.score_stability_raw,
                    f.score_level_display, f.score_speed_display, f.score_stability_display,
                    f.raw_score, f.display_score,
                ))
                inserted += max(cur.rowcount, 0)
        conn.commit()
    except Exception:
        # Undo the DELETE and any partial inserts before propagating.
        conn.rollback()
        raise

    if not data_list:
        return 0
    self.logger.info(
        "SPI 数据写入完成: site_id=%s, 插入记录=%d", site_id, inserted,
    )
    return inserted

View File

@@ -233,21 +233,67 @@ class MemberConsumptionTask(BaseDwsTask):
def _extract_member_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
"""
提取会员信息
生日优先级手动补录fdw_app.member_birthday_manual> API 来源dim_member.birthday
FDW 连接失败时降级为仅使用 dim_member.birthday
"""
sql = """
# CHANGE 2026-02-21 | dim_member 无 site_id 字段,改用 register_site_id
# CHANGE 2026-02-22 | 恢复 birthday 字段C1 迁移已加列),供后续 C2 COALESCE 使用
# CHANGE 2026-02-22 | 需求 B通过事实表反查支持跨店消费会员
# CHANGE 2026-02-22 | 需求 C2COALESCE 优先手动补录生日FDW 失败时降级
sql_with_fdw = """
SELECT
m.member_id,
m.nickname,
m.mobile,
m.member_card_grade_name,
DATE(m.create_time) AS register_date,
m.recharge_money_sum,
COALESCE(
(SELECT birthday_value
FROM fdw_app.member_birthday_manual
WHERE member_id = m.member_id
ORDER BY recorded_at ASC
LIMIT 1),
m.birthday
) AS birthday
FROM dwd.dim_member m
WHERE m.member_id IN (
SELECT DISTINCT tenant_member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
) AND m.scd2_is_current = 1
"""
sql_fallback = """
SELECT
member_id,
nickname,
mobile,
member_card_grade_name,
DATE(create_time) AS register_date,
recharge_money_sum
recharge_money_sum,
birthday
FROM dwd.dim_member
WHERE site_id = %s
AND scd2_is_current = 1
WHERE member_id IN (
SELECT DISTINCT tenant_member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
) AND scd2_is_current = 1
"""
rows = self.db.query(sql, (site_id,))
try:
rows = self.db.query(sql_with_fdw, (site_id,))
except Exception as exc:
# FDW 连接失败,降级为仅使用 dim_member.birthday
self.logger.warning(
"%s: FDW 读取 member_birthday_manual 失败,降级为 dim_member.birthday — %s",
self.get_task_code(), exc,
)
rows = self.db.query(sql_fallback, (site_id,))
result = {}
for row in (rows or []):
row_dict = dict(row)
@@ -262,14 +308,21 @@ class MemberConsumptionTask(BaseDwsTask):
CASH_CARD_TYPE_ID = 2793249295533893
GIFT_CARD_TYPE_IDS = [2791990152417157, 2793266846533445, 2794699703437125]
# CHANGE 2026-02-21 | dim_member_card_account 无 site_id 字段,改用 register_site_id
# CHANGE 2026-02-22 | 需求 B通过事实表反查支持跨店消费会员
sql = """
SELECT
tenant_member_id AS member_id,
card_type_id,
balance
FROM dwd.dim_member_card_account
WHERE site_id = %s
AND scd2_is_current = 1
WHERE tenant_member_id IN (
SELECT DISTINCT tenant_member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
) AND scd2_is_current = 1
AND COALESCE(is_delete, 0) = 0
"""
rows = self.db.query(sql, (site_id,))

View File

@@ -175,6 +175,7 @@ class MemberVisitTask(BaseDwsTask):
# 会员信息
'member_nickname': memb_info.get('nickname'),
'member_mobile': self._mask_mobile(memb_info.get('mobile')),
# CHANGE 2026-02-22 | 恢复从 dim_member.birthday 读取
'member_birthday': memb_info.get('birthday'),
# 台桌信息
'table_id': table_id,
@@ -302,28 +303,73 @@ class MemberVisitTask(BaseDwsTask):
def _extract_member_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
"""
提取会员信息
生日优先级手动补录fdw_app.member_birthday_manual> API 来源dim_member.birthday
FDW 连接失败时降级为仅使用 dim_member.birthday
"""
sql = """
# CHANGE 2026-02-21 | dim_member 无 site_id 字段,改用 register_site_id
# CHANGE 2026-02-22 | 恢复 birthday 字段C1 迁移已加列)
# CHANGE 2026-02-22 | 需求 B通过事实表反查支持跨店消费会员
# CHANGE 2026-02-22 | 需求 C2COALESCE 优先手动补录生日FDW 失败时降级
sql_with_fdw = """
SELECT
m.member_id,
m.nickname,
m.mobile,
COALESCE(
(SELECT birthday_value
FROM fdw_app.member_birthday_manual
WHERE member_id = m.member_id
ORDER BY recorded_at ASC
LIMIT 1),
m.birthday
) AS birthday
FROM dwd.dim_member m
WHERE m.member_id IN (
SELECT DISTINCT tenant_member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
) AND m.scd2_is_current = 1
"""
sql_fallback = """
SELECT
member_id,
nickname,
mobile,
birthday
FROM dwd.dim_member
WHERE site_id = %s
AND scd2_is_current = 1
WHERE member_id IN (
SELECT DISTINCT tenant_member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
) AND scd2_is_current = 1
"""
rows = self.db.query(sql, (site_id,))
try:
rows = self.db.query(sql_with_fdw, (site_id,))
except Exception as exc:
# FDW 连接失败,降级为仅使用 dim_member.birthday
self.logger.warning(
"%s: FDW 读取 member_birthday_manual 失败,降级为 dim_member.birthday — %s",
self.get_task_code(), exc,
)
rows = self.db.query(sql_fallback, (site_id,))
return {r['member_id']: dict(r) for r in (rows or [])}
def _extract_table_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
"""
提取台桌信息
"""
# CHANGE 2026-02-22 | BUG 6 修复 | dim_table 无 site_table_id/site_table_name
# 正确字段为 table_id/table_name参考 dwd.sql DDL
sql = """
SELECT
site_table_id AS table_id,
site_table_name AS table_name,
table_id AS table_id,
table_name AS table_name,
site_table_area_name AS area_name
FROM dwd.dim_table
WHERE site_id = %s

View File

@@ -306,6 +306,11 @@ class BaseOdsTask(BaseTask):
# 用户明确指定了窗口,尊重用户选择
return base_start, base_end, base_minutes
# full_window 模式:直接使用基础窗口,跳过 MAX(fetched_at) 兜底
# 该模式以 API 返回数据的实际时间范围为准,无游标偏移风险
if self.config.get("run.processing_mode") == "full_window":
return base_start, base_end, base_minutes
# 以 ODS 表 MAX(fetched_at) 兜底:避免“窗口游标推进但未实际入库”导致漏数。
last_fetched = self._get_max_fetched_at(self.SPEC.table_name)
if last_fetched:
@@ -652,6 +657,15 @@ class BaseOdsTask(BaseTask):
now = datetime.now(self.tz)
insert_rows: list[tuple] = []
# CHANGE [2026-02-20] intent: 识别所有 JSONB 列索引,防止 dict/list 值导致 psycopg2 适配错误
jsonb_col_indices: set[int] = set()
for ci in cols_info:
col_lower = ci[2] # udt_name
if col_lower in ("jsonb", "json"):
idx = col_index.get(ci[0].lower())
if idx is not None:
jsonb_col_indices.add(idx)
for row in latest_rows:
row = list(row)
@@ -675,11 +689,14 @@ class BaseOdsTask(BaseTask):
if fetched_at_idx is not None:
row[fetched_at_idx] = now
# 将 payload 包装为 Json 以便 psycopg2 正确序列化
row[payload_idx] = Json(
original_payload,
dumps=lambda v: json.dumps(v, ensure_ascii=False),
)
# 将所有 JSONB 列的 dict/list 值包装为 Json 以便 psycopg2 正确序列化
for ji in jsonb_col_indices:
val = row[ji]
if isinstance(val, (dict, list)):
row[ji] = Json(
val,
dumps=lambda v: json.dumps(v, ensure_ascii=False),
)
insert_rows.append(tuple(row))
@@ -1217,19 +1234,6 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
snapshot_time_column="create_time",
description="助教服务流水 ODSGetOrderAssistantDetails -> orderAssistantDetails 原始 JSON",
),
OdsTaskSpec(
code="ODS_ASSISTANT_ABOLISH",
class_name="OdsAssistantAbolishTask",
table_name="ods.assistant_cancellation_records",
endpoint="/AssistantPerformance/GetAbolitionAssistant",
data_path=("data",),
list_key="abolitionAssistants",
pk_columns=(_int_col("id", "id", required=True),),
include_source_endpoint=False,
include_fetched_at=False,
include_record_index=True,
description="助教废除记录 ODSGetAbolitionAssistant -> abolitionAssistants 原始 JSON",
),
OdsTaskSpec(
code="ODS_STORE_GOODS_SALES",
class_name="OdsGoodsLedgerTask",
@@ -1454,7 +1458,8 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
include_source_endpoint=False,
include_fetched_at=False,
include_record_index=True,
requires_window=False,
requires_window=True,
time_fields=("startTime", "endTime"),
description="库存汇总 ODSGetGoodsStockReport 原始 JSON",
),
OdsTaskSpec(
@@ -1545,284 +1550,32 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
description="租户商品档案 ODSQueryTenantGoods -> tenantGoodsList 原始 JSON",
),
OdsTaskSpec(
code="ODS_SETTLEMENT_TICKET",
class_name="OdsSettlementTicketTask",
table_name="ods.settlement_ticket_details",
endpoint="/Order/GetOrderSettleTicketNew",
data_path=(),
list_key=None,
pk_columns=(_int_col("ordersettleid", "orderSettleId", required=True),),
code="ODS_STAFF_INFO",
class_name="OdsStaffInfoTask",
table_name="ods.staff_info_master",
endpoint="/PersonnelManagement/SearchSystemStaffInfo",
data_path=("data",),
list_key="staffProfiles",
pk_columns=(_int_col("id", "id", required=True),),
extra_params={
"workStatusEnum": 0,
"dingTalkSynced": 0,
"staffIdentity": 0,
"rankId": 0,
"criticismStatus": 0,
"signStatus": -1,
},
include_source_endpoint=False,
include_fetched_at=True,
include_fetched_at=False,
include_record_index=True,
requires_window=False,
include_site_id=False,
description="结账小票详情 ODSGetOrderSettleTicketNew 原始 JSON",
time_fields=None,
snapshot_mode=SnapshotMode.FULL_TABLE,
description="员工档案 ODSSearchSystemStaffInfo -> staffProfiles 原始 JSON",
),
)
def _get_spec(code: str) -> OdsTaskSpec:
for spec in ODS_TASK_SPECS:
if spec.code == code:
return spec
raise KeyError(f"Spec not found for code {code}")
_SETTLEMENT_TICKET_SPEC = _get_spec("ODS_SETTLEMENT_TICKET")
class OdsSettlementTicketTask(BaseOdsTask):
"""Special handling: fetch ticket details per payment relate_id/orderSettleId."""
SPEC = _SETTLEMENT_TICKET_SPEC
def extract(self, context) -> dict:
"""Fetch ticket payloads only (used by fetch-only flow)."""
existing_ids = self._fetch_existing_ticket_ids()
candidates = self._collect_settlement_ids(
context.store_id or 0, existing_ids, context.window_start, context.window_end
)
candidates = [cid for cid in candidates if cid and cid not in existing_ids]
payloads, skipped = self._fetch_ticket_payloads(candidates)
return {"records": payloads, "skipped": skipped, "fetched": len(candidates)}
def execute(self, cursor_data: dict | None = None) -> dict:
spec = self.SPEC
base_context = self._build_context(cursor_data)
segments = build_window_segments(
self.config,
base_context.window_start,
base_context.window_end,
tz=self.tz,
override_only=True,
)
if not segments:
segments = [(base_context.window_start, base_context.window_end)]
total_segments = len(segments)
if total_segments > 1:
self.logger.info("%s: 窗口拆分为 %s", spec.code, total_segments)
store_id = TypeParser.parse_int(self.config.get("app.store_id")) or 0
counts_total = {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}
segment_results: list[dict] = []
source_file = self._resolve_source_file_hint(spec)
try:
existing_ids = self._fetch_existing_ticket_ids()
for idx, (seg_start, seg_end) in enumerate(segments, start=1):
context = self._build_context_for_window(seg_start, seg_end, cursor_data)
self.logger.info(
"%s: 开始执行(%s/%s),窗口[%s ~ %s]",
spec.code,
idx,
total_segments,
context.window_start,
context.window_end,
)
candidates = self._collect_settlement_ids(
store_id, existing_ids, context.window_start, context.window_end
)
candidates = [cid for cid in candidates if cid and cid not in existing_ids]
segment_counts = {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}
segment_counts["fetched"] = len(candidates)
if not candidates:
self.logger.info(
"%s: 窗口[%s ~ %s] 未发现需要抓取的小票",
spec.code,
context.window_start,
context.window_end,
)
self._accumulate_counts(counts_total, segment_counts)
if total_segments > 1:
segment_results.append(
{
"window": {
"start": context.window_start,
"end": context.window_end,
"minutes": context.window_minutes,
},
"counts": segment_counts,
}
)
continue
payloads, skipped = self._fetch_ticket_payloads(candidates)
segment_counts["skipped"] += skipped
inserted, updated, skipped2 = self._insert_records_schema_aware(
table=spec.table_name,
records=payloads,
response_payload=None,
source_file=source_file,
source_endpoint=spec.endpoint,
)
segment_counts["inserted"] += inserted
segment_counts["updated"] += updated
segment_counts["skipped"] += skipped2
self.db.commit()
existing_ids.update(candidates)
self._accumulate_counts(counts_total, segment_counts)
if total_segments > 1:
segment_results.append(
{
"window": {
"start": context.window_start,
"end": context.window_end,
"minutes": context.window_minutes,
},
"counts": segment_counts,
}
)
self.logger.info(
"%s: 小票抓取完成,抓取=%s 插入=%s 更新=%s 跳过=%s",
spec.code,
counts_total["fetched"],
counts_total["inserted"],
counts_total["updated"],
counts_total["skipped"],
)
result = self._build_result("SUCCESS", counts_total)
overall_start = segments[0][0]
overall_end = segments[-1][1]
result["window"] = {
"start": overall_start,
"end": overall_end,
"minutes": calc_window_minutes(overall_start, overall_end),
}
if segment_results:
result["segments"] = segment_results
result["request_params"] = {"candidates": counts_total["fetched"]}
return result
except Exception:
counts_total["errors"] += 1
self.db.rollback()
self.logger.error("%s: 小票抓取失败", spec.code, exc_info=True)
raise
def _fetch_existing_ticket_ids(self) -> set[int]:
sql = """
SELECT DISTINCT
CASE WHEN (payload ->> 'orderSettleId') ~ '^[0-9]+$'
THEN (payload ->> 'orderSettleId')::bigint
END AS order_settle_id
FROM ods.settlement_ticket_details
"""
try:
rows = self.db.query(sql)
except Exception:
self.logger.warning("查询已有小票失败,按空集处理", exc_info=True)
return set()
return {
TypeParser.parse_int(row.get("order_settle_id"))
for row in rows
if row.get("order_settle_id") is not None
}
def _collect_settlement_ids(
self, store_id: int, existing_ids: set[int], window_start, window_end
) -> list[int]:
ids = self._fetch_from_payment_table(store_id)
if not ids:
ids = self._fetch_from_payment_api(store_id, window_start, window_end)
return sorted(i for i in ids if i is not None and i not in existing_ids)
def _fetch_from_payment_table(self, store_id: int) -> set[int]:
sql = """
SELECT DISTINCT COALESCE(
CASE WHEN (payload ->> 'orderSettleId') ~ '^[0-9]+$'
THEN (payload ->> 'orderSettleId')::bigint END,
CASE WHEN (payload ->> 'relateId') ~ '^[0-9]+$'
THEN (payload ->> 'relateId')::bigint END
) AS order_settle_id
FROM ods.payment_transactions
WHERE (payload ->> 'orderSettleId') ~ '^[0-9]+$'
OR (payload ->> 'relateId') ~ '^[0-9]+$'
"""
params = None
if store_id:
sql += " AND COALESCE((payload ->> 'siteId')::bigint, %s) = %s"
params = (store_id, store_id)
try:
rows = self.db.query(sql, params)
except Exception:
self.logger.warning("读取支付流水以获取结算单ID失败将尝试调用支付接口回退", exc_info=True)
return set()
return {
TypeParser.parse_int(row.get("order_settle_id"))
for row in rows
if row.get("order_settle_id") is not None
}
def _fetch_from_payment_api(self, store_id: int, window_start, window_end) -> set[int]:
params = self._merge_common_params(
{
"siteId": store_id,
"StartPayTime": TypeParser.format_timestamp(window_start, self.tz),
"EndPayTime": TypeParser.format_timestamp(window_end, self.tz),
}
)
candidate_ids: set[int] = set()
try:
for _, records, _, _ in self.api.iter_paginated(
endpoint="/PayLog/GetPayLogListPage",
params=params,
page_size=self.config.get("api.page_size", 200),
data_path=("data",),
):
for rec in records:
relate_id = TypeParser.parse_int(
(rec or {}).get("relateId")
or (rec or {}).get("orderSettleId")
or (rec or {}).get("order_settle_id")
)
if relate_id:
candidate_ids.add(relate_id)
except Exception:
self.logger.warning("调用支付接口获取结算单ID失败当前批次将跳过回退来源", exc_info=True)
return candidate_ids
def _fetch_ticket_payload(self, order_settle_id: int):
payload = None
try:
for _, _, _, response in self.api.iter_paginated(
endpoint=self.SPEC.endpoint,
params={"orderSettleId": order_settle_id},
page_size=None,
data_path=self.SPEC.data_path,
list_key=self.SPEC.list_key,
):
payload = response
except Exception:
self.logger.warning(
"调用小票接口失败 orderSettleId=%s", order_settle_id, exc_info=True
)
if isinstance(payload, dict) and isinstance(payload.get("data"), list) and len(payload["data"]) == 1:
# 本地桩回放可能把响应包装成单元素 list这里展开以贴近真实结果
payload = payload["data"][0]
return payload
def _fetch_ticket_payloads(self, candidates: list[int]) -> tuple[list, int]:
"""Fetch ticket payloads for a set of orderSettleIds; returns (payloads, skipped)."""
payloads: list = []
skipped = 0
for order_settle_id in candidates:
payload = self._fetch_ticket_payload(order_settle_id)
if payload:
payloads.append(payload)
else:
skipped += 1
return payloads, skipped
def _build_task_class(spec: OdsTaskSpec) -> Type[BaseOdsTask]:
attrs = {
"SPEC": spec,
@@ -1835,7 +1588,6 @@ def _build_task_class(spec: OdsTaskSpec) -> Type[BaseOdsTask]:
ENABLED_ODS_CODES = {
"ODS_ASSISTANT_ACCOUNT",
"ODS_ASSISTANT_LEDGER",
"ODS_ASSISTANT_ABOLISH",
"ODS_INVENTORY_CHANGE",
"ODS_INVENTORY_STOCK",
"ODS_GROUP_PACKAGE",
@@ -1854,8 +1606,8 @@ ENABLED_ODS_CODES = {
"ODS_TABLE_FEE_DISCOUNT",
"ODS_STORE_GOODS_SALES",
"ODS_TENANT_GOODS",
"ODS_SETTLEMENT_TICKET",
"ODS_SETTLEMENT_RECORDS",
"ODS_STAFF_INFO",
}
ODS_TASK_CLASSES: Dict[str, Type[BaseOdsTask]] = {
@@ -1863,7 +1615,5 @@ ODS_TASK_CLASSES: Dict[str, Type[BaseOdsTask]] = {
for spec in ODS_TASK_SPECS
if spec.code in ENABLED_ODS_CODES
}
# 使用专用的结账小票实现覆盖默认流程
ODS_TASK_CLASSES["ODS_SETTLEMENT_TICKET"] = OdsSettlementTicketTask
__all__ = ["ODS_TASK_CLASSES", "ODS_TASK_SPECS", "BaseOdsTask", "ENABLED_ODS_CODES"]

View File

@@ -36,7 +36,6 @@ class ManualIngestTask(BaseTask):
(("platform_coupon_redemption_records",), "ods.platform_coupon_redemption_records"),
(("group_buy_redemption_records",), "ods.group_buy_redemption_records"),
(("group_buy_packages",), "ods.group_buy_packages"),
(("settlement_ticket_details",), "ods.settlement_ticket_details"),
(("store_goods_master",), "ods.store_goods_master"),
(("tenant_goods_master",), "ods.tenant_goods_master"),
(("store_goods_sales_records",), "ods.store_goods_sales_records"),
@@ -63,10 +62,6 @@ class ManualIngestTask(BaseTask):
"ods.tenant_goods_master": {"pk": "id"},
"ods.group_buy_packages": {"pk": "id"},
"ods.group_buy_redemption_records": {"pk": "id"},
"ods.settlement_ticket_details": {
"pk": "orderSettleId",
"json_cols": ["memberProfile", "orderItem", "tenantMemberCardLogs"],
},
"ods.store_goods_master": {"pk": "id"},
"ods.store_goods_sales_records": {"pk": "id"},
}

View File

@@ -61,7 +61,6 @@ class DwdVerifier(BaseVerifier):
"table_fee_discount_records": "id",
"store_goods_sales_records": "id",
"assistant_service_records": "id",
"assistant_cancellation_records": "id",
"member_balance_changes": "id",
"group_buy_redemption_records": "id",
"platform_coupon_redemption_records": "id",
@@ -69,7 +68,6 @@ class DwdVerifier(BaseVerifier):
"payment_transactions": "id",
"refund_transactions": "id",
"goods_stock_summary": "sitegoodsid", # 特殊:主键不是 id
"settlement_ticket_details": "ordersettleid", # 特殊:主键不是 id
}
# ODS 主键特殊覆盖(按 DWD 表名)
@@ -111,8 +109,6 @@ class DwdVerifier(BaseVerifier):
"dwd_store_goods_sale_ex": {"id": "store_goods_sale_id"},
"dwd_assistant_service_log": {"id": "assistant_service_id"},
"dwd_assistant_service_log_ex": {"id": "assistant_service_id"},
"dwd_assistant_trash_event": {"id": "assistant_trash_event_id"},
"dwd_assistant_trash_event_ex": {"id": "assistant_trash_event_id"},
"dwd_member_balance_change": {"id": "balance_change_id"},
"dwd_member_balance_change_ex": {"id": "balance_change_id"},
"dwd_groupbuy_redemption": {"id": "redemption_id"},
@@ -138,8 +134,6 @@ class DwdVerifier(BaseVerifier):
"dwd_store_goods_sale_ex": "create_time",
"dwd_assistant_service_log": "start_use_time",
"dwd_assistant_service_log_ex": "start_use_time",
"dwd_assistant_trash_event": "create_time",
"dwd_assistant_trash_event_ex": "create_time",
"dwd_member_balance_change": "create_time",
"dwd_member_balance_change_ex": "create_time",
"dwd_groupbuy_redemption": "create_time",

View File

@@ -88,7 +88,7 @@ class OdsVerifier(BaseVerifier):
pk_columns.append(ods_col_name)
# 如果 pk_columns 为空,尝试使用 conflict_columns_override 或跳过校验
# 一些特殊表(如 goods_stock_summary, settlement_ticket_details)没有标准主键
# 一些特殊表(如 goods_stock_summary没有标准主键
if not pk_columns:
# 跳过没有明确主键定义的表
self.logger.debug("%s 没有定义主键列,跳过校验配置", table_key)