微信小程序页面迁移校验之前 P5任务处理之前

This commit is contained in:
Neo
2026-03-09 01:19:21 +08:00
parent 263bf96035
commit 6e20987d2f
1112 changed files with 153824 additions and 219694 deletions

View File

@@ -184,11 +184,18 @@ class BaseTask:
if not (override_start and override_end):
raise ValueError("run.window_override.start/end 需要同时提供")
# CHANGE 2026-03-04 | 纯日期字符串按业务日分割start→当天biz_hour, end→次日biz_hour
biz_hour = int(self.config.get("app.business_day_start_hour", 8))
window_start = override_start
if isinstance(window_start, str):
window_start = dtparser.parse(window_start)
if isinstance(window_start, datetime) and window_start.tzinfo is None:
window_start = window_start.replace(tzinfo=self.tz)
# 纯日期(时分秒全零)→ 当天业务日起始时刻
if window_start.hour == 0 and window_start.minute == 0 and window_start.second == 0:
window_start = window_start.replace(hour=biz_hour, tzinfo=self.tz)
else:
window_start = window_start.replace(tzinfo=self.tz)
elif isinstance(window_start, datetime):
window_start = window_start.astimezone(self.tz)
@@ -196,7 +203,11 @@ class BaseTask:
if isinstance(window_end, str):
window_end = dtparser.parse(window_end)
if isinstance(window_end, datetime) and window_end.tzinfo is None:
window_end = window_end.replace(tzinfo=self.tz)
# 纯日期(时分秒全零)→ 次日业务日起始时刻
if window_end.hour == 0 and window_end.minute == 0 and window_end.second == 0:
window_end = (window_end + timedelta(days=1)).replace(hour=biz_hour, tzinfo=self.tz)
else:
window_end = window_end.replace(tzinfo=self.tz)
elif isinstance(window_end, datetime):
window_end = window_end.astimezone(self.tz)

View File

@@ -5,12 +5,14 @@ from __future__ import annotations
import os
import re
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import date, datetime
from decimal import Decimal, InvalidOperation
from typing import Any, Dict, Iterable, List, Sequence
from psycopg2.extras import RealDictCursor, execute_batch, execute_values
from psycopg2.extras import Json, RealDictCursor, execute_batch, execute_values
from database.connection import DatabaseConnection
from tasks.base_task import BaseTask, TaskContext
@@ -70,6 +72,16 @@ class DwdLoadTask(BaseTask):
_NUMERIC_RE = re.compile(r"^[+-]?\d+(?:\.\d+)?$")
_BOOL_STRINGS = {"true", "false", "1", "0", "yes", "no", "y", "n", "t", "f"}
# 详情表 LEFT JOIN 配置:当 DWD 表需要从额外的 ODS 详情表获取字段时使用
# detail_columns 中的列在 FACT_MAPPINGS 中以 detail."col" 形式引用
DETAIL_JOIN_CONFIG: dict[str, dict] = {
"dwd.dim_groupbuy_package_ex": {
"detail_table": "ods.group_buy_package_details",
"join_condition": 'ods_main."id" = detail."coupon_id"',
"detail_columns": ["table_area_ids", "table_area_names", "assistant_services", "groupon_site_infos"],
},
}
def _strip_scd2_keys(self, pk_cols: Sequence[str]) -> list[str]:
return [c for c in pk_cols if c.lower() not in self.SCD_COLS]
@@ -113,7 +125,10 @@ class DwdLoadTask(BaseTask):
) -> str:
if key_exprs and order_col:
distinct_on = ", ".join(key_exprs)
order_by = ", ".join([*key_exprs, f'"{order_col}" DESC NULLS LAST'])
# order_col 可能是预格式化的表达式(如 ods_main."fetched_at"),此时直接使用;
# 否则包裹双引号
order_col_expr = order_col if '"' in order_col else f'"{order_col}"'
order_by = ", ".join([*key_exprs, f'{order_col_expr} DESC NULLS LAST'])
return (
f"SELECT DISTINCT ON ({distinct_on}) {select_cols_sql} "
f"FROM {ods_table_sql} {where_sql} ORDER BY {order_by}"
@@ -303,6 +318,11 @@ class DwdLoadTask(BaseTask):
("table_area_id_list", "table_area_id_list", None),
("package_type", "type", None),
("tenant_coupon_sale_order_item_id", "tenantcouponsaleorderitemid", None),
# CHANGE 2026-03-05: 团购详情字段(来自 ods.group_buy_package_details通过 LEFT JOIN 关联)
("table_area_ids", 'detail."table_area_ids"', None),
("table_area_names", 'detail."table_area_names"', None),
("assistant_services", 'detail."assistant_services"', None),
("groupon_site_infos", 'detail."groupon_site_infos"', None),
],
"dwd.dim_staff": [
("staff_id", "id", None),
@@ -311,16 +331,16 @@ class DwdLoadTask(BaseTask):
],
"dwd.dim_staff_ex": [
("staff_id", "id", None),
("rank_name", "rankname", None),
("cashier_point_id", "cashierpointid", "bigint"),
("cashier_point_name", "cashierpointname", None),
("group_id", "groupid", "bigint"),
("group_name", "groupname", None),
("system_user_id", "systemuserid", "bigint"),
("tenant_org_id", "tenantorgid", "bigint"),
("rank_name", "rank_name", None),
("cashier_point_id", "cashier_point_id", "bigint"),
("cashier_point_name", "cashier_point_name", None),
("group_id", "group_id", "bigint"),
("group_name", "group_name", None),
("system_user_id", "system_user_id", "bigint"),
("tenant_org_id", "tenant_org_id", "bigint"),
("auth_code_create", "auth_code_create", "timestamptz"),
("create_time", "create_time", "timestamptz"),
("user_roles", "userroles", "jsonb"),
("user_roles", "user_roles", "jsonb"),
],
# 事实表主键及关键差异列
"dwd.dwd_table_fee_log": [
@@ -602,6 +622,7 @@ class DwdLoadTask(BaseTask):
],
# 库存汇总goods_stock_summaryODS 列名全小写)
# CHANGE 2026-02-21: BUG 10 fix — ODS 列名是小写sitegoodsid不是驼峰
# CHANGE 2026-03-01: 补 site_id 映射ODS 入库时从 app.store_id 注入 siteid
"dwd.dwd_goods_stock_summary": [
("site_goods_id", '"sitegoodsid"', "bigint"), # 门店商品 IDPK
("goods_name", '"goodsname"', None), # 商品名称
@@ -617,6 +638,7 @@ class DwdLoadTask(BaseTask):
("range_sale_money", '"rangesalemoney"', "numeric"), # 销售金额
("range_inventory", '"rangeinventory"', "numeric"), # 盘点调整量
("current_stock", '"currentstock"', "numeric"), # 当前库存
("site_id", '"siteid"', "bigint"), # 门店 IDODS 入库时注入)
],
# 库存变动流水goods_stock_movementsODS 列名全小写)
# CHANGE 2026-02-21: BUG 10 fix — ODS 列名是小写,不是驼峰
@@ -653,11 +675,12 @@ class DwdLoadTask(BaseTask):
def load(self, extracted: dict[str, Any], context: TaskContext) -> dict[str, Any]:
"""
遍历映射关系,维度执行 SCD2 合并,事实表按时间增量插入。
并行遍历映射关系,维度执行 SCD2 合并,事实表按时间增量插入。
说明:
- 为避免长事务导致锁堆积/中断后遗留 idle-in-tx本任务按“每张表一次事务”提交
- 单表失败会回滚该表并继续后续表,最终在结果中汇总错误信息
- 使用 ThreadPoolExecutor 并行处理多张表,每张表使用独立数据库连接和事务
- 单表失败会回滚该表并继续后续表,最终在结果中汇总错误信息
- 并行线程数通过 AppConfig 的 dwd.parallel_workers 配置(默认 4
"""
now = extracted["now"]
summary: List[Dict[str, Any]] = []
@@ -668,54 +691,109 @@ class DwdLoadTask(BaseTask):
if env_only and not only_tables_cfg:
only_tables_cfg = [t.strip() for t in env_only.split(",") if t.strip()]
only_tables = {str(t).strip().lower() for t in only_tables_cfg if str(t).strip()} if only_tables_cfg else set()
with self.db.conn.cursor(cursor_factory=RealDictCursor) as cur:
for dwd_table, ods_table in self.TABLE_MAP.items():
if only_tables and dwd_table.lower() not in only_tables and self._table_base(dwd_table).lower() not in only_tables:
continue
started = time.monotonic()
self.logger.info("DWD 装载开始:%s <= %s", dwd_table, ods_table)
parallel_workers = int(self.config.get("dwd.parallel_workers", 4))
# 筛选需要处理的表
tables_to_process: list[tuple[str, str]] = []
for dwd_table, ods_table in self.TABLE_MAP.items():
if only_tables and dwd_table.lower() not in only_tables and self._table_base(dwd_table).lower() not in only_tables:
continue
tables_to_process.append((dwd_table, ods_table))
if not tables_to_process:
return {"tables": summary, "errors": 0, "error_details": errors}
# 并行调度:每张表在独立线程中执行,使用独立数据库连接
with ThreadPoolExecutor(max_workers=parallel_workers) as executor:
futures = {}
for dwd_table, ods_table in tables_to_process:
future = executor.submit(
self._process_single_table,
dwd_table, ods_table, now, context,
)
futures[future] = dwd_table
for future in as_completed(futures):
dwd_table = futures[future]
try:
dwd_cols = self._get_columns(cur, dwd_table)
ods_cols = self._get_columns(cur, ods_table)
if not dwd_cols:
self.logger.warning("跳过 %s:未能获取 DWD 列信息", dwd_table)
continue
if self._table_base(dwd_table).startswith("dim_"):
dim_counts = self._merge_dim(cur, dwd_table, ods_table, dwd_cols, ods_cols, now)
self.db.conn.commit()
summary.append({"table": dwd_table, "mode": "SCD2", **dim_counts})
else:
dwd_types = self._get_column_types(cur, dwd_table, "dwd")
ods_types = self._get_column_types(cur, ods_table, "ods")
fact_counts = self._merge_fact_increment(
cur,
dwd_table,
ods_table,
dwd_cols,
ods_cols,
dwd_types,
ods_types,
window_start=context.window_start,
window_end=context.window_end,
)
self.db.conn.commit()
summary.append({"table": dwd_table, "mode": "INCREMENT", **fact_counts})
elapsed = time.monotonic() - started
self.logger.info("DWD 装载完成:%s,用时 %.2fs", dwd_table, elapsed)
table_result = future.result()
summary.append(table_result)
except Exception as exc: # noqa: BLE001
try:
self.db.conn.rollback()
except Exception:
pass
elapsed = time.monotonic() - started
self.logger.exception("DWD 装载失败:%s,用时 %.2fserr=%s", dwd_table, elapsed, exc)
self.logger.error(
"DWD 并行装载失败:%serr=%s", dwd_table, exc,
)
errors.append({"table": dwd_table, "error": str(exc)})
continue
return {"tables": summary, "errors": len(errors), "error_details": errors}
def _process_single_table(
    self,
    dwd_table: str,
    ods_table: str,
    now: datetime,
    context: TaskContext,
) -> Dict[str, Any]:
    """Load one DWD table on its own database connection.

    Intended to run inside a worker thread: a fresh ``DatabaseConnection``
    is built from the settings of ``self.db``, used for exactly one table
    and closed in all cases, so each table gets its own transaction.

    Args:
        dwd_table: Target DWD table name.
        ods_table: Source ODS table name.
        now: Timestamp handed to the dimension merge.
        context: Task context; its ``window_start`` / ``window_end`` bound
            the incremental fact load.

    Returns:
        A summary dict with ``table`` and ``mode`` (``"SCD2"``,
        ``"INCREMENT"`` or ``"SKIPPED"``) plus the counters returned by the
        merge routine.

    Raises:
        Exception: Any failure is logged, the table's transaction is rolled
            back (best effort) and the exception is re-raised so that the
            caller sees it through ``future.result()``.
    """
    t0 = time.monotonic()
    self.logger.info("DWD 装载开始:%s <= %s", dwd_table, ods_table)

    # Dedicated connection for this worker; never shared with other threads.
    worker_db = DatabaseConnection(
        dsn=self.db._dsn,
        session=self.db._session,
        connect_timeout=self.db._connect_timeout,
    )
    try:
        with worker_db.conn.cursor(cursor_factory=RealDictCursor) as cur:
            dwd_cols = self._get_columns(cur, dwd_table)
            ods_cols = self._get_columns(cur, ods_table)
            if not dwd_cols:
                # Without target column metadata nothing can be loaded.
                self.logger.warning("跳过 %s:未能获取 DWD 列信息", dwd_table)
                return {"table": dwd_table, "mode": "SKIPPED", "inserted": 0, "updated": 0}

            is_dimension = self._table_base(dwd_table).startswith("dim_")
            if is_dimension:
                mode = "SCD2"
                counts = self._merge_dim(cur, dwd_table, ods_table, dwd_cols, ods_cols, now)
            else:
                mode = "INCREMENT"
                dwd_types = self._get_column_types(cur, dwd_table, "dwd")
                ods_types = self._get_column_types(cur, ods_table, "ods")
                counts = self._merge_fact_increment(
                    cur,
                    dwd_table,
                    ods_table,
                    dwd_cols,
                    ods_cols,
                    dwd_types,
                    ods_types,
                    window_start=context.window_start,
                    window_end=context.window_end,
                )

            # One commit per table keeps transactions short.
            worker_db.conn.commit()
            outcome = {"table": dwd_table, "mode": mode, **counts}

            elapsed = time.monotonic() - t0
            self.logger.info("DWD 装载完成:%s,用时 %.2fs", dwd_table, elapsed)
            return outcome
    except Exception as exc:
        # Best-effort rollback: the connection may already be unusable.
        try:
            worker_db.conn.rollback()
        except Exception:
            pass
        elapsed = time.monotonic() - t0
        self.logger.exception(
            "DWD 装载失败:%s,用时 %.2fserr=%s", dwd_table, elapsed, exc,
        )
        # Propagate so the scheduling thread records the failure.
        raise
    finally:
        worker_db.close()
# ---------------------- 辅助方法 ----------------------
def _get_columns(self, cur, table: str) -> List[str]:
"""获取指定表的列名(小写)。"""
@@ -872,6 +950,17 @@ class DwdLoadTask(BaseTask):
ods_types = self._get_column_types(cur, ods_table, "ods")
ts_types = {"timestamp without time zone", "timestamp with time zone"}
table_sql = self._format_table(ods_table, "ods")
# CHANGE 2026-03-05: 详情表 LEFT JOIN 支持 — 当 DWD 表配置了 DETAIL_JOIN_CONFIG 时,
# 给 ODS 主表加别名 ods_mainLEFT JOIN 详情表为 detail
# 非 detail 列引用加 ods_main. 前缀避免歧义
detail_join = self.DETAIL_JOIN_CONFIG.get(dwd_table)
ods_alias = "ods_main" if detail_join else ""
if detail_join:
detail_table_sql = self._format_table(detail_join["detail_table"], "ods")
table_sql = (
f"{table_sql} AS ods_main "
f'LEFT JOIN {detail_table_sql} AS detail ON {detail_join["join_condition"]}'
)
# 构造 SELECT 表达式,支持 JSON/expression 映射
select_exprs: list[str] = []
added: set[str] = set()
@@ -881,21 +970,26 @@ class DwdLoadTask(BaseTask):
continue
if lc in mapping:
src, cast_type = mapping[lc]
# detail. 前缀的列直接使用(来自详情表),其他列加 ods_main. 前缀
if ods_alias and not src.startswith("detail."):
src = self._qualify_column_ref(src, ods_alias)
select_exprs.append(f"{self._cast_expr(src, cast_type)} AS \"{lc}\"")
added.add(lc)
elif lc in ods_set:
col_ref = f'{ods_alias}."{lc}"' if ods_alias else f'"{lc}"'
# CHANGE 2026-02-22: BUG 12 — 同名列如果是时间类型,加哨兵值过滤
if dwd_types.get(lc) in ts_types and ods_types.get(lc) in ts_types:
select_exprs.append(
f'CASE WHEN "{lc}" >= \'{self._SENTINEL_DATE_THRESHOLD}\'::timestamp '
f'THEN "{lc}" ELSE NULL END AS "{lc}"'
f"CASE WHEN {col_ref} >= '{self._SENTINEL_DATE_THRESHOLD}'::timestamp "
f'THEN {col_ref} ELSE NULL END AS "{lc}"'
)
else:
select_exprs.append(f'"{lc}" AS "{lc}"')
select_exprs.append(f'{col_ref} AS "{lc}"')
added.add(lc)
# 分类维度需要额外读取 categoryboxes 以展开子类
if dwd_table == "dwd.dim_goods_category" and "categoryboxes" not in added and "categoryboxes" in ods_set:
select_exprs.append('"categoryboxes" AS "categoryboxes"')
col_ref = f'{ods_alias}."categoryboxes"' if ods_alias else '"categoryboxes"'
select_exprs.append(f'{col_ref} AS "categoryboxes"')
added.add("categoryboxes")
# 主键兜底确保被选出
for pk in business_keys:
@@ -903,9 +997,12 @@ class DwdLoadTask(BaseTask):
if lc not in added:
if lc in mapping:
src, cast_type = mapping[lc]
if ods_alias and not src.startswith("detail."):
src = self._qualify_column_ref(src, ods_alias)
select_exprs.append(f"{self._cast_expr(src, cast_type)} AS \"{lc}\"")
elif lc in ods_set:
select_exprs.append(f'"{lc}" AS "{lc}"')
col_ref = f'{ods_alias}."{lc}"' if ods_alias else f'"{lc}"'
select_exprs.append(f'{col_ref} AS "{lc}"')
added.add(lc)
if not select_exprs:
@@ -917,14 +1014,19 @@ class DwdLoadTask(BaseTask):
lc = key.lower()
if lc in mapping:
src, cast_type = mapping[lc]
if ods_alias and not src.startswith("detail."):
src = self._qualify_column_ref(src, ods_alias)
key_exprs.append(self._cast_expr(src, cast_type))
elif lc in ods_set:
key_exprs.append(f'"{lc}"')
key_exprs.append(f'{ods_alias}."{lc}"' if ods_alias else f'"{lc}"')
select_cols_sql = ", ".join(select_exprs)
where_sql = self._append_where_condition("", '"fetched_at" IS NOT NULL')
fetched_at_ref = f'{ods_alias}."fetched_at"' if ods_alias else '"fetched_at"'
where_sql = self._append_where_condition("", f'{fetched_at_ref} IS NOT NULL')
# CHANGE 2026-03-05: order_col 也需要加别名前缀
qualified_order_col = f'{ods_alias}."{order_col}"' if ods_alias and order_col else (f'"{order_col}"' if order_col else None)
sql = self._latest_snapshot_select_sql(
select_cols_sql, table_sql, key_exprs, order_col, where_sql
select_cols_sql, table_sql, key_exprs, qualified_order_col, where_sql
)
cur.execute(sql)
rows = [{k.lower(): v for k, v in r.items()} for r in cur.fetchall()]
@@ -1006,7 +1108,7 @@ class DwdLoadTask(BaseTask):
# 批量插入新版本
if to_insert:
self._insert_dim_rows_bulk(cur, dwd_table, dwd_cols, to_insert, now)
self._insert_dim_rows_bulk(cur, dwd_table, dwd_cols, to_insert, now, dwd_types=dwd_types)
processed = len(src_rows_by_pk)
updated = len(to_close)
@@ -1050,11 +1152,16 @@ class DwdLoadTask(BaseTask):
dwd_cols: Sequence[str],
rows_with_version: Sequence[tuple[Dict[str, Any], int]],
now: datetime,
dwd_types: Dict[str, str] | None = None,
) -> None:
"""批量插入新的 SCD2 版本行。"""
sorted_cols = [c.lower() for c in sorted(dwd_cols)]
insert_cols_sql = ", ".join(f'"{c}"' for c in sorted_cols)
table_sql = self._format_table(table, "dwd")
# 预计算数组类型列集合,避免 list 值被误包装为 Json
_array_cols: set[str] = set()
if dwd_types:
_array_cols = {c for c, t in dwd_types.items() if "ARRAY" in t.upper() or "[]" in t}
def build_row(src_row: Dict[str, Any], version: int) -> list[Any]:
values: list[Any] = []
@@ -1068,7 +1175,15 @@ class DwdLoadTask(BaseTask):
elif c == "scd2_version":
values.append(version)
else:
values.append(src_row.get(c))
val = src_row.get(c)
# CHANGE 2026-03-07: 区分数组列和 JSONB 列
# 数组列TEXT[] 等)的 list 值直接传递psycopg2 自动转为 PG 数组格式
# JSONB 列的 dict/list 值需要 Json() 包装
if isinstance(val, list) and c not in _array_cols:
val = Json(val)
elif isinstance(val, dict):
val = Json(val)
values.append(val)
return values
values_rows = [build_row(r, ver) for r, ver in rows_with_version]
@@ -1395,6 +1510,23 @@ class DwdLoadTask(BaseTask):
# CHANGE 2026-02-22: BUG 12 fix — 哨兵日期阈值,上游 API 用 0001-01-01 表示"未设置"
_SENTINEL_DATE_THRESHOLD = "0002-01-01"
@staticmethod
def _qualify_column_ref(src: str, alias: str) -> str:
"""为裸列引用添加表别名前缀。
已包含 detail.、别名前缀、JSON 操作符、表达式CASE/COALESCE 等)的源不做修改。
仅对简单列名(如 "col" 或 col添加 alias."col" 前缀。
"""
# 已有 detail. 或其他表前缀(含 .)→ 不修改
if "." in src:
return src
# JSON 操作符、SQL 表达式 → 不修改
if any(tok in src for tok in ("->", "#>>", "::", "CASE ", "COALESCE", "NULLIF", "(")):
return src
# 裸列名(可能带引号)→ 加别名前缀
bare = src.strip('"')
return f'{alias}."{bare}"'
def _cast_expr(self, col: str, cast_type: str | None) -> str:
"""构造带可选 CAST 的列表达式。

View File

@@ -20,6 +20,8 @@ from .assistant_salary_task import AssistantSalaryTask
from .assistant_finance_task import AssistantFinanceTask
from .member_consumption_task import MemberConsumptionTask
from .member_visit_task import MemberVisitTask
from .assistant_project_tag_task import AssistantProjectTagTask
from .member_project_tag_task import MemberProjectTagTask
from .finance_daily_task import FinanceDailyTask
from .finance_recharge_task import FinanceRechargeTask
from .finance_income_task import FinanceIncomeStructureTask
@@ -56,6 +58,8 @@ __all__ = [
# 客户维度
"MemberConsumptionTask",
"MemberVisitTask",
"AssistantProjectTagTask",
"MemberProjectTagTask",
# 财务维度
"FinanceBaseTask",
"FinanceDailyTask",

View File

@@ -34,6 +34,8 @@ from typing import Any, Dict, List, Optional, Set, Tuple
from .base_dws_task import BaseDwsTask, TaskContext
from .dws_helpers import mask_mobile, calc_days_since
from neozqyy_shared.datetime_utils import biz_date_sql_expr
class AssistantCustomerTask(BaseDwsTask):
"""
@@ -181,13 +183,16 @@ class AssistantCustomerTask(BaseDwsTask):
"""
提取助教-客户服务统计(含滚动窗口)
"""
sql = """
# CHANGE 2026-03-01 | business-day-cutoff 6.3: DATE(start_use_time) → 营业日归属表达式
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("start_use_time", cutoff)
sql = f"""
WITH service_base AS (
SELECT
site_assistant_id AS assistant_id,
nickname AS assistant_nickname,
tenant_member_id AS member_id,
DATE(start_use_time) AS service_date,
{biz_expr} AS service_date,
income_seconds,
ledger_amount
FROM dwd.dwd_assistant_service_log

View File

@@ -34,6 +34,8 @@ from datetime import date, datetime, time, timedelta
from decimal import Decimal, ROUND_HALF_UP
from typing import Any, Dict, List, Optional, Set, Tuple
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import BaseDwsTask, CourseType, TaskContext
# 惩罚区域集合:大厅 A/B/C/S/TV + 麻将房 M1M7
@@ -197,7 +199,12 @@ class AssistantDailyTask(BaseDwsTask):
JOIN _ex 表取 is_trash 字段,用于直接判断服务是否被废除。
"""
sql = """
# CHANGE 2026-02-26: dwd_assistant_service_log 无 table_area_name 列,
# 改为 JOIN dim_table 取 site_table_area_name
# CHANGE 2026-03-01 | business-day-cutoff 6.1: DATE() → 营业日归属表达式
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("asl.start_use_time", cutoff)
sql = f"""
SELECT
asl.assistant_service_id,
asl.order_settle_id,
@@ -214,15 +221,18 @@ class AssistantDailyTask(BaseDwsTask):
asl.ledger_unit_price,
asl.start_use_time,
asl.last_use_time,
asl.table_area_name,
DATE(asl.start_use_time) AS service_date,
COALESCE(dt.site_table_area_name, '') AS table_area_name,
{biz_expr} AS service_date,
COALESCE(ex.is_trash, 0) AS is_trash
FROM dwd.dwd_assistant_service_log asl
LEFT JOIN dwd.dwd_assistant_service_log_ex ex
ON asl.assistant_service_id = ex.assistant_service_id
LEFT JOIN dwd.dim_table dt
ON asl.site_table_id = dt.table_id
AND dt.scd2_is_current = 1
WHERE asl.site_id = %s
AND DATE(asl.start_use_time) >= %s
AND DATE(asl.start_use_time) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND asl.is_delete = 0
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
@@ -258,14 +268,20 @@ class AssistantDailyTask(BaseDwsTask):
# 获取助教当日等级SCD2 as-of
level_info = self.get_assistant_level_asof(assistant_id, service_date)
# CHANGE 2026-02-27 | level_name 始终由 code 静态映射得出
# SCD2 仅用于取历史 level_code等级可能变过
# name 不再依赖 SCD2 返回值,避免 SCD2 缺失时 NULL
level_code = level_info.get('level_code') if level_info else record.get('assistant_level')
level_name = self.level_code_to_name(level_code)
agg_dict[key] = {
'site_id': site_id,
'tenant_id': self.config.get("app.tenant_id", site_id),
'assistant_id': assistant_id,
'assistant_nickname': record.get('assistant_nickname'),
'stat_date': service_date,
'assistant_level_code': level_info.get('level_code') if level_info else record.get('assistant_level'),
'assistant_level_name': level_info.get('level_name') if level_info else None,
'assistant_level_code': level_code,
'assistant_level_name': level_name,
'total_service_count': 0,
'base_service_count': 0,
'bonus_service_count': 0,

View File

@@ -28,6 +28,8 @@ from typing import Any, Dict, List, Optional, Tuple
from .base_dws_task import BaseDwsTask, CourseType, TaskContext
from neozqyy_shared.datetime_utils import biz_date_sql_expr
class AssistantFinanceTask(BaseDwsTask):
"""
@@ -98,6 +100,8 @@ class AssistantFinanceTask(BaseDwsTask):
revenue_total = self.safe_decimal(rev.get('revenue_total', 0))
gross_profit = revenue_total - cost_daily
gross_margin = gross_profit / revenue_total if revenue_total > 0 else Decimal('0')
# 防御clamp 到 numeric(7,4) 安全范围,避免极端值溢出
gross_margin = max(Decimal('-999.9999'), min(Decimal('999.9999'), gross_margin))
record = {
'site_id': site_id,
@@ -125,9 +129,12 @@ class AssistantFinanceTask(BaseDwsTask):
# load() 已移除——使用 BaseDwsTask 默认实现DATE_COL="stat_date"
def _extract_daily_revenue(self, site_id: int, start_date: date, end_date: date) -> List[Dict[str, Any]]:
sql = """
# CHANGE 2026-03-01 | business-day-cutoff 6.5: DATE(start_use_time) → 营业日归属表达式
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("s.start_use_time", cutoff)
sql = f"""
SELECT
DATE(s.start_use_time) AS stat_date,
{biz_expr} AS stat_date,
s.site_assistant_id AS assistant_id,
(ARRAY_AGG(s.nickname ORDER BY s.start_use_time DESC))[1] AS assistant_nickname,
COUNT(*) AS service_count,
@@ -143,10 +150,10 @@ class AssistantFinanceTask(BaseDwsTask):
LEFT JOIN dws.cfg_skill_type st
ON st.skill_id = s.skill_id AND st.is_active = TRUE
WHERE s.site_id = %s
AND DATE(s.start_use_time) >= %s
AND DATE(s.start_use_time) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND s.is_delete = 0
GROUP BY DATE(s.start_use_time), s.site_assistant_id
GROUP BY {biz_expr}, s.site_assistant_id
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []

View File

@@ -35,6 +35,8 @@ from typing import Any, Dict, List, Optional, Set, Tuple
from .base_dws_task import BaseDwsTask, TaskContext
from neozqyy_shared.datetime_utils import biz_date_sql_expr
class AssistantMonthlyTask(BaseDwsTask):
"""
@@ -262,14 +264,18 @@ class AssistantMonthlyTask(BaseDwsTask):
month_where = " OR ".join(month_conditions)
# CHANGE 2026-02-22 | Prompt: 需求 A — 按档位分段统计
# GROUP BY 加入 assistant_level_code/name,使同一助教月内不同档位各自聚合;
# GROUP BY 加入 assistant_level_code使同一助教月内不同档位各自聚合
# nickname 改用 ARRAY_AGG 按时间倒序取最新值,替代 MAX() 的字典序取值。
# 唯一约束已同步变更为 (site_id, assistant_id, stat_month, assistant_level_code)
# CHANGE 2026-02-27 | BUG: assistant_level_name 从 GROUP BY 移到 ARRAY_AGG FILTER
# 同一 level_code 在 daily_detail 中可能有 NULL 和非 NULL 的 name
# GROUP BY 会产生多行导致 UK 冲突
sql = f"""
SELECT
assistant_id,
assistant_level_code,
assistant_level_name,
-- 同一 level_code 可能有 NULL 和非 NULL 的 name取最新非空值避免 UK 冲突
(ARRAY_AGG(assistant_level_name ORDER BY stat_date DESC) FILTER (WHERE assistant_level_name IS NOT NULL))[1] AS assistant_level_name,
(ARRAY_AGG(assistant_nickname ORDER BY stat_date DESC))[1] AS assistant_nickname,
DATE_TRUNC('month', stat_date)::DATE AS stat_month,
COUNT(DISTINCT stat_date) AS work_days,
@@ -291,7 +297,7 @@ class AssistantMonthlyTask(BaseDwsTask):
SUM(trashed_count) AS trashed_count
FROM dws.dws_assistant_daily_detail
WHERE site_id = %s AND ({month_where})
GROUP BY assistant_id, assistant_level_code, assistant_level_name,
GROUP BY assistant_id, assistant_level_code,
DATE_TRUNC('month', stat_date)
"""
@@ -313,10 +319,13 @@ class AssistantMonthlyTask(BaseDwsTask):
end_month = max(months)
next_month = (end_month.replace(day=28) + timedelta(days=4)).replace(day=1)
sql = """
# CHANGE 2026-03-01 | business-day-cutoff 6.4: 使用 Business_Month 口径
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("start_use_time", cutoff)
sql = f"""
SELECT
site_assistant_id AS assistant_id,
DATE_TRUNC('month', start_use_time)::DATE AS stat_month,
DATE_TRUNC('month', {biz_expr}::timestamp)::DATE AS stat_month,
COUNT(DISTINCT CASE WHEN tenant_member_id > 0 THEN tenant_member_id END) AS unique_customers,
COUNT(DISTINCT site_table_id) AS unique_tables
FROM dwd.dwd_assistant_service_log
@@ -324,7 +333,7 @@ class AssistantMonthlyTask(BaseDwsTask):
AND start_use_time >= %s
AND start_use_time < %s
AND is_delete = 0
GROUP BY site_assistant_id, DATE_TRUNC('month', start_use_time)
GROUP BY site_assistant_id, DATE_TRUNC('month', {biz_expr}::timestamp)
"""
rows = self.db.query(sql, (site_id, start_month, next_month))
return [dict(row) for row in rows] if rows else []

View File

@@ -43,6 +43,8 @@ from typing import Any, Dict, List
from .base_dws_task import BaseDwsTask, TaskContext
from neozqyy_shared.datetime_utils import biz_date_sql_expr
# =============================================================================
# 数据结构
@@ -225,19 +227,22 @@ class AssistantOrderContributionTask(BaseDwsTask):
settle_type=1 为台桌结账,包含台费、酒水食品等金额。
"""
sql = """
# CHANGE 2026-03-01 | business-day-cutoff 6.2: DATE(pay_time) → 营业日归属表达式
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
order_settle_id,
site_id,
tenant_id,
table_charge_money,
goods_money,
DATE(pay_time) AS stat_date
{biz_expr} AS stat_date
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND settle_type = 1
AND DATE(pay_time) >= %s
AND DATE(pay_time) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
@@ -250,7 +255,10 @@ class AssistantOrderContributionTask(BaseDwsTask):
每条记录对应一张台桌在一个订单中的台费信息。
real_table_use_seconds 为台桌实际使用时长。
"""
sql = """
# CHANGE 2026-03-01 | business-day-cutoff 6.2: DATE(start_use_time) → 营业日归属表达式
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("tfl.start_use_time", cutoff)
sql = f"""
SELECT
tfl.order_settle_id,
tfl.site_table_id AS table_id,
@@ -259,8 +267,8 @@ class AssistantOrderContributionTask(BaseDwsTask):
COALESCE(tfl.ledger_amount, 0) AS table_fee
FROM dwd.dwd_table_fee_log tfl
WHERE tfl.site_id = %s
AND DATE(tfl.start_use_time) >= %s
AND DATE(tfl.start_use_time) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND COALESCE(tfl.is_delete, 0) = 0
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
@@ -274,7 +282,10 @@ class AssistantOrderContributionTask(BaseDwsTask):
通过 LEFT JOIN cfg_skill_type 获取 course_type_code
real_service_money 为助教分成。
"""
sql = """
# CHANGE 2026-03-01 | business-day-cutoff 6.2: DATE(start_use_time) → 营业日归属表达式
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("asl.start_use_time", cutoff)
sql = f"""
SELECT
asl.order_settle_id,
asl.site_assistant_id AS assistant_id,
@@ -290,8 +301,8 @@ class AssistantOrderContributionTask(BaseDwsTask):
ON asl.skill_id = cst.skill_id
AND cst.is_active = TRUE
WHERE asl.site_id = %s
AND DATE(asl.start_use_time) >= %s
AND DATE(asl.start_use_time) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND COALESCE(asl.is_delete, 0) = 0
"""
rows = self.db.query(sql, (site_id, start_date, end_date))

View File

@@ -0,0 +1,236 @@
# -*- coding: utf-8 -*-
"""
DWS 助教项目标签任务
按时间窗口计算每位助教在四大项目BILLIARD/SNOOKER/MAHJONG/KTV
工作时长占比占比≥25% 则分配标签。
数据链路:
dwd_assistant_service_log (income_seconds)
→ JOIN dim_table (site_table_id → table_id, scd2_is_current=1)
→ get_area_category(area_name, table_name)
→ 按 category_code 汇总 → 计算占比 → 写入 dws_assistant_project_tag
目标表:
dws.dws_assistant_project_tag
更新策略:
全量删除重建(按 site_id 删除后重新插入所有时间窗口)
"""
from __future__ import annotations
from datetime import date
from decimal import Decimal
from typing import Any, Dict, List, Optional
from tasks.dws.base_dws_task import BaseDwsTask, TimeWindow
from neozqyy_shared.datetime_utils import biz_date_sql_expr
# Only the four main project categories are scored; SPECIAL/OTHER are excluded.
VALID_CATEGORIES = {"BILLIARD", "SNOOKER", "MAHJONG", "KTV"}
# The six time windows computed for the assistant dashboard.
ASSISTANT_WINDOWS = [
    TimeWindow.THIS_MONTH,
    TimeWindow.THIS_QUARTER,
    TimeWindow.LAST_MONTH,
    TimeWindow.LAST_3_MONTHS_EXCL_CURRENT,
    TimeWindow.LAST_QUARTER,
    TimeWindow.LAST_6_MONTHS,
]
# Minimum share of an assistant's working time (25%) for a category to be tagged.
TAG_THRESHOLD = Decimal("0.25")
class AssistantProjectTagTask(BaseDwsTask):
"""助教项目标签 ETL 任务"""
def get_task_code(self) -> str:
return "DWS_ASSISTANT_PROJECT_TAG"
def get_target_table(self) -> str:
return "dws_assistant_project_tag"
def get_primary_keys(self) -> List[str]:
return ["site_id", "assistant_id", "time_window", "category_code"]
def extract(self, context) -> Dict[str, Any]:
site_id = context.store_id
self.logger.info("%s: 提取助教服务数据", self.get_task_code())
# 加载配置cfg_area_category 等)
self.load_config_cache()
# 提取台桌信息(用于 get_area_category 的 table_name 参数)
table_info = self._extract_table_info(site_id)
# 按时间窗口提取助教服务时长
window_data: Dict[str, List[Dict]] = {}
for window in ASSISTANT_WINDOWS:
time_range = self.get_time_window_range(window)
rows = self._extract_assistant_durations(
site_id, time_range.start, time_range.end
)
window_data[window.value] = rows
return {
"window_data": window_data,
"table_info": table_info,
"site_id": site_id,
}
def _extract_table_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
"""提取台桌维度信息"""
sql = """
SELECT table_id, table_name, site_table_area_name AS area_name
FROM dwd.dim_table
WHERE site_id = %s AND scd2_is_current = 1
"""
rows = self.db.query(sql, (site_id,))
return {r["table_id"]: dict(r) for r in (rows or [])}
def _extract_assistant_durations(
self, site_id: int, start_date: date, end_date: date
) -> List[Dict[str, Any]]:
"""提取助教服务时长明细(按助教+台桌聚合)"""
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("asl.start_use_time", cutoff)
sql = f"""
SELECT
asl.site_assistant_id AS assistant_id,
asl.site_table_id AS table_id,
COALESCE(SUM(asl.income_seconds), 0) AS duration_seconds
FROM dwd.dwd_assistant_service_log asl
WHERE asl.site_id = %(site_id)s
AND {biz_expr} >= %(start_date)s
AND {biz_expr} <= %(end_date)s
AND asl.is_delete = 0
GROUP BY asl.site_assistant_id, asl.site_table_id
"""
rows = self.db.query(sql, {
"site_id": site_id,
"start_date": start_date,
"end_date": end_date,
})
return [dict(r) for r in rows] if rows else []
def transform(self, extracted: Dict[str, Any], context) -> List[Dict[str, Any]]:
table_info = extracted["table_info"]
site_id = extracted["site_id"]
tenant_id = getattr(context, "tenant_id", 0) or 0
results: List[Dict[str, Any]] = []
for window_value, rows in extracted["window_data"].items():
# 按助教汇总各项目时长
# assistant_id → category_code → seconds
assistant_cats: Dict[int, Dict[str, int]] = {}
for row in rows:
aid = row["assistant_id"]
tid = row["table_id"]
secs = self.safe_int(row["duration_seconds"])
if secs <= 0:
continue
# 通过 dim_table 获取区域和台桌名
tinfo = table_info.get(tid, {})
area_name = tinfo.get("area_name")
table_name = tinfo.get("table_name")
cat = self.get_area_category(area_name, table_name)
code = cat.get("category_code", "OTHER")
# 只计算四大项目
if code not in VALID_CATEGORIES:
continue
if aid not in assistant_cats:
assistant_cats[aid] = {}
assistant_cats[aid][code] = assistant_cats[aid].get(code, 0) + secs
# 计算占比并生成记录
for aid, cats in assistant_cats.items():
total = sum(cats.values())
if total <= 0:
continue
for code, secs in cats.items():
pct = Decimal(str(secs)) / Decimal(str(total))
pct = pct.quantize(Decimal("0.0001"))
cat_info = self._get_category_display(code)
results.append({
"site_id": site_id,
"tenant_id": tenant_id,
"assistant_id": aid,
"time_window": window_value,
"category_code": code,
"category_name": cat_info["category_name"],
"short_name": cat_info["short_name"],
"duration_seconds": secs,
"total_seconds": total,
"percentage": float(pct),
"is_tagged": pct >= TAG_THRESHOLD,
})
self.logger.info(
"%s: 生成 %d 条标签记录(其中 %d 条达标)",
self.get_task_code(),
len(results),
sum(1 for r in results if r["is_tagged"]),
)
return results
def _get_category_display(self, code: str) -> Dict[str, str]:
    """Return the display name and short label for a category code.

    The cached area-category configuration is scanned for the first entry
    whose ``category_code`` equals *code*. When none matches, a built-in
    table for the four main categories is used; any other code falls back
    to the code itself and its first character.

    Args:
        code: Category code such as ``"BILLIARD"`` or ``"KTV"``.

    Returns:
        A dict with the keys ``category_name`` and ``short_name``.
    """
    cache = self.load_config_cache()
    for cat in cache.area_categories.values():
        if cat.get("category_code") != code:
            continue
        short_name = cat.get("short_name")
        if short_name is None:
            # A key that is present but None (presumably a NULL config
            # column) must still get the default; dict.get's default only
            # covers a missing key. An empty string is kept as a
            # deliberate "no label".
            short_name = code[:1]
        return {
            "category_name": (
                cat.get("display_name") or cat.get("category_name") or code
            ),
            "short_name": short_name,
        }

    # Built-in fallback used when the configuration has no matching entry.
    fallback = {
        "BILLIARD": ("🎱 中式/追分", "🎱"),
        "SNOOKER": ("斯诺克", ""),
        "MAHJONG": ("🀄 麻将/棋牌", "🀄"),
        "KTV": ("🎤 团建/K歌", "🎤"),
    }
    name, short = fallback.get(code, (code, code[:1]))
    return {"category_name": name, "short_name": short}
def load(self, transformed, context) -> dict:
    """Replace the site's project-tag rows with the freshly computed ones.

    Nothing is touched when *transformed* is empty. Otherwise every row of
    ``dws.dws_assistant_project_tag`` for the site of the first record is
    deleted and the new records are inserted one by one.

    Args:
        transformed: Records produced by ``transform``.
        context: Task context (not used here).

    Returns:
        A dict with ``status`` and the ``inserted`` / ``deleted`` counts.
    """
    if not transformed:
        return {"status": "SUCCESS", "counts": {"inserted": 0, "deleted": 0}}

    # Full rebuild: wipe the site's existing tags before inserting.
    target_site = transformed[0]["site_id"]
    self.db.execute(
        "DELETE FROM dws.dws_assistant_project_tag WHERE site_id = %s",
        (target_site,),
    )
    removed = self.db.cursor.rowcount if hasattr(self.db, "cursor") else 0

    # Assembled line by line so the statement text stays exactly as before.
    insert_stmt = (
        "\n"
        "INSERT INTO dws.dws_assistant_project_tag (\n"
        "site_id, tenant_id, assistant_id, time_window,\n"
        "category_code, category_name, short_name,\n"
        "duration_seconds, total_seconds, percentage, is_tagged,\n"
        "computed_at, created_at, updated_at\n"
        ") VALUES (\n"
        "%(site_id)s, %(tenant_id)s, %(assistant_id)s, %(time_window)s,\n"
        "%(category_code)s, %(category_name)s, %(short_name)s,\n"
        "%(duration_seconds)s, %(total_seconds)s, %(percentage)s, %(is_tagged)s,\n"
        "NOW(), NOW(), NOW()\n"
        ")\n"
    )
    for record in transformed:
        self.db.execute(insert_stmt, record)

    added = len(transformed)
    self.logger.info(
        "%s: 删除 %d 条,插入 %d",
        self.get_task_code(), removed, added,
    )
    return {
        "status": "SUCCESS",
        "counts": {"inserted": added, "deleted": removed},
    }

View File

@@ -27,8 +27,9 @@ DWS层任务基类
- 提供滚动窗口统计方法
时间口径说明:
- 周起始日:周一
- 月/季度起始第一天0点
营业日切点：BUSINESS_DAY_START_HOUR（默认 08:00），08:00 前的记录归属前一天
- 周起始日:周一 08:00
- 月/季度起始:第一天 08:00
- 环比规则:对比上一个等长区间
- 前3个月含/不含本月(用于财务筛选)
- 最近半年:不含本月
@@ -52,6 +53,8 @@ from decimal import Decimal, InvalidOperation
from enum import Enum
from typing import Any, Dict, Iterator, List, Optional, Tuple, TypeVar
from neozqyy_shared.datetime_utils import biz_date_sql_expr, business_date, now_shanghai
from ..base_task import BaseTask, TaskContext
# =============================================================================
@@ -81,6 +84,8 @@ class TimeWindow(Enum):
THIS_QUARTER = "THIS_QUARTER" # 本季度
LAST_QUARTER = "LAST_QUARTER" # 上季度
LAST_6_MONTHS = "LAST_6_MONTHS" # 最近半年(不含本月)
LAST_30_DAYS = "LAST_30_DAYS" # 近30天含今天
LAST_60_DAYS = "LAST_60_DAYS" # 近60天含今天
class CourseType(Enum):
@@ -292,18 +297,20 @@ class BaseDwsTask(BaseTask):
获取时间窗口的日期范围(用于财务报表)
时间口径说明:
- 周起始日为周一
- 月/季度起始为第一天0点
营业日切点：BUSINESS_DAY_START_HOUR（默认 08:00）
- 周起始日为周一 08:00
- 月/季度起始为第一天 08:00
Args:
window: 时间窗口枚举
base_date: 基准日期,默认为今天
base_date: 基准日期,默认为当前营业日
Returns:
TimeRange对象
"""
if base_date is None:
base_date = date.today()
cutoff = self.config.get("app.business_day_start_hour", 8)
base_date = business_date(now_shanghai(), cutoff)
if window == TimeWindow.THIS_WEEK:
# 本周(周一起始)
@@ -369,6 +376,16 @@ class BaseDwsTask(BaseTask):
start = self.get_month_first_day(self._shift_months(month_start, -6))
return TimeRange(start=start, end=end)
elif window == TimeWindow.LAST_30_DAYS:
# 近30天含今天
start = base_date - timedelta(days=29)
return TimeRange(start=start, end=base_date)
elif window == TimeWindow.LAST_60_DAYS:
# 近60天含今天
start = base_date - timedelta(days=59)
return TimeRange(start=start, end=base_date)
raise ValueError(f"不支持的时间窗口类型: {window}")
def get_comparison_range(self, time_range: TimeRange) -> TimeRange:
@@ -410,9 +427,9 @@ class BaseDwsTask(BaseTask):
def is_new_hire_in_month(self, hire_date: date, stat_month: date) -> bool:
"""
判断是否为新入职月1日0点后入职)
判断是否为新入职月1日8点后入职)
新入职定档规则月1日0点之后入职的,计算为新入职
新入职定档规则月1日8点之后入职的,计算为新入职
Args:
hire_date: 入职日期
@@ -527,10 +544,12 @@ class BaseDwsTask(BaseTask):
return [dict(row) for row in rows] if rows else []
def _load_area_categories(self) -> Dict[str, Dict[str, Any]]:
"""加载区域分类映射"""
"""加载区域分类映射(支持台桌级细分)"""
sql = """
SELECT
source_area_name, category_code, category_name,
source_area_name, source_table_name,
category_code, category_name,
display_name, short_name,
match_type, match_priority
FROM dws.cfg_area_category
WHERE is_active = TRUE
@@ -540,10 +559,15 @@ class BaseDwsTask(BaseTask):
if not rows:
return {}
# 双层索引:(area_name, table_name) → config
# table_name 为 NULL 时用空字符串作 key
result = {}
for row in rows:
row_dict = dict(row)
result[row_dict['source_area_name']] = row_dict
area = row_dict['source_area_name']
table = row_dict.get('source_table_name') or ''
key = f"{area}\x00{table}" # 复合键,\x00 不会出现在正常名称中
result[key] = row_dict
return result
def _load_skill_types(self) -> Dict[int, Dict[str, Any]]:
@@ -709,50 +733,57 @@ class BaseDwsTask(BaseTask):
# 默认为基础课
return CourseType.BASE
def get_area_category(self, area_name: Optional[str]) -> Dict[str, str]:
def get_area_category(self, area_name: Optional[str], table_name: Optional[str] = None) -> Dict[str, str]:
"""
获取区域分类(支持精确匹配、模糊匹配、兜底)
获取区域分类(支持台桌级精确 > 区域精确 > 模糊 > 兜底)
Args:
area_name: 原始区域名称
area_name: 原始区域名称dim_table.site_table_area_name
table_name: 台桌名称dim_table.table_name用于台桌级细分映射
Returns:
包含 category_code category_name 的字典
包含 category_code, category_name, display_name, short_name 的字典
"""
config = self.load_config_cache()
default = {'category_code': 'OTHER', 'category_name': '其他', 'display_name': '其他', 'short_name': ''}
if not area_name:
# 无区域名称,返回默认
return {'category_code': 'OTHER', 'category_name': '其他区域'}
return default
# 1. 精确匹配
if area_name in config.area_categories:
cat = config.area_categories[area_name]
if cat.get('match_type') == 'EXACT':
return {
'category_code': cat['category_code'],
'category_name': cat['category_name']
}
cats = config.area_categories
# 2. 模糊匹配(按优先级)
for key, cat in config.area_categories.items():
if cat.get('match_type') == 'LIKE':
pattern = key.replace('%', '')
if pattern and pattern in area_name:
return {
'category_code': cat['category_code'],
'category_name': cat['category_name']
}
# 3. 兜底
if 'DEFAULT' in config.area_categories:
cat = config.area_categories['DEFAULT']
def _pick(cat: Dict[str, Any]) -> Dict[str, str]:
return {
'category_code': cat['category_code'],
'category_name': cat['category_name']
'category_name': cat['category_name'],
'display_name': cat.get('display_name') or cat['category_name'],
'short_name': cat.get('short_name') or '',
}
return {'category_code': 'OTHER', 'category_name': '其他区域'}
# 1. 台桌级精确匹配area_name + table_name
if table_name:
key = f"{area_name}\x00{table_name}"
if key in cats and cats[key].get('match_type') == 'EXACT':
return _pick(cats[key])
# 2. 区域级精确匹配area_name + 空 table_name
key = f"{area_name}\x00"
if key in cats and cats[key].get('match_type') == 'EXACT':
return _pick(cats[key])
# 3. 模糊匹配(按优先级,已排序)
for k, cat in cats.items():
if cat.get('match_type') == 'LIKE':
pattern = cat['source_area_name'].replace('%', '')
if pattern and pattern in area_name:
return _pick(cat)
# 4. 兜底
fallback_key = f"DEFAULT\x00"
if fallback_key in cats:
return _pick(cats[fallback_key])
return default
def calculate_sprint_bonus(
self,
@@ -908,8 +939,10 @@ class BaseDwsTask(BaseTask):
offset = 0
cols_str = ", ".join(columns)
# 构建WHERE条件
where_parts = [f"DATE({date_col}) >= %s", f"DATE({date_col}) <= %s"]
# 构建WHERE条件 — 使用营业日归属表达式替代 DATE()
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr(date_col, cutoff)
where_parts = [f"{biz_expr} >= %s", f"{biz_expr} <= %s"]
params: List[Any] = [start_date, end_date]
if where_clause:
@@ -972,15 +1005,24 @@ class BaseDwsTask(BaseTask):
获取助教在指定日期的等级SCD2 as-of取值
助教等级是SCD2维度历史月份不能直接用"当前等级"
需要按有效期as-of join取数。
优先精确匹配 [scd2_start, scd2_end) 区间;
若无匹配(服务日期早于首条 SCD2 或区间有间隙),
回退取 scd2_start_time <= asof_date 的最近一条,
因为从该记录起等级未变。
Args:
assistant_id: 助教ID
asof_date: 取值日期
Returns:
助教等级信息包含level_code和level_name
助教等级信息包含level_code和level_name无记录时返回None
"""
# CHANGE 2026-02-27 | 放宽 SCD2 匹配:去掉 scd2_end_time 条件,
# 改为取 scd2_start_time <= asof_date 的最近一条。
# 原逻辑要求 asof_date 严格落在 [start, end) 区间内,
# 当 SCD2 记录有间隙或服务日期早于首条记录时返回 None
# 导致 dws_assistant_daily_detail.assistant_level_name 出现 NULL
# 下游 monthly 聚合时同一 level_code 有 NULL/非NULL 两种值引发 UK 冲突。
sql = """
SELECT
assistant_id,
@@ -999,13 +1041,30 @@ class BaseDwsTask(BaseTask):
FROM dwd.dim_assistant
WHERE assistant_id = %s
AND scd2_start_time <= %s
AND (scd2_end_time IS NULL OR scd2_end_time > %s)
ORDER BY scd2_start_time DESC
LIMIT 1
"""
rows = self.db.query(sql, (assistant_id, asof_date, asof_date))
rows = self.db.query(sql, (assistant_id, asof_date))
return dict(rows[0]) if rows else None
# CHANGE 2026-02-27 | 新增 level_code → level_name 静态映射
# 当 SCD2 记录晚于服务日期dim_assistant 后期才开始同步)时,
# 用服务记录自带的 assistant_level 做 fallback 映射
LEVEL_CODE_NAME_MAP: dict[int, str] = {
8: "助教管理",
10: "初级",
20: "中级",
30: "高级",
40: "星级",
}
@staticmethod
def level_code_to_name(level_code: int | None) -> str | None:
    """Translate an ``assistant_level`` code into its Chinese display name.

    Returns ``None`` when *level_code* is ``None`` or has no entry in
    ``BaseDwsTask.LEVEL_CODE_NAME_MAP``; the code is coerced with ``int``
    before the lookup.
    """
    return (
        None
        if level_code is None
        else BaseDwsTask.LEVEL_CODE_NAME_MAP.get(int(level_code))
    )
def get_member_card_balance_asof(
self,
member_id: int,

View File

@@ -22,6 +22,8 @@ from datetime import date
from decimal import Decimal
from typing import Any, Dict, List
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import BaseDwsTask
from .dws_helpers import parse_id_list
@@ -39,9 +41,11 @@ class FinanceBaseTask(BaseDwsTask):
end_date: date,
) -> List[Dict[str, Any]]:
"""结账单日汇总(结算头表按日聚合)"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
DATE(pay_time) AS stat_date,
{biz_expr} AS stat_date,
COUNT(*) AS order_count,
COUNT(CASE WHEN member_id != 0 AND member_id IS NOT NULL THEN 1 END) AS member_order_count,
COUNT(CASE WHEN member_id = 0 OR member_id IS NULL THEN 1 END) AS guest_order_count,
@@ -61,13 +65,17 @@ class FinanceBaseTask(BaseDwsTask):
SUM(member_discount_amount) AS member_discount_amount,
SUM(rounding_amount) AS rounding_amount,
SUM(pl_coupon_sale_amount) AS pl_coupon_sale_amount,
-- 消费金额
SUM(consume_money) AS total_consume
-- CHANGE 2026-03-07 | consume_money → items_sum 口径校准
-- consume_money 存在三种历史口径混合DWS 层统一使用 items_sum
SUM(table_charge_money + goods_money + assistant_pd_money
+ assistant_cx_money + electricity_money) AS items_sum
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND DATE(pay_time) >= %s
AND DATE(pay_time) <= %s
GROUP BY DATE(pay_time)
AND {biz_expr} >= %s
AND {biz_expr} <= %s
-- CHANGE 2026-03-07 | 排除退货(6)/退款(7),仅保留台桌结账(1)+商城订单(3)
AND settle_type IN (1, 3)
GROUP BY {biz_expr}
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
@@ -83,9 +91,11 @@ class FinanceBaseTask(BaseDwsTask):
) -> List[Dict[str, Any]]:
"""充值日汇总(充值订单按日聚合)"""
# CHANGE 2026-02-21 | BUG 8: dwd_recharge_order 无 pay_money/gift_money实际字段为 pay_amount/point_amount
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
DATE(pay_time) AS stat_date,
{biz_expr} AS stat_date,
COUNT(*) AS recharge_count,
SUM(pay_amount + point_amount) AS recharge_total,
SUM(pay_amount) AS recharge_cash,
@@ -101,9 +111,9 @@ class FinanceBaseTask(BaseDwsTask):
COUNT(DISTINCT member_id) AS recharge_member_count
FROM dwd.dwd_recharge_order
WHERE site_id = %s
AND DATE(pay_time) >= %s
AND DATE(pay_time) <= %s
GROUP BY DATE(pay_time)
AND {biz_expr} >= %s
AND {biz_expr} <= %s
GROUP BY {biz_expr}
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
@@ -118,9 +128,11 @@ class FinanceBaseTask(BaseDwsTask):
end_date: date,
) -> List[Dict[str, Any]]:
"""团购核销日汇总(结算头表 + 团购核销表联查)"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("sh.pay_time", cutoff)
sql = f"""
SELECT
sh.pay_time::DATE AS stat_date,
{biz_expr} AS stat_date,
COUNT(CASE WHEN sh.coupon_amount > 0 THEN 1 END) AS groupbuy_count,
SUM(
CASE
@@ -137,9 +149,9 @@ class FinanceBaseTask(BaseDwsTask):
ON gr.order_settle_id = sh.order_settle_id
AND COALESCE(gr.is_delete, 0) = 0
WHERE sh.site_id = %s
AND sh.pay_time >= %s
AND sh.pay_time < %s + INTERVAL '1 day'
GROUP BY sh.pay_time::DATE
AND {biz_expr} >= %s
AND {biz_expr} <= %s
GROUP BY {biz_expr}
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
@@ -188,16 +200,18 @@ class FinanceBaseTask(BaseDwsTask):
if not member_ids and not order_ids:
return []
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
pay_time::DATE AS stat_date,
{biz_expr} AS stat_date,
order_settle_id,
member_id,
adjust_amount
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND pay_time >= %s
AND pay_time < %s + INTERVAL '1 day'
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND adjust_amount != 0
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
@@ -242,20 +256,22 @@ class FinanceBaseTask(BaseDwsTask):
end_date: date,
) -> List[Dict[str, Any]]:
"""赠送卡消费汇总(余额变动按日聚合)"""
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr_change = biz_date_sql_expr("change_time", cutoff)
id_list = ", ".join(str(card_id) for card_id in self.GIFT_CARD_TYPE_IDS)
sql = f"""
SELECT
change_time::DATE AS stat_date,
{biz_expr_change} AS stat_date,
SUM(ABS(change_amount)) AS gift_card_consume
FROM dwd.dwd_member_balance_change
WHERE site_id = %s
AND change_time >= %s
AND change_time < %s + INTERVAL '1 day'
AND {biz_expr_change} >= %s
AND {biz_expr_change} <= %s
AND from_type = 1
AND change_amount < 0
AND COALESCE(is_delete, 0) = 0
AND card_type_id IN ({id_list})
GROUP BY change_time::DATE
GROUP BY {biz_expr_change}
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []

View File

@@ -222,6 +222,8 @@ class FinanceDailyTask(FinanceBaseTask):
member_discount = self.safe_decimal(settle.get('member_discount_amount', 0))
rounding_amount = self.safe_decimal(settle.get('rounding_amount', 0))
big_customer_amount = self.safe_decimal(big_customer.get('big_customer_amount', 0))
# 大客户优惠不超过手动调整总额(大客户是 adjust 的子集)
big_customer_amount = min(big_customer_amount, adjust_amount) if adjust_amount > 0 else Decimal('0')
other_discount = adjust_amount - big_customer_amount
if other_discount < 0:
other_discount = Decimal('0')
@@ -229,8 +231,8 @@ class FinanceDailyTask(FinanceBaseTask):
# 赠送卡消费(来自余额变动)
gift_card_consume_amount = self.safe_decimal(gift_card.get('gift_card_consume', 0))
# 优惠合计
discount_total = discount_groupbuy + member_discount + gift_card_consume_amount + adjust_amount + rounding_amount
# 优惠合计(大客户 + 其他 = adjust_amount互斥拆分
discount_total = discount_groupbuy + member_discount + gift_card_consume_amount + big_customer_amount + other_discount + rounding_amount
# 确认收入
confirmed_income = gross_amount - discount_total
@@ -249,9 +251,12 @@ class FinanceDailyTask(FinanceBaseTask):
cash_balance_change = cash_inflow_total - cash_outflow_total
# 卡消费
cash_card_consume = card_pay_amount + balance_pay_amount
# CHANGE 2026-03-07 | balance 恒等式校准
# balance_amount = recharge_card_amount + gift_card_amount
# recharge_card_consume 只取现金充值部分recharge_card_amount不加 balance_amount 避免重复计算
recharge_card_consume = card_pay_amount
gift_card_consume = gift_card_consume_amount
card_consume_total = cash_card_consume + gift_card_consume
card_consume_total = recharge_card_consume + gift_card_consume
# 充值统计
recharge_count = self.safe_int(recharge.get('recharge_count', 0))
@@ -284,7 +289,8 @@ class FinanceDailyTask(FinanceBaseTask):
'discount_groupbuy': discount_groupbuy,
'discount_vip': member_discount,
'discount_gift_card': gift_card_consume_amount,
'discount_manual': adjust_amount,
# CHANGE 2026-03-07 | discount_manual 语义修正:存储大客户优惠(与 discount_other 互斥,两者之和 = adjust_amount
'discount_manual': big_customer_amount,
'discount_rounding': rounding_amount,
'discount_other': other_discount,
# 确认收入
@@ -297,7 +303,7 @@ class FinanceDailyTask(FinanceBaseTask):
'platform_fee_amount': platform_fee_amount,
'recharge_cash_inflow': recharge_cash_inflow,
'card_consume_total': card_consume_total,
'cash_card_consume': cash_card_consume,
'recharge_card_consume': recharge_card_consume,
'gift_card_consume': gift_card_consume,
'cash_outflow_total': cash_outflow_total,
'cash_balance_change': cash_balance_change,

View File

@@ -35,6 +35,8 @@ from datetime import date
from decimal import Decimal
from typing import Any, Dict, List
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import TaskContext
from .finance_base_task import FinanceBaseTask
@@ -112,9 +114,11 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
- rounding_amount: 抹零金额
- pl_coupon_sale_amount: 平台券销售金额团购实付路径1
"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
pay_time::DATE AS stat_date,
{biz_expr} AS stat_date,
-- 团购相关
COALESCE(SUM(coupon_amount), 0) AS coupon_amount_total,
COALESCE(SUM(pl_coupon_sale_amount), 0) AS pl_coupon_sale_total,
@@ -132,10 +136,10 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
COUNT(*) AS total_orders
FROM dwd.dwd_settlement_head
WHERE site_id = %(site_id)s
AND pay_time >= %(start_date)s
AND pay_time < %(end_date)s + INTERVAL '1 day'
AND settle_status = 1 -- 已结账
GROUP BY pay_time::DATE
AND {biz_expr} >= %(start_date)s
AND {biz_expr} <= %(end_date)s
AND settle_type IN (1, 3) -- 台桌结账 + 商城订单,排除退货/撤销
GROUP BY {biz_expr}
ORDER BY stat_date
"""
rows = self.db.query(sql, {
@@ -160,9 +164,11 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
返回:{日期: 团购实付总额}
"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("sh.pay_time", cutoff)
sql = f"""
SELECT
sh.pay_time::DATE AS stat_date,
{biz_expr} AS stat_date,
SUM(
CASE
WHEN sh.pl_coupon_sale_amount > 0 THEN sh.pl_coupon_sale_amount
@@ -174,11 +180,11 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
ON gr.order_settle_id = sh.order_settle_id
AND COALESCE(gr.is_delete, 0) = 0
WHERE sh.site_id = %(site_id)s
AND sh.pay_time >= %(start_date)s
AND sh.pay_time < %(end_date)s + INTERVAL '1 day'
AND sh.settle_status = 1
AND {biz_expr} >= %(start_date)s
AND {biz_expr} <= %(end_date)s
AND sh.settle_type IN (1, 3) -- 台桌结账 + 商城订单,排除退货/撤销
AND sh.coupon_amount > 0 -- 只统计有团购的订单
GROUP BY sh.pay_time::DATE
GROUP BY {biz_expr}
"""
rows = self.db.query(sql, {
'site_id': site_id,
@@ -206,22 +212,24 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
2794699703437125, # 酒水卡
2793266846533445, # 活动抵用券
)
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("change_time", cutoff)
id_list = ", ".join(str(card_id) for card_id in gift_card_type_ids)
sql = f"""
SELECT
change_time::DATE AS stat_date,
{biz_expr} AS stat_date,
card_type_id,
COUNT(*) AS consume_count,
SUM(ABS(change_amount)) AS consume_amount
FROM dwd.dwd_member_balance_change
WHERE site_id = %(site_id)s
AND change_time >= %(start_date)s
AND change_time < %(end_date)s + INTERVAL '1 day'
AND {biz_expr} >= %(start_date)s
AND {biz_expr} <= %(end_date)s
AND from_type = 1
AND change_amount < 0
AND COALESCE(is_delete, 0) = 0
AND card_type_id IN ({id_list})
GROUP BY change_time::DATE, card_type_id
GROUP BY {biz_expr}, card_type_id
"""
rows = self.db.query(sql, {
'site_id': site_id,

View File

@@ -33,6 +33,8 @@ from datetime import date
from decimal import Decimal
from typing import Any, Dict, List, Optional
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import TaskContext
from .finance_base_task import FinanceBaseTask
@@ -94,32 +96,35 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
收入类型分类:
- TABLE_FEE: 台费收入 (table_charge_money)
- GOODS: 商品收入 (goods_money)
- ASSISTANT_BASE: 助教基础课 (assistant_pd_money)
- ASSISTANT_BONUS: 助教附加课 (assistant_cx_money)
- ASSISTANT_PD: 助教陪打收入 (assistant_pd_money)
- ASSISTANT_CX: 助教超休收入 (assistant_cx_money)
"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
pay_time::DATE AS stat_date,
{biz_expr} AS stat_date,
-- 台费收入
COALESCE(SUM(table_charge_money), 0) AS table_fee_income,
COUNT(CASE WHEN table_charge_money > 0 THEN 1 END) AS table_fee_orders,
-- 商品收入
COALESCE(SUM(goods_money), 0) AS goods_income,
COUNT(CASE WHEN goods_money > 0 THEN 1 END) AS goods_orders,
-- 助教基础课收入PD=陪打)
COALESCE(SUM(assistant_pd_money), 0) AS assistant_base_income,
COUNT(CASE WHEN assistant_pd_money > 0 THEN 1 END) AS assistant_base_orders,
-- 助教附加课收入CX=超休/促销)
COALESCE(SUM(assistant_cx_money), 0) AS assistant_bonus_income,
COUNT(CASE WHEN assistant_cx_money > 0 THEN 1 END) AS assistant_bonus_orders,
-- CHANGE 2026-03-07 | ASSISTANT_BASE/BONUS → PD/CX 命名校准
-- 助教陪打收入
COALESCE(SUM(assistant_pd_money), 0) AS assistant_pd_income,
COUNT(CASE WHEN assistant_pd_money > 0 THEN 1 END) AS assistant_pd_orders,
-- 助教超休收入
COALESCE(SUM(assistant_cx_money), 0) AS assistant_cx_income,
COUNT(CASE WHEN assistant_cx_money > 0 THEN 1 END) AS assistant_cx_orders,
-- 总订单数
COUNT(*) AS total_orders
FROM dwd.dwd_settlement_head
WHERE site_id = %(site_id)s
AND pay_time >= %(start_date)s
AND pay_time < %(end_date)s + INTERVAL '1 day'
AND settle_status = 1 -- 已结账
GROUP BY pay_time::DATE
AND {biz_expr} >= %(start_date)s
AND {biz_expr} <= %(end_date)s
AND settle_type IN (1, 3) -- 台桌结账 + 商城订单,排除退货/撤销
GROUP BY {biz_expr}
ORDER BY stat_date
"""
rows = self.db.query(sql, {
@@ -142,46 +147,57 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
"""
# CHANGE 2026-02-22 | BUG 7 修复 | dim_table 主键是 table_id 而非 site_table_id
# JOIN 条件从 dt.site_table_id → dt.table_id事实表侧 site_table_id 不变)
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("sh.pay_time", cutoff)
sql = f"""
WITH area_orders AS (
SELECT
tfl.pay_time::DATE AS stat_date,
{biz_expr} AS stat_date,
dt.site_table_area_name AS area_name,
dt.table_name AS table_name,
tfl.order_settle_id,
COALESCE(tfl.ledger_amount, 0) AS income_amount,
COALESCE(tfl.ledger_time_seconds, 0) AS duration_seconds
COALESCE(tfl.ledger_count, 0) AS duration_seconds
FROM dwd.dwd_table_fee_log tfl
INNER JOIN dwd.dwd_settlement_head sh
ON sh.order_settle_id = tfl.order_settle_id
LEFT JOIN dwd.dim_table dt
ON dt.table_id = tfl.site_table_id
AND dt.scd2_is_current = 1
WHERE tfl.site_id = %(site_id)s
AND tfl.pay_time >= %(start_date)s
AND tfl.pay_time < %(end_date)s + INTERVAL '1 day'
AND {biz_expr} >= %(start_date)s
AND {biz_expr} <= %(end_date)s
AND COALESCE(tfl.is_delete, 0) = 0
UNION ALL
SELECT
asl.start_use_time::DATE AS stat_date,
{biz_expr} AS stat_date,
dt.site_table_area_name AS area_name,
dt.table_name AS table_name,
asl.order_settle_id,
COALESCE(asl.ledger_amount, 0) AS income_amount,
COALESCE(asl.income_seconds, 0) AS duration_seconds
FROM dwd.dwd_assistant_service_log asl
INNER JOIN dwd.dwd_settlement_head sh
ON sh.order_settle_id = asl.order_settle_id
LEFT JOIN dwd.dim_table dt
ON dt.table_id = asl.site_table_id
AND dt.scd2_is_current = 1
WHERE asl.site_id = %(site_id)s
AND asl.start_use_time >= %(start_date)s
AND asl.start_use_time < %(end_date)s + INTERVAL '1 day'
AND {biz_expr} >= %(start_date)s
AND {biz_expr} <= %(end_date)s
AND asl.is_delete = 0
)
SELECT
stat_date,
area_name,
table_name,
COALESCE(SUM(income_amount), 0) AS income_amount,
COALESCE(SUM(duration_seconds), 0) AS duration_seconds,
COUNT(DISTINCT order_settle_id) AS order_count
FROM area_orders
GROUP BY stat_date, area_name
GROUP BY stat_date, area_name, table_name
ORDER BY stat_date, area_name
"""
rows = self.db.query(sql, {
@@ -232,14 +248,14 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
"""
转换按收入类型的数据
将每日汇总数据展开为4条记录台费/商品/基础课/附加课
将每日汇总数据展开为4条记录台费/商品/陪打/超休
"""
# 收入类型定义
# CHANGE 2026-03-07 | ASSISTANT_BASE/BONUS → PD/CX 命名校准
income_types = [
('TABLE_FEE', '台费收入', 'table_fee_income', 'table_fee_orders'),
('GOODS', '商品收入', 'goods_income', 'goods_orders'),
('ASSISTANT_BASE', '助教基础课', 'assistant_base_income', 'assistant_base_orders'),
('ASSISTANT_BONUS', '助教附加课', 'assistant_bonus_income', 'assistant_bonus_orders'),
('ASSISTANT_PD', '助教陪打收入', 'assistant_pd_income', 'assistant_pd_orders'),
('ASSISTANT_CX', '助教超休收入', 'assistant_cx_income', 'assistant_cx_orders'),
]
records = []
@@ -309,8 +325,8 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
duration_seconds = row.get('duration_seconds', 0) or 0
order_count = row.get('order_count', 0) or 0
# 映射区域名称到分类代码
category = self.get_area_category(area_name)
# CHANGE 2026-03-07 | 传入 table_name 支持台桌级映射VIP包厢 V5→斯诺克
category = self.get_area_category(area_name, row.get('table_name'))
category_code = category.get('category_code', 'OTHER')
category_name = category.get('category_name', '其他区域')
@@ -363,7 +379,7 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
"""
兼容旧逻辑的映射方法(当前使用 get_area_category
"""
return self.get_area_category(area_name)
return self.get_area_category(area_name, None)
def load(self, records: List[Dict[str, Any]], context: TaskContext) -> Dict[str, Any]:
"""

View File

@@ -31,6 +31,8 @@ from datetime import date
from decimal import Decimal
from typing import Any, Dict, List
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import TaskContext
from .finance_base_task import FinanceBaseTask
@@ -111,9 +113,11 @@ class FinanceRechargeTask(FinanceBaseTask):
def _extract_recharge_summary(self, site_id: int, start_date: date, end_date: date) -> List[Dict[str, Any]]:
# CHANGE 2026-02-21 | BUG 8: dwd_recharge_order 无 pay_money/gift_money实际字段为 pay_amount/point_amount
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
DATE(pay_time) AS stat_date,
{biz_expr} AS stat_date,
COUNT(*) AS recharge_count,
SUM(pay_amount + point_amount) AS recharge_total,
SUM(pay_amount) AS recharge_cash,
@@ -129,8 +133,8 @@ class FinanceRechargeTask(FinanceBaseTask):
COUNT(DISTINCT member_id) AS recharge_member_count,
COUNT(DISTINCT CASE WHEN is_first = 1 THEN member_id END) AS new_member_count
FROM dwd.dwd_recharge_order
WHERE site_id = %s AND DATE(pay_time) >= %s AND DATE(pay_time) <= %s
GROUP BY DATE(pay_time)
WHERE site_id = %s AND {biz_expr} >= %s AND {biz_expr} <= %s
GROUP BY {biz_expr}
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []

View File

@@ -29,6 +29,8 @@ from datetime import date
from decimal import Decimal
from typing import Any, Dict, List
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import BaseDwsTask, TaskContext
@@ -74,7 +76,9 @@ class GoodsStockDailyTask(BaseDwsTask):
self.get_task_code(), site_id, start_date, end_date,
)
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("fetched_at", cutoff)
sql = f"""
SELECT
site_goods_id,
goods_name,
@@ -92,11 +96,12 @@ class GoodsStockDailyTask(BaseDwsTask):
current_stock,
site_id,
tenant_id,
fetched_at
fetched_at,
{biz_expr} AS biz_date
FROM dwd.dwd_goods_stock_summary
WHERE site_id = %s
AND DATE(fetched_at) >= %s
AND DATE(fetched_at) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
ORDER BY fetched_at
"""
rows = self.query_dwd(sql, (site_id, start_date, end_date))
@@ -135,11 +140,14 @@ class GoodsStockDailyTask(BaseDwsTask):
fetched_at = row.get("fetched_at")
if fetched_at is None:
continue
stat_date = (
fetched_at.date()
if hasattr(fetched_at, "date")
else fetched_at
)
# 使用 SQL 层计算的营业日归属日期
stat_date = row.get("biz_date")
if stat_date is None:
stat_date = (
fetched_at.date()
if hasattr(fetched_at, "date")
else fetched_at
)
site_goods_id = row.get("site_goods_id")
if site_goods_id is None:
continue

View File

@@ -31,6 +31,8 @@ from datetime import date
from decimal import Decimal
from typing import Any, Dict, List
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import BaseDwsTask, TaskContext
@@ -81,7 +83,9 @@ class GoodsStockMonthlyTask(BaseDwsTask):
self.get_task_code(), site_id, start_date, end_date,
)
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("fetched_at", cutoff)
sql = f"""
SELECT
site_goods_id,
goods_name,
@@ -99,11 +103,12 @@ class GoodsStockMonthlyTask(BaseDwsTask):
current_stock,
site_id,
tenant_id,
fetched_at
fetched_at,
{biz_expr} AS biz_date
FROM dwd.dwd_goods_stock_summary
WHERE site_id = %s
AND DATE(fetched_at) >= %s
AND DATE(fetched_at) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
ORDER BY fetched_at
"""
rows = self.query_dwd(sql, (site_id, start_date, end_date))
@@ -141,12 +146,15 @@ class GoodsStockMonthlyTask(BaseDwsTask):
fetched_at = row.get("fetched_at")
if fetched_at is None:
continue
row_date = (
fetched_at.date()
if hasattr(fetched_at, "date")
else fetched_at
)
# 自然月的第一天作为 stat_date
# 使用 SQL 层计算的营业日归属日期
row_date = row.get("biz_date")
if row_date is None:
row_date = (
fetched_at.date()
if hasattr(fetched_at, "date")
else fetched_at
)
# 营业月的第一天作为 stat_date
first_day = _month_first_day(row_date)
site_goods_id = row.get("site_goods_id")
if site_goods_id is None:

View File

@@ -31,6 +31,8 @@ from datetime import date, timedelta
from decimal import Decimal
from typing import Any, Dict, List
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import BaseDwsTask, TaskContext
@@ -82,7 +84,9 @@ class GoodsStockWeeklyTask(BaseDwsTask):
self.get_task_code(), site_id, start_date, end_date,
)
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("fetched_at", cutoff)
sql = f"""
SELECT
site_goods_id,
goods_name,
@@ -100,11 +104,12 @@ class GoodsStockWeeklyTask(BaseDwsTask):
current_stock,
site_id,
tenant_id,
fetched_at
fetched_at,
{biz_expr} AS biz_date
FROM dwd.dwd_goods_stock_summary
WHERE site_id = %s
AND DATE(fetched_at) >= %s
AND DATE(fetched_at) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
ORDER BY fetched_at
"""
rows = self.query_dwd(sql, (site_id, start_date, end_date))
@@ -142,12 +147,15 @@ class GoodsStockWeeklyTask(BaseDwsTask):
fetched_at = row.get("fetched_at")
if fetched_at is None:
continue
row_date = (
fetched_at.date()
if hasattr(fetched_at, "date")
else fetched_at
)
# ISO 周的周一作为 stat_date
# 使用 SQL 层计算的营业日归属日期
row_date = row.get("biz_date")
if row_date is None:
row_date = (
fetched_at.date()
if hasattr(fetched_at, "date")
else fetched_at
)
# 营业周的周一作为 stat_date
monday = _iso_monday(row_date)
site_goods_id = row.get("site_goods_id")
if site_goods_id is None:

View File

@@ -12,6 +12,8 @@ from typing import Any, Dict, List, Optional, Tuple
from .base_index_task import BaseIndexTask
from ..base_dws_task import TaskContext
from neozqyy_shared.datetime_utils import biz_date_sql_expr
@dataclass
class MemberActivityData:
@@ -238,6 +240,8 @@ class MemberIndexBaseTask(BaseIndexTask):
end_date: date,
) -> List[Dict[str, Any]]:
"""提取到店记录(按天去重)"""
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
condition_sql = self._build_visit_condition_sql()
sql = f"""
WITH visit_source AS (
@@ -258,12 +262,12 @@ class MemberIndexBaseTask(BaseIndexTask):
)
SELECT
canonical_member_id AS member_id,
DATE(pay_time) AS visit_date,
{biz_expr} AS visit_date,
MAX(pay_time) AS last_visit_time,
SUM(COALESCE(pay_amount, 0)) AS day_pay_amount
FROM visit_source
WHERE canonical_member_id > 0
GROUP BY canonical_member_id, DATE(pay_time)
GROUP BY canonical_member_id, {biz_expr}
ORDER BY canonical_member_id, visit_date
"""
rows = self.db.query(sql, (site_id, start_date, end_date))

View File

@@ -214,7 +214,7 @@ class RelationIndexTask(BaseIndexTask):
JOIN dwd.dim_assistant d
ON s.user_id = d.user_id
AND d.scd2_is_current = 1
AND COALESCE(d.is_delete, 0) = 0
AND COALESCE(d.leave_status, 0) = 0
WHERE s.site_id = %s
AND s.tenant_member_id > 0
AND s.user_id > 0

View File

@@ -18,6 +18,8 @@ from typing import Any, Dict, List, Optional
from .base_index_task import BaseIndexTask
from ..base_dws_task import TaskContext
from neozqyy_shared.datetime_utils import biz_date_sql_expr
# =============================================================================
# 数据类定义
@@ -333,6 +335,10 @@ class SpendingPowerIndexTask(BaseIndexTask):
short_days = int(params.get('spend_window_short_days', 30))
long_days = int(params.get('spend_window_long_days', 90))
# CHANGE 2026-03-01 | business-day-cutoff 7.6: DATE(pay_time) → 营业日归属表达式
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
# 单条 SQL 同时聚合 30 天和 90 天窗口,避免两次扫描
# INTERVAL 天数通过 f-string 内嵌整数安全site_id 走参数化
sql = f"""
@@ -357,7 +363,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
-- 90 天窗口
SUM(pay_amount) AS spend_90,
COUNT(*) AS orders_90,
COUNT(DISTINCT DATE(pay_time)) AS visit_days_90,
COUNT(DISTINCT {biz_expr}) AS visit_days_90,
COUNT(DISTINCT EXTRACT(ISOYEAR FROM pay_time)::int * 100
+ EXTRACT(WEEK FROM pay_time)::int) AS active_weeks_90,
-- 30 天窗口(子集过滤)
@@ -366,7 +372,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
THEN 1 ELSE 0 END) AS orders_30,
COUNT(DISTINCT CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
THEN DATE(pay_time) END) AS visit_days_30
THEN {biz_expr} END) AS visit_days_30
FROM consume_source
WHERE canonical_member_id > 0
GROUP BY canonical_member_id
@@ -467,12 +473,15 @@ class SpendingPowerIndexTask(BaseIndexTask):
long_days = int(params.get('spend_window_long_days', 90))
alpha = float(params.get('ewma_alpha_daily_spend', 0.3))
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr_s = biz_date_sql_expr("s.pay_time", cutoff)
sql = f"""
WITH consume_source AS (
SELECT
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
AS canonical_member_id,
DATE(s.pay_time) AS pay_date,
{biz_expr_s} AS pay_date,
COALESCE(s.pay_amount, 0) AS pay_amount
FROM dwd.dwd_settlement_head s
LEFT JOIN dwd.dim_member_card_account mca
@@ -516,12 +525,15 @@ class SpendingPowerIndexTask(BaseIndexTask):
long_days = int(params.get('spend_window_long_days', 90))
alpha = float(params.get('ewma_alpha_daily_spend', 0.3))
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr_s = biz_date_sql_expr("s.pay_time", cutoff)
sql = f"""
WITH consume_source AS (
SELECT
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
AS canonical_member_id,
DATE(s.pay_time) AS pay_date,
{biz_expr_s} AS pay_date,
COALESCE(s.pay_amount, 0) AS pay_amount
FROM dwd.dwd_settlement_head s
LEFT JOIN dwd.dim_member_card_account mca
@@ -572,13 +584,17 @@ class SpendingPowerIndexTask(BaseIndexTask):
)
return result
# CHANGE 2026-03-02 | 基数校准改用非零样本中位数,零消费会员不参与校准
# 原因:零消费会员不参与 SPI 有效区分,纳入中位数只会拉低基数
_CALIBRATE_MIN_SAMPLE = 10 # 非零样本最小数量,低于此值回退默认值
def _calibrate_amount_bases(
self, features: Dict[int, SPIMemberFeatures], params: Dict[str, float]
) -> Dict[str, float]:
"""从门店数据计算中位数作为金额压缩基数校准值。
优先级cfg_index_parameters 配置值 > 自动校准中位数 > DEFAULT_PARAMS 默认值。
自动校准中位数 ≤ 0 时回退到 DEFAULT_PARAMS
优先级cfg_index_parameters 配置值 > 非零样本自动校准中位数 > DEFAULT_PARAMS 默认值。
仅使用值 > 0 的样本计算中位数;非零样本数 < _CALIBRATE_MIN_SAMPLE 时回退默认值
"""
# 特征字段 → 对应的 amount_base 参数名
base_extractors: Dict[str, callable] = {
@@ -600,21 +616,23 @@ class SpendingPowerIndexTask(BaseIndexTask):
)
continue
# 从特征数据计算中位数
values = [extractor(f) for f in features.values()]
median_val = self.calculate_median(values)
# 仅取非零样本计算中位数
nonzero_values = [v for v in (extractor(f) for f in features.values()) if v > 0]
if median_val > 0:
if len(nonzero_values) >= self._CALIBRATE_MIN_SAMPLE:
median_val = self.calculate_median(nonzero_values)
calibrated[base_key] = median_val
self.logger.info(
"SPI 基数校准: %s 自动校准为中位数 %.2f", base_key, median_val,
"SPI 基数校准: %s 非零样本 %d/%d,中位数 %.2f",
base_key, len(nonzero_values), len(features), median_val,
)
else:
# 中位数 ≤ 0,回退到 DEFAULT_PARAMS
# 非零样本不足,回退到 DEFAULT_PARAMS
calibrated[base_key] = self.DEFAULT_PARAMS[base_key]
self.logger.warning(
"SPI 基数校准: %s 中位数 %.2f ≤ 0,回退到默认值 %.2f",
base_key, median_val, self.DEFAULT_PARAMS[base_key],
"SPI 基数校准: %s 非零样本 %d 不足(最低 %d,回退到默认值 %.2f",
base_key, len(nonzero_values), self._CALIBRATE_MIN_SAMPLE,
self.DEFAULT_PARAMS[base_key],
)
return calibrated
@@ -747,6 +765,13 @@ class SpendingPowerIndexTask(BaseIndexTask):
)
"""
inserted = 0
# raw score 列为 numeric(10,4)display 列为 numeric(5,2)
# 防止极端数据导致 NumericValueOutOfRange
RAW_MAX = 999999.9999
DISP_MAX = 999.99
def _clamp(v, lo, hi):
return max(lo, min(hi, v))
for f in data_list:
cur.execute(insert_sql, (
f.site_id, f.member_id,
@@ -754,9 +779,14 @@ class SpendingPowerIndexTask(BaseIndexTask):
f.orders_30, f.orders_90,
f.visit_days_30, f.visit_days_90,
f.avg_ticket_90, f.active_weeks_90, f.daily_spend_ewma_90,
f.score_level_raw, f.score_speed_raw, f.score_stability_raw,
f.score_level_display, f.score_speed_display, f.score_stability_display,
f.raw_score, f.display_score,
_clamp(f.score_level_raw, -RAW_MAX, RAW_MAX),
_clamp(f.score_speed_raw, -RAW_MAX, RAW_MAX),
_clamp(f.score_stability_raw, -RAW_MAX, RAW_MAX),
_clamp(f.score_level_display, 0, DISP_MAX),
_clamp(f.score_speed_display, 0, DISP_MAX),
_clamp(f.score_stability_display, 0, DISP_MAX),
_clamp(f.raw_score, -RAW_MAX, RAW_MAX),
_clamp(f.display_score, 0, DISP_MAX),
))
inserted += max(cur.rowcount, 0)

View File

@@ -68,6 +68,10 @@ class DwsMaintenanceTask(BaseDwsTask):
{"table": "dws_finance_recharge_summary", "date_col": "stat_date"},
{"table": "dws_finance_expense_summary", "date_col": "expense_month"},
{"table": "dws_platform_settlement", "date_col": "settlement_date"},
# CHANGE [2026-03-07] intent: 项目标签表纳入历史数据清理范围
# assumptions: computed_at 为清理日期列,与其他表的 stat_date 语义一致
{"table": "dws_assistant_project_tag", "date_col": "computed_at"},
{"table": "dws_member_project_tag", "date_col": "computed_at"},
]
def get_task_code(self) -> str:

View File

@@ -6,9 +6,10 @@
"会员"为粒度,统计消费行为和滚动窗口指标
数据来源:
- dwd_settlement_head: 结账单头表
- dwd_settlement_head: 结账单头表settle_type IN (1,3) 过滤有效订单)
- dim_member: 会员维度
- dim_member_card_account: 会员卡账户
- dwd_recharge_order: 充值订单30/60/90 天窗口统计)
目标表:
dws.dws_member_consumption_summary
@@ -32,6 +33,8 @@ from datetime import date, datetime, timedelta
from decimal import Decimal
from typing import Any, Dict, List, Optional, Set, Tuple
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import BaseDwsTask, TaskContext
from .dws_helpers import mask_mobile, calc_days_since
@@ -209,12 +212,18 @@ class MemberConsumptionTask(BaseDwsTask):
"""
提取会员消费统计(含滚动窗口)
"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
-- CHANGE 2026-03-07 | consume_money → items_sum 口径校准
-- consume_money 存在三种历史口径(A/B/C)混合DWS 层统一使用 items_sum
-- items_sum = table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money
WITH consume_base AS (
SELECT
member_id,
DATE(pay_time) AS consume_date,
consume_money,
{biz_expr} AS consume_date,
table_charge_money + goods_money + assistant_pd_money
+ assistant_cx_money + electricity_money AS items_sum,
table_charge_money,
goods_money,
assistant_pd_money + assistant_cx_money AS assistant_amount
@@ -222,6 +231,9 @@ class MemberConsumptionTask(BaseDwsTask):
WHERE site_id = %s
AND member_id IS NOT NULL
AND member_id != 0
-- CHANGE 2026-03-07 | dwd_settlement_head 无 is_delete 字段,改用 settle_type 过滤
-- settle_type: 1=台桌结账, 3=商城订单; 排除 6=退货, 7=撤销
AND settle_type IN (1, 3)
)
SELECT
member_id,
@@ -229,7 +241,7 @@ class MemberConsumptionTask(BaseDwsTask):
MAX(consume_date) AS last_consume_date,
-- 全量累计
COUNT(*) AS total_visit_count,
SUM(consume_money) AS total_consume_amount,
SUM(items_sum) AS total_consume_amount,
SUM(table_charge_money) AS total_table_fee,
SUM(goods_money) AS total_goods_amount,
SUM(assistant_amount) AS total_assistant_amount,
@@ -240,12 +252,12 @@ class MemberConsumptionTask(BaseDwsTask):
COUNT(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN 1 END) AS visit_count_30d,
COUNT(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN 1 END) AS visit_count_60d,
COUNT(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN 1 END) AS visit_count_90d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '6 days' THEN consume_money ELSE 0 END) AS consume_amount_7d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '9 days' THEN consume_money ELSE 0 END) AS consume_amount_10d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '14 days' THEN consume_money ELSE 0 END) AS consume_amount_15d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN consume_money ELSE 0 END) AS consume_amount_30d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN consume_money ELSE 0 END) AS consume_amount_60d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN consume_money ELSE 0 END) AS consume_amount_90d
SUM(CASE WHEN consume_date >= %s - INTERVAL '6 days' THEN items_sum ELSE 0 END) AS consume_amount_7d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '9 days' THEN items_sum ELSE 0 END) AS consume_amount_10d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '14 days' THEN items_sum ELSE 0 END) AS consume_amount_15d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN items_sum ELSE 0 END) AS consume_amount_30d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN items_sum ELSE 0 END) AS consume_amount_60d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN items_sum ELSE 0 END) AS consume_amount_90d
FROM consume_base
GROUP BY member_id
"""
@@ -257,29 +269,21 @@ class MemberConsumptionTask(BaseDwsTask):
"""
提取会员信息
生日优先级手动补录fdw_app.member_birthday_manual> API 来源dim_member.birthday
FDW 连接失败时降级为仅使用 dim_member.birthday
生日来源dim_member.birthdayAPI 来源
CHANGE 2026-02-26 | 维客线索重构:移除 FDW member_birthday_manual 读取,
生日不再单独补录,归入维客线索"客户基础信息"大类
"""
# CHANGE 2026-02-21 | dim_member 无 site_id 字段,改用 register_site_id
# CHANGE 2026-02-22 | 恢复 birthday 字段C1 迁移已加列),供后续 C2 COALESCE 使用
# CHANGE 2026-02-22 | 需求 B通过事实表反查支持跨店消费会员
# CHANGE 2026-02-22 | 需求 C2COALESCE 优先手动补录生日FDW 失败时降级
sql_with_fdw = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr_create = biz_date_sql_expr("m.create_time", cutoff)
sql = f"""
SELECT
m.member_id,
m.nickname,
m.mobile,
m.member_card_grade_name,
DATE(m.create_time) AS register_date,
{biz_expr_create} AS register_date,
m.recharge_money_sum,
COALESCE(
(SELECT birthday_value
FROM fdw_app.member_birthday_manual
WHERE member_id = m.member_id
ORDER BY recorded_at ASC
LIMIT 1),
m.birthday
) AS birthday
m.birthday
FROM dwd.dim_member m
WHERE m.member_id IN (
SELECT DISTINCT member_id
@@ -289,36 +293,7 @@ class MemberConsumptionTask(BaseDwsTask):
AND member_id != 0
) AND m.scd2_is_current = 1
"""
# CHANGE 2026-02-24 | 修复列名tenant_member_id → member_iddwd_settlement_head 无 tenant_member_id 列)
sql_fallback = """
SELECT
member_id,
nickname,
mobile,
member_card_grade_name,
DATE(create_time) AS register_date,
recharge_money_sum,
birthday
FROM dwd.dim_member
WHERE member_id IN (
SELECT DISTINCT member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND member_id IS NOT NULL
AND member_id != 0
) AND scd2_is_current = 1
"""
try:
rows = self.db.query(sql_with_fdw, (site_id,))
except Exception as exc:
# CHANGE [2026-02-24] FDW 查询失败后事务处于 failed 状态,必须先 rollback 再执行 fallback
self.db.rollback()
# FDW 连接失败,降级为仅使用 dim_member.birthday
self.logger.warning(
"%s: FDW 读取 member_birthday_manual 失败,降级为 dim_member.birthday — %s",
self.get_task_code(), exc,
)
rows = self.db.query(sql_fallback, (site_id,))
rows = self.db.query(sql, (site_id,))
result = {}
for row in (rows or []):
@@ -343,11 +318,11 @@ class MemberConsumptionTask(BaseDwsTask):
balance
FROM dwd.dim_member_card_account
WHERE tenant_member_id IN (
SELECT DISTINCT tenant_member_id
SELECT DISTINCT member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
AND member_id IS NOT NULL
AND member_id != 0
) AND scd2_is_current = 1
AND COALESCE(is_delete, 0) = 0
"""
@@ -390,21 +365,23 @@ class MemberConsumptionTask(BaseDwsTask):
返回: {member_id: {count_30d, count_60d, count_90d,
amount_30d, amount_60d, amount_90d}}
"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
member_id,
COUNT(CASE WHEN DATE(pay_time) >= %s - INTERVAL '29 days' THEN 1 END) AS count_30d,
COUNT(CASE WHEN DATE(pay_time) >= %s - INTERVAL '59 days' THEN 1 END) AS count_60d,
COUNT(CASE WHEN DATE(pay_time) >= %s - INTERVAL '89 days' THEN 1 END) AS count_90d,
COALESCE(SUM(CASE WHEN DATE(pay_time) >= %s - INTERVAL '29 days' THEN pay_amount ELSE 0 END), 0) AS amount_30d,
COALESCE(SUM(CASE WHEN DATE(pay_time) >= %s - INTERVAL '59 days' THEN pay_amount ELSE 0 END), 0) AS amount_60d,
COALESCE(SUM(CASE WHEN DATE(pay_time) >= %s - INTERVAL '89 days' THEN pay_amount ELSE 0 END), 0) AS amount_90d
COUNT(CASE WHEN {biz_expr} >= %s - INTERVAL '29 days' THEN 1 END) AS count_30d,
COUNT(CASE WHEN {biz_expr} >= %s - INTERVAL '59 days' THEN 1 END) AS count_60d,
COUNT(CASE WHEN {biz_expr} >= %s - INTERVAL '89 days' THEN 1 END) AS count_90d,
COALESCE(SUM(CASE WHEN {biz_expr} >= %s - INTERVAL '29 days' THEN pay_amount ELSE 0 END), 0) AS amount_30d,
COALESCE(SUM(CASE WHEN {biz_expr} >= %s - INTERVAL '59 days' THEN pay_amount ELSE 0 END), 0) AS amount_60d,
COALESCE(SUM(CASE WHEN {biz_expr} >= %s - INTERVAL '89 days' THEN pay_amount ELSE 0 END), 0) AS amount_90d
FROM dwd.dwd_recharge_order
WHERE site_id = %s
AND member_id IS NOT NULL
AND member_id != 0
AND pay_time IS NOT NULL
AND DATE(pay_time) <= %s
AND {biz_expr} <= %s
GROUP BY member_id
"""
params = (

View File

@@ -0,0 +1,224 @@
# -*- coding: utf-8 -*-
"""
DWS 客户项目标签任务
按时间窗口计算每位客户在四大项目BILLIARD/SNOOKER/MAHJONG/KTV
消费时长占比占比≥25% 则分配标签。散客member_id=0不参与。
数据链路:
dwd_table_fee_log (ledger_count)
→ JOIN dim_table (site_table_id → table_id, scd2_is_current=1)
→ get_area_category(area_name, table_name)
→ 按 category_code 汇总 → 计算占比 → 写入 dws_member_project_tag
目标表:
dws.dws_member_project_tag
更新策略:
全量删除重建(按 site_id 删除后重新插入所有时间窗口)
"""
from __future__ import annotations
from datetime import date
from decimal import Decimal
from typing import Any, Dict, List, Optional
from tasks.dws.base_dws_task import BaseDwsTask, TimeWindow
from neozqyy_shared.datetime_utils import biz_date_sql_expr
# Only these four project categories take part in the tag computation;
# any other category code is ignored.
VALID_CATEGORIES = {"BILLIARD", "SNOOKER", "MAHJONG", "KTV"}
# The two time windows computed for the member (customer) dashboard.
MEMBER_WINDOWS = [
TimeWindow.LAST_30_DAYS,
TimeWindow.LAST_60_DAYS,
]
# Minimum share (25%) of a member's duration that a category must reach
# for the row to be flagged as tagged.
TAG_THRESHOLD = Decimal("0.25")
class MemberProjectTagTask(BaseDwsTask):
    """DWS task that derives per-member project tags from table-fee durations.

    For every window in ``MEMBER_WINDOWS`` the task sums ``ledger_count`` per
    member and table, maps each table to a category via
    ``get_area_category``, keeps only ``VALID_CATEGORIES`` and stores each
    category's share of the member's total over those categories. A share of
    at least ``TAG_THRESHOLD`` flags the row as tagged. Rows whose
    ``member_id`` is NULL or 0 (walk-in customers) are excluded in SQL.

    Load strategy: delete all rows of the site, then re-insert every window.
    """

    def get_task_code(self) -> str:
        """Return the task code (also used as the log prefix)."""
        return "DWS_MEMBER_PROJECT_TAG"

    def get_target_table(self) -> str:
        """Return the unqualified name of the target table."""
        return "dws_member_project_tag"

    def get_primary_keys(self) -> List[str]:
        """Return the key columns of the target table."""
        return ["site_id", "member_id", "time_window", "category_code"]

    def extract(self, context) -> Dict[str, Any]:
        """Collect table dimension data and per-window member durations.

        Returns a dict with ``window_data`` (window value -> list of rows),
        ``table_info`` (table_id -> dimension row) and ``site_id``.
        """
        site_id = context.store_id
        self.logger.info("%s: 提取客户台费时长数据", self.get_task_code())
        self.load_config_cache()

        table_info = self._extract_table_info(site_id)

        window_data: Dict[str, List[Dict]] = {}
        for window in MEMBER_WINDOWS:
            time_range = self.get_time_window_range(window)
            window_data[window.value] = self._extract_member_durations(
                site_id, time_range.start, time_range.end
            )

        return {
            "window_data": window_data,
            "table_info": table_info,
            "site_id": site_id,
        }

    def _extract_table_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
        """Return current ``dim_table`` rows of the site, keyed by ``table_id``."""
        sql = """
            SELECT table_id, table_name, site_table_area_name AS area_name
            FROM dwd.dim_table
            WHERE site_id = %s AND scd2_is_current = 1
        """
        rows = self.db.query(sql, (site_id,))
        return {r["table_id"]: dict(r) for r in (rows or [])}

    def _extract_member_durations(
        self, site_id: int, start_date: date, end_date: date
    ) -> List[Dict[str, Any]]:
        """Return summed ``ledger_count`` per (member, table) for the date range.

        The row's date is the business date derived from ``ledger_end_time``
        with the configured ``app.business_day_start_hour`` (default 8); both
        range bounds are inclusive. Deleted rows and rows without a real
        member (NULL or 0) are filtered out.
        """
        cutoff = self.config.get("app.business_day_start_hour", 8)
        biz_expr = biz_date_sql_expr("tfl.ledger_end_time", cutoff)
        # The sum is exposed as duration_seconds; presumably ledger_count is
        # stored in seconds -- TODO confirm against the DWD table definition.
        sql = f"""
            SELECT
                tfl.member_id,
                tfl.site_table_id AS table_id,
                COALESCE(SUM(tfl.ledger_count), 0) AS duration_seconds
            FROM dwd.dwd_table_fee_log tfl
            WHERE tfl.site_id = %(site_id)s
              AND {biz_expr} >= %(start_date)s
              AND {biz_expr} <= %(end_date)s
              AND COALESCE(tfl.is_delete, 0) = 0
              AND tfl.member_id IS NOT NULL
              AND tfl.member_id != 0
            GROUP BY tfl.member_id, tfl.site_table_id
        """
        rows = self.db.query(sql, {
            "site_id": site_id,
            "start_date": start_date,
            "end_date": end_date,
        })
        return [dict(r) for r in rows] if rows else []

    def transform(self, extracted: Dict[str, Any], context) -> List[Dict[str, Any]]:
        """Turn extracted durations into one record per member/window/category.

        Each record carries the category's seconds, the member's total seconds
        over the valid categories, the share rounded to four decimals, and the
        ``is_tagged`` flag (rounded share >= ``TAG_THRESHOLD``).
        """
        table_info = extracted["table_info"]
        site_id = extracted["site_id"]
        tenant_id = getattr(context, "tenant_id", 0) or 0

        # Display names depend only on the category code, so resolve each code
        # once instead of rescanning the config cache for every output row.
        display_by_code: Dict[str, Dict[str, str]] = {}

        results: List[Dict[str, Any]] = []
        for window_value, rows in extracted["window_data"].items():
            # member_id -> category_code -> seconds
            member_cats: Dict[int, Dict[str, int]] = {}
            for row in rows:
                mid = row["member_id"]
                tid = row["table_id"]
                secs = self.safe_int(row["duration_seconds"])
                if secs <= 0:
                    continue

                # Unknown tables fall through with both names set to None.
                tinfo = table_info.get(tid, {})
                cat = self.get_area_category(
                    tinfo.get("area_name"), tinfo.get("table_name")
                )
                code = cat.get("category_code", "OTHER")
                if code not in VALID_CATEGORIES:
                    continue

                per_member = member_cats.setdefault(mid, {})
                per_member[code] = per_member.get(code, 0) + secs

            for mid, cats in member_cats.items():
                total = sum(cats.values())
                if total <= 0:
                    continue
                for code, secs in cats.items():
                    pct = Decimal(str(secs)) / Decimal(str(total))
                    pct = pct.quantize(Decimal("0.0001"))

                    cat_info = display_by_code.get(code)
                    if cat_info is None:
                        cat_info = self._get_category_display(code)
                        display_by_code[code] = cat_info

                    results.append({
                        "site_id": site_id,
                        "tenant_id": tenant_id,
                        "member_id": mid,
                        "time_window": window_value,
                        "category_code": code,
                        "category_name": cat_info["category_name"],
                        "short_name": cat_info["short_name"],
                        "duration_seconds": secs,
                        "total_seconds": total,
                        "percentage": float(pct),
                        "is_tagged": pct >= TAG_THRESHOLD,
                    })

        self.logger.info(
            "%s: 生成 %d 条标签记录(其中 %d 条达标)",
            self.get_task_code(),
            len(results),
            sum(1 for r in results if r["is_tagged"]),
        )
        return results

    def _get_category_display(self, code: str) -> Dict[str, str]:
        """Return ``category_name`` and ``short_name`` for a category code.

        The first config-cache entry whose ``category_code`` matches wins;
        otherwise a built-in fallback table is used, and finally the code
        itself with its first character as the short name.
        """
        cache = self.load_config_cache()
        for cat in cache.area_categories.values():
            if cat.get("category_code") == code:
                return {
                    "category_name": cat.get("display_name") or cat.get("category_name", code),
                    "short_name": cat.get("short_name", code[:1]),
                }
        fallback = {
            "BILLIARD": ("🎱 中式/追分", "🎱"),
            "SNOOKER": ("斯诺克", ""),
            "MAHJONG": ("🀄 麻将/棋牌", "🀄"),
            "KTV": ("🎤 团建/K歌", "🎤"),
        }
        name, short = fallback.get(code, (code, code[:1]))
        return {"category_name": name, "short_name": short}

    def load(self, transformed, context) -> dict:
        """Replace the site's rows in the target table with ``transformed``.

        Returns a dict with ``status`` and ``counts`` (``inserted``/``deleted``).
        """
        # NOTE(review): an empty result returns before the delete, so rows
        # from a previous run stay in place -- confirm this is intended.
        if not transformed:
            return {"status": "SUCCESS", "counts": {"inserted": 0, "deleted": 0}}

        site_id = transformed[0]["site_id"]

        delete_sql = "DELETE FROM dws.dws_member_project_tag WHERE site_id = %s"
        self.db.execute(delete_sql, (site_id,))
        # The deleted count is only available when the db wrapper exposes a
        # ``cursor`` attribute; otherwise it is reported as 0.
        deleted = self.db.cursor.rowcount if hasattr(self.db, "cursor") else 0

        insert_sql = """
            INSERT INTO dws.dws_member_project_tag (
                site_id, tenant_id, member_id, time_window,
                category_code, category_name, short_name,
                duration_seconds, total_seconds, percentage, is_tagged,
                computed_at, created_at, updated_at
            ) VALUES (
                %(site_id)s, %(tenant_id)s, %(member_id)s, %(time_window)s,
                %(category_code)s, %(category_name)s, %(short_name)s,
                %(duration_seconds)s, %(total_seconds)s, %(percentage)s, %(is_tagged)s,
                NOW(), NOW(), NOW()
            )
        """
        for row in transformed:
            self.db.execute(insert_sql, row)

        self.logger.info(
            "%s: 删除 %d 条,插入 %d",
            self.get_task_code(), deleted, len(transformed),
        )
        return {
            "status": "SUCCESS",
            "counts": {"inserted": len(transformed), "deleted": deleted},
        }

View File

@@ -35,6 +35,8 @@ from datetime import date, datetime, timedelta
from decimal import Decimal
from typing import Any, Dict, List, Optional, Set, Tuple
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import BaseDwsTask, TaskContext
from .dws_helpers import mask_mobile
@@ -152,7 +154,7 @@ class MemberVisitTask(BaseDwsTask):
# 获取区域分类
area_name = tbl_info.get('area_name')
area_cat = self.get_area_category(area_name)
area_cat = self.get_area_category(area_name, tbl_info.get('table_name'))
# 构建助教服务JSON
assistant_services_json = self._build_assistant_services_json(services)
@@ -175,7 +177,7 @@ class MemberVisitTask(BaseDwsTask):
# 会员信息
'member_nickname': memb_info.get('nickname'),
'member_mobile': self._mask_mobile(memb_info.get('mobile')),
# CHANGE 2026-02-22 | 恢复从 dim_member.birthday 读取
# CHANGE 2026-02-26 | 生日仅从 dim_member.birthday 读取API 来源)
'member_birthday': memb_info.get('birthday'),
# 台桌信息
'table_id': table_id,
@@ -187,12 +189,20 @@ class MemberVisitTask(BaseDwsTask):
'goods_amount': self.safe_decimal(settle.get('goods_money', 0)),
'assistant_amount': self.safe_decimal(settle.get('assistant_pd_money', 0)) + \
self.safe_decimal(settle.get('assistant_cx_money', 0)),
'total_consume': self.safe_decimal(settle.get('consume_money', 0)),
# CHANGE 2026-03-07 | consume_money → items_sum 口径校准
'total_consume': (
self.safe_decimal(settle.get('table_charge_money', 0))
+ self.safe_decimal(settle.get('goods_money', 0))
+ self.safe_decimal(settle.get('assistant_pd_money', 0))
+ self.safe_decimal(settle.get('assistant_cx_money', 0))
+ self.safe_decimal(settle.get('electricity_money', 0))
),
'total_discount': self._calc_total_discount(settle),
'actual_pay': self.safe_decimal(settle.get('pay_amount', 0)),
# 支付方式
'cash_pay': self.safe_decimal(settle.get('pay_amount', 0)),
'cash_card_pay': self.safe_decimal(settle.get('balance_amount', 0)),
'balance_pay': self.safe_decimal(settle.get('balance_amount', 0)),
'recharge_card_pay': self.safe_decimal(settle.get('recharge_card_amount', 0)),
'gift_card_pay': self.safe_decimal(settle.get('gift_card_amount', 0)),
'groupbuy_pay': self.safe_decimal(settle.get('coupon_amount', 0)),
# 时长
@@ -205,7 +215,49 @@ class MemberVisitTask(BaseDwsTask):
return results
# load() 已移除——使用 BaseDwsTask 默认实现DATE_COL="visit_date"
# CHANGE 2026-02-27 | bugfix: 覆盖 load(),在标准 delete-by-window 后
# 额外按 order_settle_id 清理旧数据,防止 biz_date 切换后残留记录导致唯一约束冲突。
# 背景visit_date 从 pay_time::date 改为 biz_date_sql_expr 后,凌晨订单的
# visit_date 前移一天,旧数据不在新窗口的 delete 范围内insert 时触发
# uk_dws_member_visit (site_id, member_id, order_settle_id) 冲突。
def load(self, transformed, context: "TaskContext") -> dict:
    """Delete-then-insert load with an extra purge keyed by order_settle_id.

    After the regular delete over the task window (on ``DATE_COL``, falling
    back to ``stat_date``), rows of the same site whose ``order_settle_id``
    appears in this batch are deleted as well, wherever their date falls,
    and the batch is then bulk-inserted.

    Returns a dict with ``counts`` and, for non-empty input, ``extra``
    holding both delete counts.
    """
    if not transformed:
        return {"counts": {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}}

    window_deleted = self.delete_existing_data(
        context, date_col=self.DATE_COL or "stat_date"
    )

    # Settle ids of this batch; falsy ids (None / 0) are left out.
    settle_ids = []
    for record in transformed:
        if record.get("order_settle_id"):
            settle_ids.append(record["order_settle_id"])

    stale_deleted = 0
    if settle_ids:
        target = f"{self.DWS_SCHEMA}.{self.get_target_table()}"
        in_list = ",".join("%s" for _ in settle_ids)
        purge_sql = f"DELETE FROM {target} WHERE site_id = %s AND order_settle_id IN ({in_list})"
        site_id = transformed[0].get("site_id", context.store_id)
        with self.db.conn.cursor() as cur:
            cur.execute(purge_sql, [site_id, *settle_ids])
            stale_deleted = cur.rowcount
        if stale_deleted:
            self.logger.info(
                "%s: 额外清理残留旧数据 %dorder_settle_id 去重)",
                self.get_task_code(), stale_deleted,
            )

    inserted = self.bulk_insert(transformed)

    counts = {
        "fetched": len(transformed),
        "inserted": inserted,
        "updated": 0,
        "skipped": 0,
        "errors": 0,
    }
    return {
        "counts": counts,
        "extra": {"deleted": window_deleted, "extra_deleted": stale_deleted},
    }
# ==========================================================================
# 数据提取方法
@@ -220,7 +272,9 @@ class MemberVisitTask(BaseDwsTask):
"""
提取结账单
"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
order_settle_id,
order_trade_no,
@@ -228,8 +282,9 @@ class MemberVisitTask(BaseDwsTask):
member_id,
create_time,
pay_time,
DATE(pay_time) AS visit_date,
consume_money,
{biz_expr} AS visit_date,
-- CHANGE 2026-03-07 | 新增 electricity_money 用于 items_sum 计算
electricity_money,
pay_amount,
table_charge_money,
goods_money,
@@ -244,10 +299,12 @@ class MemberVisitTask(BaseDwsTask):
recharge_card_amount
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND DATE(pay_time) >= %s
AND DATE(pay_time) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND member_id IS NOT NULL
AND member_id != 0
-- CHANGE 2026-03-07 | 排除退货(6)/退款(7),仅保留台桌结账(1)+商城订单(3)
AND settle_type IN (1, 3)
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
@@ -261,7 +318,9 @@ class MemberVisitTask(BaseDwsTask):
"""
提取助教服务明细
"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("start_use_time", cutoff)
sql = f"""
SELECT
order_settle_id,
site_assistant_id AS assistant_id,
@@ -270,8 +329,8 @@ class MemberVisitTask(BaseDwsTask):
ledger_amount
FROM dwd.dwd_assistant_service_log
WHERE site_id = %s
AND DATE(start_use_time) >= %s
AND DATE(start_use_time) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND is_delete = 0
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
@@ -286,14 +345,16 @@ class MemberVisitTask(BaseDwsTask):
"""
提取台费时长(真实秒数)
"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("ledger_end_time", cutoff)
sql = f"""
SELECT
order_settle_id,
SUM(COALESCE(real_table_use_seconds, 0)) AS table_use_seconds
FROM dwd.dwd_table_fee_log
WHERE site_id = %s
AND DATE(ledger_end_time) >= %s
AND DATE(ledger_end_time) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND COALESCE(is_delete, 0) = 0
GROUP BY order_settle_id
"""
@@ -304,61 +365,26 @@ class MemberVisitTask(BaseDwsTask):
"""
提取会员信息
生日优先级手动补录fdw_app.member_birthday_manual> API 来源dim_member.birthday
FDW 连接失败时降级为仅使用 dim_member.birthday
生日来源dim_member.birthdayAPI 来源
CHANGE 2026-02-26 | 维客线索重构:移除 FDW member_birthday_manual 读取,
生日不再单独补录,归入维客线索"客户基础信息"大类
"""
# CHANGE 2026-02-21 | dim_member 无 site_id 字段,改用 register_site_id
# CHANGE 2026-02-22 | 恢复 birthday 字段C1 迁移已加列)
# CHANGE 2026-02-22 | 需求 B通过事实表反查支持跨店消费会员
# CHANGE 2026-02-22 | 需求 C2COALESCE 优先手动补录生日FDW 失败时降级
sql_with_fdw = """
sql = """
SELECT
m.member_id,
m.nickname,
m.mobile,
COALESCE(
(SELECT birthday_value
FROM fdw_app.member_birthday_manual
WHERE member_id = m.member_id
ORDER BY recorded_at ASC
LIMIT 1),
m.birthday
) AS birthday
m.birthday
FROM dwd.dim_member m
WHERE m.member_id IN (
SELECT DISTINCT tenant_member_id
SELECT DISTINCT member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
AND member_id IS NOT NULL
AND member_id != 0
) AND m.scd2_is_current = 1
"""
sql_fallback = """
SELECT
member_id,
nickname,
mobile,
birthday
FROM dwd.dim_member
WHERE member_id IN (
SELECT DISTINCT tenant_member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
) AND scd2_is_current = 1
"""
try:
rows = self.db.query(sql_with_fdw, (site_id,))
except Exception as exc:
# CHANGE [2026-02-24] FDW 查询失败后事务处于 failed 状态,必须先 rollback 再执行 fallback
self.db.rollback()
# FDW 连接失败,降级为仅使用 dim_member.birthday
self.logger.warning(
"%s: FDW 读取 member_birthday_manual 失败,降级为 dim_member.birthday — %s",
self.get_task_code(), exc,
)
rows = self.db.query(sql_fallback, (site_id,))
rows = self.db.query(sql, (site_id,))
return {r['member_id']: dict(r) for r in (rows or [])}

View File

@@ -15,6 +15,11 @@ from psycopg2.extras import Json, execute_values
from models.parsers import TypeParser
from tasks.base_task import BaseTask
from utils.windowing import build_window_segments, calc_window_minutes, calc_window_days, format_window_days
from config.pipeline_config import PipelineConfig
from pipeline.models import PipelineRequest, PipelineResult, WriteResult
from pipeline.unified_pipeline import UnifiedPipeline
from utils.cancellation import CancellationToken
from utils.task_log_buffer import TaskLogBuffer
ColumnTransform = Callable[[Any], Any]
@@ -67,6 +72,15 @@ class OdsTaskSpec:
# WINDOW 模式的时间列名
snapshot_time_column: str | None = None
# ── Detail_Mode 可选配置(二级详情拉取)──
detail_endpoint: str | None = None # 详情接口 endpoint
detail_param_builder: Callable[[dict], dict] | None = None # 详情请求参数构造函数
detail_target_table: str | None = None # 详情数据目标表名
detail_data_path: Tuple[str, ...] | None = None # 详情数据的 data_path
detail_list_key: str | None = None # 详情数据的 list_key
detail_id_column: str | None = None # 从列表数据中提取 ID 的列名
detail_process_fn: Callable[[Any], list[dict]] | None = None # 自定义详情处理函数
def __post_init__(self) -> None:
if self.snapshot_mode == SnapshotMode.WINDOW and not self.snapshot_time_column:
raise ValueError(
@@ -88,7 +102,10 @@ class BaseOdsTask(BaseTask):
def execute(self, cursor_data: dict | None = None) -> dict:
spec = self.SPEC
# 创建任务级日志缓冲区,任务完成后一次性输出,避免多任务日志交叉
self._log_buf = TaskLogBuffer(spec.code, self.logger)
self.logger.info("开始执行%s (ODS)", spec.code)
self._log_buf.info("开始执行%s (ODS)", spec.code)
window_start, window_end, window_minutes = self._resolve_window(cursor_data)
segments = build_window_segments(
@@ -111,6 +128,11 @@ class BaseOdsTask(BaseTask):
total_segments,
format_window_days(total_days),
)
self._log_buf.info(
"窗口拆分为 %s 段(共 %s 天)",
total_segments,
format_window_days(total_days),
)
store_id = TypeParser.parse_int(self.config.get("app.store_id"))
if not store_id:
@@ -141,6 +163,10 @@ class BaseOdsTask(BaseTask):
]
has_is_delete = self._table_has_column(spec.table_name, "is_delete")
# 构建 PipelineConfig支持任务级覆盖
pipeline_config = PipelineConfig.from_app_config(self.config, spec.code)
cancel_token = getattr(self, '_cancel_token', None) or CancellationToken()
try:
for idx, (seg_start, seg_end) in enumerate(segments, start=1):
params = self._build_params(
@@ -158,11 +184,12 @@ class BaseOdsTask(BaseTask):
"errors": 0,
"deleted": 0,
}
# 快照软删除需要的共享状态process_fn 闭包写入)
segment_keys: set[tuple] = set()
# CHANGE 2026-02-18 | 收集 WINDOW 模式下 API 返回数据的实际最早时间戳
segment_earliest_time: datetime | None = None
segment_earliest_time: list[datetime | None] = [None]
# CHANGE [2026-02-24] 收集 API 返回数据的实际最晚时间戳,用于 late-cutoff 保护
segment_latest_time: datetime | None = None
segment_latest_time: list[datetime | None] = [None]
self.logger.info(
"%s: 开始执行(%s/%s),窗口[%s ~ %s]",
@@ -172,52 +199,51 @@ class BaseOdsTask(BaseTask):
seg_start,
seg_end,
)
self._log_buf.info(
"开始执行(%s/%s),窗口[%s ~ %s]",
idx, total_segments, seg_start, seg_end,
)
for _, page_records, _, response_payload in self.api.iter_paginated(
endpoint=spec.endpoint,
params=params,
page_size=page_size,
data_path=spec.data_path,
list_key=spec.list_key,
):
if (
snapshot_missing_delete
and has_is_delete
and business_pk_cols
and snapshot_mode != SnapshotMode.NONE
):
segment_keys.update(self._collect_business_keys(page_records, business_pk_cols))
# CHANGE 2026-02-18 | 收集实际最早时间戳,用于 early-cutoff 保护
if (
snapshot_protect_early_cutoff
and snapshot_mode == SnapshotMode.WINDOW
and snapshot_time_column
):
page_earliest = self._collect_earliest_time(
page_records, snapshot_time_column
)
if page_earliest is not None:
if segment_earliest_time is None or page_earliest < segment_earliest_time:
segment_earliest_time = page_earliest
# CHANGE [2026-02-24] 收集实际最晚时间戳,用于 late-cutoff 保护
page_latest = self._collect_latest_time(
page_records, snapshot_time_column
)
if page_latest is not None:
if segment_latest_time is None or page_latest > segment_latest_time:
segment_latest_time = page_latest
inserted, updated, skipped = self._insert_records_schema_aware(
table=spec.table_name,
records=page_records,
response_payload=response_payload,
source_file=source_file,
source_endpoint=spec.endpoint if spec.include_source_endpoint else None,
)
segment_counts["fetched"] += len(page_records)
segment_counts["inserted"] += inserted
segment_counts["updated"] += updated
segment_counts["skipped"] += skipped
# 构建 UnifiedPipeline 并执行当前 segment
pipeline = UnifiedPipeline(
api_client=self.api,
db_connection=self.db,
logger=self.logger,
config=pipeline_config,
cancel_token=cancel_token,
)
requests = self._build_requests(
spec, params, page_size, idx - 1,
)
process_fn = self._build_process_fn(
spec,
snapshot_missing_delete=snapshot_missing_delete,
has_is_delete=has_is_delete,
business_pk_cols=business_pk_cols,
snapshot_mode=snapshot_mode,
snapshot_protect_early_cutoff=snapshot_protect_early_cutoff,
snapshot_time_column=snapshot_time_column,
segment_keys=segment_keys,
segment_earliest_time=segment_earliest_time,
segment_latest_time=segment_latest_time,
)
write_fn = self._build_write_fn(spec, source_file)
pipe_result = pipeline.run(requests, process_fn, write_fn)
# 将 PipelineResult 映射到 segment_counts
segment_counts["fetched"] = pipe_result.total_fetched
segment_counts["inserted"] = pipe_result.total_inserted
segment_counts["updated"] = pipe_result.total_updated
segment_counts["skipped"] = pipe_result.total_skipped
segment_counts["errors"] = (
pipe_result.request_failures
+ pipe_result.processing_failures
+ pipe_result.write_failures
)
# 快照软删除pipeline 完成后执行,保留原有逻辑)
if (
snapshot_missing_delete
and has_is_delete
@@ -230,28 +256,36 @@ class BaseOdsTask(BaseTask):
if (
snapshot_protect_early_cutoff
and snapshot_mode == SnapshotMode.WINDOW
and segment_earliest_time is not None
and segment_earliest_time > seg_start
and segment_earliest_time[0] is not None
and segment_earliest_time[0] > seg_start
):
self.logger.info(
"%s: early-cutoff 保护生效,软删除窗口起点从 %s 收窄至 %s",
spec.code, seg_start, segment_earliest_time,
spec.code, seg_start, segment_earliest_time[0],
)
effective_window_start = segment_earliest_time
self._log_buf.info(
"early-cutoff 保护生效,软删除窗口起点从 %s 收窄至 %s",
seg_start, segment_earliest_time[0],
)
effective_window_start = segment_earliest_time[0]
# CHANGE [2026-02-24] late-cutoff 保护:用 API 实际最晚时间戳收窄软删除范围
# 防止 recent endpoint 数据保留期滚动导致窗口尾部数据消失时误标删除
effective_window_end = seg_end
if (
snapshot_protect_early_cutoff
and snapshot_mode == SnapshotMode.WINDOW
and segment_latest_time is not None
and segment_latest_time < seg_end
and segment_latest_time[0] is not None
and segment_latest_time[0] < seg_end
):
self.logger.info(
"%s: late-cutoff 保护生效,软删除窗口终点从 %s 收窄至 %s",
spec.code, seg_end, segment_latest_time,
spec.code, seg_end, segment_latest_time[0],
)
effective_window_end = segment_latest_time
self._log_buf.info(
"late-cutoff 保护生效,软删除窗口终点从 %s 收窄至 %s",
seg_end, segment_latest_time[0],
)
effective_window_end = segment_latest_time[0]
deleted = self._mark_missing_as_deleted(
table=spec.table_name,
business_pk_cols=business_pk_cols,
@@ -279,6 +313,12 @@ class BaseOdsTask(BaseTask):
format_window_days(processed_days),
format_window_days(total_days),
)
self._log_buf.info(
"完成(%s/%s),已处理 %s/%s",
idx, total_segments,
format_window_days(processed_days),
format_window_days(total_days),
)
if total_segments > 1:
segment_results.append(
{
@@ -291,13 +331,76 @@ class BaseOdsTask(BaseTask):
}
)
# ── Detail_Mode列表拉取全部完成后执行二级详情拉取 ──
detail_counts = {
"detail_success": 0,
"detail_failure": 0,
"detail_skipped": 0,
}
if spec.detail_endpoint:
self.logger.info("%s: 列表阶段完成,进入详情拉取阶段", spec.code)
self._log_buf.info("列表阶段完成,进入详情拉取阶段")
detail_pipeline = UnifiedPipeline(
api_client=self.api,
db_connection=self.db,
logger=self.logger,
config=pipeline_config,
cancel_token=cancel_token,
)
detail_requests = self._build_detail_requests(spec)
detail_process_fn = self._build_detail_process_fn(spec)
detail_write_fn = self._build_detail_write_fn(spec, source_file)
detail_result = detail_pipeline.run(
detail_requests, detail_process_fn, detail_write_fn,
)
self.db.commit()
# 填充详情统计:成功 = 完成的请求数,失败 = 请求失败数,跳过 = 0无跳过逻辑
detail_counts["detail_success"] = detail_result.completed_requests
detail_counts["detail_failure"] = (
detail_result.request_failures
+ detail_result.processing_failures
+ detail_result.write_failures
)
# 记录详情阶段每个失败项的错误日志
for err in detail_result.errors:
self.logger.error(
"%s: 详情请求失败, detail_id=%s, error=%s",
spec.code,
err.get("detail_id", err.get("endpoint", "unknown")),
err.get("error", "unknown"),
)
self._log_buf.error(
"详情请求失败, detail_id=%s, error=%s",
err.get("detail_id", err.get("endpoint", "unknown")),
err.get("error", "unknown"),
)
self.logger.info(
"%s: 详情拉取完成, success=%d, failure=%d, skipped=%d",
spec.code,
detail_counts["detail_success"],
detail_counts["detail_failure"],
detail_counts["detail_skipped"],
)
self._log_buf.info(
"详情拉取完成, success=%d, failure=%d, skipped=%d",
detail_counts["detail_success"],
detail_counts["detail_failure"],
detail_counts["detail_skipped"],
)
self.logger.info("%s ODS 任务完成: %s", spec.code, total_counts)
self._log_buf.info("ODS 任务完成: %s", total_counts)
allow_empty_advance = bool(self.config.get("run.allow_empty_result_advance", False))
status = "SUCCESS"
if total_counts["fetched"] == 0 and not allow_empty_advance:
status = "PARTIAL"
result = self._build_result(status, total_counts)
# 附加详情统计到结果
result["detail"] = detail_counts
overall_start = segments[0][0]
overall_end = segments[-1][1]
result["window"] = {
@@ -311,14 +414,223 @@ class BaseOdsTask(BaseTask):
result["request_params"] = params_list[0]
else:
result["request_params"] = params_list
# 任务完成,将缓冲日志一次性输出到父 logger
self._log_buf.flush()
return result
except Exception:
self.db.rollback()
total_counts["errors"] += 1
self.logger.error("%s ODS 任务失败", spec.code, exc_info=True)
self._log_buf.error("ODS 任务失败")
# 异常时也 flush确保已收集的日志不丢失
self._log_buf.flush()
raise
# ── Pipeline 集成方法 ──
def _build_requests(
self,
spec: OdsTaskSpec,
params: dict,
page_size: int,
segment_index: int,
) -> Iterable[PipelineRequest]:
"""生成 PipelineRequest 序列,内部使用 iter_paginated 处理分页。
每一页的数据通过 _prefetched_response 预取UnifiedPipeline 的
_request_loop 跳过 api.post() 直接使用预取数据。
"""
for page_num, page_records, total, response_payload in self.api.iter_paginated(
endpoint=spec.endpoint,
params=params,
page_size=page_size,
data_path=spec.data_path,
list_key=spec.list_key,
):
yield PipelineRequest(
endpoint=spec.endpoint,
params=params,
page_size=page_size,
data_path=spec.data_path,
list_key=spec.list_key,
segment_index=segment_index,
_prefetched_response={
"records": page_records,
"response_payload": response_payload,
},
)
def _build_process_fn(
self,
spec: OdsTaskSpec,
*,
snapshot_missing_delete: bool,
has_is_delete: bool,
business_pk_cols: list[str],
snapshot_mode: SnapshotMode,
snapshot_protect_early_cutoff: bool,
snapshot_time_column: str | None,
segment_keys: set[tuple],
segment_earliest_time: list[datetime | None],
segment_latest_time: list[datetime | None],
) -> Callable[[Any], list[dict]]:
"""构建处理函数:从预取响应中提取记录,收集快照软删除所需的共享状态。"""
def process_fn(response: Any) -> list[dict]:
# response 是 _prefetched_response 字典
records = response.get("records", [])
if not records:
return []
# 收集业务主键(快照软删除用)
if (
snapshot_missing_delete
and has_is_delete
and business_pk_cols
and snapshot_mode != SnapshotMode.NONE
):
segment_keys.update(
self._collect_business_keys(records, business_pk_cols)
)
# CHANGE 2026-02-18 | 收集实际最早时间戳,用于 early-cutoff 保护
if (
snapshot_protect_early_cutoff
and snapshot_mode == SnapshotMode.WINDOW
and snapshot_time_column
):
page_earliest = self._collect_earliest_time(
records, snapshot_time_column
)
if page_earliest is not None:
if segment_earliest_time[0] is None or page_earliest < segment_earliest_time[0]:
segment_earliest_time[0] = page_earliest
# CHANGE [2026-02-24] 收集实际最晚时间戳,用于 late-cutoff 保护
page_latest = self._collect_latest_time(
records, snapshot_time_column
)
if page_latest is not None:
if segment_latest_time[0] is None or page_latest > segment_latest_time[0]:
segment_latest_time[0] = page_latest
return records
return process_fn
def _build_write_fn(
self,
spec: OdsTaskSpec,
source_file: str | None,
) -> Callable[[list[dict]], WriteResult]:
"""构建写入函数:调用 _insert_records_schema_aware返回 WriteResult。"""
def write_fn(records: list[dict]) -> WriteResult:
inserted, updated, skipped = self._insert_records_schema_aware(
table=spec.table_name,
records=records,
response_payload=None,
source_file=source_file,
source_endpoint=spec.endpoint if spec.include_source_endpoint else None,
)
return WriteResult(inserted=inserted, updated=updated, skipped=skipped)
return write_fn
# ── Detail_Mode 方法 ──
def _build_detail_requests(
self,
spec: OdsTaskSpec,
) -> Iterable[PipelineRequest]:
"""从已写入 ODS 的记录中提取 ID 列表,生成详情请求序列。
仅在 spec.detail_endpoint 已配置时调用。查询 ODS 目标表获取
detail_id_column 列的值,为每个 ID 生成一个 is_detail=True 的
PipelineRequest。
"""
if not spec.detail_endpoint or not spec.detail_id_column:
return
# 从 ODS 目标表查询刚写入的 ID 列表
id_col = spec.detail_id_column
table = spec.table_name
query = f"SELECT DISTINCT {id_col} FROM {table} WHERE {id_col} IS NOT NULL"
try:
cursor = self.db.cursor()
cursor.execute(query)
rows = cursor.fetchall()
cursor.close()
except Exception:
self.logger.error(
"%s: 查询详情 ID 列表失败, table=%s, column=%s",
spec.code, table, id_col, exc_info=True,
)
return
if not rows:
self.logger.info("%s: 无需拉取详情ID 列表为空", spec.code)
return
self.logger.info(
"%s: 开始详情拉取,共 %d 个 ID", spec.code, len(rows),
)
for (record_id,) in rows:
# 使用 detail_param_builder 构造请求参数,或默认 {"id": record_id}
if spec.detail_param_builder:
params = spec.detail_param_builder({"id": record_id})
else:
params = {"id": record_id}
yield PipelineRequest(
endpoint=spec.detail_endpoint,
params=params,
data_path=spec.detail_data_path or ("data",),
list_key=spec.detail_list_key,
is_detail=True,
detail_id=record_id,
)
def _build_detail_process_fn(
self,
spec: OdsTaskSpec,
) -> Callable[[Any], list[dict]]:
"""构建详情阶段的处理函数:从预取响应中提取记录。
优先使用 spec.detail_process_fn自定义处理函数
否则回退到默认的 response.get("records") 提取。
"""
if spec.detail_process_fn is not None:
return spec.detail_process_fn
def detail_process_fn(response: Any) -> list[dict]:
records = response.get("records", [])
return records
return detail_process_fn
def _build_detail_write_fn(
self,
spec: OdsTaskSpec,
source_file: str | None,
) -> Callable[[list[dict]], WriteResult]:
"""构建详情阶段的写入函数:写入 detail_target_table。"""
target_table = spec.detail_target_table or spec.table_name
def detail_write_fn(records: list[dict]) -> WriteResult:
inserted, updated, skipped = self._insert_records_schema_aware(
table=target_table,
records=records,
response_payload=None,
source_file=source_file,
source_endpoint=spec.detail_endpoint if spec.include_source_endpoint else None,
)
return WriteResult(inserted=inserted, updated=updated, skipped=skipped)
return detail_write_fn
def _resolve_window(self, cursor_data: dict | None) -> tuple[datetime, datetime, int]:
base_start, base_end, base_minutes = self._get_time_window(cursor_data)
@@ -909,6 +1221,18 @@ class BaseOdsTask(BaseTask):
_fill_missing("siteid", [site_profile.get("siteId"), site_profile.get("id")])
_fill_missing("sitename", [site_profile.get("shop_name"), site_profile.get("siteName")])
# 通用 siteid 注入ODS 表有 siteid 列但 API 记录不含时,从 app.store_id 填充
# 场景goods_stock_summary 等按门店请求但返回记录不含 siteId 的接口
ods_has_siteid = any(c[0].lower() == "siteid" for c in cols_info)
if ods_has_siteid:
store_id = TypeParser.parse_int(self.config.get("app.store_id"))
if store_id:
for item in merged_records:
merged = item["merged"]
existing = self._get_value_case_insensitive(merged, "siteid")
if existing in (None, "", 0):
merged["siteid"] = store_id
business_keys = [c for c in pk_cols if str(c).lower() != "content_hash"]
# P2(A): 使用 spec 上的显式开关控制去重,不再隐式依赖 has_fetched_at
# CHANGE 2026-02-19 | force_full_update 时仍查最新 hash用于判断是否回退到历史版本
@@ -1240,6 +1564,56 @@ def _bool_col(name: str, *sources: str) -> ColumnSpec:
return ColumnSpec(column=name, sources=sources, transform=_to_bool)
# ── 团购详情接口自定义 process_fn ──
# API 原始响应结构:{"data": {"groupPurchasePackage": {...}, "packageCouponAssistants": [...], ...}, "code": 0}
# detail_mode 下 process_fn 收到的是 api.post() 的原始 JSON 响应
def _group_package_detail_process_fn(response: Any) -> list[dict]:
"""从 QueryPackageCouponInfo 响应中提取字段,组装为一条扁平记录。
匹配 ods.group_buy_package_details 表结构。
"""
data = response.get("data")
if not data:
return []
pkg = data.get("groupPurchasePackage")
if not pkg:
return []
# 结构化字段(来自 data.groupPurchasePackage
record: dict[str, Any] = {
"coupon_id": pkg.get("id"),
"package_name": pkg.get("package_name"),
"duration": pkg.get("duration"),
"start_time": pkg.get("start_time"),
"end_time": pkg.get("end_time"),
"add_start_clock": pkg.get("add_start_clock"),
"add_end_clock": pkg.get("add_end_clock"),
"is_enabled": pkg.get("is_enabled"),
"is_delete": pkg.get("is_delete"),
"site_id": pkg.get("site_id"),
"tenant_id": pkg.get("tenant_id"),
"create_time": pkg.get("create_time"),
"creator_name": pkg.get("creator_name"),
}
# JSONB 数组字段
record["table_area_ids"] = pkg.get("tableAreaId")
record["table_area_names"] = pkg.get("tableAreaNameList")
record["assistant_services"] = data.get("packageCouponAssistants")
record["groupon_site_infos"] = data.get("grouponSiteInfos")
record["package_services"] = data.get("packagePackageService")
record["coupon_details_list"] = data.get("packageCouponDetailsList")
# content_hash对业务字段不含 content_hash、payload、fetched_at计算 SHA256
hash_input = json.dumps(record, sort_keys=True, ensure_ascii=False, default=str)
record["content_hash"] = hashlib.sha256(hash_input.encode("utf-8")).hexdigest()
# payload完整的 data 对象
record["payload"] = data
return [record]
ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
@@ -1251,9 +1625,18 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
data_path=("data",),
list_key="assistantInfos",
pk_columns=(_int_col("id", "id", required=True),),
extra_params={
"workStatusEnum": 0,
"dingTalkSynced": 0,
"leaveId": 0,
"criticismStatus": 0,
"signStatus": -1,
},
include_source_endpoint=False,
include_fetched_at=False,
include_record_index=True,
requires_window=False,
time_fields=None,
snapshot_mode=SnapshotMode.FULL_TABLE,
description="助教账号档案 ODSSearchAssistantInfo -> assistantInfos 原始 JSON",
),
@@ -1314,7 +1697,8 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
include_source_endpoint=False,
include_fetched_at=False,
include_record_index=True,
requires_window=False,
requires_window=True,
time_fields=("startTime", "endTime"),
snapshot_mode=SnapshotMode.WINDOW,
snapshot_time_column="create_time",
description="门店商品销售流水 ODSGetGoodsSalesList -> orderGoodsLedgers 原始 JSON",
@@ -1499,6 +1883,13 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
include_record_index=True,
requires_window=False,
snapshot_mode=SnapshotMode.FULL_TABLE,
# ── Detail_Mode 配置:团购详情接口 ──
detail_endpoint="/PackageCoupon/QueryPackageCouponInfo",
detail_param_builder=lambda rec: {"couponId": rec["id"]},
detail_target_table="ods.group_buy_package_details",
detail_data_path=("data",),
detail_id_column="id",
detail_process_fn=_group_package_detail_process_fn,
description="团购套餐定义 ODSQueryPackageCouponList -> packageCouponList 原始 JSON",
),
OdsTaskSpec(

View File

@@ -24,7 +24,8 @@ WITH base AS (
COALESCE(sh.member_discount_amount, 0) AS member_discount_amount,
COALESCE(sh.adjust_amount, 0) AS manual_discount_amount,
COALESCE(sh.pay_amount, 0) AS total_paid_amount,
COALESCE(sh.balance_amount, 0) + COALESCE(sh.recharge_card_amount, 0) + COALESCE(sh.gift_card_amount, 0) AS stored_card_deduct,
-- balance_amount = recharge_card_amount + gift_card_amount恒等式不可三者相加
COALESCE(sh.balance_amount, 0) AS stored_card_deduct,
COALESCE(sh.coupon_amount, 0) AS total_coupon_deduction,
COALESCE(sh.table_charge_money, 0) AS settle_table_fee_amount,
COALESCE(sh.assistant_pd_money, 0) + COALESCE(sh.assistant_cx_money, 0) AS settle_assistant_service_amount,

View File

@@ -22,7 +22,6 @@ class ManualIngestTask(BaseTask):
(("member_stored_value_cards",), "ods.member_stored_value_cards"),
(("recharge_settlements",), "ods.recharge_settlements"),
(("settlement_records",), "ods.settlement_records"),
(("assistant_cancellation_records",), "ods.assistant_cancellation_records"),
(("assistant_accounts_master",), "ods.assistant_accounts_master"),
(("assistant_service_records",), "ods.assistant_service_records"),
(("site_tables_master",), "ods.site_tables_master"),
@@ -47,7 +46,6 @@ class ManualIngestTask(BaseTask):
"ods.member_stored_value_cards": {"pk": "id"},
"ods.recharge_settlements": {"pk": "id"},
"ods.settlement_records": {"pk": "id"},
"ods.assistant_cancellation_records": {"pk": "id", "json_cols": ["siteProfile"]},
"ods.assistant_accounts_master": {"pk": "id"},
"ods.assistant_service_records": {"pk": "id", "json_cols": ["siteProfile"]},
"ods.site_tables_master": {"pk": "id"},

View File

@@ -51,6 +51,7 @@ class DwsVerifier(BaseVerifier):
"time_column": "stat_date",
"source_table": "dwd.dwd_settlement_head",
"source_time_column": "pay_time",
# CHANGE 2026-03-07 | 补齐 settle_type 过滤,与 finance_base_task 对齐
"agg_sql": """
SELECT
site_id,
@@ -59,9 +60,10 @@ class DwsVerifier(BaseVerifier):
COALESCE(SUM(pay_amount), 0) as cash_pay_amount,
COALESCE(SUM(table_charge_money), 0) as table_fee_amount,
COALESCE(SUM(goods_money), 0) as goods_amount,
COALESCE(SUM(table_charge_money) + SUM(goods_money) + COALESCE(SUM(assistant_pd_money), 0) + COALESCE(SUM(assistant_cx_money), 0), 0) as gross_amount
COALESCE(SUM(table_charge_money + goods_money + assistant_pd_money + assistant_cx_money), 0) as gross_amount
FROM dwd.dwd_settlement_head
WHERE pay_time >= %s AND pay_time < %s
AND settle_type IN (1, 3)
GROUP BY site_id, tenant_id, DATE(pay_time)
""",
"compare_columns": ["cash_pay_amount", "table_fee_amount", "goods_amount", "gross_amount"],

View File

@@ -97,7 +97,7 @@ class IndexVerifier(BaseVerifier):
JOIN dwd.dim_assistant d
ON s.user_id = d.user_id
AND d.scd2_is_current = 1
AND COALESCE(d.is_delete, 0) = 0
AND COALESCE(d.leave_status, 0) = 0
CROSS JOIN params p
WHERE s.last_use_time >= p.start_time
AND s.last_use_time < p.end_time