微信小程序页面迁移校验之前 P5任务处理之前
This commit is contained in:
@@ -184,11 +184,18 @@ class BaseTask:
|
||||
if not (override_start and override_end):
|
||||
raise ValueError("run.window_override.start/end 需要同时提供")
|
||||
|
||||
# CHANGE 2026-03-04 | 纯日期字符串按业务日分割:start→当天biz_hour, end→次日biz_hour
|
||||
biz_hour = int(self.config.get("app.business_day_start_hour", 8))
|
||||
|
||||
window_start = override_start
|
||||
if isinstance(window_start, str):
|
||||
window_start = dtparser.parse(window_start)
|
||||
if isinstance(window_start, datetime) and window_start.tzinfo is None:
|
||||
window_start = window_start.replace(tzinfo=self.tz)
|
||||
# 纯日期(时分秒全零)→ 当天业务日起始时刻
|
||||
if window_start.hour == 0 and window_start.minute == 0 and window_start.second == 0:
|
||||
window_start = window_start.replace(hour=biz_hour, tzinfo=self.tz)
|
||||
else:
|
||||
window_start = window_start.replace(tzinfo=self.tz)
|
||||
elif isinstance(window_start, datetime):
|
||||
window_start = window_start.astimezone(self.tz)
|
||||
|
||||
@@ -196,7 +203,11 @@ class BaseTask:
|
||||
if isinstance(window_end, str):
|
||||
window_end = dtparser.parse(window_end)
|
||||
if isinstance(window_end, datetime) and window_end.tzinfo is None:
|
||||
window_end = window_end.replace(tzinfo=self.tz)
|
||||
# 纯日期(时分秒全零)→ 次日业务日起始时刻
|
||||
if window_end.hour == 0 and window_end.minute == 0 and window_end.second == 0:
|
||||
window_end = (window_end + timedelta(days=1)).replace(hour=biz_hour, tzinfo=self.tz)
|
||||
else:
|
||||
window_end = window_end.replace(tzinfo=self.tz)
|
||||
elif isinstance(window_end, datetime):
|
||||
window_end = window_end.astimezone(self.tz)
|
||||
|
||||
|
||||
@@ -5,12 +5,14 @@ from __future__ import annotations
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from datetime import date, datetime
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from typing import Any, Dict, Iterable, List, Sequence
|
||||
|
||||
from psycopg2.extras import RealDictCursor, execute_batch, execute_values
|
||||
from psycopg2.extras import Json, RealDictCursor, execute_batch, execute_values
|
||||
|
||||
from database.connection import DatabaseConnection
|
||||
from tasks.base_task import BaseTask, TaskContext
|
||||
|
||||
|
||||
@@ -70,6 +72,16 @@ class DwdLoadTask(BaseTask):
|
||||
_NUMERIC_RE = re.compile(r"^[+-]?\d+(?:\.\d+)?$")
|
||||
_BOOL_STRINGS = {"true", "false", "1", "0", "yes", "no", "y", "n", "t", "f"}
|
||||
|
||||
# 详情表 LEFT JOIN 配置:当 DWD 表需要从额外的 ODS 详情表获取字段时使用
|
||||
# detail_columns 中的列在 FACT_MAPPINGS 中以 detail."col" 形式引用
|
||||
DETAIL_JOIN_CONFIG: dict[str, dict] = {
|
||||
"dwd.dim_groupbuy_package_ex": {
|
||||
"detail_table": "ods.group_buy_package_details",
|
||||
"join_condition": 'ods_main."id" = detail."coupon_id"',
|
||||
"detail_columns": ["table_area_ids", "table_area_names", "assistant_services", "groupon_site_infos"],
|
||||
},
|
||||
}
|
||||
|
||||
def _strip_scd2_keys(self, pk_cols: Sequence[str]) -> list[str]:
|
||||
return [c for c in pk_cols if c.lower() not in self.SCD_COLS]
|
||||
|
||||
@@ -113,7 +125,10 @@ class DwdLoadTask(BaseTask):
|
||||
) -> str:
|
||||
if key_exprs and order_col:
|
||||
distinct_on = ", ".join(key_exprs)
|
||||
order_by = ", ".join([*key_exprs, f'"{order_col}" DESC NULLS LAST'])
|
||||
# order_col 可能是预格式化的表达式(如 ods_main."fetched_at"),此时直接使用;
|
||||
# 否则包裹双引号
|
||||
order_col_expr = order_col if '"' in order_col else f'"{order_col}"'
|
||||
order_by = ", ".join([*key_exprs, f'{order_col_expr} DESC NULLS LAST'])
|
||||
return (
|
||||
f"SELECT DISTINCT ON ({distinct_on}) {select_cols_sql} "
|
||||
f"FROM {ods_table_sql} {where_sql} ORDER BY {order_by}"
|
||||
@@ -303,6 +318,11 @@ class DwdLoadTask(BaseTask):
|
||||
("table_area_id_list", "table_area_id_list", None),
|
||||
("package_type", "type", None),
|
||||
("tenant_coupon_sale_order_item_id", "tenantcouponsaleorderitemid", None),
|
||||
# CHANGE 2026-03-05: 团购详情字段(来自 ods.group_buy_package_details,通过 LEFT JOIN 关联)
|
||||
("table_area_ids", 'detail."table_area_ids"', None),
|
||||
("table_area_names", 'detail."table_area_names"', None),
|
||||
("assistant_services", 'detail."assistant_services"', None),
|
||||
("groupon_site_infos", 'detail."groupon_site_infos"', None),
|
||||
],
|
||||
"dwd.dim_staff": [
|
||||
("staff_id", "id", None),
|
||||
@@ -311,16 +331,16 @@ class DwdLoadTask(BaseTask):
|
||||
],
|
||||
"dwd.dim_staff_ex": [
|
||||
("staff_id", "id", None),
|
||||
("rank_name", "rankname", None),
|
||||
("cashier_point_id", "cashierpointid", "bigint"),
|
||||
("cashier_point_name", "cashierpointname", None),
|
||||
("group_id", "groupid", "bigint"),
|
||||
("group_name", "groupname", None),
|
||||
("system_user_id", "systemuserid", "bigint"),
|
||||
("tenant_org_id", "tenantorgid", "bigint"),
|
||||
("rank_name", "rank_name", None),
|
||||
("cashier_point_id", "cashier_point_id", "bigint"),
|
||||
("cashier_point_name", "cashier_point_name", None),
|
||||
("group_id", "group_id", "bigint"),
|
||||
("group_name", "group_name", None),
|
||||
("system_user_id", "system_user_id", "bigint"),
|
||||
("tenant_org_id", "tenant_org_id", "bigint"),
|
||||
("auth_code_create", "auth_code_create", "timestamptz"),
|
||||
("create_time", "create_time", "timestamptz"),
|
||||
("user_roles", "userroles", "jsonb"),
|
||||
("user_roles", "user_roles", "jsonb"),
|
||||
],
|
||||
# 事实表主键及关键差异列
|
||||
"dwd.dwd_table_fee_log": [
|
||||
@@ -602,6 +622,7 @@ class DwdLoadTask(BaseTask):
|
||||
],
|
||||
# 库存汇总:goods_stock_summary(ODS 列名全小写)
|
||||
# CHANGE 2026-02-21: BUG 10 fix — ODS 列名是小写(sitegoodsid),不是驼峰
|
||||
# CHANGE 2026-03-01: 补 site_id 映射(ODS 入库时从 app.store_id 注入 siteid)
|
||||
"dwd.dwd_goods_stock_summary": [
|
||||
("site_goods_id", '"sitegoodsid"', "bigint"), # 门店商品 ID(PK)
|
||||
("goods_name", '"goodsname"', None), # 商品名称
|
||||
@@ -617,6 +638,7 @@ class DwdLoadTask(BaseTask):
|
||||
("range_sale_money", '"rangesalemoney"', "numeric"), # 销售金额
|
||||
("range_inventory", '"rangeinventory"', "numeric"), # 盘点调整量
|
||||
("current_stock", '"currentstock"', "numeric"), # 当前库存
|
||||
("site_id", '"siteid"', "bigint"), # 门店 ID(ODS 入库时注入)
|
||||
],
|
||||
# 库存变动流水:goods_stock_movements(ODS 列名全小写)
|
||||
# CHANGE 2026-02-21: BUG 10 fix — ODS 列名是小写,不是驼峰
|
||||
@@ -653,11 +675,12 @@ class DwdLoadTask(BaseTask):
|
||||
|
||||
def load(self, extracted: dict[str, Any], context: TaskContext) -> dict[str, Any]:
|
||||
"""
|
||||
遍历映射关系,维度执行 SCD2 合并,事实表按时间增量插入。
|
||||
并行遍历映射关系,维度执行 SCD2 合并,事实表按时间增量插入。
|
||||
|
||||
说明:
|
||||
- 为避免长事务导致锁堆积/中断后遗留 idle-in-tx,本任务按“每张表一次事务”提交;
|
||||
- 单表失败会回滚该表并继续后续表,最终在结果中汇总错误信息。
|
||||
- 使用 ThreadPoolExecutor 并行处理多张表,每张表使用独立数据库连接和事务;
|
||||
- 单表失败会回滚该表并继续后续表,最终在结果中汇总错误信息;
|
||||
- 并行线程数通过 AppConfig 的 dwd.parallel_workers 配置(默认 4)。
|
||||
"""
|
||||
now = extracted["now"]
|
||||
summary: List[Dict[str, Any]] = []
|
||||
@@ -668,54 +691,109 @@ class DwdLoadTask(BaseTask):
|
||||
if env_only and not only_tables_cfg:
|
||||
only_tables_cfg = [t.strip() for t in env_only.split(",") if t.strip()]
|
||||
only_tables = {str(t).strip().lower() for t in only_tables_cfg if str(t).strip()} if only_tables_cfg else set()
|
||||
with self.db.conn.cursor(cursor_factory=RealDictCursor) as cur:
|
||||
for dwd_table, ods_table in self.TABLE_MAP.items():
|
||||
if only_tables and dwd_table.lower() not in only_tables and self._table_base(dwd_table).lower() not in only_tables:
|
||||
continue
|
||||
started = time.monotonic()
|
||||
self.logger.info("DWD 装载开始:%s <= %s", dwd_table, ods_table)
|
||||
|
||||
parallel_workers = int(self.config.get("dwd.parallel_workers", 4))
|
||||
|
||||
# 筛选需要处理的表
|
||||
tables_to_process: list[tuple[str, str]] = []
|
||||
for dwd_table, ods_table in self.TABLE_MAP.items():
|
||||
if only_tables and dwd_table.lower() not in only_tables and self._table_base(dwd_table).lower() not in only_tables:
|
||||
continue
|
||||
tables_to_process.append((dwd_table, ods_table))
|
||||
|
||||
if not tables_to_process:
|
||||
return {"tables": summary, "errors": 0, "error_details": errors}
|
||||
|
||||
# 并行调度:每张表在独立线程中执行,使用独立数据库连接
|
||||
with ThreadPoolExecutor(max_workers=parallel_workers) as executor:
|
||||
futures = {}
|
||||
for dwd_table, ods_table in tables_to_process:
|
||||
future = executor.submit(
|
||||
self._process_single_table,
|
||||
dwd_table, ods_table, now, context,
|
||||
)
|
||||
futures[future] = dwd_table
|
||||
|
||||
for future in as_completed(futures):
|
||||
dwd_table = futures[future]
|
||||
try:
|
||||
dwd_cols = self._get_columns(cur, dwd_table)
|
||||
ods_cols = self._get_columns(cur, ods_table)
|
||||
if not dwd_cols:
|
||||
self.logger.warning("跳过 %s:未能获取 DWD 列信息", dwd_table)
|
||||
continue
|
||||
|
||||
if self._table_base(dwd_table).startswith("dim_"):
|
||||
dim_counts = self._merge_dim(cur, dwd_table, ods_table, dwd_cols, ods_cols, now)
|
||||
self.db.conn.commit()
|
||||
summary.append({"table": dwd_table, "mode": "SCD2", **dim_counts})
|
||||
else:
|
||||
dwd_types = self._get_column_types(cur, dwd_table, "dwd")
|
||||
ods_types = self._get_column_types(cur, ods_table, "ods")
|
||||
fact_counts = self._merge_fact_increment(
|
||||
cur,
|
||||
dwd_table,
|
||||
ods_table,
|
||||
dwd_cols,
|
||||
ods_cols,
|
||||
dwd_types,
|
||||
ods_types,
|
||||
window_start=context.window_start,
|
||||
window_end=context.window_end,
|
||||
)
|
||||
self.db.conn.commit()
|
||||
summary.append({"table": dwd_table, "mode": "INCREMENT", **fact_counts})
|
||||
|
||||
elapsed = time.monotonic() - started
|
||||
self.logger.info("DWD 装载完成:%s,用时 %.2fs", dwd_table, elapsed)
|
||||
table_result = future.result()
|
||||
summary.append(table_result)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
try:
|
||||
self.db.conn.rollback()
|
||||
except Exception:
|
||||
pass
|
||||
elapsed = time.monotonic() - started
|
||||
self.logger.exception("DWD 装载失败:%s,用时 %.2fs,err=%s", dwd_table, elapsed, exc)
|
||||
self.logger.error(
|
||||
"DWD 并行装载失败:%s,err=%s", dwd_table, exc,
|
||||
)
|
||||
errors.append({"table": dwd_table, "error": str(exc)})
|
||||
continue
|
||||
|
||||
return {"tables": summary, "errors": len(errors), "error_details": errors}
|
||||
|
||||
def _process_single_table(
    self,
    dwd_table: str,
    ods_table: str,
    now: datetime,
    context: TaskContext,
) -> Dict[str, Any]:
    """Load a single DWD table using a connection owned by this call.

    A new ``DatabaseConnection`` is built from the settings of ``self.db``,
    used for exactly one table, and closed in ``finally``. Tables whose
    base name starts with ``dim_`` go through ``_merge_dim``; all others go
    through ``_merge_fact_increment`` bounded by the context window. The
    work is committed on success and rolled back on failure.

    Args:
        dwd_table: Target DWD table name.
        ods_table: Source ODS table name.
        now: Timestamp forwarded to the dimension merge.
        context: Task context supplying ``window_start`` / ``window_end``.

    Returns:
        A summary dict with ``table`` and ``mode`` (``"SCD2"``,
        ``"INCREMENT"`` or ``"SKIPPED"``) plus the counters reported by the
        merge routine.

    Raises:
        Exception: Any failure is logged and re-raised so that the caller's
            ``future.result()`` observes it.
    """
    t0 = time.monotonic()
    self.logger.info("DWD 装载开始:%s <= %s", dwd_table, ods_table)

    # Dedicated connection for this call, cloned from the task-level settings.
    worker_db = DatabaseConnection(
        dsn=self.db._dsn,
        session=self.db._session,
        connect_timeout=self.db._connect_timeout,
    )
    try:
        with worker_db.conn.cursor(cursor_factory=RealDictCursor) as cursor:
            target_cols = self._get_columns(cursor, dwd_table)
            source_cols = self._get_columns(cursor, ods_table)
            if not target_cols:
                # Without target column metadata there is nothing to load.
                self.logger.warning("跳过 %s:未能获取 DWD 列信息", dwd_table)
                return {"table": dwd_table, "mode": "SKIPPED", "inserted": 0, "updated": 0}

            is_dimension = self._table_base(dwd_table).startswith("dim_")
            if is_dimension:
                mode = "SCD2"
                counts = self._merge_dim(
                    cursor, dwd_table, ods_table, target_cols, source_cols, now
                )
            else:
                mode = "INCREMENT"
                target_types = self._get_column_types(cursor, dwd_table, "dwd")
                source_types = self._get_column_types(cursor, ods_table, "ods")
                counts = self._merge_fact_increment(
                    cursor,
                    dwd_table,
                    ods_table,
                    target_cols,
                    source_cols,
                    target_types,
                    source_types,
                    window_start=context.window_start,
                    window_end=context.window_end,
                )

            # One transaction per table: commit as soon as the merge returns.
            worker_db.conn.commit()
            outcome = {"table": dwd_table, "mode": mode, **counts}

            self.logger.info(
                "DWD 装载完成:%s,用时 %.2fs", dwd_table, time.monotonic() - t0
            )
            return outcome
    except Exception as exc:
        # Best-effort rollback; a failure here must not mask the original error.
        try:
            worker_db.conn.rollback()
        except Exception:
            pass
        self.logger.exception(
            "DWD 装载失败:%s,用时 %.2fs,err=%s", dwd_table, time.monotonic() - t0, exc,
        )
        # Propagate so the submitting thread sees it via future.result().
        raise
    finally:
        worker_db.close()
|
||||
|
||||
# ---------------------- 辅助方法 ----------------------
|
||||
def _get_columns(self, cur, table: str) -> List[str]:
|
||||
"""获取指定表的列名(小写)。"""
|
||||
@@ -872,6 +950,17 @@ class DwdLoadTask(BaseTask):
|
||||
ods_types = self._get_column_types(cur, ods_table, "ods")
|
||||
ts_types = {"timestamp without time zone", "timestamp with time zone"}
|
||||
table_sql = self._format_table(ods_table, "ods")
|
||||
# CHANGE 2026-03-05: 详情表 LEFT JOIN 支持 — 当 DWD 表配置了 DETAIL_JOIN_CONFIG 时,
|
||||
# 给 ODS 主表加别名 ods_main,LEFT JOIN 详情表为 detail,
|
||||
# 非 detail 列引用加 ods_main. 前缀避免歧义
|
||||
detail_join = self.DETAIL_JOIN_CONFIG.get(dwd_table)
|
||||
ods_alias = "ods_main" if detail_join else ""
|
||||
if detail_join:
|
||||
detail_table_sql = self._format_table(detail_join["detail_table"], "ods")
|
||||
table_sql = (
|
||||
f"{table_sql} AS ods_main "
|
||||
f'LEFT JOIN {detail_table_sql} AS detail ON {detail_join["join_condition"]}'
|
||||
)
|
||||
# 构造 SELECT 表达式,支持 JSON/expression 映射
|
||||
select_exprs: list[str] = []
|
||||
added: set[str] = set()
|
||||
@@ -881,21 +970,26 @@ class DwdLoadTask(BaseTask):
|
||||
continue
|
||||
if lc in mapping:
|
||||
src, cast_type = mapping[lc]
|
||||
# detail. 前缀的列直接使用(来自详情表),其他列加 ods_main. 前缀
|
||||
if ods_alias and not src.startswith("detail."):
|
||||
src = self._qualify_column_ref(src, ods_alias)
|
||||
select_exprs.append(f"{self._cast_expr(src, cast_type)} AS \"{lc}\"")
|
||||
added.add(lc)
|
||||
elif lc in ods_set:
|
||||
col_ref = f'{ods_alias}."{lc}"' if ods_alias else f'"{lc}"'
|
||||
# CHANGE 2026-02-22: BUG 12 — 同名列如果是时间类型,加哨兵值过滤
|
||||
if dwd_types.get(lc) in ts_types and ods_types.get(lc) in ts_types:
|
||||
select_exprs.append(
|
||||
f'CASE WHEN "{lc}" >= \'{self._SENTINEL_DATE_THRESHOLD}\'::timestamp '
|
||||
f'THEN "{lc}" ELSE NULL END AS "{lc}"'
|
||||
f"CASE WHEN {col_ref} >= '{self._SENTINEL_DATE_THRESHOLD}'::timestamp "
|
||||
f'THEN {col_ref} ELSE NULL END AS "{lc}"'
|
||||
)
|
||||
else:
|
||||
select_exprs.append(f'"{lc}" AS "{lc}"')
|
||||
select_exprs.append(f'{col_ref} AS "{lc}"')
|
||||
added.add(lc)
|
||||
# 分类维度需要额外读取 categoryboxes 以展开子类
|
||||
if dwd_table == "dwd.dim_goods_category" and "categoryboxes" not in added and "categoryboxes" in ods_set:
|
||||
select_exprs.append('"categoryboxes" AS "categoryboxes"')
|
||||
col_ref = f'{ods_alias}."categoryboxes"' if ods_alias else '"categoryboxes"'
|
||||
select_exprs.append(f'{col_ref} AS "categoryboxes"')
|
||||
added.add("categoryboxes")
|
||||
# 主键兜底确保被选出
|
||||
for pk in business_keys:
|
||||
@@ -903,9 +997,12 @@ class DwdLoadTask(BaseTask):
|
||||
if lc not in added:
|
||||
if lc in mapping:
|
||||
src, cast_type = mapping[lc]
|
||||
if ods_alias and not src.startswith("detail."):
|
||||
src = self._qualify_column_ref(src, ods_alias)
|
||||
select_exprs.append(f"{self._cast_expr(src, cast_type)} AS \"{lc}\"")
|
||||
elif lc in ods_set:
|
||||
select_exprs.append(f'"{lc}" AS "{lc}"')
|
||||
col_ref = f'{ods_alias}."{lc}"' if ods_alias else f'"{lc}"'
|
||||
select_exprs.append(f'{col_ref} AS "{lc}"')
|
||||
added.add(lc)
|
||||
|
||||
if not select_exprs:
|
||||
@@ -917,14 +1014,19 @@ class DwdLoadTask(BaseTask):
|
||||
lc = key.lower()
|
||||
if lc in mapping:
|
||||
src, cast_type = mapping[lc]
|
||||
if ods_alias and not src.startswith("detail."):
|
||||
src = self._qualify_column_ref(src, ods_alias)
|
||||
key_exprs.append(self._cast_expr(src, cast_type))
|
||||
elif lc in ods_set:
|
||||
key_exprs.append(f'"{lc}"')
|
||||
key_exprs.append(f'{ods_alias}."{lc}"' if ods_alias else f'"{lc}"')
|
||||
|
||||
select_cols_sql = ", ".join(select_exprs)
|
||||
where_sql = self._append_where_condition("", '"fetched_at" IS NOT NULL')
|
||||
fetched_at_ref = f'{ods_alias}."fetched_at"' if ods_alias else '"fetched_at"'
|
||||
where_sql = self._append_where_condition("", f'{fetched_at_ref} IS NOT NULL')
|
||||
# CHANGE 2026-03-05: order_col 也需要加别名前缀
|
||||
qualified_order_col = f'{ods_alias}."{order_col}"' if ods_alias and order_col else (f'"{order_col}"' if order_col else None)
|
||||
sql = self._latest_snapshot_select_sql(
|
||||
select_cols_sql, table_sql, key_exprs, order_col, where_sql
|
||||
select_cols_sql, table_sql, key_exprs, qualified_order_col, where_sql
|
||||
)
|
||||
cur.execute(sql)
|
||||
rows = [{k.lower(): v for k, v in r.items()} for r in cur.fetchall()]
|
||||
@@ -1006,7 +1108,7 @@ class DwdLoadTask(BaseTask):
|
||||
|
||||
# 批量插入新版本
|
||||
if to_insert:
|
||||
self._insert_dim_rows_bulk(cur, dwd_table, dwd_cols, to_insert, now)
|
||||
self._insert_dim_rows_bulk(cur, dwd_table, dwd_cols, to_insert, now, dwd_types=dwd_types)
|
||||
|
||||
processed = len(src_rows_by_pk)
|
||||
updated = len(to_close)
|
||||
@@ -1050,11 +1152,16 @@ class DwdLoadTask(BaseTask):
|
||||
dwd_cols: Sequence[str],
|
||||
rows_with_version: Sequence[tuple[Dict[str, Any], int]],
|
||||
now: datetime,
|
||||
dwd_types: Dict[str, str] | None = None,
|
||||
) -> None:
|
||||
"""批量插入新的 SCD2 版本行。"""
|
||||
sorted_cols = [c.lower() for c in sorted(dwd_cols)]
|
||||
insert_cols_sql = ", ".join(f'"{c}"' for c in sorted_cols)
|
||||
table_sql = self._format_table(table, "dwd")
|
||||
# 预计算数组类型列集合,避免 list 值被误包装为 Json
|
||||
_array_cols: set[str] = set()
|
||||
if dwd_types:
|
||||
_array_cols = {c for c, t in dwd_types.items() if "ARRAY" in t.upper() or "[]" in t}
|
||||
|
||||
def build_row(src_row: Dict[str, Any], version: int) -> list[Any]:
|
||||
values: list[Any] = []
|
||||
@@ -1068,7 +1175,15 @@ class DwdLoadTask(BaseTask):
|
||||
elif c == "scd2_version":
|
||||
values.append(version)
|
||||
else:
|
||||
values.append(src_row.get(c))
|
||||
val = src_row.get(c)
|
||||
# CHANGE 2026-03-07: 区分数组列和 JSONB 列
|
||||
# 数组列(TEXT[] 等)的 list 值直接传递,psycopg2 自动转为 PG 数组格式
|
||||
# JSONB 列的 dict/list 值需要 Json() 包装
|
||||
if isinstance(val, list) and c not in _array_cols:
|
||||
val = Json(val)
|
||||
elif isinstance(val, dict):
|
||||
val = Json(val)
|
||||
values.append(val)
|
||||
return values
|
||||
|
||||
values_rows = [build_row(r, ver) for r, ver in rows_with_version]
|
||||
@@ -1395,6 +1510,23 @@ class DwdLoadTask(BaseTask):
|
||||
# CHANGE 2026-02-22: BUG 12 fix — 哨兵日期阈值,上游 API 用 0001-01-01 表示"未设置"
|
||||
_SENTINEL_DATE_THRESHOLD = "0002-01-01"
|
||||
|
||||
@staticmethod
|
||||
def _qualify_column_ref(src: str, alias: str) -> str:
|
||||
"""为裸列引用添加表别名前缀。
|
||||
|
||||
已包含 detail.、别名前缀、JSON 操作符、表达式(CASE/COALESCE 等)的源不做修改。
|
||||
仅对简单列名(如 "col" 或 col)添加 alias."col" 前缀。
|
||||
"""
|
||||
# 已有 detail. 或其他表前缀(含 .)→ 不修改
|
||||
if "." in src:
|
||||
return src
|
||||
# JSON 操作符、SQL 表达式 → 不修改
|
||||
if any(tok in src for tok in ("->", "#>>", "::", "CASE ", "COALESCE", "NULLIF", "(")):
|
||||
return src
|
||||
# 裸列名(可能带引号)→ 加别名前缀
|
||||
bare = src.strip('"')
|
||||
return f'{alias}."{bare}"'
|
||||
|
||||
def _cast_expr(self, col: str, cast_type: str | None) -> str:
|
||||
"""构造带可选 CAST 的列表达式。
|
||||
|
||||
|
||||
@@ -20,6 +20,8 @@ from .assistant_salary_task import AssistantSalaryTask
|
||||
from .assistant_finance_task import AssistantFinanceTask
|
||||
from .member_consumption_task import MemberConsumptionTask
|
||||
from .member_visit_task import MemberVisitTask
|
||||
from .assistant_project_tag_task import AssistantProjectTagTask
|
||||
from .member_project_tag_task import MemberProjectTagTask
|
||||
from .finance_daily_task import FinanceDailyTask
|
||||
from .finance_recharge_task import FinanceRechargeTask
|
||||
from .finance_income_task import FinanceIncomeStructureTask
|
||||
@@ -56,6 +58,8 @@ __all__ = [
|
||||
# 客户维度
|
||||
"MemberConsumptionTask",
|
||||
"MemberVisitTask",
|
||||
"AssistantProjectTagTask",
|
||||
"MemberProjectTagTask",
|
||||
# 财务维度
|
||||
"FinanceBaseTask",
|
||||
"FinanceDailyTask",
|
||||
|
||||
@@ -34,6 +34,8 @@ from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
from .dws_helpers import mask_mobile, calc_days_since
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
|
||||
class AssistantCustomerTask(BaseDwsTask):
|
||||
"""
|
||||
@@ -181,13 +183,16 @@ class AssistantCustomerTask(BaseDwsTask):
|
||||
"""
|
||||
提取助教-客户服务统计(含滚动窗口)
|
||||
"""
|
||||
sql = """
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 6.3: DATE(start_use_time) → 营业日归属表达式
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("start_use_time", cutoff)
|
||||
sql = f"""
|
||||
WITH service_base AS (
|
||||
SELECT
|
||||
site_assistant_id AS assistant_id,
|
||||
nickname AS assistant_nickname,
|
||||
tenant_member_id AS member_id,
|
||||
DATE(start_use_time) AS service_date,
|
||||
{biz_expr} AS service_date,
|
||||
income_seconds,
|
||||
ledger_amount
|
||||
FROM dwd.dwd_assistant_service_log
|
||||
|
||||
@@ -34,6 +34,8 @@ from datetime import date, datetime, time, timedelta
|
||||
from decimal import Decimal, ROUND_HALF_UP
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import BaseDwsTask, CourseType, TaskContext
|
||||
|
||||
# 惩罚区域集合:大厅 A/B/C/S/TV + 麻将房 M1–M7
|
||||
@@ -197,7 +199,12 @@ class AssistantDailyTask(BaseDwsTask):
|
||||
|
||||
JOIN _ex 表取 is_trash 字段,用于直接判断服务是否被废除。
|
||||
"""
|
||||
sql = """
|
||||
# CHANGE 2026-02-26: dwd_assistant_service_log 无 table_area_name 列,
|
||||
# 改为 JOIN dim_table 取 site_table_area_name
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 6.1: DATE() → 营业日归属表达式
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("asl.start_use_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
asl.assistant_service_id,
|
||||
asl.order_settle_id,
|
||||
@@ -214,15 +221,18 @@ class AssistantDailyTask(BaseDwsTask):
|
||||
asl.ledger_unit_price,
|
||||
asl.start_use_time,
|
||||
asl.last_use_time,
|
||||
asl.table_area_name,
|
||||
DATE(asl.start_use_time) AS service_date,
|
||||
COALESCE(dt.site_table_area_name, '') AS table_area_name,
|
||||
{biz_expr} AS service_date,
|
||||
COALESCE(ex.is_trash, 0) AS is_trash
|
||||
FROM dwd.dwd_assistant_service_log asl
|
||||
LEFT JOIN dwd.dwd_assistant_service_log_ex ex
|
||||
ON asl.assistant_service_id = ex.assistant_service_id
|
||||
LEFT JOIN dwd.dim_table dt
|
||||
ON asl.site_table_id = dt.table_id
|
||||
AND dt.scd2_is_current = 1
|
||||
WHERE asl.site_id = %s
|
||||
AND DATE(asl.start_use_time) >= %s
|
||||
AND DATE(asl.start_use_time) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND asl.is_delete = 0
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
@@ -258,14 +268,20 @@ class AssistantDailyTask(BaseDwsTask):
|
||||
# 获取助教当日等级(SCD2 as-of)
|
||||
level_info = self.get_assistant_level_asof(assistant_id, service_date)
|
||||
|
||||
# CHANGE 2026-02-27 | level_name 始终由 code 静态映射得出
|
||||
# SCD2 仅用于取历史 level_code(等级可能变过),
|
||||
# name 不再依赖 SCD2 返回值,避免 SCD2 缺失时 NULL
|
||||
level_code = level_info.get('level_code') if level_info else record.get('assistant_level')
|
||||
level_name = self.level_code_to_name(level_code)
|
||||
|
||||
agg_dict[key] = {
|
||||
'site_id': site_id,
|
||||
'tenant_id': self.config.get("app.tenant_id", site_id),
|
||||
'assistant_id': assistant_id,
|
||||
'assistant_nickname': record.get('assistant_nickname'),
|
||||
'stat_date': service_date,
|
||||
'assistant_level_code': level_info.get('level_code') if level_info else record.get('assistant_level'),
|
||||
'assistant_level_name': level_info.get('level_name') if level_info else None,
|
||||
'assistant_level_code': level_code,
|
||||
'assistant_level_name': level_name,
|
||||
'total_service_count': 0,
|
||||
'base_service_count': 0,
|
||||
'bonus_service_count': 0,
|
||||
|
||||
@@ -28,6 +28,8 @@ from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from .base_dws_task import BaseDwsTask, CourseType, TaskContext
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
|
||||
class AssistantFinanceTask(BaseDwsTask):
|
||||
"""
|
||||
@@ -98,6 +100,8 @@ class AssistantFinanceTask(BaseDwsTask):
|
||||
revenue_total = self.safe_decimal(rev.get('revenue_total', 0))
|
||||
gross_profit = revenue_total - cost_daily
|
||||
gross_margin = gross_profit / revenue_total if revenue_total > 0 else Decimal('0')
|
||||
# 防御:clamp 到 numeric(7,4) 安全范围,避免极端值溢出
|
||||
gross_margin = max(Decimal('-999.9999'), min(Decimal('999.9999'), gross_margin))
|
||||
|
||||
record = {
|
||||
'site_id': site_id,
|
||||
@@ -125,9 +129,12 @@ class AssistantFinanceTask(BaseDwsTask):
|
||||
# load() 已移除——使用 BaseDwsTask 默认实现(DATE_COL="stat_date")
|
||||
|
||||
def _extract_daily_revenue(self, site_id: int, start_date: date, end_date: date) -> List[Dict[str, Any]]:
|
||||
sql = """
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 6.5: DATE(start_use_time) → 营业日归属表达式
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("s.start_use_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
DATE(s.start_use_time) AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
s.site_assistant_id AS assistant_id,
|
||||
(ARRAY_AGG(s.nickname ORDER BY s.start_use_time DESC))[1] AS assistant_nickname,
|
||||
COUNT(*) AS service_count,
|
||||
@@ -143,10 +150,10 @@ class AssistantFinanceTask(BaseDwsTask):
|
||||
LEFT JOIN dws.cfg_skill_type st
|
||||
ON st.skill_id = s.skill_id AND st.is_active = TRUE
|
||||
WHERE s.site_id = %s
|
||||
AND DATE(s.start_use_time) >= %s
|
||||
AND DATE(s.start_use_time) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND s.is_delete = 0
|
||||
GROUP BY DATE(s.start_use_time), s.site_assistant_id
|
||||
GROUP BY {biz_expr}, s.site_assistant_id
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
@@ -35,6 +35,8 @@ from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
|
||||
class AssistantMonthlyTask(BaseDwsTask):
|
||||
"""
|
||||
@@ -262,14 +264,18 @@ class AssistantMonthlyTask(BaseDwsTask):
|
||||
month_where = " OR ".join(month_conditions)
|
||||
|
||||
# CHANGE 2026-02-22 | Prompt: 需求 A — 按档位分段统计
|
||||
# GROUP BY 加入 assistant_level_code/name,使同一助教月内不同档位各自聚合;
|
||||
# GROUP BY 加入 assistant_level_code,使同一助教月内不同档位各自聚合;
|
||||
# nickname 改用 ARRAY_AGG 按时间倒序取最新值,替代 MAX() 的字典序取值。
|
||||
# 唯一约束已同步变更为 (site_id, assistant_id, stat_month, assistant_level_code)
|
||||
# CHANGE 2026-02-27 | BUG: assistant_level_name 从 GROUP BY 移到 ARRAY_AGG FILTER
|
||||
# 同一 level_code 在 daily_detail 中可能有 NULL 和非 NULL 的 name,
|
||||
# GROUP BY 会产生多行导致 UK 冲突
|
||||
sql = f"""
|
||||
SELECT
|
||||
assistant_id,
|
||||
assistant_level_code,
|
||||
assistant_level_name,
|
||||
-- 同一 level_code 可能有 NULL 和非 NULL 的 name,取最新非空值避免 UK 冲突
|
||||
(ARRAY_AGG(assistant_level_name ORDER BY stat_date DESC) FILTER (WHERE assistant_level_name IS NOT NULL))[1] AS assistant_level_name,
|
||||
(ARRAY_AGG(assistant_nickname ORDER BY stat_date DESC))[1] AS assistant_nickname,
|
||||
DATE_TRUNC('month', stat_date)::DATE AS stat_month,
|
||||
COUNT(DISTINCT stat_date) AS work_days,
|
||||
@@ -291,7 +297,7 @@ class AssistantMonthlyTask(BaseDwsTask):
|
||||
SUM(trashed_count) AS trashed_count
|
||||
FROM dws.dws_assistant_daily_detail
|
||||
WHERE site_id = %s AND ({month_where})
|
||||
GROUP BY assistant_id, assistant_level_code, assistant_level_name,
|
||||
GROUP BY assistant_id, assistant_level_code,
|
||||
DATE_TRUNC('month', stat_date)
|
||||
"""
|
||||
|
||||
@@ -313,10 +319,13 @@ class AssistantMonthlyTask(BaseDwsTask):
|
||||
end_month = max(months)
|
||||
next_month = (end_month.replace(day=28) + timedelta(days=4)).replace(day=1)
|
||||
|
||||
sql = """
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 6.4: 使用 Business_Month 口径
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("start_use_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
site_assistant_id AS assistant_id,
|
||||
DATE_TRUNC('month', start_use_time)::DATE AS stat_month,
|
||||
DATE_TRUNC('month', {biz_expr}::timestamp)::DATE AS stat_month,
|
||||
COUNT(DISTINCT CASE WHEN tenant_member_id > 0 THEN tenant_member_id END) AS unique_customers,
|
||||
COUNT(DISTINCT site_table_id) AS unique_tables
|
||||
FROM dwd.dwd_assistant_service_log
|
||||
@@ -324,7 +333,7 @@ class AssistantMonthlyTask(BaseDwsTask):
|
||||
AND start_use_time >= %s
|
||||
AND start_use_time < %s
|
||||
AND is_delete = 0
|
||||
GROUP BY site_assistant_id, DATE_TRUNC('month', start_use_time)
|
||||
GROUP BY site_assistant_id, DATE_TRUNC('month', {biz_expr}::timestamp)
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_month, next_month))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
@@ -43,6 +43,8 @@ from typing import Any, Dict, List
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 数据结构
|
||||
@@ -225,19 +227,22 @@ class AssistantOrderContributionTask(BaseDwsTask):
|
||||
|
||||
settle_type=1 为台桌结账,包含台费、酒水食品等金额。
|
||||
"""
|
||||
sql = """
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 6.2: DATE(pay_time) → 营业日归属表达式
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
order_settle_id,
|
||||
site_id,
|
||||
tenant_id,
|
||||
table_charge_money,
|
||||
goods_money,
|
||||
DATE(pay_time) AS stat_date
|
||||
{biz_expr} AS stat_date
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND settle_type = 1
|
||||
AND DATE(pay_time) >= %s
|
||||
AND DATE(pay_time) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
@@ -250,7 +255,10 @@ class AssistantOrderContributionTask(BaseDwsTask):
|
||||
每条记录对应一张台桌在一个订单中的台费信息。
|
||||
real_table_use_seconds 为台桌实际使用时长。
|
||||
"""
|
||||
sql = """
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 6.2: DATE(start_use_time) → 营业日归属表达式
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("tfl.start_use_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
tfl.order_settle_id,
|
||||
tfl.site_table_id AS table_id,
|
||||
@@ -259,8 +267,8 @@ class AssistantOrderContributionTask(BaseDwsTask):
|
||||
COALESCE(tfl.ledger_amount, 0) AS table_fee
|
||||
FROM dwd.dwd_table_fee_log tfl
|
||||
WHERE tfl.site_id = %s
|
||||
AND DATE(tfl.start_use_time) >= %s
|
||||
AND DATE(tfl.start_use_time) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND COALESCE(tfl.is_delete, 0) = 0
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
@@ -274,7 +282,10 @@ class AssistantOrderContributionTask(BaseDwsTask):
|
||||
通过 LEFT JOIN cfg_skill_type 获取 course_type_code,
|
||||
real_service_money 为助教分成。
|
||||
"""
|
||||
sql = """
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 6.2: DATE(start_use_time) → 营业日归属表达式
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("asl.start_use_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
asl.order_settle_id,
|
||||
asl.site_assistant_id AS assistant_id,
|
||||
@@ -290,8 +301,8 @@ class AssistantOrderContributionTask(BaseDwsTask):
|
||||
ON asl.skill_id = cst.skill_id
|
||||
AND cst.is_active = TRUE
|
||||
WHERE asl.site_id = %s
|
||||
AND DATE(asl.start_use_time) >= %s
|
||||
AND DATE(asl.start_use_time) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND COALESCE(asl.is_delete, 0) = 0
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
|
||||
@@ -0,0 +1,236 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
DWS 助教项目标签任务
|
||||
|
||||
按时间窗口计算每位助教在四大项目(BILLIARD/SNOOKER/MAHJONG/KTV)的
|
||||
工作时长占比,占比≥25% 则分配标签。
|
||||
|
||||
数据链路:
|
||||
dwd_assistant_service_log (income_seconds)
|
||||
→ JOIN dim_table (site_table_id → table_id, scd2_is_current=1)
|
||||
→ get_area_category(area_name, table_name)
|
||||
→ 按 category_code 汇总 → 计算占比 → 写入 dws_assistant_project_tag
|
||||
|
||||
目标表:
|
||||
dws.dws_assistant_project_tag
|
||||
|
||||
更新策略:
|
||||
全量删除重建(按 site_id 删除后重新插入所有时间窗口)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from tasks.dws.base_dws_task import BaseDwsTask, TimeWindow
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
# 只计算四大项目,排除 SPECIAL/OTHER
|
||||
VALID_CATEGORIES = {"BILLIARD", "SNOOKER", "MAHJONG", "KTV"}
|
||||
|
||||
# 助教看板的 6 个时间窗口
|
||||
ASSISTANT_WINDOWS = [
|
||||
TimeWindow.THIS_MONTH,
|
||||
TimeWindow.THIS_QUARTER,
|
||||
TimeWindow.LAST_MONTH,
|
||||
TimeWindow.LAST_3_MONTHS_EXCL_CURRENT,
|
||||
TimeWindow.LAST_QUARTER,
|
||||
TimeWindow.LAST_6_MONTHS,
|
||||
]
|
||||
|
||||
TAG_THRESHOLD = Decimal("0.25")
|
||||
|
||||
|
||||
class AssistantProjectTagTask(BaseDwsTask):
    """ETL task that tags assistants with the projects they mostly work on.

    For every window in ``ASSISTANT_WINDOWS`` the task sums each assistant's
    ``income_seconds`` per table, maps each table to a category through
    ``get_area_category`` and keeps only the codes in ``VALID_CATEGORIES``.
    Every (assistant, window, category) share of the assistant's total
    becomes one row of ``dws.dws_assistant_project_tag``; ``is_tagged`` is
    set when the rounded share reaches ``TAG_THRESHOLD``.
    """

    def get_task_code(self) -> str:
        """Return the task code (also used as the log prefix in this class)."""
        return "DWS_ASSISTANT_PROJECT_TAG"

    def get_target_table(self) -> str:
        """Return the unqualified name of the target table."""
        return "dws_assistant_project_tag"

    def get_primary_keys(self) -> List[str]:
        """Return the columns that identify one tag row."""
        return ["site_id", "assistant_id", "time_window", "category_code"]

    def extract(self, context) -> Dict[str, Any]:
        """Collect table dimension data and per-window assistant durations.

        Args:
            context: Task context; only ``context.store_id`` is read here.

        Returns:
            A dict with ``window_data`` (window value -> duration rows),
            ``table_info`` (table_id -> dimension row) and ``site_id``.
        """
        site_id = context.store_id
        self.logger.info("%s: 提取助教服务数据", self.get_task_code())

        # Load configuration (cfg_area_category etc.) before categories are
        # resolved in transform().
        self.load_config_cache()

        # Table dimension rows supply area_name / table_name for
        # get_area_category().
        table_info = self._extract_table_info(site_id)

        # One duration query per time window.
        window_data: Dict[str, List[Dict]] = {}
        for window in ASSISTANT_WINDOWS:
            time_range = self.get_time_window_range(window)
            window_data[window.value] = self._extract_assistant_durations(
                site_id, time_range.start, time_range.end
            )

        return {
            "window_data": window_data,
            "table_info": table_info,
            "site_id": site_id,
        }

    def _extract_table_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
        """Return the current ``dim_table`` rows of a site keyed by ``table_id``."""
        sql = """
            SELECT table_id, table_name, site_table_area_name AS area_name
            FROM dwd.dim_table
            WHERE site_id = %s AND scd2_is_current = 1
        """
        rows = self.db.query(sql, (site_id,))
        return {r["table_id"]: dict(r) for r in (rows or [])}

    def _extract_assistant_durations(
        self, site_id: int, start_date: date, end_date: date
    ) -> List[Dict[str, Any]]:
        """Sum ``income_seconds`` per (assistant, table) for a business-date range.

        Both bounds are inclusive and are compared against the business-date
        expression built by ``biz_date_sql_expr`` from ``asl.start_use_time``.

        Args:
            site_id: Site whose service log rows are read.
            start_date: First business date to include.
            end_date: Last business date to include.

        Returns:
            Rows with ``assistant_id``, ``table_id`` and ``duration_seconds``;
            an empty list when the query yields nothing.
        """
        cutoff = self.config.get("app.business_day_start_hour", 8)
        biz_expr = biz_date_sql_expr("asl.start_use_time", cutoff)
        # COALESCE keeps rows whose is_delete is NULL, matching the other
        # queries against dwd_assistant_service_log in this code base.
        sql = f"""
            SELECT
                asl.site_assistant_id AS assistant_id,
                asl.site_table_id AS table_id,
                COALESCE(SUM(asl.income_seconds), 0) AS duration_seconds
            FROM dwd.dwd_assistant_service_log asl
            WHERE asl.site_id = %(site_id)s
              AND {biz_expr} >= %(start_date)s
              AND {biz_expr} <= %(end_date)s
              AND COALESCE(asl.is_delete, 0) = 0
            GROUP BY asl.site_assistant_id, asl.site_table_id
        """
        rows = self.db.query(sql, {
            "site_id": site_id,
            "start_date": start_date,
            "end_date": end_date,
        })
        return [dict(r) for r in rows] if rows else []

    def transform(self, extracted: Dict[str, Any], context) -> List[Dict[str, Any]]:
        """Turn per-window duration rows into tag records.

        Rows with non-positive durations and rows whose table maps to a
        category outside ``VALID_CATEGORIES`` are ignored, so an assistant's
        ``total_seconds`` only counts time in the valid categories.

        Args:
            extracted: The dict produced by :meth:`extract`.
            context: Task context; ``tenant_id`` is read if present, else 0.

        Returns:
            One dict per (assistant, window, category) ready for
            :meth:`load`.
        """
        table_info = extracted["table_info"]
        site_id = extracted["site_id"]
        tenant_id = getattr(context, "tenant_id", 0) or 0
        results: List[Dict[str, Any]] = []
        # Display names depend only on the category code, so resolve each
        # code once instead of rescanning the config cache for every row.
        display_by_code: Dict[str, Dict[str, str]] = {}

        for window_value, rows in extracted["window_data"].items():
            # assistant_id -> category_code -> seconds
            assistant_cats: Dict[int, Dict[str, int]] = {}

            for row in rows:
                aid = row["assistant_id"]
                tid = row["table_id"]
                secs = self.safe_int(row["duration_seconds"])
                if secs <= 0:
                    continue

                # Resolve area and table name through dim_table; unknown
                # tables yield None for both.
                tinfo = table_info.get(tid, {})
                cat = self.get_area_category(
                    tinfo.get("area_name"), tinfo.get("table_name")
                )
                code = cat.get("category_code", "OTHER")

                # Only the four main projects are counted.
                if code not in VALID_CATEGORIES:
                    continue

                per_assistant = assistant_cats.setdefault(aid, {})
                per_assistant[code] = per_assistant.get(code, 0) + secs

            # Compute shares and emit one record per category.
            for aid, cats in assistant_cats.items():
                total = sum(cats.values())
                if total <= 0:
                    continue

                for code, secs in cats.items():
                    pct = Decimal(str(secs)) / Decimal(str(total))
                    # The threshold below is applied to the rounded value,
                    # i.e. the same number that is stored in `percentage`.
                    pct = pct.quantize(Decimal("0.0001"))
                    if code not in display_by_code:
                        display_by_code[code] = self._get_category_display(code)
                    cat_info = display_by_code[code]

                    results.append({
                        "site_id": site_id,
                        "tenant_id": tenant_id,
                        "assistant_id": aid,
                        "time_window": window_value,
                        "category_code": code,
                        "category_name": cat_info["category_name"],
                        "short_name": cat_info["short_name"],
                        "duration_seconds": secs,
                        "total_seconds": total,
                        "percentage": float(pct),
                        "is_tagged": pct >= TAG_THRESHOLD,
                    })

        self.logger.info(
            "%s: 生成 %d 条标签记录(其中 %d 条达标)",
            self.get_task_code(),
            len(results),
            sum(1 for r in results if r["is_tagged"]),
        )
        return results

    def _get_category_display(self, code: str) -> Dict[str, str]:
        """Return the display name and short name for a category code.

        The first config entry whose ``category_code`` equals ``code`` wins.
        Empty or NULL config values fall back to the code itself (name) or
        its first character (short name); when no entry matches, a built-in
        table for the four main projects is used.
        """
        cache = self.load_config_cache()
        for cat in cache.area_categories.values():
            if cat.get("category_code") == code:
                # `or` rather than a .get() default: the keys exist even when
                # the database value is NULL, so a default would never apply.
                return {
                    "category_name": (
                        cat.get("display_name") or cat.get("category_name") or code
                    ),
                    "short_name": cat.get("short_name") or code[:1],
                }

        # Fallback when the config has no entry for this code.
        fallback = {
            "BILLIARD": ("🎱 中式/追分", "🎱"),
            "SNOOKER": ("斯诺克", "斯"),
            "MAHJONG": ("🀄 麻将/棋牌", "🀄"),
            "KTV": ("🎤 团建/K歌", "🎤"),
        }
        name, short = fallback.get(code, (code, code[:1]))
        return {"category_name": name, "short_name": short}

    def load(self, transformed: List[Dict[str, Any]], context) -> dict:
        """Replace all tag rows of the site with the freshly computed ones.

        Args:
            transformed: Records produced by :meth:`transform`.
            context: Task context (not read here).

        Returns:
            A status dict with ``inserted`` and ``deleted`` counts.
        """
        # NOTE(review): with no records the existing rows of the site are
        # left untouched (no DELETE is issued) - confirm this is intended.
        if not transformed:
            return {"status": "SUCCESS", "counts": {"inserted": 0, "deleted": 0}}

        site_id = transformed[0]["site_id"]

        # Full rebuild: delete every tag row of the site, then re-insert.
        delete_sql = "DELETE FROM dws.dws_assistant_project_tag WHERE site_id = %s"
        self.db.execute(delete_sql, (site_id,))
        # rowcount is only available when the db wrapper exposes its cursor.
        deleted = self.db.cursor.rowcount if hasattr(self.db, "cursor") else 0

        insert_sql = """
            INSERT INTO dws.dws_assistant_project_tag (
                site_id, tenant_id, assistant_id, time_window,
                category_code, category_name, short_name,
                duration_seconds, total_seconds, percentage, is_tagged,
                computed_at, created_at, updated_at
            ) VALUES (
                %(site_id)s, %(tenant_id)s, %(assistant_id)s, %(time_window)s,
                %(category_code)s, %(category_name)s, %(short_name)s,
                %(duration_seconds)s, %(total_seconds)s, %(percentage)s, %(is_tagged)s,
                NOW(), NOW(), NOW()
            )
        """
        for row in transformed:
            self.db.execute(insert_sql, row)

        self.logger.info(
            "%s: 删除 %d 条,插入 %d 条",
            self.get_task_code(), deleted, len(transformed),
        )
        return {
            "status": "SUCCESS",
            "counts": {"inserted": len(transformed), "deleted": deleted},
        }
|
||||
@@ -27,8 +27,9 @@ DWS层任务基类
|
||||
- 提供滚动窗口统计方法
|
||||
|
||||
时间口径说明:
|
||||
- 周起始日:周一
|
||||
- 月/季度起始:第一天0点
|
||||
- 营业日切点:BUSINESS_DAY_START_HOUR(默认 08:00),08:00 前的记录归属前一天
|
||||
- 周起始日:周一 08:00
|
||||
- 月/季度起始:第一天 08:00
|
||||
- 环比规则:对比上一个等长区间
|
||||
- 前3个月:含/不含本月(用于财务筛选)
|
||||
- 最近半年:不含本月
|
||||
@@ -52,6 +53,8 @@ from decimal import Decimal, InvalidOperation
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Iterator, List, Optional, Tuple, TypeVar
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr, business_date, now_shanghai
|
||||
|
||||
from ..base_task import BaseTask, TaskContext
|
||||
|
||||
# =============================================================================
|
||||
@@ -81,6 +84,8 @@ class TimeWindow(Enum):
|
||||
THIS_QUARTER = "THIS_QUARTER" # 本季度
|
||||
LAST_QUARTER = "LAST_QUARTER" # 上季度
|
||||
LAST_6_MONTHS = "LAST_6_MONTHS" # 最近半年(不含本月)
|
||||
LAST_30_DAYS = "LAST_30_DAYS" # 近30天(含今天)
|
||||
LAST_60_DAYS = "LAST_60_DAYS" # 近60天(含今天)
|
||||
|
||||
|
||||
class CourseType(Enum):
|
||||
@@ -292,18 +297,20 @@ class BaseDwsTask(BaseTask):
|
||||
获取时间窗口的日期范围(用于财务报表)
|
||||
|
||||
时间口径说明:
|
||||
- 周起始日为周一
|
||||
- 月/季度起始为第一天0点
|
||||
- 营业日切点:BUSINESS_DAY_START_HOUR(默认 08:00)
|
||||
- 周起始日为周一 08:00
|
||||
- 月/季度起始为第一天 08:00
|
||||
|
||||
Args:
|
||||
window: 时间窗口枚举
|
||||
base_date: 基准日期,默认为今天
|
||||
base_date: 基准日期,默认为当前营业日
|
||||
|
||||
Returns:
|
||||
TimeRange对象
|
||||
"""
|
||||
if base_date is None:
|
||||
base_date = date.today()
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
base_date = business_date(now_shanghai(), cutoff)
|
||||
|
||||
if window == TimeWindow.THIS_WEEK:
|
||||
# 本周(周一起始)
|
||||
@@ -369,6 +376,16 @@ class BaseDwsTask(BaseTask):
|
||||
start = self.get_month_first_day(self._shift_months(month_start, -6))
|
||||
return TimeRange(start=start, end=end)
|
||||
|
||||
elif window == TimeWindow.LAST_30_DAYS:
|
||||
# 近30天(含今天)
|
||||
start = base_date - timedelta(days=29)
|
||||
return TimeRange(start=start, end=base_date)
|
||||
|
||||
elif window == TimeWindow.LAST_60_DAYS:
|
||||
# 近60天(含今天)
|
||||
start = base_date - timedelta(days=59)
|
||||
return TimeRange(start=start, end=base_date)
|
||||
|
||||
raise ValueError(f"不支持的时间窗口类型: {window}")
|
||||
|
||||
def get_comparison_range(self, time_range: TimeRange) -> TimeRange:
|
||||
@@ -410,9 +427,9 @@ class BaseDwsTask(BaseTask):
|
||||
|
||||
def is_new_hire_in_month(self, hire_date: date, stat_month: date) -> bool:
|
||||
"""
|
||||
判断是否为新入职(月1日0点后入职)
|
||||
判断是否为新入职(月1日8点后入职)
|
||||
|
||||
新入职定档规则:月1日0点之后入职的,计算为新入职
|
||||
新入职定档规则:月1日8点之后入职的,计算为新入职
|
||||
|
||||
Args:
|
||||
hire_date: 入职日期
|
||||
@@ -527,10 +544,12 @@ class BaseDwsTask(BaseTask):
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _load_area_categories(self) -> Dict[str, Dict[str, Any]]:
|
||||
"""加载区域分类映射"""
|
||||
"""加载区域分类映射(支持台桌级细分)"""
|
||||
sql = """
|
||||
SELECT
|
||||
source_area_name, category_code, category_name,
|
||||
source_area_name, source_table_name,
|
||||
category_code, category_name,
|
||||
display_name, short_name,
|
||||
match_type, match_priority
|
||||
FROM dws.cfg_area_category
|
||||
WHERE is_active = TRUE
|
||||
@@ -540,10 +559,15 @@ class BaseDwsTask(BaseTask):
|
||||
if not rows:
|
||||
return {}
|
||||
|
||||
# 双层索引:(area_name, table_name) → config
|
||||
# table_name 为 NULL 时用空字符串作 key
|
||||
result = {}
|
||||
for row in rows:
|
||||
row_dict = dict(row)
|
||||
result[row_dict['source_area_name']] = row_dict
|
||||
area = row_dict['source_area_name']
|
||||
table = row_dict.get('source_table_name') or ''
|
||||
key = f"{area}\x00{table}" # 复合键,\x00 不会出现在正常名称中
|
||||
result[key] = row_dict
|
||||
return result
|
||||
|
||||
def _load_skill_types(self) -> Dict[int, Dict[str, Any]]:
|
||||
@@ -709,50 +733,57 @@ class BaseDwsTask(BaseTask):
|
||||
# 默认为基础课
|
||||
return CourseType.BASE
|
||||
|
||||
def get_area_category(self, area_name: Optional[str]) -> Dict[str, str]:
|
||||
def get_area_category(self, area_name: Optional[str], table_name: Optional[str] = None) -> Dict[str, str]:
|
||||
"""
|
||||
获取区域分类(支持精确匹配、模糊匹配、兜底)
|
||||
获取区域分类(支持台桌级精确 > 区域精确 > 模糊 > 兜底)
|
||||
|
||||
Args:
|
||||
area_name: 原始区域名称
|
||||
area_name: 原始区域名称(dim_table.site_table_area_name)
|
||||
table_name: 台桌名称(dim_table.table_name),用于台桌级细分映射
|
||||
|
||||
Returns:
|
||||
包含 category_code 和 category_name 的字典
|
||||
包含 category_code, category_name, display_name, short_name 的字典
|
||||
"""
|
||||
config = self.load_config_cache()
|
||||
default = {'category_code': 'OTHER', 'category_name': '其他', 'display_name': '其他', 'short_name': '他'}
|
||||
|
||||
if not area_name:
|
||||
# 无区域名称,返回默认
|
||||
return {'category_code': 'OTHER', 'category_name': '其他区域'}
|
||||
return default
|
||||
|
||||
# 1. 精确匹配
|
||||
if area_name in config.area_categories:
|
||||
cat = config.area_categories[area_name]
|
||||
if cat.get('match_type') == 'EXACT':
|
||||
return {
|
||||
'category_code': cat['category_code'],
|
||||
'category_name': cat['category_name']
|
||||
}
|
||||
cats = config.area_categories
|
||||
|
||||
# 2. 模糊匹配(按优先级)
|
||||
for key, cat in config.area_categories.items():
|
||||
if cat.get('match_type') == 'LIKE':
|
||||
pattern = key.replace('%', '')
|
||||
if pattern and pattern in area_name:
|
||||
return {
|
||||
'category_code': cat['category_code'],
|
||||
'category_name': cat['category_name']
|
||||
}
|
||||
|
||||
# 3. 兜底
|
||||
if 'DEFAULT' in config.area_categories:
|
||||
cat = config.area_categories['DEFAULT']
|
||||
def _pick(cat: Dict[str, Any]) -> Dict[str, str]:
|
||||
return {
|
||||
'category_code': cat['category_code'],
|
||||
'category_name': cat['category_name']
|
||||
'category_name': cat['category_name'],
|
||||
'display_name': cat.get('display_name') or cat['category_name'],
|
||||
'short_name': cat.get('short_name') or '',
|
||||
}
|
||||
|
||||
return {'category_code': 'OTHER', 'category_name': '其他区域'}
|
||||
# 1. 台桌级精确匹配(area_name + table_name)
|
||||
if table_name:
|
||||
key = f"{area_name}\x00{table_name}"
|
||||
if key in cats and cats[key].get('match_type') == 'EXACT':
|
||||
return _pick(cats[key])
|
||||
|
||||
# 2. 区域级精确匹配(area_name + 空 table_name)
|
||||
key = f"{area_name}\x00"
|
||||
if key in cats and cats[key].get('match_type') == 'EXACT':
|
||||
return _pick(cats[key])
|
||||
|
||||
# 3. 模糊匹配(按优先级,已排序)
|
||||
for k, cat in cats.items():
|
||||
if cat.get('match_type') == 'LIKE':
|
||||
pattern = cat['source_area_name'].replace('%', '')
|
||||
if pattern and pattern in area_name:
|
||||
return _pick(cat)
|
||||
|
||||
# 4. 兜底
|
||||
fallback_key = f"DEFAULT\x00"
|
||||
if fallback_key in cats:
|
||||
return _pick(cats[fallback_key])
|
||||
|
||||
return default
|
||||
|
||||
def calculate_sprint_bonus(
|
||||
self,
|
||||
@@ -908,8 +939,10 @@ class BaseDwsTask(BaseTask):
|
||||
offset = 0
|
||||
cols_str = ", ".join(columns)
|
||||
|
||||
# 构建WHERE条件
|
||||
where_parts = [f"DATE({date_col}) >= %s", f"DATE({date_col}) <= %s"]
|
||||
# 构建WHERE条件 — 使用营业日归属表达式替代 DATE()
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr(date_col, cutoff)
|
||||
where_parts = [f"{biz_expr} >= %s", f"{biz_expr} <= %s"]
|
||||
params: List[Any] = [start_date, end_date]
|
||||
|
||||
if where_clause:
|
||||
@@ -972,15 +1005,24 @@ class BaseDwsTask(BaseTask):
|
||||
获取助教在指定日期的等级(SCD2 as-of取值)
|
||||
|
||||
助教等级是SCD2维度,历史月份不能直接用"当前等级"。
|
||||
需要按有效期as-of join取数。
|
||||
优先精确匹配 [scd2_start, scd2_end) 区间;
|
||||
若无匹配(服务日期早于首条 SCD2 或区间有间隙),
|
||||
回退取 scd2_start_time <= asof_date 的最近一条,
|
||||
因为从该记录起等级未变。
|
||||
|
||||
Args:
|
||||
assistant_id: 助教ID
|
||||
asof_date: 取值日期
|
||||
|
||||
Returns:
|
||||
助教等级信息,包含level_code和level_name
|
||||
助教等级信息,包含level_code和level_name;无记录时返回None
|
||||
"""
|
||||
# CHANGE 2026-02-27 | 放宽 SCD2 匹配:去掉 scd2_end_time 条件,
|
||||
# 改为取 scd2_start_time <= asof_date 的最近一条。
|
||||
# 原逻辑要求 asof_date 严格落在 [start, end) 区间内,
|
||||
# 当 SCD2 记录有间隙或服务日期早于首条记录时返回 None,
|
||||
# 导致 dws_assistant_daily_detail.assistant_level_name 出现 NULL,
|
||||
# 下游 monthly 聚合时同一 level_code 有 NULL/非NULL 两种值引发 UK 冲突。
|
||||
sql = """
|
||||
SELECT
|
||||
assistant_id,
|
||||
@@ -999,13 +1041,30 @@ class BaseDwsTask(BaseTask):
|
||||
FROM dwd.dim_assistant
|
||||
WHERE assistant_id = %s
|
||||
AND scd2_start_time <= %s
|
||||
AND (scd2_end_time IS NULL OR scd2_end_time > %s)
|
||||
ORDER BY scd2_start_time DESC
|
||||
LIMIT 1
|
||||
"""
|
||||
rows = self.db.query(sql, (assistant_id, asof_date, asof_date))
|
||||
rows = self.db.query(sql, (assistant_id, asof_date))
|
||||
return dict(rows[0]) if rows else None
|
||||
|
||||
|
||||
# CHANGE 2026-02-27 | 新增 level_code → level_name 静态映射
|
||||
# 当 SCD2 记录晚于服务日期(dim_assistant 后期才开始同步)时,
|
||||
# 用服务记录自带的 assistant_level 做 fallback 映射
|
||||
LEVEL_CODE_NAME_MAP: dict[int, str] = {
|
||||
8: "助教管理",
|
||||
10: "初级",
|
||||
20: "中级",
|
||||
30: "高级",
|
||||
40: "星级",
|
||||
}
|
||||
|
||||
@staticmethod
def level_code_to_name(level_code: int | None) -> str | None:
    """Map an ``assistant_level`` code to its Chinese display name.

    Returns ``None`` when ``level_code`` is ``None`` or when the code,
    converted with ``int()``, has no entry in ``LEVEL_CODE_NAME_MAP``.
    """
    return (
        None
        if level_code is None
        else BaseDwsTask.LEVEL_CODE_NAME_MAP.get(int(level_code))
    )
|
||||
|
||||
def get_member_card_balance_asof(
|
||||
self,
|
||||
member_id: int,
|
||||
|
||||
@@ -22,6 +22,8 @@ from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import BaseDwsTask
|
||||
from .dws_helpers import parse_id_list
|
||||
|
||||
@@ -39,9 +41,11 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
end_date: date,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""结账单日汇总(结算头表按日聚合)"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
DATE(pay_time) AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
COUNT(*) AS order_count,
|
||||
COUNT(CASE WHEN member_id != 0 AND member_id IS NOT NULL THEN 1 END) AS member_order_count,
|
||||
COUNT(CASE WHEN member_id = 0 OR member_id IS NULL THEN 1 END) AS guest_order_count,
|
||||
@@ -61,13 +65,17 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
SUM(member_discount_amount) AS member_discount_amount,
|
||||
SUM(rounding_amount) AS rounding_amount,
|
||||
SUM(pl_coupon_sale_amount) AS pl_coupon_sale_amount,
|
||||
-- 消费金额
|
||||
SUM(consume_money) AS total_consume
|
||||
-- CHANGE 2026-03-07 | consume_money → items_sum 口径校准
|
||||
-- consume_money 存在三种历史口径混合,DWS 层统一使用 items_sum
|
||||
SUM(table_charge_money + goods_money + assistant_pd_money
|
||||
+ assistant_cx_money + electricity_money) AS items_sum
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND DATE(pay_time) >= %s
|
||||
AND DATE(pay_time) <= %s
|
||||
GROUP BY DATE(pay_time)
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
-- CHANGE 2026-03-07 | 排除退货(6)/退款(7),仅保留台桌结账(1)+商城订单(3)
|
||||
AND settle_type IN (1, 3)
|
||||
GROUP BY {biz_expr}
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
@@ -83,9 +91,11 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""充值日汇总(充值订单按日聚合)"""
|
||||
# CHANGE 2026-02-21 | BUG 8: dwd_recharge_order 无 pay_money/gift_money,实际字段为 pay_amount/point_amount
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
DATE(pay_time) AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
COUNT(*) AS recharge_count,
|
||||
SUM(pay_amount + point_amount) AS recharge_total,
|
||||
SUM(pay_amount) AS recharge_cash,
|
||||
@@ -101,9 +111,9 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
COUNT(DISTINCT member_id) AS recharge_member_count
|
||||
FROM dwd.dwd_recharge_order
|
||||
WHERE site_id = %s
|
||||
AND DATE(pay_time) >= %s
|
||||
AND DATE(pay_time) <= %s
|
||||
GROUP BY DATE(pay_time)
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
GROUP BY {biz_expr}
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
@@ -118,9 +128,11 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
end_date: date,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""团购核销日汇总(结算头表 + 团购核销表联查)"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("sh.pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
sh.pay_time::DATE AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
COUNT(CASE WHEN sh.coupon_amount > 0 THEN 1 END) AS groupbuy_count,
|
||||
SUM(
|
||||
CASE
|
||||
@@ -137,9 +149,9 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
ON gr.order_settle_id = sh.order_settle_id
|
||||
AND COALESCE(gr.is_delete, 0) = 0
|
||||
WHERE sh.site_id = %s
|
||||
AND sh.pay_time >= %s
|
||||
AND sh.pay_time < %s + INTERVAL '1 day'
|
||||
GROUP BY sh.pay_time::DATE
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
GROUP BY {biz_expr}
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
@@ -188,16 +200,18 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
if not member_ids and not order_ids:
|
||||
return []
|
||||
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
pay_time::DATE AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
order_settle_id,
|
||||
member_id,
|
||||
adjust_amount
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND pay_time >= %s
|
||||
AND pay_time < %s + INTERVAL '1 day'
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND adjust_amount != 0
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
@@ -242,20 +256,22 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
end_date: date,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""赠送卡消费汇总(余额变动按日聚合)"""
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr_change = biz_date_sql_expr("change_time", cutoff)
|
||||
id_list = ", ".join(str(card_id) for card_id in self.GIFT_CARD_TYPE_IDS)
|
||||
sql = f"""
|
||||
SELECT
|
||||
change_time::DATE AS stat_date,
|
||||
{biz_expr_change} AS stat_date,
|
||||
SUM(ABS(change_amount)) AS gift_card_consume
|
||||
FROM dwd.dwd_member_balance_change
|
||||
WHERE site_id = %s
|
||||
AND change_time >= %s
|
||||
AND change_time < %s + INTERVAL '1 day'
|
||||
AND {biz_expr_change} >= %s
|
||||
AND {biz_expr_change} <= %s
|
||||
AND from_type = 1
|
||||
AND change_amount < 0
|
||||
AND COALESCE(is_delete, 0) = 0
|
||||
AND card_type_id IN ({id_list})
|
||||
GROUP BY change_time::DATE
|
||||
GROUP BY {biz_expr_change}
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
@@ -222,6 +222,8 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
member_discount = self.safe_decimal(settle.get('member_discount_amount', 0))
|
||||
rounding_amount = self.safe_decimal(settle.get('rounding_amount', 0))
|
||||
big_customer_amount = self.safe_decimal(big_customer.get('big_customer_amount', 0))
|
||||
# 大客户优惠不超过手动调整总额(大客户是 adjust 的子集)
|
||||
big_customer_amount = min(big_customer_amount, adjust_amount) if adjust_amount > 0 else Decimal('0')
|
||||
other_discount = adjust_amount - big_customer_amount
|
||||
if other_discount < 0:
|
||||
other_discount = Decimal('0')
|
||||
@@ -229,8 +231,8 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
# 赠送卡消费(来自余额变动)
|
||||
gift_card_consume_amount = self.safe_decimal(gift_card.get('gift_card_consume', 0))
|
||||
|
||||
# 优惠合计
|
||||
discount_total = discount_groupbuy + member_discount + gift_card_consume_amount + adjust_amount + rounding_amount
|
||||
# 优惠合计(大客户 + 其他 = adjust_amount,互斥拆分)
|
||||
discount_total = discount_groupbuy + member_discount + gift_card_consume_amount + big_customer_amount + other_discount + rounding_amount
|
||||
|
||||
# 确认收入
|
||||
confirmed_income = gross_amount - discount_total
|
||||
@@ -249,9 +251,12 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
cash_balance_change = cash_inflow_total - cash_outflow_total
|
||||
|
||||
# 卡消费
|
||||
cash_card_consume = card_pay_amount + balance_pay_amount
|
||||
# CHANGE 2026-03-07 | balance 恒等式校准
|
||||
# balance_amount = recharge_card_amount + gift_card_amount
|
||||
# recharge_card_consume 只取现金充值部分(recharge_card_amount),不加 balance_amount 避免重复计算
|
||||
recharge_card_consume = card_pay_amount
|
||||
gift_card_consume = gift_card_consume_amount
|
||||
card_consume_total = cash_card_consume + gift_card_consume
|
||||
card_consume_total = recharge_card_consume + gift_card_consume
|
||||
|
||||
# 充值统计
|
||||
recharge_count = self.safe_int(recharge.get('recharge_count', 0))
|
||||
@@ -284,7 +289,8 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
'discount_groupbuy': discount_groupbuy,
|
||||
'discount_vip': member_discount,
|
||||
'discount_gift_card': gift_card_consume_amount,
|
||||
'discount_manual': adjust_amount,
|
||||
# CHANGE 2026-03-07 | discount_manual 语义修正:存储大客户优惠(与 discount_other 互斥,两者之和 = adjust_amount)
|
||||
'discount_manual': big_customer_amount,
|
||||
'discount_rounding': rounding_amount,
|
||||
'discount_other': other_discount,
|
||||
# 确认收入
|
||||
@@ -297,7 +303,7 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
'platform_fee_amount': platform_fee_amount,
|
||||
'recharge_cash_inflow': recharge_cash_inflow,
|
||||
'card_consume_total': card_consume_total,
|
||||
'cash_card_consume': cash_card_consume,
|
||||
'recharge_card_consume': recharge_card_consume,
|
||||
'gift_card_consume': gift_card_consume,
|
||||
'cash_outflow_total': cash_outflow_total,
|
||||
'cash_balance_change': cash_balance_change,
|
||||
|
||||
@@ -35,6 +35,8 @@ from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import TaskContext
|
||||
from .finance_base_task import FinanceBaseTask
|
||||
|
||||
@@ -112,9 +114,11 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
|
||||
- rounding_amount: 抹零金额
|
||||
- pl_coupon_sale_amount: 平台券销售金额(团购实付路径1)
|
||||
"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
pay_time::DATE AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
-- 团购相关
|
||||
COALESCE(SUM(coupon_amount), 0) AS coupon_amount_total,
|
||||
COALESCE(SUM(pl_coupon_sale_amount), 0) AS pl_coupon_sale_total,
|
||||
@@ -132,10 +136,10 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
|
||||
COUNT(*) AS total_orders
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %(site_id)s
|
||||
AND pay_time >= %(start_date)s
|
||||
AND pay_time < %(end_date)s + INTERVAL '1 day'
|
||||
AND settle_status = 1 -- 已结账
|
||||
GROUP BY pay_time::DATE
|
||||
AND {biz_expr} >= %(start_date)s
|
||||
AND {biz_expr} <= %(end_date)s
|
||||
AND settle_type IN (1, 3) -- 台桌结账 + 商城订单,排除退货/撤销
|
||||
GROUP BY {biz_expr}
|
||||
ORDER BY stat_date
|
||||
"""
|
||||
rows = self.db.query(sql, {
|
||||
@@ -160,9 +164,11 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
|
||||
|
||||
返回:{日期: 团购实付总额}
|
||||
"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("sh.pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
sh.pay_time::DATE AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
SUM(
|
||||
CASE
|
||||
WHEN sh.pl_coupon_sale_amount > 0 THEN sh.pl_coupon_sale_amount
|
||||
@@ -174,11 +180,11 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
|
||||
ON gr.order_settle_id = sh.order_settle_id
|
||||
AND COALESCE(gr.is_delete, 0) = 0
|
||||
WHERE sh.site_id = %(site_id)s
|
||||
AND sh.pay_time >= %(start_date)s
|
||||
AND sh.pay_time < %(end_date)s + INTERVAL '1 day'
|
||||
AND sh.settle_status = 1
|
||||
AND {biz_expr} >= %(start_date)s
|
||||
AND {biz_expr} <= %(end_date)s
|
||||
AND sh.settle_type IN (1, 3) -- 台桌结账 + 商城订单,排除退货/撤销
|
||||
AND sh.coupon_amount > 0 -- 只统计有团购的订单
|
||||
GROUP BY sh.pay_time::DATE
|
||||
GROUP BY {biz_expr}
|
||||
"""
|
||||
rows = self.db.query(sql, {
|
||||
'site_id': site_id,
|
||||
@@ -206,22 +212,24 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
|
||||
2794699703437125, # 酒水卡
|
||||
2793266846533445, # 活动抵用券
|
||||
)
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("change_time", cutoff)
|
||||
id_list = ", ".join(str(card_id) for card_id in gift_card_type_ids)
|
||||
sql = f"""
|
||||
SELECT
|
||||
change_time::DATE AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
card_type_id,
|
||||
COUNT(*) AS consume_count,
|
||||
SUM(ABS(change_amount)) AS consume_amount
|
||||
FROM dwd.dwd_member_balance_change
|
||||
WHERE site_id = %(site_id)s
|
||||
AND change_time >= %(start_date)s
|
||||
AND change_time < %(end_date)s + INTERVAL '1 day'
|
||||
AND {biz_expr} >= %(start_date)s
|
||||
AND {biz_expr} <= %(end_date)s
|
||||
AND from_type = 1
|
||||
AND change_amount < 0
|
||||
AND COALESCE(is_delete, 0) = 0
|
||||
AND card_type_id IN ({id_list})
|
||||
GROUP BY change_time::DATE, card_type_id
|
||||
GROUP BY {biz_expr}, card_type_id
|
||||
"""
|
||||
rows = self.db.query(sql, {
|
||||
'site_id': site_id,
|
||||
|
||||
@@ -33,6 +33,8 @@ from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import TaskContext
|
||||
from .finance_base_task import FinanceBaseTask
|
||||
|
||||
@@ -94,32 +96,35 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
|
||||
收入类型分类:
|
||||
- TABLE_FEE: 台费收入 (table_charge_money)
|
||||
- GOODS: 商品收入 (goods_money)
|
||||
- ASSISTANT_BASE: 助教基础课 (assistant_pd_money)
|
||||
- ASSISTANT_BONUS: 助教附加课 (assistant_cx_money)
|
||||
- ASSISTANT_PD: 助教陪打收入 (assistant_pd_money)
|
||||
- ASSISTANT_CX: 助教超休收入 (assistant_cx_money)
|
||||
"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
pay_time::DATE AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
-- 台费收入
|
||||
COALESCE(SUM(table_charge_money), 0) AS table_fee_income,
|
||||
COUNT(CASE WHEN table_charge_money > 0 THEN 1 END) AS table_fee_orders,
|
||||
-- 商品收入
|
||||
COALESCE(SUM(goods_money), 0) AS goods_income,
|
||||
COUNT(CASE WHEN goods_money > 0 THEN 1 END) AS goods_orders,
|
||||
-- 助教基础课收入(PD=陪打)
|
||||
COALESCE(SUM(assistant_pd_money), 0) AS assistant_base_income,
|
||||
COUNT(CASE WHEN assistant_pd_money > 0 THEN 1 END) AS assistant_base_orders,
|
||||
-- 助教附加课收入(CX=超休/促销)
|
||||
COALESCE(SUM(assistant_cx_money), 0) AS assistant_bonus_income,
|
||||
COUNT(CASE WHEN assistant_cx_money > 0 THEN 1 END) AS assistant_bonus_orders,
|
||||
-- CHANGE 2026-03-07 | ASSISTANT_BASE/BONUS → PD/CX 命名校准
|
||||
-- 助教陪打收入
|
||||
COALESCE(SUM(assistant_pd_money), 0) AS assistant_pd_income,
|
||||
COUNT(CASE WHEN assistant_pd_money > 0 THEN 1 END) AS assistant_pd_orders,
|
||||
-- 助教超休收入
|
||||
COALESCE(SUM(assistant_cx_money), 0) AS assistant_cx_income,
|
||||
COUNT(CASE WHEN assistant_cx_money > 0 THEN 1 END) AS assistant_cx_orders,
|
||||
-- 总订单数
|
||||
COUNT(*) AS total_orders
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %(site_id)s
|
||||
AND pay_time >= %(start_date)s
|
||||
AND pay_time < %(end_date)s + INTERVAL '1 day'
|
||||
AND settle_status = 1 -- 已结账
|
||||
GROUP BY pay_time::DATE
|
||||
AND {biz_expr} >= %(start_date)s
|
||||
AND {biz_expr} <= %(end_date)s
|
||||
AND settle_type IN (1, 3) -- 台桌结账 + 商城订单,排除退货/撤销
|
||||
GROUP BY {biz_expr}
|
||||
ORDER BY stat_date
|
||||
"""
|
||||
rows = self.db.query(sql, {
|
||||
@@ -142,46 +147,57 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
|
||||
"""
|
||||
# CHANGE 2026-02-22 | BUG 7 修复 | dim_table 主键是 table_id 而非 site_table_id,
|
||||
# JOIN 条件从 dt.site_table_id → dt.table_id(事实表侧 site_table_id 不变)
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("sh.pay_time", cutoff)
|
||||
sql = f"""
|
||||
WITH area_orders AS (
|
||||
SELECT
|
||||
tfl.pay_time::DATE AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
dt.site_table_area_name AS area_name,
|
||||
dt.table_name AS table_name,
|
||||
tfl.order_settle_id,
|
||||
COALESCE(tfl.ledger_amount, 0) AS income_amount,
|
||||
COALESCE(tfl.ledger_time_seconds, 0) AS duration_seconds
|
||||
COALESCE(tfl.ledger_count, 0) AS duration_seconds
|
||||
FROM dwd.dwd_table_fee_log tfl
|
||||
INNER JOIN dwd.dwd_settlement_head sh
|
||||
ON sh.order_settle_id = tfl.order_settle_id
|
||||
LEFT JOIN dwd.dim_table dt
|
||||
ON dt.table_id = tfl.site_table_id
|
||||
AND dt.scd2_is_current = 1
|
||||
WHERE tfl.site_id = %(site_id)s
|
||||
AND tfl.pay_time >= %(start_date)s
|
||||
AND tfl.pay_time < %(end_date)s + INTERVAL '1 day'
|
||||
AND {biz_expr} >= %(start_date)s
|
||||
AND {biz_expr} <= %(end_date)s
|
||||
AND COALESCE(tfl.is_delete, 0) = 0
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT
|
||||
asl.start_use_time::DATE AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
dt.site_table_area_name AS area_name,
|
||||
dt.table_name AS table_name,
|
||||
asl.order_settle_id,
|
||||
COALESCE(asl.ledger_amount, 0) AS income_amount,
|
||||
COALESCE(asl.income_seconds, 0) AS duration_seconds
|
||||
FROM dwd.dwd_assistant_service_log asl
|
||||
INNER JOIN dwd.dwd_settlement_head sh
|
||||
ON sh.order_settle_id = asl.order_settle_id
|
||||
LEFT JOIN dwd.dim_table dt
|
||||
ON dt.table_id = asl.site_table_id
|
||||
AND dt.scd2_is_current = 1
|
||||
WHERE asl.site_id = %(site_id)s
|
||||
AND asl.start_use_time >= %(start_date)s
|
||||
AND asl.start_use_time < %(end_date)s + INTERVAL '1 day'
|
||||
AND {biz_expr} >= %(start_date)s
|
||||
AND {biz_expr} <= %(end_date)s
|
||||
AND asl.is_delete = 0
|
||||
)
|
||||
SELECT
|
||||
stat_date,
|
||||
area_name,
|
||||
table_name,
|
||||
COALESCE(SUM(income_amount), 0) AS income_amount,
|
||||
COALESCE(SUM(duration_seconds), 0) AS duration_seconds,
|
||||
COUNT(DISTINCT order_settle_id) AS order_count
|
||||
FROM area_orders
|
||||
GROUP BY stat_date, area_name
|
||||
GROUP BY stat_date, area_name, table_name
|
||||
ORDER BY stat_date, area_name
|
||||
"""
|
||||
rows = self.db.query(sql, {
|
||||
@@ -232,14 +248,14 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
|
||||
"""
|
||||
转换按收入类型的数据
|
||||
|
||||
将每日汇总数据展开为4条记录(台费/商品/基础课/附加课)
|
||||
将每日汇总数据展开为4条记录(台费/商品/陪打/超休)
|
||||
"""
|
||||
# 收入类型定义
|
||||
# CHANGE 2026-03-07 | ASSISTANT_BASE/BONUS → PD/CX 命名校准
|
||||
income_types = [
|
||||
('TABLE_FEE', '台费收入', 'table_fee_income', 'table_fee_orders'),
|
||||
('GOODS', '商品收入', 'goods_income', 'goods_orders'),
|
||||
('ASSISTANT_BASE', '助教基础课', 'assistant_base_income', 'assistant_base_orders'),
|
||||
('ASSISTANT_BONUS', '助教附加课', 'assistant_bonus_income', 'assistant_bonus_orders'),
|
||||
('ASSISTANT_PD', '助教陪打收入', 'assistant_pd_income', 'assistant_pd_orders'),
|
||||
('ASSISTANT_CX', '助教超休收入', 'assistant_cx_income', 'assistant_cx_orders'),
|
||||
]
|
||||
|
||||
records = []
|
||||
@@ -309,8 +325,8 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
|
||||
duration_seconds = row.get('duration_seconds', 0) or 0
|
||||
order_count = row.get('order_count', 0) or 0
|
||||
|
||||
# 映射区域名称到分类代码
|
||||
category = self.get_area_category(area_name)
|
||||
# CHANGE 2026-03-07 | 传入 table_name 支持台桌级映射(VIP包厢 V5→斯诺克)
|
||||
category = self.get_area_category(area_name, row.get('table_name'))
|
||||
category_code = category.get('category_code', 'OTHER')
|
||||
category_name = category.get('category_name', '其他区域')
|
||||
|
||||
@@ -363,7 +379,7 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
|
||||
"""
|
||||
兼容旧逻辑的映射方法(当前使用 get_area_category)
|
||||
"""
|
||||
return self.get_area_category(area_name)
|
||||
return self.get_area_category(area_name, None)
|
||||
|
||||
def load(self, records: List[Dict[str, Any]], context: TaskContext) -> Dict[str, Any]:
|
||||
"""
|
||||
|
||||
@@ -31,6 +31,8 @@ from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import TaskContext
|
||||
from .finance_base_task import FinanceBaseTask
|
||||
|
||||
@@ -111,9 +113,11 @@ class FinanceRechargeTask(FinanceBaseTask):
|
||||
|
||||
def _extract_recharge_summary(self, site_id: int, start_date: date, end_date: date) -> List[Dict[str, Any]]:
|
||||
# CHANGE 2026-02-21 | BUG 8: dwd_recharge_order 无 pay_money/gift_money,实际字段为 pay_amount/point_amount
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
DATE(pay_time) AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
COUNT(*) AS recharge_count,
|
||||
SUM(pay_amount + point_amount) AS recharge_total,
|
||||
SUM(pay_amount) AS recharge_cash,
|
||||
@@ -129,8 +133,8 @@ class FinanceRechargeTask(FinanceBaseTask):
|
||||
COUNT(DISTINCT member_id) AS recharge_member_count,
|
||||
COUNT(DISTINCT CASE WHEN is_first = 1 THEN member_id END) AS new_member_count
|
||||
FROM dwd.dwd_recharge_order
|
||||
WHERE site_id = %s AND DATE(pay_time) >= %s AND DATE(pay_time) <= %s
|
||||
GROUP BY DATE(pay_time)
|
||||
WHERE site_id = %s AND {biz_expr} >= %s AND {biz_expr} <= %s
|
||||
GROUP BY {biz_expr}
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
@@ -29,6 +29,8 @@ from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
@@ -74,7 +76,9 @@ class GoodsStockDailyTask(BaseDwsTask):
|
||||
self.get_task_code(), site_id, start_date, end_date,
|
||||
)
|
||||
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("fetched_at", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
site_goods_id,
|
||||
goods_name,
|
||||
@@ -92,11 +96,12 @@ class GoodsStockDailyTask(BaseDwsTask):
|
||||
current_stock,
|
||||
site_id,
|
||||
tenant_id,
|
||||
fetched_at
|
||||
fetched_at,
|
||||
{biz_expr} AS biz_date
|
||||
FROM dwd.dwd_goods_stock_summary
|
||||
WHERE site_id = %s
|
||||
AND DATE(fetched_at) >= %s
|
||||
AND DATE(fetched_at) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
ORDER BY fetched_at
|
||||
"""
|
||||
rows = self.query_dwd(sql, (site_id, start_date, end_date))
|
||||
@@ -135,11 +140,14 @@ class GoodsStockDailyTask(BaseDwsTask):
|
||||
fetched_at = row.get("fetched_at")
|
||||
if fetched_at is None:
|
||||
continue
|
||||
stat_date = (
|
||||
fetched_at.date()
|
||||
if hasattr(fetched_at, "date")
|
||||
else fetched_at
|
||||
)
|
||||
# 使用 SQL 层计算的营业日归属日期
|
||||
stat_date = row.get("biz_date")
|
||||
if stat_date is None:
|
||||
stat_date = (
|
||||
fetched_at.date()
|
||||
if hasattr(fetched_at, "date")
|
||||
else fetched_at
|
||||
)
|
||||
site_goods_id = row.get("site_goods_id")
|
||||
if site_goods_id is None:
|
||||
continue
|
||||
|
||||
@@ -31,6 +31,8 @@ from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
@@ -81,7 +83,9 @@ class GoodsStockMonthlyTask(BaseDwsTask):
|
||||
self.get_task_code(), site_id, start_date, end_date,
|
||||
)
|
||||
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("fetched_at", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
site_goods_id,
|
||||
goods_name,
|
||||
@@ -99,11 +103,12 @@ class GoodsStockMonthlyTask(BaseDwsTask):
|
||||
current_stock,
|
||||
site_id,
|
||||
tenant_id,
|
||||
fetched_at
|
||||
fetched_at,
|
||||
{biz_expr} AS biz_date
|
||||
FROM dwd.dwd_goods_stock_summary
|
||||
WHERE site_id = %s
|
||||
AND DATE(fetched_at) >= %s
|
||||
AND DATE(fetched_at) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
ORDER BY fetched_at
|
||||
"""
|
||||
rows = self.query_dwd(sql, (site_id, start_date, end_date))
|
||||
@@ -141,12 +146,15 @@ class GoodsStockMonthlyTask(BaseDwsTask):
|
||||
fetched_at = row.get("fetched_at")
|
||||
if fetched_at is None:
|
||||
continue
|
||||
row_date = (
|
||||
fetched_at.date()
|
||||
if hasattr(fetched_at, "date")
|
||||
else fetched_at
|
||||
)
|
||||
# 自然月的第一天作为 stat_date
|
||||
# 使用 SQL 层计算的营业日归属日期
|
||||
row_date = row.get("biz_date")
|
||||
if row_date is None:
|
||||
row_date = (
|
||||
fetched_at.date()
|
||||
if hasattr(fetched_at, "date")
|
||||
else fetched_at
|
||||
)
|
||||
# 营业月的第一天作为 stat_date
|
||||
first_day = _month_first_day(row_date)
|
||||
site_goods_id = row.get("site_goods_id")
|
||||
if site_goods_id is None:
|
||||
|
||||
@@ -31,6 +31,8 @@ from datetime import date, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
@@ -82,7 +84,9 @@ class GoodsStockWeeklyTask(BaseDwsTask):
|
||||
self.get_task_code(), site_id, start_date, end_date,
|
||||
)
|
||||
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("fetched_at", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
site_goods_id,
|
||||
goods_name,
|
||||
@@ -100,11 +104,12 @@ class GoodsStockWeeklyTask(BaseDwsTask):
|
||||
current_stock,
|
||||
site_id,
|
||||
tenant_id,
|
||||
fetched_at
|
||||
fetched_at,
|
||||
{biz_expr} AS biz_date
|
||||
FROM dwd.dwd_goods_stock_summary
|
||||
WHERE site_id = %s
|
||||
AND DATE(fetched_at) >= %s
|
||||
AND DATE(fetched_at) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
ORDER BY fetched_at
|
||||
"""
|
||||
rows = self.query_dwd(sql, (site_id, start_date, end_date))
|
||||
@@ -142,12 +147,15 @@ class GoodsStockWeeklyTask(BaseDwsTask):
|
||||
fetched_at = row.get("fetched_at")
|
||||
if fetched_at is None:
|
||||
continue
|
||||
row_date = (
|
||||
fetched_at.date()
|
||||
if hasattr(fetched_at, "date")
|
||||
else fetched_at
|
||||
)
|
||||
# ISO 周的周一作为 stat_date
|
||||
# 使用 SQL 层计算的营业日归属日期
|
||||
row_date = row.get("biz_date")
|
||||
if row_date is None:
|
||||
row_date = (
|
||||
fetched_at.date()
|
||||
if hasattr(fetched_at, "date")
|
||||
else fetched_at
|
||||
)
|
||||
# 营业周的周一作为 stat_date
|
||||
monday = _iso_monday(row_date)
|
||||
site_goods_id = row.get("site_goods_id")
|
||||
if site_goods_id is None:
|
||||
|
||||
@@ -12,6 +12,8 @@ from typing import Any, Dict, List, Optional, Tuple
|
||||
from .base_index_task import BaseIndexTask
|
||||
from ..base_dws_task import TaskContext
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
|
||||
@dataclass
|
||||
class MemberActivityData:
|
||||
@@ -238,6 +240,8 @@ class MemberIndexBaseTask(BaseIndexTask):
|
||||
end_date: date,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""提取到店记录(按天去重)"""
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
condition_sql = self._build_visit_condition_sql()
|
||||
sql = f"""
|
||||
WITH visit_source AS (
|
||||
@@ -258,12 +262,12 @@ class MemberIndexBaseTask(BaseIndexTask):
|
||||
)
|
||||
SELECT
|
||||
canonical_member_id AS member_id,
|
||||
DATE(pay_time) AS visit_date,
|
||||
{biz_expr} AS visit_date,
|
||||
MAX(pay_time) AS last_visit_time,
|
||||
SUM(COALESCE(pay_amount, 0)) AS day_pay_amount
|
||||
FROM visit_source
|
||||
WHERE canonical_member_id > 0
|
||||
GROUP BY canonical_member_id, DATE(pay_time)
|
||||
GROUP BY canonical_member_id, {biz_expr}
|
||||
ORDER BY canonical_member_id, visit_date
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
|
||||
@@ -214,7 +214,7 @@ class RelationIndexTask(BaseIndexTask):
|
||||
JOIN dwd.dim_assistant d
|
||||
ON s.user_id = d.user_id
|
||||
AND d.scd2_is_current = 1
|
||||
AND COALESCE(d.is_delete, 0) = 0
|
||||
AND COALESCE(d.leave_status, 0) = 0
|
||||
WHERE s.site_id = %s
|
||||
AND s.tenant_member_id > 0
|
||||
AND s.user_id > 0
|
||||
|
||||
@@ -18,6 +18,8 @@ from typing import Any, Dict, List, Optional
|
||||
from .base_index_task import BaseIndexTask
|
||||
from ..base_dws_task import TaskContext
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 数据类定义
|
||||
@@ -333,6 +335,10 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
short_days = int(params.get('spend_window_short_days', 30))
|
||||
long_days = int(params.get('spend_window_long_days', 90))
|
||||
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 7.6: DATE(pay_time) → 营业日归属表达式
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
|
||||
# 单条 SQL 同时聚合 30 天和 90 天窗口,避免两次扫描
|
||||
# INTERVAL 天数通过 f-string 内嵌(整数,安全);site_id 走参数化
|
||||
sql = f"""
|
||||
@@ -357,7 +363,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
-- 90 天窗口
|
||||
SUM(pay_amount) AS spend_90,
|
||||
COUNT(*) AS orders_90,
|
||||
COUNT(DISTINCT DATE(pay_time)) AS visit_days_90,
|
||||
COUNT(DISTINCT {biz_expr}) AS visit_days_90,
|
||||
COUNT(DISTINCT EXTRACT(ISOYEAR FROM pay_time)::int * 100
|
||||
+ EXTRACT(WEEK FROM pay_time)::int) AS active_weeks_90,
|
||||
-- 30 天窗口(子集过滤)
|
||||
@@ -366,7 +372,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
|
||||
THEN 1 ELSE 0 END) AS orders_30,
|
||||
COUNT(DISTINCT CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
|
||||
THEN DATE(pay_time) END) AS visit_days_30
|
||||
THEN {biz_expr} END) AS visit_days_30
|
||||
FROM consume_source
|
||||
WHERE canonical_member_id > 0
|
||||
GROUP BY canonical_member_id
|
||||
@@ -467,12 +473,15 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
long_days = int(params.get('spend_window_long_days', 90))
|
||||
alpha = float(params.get('ewma_alpha_daily_spend', 0.3))
|
||||
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr_s = biz_date_sql_expr("s.pay_time", cutoff)
|
||||
|
||||
sql = f"""
|
||||
WITH consume_source AS (
|
||||
SELECT
|
||||
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
|
||||
AS canonical_member_id,
|
||||
DATE(s.pay_time) AS pay_date,
|
||||
{biz_expr_s} AS pay_date,
|
||||
COALESCE(s.pay_amount, 0) AS pay_amount
|
||||
FROM dwd.dwd_settlement_head s
|
||||
LEFT JOIN dwd.dim_member_card_account mca
|
||||
@@ -516,12 +525,15 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
long_days = int(params.get('spend_window_long_days', 90))
|
||||
alpha = float(params.get('ewma_alpha_daily_spend', 0.3))
|
||||
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr_s = biz_date_sql_expr("s.pay_time", cutoff)
|
||||
|
||||
sql = f"""
|
||||
WITH consume_source AS (
|
||||
SELECT
|
||||
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
|
||||
AS canonical_member_id,
|
||||
DATE(s.pay_time) AS pay_date,
|
||||
{biz_expr_s} AS pay_date,
|
||||
COALESCE(s.pay_amount, 0) AS pay_amount
|
||||
FROM dwd.dwd_settlement_head s
|
||||
LEFT JOIN dwd.dim_member_card_account mca
|
||||
@@ -572,13 +584,17 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
)
|
||||
return result
|
||||
|
||||
# CHANGE 2026-03-02 | 基数校准改用非零样本中位数,零消费会员不参与校准
|
||||
# 原因:零消费会员不参与 SPI 有效区分,纳入中位数只会拉低基数
|
||||
_CALIBRATE_MIN_SAMPLE = 10 # 非零样本最小数量,低于此值回退默认值
|
||||
|
||||
def _calibrate_amount_bases(
|
||||
self, features: Dict[int, SPIMemberFeatures], params: Dict[str, float]
|
||||
) -> Dict[str, float]:
|
||||
"""从门店数据计算中位数作为金额压缩基数校准值。
|
||||
|
||||
优先级:cfg_index_parameters 配置值 > 自动校准中位数 > DEFAULT_PARAMS 默认值。
|
||||
自动校准中位数 ≤ 0 时回退到 DEFAULT_PARAMS。
|
||||
优先级:cfg_index_parameters 配置值 > 非零样本自动校准中位数 > DEFAULT_PARAMS 默认值。
|
||||
仅使用值 > 0 的样本计算中位数;非零样本数 < _CALIBRATE_MIN_SAMPLE 时回退默认值。
|
||||
"""
|
||||
# 特征字段 → 对应的 amount_base 参数名
|
||||
base_extractors: Dict[str, callable] = {
|
||||
@@ -600,21 +616,23 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
)
|
||||
continue
|
||||
|
||||
# 从特征数据计算中位数
|
||||
values = [extractor(f) for f in features.values()]
|
||||
median_val = self.calculate_median(values)
|
||||
# 仅取非零样本计算中位数
|
||||
nonzero_values = [v for v in (extractor(f) for f in features.values()) if v > 0]
|
||||
|
||||
if median_val > 0:
|
||||
if len(nonzero_values) >= self._CALIBRATE_MIN_SAMPLE:
|
||||
median_val = self.calculate_median(nonzero_values)
|
||||
calibrated[base_key] = median_val
|
||||
self.logger.info(
|
||||
"SPI 基数校准: %s 自动校准为中位数 %.2f", base_key, median_val,
|
||||
"SPI 基数校准: %s 非零样本 %d/%d,中位数 %.2f",
|
||||
base_key, len(nonzero_values), len(features), median_val,
|
||||
)
|
||||
else:
|
||||
# 中位数 ≤ 0,回退到 DEFAULT_PARAMS
|
||||
# 非零样本不足,回退到 DEFAULT_PARAMS
|
||||
calibrated[base_key] = self.DEFAULT_PARAMS[base_key]
|
||||
self.logger.warning(
|
||||
"SPI 基数校准: %s 中位数 %.2f ≤ 0,回退到默认值 %.2f",
|
||||
base_key, median_val, self.DEFAULT_PARAMS[base_key],
|
||||
"SPI 基数校准: %s 非零样本 %d 不足(最低 %d),回退到默认值 %.2f",
|
||||
base_key, len(nonzero_values), self._CALIBRATE_MIN_SAMPLE,
|
||||
self.DEFAULT_PARAMS[base_key],
|
||||
)
|
||||
|
||||
return calibrated
|
||||
@@ -747,6 +765,13 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
)
|
||||
"""
|
||||
inserted = 0
|
||||
# raw score 列为 numeric(10,4),display 列为 numeric(5,2)
|
||||
# 防止极端数据导致 NumericValueOutOfRange
|
||||
RAW_MAX = 999999.9999
|
||||
DISP_MAX = 999.99
|
||||
def _clamp(v, lo, hi):
|
||||
return max(lo, min(hi, v))
|
||||
|
||||
for f in data_list:
|
||||
cur.execute(insert_sql, (
|
||||
f.site_id, f.member_id,
|
||||
@@ -754,9 +779,14 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
f.orders_30, f.orders_90,
|
||||
f.visit_days_30, f.visit_days_90,
|
||||
f.avg_ticket_90, f.active_weeks_90, f.daily_spend_ewma_90,
|
||||
f.score_level_raw, f.score_speed_raw, f.score_stability_raw,
|
||||
f.score_level_display, f.score_speed_display, f.score_stability_display,
|
||||
f.raw_score, f.display_score,
|
||||
_clamp(f.score_level_raw, -RAW_MAX, RAW_MAX),
|
||||
_clamp(f.score_speed_raw, -RAW_MAX, RAW_MAX),
|
||||
_clamp(f.score_stability_raw, -RAW_MAX, RAW_MAX),
|
||||
_clamp(f.score_level_display, 0, DISP_MAX),
|
||||
_clamp(f.score_speed_display, 0, DISP_MAX),
|
||||
_clamp(f.score_stability_display, 0, DISP_MAX),
|
||||
_clamp(f.raw_score, -RAW_MAX, RAW_MAX),
|
||||
_clamp(f.display_score, 0, DISP_MAX),
|
||||
))
|
||||
inserted += max(cur.rowcount, 0)
|
||||
|
||||
|
||||
@@ -68,6 +68,10 @@ class DwsMaintenanceTask(BaseDwsTask):
|
||||
{"table": "dws_finance_recharge_summary", "date_col": "stat_date"},
|
||||
{"table": "dws_finance_expense_summary", "date_col": "expense_month"},
|
||||
{"table": "dws_platform_settlement", "date_col": "settlement_date"},
|
||||
# CHANGE [2026-03-07] intent: 项目标签表纳入历史数据清理范围
|
||||
# assumptions: computed_at 为清理日期列,与其他表的 stat_date 语义一致
|
||||
{"table": "dws_assistant_project_tag", "date_col": "computed_at"},
|
||||
{"table": "dws_member_project_tag", "date_col": "computed_at"},
|
||||
]
|
||||
|
||||
def get_task_code(self) -> str:
|
||||
|
||||
@@ -6,9 +6,10 @@
|
||||
以"会员"为粒度,统计消费行为和滚动窗口指标
|
||||
|
||||
数据来源:
|
||||
- dwd_settlement_head: 结账单头表
|
||||
- dwd_settlement_head: 结账单头表(settle_type IN (1,3) 过滤有效订单)
|
||||
- dim_member: 会员维度
|
||||
- dim_member_card_account: 会员卡账户
|
||||
- dwd_recharge_order: 充值订单(30/60/90 天窗口统计)
|
||||
|
||||
目标表:
|
||||
dws.dws_member_consumption_summary
|
||||
@@ -32,6 +33,8 @@ from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
from .dws_helpers import mask_mobile, calc_days_since
|
||||
|
||||
@@ -209,12 +212,18 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
"""
|
||||
提取会员消费统计(含滚动窗口)
|
||||
"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
-- CHANGE 2026-03-07 | consume_money → items_sum 口径校准
|
||||
-- consume_money 存在三种历史口径(A/B/C)混合,DWS 层统一使用 items_sum
|
||||
-- items_sum = table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money
|
||||
WITH consume_base AS (
|
||||
SELECT
|
||||
member_id,
|
||||
DATE(pay_time) AS consume_date,
|
||||
consume_money,
|
||||
{biz_expr} AS consume_date,
|
||||
table_charge_money + goods_money + assistant_pd_money
|
||||
+ assistant_cx_money + electricity_money AS items_sum,
|
||||
table_charge_money,
|
||||
goods_money,
|
||||
assistant_pd_money + assistant_cx_money AS assistant_amount
|
||||
@@ -222,6 +231,9 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
WHERE site_id = %s
|
||||
AND member_id IS NOT NULL
|
||||
AND member_id != 0
|
||||
-- CHANGE 2026-03-07 | dwd_settlement_head 无 is_delete 字段,改用 settle_type 过滤
|
||||
-- settle_type: 1=台桌结账, 3=商城订单; 排除 6=退货, 7=撤销
|
||||
AND settle_type IN (1, 3)
|
||||
)
|
||||
SELECT
|
||||
member_id,
|
||||
@@ -229,7 +241,7 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
MAX(consume_date) AS last_consume_date,
|
||||
-- 全量累计
|
||||
COUNT(*) AS total_visit_count,
|
||||
SUM(consume_money) AS total_consume_amount,
|
||||
SUM(items_sum) AS total_consume_amount,
|
||||
SUM(table_charge_money) AS total_table_fee,
|
||||
SUM(goods_money) AS total_goods_amount,
|
||||
SUM(assistant_amount) AS total_assistant_amount,
|
||||
@@ -240,12 +252,12 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
COUNT(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN 1 END) AS visit_count_30d,
|
||||
COUNT(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN 1 END) AS visit_count_60d,
|
||||
COUNT(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN 1 END) AS visit_count_90d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '6 days' THEN consume_money ELSE 0 END) AS consume_amount_7d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '9 days' THEN consume_money ELSE 0 END) AS consume_amount_10d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '14 days' THEN consume_money ELSE 0 END) AS consume_amount_15d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN consume_money ELSE 0 END) AS consume_amount_30d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN consume_money ELSE 0 END) AS consume_amount_60d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN consume_money ELSE 0 END) AS consume_amount_90d
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '6 days' THEN items_sum ELSE 0 END) AS consume_amount_7d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '9 days' THEN items_sum ELSE 0 END) AS consume_amount_10d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '14 days' THEN items_sum ELSE 0 END) AS consume_amount_15d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN items_sum ELSE 0 END) AS consume_amount_30d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN items_sum ELSE 0 END) AS consume_amount_60d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN items_sum ELSE 0 END) AS consume_amount_90d
|
||||
FROM consume_base
|
||||
GROUP BY member_id
|
||||
"""
|
||||
@@ -257,29 +269,21 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
"""
|
||||
提取会员信息
|
||||
|
||||
生日优先级:手动补录(fdw_app.member_birthday_manual)> API 来源(dim_member.birthday)
|
||||
FDW 连接失败时降级为仅使用 dim_member.birthday
|
||||
生日来源:dim_member.birthday(API 来源)
|
||||
CHANGE 2026-02-26 | 维客线索重构:移除 FDW member_birthday_manual 读取,
|
||||
生日不再单独补录,归入维客线索"客户基础信息"大类
|
||||
"""
|
||||
# CHANGE 2026-02-21 | dim_member 无 site_id 字段,改用 register_site_id
|
||||
# CHANGE 2026-02-22 | 恢复 birthday 字段(C1 迁移已加列),供后续 C2 COALESCE 使用
|
||||
# CHANGE 2026-02-22 | 需求 B:通过事实表反查,支持跨店消费会员
|
||||
# CHANGE 2026-02-22 | 需求 C2:COALESCE 优先手动补录生日,FDW 失败时降级
|
||||
sql_with_fdw = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr_create = biz_date_sql_expr("m.create_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
m.member_id,
|
||||
m.nickname,
|
||||
m.mobile,
|
||||
m.member_card_grade_name,
|
||||
DATE(m.create_time) AS register_date,
|
||||
{biz_expr_create} AS register_date,
|
||||
m.recharge_money_sum,
|
||||
COALESCE(
|
||||
(SELECT birthday_value
|
||||
FROM fdw_app.member_birthday_manual
|
||||
WHERE member_id = m.member_id
|
||||
ORDER BY recorded_at ASC
|
||||
LIMIT 1),
|
||||
m.birthday
|
||||
) AS birthday
|
||||
m.birthday
|
||||
FROM dwd.dim_member m
|
||||
WHERE m.member_id IN (
|
||||
SELECT DISTINCT member_id
|
||||
@@ -289,36 +293,7 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
AND member_id != 0
|
||||
) AND m.scd2_is_current = 1
|
||||
"""
|
||||
# CHANGE 2026-02-24 | 修复列名:tenant_member_id → member_id(dwd_settlement_head 无 tenant_member_id 列)
|
||||
sql_fallback = """
|
||||
SELECT
|
||||
member_id,
|
||||
nickname,
|
||||
mobile,
|
||||
member_card_grade_name,
|
||||
DATE(create_time) AS register_date,
|
||||
recharge_money_sum,
|
||||
birthday
|
||||
FROM dwd.dim_member
|
||||
WHERE member_id IN (
|
||||
SELECT DISTINCT member_id
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND member_id IS NOT NULL
|
||||
AND member_id != 0
|
||||
) AND scd2_is_current = 1
|
||||
"""
|
||||
try:
|
||||
rows = self.db.query(sql_with_fdw, (site_id,))
|
||||
except Exception as exc:
|
||||
# CHANGE [2026-02-24] FDW 查询失败后事务处于 failed 状态,必须先 rollback 再执行 fallback
|
||||
self.db.rollback()
|
||||
# FDW 连接失败,降级为仅使用 dim_member.birthday
|
||||
self.logger.warning(
|
||||
"%s: FDW 读取 member_birthday_manual 失败,降级为 dim_member.birthday — %s",
|
||||
self.get_task_code(), exc,
|
||||
)
|
||||
rows = self.db.query(sql_fallback, (site_id,))
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
|
||||
result = {}
|
||||
for row in (rows or []):
|
||||
@@ -343,11 +318,11 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
balance
|
||||
FROM dwd.dim_member_card_account
|
||||
WHERE tenant_member_id IN (
|
||||
SELECT DISTINCT tenant_member_id
|
||||
SELECT DISTINCT member_id
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND tenant_member_id IS NOT NULL
|
||||
AND tenant_member_id != 0
|
||||
AND member_id IS NOT NULL
|
||||
AND member_id != 0
|
||||
) AND scd2_is_current = 1
|
||||
AND COALESCE(is_delete, 0) = 0
|
||||
"""
|
||||
@@ -390,21 +365,23 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
返回: {member_id: {count_30d, count_60d, count_90d,
|
||||
amount_30d, amount_60d, amount_90d}}
|
||||
"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
member_id,
|
||||
COUNT(CASE WHEN DATE(pay_time) >= %s - INTERVAL '29 days' THEN 1 END) AS count_30d,
|
||||
COUNT(CASE WHEN DATE(pay_time) >= %s - INTERVAL '59 days' THEN 1 END) AS count_60d,
|
||||
COUNT(CASE WHEN DATE(pay_time) >= %s - INTERVAL '89 days' THEN 1 END) AS count_90d,
|
||||
COALESCE(SUM(CASE WHEN DATE(pay_time) >= %s - INTERVAL '29 days' THEN pay_amount ELSE 0 END), 0) AS amount_30d,
|
||||
COALESCE(SUM(CASE WHEN DATE(pay_time) >= %s - INTERVAL '59 days' THEN pay_amount ELSE 0 END), 0) AS amount_60d,
|
||||
COALESCE(SUM(CASE WHEN DATE(pay_time) >= %s - INTERVAL '89 days' THEN pay_amount ELSE 0 END), 0) AS amount_90d
|
||||
COUNT(CASE WHEN {biz_expr} >= %s - INTERVAL '29 days' THEN 1 END) AS count_30d,
|
||||
COUNT(CASE WHEN {biz_expr} >= %s - INTERVAL '59 days' THEN 1 END) AS count_60d,
|
||||
COUNT(CASE WHEN {biz_expr} >= %s - INTERVAL '89 days' THEN 1 END) AS count_90d,
|
||||
COALESCE(SUM(CASE WHEN {biz_expr} >= %s - INTERVAL '29 days' THEN pay_amount ELSE 0 END), 0) AS amount_30d,
|
||||
COALESCE(SUM(CASE WHEN {biz_expr} >= %s - INTERVAL '59 days' THEN pay_amount ELSE 0 END), 0) AS amount_60d,
|
||||
COALESCE(SUM(CASE WHEN {biz_expr} >= %s - INTERVAL '89 days' THEN pay_amount ELSE 0 END), 0) AS amount_90d
|
||||
FROM dwd.dwd_recharge_order
|
||||
WHERE site_id = %s
|
||||
AND member_id IS NOT NULL
|
||||
AND member_id != 0
|
||||
AND pay_time IS NOT NULL
|
||||
AND DATE(pay_time) <= %s
|
||||
AND {biz_expr} <= %s
|
||||
GROUP BY member_id
|
||||
"""
|
||||
params = (
|
||||
|
||||
224
apps/etl/connectors/feiqiu/tasks/dws/member_project_tag_task.py
Normal file
224
apps/etl/connectors/feiqiu/tasks/dws/member_project_tag_task.py
Normal file
@@ -0,0 +1,224 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
DWS 客户项目标签任务
|
||||
|
||||
按时间窗口计算每位客户在四大项目(BILLIARD/SNOOKER/MAHJONG/KTV)的
|
||||
消费时长占比,占比≥25% 则分配标签。散客(member_id=0)不参与。
|
||||
|
||||
数据链路:
|
||||
dwd_table_fee_log (ledger_count)
|
||||
→ JOIN dim_table (site_table_id → table_id, scd2_is_current=1)
|
||||
→ get_area_category(area_name, table_name)
|
||||
→ 按 category_code 汇总 → 计算占比 → 写入 dws_member_project_tag
|
||||
|
||||
目标表:
|
||||
dws.dws_member_project_tag
|
||||
|
||||
更新策略:
|
||||
全量删除重建(按 site_id 删除后重新插入所有时间窗口)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from tasks.dws.base_dws_task import BaseDwsTask, TimeWindow
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
# Only these four project categories take part in the tag computation.
VALID_CATEGORIES = {"BILLIARD", "SNOOKER", "MAHJONG", "KTV"}

# The two time windows used by the member dashboard.
MEMBER_WINDOWS = [
    TimeWindow.LAST_30_DAYS,
    TimeWindow.LAST_60_DAYS,
]

# NOTE(review): the module docstring says a category is tagged when its share
# of duration is >= 25%; presumably this constant is that cutoff — the
# comparison itself is not visible here, confirm against transform().
TAG_THRESHOLD = Decimal("0.25")
|
||||
|
||||
|
||||
class MemberProjectTagTask(BaseDwsTask):
|
||||
"""客户项目标签 ETL 任务"""
|
||||
|
||||
def get_task_code(self) -> str:
    """Return the task code string for this task."""
    return "DWS_MEMBER_PROJECT_TAG"
|
||||
|
||||
def get_target_table(self) -> str:
    """Return the (unqualified) name of the table this task targets."""
    return "dws_member_project_tag"
|
||||
|
||||
def get_primary_keys(self) -> List[str]:
    """Return the column names that make up the target table's primary key."""
    return ["site_id", "member_id", "time_window", "category_code"]
|
||||
|
||||
def extract(self, context) -> Dict[str, Any]:
    """Collect table-dimension info and per-window member duration rows.

    Args:
        context: Task context; only ``context.store_id`` is read here and
            it is used as the site id.

    Returns:
        A dict with three keys:
        ``window_data`` maps each window's ``.value`` to the rows returned
        by ``_extract_member_durations`` for that window,
        ``table_info`` is the result of ``_extract_table_info``, and
        ``site_id`` is the site id taken from the context.
    """
    site_id = context.store_id
    self.logger.info("%s: 提取客户台费时长数据", self.get_task_code())

    # Config cache is loaded before any extraction query runs.
    self.load_config_cache()
    table_info = self._extract_table_info(site_id)

    window_data: Dict[str, List[Dict]] = {}
    for window in MEMBER_WINDOWS:
        # Each window is resolved to a range object exposing .start / .end.
        time_range = self.get_time_window_range(window)
        rows = self._extract_member_durations(
            site_id, time_range.start, time_range.end
        )
        window_data[window.value] = rows

    return {
        "window_data": window_data,
        "table_info": table_info,
        "site_id": site_id,
    }
|
||||
|
||||
def _extract_table_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
    """Load current table-dimension rows for a site, keyed by ``table_id``.

    Args:
        site_id: Site whose ``dwd.dim_table`` rows are queried.

    Returns:
        ``{table_id: row_dict}`` where each row dict carries ``table_id``,
        ``table_name`` and ``area_name`` (aliased from
        ``site_table_area_name``). Empty dict when the query yields nothing.
    """
    sql = """
        SELECT table_id, table_name, site_table_area_name AS area_name
        FROM dwd.dim_table
        WHERE site_id = %s AND scd2_is_current = 1
    """
    rows = self.db.query(sql, (site_id,))
    # `rows or []` tolerates a None / empty result from the DB layer.
    return {r["table_id"]: dict(r) for r in (rows or [])}
|
||||
|
||||
def _extract_member_durations(
    self, site_id: int, start_date: date, end_date: date
) -> List[Dict[str, Any]]:
    """Sum table-fee ledger counts per (member, table) for a date range.

    The business date of each ledger row is derived from
    ``tfl.ledger_end_time`` through ``biz_date_sql_expr`` using the configured
    ``app.business_day_start_hour`` (default 8). Both range bounds are
    inclusive. Rows flagged ``is_delete`` and rows without a member
    (``member_id`` NULL or 0, i.e. walk-in customers) are excluded.

    Returns:
        A list of dicts with ``member_id``, ``table_id`` and
        ``duration_seconds``; an empty list when the query returns nothing.
    """
    day_start_hour = self.config.get("app.business_day_start_hour", 8)
    biz_day = biz_date_sql_expr("tfl.ledger_end_time", day_start_hour)
    query = f"""
        SELECT
            tfl.member_id,
            tfl.site_table_id AS table_id,
            COALESCE(SUM(tfl.ledger_count), 0) AS duration_seconds
        FROM dwd.dwd_table_fee_log tfl
        WHERE tfl.site_id = %(site_id)s
          AND {biz_day} >= %(start_date)s
          AND {biz_day} <= %(end_date)s
          AND COALESCE(tfl.is_delete, 0) = 0
          AND tfl.member_id IS NOT NULL
          AND tfl.member_id != 0
        GROUP BY tfl.member_id, tfl.site_table_id
    """
    bind = {
        "site_id": site_id,
        "start_date": start_date,
        "end_date": end_date,
    }
    fetched = self.db.query(query, bind)
    if not fetched:
        return []
    return [dict(rec) for rec in fetched]
|
||||
|
||||
def transform(self, extracted: Dict[str, Any], context) -> List[Dict[str, Any]]:
    """Turn per-table durations into per-member category share records.

    For every time window in ``extracted["window_data"]`` the rows are
    grouped by member and by the category that ``get_area_category`` resolves
    from the table's area name / table name. Rows with a non-positive
    duration, and categories outside ``VALID_CATEGORIES``, are dropped.
    For each remaining (member, category) pair one record is emitted holding
    the category's seconds, the member's total seconds across valid
    categories, the share rounded to four decimals, and ``is_tagged`` which is
    true when that rounded share is at least ``TAG_THRESHOLD``.

    Args:
        extracted: Output of ``extract`` (keys ``window_data``,
            ``table_info``, ``site_id``).
        context: Task context; ``tenant_id`` is read if present, else 0.

    Returns:
        The list of records, one per window / member / category.
    """
    table_info = extracted["table_info"]
    site_id = extracted["site_id"]
    tenant_id = getattr(context, "tenant_id", 0) or 0
    results: List[Dict[str, Any]] = []

    # Display metadata depends only on the category code, so it is resolved
    # once per code instead of once per member/category record.
    display_by_code: Dict[str, Dict[str, str]] = {}

    for window_value, rows in extracted["window_data"].items():
        # member_id -> category_code -> seconds
        member_cats: Dict[int, Dict[str, int]] = {}

        for row in rows:
            mid = row["member_id"]
            tid = row["table_id"]
            secs = self.safe_int(row["duration_seconds"])
            if secs <= 0:
                continue

            tinfo = table_info.get(tid, {})
            cat = self.get_area_category(
                tinfo.get("area_name"), tinfo.get("table_name")
            )
            code = cat.get("category_code", "OTHER")
            if code not in VALID_CATEGORIES:
                continue

            per_member = member_cats.setdefault(mid, {})
            per_member[code] = per_member.get(code, 0) + secs

        for mid, cats in member_cats.items():
            total = sum(cats.values())
            if total <= 0:
                continue

            for code, secs in cats.items():
                pct = Decimal(str(secs)) / Decimal(str(total))
                pct = pct.quantize(Decimal("0.0001"))

                cat_info = display_by_code.get(code)
                if cat_info is None:
                    cat_info = self._get_category_display(code)
                    display_by_code[code] = cat_info

                results.append({
                    "site_id": site_id,
                    "tenant_id": tenant_id,
                    "member_id": mid,
                    "time_window": window_value,
                    "category_code": code,
                    "category_name": cat_info["category_name"],
                    "short_name": cat_info["short_name"],
                    "duration_seconds": secs,
                    "total_seconds": total,
                    "percentage": float(pct),
                    "is_tagged": pct >= TAG_THRESHOLD,
                })

    self.logger.info(
        "%s: 生成 %d 条标签记录(其中 %d 条达标)",
        self.get_task_code(),
        len(results),
        sum(1 for r in results if r["is_tagged"]),
    )
    return results
|
||||
|
||||
def _get_category_display(self, code: str) -> Dict[str, str]:
    """Resolve the display name and short label for a category code.

    The first entry of the config cache's ``area_categories`` whose
    ``category_code`` equals ``code`` wins: its ``display_name`` (or, when
    that is empty, ``category_name``, or the code itself) and its
    ``short_name`` (or the first character of the code) are returned.
    When no entry matches, a built-in table for the four known categories is
    used, and unknown codes fall back to the code / its first character.
    """
    categories = self.load_config_cache().area_categories
    matched = next(
        (entry for entry in categories.values() if entry.get("category_code") == code),
        None,
    )
    if matched is not None:
        return {
            "category_name": matched.get("display_name") or matched.get("category_name", code),
            "short_name": matched.get("short_name", code[:1]),
        }

    builtin = {
        "BILLIARD": ("🎱 中式/追分", "🎱"),
        "SNOOKER": ("斯诺克", "斯"),
        "MAHJONG": ("🀄 麻将/棋牌", "🀄"),
        "KTV": ("🎤 团建/K歌", "🎤"),
    }
    if code in builtin:
        label, abbreviation = builtin[code]
    else:
        label, abbreviation = code, code[:1]
    return {"category_name": label, "short_name": abbreviation}
|
||||
|
||||
def load(self, transformed, context) -> dict:
    """Replace a store's tag rows with the freshly computed records.

    All existing rows of the store (taken from the first record's
    ``site_id``) are deleted, then every record is inserted one by one with
    ``computed_at`` / ``created_at`` / ``updated_at`` set to ``NOW()``.
    When ``transformed`` is empty nothing is deleted or inserted, so rows
    from a previous run are left in place.

    Args:
        transformed: Records produced by ``transform``.
        context: Task context (not used by this method).

    Returns:
        ``{"status": "SUCCESS", "counts": {"inserted": n, "deleted": m}}``.
        ``deleted`` is 0 when the row count of the DELETE is not available.
    """
    if not transformed:
        return {"status": "SUCCESS", "counts": {"inserted": 0, "deleted": 0}}

    site_id = transformed[0]["site_id"]

    delete_sql = "DELETE FROM dws.dws_member_project_tag WHERE site_id = %s"
    self.db.execute(delete_sql, (site_id,))
    # `self.db.cursor` may be a cursor object or a `cursor()` factory method
    # (other code in this project calls it). A method has no `rowcount`, so
    # read the attribute defensively instead of raising AttributeError after
    # the DELETE has already been issued.
    rowcount = getattr(getattr(self.db, "cursor", None), "rowcount", None)
    deleted = rowcount if isinstance(rowcount, int) and rowcount >= 0 else 0

    insert_sql = """
        INSERT INTO dws.dws_member_project_tag (
            site_id, tenant_id, member_id, time_window,
            category_code, category_name, short_name,
            duration_seconds, total_seconds, percentage, is_tagged,
            computed_at, created_at, updated_at
        ) VALUES (
            %(site_id)s, %(tenant_id)s, %(member_id)s, %(time_window)s,
            %(category_code)s, %(category_name)s, %(short_name)s,
            %(duration_seconds)s, %(total_seconds)s, %(percentage)s, %(is_tagged)s,
            NOW(), NOW(), NOW()
        )
    """
    for row in transformed:
        self.db.execute(insert_sql, row)

    self.logger.info(
        "%s: 删除 %d 条,插入 %d 条",
        self.get_task_code(), deleted, len(transformed),
    )
    return {
        "status": "SUCCESS",
        "counts": {"inserted": len(transformed), "deleted": deleted},
    }
|
||||
@@ -35,6 +35,8 @@ from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
from .dws_helpers import mask_mobile
|
||||
|
||||
@@ -152,7 +154,7 @@ class MemberVisitTask(BaseDwsTask):
|
||||
|
||||
# 获取区域分类
|
||||
area_name = tbl_info.get('area_name')
|
||||
area_cat = self.get_area_category(area_name)
|
||||
area_cat = self.get_area_category(area_name, tbl_info.get('table_name'))
|
||||
|
||||
# 构建助教服务JSON
|
||||
assistant_services_json = self._build_assistant_services_json(services)
|
||||
@@ -175,7 +177,7 @@ class MemberVisitTask(BaseDwsTask):
|
||||
# 会员信息
|
||||
'member_nickname': memb_info.get('nickname'),
|
||||
'member_mobile': self._mask_mobile(memb_info.get('mobile')),
|
||||
# CHANGE 2026-02-22 | 恢复从 dim_member.birthday 读取
|
||||
# CHANGE 2026-02-26 | 生日仅从 dim_member.birthday 读取(API 来源)
|
||||
'member_birthday': memb_info.get('birthday'),
|
||||
# 台桌信息
|
||||
'table_id': table_id,
|
||||
@@ -187,12 +189,20 @@ class MemberVisitTask(BaseDwsTask):
|
||||
'goods_amount': self.safe_decimal(settle.get('goods_money', 0)),
|
||||
'assistant_amount': self.safe_decimal(settle.get('assistant_pd_money', 0)) + \
|
||||
self.safe_decimal(settle.get('assistant_cx_money', 0)),
|
||||
'total_consume': self.safe_decimal(settle.get('consume_money', 0)),
|
||||
# CHANGE 2026-03-07 | consume_money → items_sum 口径校准
|
||||
'total_consume': (
|
||||
self.safe_decimal(settle.get('table_charge_money', 0))
|
||||
+ self.safe_decimal(settle.get('goods_money', 0))
|
||||
+ self.safe_decimal(settle.get('assistant_pd_money', 0))
|
||||
+ self.safe_decimal(settle.get('assistant_cx_money', 0))
|
||||
+ self.safe_decimal(settle.get('electricity_money', 0))
|
||||
),
|
||||
'total_discount': self._calc_total_discount(settle),
|
||||
'actual_pay': self.safe_decimal(settle.get('pay_amount', 0)),
|
||||
# 支付方式
|
||||
'cash_pay': self.safe_decimal(settle.get('pay_amount', 0)),
|
||||
'cash_card_pay': self.safe_decimal(settle.get('balance_amount', 0)),
|
||||
'balance_pay': self.safe_decimal(settle.get('balance_amount', 0)),
|
||||
'recharge_card_pay': self.safe_decimal(settle.get('recharge_card_amount', 0)),
|
||||
'gift_card_pay': self.safe_decimal(settle.get('gift_card_amount', 0)),
|
||||
'groupbuy_pay': self.safe_decimal(settle.get('coupon_amount', 0)),
|
||||
# 时长
|
||||
@@ -205,7 +215,49 @@ class MemberVisitTask(BaseDwsTask):
|
||||
|
||||
return results
|
||||
|
||||
# load() 已移除——使用 BaseDwsTask 默认实现(DATE_COL="visit_date")
|
||||
# CHANGE 2026-02-27 | bugfix: 覆盖 load(),在标准 delete-by-window 后
|
||||
# 额外按 order_settle_id 清理旧数据,防止 biz_date 切换后残留记录导致唯一约束冲突。
|
||||
# 背景:visit_date 从 pay_time::date 改为 biz_date_sql_expr 后,凌晨订单的
|
||||
# visit_date 前移一天,旧数据不在新窗口的 delete 范围内,insert 时触发
|
||||
# uk_dws_member_visit (site_id, member_id, order_settle_id) 冲突。
|
||||
def load(self, transformed, context: "TaskContext") -> dict:
    """Write visit records after clearing both windowed and stale rows.

    Steps:
      1. ``delete_existing_data`` removes rows inside the task window using
         ``DATE_COL`` (or ``"stat_date"`` when it is unset).
      2. Rows of the same site whose ``order_settle_id`` appears in this
         batch are deleted as well, wherever their date falls, so that
         re-inserting them cannot collide with leftovers stored under a
         different date.
      3. ``bulk_insert`` writes the batch.

    Returns:
        A dict with ``"counts"`` (fetched / inserted / updated / skipped /
        errors). For a non-empty batch it also carries ``"extra"`` holding
        both delete counts; an empty batch returns zeroed counts only.
    """
    if not transformed:
        return {"counts": {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}}

    window_deleted = self.delete_existing_data(
        context, date_col=self.DATE_COL or "stat_date"
    )

    # Extra cleanup keyed by the settle ids contained in this batch.
    settle_ids = [rec["order_settle_id"] for rec in transformed if rec.get("order_settle_id")]
    stale_deleted = 0
    if settle_ids:
        target = f"{self.DWS_SCHEMA}.{self.get_target_table()}"
        marks = ",".join("%s" for _ in settle_ids)
        purge_sql = f"DELETE FROM {target} WHERE site_id = %s AND order_settle_id IN ({marks})"
        store = transformed[0].get("site_id", context.store_id)
        with self.db.conn.cursor() as cur:
            cur.execute(purge_sql, [store, *settle_ids])
            stale_deleted = cur.rowcount
        if stale_deleted:
            self.logger.info(
                "%s: 额外清理残留旧数据 %d 行(order_settle_id 去重)",
                self.get_task_code(), stale_deleted,
            )

    written = self.bulk_insert(transformed)
    counts = {
        "fetched": len(transformed),
        "inserted": written,
        "updated": 0,
        "skipped": 0,
        "errors": 0,
    }
    return {
        "counts": counts,
        "extra": {"deleted": window_deleted, "extra_deleted": stale_deleted},
    }
|
||||
|
||||
# ==========================================================================
|
||||
# 数据提取方法
|
||||
@@ -220,7 +272,9 @@ class MemberVisitTask(BaseDwsTask):
|
||||
"""
|
||||
提取结账单
|
||||
"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
order_settle_id,
|
||||
order_trade_no,
|
||||
@@ -228,8 +282,9 @@ class MemberVisitTask(BaseDwsTask):
|
||||
member_id,
|
||||
create_time,
|
||||
pay_time,
|
||||
DATE(pay_time) AS visit_date,
|
||||
consume_money,
|
||||
{biz_expr} AS visit_date,
|
||||
-- CHANGE 2026-03-07 | 新增 electricity_money 用于 items_sum 计算
|
||||
electricity_money,
|
||||
pay_amount,
|
||||
table_charge_money,
|
||||
goods_money,
|
||||
@@ -244,10 +299,12 @@ class MemberVisitTask(BaseDwsTask):
|
||||
recharge_card_amount
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND DATE(pay_time) >= %s
|
||||
AND DATE(pay_time) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND member_id IS NOT NULL
|
||||
AND member_id != 0
|
||||
-- CHANGE 2026-03-07 | 排除退货(6)/退款(7),仅保留台桌结账(1)+商城订单(3)
|
||||
AND settle_type IN (1, 3)
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
@@ -261,7 +318,9 @@ class MemberVisitTask(BaseDwsTask):
|
||||
"""
|
||||
提取助教服务明细
|
||||
"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("start_use_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
order_settle_id,
|
||||
site_assistant_id AS assistant_id,
|
||||
@@ -270,8 +329,8 @@ class MemberVisitTask(BaseDwsTask):
|
||||
ledger_amount
|
||||
FROM dwd.dwd_assistant_service_log
|
||||
WHERE site_id = %s
|
||||
AND DATE(start_use_time) >= %s
|
||||
AND DATE(start_use_time) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND is_delete = 0
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
@@ -286,14 +345,16 @@ class MemberVisitTask(BaseDwsTask):
|
||||
"""
|
||||
提取台费时长(真实秒数)
|
||||
"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("ledger_end_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
order_settle_id,
|
||||
SUM(COALESCE(real_table_use_seconds, 0)) AS table_use_seconds
|
||||
FROM dwd.dwd_table_fee_log
|
||||
WHERE site_id = %s
|
||||
AND DATE(ledger_end_time) >= %s
|
||||
AND DATE(ledger_end_time) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND COALESCE(is_delete, 0) = 0
|
||||
GROUP BY order_settle_id
|
||||
"""
|
||||
@@ -304,61 +365,26 @@ class MemberVisitTask(BaseDwsTask):
|
||||
"""
|
||||
提取会员信息
|
||||
|
||||
生日优先级:手动补录(fdw_app.member_birthday_manual)> API 来源(dim_member.birthday)
|
||||
FDW 连接失败时降级为仅使用 dim_member.birthday
|
||||
生日来源:dim_member.birthday(API 来源)
|
||||
CHANGE 2026-02-26 | 维客线索重构:移除 FDW member_birthday_manual 读取,
|
||||
生日不再单独补录,归入维客线索"客户基础信息"大类
|
||||
"""
|
||||
# CHANGE 2026-02-21 | dim_member 无 site_id 字段,改用 register_site_id
|
||||
# CHANGE 2026-02-22 | 恢复 birthday 字段(C1 迁移已加列)
|
||||
# CHANGE 2026-02-22 | 需求 B:通过事实表反查,支持跨店消费会员
|
||||
# CHANGE 2026-02-22 | 需求 C2:COALESCE 优先手动补录生日,FDW 失败时降级
|
||||
sql_with_fdw = """
|
||||
sql = """
|
||||
SELECT
|
||||
m.member_id,
|
||||
m.nickname,
|
||||
m.mobile,
|
||||
COALESCE(
|
||||
(SELECT birthday_value
|
||||
FROM fdw_app.member_birthday_manual
|
||||
WHERE member_id = m.member_id
|
||||
ORDER BY recorded_at ASC
|
||||
LIMIT 1),
|
||||
m.birthday
|
||||
) AS birthday
|
||||
m.birthday
|
||||
FROM dwd.dim_member m
|
||||
WHERE m.member_id IN (
|
||||
SELECT DISTINCT tenant_member_id
|
||||
SELECT DISTINCT member_id
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND tenant_member_id IS NOT NULL
|
||||
AND tenant_member_id != 0
|
||||
AND member_id IS NOT NULL
|
||||
AND member_id != 0
|
||||
) AND m.scd2_is_current = 1
|
||||
"""
|
||||
sql_fallback = """
|
||||
SELECT
|
||||
member_id,
|
||||
nickname,
|
||||
mobile,
|
||||
birthday
|
||||
FROM dwd.dim_member
|
||||
WHERE member_id IN (
|
||||
SELECT DISTINCT tenant_member_id
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND tenant_member_id IS NOT NULL
|
||||
AND tenant_member_id != 0
|
||||
) AND scd2_is_current = 1
|
||||
"""
|
||||
try:
|
||||
rows = self.db.query(sql_with_fdw, (site_id,))
|
||||
except Exception as exc:
|
||||
# CHANGE [2026-02-24] FDW 查询失败后事务处于 failed 状态,必须先 rollback 再执行 fallback
|
||||
self.db.rollback()
|
||||
# FDW 连接失败,降级为仅使用 dim_member.birthday
|
||||
self.logger.warning(
|
||||
"%s: FDW 读取 member_birthday_manual 失败,降级为 dim_member.birthday — %s",
|
||||
self.get_task_code(), exc,
|
||||
)
|
||||
rows = self.db.query(sql_fallback, (site_id,))
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
return {r['member_id']: dict(r) for r in (rows or [])}
|
||||
|
||||
|
||||
|
||||
@@ -15,6 +15,11 @@ from psycopg2.extras import Json, execute_values
|
||||
from models.parsers import TypeParser
|
||||
from tasks.base_task import BaseTask
|
||||
from utils.windowing import build_window_segments, calc_window_minutes, calc_window_days, format_window_days
|
||||
from config.pipeline_config import PipelineConfig
|
||||
from pipeline.models import PipelineRequest, PipelineResult, WriteResult
|
||||
from pipeline.unified_pipeline import UnifiedPipeline
|
||||
from utils.cancellation import CancellationToken
|
||||
from utils.task_log_buffer import TaskLogBuffer
|
||||
|
||||
|
||||
ColumnTransform = Callable[[Any], Any]
|
||||
@@ -67,6 +72,15 @@ class OdsTaskSpec:
|
||||
# WINDOW 模式的时间列名
|
||||
snapshot_time_column: str | None = None
|
||||
|
||||
# ── Detail_Mode 可选配置(二级详情拉取)──
|
||||
detail_endpoint: str | None = None # 详情接口 endpoint
|
||||
detail_param_builder: Callable[[dict], dict] | None = None # 详情请求参数构造函数
|
||||
detail_target_table: str | None = None # 详情数据目标表名
|
||||
detail_data_path: Tuple[str, ...] | None = None # 详情数据的 data_path
|
||||
detail_list_key: str | None = None # 详情数据的 list_key
|
||||
detail_id_column: str | None = None # 从列表数据中提取 ID 的列名
|
||||
detail_process_fn: Callable[[Any], list[dict]] | None = None # 自定义详情处理函数
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if self.snapshot_mode == SnapshotMode.WINDOW and not self.snapshot_time_column:
|
||||
raise ValueError(
|
||||
@@ -88,7 +102,10 @@ class BaseOdsTask(BaseTask):
|
||||
|
||||
def execute(self, cursor_data: dict | None = None) -> dict:
|
||||
spec = self.SPEC
|
||||
# 创建任务级日志缓冲区,任务完成后一次性输出,避免多任务日志交叉
|
||||
self._log_buf = TaskLogBuffer(spec.code, self.logger)
|
||||
self.logger.info("开始执行%s (ODS)", spec.code)
|
||||
self._log_buf.info("开始执行%s (ODS)", spec.code)
|
||||
|
||||
window_start, window_end, window_minutes = self._resolve_window(cursor_data)
|
||||
segments = build_window_segments(
|
||||
@@ -111,6 +128,11 @@ class BaseOdsTask(BaseTask):
|
||||
total_segments,
|
||||
format_window_days(total_days),
|
||||
)
|
||||
self._log_buf.info(
|
||||
"窗口拆分为 %s 段(共 %s 天)",
|
||||
total_segments,
|
||||
format_window_days(total_days),
|
||||
)
|
||||
|
||||
store_id = TypeParser.parse_int(self.config.get("app.store_id"))
|
||||
if not store_id:
|
||||
@@ -141,6 +163,10 @@ class BaseOdsTask(BaseTask):
|
||||
]
|
||||
has_is_delete = self._table_has_column(spec.table_name, "is_delete")
|
||||
|
||||
# 构建 PipelineConfig(支持任务级覆盖)
|
||||
pipeline_config = PipelineConfig.from_app_config(self.config, spec.code)
|
||||
cancel_token = getattr(self, '_cancel_token', None) or CancellationToken()
|
||||
|
||||
try:
|
||||
for idx, (seg_start, seg_end) in enumerate(segments, start=1):
|
||||
params = self._build_params(
|
||||
@@ -158,11 +184,12 @@ class BaseOdsTask(BaseTask):
|
||||
"errors": 0,
|
||||
"deleted": 0,
|
||||
}
|
||||
# 快照软删除需要的共享状态(process_fn 闭包写入)
|
||||
segment_keys: set[tuple] = set()
|
||||
# CHANGE 2026-02-18 | 收集 WINDOW 模式下 API 返回数据的实际最早时间戳
|
||||
segment_earliest_time: datetime | None = None
|
||||
segment_earliest_time: list[datetime | None] = [None]
|
||||
# CHANGE [2026-02-24] 收集 API 返回数据的实际最晚时间戳,用于 late-cutoff 保护
|
||||
segment_latest_time: datetime | None = None
|
||||
segment_latest_time: list[datetime | None] = [None]
|
||||
|
||||
self.logger.info(
|
||||
"%s: 开始执行(%s/%s),窗口[%s ~ %s]",
|
||||
@@ -172,52 +199,51 @@ class BaseOdsTask(BaseTask):
|
||||
seg_start,
|
||||
seg_end,
|
||||
)
|
||||
self._log_buf.info(
|
||||
"开始执行(%s/%s),窗口[%s ~ %s]",
|
||||
idx, total_segments, seg_start, seg_end,
|
||||
)
|
||||
|
||||
for _, page_records, _, response_payload in self.api.iter_paginated(
|
||||
endpoint=spec.endpoint,
|
||||
params=params,
|
||||
page_size=page_size,
|
||||
data_path=spec.data_path,
|
||||
list_key=spec.list_key,
|
||||
):
|
||||
if (
|
||||
snapshot_missing_delete
|
||||
and has_is_delete
|
||||
and business_pk_cols
|
||||
and snapshot_mode != SnapshotMode.NONE
|
||||
):
|
||||
segment_keys.update(self._collect_business_keys(page_records, business_pk_cols))
|
||||
# CHANGE 2026-02-18 | 收集实际最早时间戳,用于 early-cutoff 保护
|
||||
if (
|
||||
snapshot_protect_early_cutoff
|
||||
and snapshot_mode == SnapshotMode.WINDOW
|
||||
and snapshot_time_column
|
||||
):
|
||||
page_earliest = self._collect_earliest_time(
|
||||
page_records, snapshot_time_column
|
||||
)
|
||||
if page_earliest is not None:
|
||||
if segment_earliest_time is None or page_earliest < segment_earliest_time:
|
||||
segment_earliest_time = page_earliest
|
||||
# CHANGE [2026-02-24] 收集实际最晚时间戳,用于 late-cutoff 保护
|
||||
page_latest = self._collect_latest_time(
|
||||
page_records, snapshot_time_column
|
||||
)
|
||||
if page_latest is not None:
|
||||
if segment_latest_time is None or page_latest > segment_latest_time:
|
||||
segment_latest_time = page_latest
|
||||
inserted, updated, skipped = self._insert_records_schema_aware(
|
||||
table=spec.table_name,
|
||||
records=page_records,
|
||||
response_payload=response_payload,
|
||||
source_file=source_file,
|
||||
source_endpoint=spec.endpoint if spec.include_source_endpoint else None,
|
||||
)
|
||||
segment_counts["fetched"] += len(page_records)
|
||||
segment_counts["inserted"] += inserted
|
||||
segment_counts["updated"] += updated
|
||||
segment_counts["skipped"] += skipped
|
||||
# 构建 UnifiedPipeline 并执行当前 segment
|
||||
pipeline = UnifiedPipeline(
|
||||
api_client=self.api,
|
||||
db_connection=self.db,
|
||||
logger=self.logger,
|
||||
config=pipeline_config,
|
||||
cancel_token=cancel_token,
|
||||
)
|
||||
|
||||
requests = self._build_requests(
|
||||
spec, params, page_size, idx - 1,
|
||||
)
|
||||
process_fn = self._build_process_fn(
|
||||
spec,
|
||||
snapshot_missing_delete=snapshot_missing_delete,
|
||||
has_is_delete=has_is_delete,
|
||||
business_pk_cols=business_pk_cols,
|
||||
snapshot_mode=snapshot_mode,
|
||||
snapshot_protect_early_cutoff=snapshot_protect_early_cutoff,
|
||||
snapshot_time_column=snapshot_time_column,
|
||||
segment_keys=segment_keys,
|
||||
segment_earliest_time=segment_earliest_time,
|
||||
segment_latest_time=segment_latest_time,
|
||||
)
|
||||
write_fn = self._build_write_fn(spec, source_file)
|
||||
|
||||
pipe_result = pipeline.run(requests, process_fn, write_fn)
|
||||
|
||||
# 将 PipelineResult 映射到 segment_counts
|
||||
segment_counts["fetched"] = pipe_result.total_fetched
|
||||
segment_counts["inserted"] = pipe_result.total_inserted
|
||||
segment_counts["updated"] = pipe_result.total_updated
|
||||
segment_counts["skipped"] = pipe_result.total_skipped
|
||||
segment_counts["errors"] = (
|
||||
pipe_result.request_failures
|
||||
+ pipe_result.processing_failures
|
||||
+ pipe_result.write_failures
|
||||
)
|
||||
|
||||
# 快照软删除(pipeline 完成后执行,保留原有逻辑)
|
||||
if (
|
||||
snapshot_missing_delete
|
||||
and has_is_delete
|
||||
@@ -230,28 +256,36 @@ class BaseOdsTask(BaseTask):
|
||||
if (
|
||||
snapshot_protect_early_cutoff
|
||||
and snapshot_mode == SnapshotMode.WINDOW
|
||||
and segment_earliest_time is not None
|
||||
and segment_earliest_time > seg_start
|
||||
and segment_earliest_time[0] is not None
|
||||
and segment_earliest_time[0] > seg_start
|
||||
):
|
||||
self.logger.info(
|
||||
"%s: early-cutoff 保护生效,软删除窗口起点从 %s 收窄至 %s",
|
||||
spec.code, seg_start, segment_earliest_time,
|
||||
spec.code, seg_start, segment_earliest_time[0],
|
||||
)
|
||||
effective_window_start = segment_earliest_time
|
||||
self._log_buf.info(
|
||||
"early-cutoff 保护生效,软删除窗口起点从 %s 收窄至 %s",
|
||||
seg_start, segment_earliest_time[0],
|
||||
)
|
||||
effective_window_start = segment_earliest_time[0]
|
||||
# CHANGE [2026-02-24] late-cutoff 保护:用 API 实际最晚时间戳收窄软删除范围
|
||||
# 防止 recent endpoint 数据保留期滚动导致窗口尾部数据消失时误标删除
|
||||
effective_window_end = seg_end
|
||||
if (
|
||||
snapshot_protect_early_cutoff
|
||||
and snapshot_mode == SnapshotMode.WINDOW
|
||||
and segment_latest_time is not None
|
||||
and segment_latest_time < seg_end
|
||||
and segment_latest_time[0] is not None
|
||||
and segment_latest_time[0] < seg_end
|
||||
):
|
||||
self.logger.info(
|
||||
"%s: late-cutoff 保护生效,软删除窗口终点从 %s 收窄至 %s",
|
||||
spec.code, seg_end, segment_latest_time,
|
||||
spec.code, seg_end, segment_latest_time[0],
|
||||
)
|
||||
effective_window_end = segment_latest_time
|
||||
self._log_buf.info(
|
||||
"late-cutoff 保护生效,软删除窗口终点从 %s 收窄至 %s",
|
||||
seg_end, segment_latest_time[0],
|
||||
)
|
||||
effective_window_end = segment_latest_time[0]
|
||||
deleted = self._mark_missing_as_deleted(
|
||||
table=spec.table_name,
|
||||
business_pk_cols=business_pk_cols,
|
||||
@@ -279,6 +313,12 @@ class BaseOdsTask(BaseTask):
|
||||
format_window_days(processed_days),
|
||||
format_window_days(total_days),
|
||||
)
|
||||
self._log_buf.info(
|
||||
"完成(%s/%s),已处理 %s/%s 天",
|
||||
idx, total_segments,
|
||||
format_window_days(processed_days),
|
||||
format_window_days(total_days),
|
||||
)
|
||||
if total_segments > 1:
|
||||
segment_results.append(
|
||||
{
|
||||
@@ -291,13 +331,76 @@ class BaseOdsTask(BaseTask):
|
||||
}
|
||||
)
|
||||
|
||||
# ── Detail_Mode:列表拉取全部完成后,执行二级详情拉取 ──
|
||||
detail_counts = {
|
||||
"detail_success": 0,
|
||||
"detail_failure": 0,
|
||||
"detail_skipped": 0,
|
||||
}
|
||||
if spec.detail_endpoint:
|
||||
self.logger.info("%s: 列表阶段完成,进入详情拉取阶段", spec.code)
|
||||
self._log_buf.info("列表阶段完成,进入详情拉取阶段")
|
||||
detail_pipeline = UnifiedPipeline(
|
||||
api_client=self.api,
|
||||
db_connection=self.db,
|
||||
logger=self.logger,
|
||||
config=pipeline_config,
|
||||
cancel_token=cancel_token,
|
||||
)
|
||||
detail_requests = self._build_detail_requests(spec)
|
||||
detail_process_fn = self._build_detail_process_fn(spec)
|
||||
detail_write_fn = self._build_detail_write_fn(spec, source_file)
|
||||
|
||||
detail_result = detail_pipeline.run(
|
||||
detail_requests, detail_process_fn, detail_write_fn,
|
||||
)
|
||||
self.db.commit()
|
||||
|
||||
# 填充详情统计:成功 = 完成的请求数,失败 = 请求失败数,跳过 = 0(无跳过逻辑)
|
||||
detail_counts["detail_success"] = detail_result.completed_requests
|
||||
detail_counts["detail_failure"] = (
|
||||
detail_result.request_failures
|
||||
+ detail_result.processing_failures
|
||||
+ detail_result.write_failures
|
||||
)
|
||||
# 记录详情阶段每个失败项的错误日志
|
||||
for err in detail_result.errors:
|
||||
self.logger.error(
|
||||
"%s: 详情请求失败, detail_id=%s, error=%s",
|
||||
spec.code,
|
||||
err.get("detail_id", err.get("endpoint", "unknown")),
|
||||
err.get("error", "unknown"),
|
||||
)
|
||||
self._log_buf.error(
|
||||
"详情请求失败, detail_id=%s, error=%s",
|
||||
err.get("detail_id", err.get("endpoint", "unknown")),
|
||||
err.get("error", "unknown"),
|
||||
)
|
||||
|
||||
self.logger.info(
|
||||
"%s: 详情拉取完成, success=%d, failure=%d, skipped=%d",
|
||||
spec.code,
|
||||
detail_counts["detail_success"],
|
||||
detail_counts["detail_failure"],
|
||||
detail_counts["detail_skipped"],
|
||||
)
|
||||
self._log_buf.info(
|
||||
"详情拉取完成, success=%d, failure=%d, skipped=%d",
|
||||
detail_counts["detail_success"],
|
||||
detail_counts["detail_failure"],
|
||||
detail_counts["detail_skipped"],
|
||||
)
|
||||
|
||||
self.logger.info("%s ODS 任务完成: %s", spec.code, total_counts)
|
||||
self._log_buf.info("ODS 任务完成: %s", total_counts)
|
||||
allow_empty_advance = bool(self.config.get("run.allow_empty_result_advance", False))
|
||||
status = "SUCCESS"
|
||||
if total_counts["fetched"] == 0 and not allow_empty_advance:
|
||||
status = "PARTIAL"
|
||||
|
||||
result = self._build_result(status, total_counts)
|
||||
# 附加详情统计到结果
|
||||
result["detail"] = detail_counts
|
||||
overall_start = segments[0][0]
|
||||
overall_end = segments[-1][1]
|
||||
result["window"] = {
|
||||
@@ -311,14 +414,223 @@ class BaseOdsTask(BaseTask):
|
||||
result["request_params"] = params_list[0]
|
||||
else:
|
||||
result["request_params"] = params_list
|
||||
# 任务完成,将缓冲日志一次性输出到父 logger
|
||||
self._log_buf.flush()
|
||||
return result
|
||||
|
||||
except Exception:
|
||||
self.db.rollback()
|
||||
total_counts["errors"] += 1
|
||||
self.logger.error("%s ODS 任务失败", spec.code, exc_info=True)
|
||||
self._log_buf.error("ODS 任务失败")
|
||||
# 异常时也 flush,确保已收集的日志不丢失
|
||||
self._log_buf.flush()
|
||||
raise
|
||||
|
||||
# ── Pipeline 集成方法 ──
|
||||
|
||||
def _build_requests(
    self,
    spec: OdsTaskSpec,
    params: dict,
    page_size: int,
    segment_index: int,
) -> Iterable[PipelineRequest]:
    """Yield one PipelineRequest per page returned by ``api.iter_paginated``.

    Pagination is driven here: every page that ``iter_paginated`` produces is
    wrapped in a request whose ``_prefetched_response`` already carries the
    page's records and raw response payload. According to the original
    author's note, UnifiedPipeline then consumes that prefetched data instead
    of issuing its own API call (that behaviour lives outside this method).
    """
    pages = self.api.iter_paginated(
        endpoint=spec.endpoint,
        params=params,
        page_size=page_size,
        data_path=spec.data_path,
        list_key=spec.list_key,
    )
    # Page number and total count are produced by the iterator but not needed.
    for _page_no, records, _total, payload in pages:
        prefetched = {
            "records": records,
            "response_payload": payload,
        }
        yield PipelineRequest(
            endpoint=spec.endpoint,
            params=params,
            page_size=page_size,
            data_path=spec.data_path,
            list_key=spec.list_key,
            segment_index=segment_index,
            _prefetched_response=prefetched,
        )
|
||||
|
||||
def _build_process_fn(
    self,
    spec: OdsTaskSpec,
    *,
    snapshot_missing_delete: bool,
    has_is_delete: bool,
    business_pk_cols: list[str],
    snapshot_mode: SnapshotMode,
    snapshot_protect_early_cutoff: bool,
    snapshot_time_column: str | None,
    segment_keys: set[tuple],
    segment_earliest_time: list[datetime | None],
    segment_latest_time: list[datetime | None],
) -> Callable[[Any], list[dict]]:
    """Build the per-page processing callback for one window segment.

    The returned function takes a prefetched response dict, returns its
    ``"records"`` list unchanged, and as a side effect updates the shared
    containers handed in by the caller:

    * ``segment_keys`` receives the business keys of the page when snapshot
      soft-delete is active (missing-delete enabled, table has ``is_delete``,
      business key columns known, snapshot mode not NONE).
    * ``segment_earliest_time[0]`` / ``segment_latest_time[0]`` track the
      smallest / largest value of ``snapshot_time_column`` seen so far when
      cutoff protection is enabled in WINDOW mode.

    The time bounds are single-element lists so the closure can write results
    back to the caller.
    """
    # These flags depend only on immutable arguments, so evaluate them once.
    soft_delete_active = (
        snapshot_missing_delete
        and has_is_delete
        and snapshot_mode != SnapshotMode.NONE
    )
    track_time_bounds = bool(
        snapshot_protect_early_cutoff
        and snapshot_mode == SnapshotMode.WINDOW
        and snapshot_time_column
    )

    def process_fn(response: Any) -> list[dict]:
        # `response` is the `_prefetched_response` dict of the request.
        records = response.get("records", [])
        if not records:
            return []

        # Business keys seen in this segment (used by snapshot soft-delete).
        if soft_delete_active and business_pk_cols:
            segment_keys.update(
                self._collect_business_keys(records, business_pk_cols)
            )

        if track_time_bounds:
            # Earliest timestamp actually returned: early-cutoff protection.
            earliest = self._collect_earliest_time(records, snapshot_time_column)
            if earliest is not None and (
                segment_earliest_time[0] is None
                or earliest < segment_earliest_time[0]
            ):
                segment_earliest_time[0] = earliest

            # Latest timestamp actually returned: late-cutoff protection.
            latest = self._collect_latest_time(records, snapshot_time_column)
            if latest is not None and (
                segment_latest_time[0] is None
                or latest > segment_latest_time[0]
            ):
                segment_latest_time[0] = latest

        return records

    return process_fn
|
||||
|
||||
def _build_write_fn(
    self,
    spec: OdsTaskSpec,
    source_file: str | None,
) -> Callable[[list[dict]], WriteResult]:
    """Build the write callback that stores list-phase records.

    The returned function passes a batch of records to
    ``_insert_records_schema_aware`` for ``spec.table_name`` (without a
    response payload) and wraps the resulting inserted / updated / skipped
    counts in a WriteResult. The source endpoint is only recorded when
    ``spec.include_source_endpoint`` is set.
    """

    def write_fn(records: list[dict]) -> WriteResult:
        endpoint_tag = spec.endpoint if spec.include_source_endpoint else None
        outcome = self._insert_records_schema_aware(
            table=spec.table_name,
            records=records,
            response_payload=None,
            source_file=source_file,
            source_endpoint=endpoint_tag,
        )
        inserted, updated, skipped = outcome
        return WriteResult(inserted=inserted, updated=updated, skipped=skipped)

    return write_fn
|
||||
|
||||
# ── Detail_Mode 方法 ──
|
||||
|
||||
def _build_detail_requests(
    self,
    spec: OdsTaskSpec,
) -> Iterable[PipelineRequest]:
    """Yield one detail request per distinct ID stored in the ODS table.

    Nothing is yielded unless both ``spec.detail_endpoint`` and
    ``spec.detail_id_column`` are set. Otherwise the distinct non-NULL values
    of ``detail_id_column`` are read from ``spec.table_name`` and each becomes
    a ``PipelineRequest`` with ``is_detail=True``. Request parameters come
    from ``spec.detail_param_builder({"id": <id>})`` when a builder is
    configured, else ``{"id": <id>}``.

    Note that the query has no window filter: it reads every ID present in
    the table, not only those written by the current run.

    A failure while querying the IDs is logged and ends the generator without
    raising; the cursor is closed in every case.
    """
    if not spec.detail_endpoint or not spec.detail_id_column:
        return

    id_col = spec.detail_id_column
    table = spec.table_name
    # Identifiers are interpolated from the task spec, not bound as values.
    # NOTE(review): assumes spec fields are code-level configuration rather
    # than external input; confirm before reusing with dynamic specs.
    query = f"SELECT DISTINCT {id_col} FROM {table} WHERE {id_col} IS NOT NULL"
    try:
        cursor = self.db.cursor()
        try:
            cursor.execute(query)
            rows = cursor.fetchall()
        finally:
            # Release the cursor even when execute()/fetchall() raises.
            cursor.close()
    except Exception:
        self.logger.error(
            "%s: 查询详情 ID 列表失败, table=%s, column=%s",
            spec.code, table, id_col, exc_info=True,
        )
        return

    if not rows:
        self.logger.info("%s: 无需拉取详情,ID 列表为空", spec.code)
        return

    self.logger.info(
        "%s: 开始详情拉取,共 %d 个 ID", spec.code, len(rows),
    )

    for (record_id,) in rows:
        # Use the configured builder, or fall back to {"id": record_id}.
        if spec.detail_param_builder:
            params = spec.detail_param_builder({"id": record_id})
        else:
            params = {"id": record_id}

        yield PipelineRequest(
            endpoint=spec.detail_endpoint,
            params=params,
            data_path=spec.detail_data_path or ("data",),
            list_key=spec.detail_list_key,
            is_detail=True,
            detail_id=record_id,
        )
|
||||
|
||||
def _build_detail_process_fn(
    self,
    spec: OdsTaskSpec,
) -> Callable[[Any], list[dict]]:
    """Return the processing callback used in the detail phase.

    A custom ``spec.detail_process_fn`` is returned as-is when configured.
    Otherwise the default callback returns ``response.get("records", [])``.
    """
    custom = spec.detail_process_fn
    if custom is None:

        def detail_process_fn(response: Any) -> list[dict]:
            return response.get("records", [])

        return detail_process_fn

    return custom
|
||||
|
||||
def _build_detail_write_fn(
|
||||
self,
|
||||
spec: OdsTaskSpec,
|
||||
source_file: str | None,
|
||||
) -> Callable[[list[dict]], WriteResult]:
|
||||
"""构建详情阶段的写入函数:写入 detail_target_table。"""
|
||||
target_table = spec.detail_target_table or spec.table_name
|
||||
|
||||
def detail_write_fn(records: list[dict]) -> WriteResult:
|
||||
inserted, updated, skipped = self._insert_records_schema_aware(
|
||||
table=target_table,
|
||||
records=records,
|
||||
response_payload=None,
|
||||
source_file=source_file,
|
||||
source_endpoint=spec.detail_endpoint if spec.include_source_endpoint else None,
|
||||
)
|
||||
return WriteResult(inserted=inserted, updated=updated, skipped=skipped)
|
||||
|
||||
return detail_write_fn
|
||||
|
||||
|
||||
def _resolve_window(self, cursor_data: dict | None) -> tuple[datetime, datetime, int]:
|
||||
base_start, base_end, base_minutes = self._get_time_window(cursor_data)
|
||||
|
||||
@@ -909,6 +1221,18 @@ class BaseOdsTask(BaseTask):
|
||||
_fill_missing("siteid", [site_profile.get("siteId"), site_profile.get("id")])
|
||||
_fill_missing("sitename", [site_profile.get("shop_name"), site_profile.get("siteName")])
|
||||
|
||||
# 通用 siteid 注入:ODS 表有 siteid 列但 API 记录不含时,从 app.store_id 填充
|
||||
# 场景:goods_stock_summary 等按门店请求但返回记录不含 siteId 的接口
|
||||
ods_has_siteid = any(c[0].lower() == "siteid" for c in cols_info)
|
||||
if ods_has_siteid:
|
||||
store_id = TypeParser.parse_int(self.config.get("app.store_id"))
|
||||
if store_id:
|
||||
for item in merged_records:
|
||||
merged = item["merged"]
|
||||
existing = self._get_value_case_insensitive(merged, "siteid")
|
||||
if existing in (None, "", 0):
|
||||
merged["siteid"] = store_id
|
||||
|
||||
business_keys = [c for c in pk_cols if str(c).lower() != "content_hash"]
|
||||
# P2(A): 使用 spec 上的显式开关控制去重,不再隐式依赖 has_fetched_at
|
||||
# CHANGE 2026-02-19 | force_full_update 时仍查最新 hash(用于判断是否回退到历史版本),
|
||||
@@ -1240,6 +1564,56 @@ def _bool_col(name: str, *sources: str) -> ColumnSpec:
|
||||
return ColumnSpec(column=name, sources=sources, transform=_to_bool)
|
||||
|
||||
|
||||
# ── 团购详情接口自定义 process_fn ──
|
||||
# API 原始响应结构:{"data": {"groupPurchasePackage": {...}, "packageCouponAssistants": [...], ...}, "code": 0}
|
||||
# detail_mode 下 process_fn 收到的是 api.post() 的原始 JSON 响应
|
||||
|
||||
def _group_package_detail_process_fn(response: Any) -> list[dict]:
|
||||
"""从 QueryPackageCouponInfo 响应中提取字段,组装为一条扁平记录。
|
||||
|
||||
匹配 ods.group_buy_package_details 表结构。
|
||||
"""
|
||||
data = response.get("data")
|
||||
if not data:
|
||||
return []
|
||||
|
||||
pkg = data.get("groupPurchasePackage")
|
||||
if not pkg:
|
||||
return []
|
||||
|
||||
# 结构化字段(来自 data.groupPurchasePackage)
|
||||
record: dict[str, Any] = {
|
||||
"coupon_id": pkg.get("id"),
|
||||
"package_name": pkg.get("package_name"),
|
||||
"duration": pkg.get("duration"),
|
||||
"start_time": pkg.get("start_time"),
|
||||
"end_time": pkg.get("end_time"),
|
||||
"add_start_clock": pkg.get("add_start_clock"),
|
||||
"add_end_clock": pkg.get("add_end_clock"),
|
||||
"is_enabled": pkg.get("is_enabled"),
|
||||
"is_delete": pkg.get("is_delete"),
|
||||
"site_id": pkg.get("site_id"),
|
||||
"tenant_id": pkg.get("tenant_id"),
|
||||
"create_time": pkg.get("create_time"),
|
||||
"creator_name": pkg.get("creator_name"),
|
||||
}
|
||||
|
||||
# JSONB 数组字段
|
||||
record["table_area_ids"] = pkg.get("tableAreaId")
|
||||
record["table_area_names"] = pkg.get("tableAreaNameList")
|
||||
record["assistant_services"] = data.get("packageCouponAssistants")
|
||||
record["groupon_site_infos"] = data.get("grouponSiteInfos")
|
||||
record["package_services"] = data.get("packagePackageService")
|
||||
record["coupon_details_list"] = data.get("packageCouponDetailsList")
|
||||
|
||||
# content_hash:对业务字段(不含 content_hash、payload、fetched_at)计算 SHA256
|
||||
hash_input = json.dumps(record, sort_keys=True, ensure_ascii=False, default=str)
|
||||
record["content_hash"] = hashlib.sha256(hash_input.encode("utf-8")).hexdigest()
|
||||
|
||||
# payload:完整的 data 对象
|
||||
record["payload"] = data
|
||||
|
||||
return [record]
|
||||
|
||||
|
||||
ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
@@ -1251,9 +1625,18 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
data_path=("data",),
|
||||
list_key="assistantInfos",
|
||||
pk_columns=(_int_col("id", "id", required=True),),
|
||||
extra_params={
|
||||
"workStatusEnum": 0,
|
||||
"dingTalkSynced": 0,
|
||||
"leaveId": 0,
|
||||
"criticismStatus": 0,
|
||||
"signStatus": -1,
|
||||
},
|
||||
include_source_endpoint=False,
|
||||
include_fetched_at=False,
|
||||
include_record_index=True,
|
||||
requires_window=False,
|
||||
time_fields=None,
|
||||
snapshot_mode=SnapshotMode.FULL_TABLE,
|
||||
description="助教账号档案 ODS:SearchAssistantInfo -> assistantInfos 原始 JSON",
|
||||
),
|
||||
@@ -1314,7 +1697,8 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_source_endpoint=False,
|
||||
include_fetched_at=False,
|
||||
include_record_index=True,
|
||||
requires_window=False,
|
||||
requires_window=True,
|
||||
time_fields=("startTime", "endTime"),
|
||||
snapshot_mode=SnapshotMode.WINDOW,
|
||||
snapshot_time_column="create_time",
|
||||
description="门店商品销售流水 ODS:GetGoodsSalesList -> orderGoodsLedgers 原始 JSON",
|
||||
@@ -1499,6 +1883,13 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
requires_window=False,
|
||||
snapshot_mode=SnapshotMode.FULL_TABLE,
|
||||
# ── Detail_Mode 配置:团购详情接口 ──
|
||||
detail_endpoint="/PackageCoupon/QueryPackageCouponInfo",
|
||||
detail_param_builder=lambda rec: {"couponId": rec["id"]},
|
||||
detail_target_table="ods.group_buy_package_details",
|
||||
detail_data_path=("data",),
|
||||
detail_id_column="id",
|
||||
detail_process_fn=_group_package_detail_process_fn,
|
||||
description="团购套餐定义 ODS:QueryPackageCouponList -> packageCouponList 原始 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
|
||||
@@ -24,7 +24,8 @@ WITH base AS (
|
||||
COALESCE(sh.member_discount_amount, 0) AS member_discount_amount,
|
||||
COALESCE(sh.adjust_amount, 0) AS manual_discount_amount,
|
||||
COALESCE(sh.pay_amount, 0) AS total_paid_amount,
|
||||
COALESCE(sh.balance_amount, 0) + COALESCE(sh.recharge_card_amount, 0) + COALESCE(sh.gift_card_amount, 0) AS stored_card_deduct,
|
||||
-- balance_amount = recharge_card_amount + gift_card_amount(恒等式),不可三者相加
|
||||
COALESCE(sh.balance_amount, 0) AS stored_card_deduct,
|
||||
COALESCE(sh.coupon_amount, 0) AS total_coupon_deduction,
|
||||
COALESCE(sh.table_charge_money, 0) AS settle_table_fee_amount,
|
||||
COALESCE(sh.assistant_pd_money, 0) + COALESCE(sh.assistant_cx_money, 0) AS settle_assistant_service_amount,
|
||||
|
||||
@@ -22,7 +22,6 @@ class ManualIngestTask(BaseTask):
|
||||
(("member_stored_value_cards",), "ods.member_stored_value_cards"),
|
||||
(("recharge_settlements",), "ods.recharge_settlements"),
|
||||
(("settlement_records",), "ods.settlement_records"),
|
||||
(("assistant_cancellation_records",), "ods.assistant_cancellation_records"),
|
||||
(("assistant_accounts_master",), "ods.assistant_accounts_master"),
|
||||
(("assistant_service_records",), "ods.assistant_service_records"),
|
||||
(("site_tables_master",), "ods.site_tables_master"),
|
||||
@@ -47,7 +46,6 @@ class ManualIngestTask(BaseTask):
|
||||
"ods.member_stored_value_cards": {"pk": "id"},
|
||||
"ods.recharge_settlements": {"pk": "id"},
|
||||
"ods.settlement_records": {"pk": "id"},
|
||||
"ods.assistant_cancellation_records": {"pk": "id", "json_cols": ["siteProfile"]},
|
||||
"ods.assistant_accounts_master": {"pk": "id"},
|
||||
"ods.assistant_service_records": {"pk": "id", "json_cols": ["siteProfile"]},
|
||||
"ods.site_tables_master": {"pk": "id"},
|
||||
|
||||
@@ -51,6 +51,7 @@ class DwsVerifier(BaseVerifier):
|
||||
"time_column": "stat_date",
|
||||
"source_table": "dwd.dwd_settlement_head",
|
||||
"source_time_column": "pay_time",
|
||||
# CHANGE 2026-03-07 | 补齐 settle_type 过滤,与 finance_base_task 对齐
|
||||
"agg_sql": """
|
||||
SELECT
|
||||
site_id,
|
||||
@@ -59,9 +60,10 @@ class DwsVerifier(BaseVerifier):
|
||||
COALESCE(SUM(pay_amount), 0) as cash_pay_amount,
|
||||
COALESCE(SUM(table_charge_money), 0) as table_fee_amount,
|
||||
COALESCE(SUM(goods_money), 0) as goods_amount,
|
||||
COALESCE(SUM(table_charge_money) + SUM(goods_money) + COALESCE(SUM(assistant_pd_money), 0) + COALESCE(SUM(assistant_cx_money), 0), 0) as gross_amount
|
||||
COALESCE(SUM(table_charge_money + goods_money + assistant_pd_money + assistant_cx_money), 0) as gross_amount
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE pay_time >= %s AND pay_time < %s
|
||||
AND settle_type IN (1, 3)
|
||||
GROUP BY site_id, tenant_id, DATE(pay_time)
|
||||
""",
|
||||
"compare_columns": ["cash_pay_amount", "table_fee_amount", "goods_amount", "gross_amount"],
|
||||
|
||||
@@ -97,7 +97,7 @@ class IndexVerifier(BaseVerifier):
|
||||
JOIN dwd.dim_assistant d
|
||||
ON s.user_id = d.user_id
|
||||
AND d.scd2_is_current = 1
|
||||
AND COALESCE(d.is_delete, 0) = 0
|
||||
AND COALESCE(d.leave_status, 0) = 0
|
||||
CROSS JOIN params p
|
||||
WHERE s.last_use_time >= p.start_time
|
||||
AND s.last_use_time < p.end_time
|
||||
|
||||
Reference in New Issue
Block a user