微信小程序页面迁移校验之前 P5任务处理之前

This commit is contained in:
Neo
2026-03-09 01:19:21 +08:00
parent 263bf96035
commit 6e20987d2f
1112 changed files with 153824 additions and 219694 deletions

View File

@@ -26,7 +26,7 @@ SCHEMA_ETL=meta
# API 配置(上游 SaaS API)
# ------------------------------------------------------------------------------
API_BASE=https://pc.ficoo.vip/apiprod/admin/v1/
API_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnQtdHlwZSI6IjQiLCJ1c2VyLXR5cGUiOiIxIiwiaHR0cDovL3NjaGVtYXMubWljcm9zb2Z0LmNvbS93cy8yMDA4LzA2L2lkZW50aXR5L2NsYWltcy9yb2xlIjoiMTIiLCJyb2xlLWlkIjoiMTIiLCJ0ZW5hbnQtaWQiOiIyNzkwNjgzMTYwNzA5OTU3Iiwibmlja25hbWUiOiLnp5_miLfnrqHnkIblkZjvvJrmganmgakxIiwic2l0ZS1pZCI6IjAiLCJtb2JpbGUiOiIxMzgxMDUwMjMwNCIsInNpZCI6IjI5NTA0ODk2NTgzOTU4NDUiLCJzdGFmZi1pZCI6IjMwMDk5MTg2OTE1NTkwNDUiLCJvcmctaWQiOiIwIiwicm9sZS10eXBlIjoiMyIsInJlZnJlc2hUb2tlbiI6IjN4d3IwYjNWN01jemlvcFYyZnZibmtpMVg4MEhxNVFvOFRMcHh3RkNkQUk9IiwicmVmcmVzaEV4cGlyeVRpbWUiOiIyMDI2LzMvMSDkuIvljYgxMDo1MDozOCIsIm5lZWRDaGVja1Rva2VuIjoiZmFsc2UiLCJleHAiOjE3NzIzNzY2MzgsImlzcyI6InRlc3QiLCJhdWQiOiJVc2VyIn0.k_f4jnSGKOKPoZC22bVSrAo9A1FfRqvsNiGw-Vmc0qQ
API_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnQtdHlwZSI6IjQiLCJ1c2VyLXR5cGUiOiIxIiwiaHR0cDovL3NjaGVtYXMubWljcm9zb2Z0LmNvbS93cy8yMDA4LzA2L2lkZW50aXR5L2NsYWltcy9yb2xlIjoiMTIiLCJyb2xlLWlkIjoiMTIiLCJ0ZW5hbnQtaWQiOiIyNzkwNjgzMTYwNzA5OTU3Iiwibmlja25hbWUiOiLnp5_miLfnrqHnkIblkZjvvJrmganmgakxIiwic2l0ZS1pZCI6IjAiLCJtb2JpbGUiOiIxMzgxMDUwMjMwNCIsInNpZCI6IjI5NTA0ODk2NTgzOTU4NDUiLCJzdGFmZi1pZCI6IjMwMDk5MTg2OTE1NTkwNDUiLCJvcmctaWQiOiIwIiwicm9sZS10eXBlIjoiMyIsInJlZnJlc2hUb2tlbiI6IlI5THQvRkVjSGZubkdiOTZJZ3lmdWhjaXU5WnIwREQrZFh1amhVY1RCSDQ9IiwicmVmcmVzaEV4cGlyeVRpbWUiOiIyMDI2LzMvMTEg5LiL5Y2INjo0MjozMSIsIm5lZWRDaGVja1Rva2VuIjoiZmFsc2UiLCJleHAiOjE3NzMyMjU3NTEsImlzcyI6InRlc3QiLCJhdWQiOiJVc2VyIn0.8H5V3W0NfGJrcYo9Ex-35D-SzxhC2tRaZGrgo2reYr4
API_TIMEOUT=20
API_PAGE_SIZE=200
API_RETRY_MAX=3
@@ -45,6 +45,13 @@ WRITE_PRETTY_JSON=true
# ------------------------------------------------------------------------------
PIPELINE_FLOW=FULL
# ------------------------------------------------------------------------------
# 管道限流配置(RateLimiter 请求间隔,秒)
# CHANGE 2026-03-06 | 从默认 5-20s 降至 0.1-2s,大幅缩短 ODS 请求耗时
# ------------------------------------------------------------------------------
PIPELINE_RATE_MIN=0.1
PIPELINE_RATE_MAX=2.0
# ------------------------------------------------------------------------------
# 时间窗口配置
# ------------------------------------------------------------------------------
@@ -166,7 +173,7 @@ DWD_FACT_UPSERT=true
# ------------------------------------------------------------------------------
# 任务列表配置
# ------------------------------------------------------------------------------
RUN_TASKS=PRODUCTS,TABLES,MEMBERS,ASSISTANTS,PACKAGES_DEF,ORDERS,PAYMENTS,REFUNDS,COUPON_USAGE,INVENTORY_CHANGE,TOPUPS,TABLE_DISCOUNT,ASSISTANT_ABOLISH,LEDGER
RUN_TASKS=PRODUCTS,TABLES,MEMBERS,ASSISTANTS,PACKAGES_DEF,ORDERS,PAYMENTS,REFUNDS,COUPON_USAGE,INVENTORY_CHANGE,TOPUPS,TABLE_DISCOUNT,LEDGER
INDEX_LOOKBACK_DAYS=60
# ------------------------------------------------------------------------------

View File

@@ -107,6 +107,11 @@ class APIClient:
"""
return self._post_json(endpoint, params)
# CHANGE [2026-03-06] intent: 补齐公共 post() 方法UnifiedPipeline 详情拉取模式需要调用 self.api.post()
def post(self, endpoint: str, params: dict | None = None) -> dict:
"""发送 POST JSON 请求(与 get 相同,语义更明确的别名)。"""
return self._post_json(endpoint, params)
def _post_json(self, endpoint: str, payload: dict | None = None) -> dict:
if not self.base_url:
raise ValueError("API base_url 未配置")
@@ -292,3 +297,10 @@ class APIClient:
return v
return []
# AI_CHANGELOG:
# - 日期: 2026-03-06 08:37:26
# - Prompt: P20260306-083206
# - 直接原因: APIClient 缺少公共 post() 方法UnifiedPipeline 详情拉取模式调用 self.api.post() 失败
# - 变更摘要: 新增 post() 作为 _post_json() 的公共别名,与已有 get() 对齐
# - 风险与验证: 极低风险纯别名转发166 个单元测试通过

View File

@@ -0,0 +1,43 @@
"""请求间隔控制器,支持取消信号中断等待。"""
import random
import time
import threading
class RateLimiter:
"""请求间隔控制器,在相邻 API 请求之间插入随机等待时间,防止触发上游风控。
等待期间以 0.5s 为单位轮询 cancel_event支持快速响应取消信号。
"""
def __init__(self, min_interval: float = 5.0, max_interval: float = 20.0):
if min_interval > max_interval:
raise ValueError(
f"min_interval({min_interval}) 不能大于 max_interval({max_interval})"
)
self._min = min_interval
self._max = max_interval
self._last_interval: float = 0.0
def wait(self, cancel_event: threading.Event | None = None) -> bool:
"""等待随机间隔。返回 False 表示被取消信号中断。
将等待时间拆分为 0.5s 小段,每段检查 cancel_event
以便在取消信号到达时快速退出(最多延迟 0.5s)。
"""
interval = random.uniform(self._min, self._max)
self._last_interval = interval
remaining = interval
while remaining > 0:
if cancel_event and cancel_event.is_set():
return False
sleep_time = min(0.5, remaining)
time.sleep(sleep_time)
remaining -= sleep_time
return True
@property
def last_interval(self) -> float:
"""最近一次 wait() 生成的随机间隔值。"""
return self._last_interval

View File

@@ -36,6 +36,11 @@ class RecordingAPIClient:
self.last_dump: dict[str, Any] | None = None
# ------------------------------------------------------------------ 公共 API
# CHANGE [2026-03-06] intent: 补齐 post() 代理,使 RecordingAPIClient 完整覆盖 APIClient 公共接口
def post(self, endpoint: str, params: dict | None = None) -> dict:
"""委托给底层 APIClient 的 post 方法(详情拉取等非分页请求使用)。"""
return self.base.post(endpoint, params)
def get_source_hint(self, endpoint: str) -> str:
    """Return, as a string, the JSON dump path for ``endpoint``.

    The path is ``self.output_dir`` joined with the file name that
    ``endpoint_to_filename`` derives from the endpoint; callers use it as
    the ``source_file`` lineage value.
    """
    dump_name = endpoint_to_filename(endpoint)
    dump_path = self.output_dir / dump_name
    return str(dump_path)
@@ -193,6 +198,12 @@ def build_recording_client(
# AI_CHANGELOG:
# - 日期: 2026-03-06 08:37:26
# - Prompt: P20260306-083206
# - 直接原因: RecordingAPIClient 缺少 post() 方法UnifiedPipeline 详情拉取模式调用失败
# - 变更摘要: 新增 post() 方法委托给 self.base.post(),补齐代理接口覆盖
# - 风险与验证: 极低风险纯委托转发166 个单元测试通过
#
# - 日期: 2026-02-14
# - Prompt: P20260214-040231审计收口补录
# - 直接原因: 默认时区 Asia/Taipei 与运营地区(中国大陆)不符

View File

@@ -282,6 +282,32 @@ def parse_args():
parser.add_argument("--idle-end", help="闲时窗口结束(HH:MM)")
parser.add_argument("--allow-empty-advance", action="store_true", help="允许空结果推进窗口")
# Pipeline 管道参数(覆盖 PipelineConfig 全局默认值)
parser.add_argument(
"--pipeline-workers",
dest="pipeline_workers",
type=int,
help="Pipeline 处理线程数(覆盖 pipeline.workers默认 2",
)
parser.add_argument(
"--pipeline-batch-size",
dest="pipeline_batch_size",
type=int,
help="Pipeline 批量写入阈值(覆盖 pipeline.batch_size默认 100",
)
parser.add_argument(
"--pipeline-rate-min",
dest="pipeline_rate_min",
type=float,
help="Pipeline 限流最小间隔秒数(覆盖 pipeline.rate_min默认 5.0",
)
parser.add_argument(
"--pipeline-rate-max",
dest="pipeline_rate_max",
type=float,
help="Pipeline 限流最大间隔秒数(覆盖 pipeline.rate_max默认 20.0",
)
# 强制全量更新(跳过 ODS hash 去重 + DWD 变更对比,无条件写入)
parser.add_argument(
"--force-full",
@@ -406,6 +432,16 @@ def build_cli_overrides(args) -> dict:
# 强制全量更新
if args.force_full:
overrides.setdefault("run", {})["force_full_update"] = True
# Pipeline 管道参数 → pipeline.* 命名空间(供 PipelineConfig.from_app_config() 读取)
if getattr(args, "pipeline_workers", None) is not None:
overrides.setdefault("pipeline", {})["workers"] = args.pipeline_workers
if getattr(args, "pipeline_batch_size", None) is not None:
overrides.setdefault("pipeline", {})["batch_size"] = args.pipeline_batch_size
if getattr(args, "pipeline_rate_min", None) is not None:
overrides.setdefault("pipeline", {})["rate_min"] = args.pipeline_rate_min
if getattr(args, "pipeline_rate_max", None) is not None:
overrides.setdefault("pipeline", {})["rate_max"] = args.pipeline_rate_max
# 任务
if args.tasks:

View File

@@ -4,6 +4,7 @@
DEFAULTS = {
"app": {
"timezone": "Asia/Shanghai",
"business_day_start_hour": 8,
"store_id": "",
# CHANGE 2026-02-15 | 对齐新库 etl_feiqiu 六层架构
"schema_oltp": "ods",
@@ -52,7 +53,6 @@ DEFAULTS = {
"INVENTORY_CHANGE",
"TOPUPS",
"TABLE_DISCOUNT",
"ASSISTANT_ABOLISH",
"LEDGER",
],
"dws_tasks": [],
@@ -178,5 +178,4 @@ TASK_TABLES = "TABLES"
TASK_PACKAGES_DEF = "PACKAGES_DEF"
TASK_TOPUPS = "TOPUPS"
TASK_TABLE_DISCOUNT = "TABLE_DISCOUNT"
TASK_ASSISTANT_ABOLISH = "ASSISTANT_ABOLISH"
TASK_LEDGER = "LEDGER"

View File

@@ -7,6 +7,7 @@ from copy import deepcopy
ENV_MAP = {
"TIMEZONE": ("app.timezone",),
"BUSINESS_DAY_START_HOUR": ("app.business_day_start_hour",),
"STORE_ID": ("app.store_id",),
"SCHEMA_OLTP": ("app.schema_oltp",),
"SCHEMA_ETL": ("app.schema_etl",),
@@ -114,6 +115,9 @@ ENV_MAP = {
"DATA_SOURCE": ("run.data_source",),
# API 额外请求头JSON 对象格式)
"API_HEADERS_EXTRA": ("api.headers_extra",),
# Pipeline 管道限流参数
"PIPELINE_RATE_MIN": ("pipeline.rate_min",),
"PIPELINE_RATE_MAX": ("pipeline.rate_max",),
}

View File

@@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-
"""统一管道配置数据类。
支持全局默认值 + 任务级覆盖的三级回退:
pipeline.<task_code>.* → pipeline.* → 硬编码默认值
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from .settings import AppConfig
@dataclass(frozen=True)
class PipelineConfig:
    """Unified pipeline settings: global defaults plus per-task overrides.

    The field defaults declared here are the single source of truth for the
    "hard-coded default" tier; ``from_app_config`` falls back to them instead
    of keeping a second copy of the numbers.
    """

    workers: int = 2                    # ProcessingPool worker thread count
    queue_size: int = 100               # processing queue capacity
    batch_size: int = 100               # WriteWorker batch-write threshold
    batch_timeout: float = 5.0          # WriteWorker wait timeout (seconds)
    # The rate limits default to 5-20 s, the values from_app_config previously
    # hard-coded as its fallback. Earlier the fields said 0.1/2.0, so
    # PipelineConfig() and PipelineConfig.from_app_config(cfg) disagreed when
    # nothing was configured. Faster limits are expected to come from
    # configuration (pipeline.rate_min / pipeline.rate_max).
    rate_min: float = 5.0               # RateLimiter minimum interval (seconds)
    rate_max: float = 20.0              # RateLimiter maximum interval (seconds)
    max_consecutive_failures: int = 10  # abort threshold for consecutive failures

    def __post_init__(self) -> None:
        """Validate field values.

        Raises:
            ValueError: if ``workers``, ``queue_size`` or ``batch_size`` is
                below 1, or if ``rate_min`` exceeds ``rate_max``.
        """
        if self.workers < 1:
            raise ValueError(f"workers 必须 >= 1当前值: {self.workers}")
        if self.queue_size < 1:
            raise ValueError(f"queue_size 必须 >= 1当前值: {self.queue_size}")
        if self.batch_size < 1:
            raise ValueError(f"batch_size 必须 >= 1当前值: {self.batch_size}")
        if self.rate_min > self.rate_max:
            raise ValueError(
                f"rate_min({self.rate_min}) 不能大于 rate_max({self.rate_max})"
            )

    @classmethod
    def from_app_config(
        cls,
        config: AppConfig,
        task_code: str | None = None,
    ) -> PipelineConfig:
        """Build a config from ``config``, honouring per-task overrides.

        Lookup order for every field:
          1. ``pipeline.<task_code lower-cased>.<key>`` (only when
             ``task_code`` is non-empty)
          2. ``pipeline.<key>``
          3. the dataclass field default

        A value found in ``config`` is coerced with the type of the field
        default (``int`` or ``float``), so string values are accepted.

        Args:
            config: object exposing ``get(dotted_key)`` that returns ``None``
                for missing keys.
            task_code: optional task code selecting the task-level namespace.

        Raises:
            ValueError: if a configured value cannot be coerced, or if the
                resulting values fail ``__post_init__`` validation.
        """
        # One default instance supplies tier 3, so the fallback can never
        # drift away from the declared field defaults again.
        defaults = cls()

        lookup_paths = []
        if task_code:
            lookup_paths.append(f"pipeline.{task_code.lower()}.")
        lookup_paths.append("pipeline.")

        def _get(key: str):  # noqa: ANN202
            default = getattr(defaults, key)
            for prefix in lookup_paths:
                val = config.get(f"{prefix}{key}")
                if val is not None:
                    return type(default)(val)
            return default

        return cls(
            workers=_get("workers"),
            queue_size=_get("queue_size"),
            batch_size=_get("batch_size"),
            batch_timeout=_get("batch_timeout"),
            rate_min=_get("rate_min"),
            rate_max=_get("rate_max"),
            max_consecutive_failures=_get("max_consecutive_failures"),
        )

View File

@@ -111,6 +111,12 @@ class AppConfig:
missing.append("app.store_id")
if missing:
raise SystemExit("缺少必需配置: " + ", ".join(missing))
# business_day_start_hour 范围校验023 整数)
hour = cfg["app"].get("business_day_start_hour", 8)
if not isinstance(hour, int) or not (0 <= hour <= 23):
raise SystemExit("app.business_day_start_hour 必须为 023 的整数")
def get(self, key: str, default=None):
"""获取配置值(支持点号路径)"""

View File

@@ -20,7 +20,15 @@ class DatabaseConnection:
# 生产环境要求:数据库连接超时不得超过 20 秒。
timeout_val = max(1, min(int(timeout_val), 20))
conn = psycopg2.connect(self._dsn, connect_timeout=timeout_val)
# CHANGE 2026-03-06 | intent: 修复 Windows GBK 环境下 psycopg2 连接握手的 UnicodeDecodeError
# assumptions: libpq 默认使用系统 locale 的 client_encodingWindows 中文系统为 GBK/CP936
# 边界: 显式指定 client_encoding=utf8 确保连接层始终使用 UTF-8与数据库 server_encoding 一致
# 验证: web-admin 手动触发 ETL 全量 flow不再出现 0xd6 解码错误
conn = psycopg2.connect(
self._dsn,
connect_timeout=timeout_val,
options="-c client_encoding=utf8",
)
conn.autocommit = False
# 会话参数(时区、语句超时等)

View File

@@ -1,5 +1,9 @@
# -*- coding: utf-8 -*-
"""数据库批量操作"""
"""数据库批量操作
AI_CHANGELOG
- 2026-03-06 09:17:16 | Prompt: P20260306-084752摘录DWD 并行装载全部失败 _dsn 属性缺失)| Direct causeDatabaseOperations 组合模式未透传 _dsn/_session/_connect_timeout | Summary新增 3 个 property 透传底层 DatabaseConnection 属性 | Verify334 单元测试通过 + getDiagnostics 无问题
"""
import psycopg2.extras
import re
@@ -9,6 +13,23 @@ class DatabaseOperations:
def __init__(self, connection):
    """Wrap ``connection`` and cache its underlying ``conn`` handle.

    ``connection`` must expose a ``conn`` attribute; the accompanying change
    note assumes it is always a ``DatabaseConnection`` instance.
    """
    self._connection = connection
    self.conn = connection.conn
# [CHANGE P20260306-084752] intent: 透传底层 DatabaseConnection 的连接参数,
# DwdLoadTask._process_single_table 需要 _dsn/_session/_connect_timeout
# 为每个线程创建独立连接
# assumptions: _connection 始终是 DatabaseConnection 实例,具有这三个属性
# verify: 334 单元测试通过DWD 并行装载不再 AttributeError
@property
def _dsn(self):
    """Pass through the wrapped connection's ``_dsn`` attribute."""
    return self._connection._dsn
@property
def _session(self):
    """Pass through the wrapped connection's ``_session`` attribute."""
    return self._connection._session
@property
def _connect_timeout(self):
    """Pass through the wrapped connection's ``_connect_timeout`` attribute."""
    return self._connection._connect_timeout
def batch_execute(self, sql: str, rows: list, page_size: int = 1000):
"""批量执行SQL"""

View File

@@ -12,7 +12,7 @@
### 1.1 助教日报dws_assistant_daily_detail
- 目标表:`dws.dws_assistant_daily_detail`
- 数据来源:`dwd_assistant_service_log``dwd_assistant_trash_event``dim_assistant`SCD2
- 数据来源:`dwd_assistant_service_log`、`dwd_assistant_service_log_ex`(提供 `is_trash` 标记)、`dim_assistant`(SCD2)
- 粒度:门店 × 助教 × 日期
- 核心指标:服务次数(总/基础课/附加课/包厢课)、计费秒数与小时数、台账金额、去重客户数与台桌数、废除统计
- 课程类型分类:通过 `cfg_skill_type` 将 `skill_id` 映射为 `BASE`/`BONUS`/`ROOM`

View File

@@ -42,6 +42,12 @@
| 23 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 版本失效时间 |
| 24 | scd2_is_current | INTEGER | YES | | 当前版本标记 |
| 25 | scd2_version | INTEGER | YES | | 版本号 |
| 26 | table_area_ids | JSONB | YES | | 可用台区 ID 列表(来自详情接口 tableAreaId) |
| 27 | table_area_names | JSONB | YES | | 可用台区名称列表(来自详情接口 tableAreaNameList) |
| 28 | assistant_services | JSONB | YES | | 助教服务关联数组(来自详情接口 packageCouponAssistants) |
| 29 | groupon_site_infos | JSONB | YES | | 关联门店信息数组(来自详情接口 grouponSiteInfos) |
> 字段 26-29 由迁移脚本 `db/etl_feiqiu/migrations/2026-03-05__add_detail_fields_to_dim_groupbuy_package_ex.sql` 新增,数据来源为 `ods.group_buy_package_details`(通过 LEFT JOIN `coupon_id = groupbuy_package_id` 合并)。
## 样本数据

View File

@@ -33,7 +33,7 @@
| 14 | is_confirm | INTEGER | YES | | 是否确认。**枚举值**: 2(5003)=**[待确认]** |
| 15 | is_single_order | INTEGER | YES | | 是否独立订单。**枚举值**: 1(5003)=是 |
| 16 | is_not_responding | INTEGER | YES | | 无响应。**枚举值**: 0(5003)=正常 |
| 17 | is_trash | INTEGER | YES | | 是否废单。**枚举值**: 0(5003)=正常 |
| 17 | is_trash | INTEGER | YES | | 是否废单。**枚举值**: 0=正常, 1=已作废。⚠️ 此字段是判断助教服务是否作废的唯一依据,替代已废弃的 `dwd_assistant_trash_event`(2026-02-22 DROP)。DWS 层助教日报等任务通过此字段过滤废单统计。 |
| 18 | trash_applicant_id | BIGINT | YES | | 废单申请人 ID当前数据全为 0 |
| 19 | trash_applicant_name | VARCHAR(64) | YES | | 废单申请人姓名(当前数据全为空) |
| 20 | trash_reason | VARCHAR(255) | YES | | 废单原因(当前数据全为空) |

View File

@@ -30,7 +30,7 @@
| 序号 | 表名 | 说明 | 主键 | 扩展表 | 文档链接 |
|------|------|------|------|--------|----------|
| 1 | dwd_assistant_service_log | 助教服务流水 | assistant_service_id | dwd_assistant_service_log_ex | [主表](BD_manual_dwd_assistant_service_log.md) / [扩展表](BD_manual_dwd_assistant_service_log_ex.md) |
| 2 | dwd_assistant_trash_event | 助教服务作废 | assistant_trash_event_id | dwd_assistant_trash_event_ex | [主表](BD_manual_dwd_assistant_trash_event.md) / [扩展表](BD_manual_dwd_assistant_trash_event_ex.md) |
| 2 | ~~dwd_assistant_trash_event~~ | ~~助教服务作废~~ | — | — | ⚠️ 已于 2026-02-22 废弃,作废判断改用 `dwd_assistant_service_log_ex.is_trash` |
| 3 | dwd_groupbuy_redemption | 团购券核销 | redemption_id | dwd_groupbuy_redemption_ex | [主表](BD_manual_dwd_groupbuy_redemption.md) / [扩展表](BD_manual_dwd_groupbuy_redemption_ex.md) |
| 4 | dwd_member_balance_change | 会员余额变动 | balance_change_id | dwd_member_balance_change_ex | [主表](BD_manual_dwd_member_balance_change.md) / [扩展表](BD_manual_dwd_member_balance_change_ex.md) |
| 5 | dwd_payment | 支付流水 | payment_id | 无 | [主表](BD_manual_dwd_payment.md) |
@@ -118,7 +118,7 @@ SELECT * FROM dwd.dwd_payment ORDER BY pay_time DESC NULLS LAST LIMIT 1;
| dwd_table_fee_adjust | 2,849 |
| dwd_assistant_service_log | 1,090 |
| dwd_recharge_order | 455 |
| dwd_assistant_trash_event | 98 |
| ~~dwd_assistant_trash_event~~(⚠️ 已废弃,2026-02-22) | ~~98~~ |
| dwd_refund | 45 |
---

View File

@@ -42,7 +42,7 @@
| M7 | 2 | 麻将/麻将棋牌 |
| M8 | 1 | 麻将/麻将棋牌 |
| K包 | 4 | K包/K歌/KTV |
| VIP包厢 | 4 | 台球/打球/中八/追分 (V5为 台球/打球/斯诺克) |
| VIP包厢 | 4 | 🎱 中式/追分 (V1-V4)、斯诺克 (V5) |
| 斯诺克区 | 4 | 台球/打球/斯诺克 |
| 666 | 2 | 麻将/麻将棋牌 |
| TV台 | 1 | 台球/打球/中八/追分 |

View File

@@ -35,7 +35,7 @@
| 16 | member_card_type_name | VARCHAR(100) | YES | | 卡类型名称(当前数据全为空) |
| 17 | is_bind_member | BOOLEAN | YES | | 是否绑定会员。**枚举值**: False=否 |
| 18 | member_discount_amount | NUMERIC(18,2) | YES | | 会员折扣金额 |
| 19 | consume_money | NUMERIC(18,2) | YES | | 消费总金额(元) |
| 19 | consume_money | NUMERIC(18,2) | YES | | 消费总金额(元)。⚠️ **口径不稳定**:存在三种历史口径(A/B/C),DWS 层不应直接使用,应使用 `items_sum = table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money`。详见 [consume_money 口径](../../../../docs/reports/DWD-DOC/consume/consume-money-caliber.md) |
| 20 | table_charge_money | NUMERIC(18,2) | YES | | 台费金额 |
| 21 | goods_money | NUMERIC(18,2) | YES | | 商品金额 |
| 22 | real_goods_money | NUMERIC(18,2) | YES | | 实收商品金额 |
@@ -71,19 +71,30 @@ LIMIT 1;
```
**使用示例**
```sql
-- 每日营收统计
-- 每日营收统计(使用 items_sum 口径,不使用 consume_money
SELECT
DATE(pay_time) AS pay_date,
COUNT(*) AS order_count,
SUM(consume_money) AS total_consume,
SUM(table_charge_money + goods_money + assistant_pd_money
+ assistant_cx_money + electricity_money) AS total_items_sum,
SUM(pay_amount) AS total_pay
FROM dwd.dwd_settlement_head
WHERE settle_type IN (1, 3)
GROUP BY DATE(pay_time)
ORDER BY pay_date DESC;
-- 台费 vs 商品 vs 助教收入
SELECT
SUM(table_charge_money) AS table_revenue,
SUM(goods_money) AS goods_revenue,
SUM(assistant_pd_money + assistant_cx_money) AS assistant_revenue
FROM dwd.dwd_settlement_head;
SUM(assistant_pd_money) AS assistant_pd_revenue,
SUM(assistant_cx_money) AS assistant_cx_revenue
FROM dwd.dwd_settlement_head
WHERE settle_type IN (1, 3);
```
**支付渠道恒等式100% 成立)**
```
balance_amount = recharge_card_amount + gift_card_amount -- 储值卡 = 充值卡 + 礼品卡
pay_amount = point_amount + cash_amount -- 实付 = 积分 + 现金(互斥)
```
> `balance_amount` 是独立支付渠道,`recharge_card_amount`/`gift_card_amount` 是其分账明细,不可重复计算。

View File

@@ -1,6 +1,6 @@
# cfg_area_category 台区分类映射表
> 生成时间2026-02-03
> 生成时间:2026-02-03 | 更新时间:2026-03-07
## 表信息
@@ -9,8 +9,9 @@
| Schema | dws |
| 表名 | cfg_area_category |
| 主键 | category_id |
| 唯一约束 | (source_area_name, COALESCE(source_table_name, '')) |
| 数据来源 | 手工维护/seed脚本基于dim_table实际数据 |
| 说明 | 将dim_table.site_table_area_name映射到财务报表区域分类 |
| 说明 | 将dim_table的台区/台桌映射到项目分类,支持台桌级细分 |
## 字段说明
@@ -18,57 +19,47 @@
|------|--------|------|------|------|------|
| 1 | category_id | SERIAL | NO | PK | 分类ID自增 |
| 2 | source_area_name | VARCHAR(100) | NO | UK | 源区域名称来自dim_table.site_table_area_name |
| 3 | category_code | VARCHAR(20) | NO | | 分类代码。**枚举值**: BILLIARD, BILLIARD_VIP, SNOOKER, MAHJONG, KTV, SPECIAL, OTHER |
| 4 | category_name | VARCHAR(50) | NO | | 分类名称 |
| 5 | match_type | VARCHAR(10) | NO | | 匹配类型。**枚举值**: EXACT精确, LIKE模糊, DEFAULT兜底 |
| 6 | match_priority | INTEGER | NO | | 匹配优先级(数字越小优先级越高 |
| 7 | is_active | BOOLEAN | NO | | 是否启用 |
| 8 | description | TEXT | YES | | 说明 |
| 9 | created_at | TIMESTAMPTZ | NO | | 创建时间 |
| 10 | updated_at | TIMESTAMPTZ | NO | | 更新时间 |
| 3 | source_table_name | VARCHAR(100) | YES | UK | 源台桌名称来自dim_table.table_nameNULL表示区域级映射 |
| 4 | category_code | VARCHAR(20) | NO | | 分类代码。**枚举值**: BILLIARD, SNOOKER, MAHJONG, KTV, SPECIAL, OTHER |
| 5 | category_name | VARCHAR(50) | NO | | 分类名称含emoji |
| 6 | display_name | VARCHAR(50) | YES | | 显示名称(用于筛选器 |
| 7 | short_name | VARCHAR(20) | YES | | 简写(用于列表标签) |
| 8 | match_type | VARCHAR(10) | NO | | 匹配类型。**枚举值**: EXACT精确, LIKE模糊, DEFAULT兜底 |
| 9 | match_priority | INTEGER | NO | | 匹配优先级(数字越小优先级越高) |
| 10 | is_active | BOOLEAN | NO | | 是否启用 |
| 11 | description | TEXT | YES | | 说明 |
| 12 | created_at | TIMESTAMPTZ | NO | | 创建时间 |
| 13 | updated_at | TIMESTAMPTZ | NO | | 更新时间 |
## 分类映射示例
## 变更说明2026-03-07
| 源区域名称 | 分类代码 | 分类名称 |
|------------|----------|----------|
| A区 | BILLIARD | 台球散台 |
| B区 | BILLIARD | 台球散台 |
| C区 | BILLIARD | 台球散台 |
| TV台 | BILLIARD | 台球散台 |
| VIP包厢 | BILLIARD_VIP | 台球VIP |
| 斯诺克区 | SNOOKER | 斯诺克 |
| 麻将房 | MAHJONG | 麻将棋牌 |
| M7 | MAHJONG | 麻将棋牌 |
| M8 | MAHJONG | 麻将棋牌 |
| 666 | MAHJONG | 麻将棋牌 |
| 发财 | MAHJONG | 麻将棋牌 |
| K包 | KTV | K歌娱乐 |
| k包活动区 | KTV | K歌娱乐 |
| 幸会158 | KTV | K歌娱乐 |
| 补时长 | SPECIAL | 补时长 |
### 新增字段
- `source_table_name`:支持台桌级细分映射(如 VIP包厢 V5 → SNOOKER
- `display_name`:前端筛选器显示名称
- `short_name`:列表中的简写标签
## 使用说明
### 删除类型
- `BILLIARD_VIP` 已废弃VIP包厢 V1-V4 归入 `BILLIARD`V5 归入 `SNOOKER`
**取值方式**
### 唯一约束变更
- 由 `(source_area_name)` 改为 `(source_area_name, COALESCE(source_table_name, ''))`
```sql
-- 将台区名称映射到分类
SELECT
dt.site_table_area_name,
COALESCE(ac.category_code, 'OTHER') AS category_code,
COALESCE(ac.category_name, '其他') AS category_name
FROM dwd.dim_table dt
LEFT JOIN dws.cfg_area_category ac
ON dt.site_table_area_name = ac.source_area_name
AND ac.is_active = TRUE
WHERE dt.scd2_is_current = 1;
## 匹配优先级
-- 按分类汇总收入
SELECT
COALESCE(ac.category_name, '其他') AS category_name,
SUM(tfl.ledger_amount) AS total_amount
FROM dwd.dwd_table_fee_log tfl
LEFT JOIN dwd.dim_table dt ON dt.table_id = tfl.site_table_id
LEFT JOIN dws.cfg_area_category ac ON dt.site_table_area_name = ac.source_area_name
GROUP BY COALESCE(ac.category_name, '其他');
```
| 优先级 | 匹配方式 | 说明 |
|--------|---------|------|
| 5 | 台桌级精确 | source_area_name + source_table_name 都匹配 |
| 10 | 区域级精确 | source_area_name 匹配source_table_name 为 NULL |
| 50 | 模糊匹配 | source_area_name 包含模式匹配 |
| 999 | 兜底 | 无法匹配的区域归入 OTHER |
## 分类映射
| 分类代码 | 显示名称 | 简写 | 源区域 |
|----------|---------|------|--------|
| BILLIARD | 🎱 中式/追分 | 🎱 | A区、B区、C区、TV台、VIP包厢(V1-V4) |
| SNOOKER | 斯诺克 | 斯 | 斯诺克区、VIP包厢(V5) |
| MAHJONG | 🀄 麻将/棋牌 | 🀄 | 麻将房、M7、M8、666、发财 |
| KTV | 🎤 团建/K歌 | 🎤 | K包、k包活动区、幸会158 |
| SPECIAL | 补时长 | 补 | 补时长 |
| OTHER | 其他 | 他 | 兜底 |

View File

@@ -10,7 +10,7 @@
| 表名 | dws_assistant_daily_detail |
| 主键 | id |
| 唯一键 | (site_id, assistant_id, stat_date) |
| 数据来源 | dwd_assistant_service_log + dwd_assistant_trash_event |
| 数据来源 | dwd_assistant_service_log + dwd_assistant_service_log_ex |
| 更新频率 | 每小时增量更新 |
| 说明 | 以"助教+日期"为粒度,汇总每日业绩明细 |
@@ -25,7 +25,7 @@
| 5 | assistant_nickname | VARCHAR(50) | YES | 助教花名(冗余,便于查询展示) |
| 6 | stat_date | DATE | NO | 统计日期 |
| 7 | assistant_level_code | INTEGER | YES | 助教等级代码SCD2口径取stat_date当日生效的等级 |
| 8 | assistant_level_name | VARCHAR(20) | YES | 助教等级名称 |
| 8 | assistant_level_name | VARCHAR(20) | YES | 助教等级名称(由 `level_code` 静态映射得出,不依赖 SCD2 返回值) |
| 9 | total_service_count | INTEGER | NO | 总服务次数 |
| 10 | base_service_count | INTEGER | NO | 基础课服务次数 |
| 11 | bonus_service_count | INTEGER | NO | 附加课服务次数 |
@@ -46,8 +46,12 @@
| 26 | unique_tables | INTEGER | NO | 服务台桌数(去重) |
| 27 | trashed_seconds | INTEGER | NO | 被废除的服务时长(秒) |
| 28 | trashed_count | INTEGER | NO | 被废除的服务次数 |
| 29 | created_at | TIMESTAMPTZ | NO | 创建时间 |
| 30 | updated_at | TIMESTAMPTZ | NO | 更新时间 |
| 29 | penalty_minutes | NUMERIC(10,2) | YES | 惩罚分钟数(定档折算)。公式:`actual_minutes × (1 - per_hour_contribution / 24)`;per_hour_contribution ≥ 24 时为 0 |
| 30 | penalty_reason | TEXT | YES | 惩罚原因描述NULL=无违规) |
| 31 | is_exempt | BOOLEAN | NO | 是否豁免惩罚(豁免助教不计算惩罚) |
| 32 | per_hour_contribution | NUMERIC(10,2) | YES | 每小时贡献金额(= `base_ledger_amount / base_hours / overlap_count`;NULL=无违规或豁免) |
| 33 | created_at | TIMESTAMPTZ | NO | 创建时间 |
| 34 | updated_at | TIMESTAMPTZ | NO | 更新时间 |
## 数据来源
@@ -68,17 +72,21 @@ WHERE is_delete = 0
GROUP BY site_id, DATE(start_use_time), site_assistant_id, nickname;
```
### 废除记录dwd_assistant_trash_event
### 废除记录dwd_assistant_service_log_ex
> ⚠️ `dwd_assistant_trash_event` 已于 2026-02-22 废弃,作废判断改用 `dwd_assistant_service_log_ex.is_trash`(0=正常,1=作废)。
```sql
SELECT
site_id,
DATE(create_time) AS stat_date,
assistant_no,
assistant_name,
SUM(charge_minutes_raw * 60) AS trashed_seconds,
COUNT(*) AS trashed_count
FROM dwd.dwd_assistant_trash_event
GROUP BY site_id, DATE(create_time), assistant_no, assistant_name;
s.site_id,
DATE(s.start_use_time) AS stat_date,
s.site_assistant_id AS assistant_id,
SUM(CASE WHEN ex.is_trash = 1 THEN s.income_seconds ELSE 0 END) AS trashed_seconds,
COUNT(CASE WHEN ex.is_trash = 1 THEN 1 END) AS trashed_count
FROM dwd.dwd_assistant_service_log s
LEFT JOIN dwd.dwd_assistant_service_log_ex ex ON s.assistant_service_id = ex.assistant_service_id
WHERE s.is_delete = 0
GROUP BY s.site_id, DATE(s.start_use_time), s.site_assistant_id;
```
## 使用说明
@@ -115,4 +123,4 @@ GROUP BY assistant_id, DATE_TRUNC('month', stat_date);
|------|------|
| 可回溯 | ✅ 完全可回溯 |
| 数据范围 | 2025-07-21 ~ 至今 |
| 依赖表 | dwd_assistant_service_log, dwd_assistant_trash_event, dim_assistant |
| 依赖表 | dwd_assistant_service_log, dwd_assistant_service_log_ex, dim_assistant, dim_table, cfg_skill_type |

View File

@@ -0,0 +1,136 @@
# dws_assistant_project_tag 助教项目标签表
> 生成时间2026-03-07
## 表信息
| 属性 | 值 |
|------|-----|
| Schema | dws |
| 表名 | dws_assistant_project_tag |
| 主键 | id |
| 唯一键 | (site_id, assistant_id, time_window, category_code) |
| 数据来源 | dwd_assistant_service_log + dim_table + cfg_area_category |
| 更新频率 | 每日全量重建(按 site_id 删除后重新插入) |
| 说明 | 按时间窗口计算助教在四大项目的工作时长占比≥25% 分配标签 |
## 字段说明
| 序号 | 字段名 | 类型 | 可空 | 说明 |
|------|--------|------|------|------|
| 1 | id | BIGSERIAL | NO | 自增主键 |
| 2 | site_id | BIGINT | NO | 门店ID |
| 3 | tenant_id | BIGINT | NO | 租户ID |
| 4 | assistant_id | BIGINT | NO | 助教ID |
| 5 | time_window | VARCHAR(40) | NO | 时间窗口枚举值 |
| 6 | category_code | VARCHAR(30) | NO | 项目分类代码BILLIARD/SNOOKER/MAHJONG/KTV |
| 7 | category_name | VARCHAR(50) | NO | 项目显示名称(如 🎱 中式/追分) |
| 8 | short_name | VARCHAR(10) | NO | 项目简写(如 🎱) |
| 9 | duration_seconds | BIGINT | NO | 该项目总工作时长(秒) |
| 10 | total_seconds | BIGINT | NO | 所有四大项目总时长(秒) |
| 11 | percentage | NUMERIC(5,4) | NO | 占比0~1四位小数 |
| 12 | is_tagged | BOOLEAN | NO | 占比≥0.25 时为 TRUE |
| 13 | computed_at | TIMESTAMPTZ | NO | 计算时间 |
| 14 | created_at | TIMESTAMPTZ | NO | 创建时间 |
| 15 | updated_at | TIMESTAMPTZ | NO | 更新时间 |
## 时间窗口
助教看板使用 6 个时间窗口:
| 枚举值 | 说明 |
|--------|------|
| THIS_MONTH | 本月(月初 ~ 今天) |
| THIS_QUARTER | 本季度季度首月1日 ~ 今天) |
| LAST_MONTH | 上月(上月初 ~ 上月末) |
| LAST_3_MONTHS_EXCL_CURRENT | 前3个月不含本月 |
| LAST_QUARTER | 上季度 |
| LAST_6_MONTHS | 最近半年(不含本月) |
## 索引
| 索引名 | 字段 | 类型 | 说明 |
|--------|------|------|------|
| pk_dws_assistant_project_tag | id | 主键 | 自增主键 |
| uk_dws_assistant_project_tag | (site_id, assistant_id, time_window, category_code) | 唯一 | 业务唯一键 |
| idx_apt_site_window_tagged | (site_id, time_window) WHERE is_tagged=TRUE | 部分索引 | 加速看板查询 |
## 数据链路
```
dwd.dwd_assistant_service_log (income_seconds, site_table_id)
→ JOIN dwd.dim_table (site_table_id → table_id, scd2_is_current=1)
→ get_area_category(area_name, table_name) -- 通过 cfg_area_category 映射
→ 只保留 BILLIARD/SNOOKER/MAHJONG/KTV
→ 按 (assistant_id, category_code) 汇总 income_seconds
→ 计算占比 percentage = duration_seconds / total_seconds
→ ≥0.25 标记 is_tagged=TRUE
→ 写入 dws.dws_assistant_project_tag
```
### 关键规则
1. 数据链路走 `dim_table`(通过 `site_table_id` JOIN不直接用事实表的 `site_table_area_name`
2. 只计算四大项目BILLIARD/SNOOKER/MAHJONG/KTVSPECIAL/OTHER 不参与
3. 标签阈值 25%`TAG_THRESHOLD = 0.25`
4. 全量删除重建策略:按 `site_id` 删除后重新插入所有时间窗口
5. `is_delete = 0` 过滤已删除的服务记录
## ETL 任务
| 属性 | 值 |
|------|-----|
| 任务代码 | DWS_ASSISTANT_PROJECT_TAG |
| Python 类 | AssistantProjectTagTask |
| 文件 | tasks/dws/assistant_project_tag_task.py |
| 依赖 | DWD_LOAD_FROM_ODS |
## 变更记录
| 日期 | 变更 | 说明 |
|------|------|------|
| 2026-03-07 | 新建表 | 支持助教看板按项目类型筛选 |
## 验证 SQL
```sql
-- 1. 确认表存在且有数据
SELECT COUNT(*) AS row_count,
COUNT(DISTINCT assistant_id) AS assistant_count,
COUNT(DISTINCT time_window) AS window_count
FROM dws.dws_assistant_project_tag;
-- 2. 确认 category_code 只有四大项目
SELECT DISTINCT category_code
FROM dws.dws_assistant_project_tag
ORDER BY category_code;
-- 期望BILLIARD, KTV, MAHJONG, SNOOKER
-- 3. 确认占比计算正确duration_seconds / total_seconds ≈ percentage
SELECT site_id, assistant_id, time_window, category_code,
duration_seconds, total_seconds, percentage,
ROUND(duration_seconds::numeric / NULLIF(total_seconds, 0), 4) AS calc_pct,
is_tagged,
(percentage >= 0.25) AS should_be_tagged
FROM dws.dws_assistant_project_tag
WHERE percentage >= 0.25 AND is_tagged = FALSE
LIMIT 10;
-- 期望0 行(所有 ≥25% 的都应标记为 TRUE
-- 4. 确认唯一键无重复
SELECT site_id, assistant_id, time_window, category_code, COUNT(*)
FROM dws.dws_assistant_project_tag
GROUP BY site_id, assistant_id, time_window, category_code
HAVING COUNT(*) > 1;
-- 期望0 行
```
## 回滚策略
```sql
-- 删除表(不影响其他表)
DROP TABLE IF EXISTS dws.dws_assistant_project_tag CASCADE;
-- 从 task_registry.py 移除 DWS_ASSISTANT_PROJECT_TAG 注册
-- 从 maintenance_task.py DEFAULT_RETENTION_TABLES 移除对应条目
```

View File

@@ -22,7 +22,7 @@
| 2 | site_id | BIGINT | NO | 门店ID |
| 3 | tenant_id | BIGINT | NO | 租户ID |
| 4 | stat_date | DATE | NO | 统计日期 |
| 5 | gross_amount | NUMERIC(14,2) | NO | 发生额合计 |
| 5 | gross_amount | NUMERIC(14,2) | NO | 发生额合计(= 四项正价之和:table_fee + goods + assistant_pd + assistant_cx;不含 electricity_money,不使用 `consume_money`) |
| 6 | table_fee_amount | NUMERIC(14,2) | NO | 台费正价 |
| 7 | goods_amount | NUMERIC(14,2) | NO | 商品正价 |
| 8 | assistant_pd_amount | NUMERIC(14,2) | NO | 助教基础课正价(陪打) |
@@ -31,9 +31,9 @@
| 11 | discount_groupbuy | NUMERIC(14,2) | NO | 团购优惠 |
| 12 | discount_vip | NUMERIC(14,2) | NO | 会员折扣 |
| 13 | discount_gift_card | NUMERIC(14,2) | NO | 赠送卡抵扣(余额变动) |
| 14 | discount_manual | NUMERIC(14,2) | NO | 手动调整 |
| 14 | discount_manual | NUMERIC(14,2) | NO | 大客户优惠(从 adjust_amount 中按配置拆出) |
| 15 | discount_rounding | NUMERIC(14,2) | NO | 抹零 |
| 16 | discount_other | NUMERIC(14,2) | NO | 其他优惠 |
| 16 | discount_other | NUMERIC(14,2) | NO | 其他优惠adjust_amount - 大客户优惠) |
| 17 | confirmed_income | NUMERIC(14,2) | NO | 确认收入 = 发生额 - 优惠 |
| 18 | cash_inflow_total | NUMERIC(14,2) | NO | 现金流入合计 |
| 19 | cash_pay_amount | NUMERIC(14,2) | NO | 收银实付 |
@@ -42,7 +42,7 @@
| 22 | platform_fee_amount | NUMERIC(14,2) | NO | 平台佣金+服务费(导入) |
| 23 | recharge_cash_inflow | NUMERIC(14,2) | NO | 充值现金流入 |
| 24 | card_consume_total | NUMERIC(14,2) | NO | 卡消费合计 |
| 25 | cash_card_consume | NUMERIC(14,2) | NO | 值卡消费 |
| 25 | recharge_card_consume | NUMERIC(14,2) | NO | 现金充值卡消费(= `recharge_card_amount`,仅现金充值卡支付部分,不含赠送卡) |
| 26 | gift_card_consume | NUMERIC(14,2) | NO | 赠送卡消费 |
| 27 | cash_outflow_total | NUMERIC(14,2) | NO | 现金流出合计 |
| 28 | cash_balance_change | NUMERIC(14,2) | NO | 现金余额变动 |
@@ -63,7 +63,14 @@
## 数据来源
> ⚠️ **consume_money 口径警告**:飞球上游 `consume_money` 在不同时期存在三种口径(A/B/C),DWS 层不应直接使用。
> 应使用 `items_sum = table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money` 作为全时期一致的消费项目合计。
> 详见 [consume_money 口径详解](../../../../docs/reports/DWD-DOC/consume/consume-money-caliber.md)。
### 结账汇总dwd_settlement_head
> ⚠️ 以下示例 SQL 使用 `DATE(pay_time)` 简化展示。实际代码使用 `biz_date_sql_expr(pay_time, cutoff_hour)` 进行营业日归属(跨日订单归前一天)。
```sql
SELECT
DATE(pay_time) AS stat_date,
@@ -82,6 +89,7 @@ SELECT
SUM(pl_coupon_sale_amount) AS pl_coupon_sale_amount
FROM dwd.dwd_settlement_head
WHERE site_id = :site_id
AND settle_type IN (1, 3) -- 仅台桌结账+商城订单,排除退货(6)/退款(7)
GROUP BY DATE(pay_time);
```
@@ -137,12 +145,49 @@ GROUP BY change_time::DATE;
**计算公式**
```
-- gross_amount 基于 items_sum 各分项(全时期一致),不使用 consume_money
gross_amount = table_fee_amount + goods_amount + assistant_pd_amount + assistant_cx_amount
discount_total = discount_groupbuy + discount_vip + discount_gift_card + discount_manual + discount_rounding + discount_other
confirmed_income = gross_amount - discount_total
cash_inflow_total = cash_pay_amount + groupbuy_pay_amount + platform_settlement_amount + recharge_cash_inflow
cash_inflow_total = cash_pay_amount + platform_inflow + recharge_cash_inflow
-- platform_inflow:优先取 platform_settlement_amount(平台回款);为 0 时取 groupbuy_pay_amount(团购支付)
-- 两者互斥,不可同时计入
```
> ⚠️ `discount_manual` 存储大客户优惠(从 adjust_amount 中按配置的会员ID/订单ID拆出`discount_other` 存储其他手动调整(= adjust_amount - 大客户优惠)。两者互斥,之和 = adjust_amount。
**支付渠道恒等式**
```
-- 以下恒等式 100% 成立DWD-DOC 校准确认)
balance_amount = recharge_card_amount + gift_card_amount -- 储值卡 = 充值卡 + 礼品卡
pay_amount = point_amount + cash_amount -- 实付 = 积分 + 现金(互斥)
```
> ⚠️ `balance_amount`(储值卡支付)是独立支付渠道,`recharge_card_amount` 和 `gift_card_amount` 是其分账明细,不可与 `balance_amount` 重复计算。
**团购券三层价格体系**
```
顾客支付价(PCR.sale_price)→ 平台结算价(SH.pl_coupon_sale_amount)→ 门店抵扣价(SH.coupon_amount)
门店补贴 = coupon_amount - pl_coupon_sale_amount
```
- `pl_coupon_sale_amount = SUM(GR.ledger_unit_price)` ✅ 100%
- `coupon_amount = SUM(GR.ledger_amount)` ✅ 100%
- P1 期间2025-07~10`pl_coupon_sale_amount` 恒为 0
**F2 收支平衡公式(三期差异)**
```
P1/P2(< 2026-01-15 12:45:59):
consume = coupon + pay + balance - rounding + adjust + member_disc + prepay(ex)
B 类过渡期(2026-01-15 12:46~18:44,约 40 笔):
consume = 2*coupon + pay + balance - rounding + adjust + member_disc + prepay(ex)
P3(≥ 2026-01-15 18:45,当前生效):
consume = coupon + pl_coupon + pay + balance - rounding + adjust + member_disc + prepay(ex)
```
- 通过率P1/P2 99.24% | B 95.00% | P3 99.87%
- 详见 [F2 收支平衡专项](../../../../docs/reports/DWD-DOC/05-f2-balance-audit.md)
**物化汇总层(可选)**
- L1~L4 物化视图:`mv_dws_finance_daily_summary_l1` / `l2` / `l3` / `l4`
- 刷新任务:`DWS_MV_REFRESH_FINANCE_DAILY`

View File

@@ -37,16 +37,17 @@
|--------------------|--------------------|----------|
| GROUPBUY | 团购优惠 | dwd_settlement_head.coupon_amount - 团购实付 |
| VIP | 会员折扣 | dwd_settlement_head.member_discount_amount |
| GIFT_CARD_TABLE | 台费卡抵扣 | dwd_member_balance_change |
| GIFT_CARD_DRINK | 酒水卡抵扣 | dwd_member_balance_change |
| GIFT_CARD_COUPON | 活动抵用券抵扣 | dwd_member_balance_change |
| MANUAL | 手动调整 | dwd_settlement_head.adjust_amount |
| GIFT_CARD_TABLE | 台费卡抵扣 | dwd_member_balance_change(`card_type_id = 2791990152417157`) |
| GIFT_CARD_DRINK | 酒水卡抵扣 | dwd_member_balance_change(`card_type_id = 2794699703437125`) |
| GIFT_CARD_COUPON | 活动抵用券抵扣 | dwd_member_balance_change(`card_type_id = 2793266846533445`) |
| BIG_CUSTOMER | 大客户优惠 | dwd_settlement_head(big_customer_amount,adjust_amount 拆分) |
| OTHER | 其他优惠 | adjust_amount - big_customer_amount(其他无法归类的手动调整) |
| ROUNDING | 抹零 | dwd_settlement_head.rounding_amount |
| BIG_CUSTOMER | 大客户优惠 | dwd_settlement_head特定会员优惠 |
| OTHER | 其他优惠 | 其他无法归类的优惠 |
## 数据来源
> ⚠️ 以下示例 SQL 使用 `pay_time::DATE` 简化展示。实际代码使用 `biz_date_sql_expr(pay_time, cutoff_hour)` 进行营业日归属(跨日订单归前一天),详见 ETL 配置 `app.business_day_start_hour`。
```sql
-- 从结账头表提取优惠汇总
SELECT
@@ -62,7 +63,7 @@ SELECT
COUNT(CASE WHEN rounding_amount != 0 THEN 1 END) AS rounding_order_count
FROM dwd.dwd_settlement_head
WHERE site_id = :site_id
AND settle_status = 1
AND settle_type IN (1, 3)
GROUP BY pay_time::DATE;
```

View File

@@ -10,7 +10,7 @@
| 表名 | dws_finance_income_structure |
| 主键 | id |
| 唯一键 | (site_id, stat_date, structure_type, category_code) |
| 数据来源 | dwd_table_fee_log + dwd_assistant_service_log + cfg_area_category |
| 数据来源 | dwd_settlement_head + dwd_table_fee_log + dwd_assistant_service_log + cfg_area_category |
| 更新频率 | 每日更新 |
| 说明 | 以"日期+区域/类型"为粒度,分析收入结构 |
@@ -35,23 +35,28 @@
## 分类代码说明
### 按区域分析 (structure_type = 'AREA')
| category_code | category_name | 来源 |
|---------------|---------------|------|
| BILLIARD | 台球散台 | A区/B区/C区/TV台 |
| BILLIARD_VIP | 台球VIP | VIP包厢 |
| SNOOKER | 斯诺克 | 斯诺克区 |
| MAHJONG | 麻将棋牌 | 麻将房/M7/M8/666/发财 |
| KTV | K歌娱乐 | K包/k包活动区/幸会158 |
| SPECIAL | 补时长 | 补时长 |
| OTHER | 其他 | 未映射区域 |
| category_code | category_name | display_name | 来源 |
|---------------|---------------|--------------|------|
| BILLIARD | 🎱 中式/追分 | 🎱 中式/追分 | A区/B区/C区/TV台/VIP包厢(V1-V4) |
| SNOOKER | 斯诺克 | 斯诺克 | 斯诺克区/VIP包厢(V5) |
| MAHJONG | 🀄 麻将/棋牌 | 🀄 麻将/棋牌 | 麻将房/M7/M8/666/发财 |
| KTV | 🎤 团建/K歌 | 🎤 团建/K歌 | K包/k包活动区/幸会158 |
| SPECIAL | 补时长 | 补时长 | 补时长 |
| OTHER | 其他 | 其他 | 未映射区域 |
> ⚠️ `BILLIARD_VIP` 已于 2026-03-07 废弃,VIP包厢按台桌级映射拆分至 BILLIARD(V1-V4) 和 SNOOKER(V5)。
### 按收入类型分析 (structure_type = 'INCOME_TYPE')
| category_code | category_name |
|---------------|---------------|
| TABLE_FEE | 台费收入 |
| GOODS | 商品收入 |
| ASSISTANT_BASE | 助教基础课收入 |
| ASSISTANT_BONUS | 助教附加课收入 |
| category_code | category_name | 数据来源字段 |
|---------------|---------------|-------------|
| TABLE_FEE | 台费收入 | `settlement_head.table_charge_money` |
| GOODS | 商品收入 | `settlement_head.goods_money` |
| ASSISTANT_PD | 助教陪打收入 | `settlement_head.assistant_pd_money` |
| ASSISTANT_CX | 助教超休收入 | `settlement_head.assistant_cx_money` |
> ⚠️ 历史版本曾使用 `ASSISTANT_BASE`/`ASSISTANT_BONUS`,已更正为 `ASSISTANT_PD`(陪打)/`ASSISTANT_CX`(超休),与 DWD 结算单字段对齐。
> 收入金额取自 `items_sum` 各分项(`table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money`),
> 不使用 `consume_money`(存在三种历史口径混合,详见 [consume_money 口径](../../../../docs/reports/DWD-DOC/consume/consume-money-caliber.md))。
## 数据来源
@@ -85,4 +90,4 @@ income_ratio = income_amount / SUM(income_amount) OVER (PARTITION BY stat_date,
|------|------|
| 可回溯 | ✅ 完全可回溯 |
| 数据范围 | 2025-07-21 ~ 至今 |
| 依赖表 | dwd_table_fee_log, dwd_assistant_service_log, dim_table, cfg_area_category |
| 依赖表 | dwd_settlement_head, dwd_table_fee_log, dwd_assistant_service_log, dim_table, cfg_area_category |

View File

@@ -49,16 +49,16 @@
SELECT
DATE(pay_time) AS stat_date,
COUNT(*) AS recharge_count,
SUM(pay_money + gift_money) AS recharge_total,
SUM(pay_money) AS recharge_cash,
SUM(gift_money) AS recharge_gift,
SUM(pay_amount + point_amount) AS recharge_total,
SUM(pay_amount) AS recharge_cash,
SUM(point_amount) AS recharge_gift,
-- 首充
SUM(CASE WHEN is_first = 1 THEN 1 ELSE 0 END) AS first_recharge_count,
SUM(CASE WHEN is_first = 1 THEN pay_money ELSE 0 END) AS first_recharge_cash,
SUM(CASE WHEN is_first = 1 THEN gift_money ELSE 0 END) AS first_recharge_gift,
SUM(CASE WHEN is_first = 1 THEN pay_amount ELSE 0 END) AS first_recharge_cash,
SUM(CASE WHEN is_first = 1 THEN point_amount ELSE 0 END) AS first_recharge_gift,
-- 续充
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN 1 ELSE 0 END) AS renewal_count,
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_money ELSE 0 END) AS renewal_cash,
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_amount ELSE 0 END) AS renewal_cash,
-- 会员数
COUNT(DISTINCT member_id) AS recharge_member_count
FROM dwd.dwd_recharge_order

View File

@@ -30,40 +30,50 @@
| 10 | first_consume_date | DATE | YES | 首次消费日期 |
| 11 | last_consume_date | DATE | YES | 最近消费日期 |
| 12 | total_visit_count | INTEGER | NO | 累计到店次数 |
| 13 | total_consume_amount | NUMERIC(14,2) | NO | 累计消费金额 |
| 14 | total_recharge_amount | NUMERIC(14,2) | NO | 累计充值金额 |
| 15 | total_table_fee | NUMERIC(14,2) | NO | 累计台费 |
| 16 | total_goods_amount | NUMERIC(14,2) | NO | 累计商品消费 |
| 17 | total_assistant_amount | NUMERIC(14,2) | NO | 累计助教服务消费 |
| 13 | total_consume_amount | NUMERIC(14,2) | NO | 累计消费金额(基于 `items_sum` 口径,见下方说明) |
| 14 | total_recharge_amount | NUMERIC(14,2) | NO | 累计充值金额(来源:`dim_member.recharge_money_sum`,上游 API 同步值) |
| 15 | total_table_fee | NUMERIC(14,2) | NO | 累计台费(`table_charge_money`) |
| 16 | total_goods_amount | NUMERIC(14,2) | NO | 累计商品消费(`goods_money`) |
| 17 | total_assistant_amount | NUMERIC(14,2) | NO | 累计助教服务消费(= `assistant_pd_money` + `assistant_cx_money`) |
| 18-23 | visit_count_7d/10d/15d/30d/60d/90d | INTEGER | NO | 近N天到店次数 |
| 24-29 | consume_amount_7d/10d/15d/30d/60d/90d | NUMERIC(14,2) | NO | 近N天消费金额 |
| 30 | cash_card_balance | NUMERIC(14,2) | NO | 储值卡余额 |
| 31 | gift_card_balance | NUMERIC(14,2) | NO | 赠送卡余额 |
| 32 | total_card_balance | NUMERIC(14,2) | NO | 总卡余额 |
| 33 | days_since_last | INTEGER | YES | 距离最近消费的天数 |
| 34 | is_active_7d | BOOLEAN | NO | 近7天是否活跃 |
| 35 | is_active_30d | BOOLEAN | NO | 近30天是否活跃 |
| 36 | is_active_90d | BOOLEAN | NO | 近90天是否活跃 |
| 37 | customer_tier | VARCHAR(20) | YES | 客户分层(高价值/中等/低活跃/流失) |
| 38 | created_at | TIMESTAMPTZ | NO | 创建时间 |
| 39 | updated_at | TIMESTAMPTZ | NO | 更新时间 |
| 30-32 | recharge_count_30d/60d/90d | INTEGER | NO | 近N天充值笔数(来源:dwd_recharge_order) |
| 33-35 | recharge_amount_30d/60d/90d | NUMERIC(14,2) | NO | 近N天充值金额(`pay_amount` 现金部分,不含 `point_amount` 赠送;来源:dwd_recharge_order) |
| 36 | avg_ticket_amount | NUMERIC(14,2) | NO | 次均消费(= total_consume_amount / MAX(total_visit_count, 1)) |
| 37 | cash_card_balance | NUMERIC(14,2) | NO | 储值卡余额 |
| 38 | gift_card_balance | NUMERIC(14,2) | NO | 赠送卡余额 |
| 39 | total_card_balance | NUMERIC(14,2) | NO | 总卡余额 |
| 40 | days_since_last | INTEGER | YES | 距离最近消费的天数 |
| 41 | is_active_7d | BOOLEAN | NO | 近7天是否活跃 |
| 42 | is_active_30d | BOOLEAN | NO | 近30天是否活跃 |
| 43 | is_active_90d | BOOLEAN | NO | 近90天是否活跃 |
| 44 | customer_tier | VARCHAR(20) | YES | 客户分层(高价值/中等/低活跃/流失) |
| 45 | created_at | TIMESTAMPTZ | NO | 创建时间 |
| 46 | updated_at | TIMESTAMPTZ | NO | 更新时间 |
## 数据来源
### 消费统计来源dwd_settlement_head
> ⚠️ **consume_money 口径警告**:`consume_money` 在不同时期存在三种口径(A/B/C),DWS 层不应直接使用。
> 应使用 `items_sum = table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money` 作为全时期一致的消费项目合计。
> 详见 [consume_money 口径详解](../../../../docs/reports/DWD-DOC/consume/consume-money-caliber.md)。
```sql
SELECT
site_id,
member_id,
DATE(pay_time) AS consume_date,
COUNT(*) AS visit_count,
SUM(consume_money) AS consume_amount,
-- ✅ 使用 items_sum 口径(全时期一致),不使用 consume_money
SUM(table_charge_money + goods_money + assistant_pd_money
+ assistant_cx_money + electricity_money) AS consume_amount,
SUM(table_charge_money) AS table_fee,
SUM(goods_money) AS goods_amount,
SUM(assistant_pd_money + assistant_cx_money) AS assistant_amount
FROM dwd.dwd_settlement_head
WHERE member_id != 0 -- 排除散客
AND settle_type = 1 -- 已结账
AND settle_type IN (1, 3) -- 正向交易(1=台桌结账,3=商城订单),排除退货(6)/退款(7)
GROUP BY site_id, member_id, DATE(pay_time);
```
@@ -84,19 +94,27 @@ GROUP BY tenant_member_id;
- member_id=0 的散客不进入此表统计
**客户分层规则**
```sql
customer_tier = CASE
WHEN consume_amount_30d >= 1000 THEN '高价值'
WHEN consume_amount_30d >= 300 THEN '中等'
WHEN is_active_30d THEN '低活跃'
ELSE '流失'
END
```python
# 基于 90 天消费次数+金额组合判断(代码实际逻辑)
if visit_count_90d >= 3 and consume_amount_90d >= 1000:
    customer_tier = '高价值'
elif visit_count_30d > 0:
    customer_tier = '中等'
elif visit_count_90d > 0:
    customer_tier = '低活跃'
else:
    customer_tier = '流失'
```
**金额口径说明**
- `total_consume_amount` 及各滚动窗口 `consume_amount_*d` 均基于 `items_sum` 口径
- `items_sum = table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money`
- `total_assistant_amount` = `assistant_pd_money`(陪打)+ `assistant_cx_money`(超休),不使用笼统的 `service_fee`
## 可回溯性
| 项目 | 说明 |
|------|------|
| 可回溯 | ✅ 完全可回溯 |
| 数据范围 | 2025-07-16 ~ 至今 |
| 依赖表 | dwd_settlement_head, dim_member, dim_member_card_account |
| 依赖表 | dwd_settlement_head, dwd_recharge_order, dim_member, dim_member_card_account |

View File

@@ -0,0 +1,133 @@
# dws_member_project_tag 客户项目标签表
> 生成时间:2026-03-07
## 表信息
| 属性 | 值 |
|------|-----|
| Schema | dws |
| 表名 | dws_member_project_tag |
| 主键 | id |
| 唯一键 | (site_id, member_id, time_window, category_code) |
| 数据来源 | dwd_table_fee_log + dim_table + cfg_area_category |
| 更新频率 | 每日全量重建(按 site_id 删除后重新插入) |
| 说明 | 按时间窗口计算客户在四大项目的消费时长占比,占比 ≥25% 的项目分配标签。散客不参与。 |
## 字段说明
| 序号 | 字段名 | 类型 | 可空 | 说明 |
|------|--------|------|------|------|
| 1 | id | BIGSERIAL | NO | 自增主键 |
| 2 | site_id | BIGINT | NO | 门店ID |
| 3 | tenant_id | BIGINT | NO | 租户ID |
| 4 | member_id | BIGINT | NO | 会员ID(散客不入此表) |
| 5 | time_window | VARCHAR(40) | NO | 时间窗口枚举值 |
| 6 | category_code | VARCHAR(30) | NO | 项目分类代码(BILLIARD/SNOOKER/MAHJONG/KTV) |
| 7 | category_name | VARCHAR(50) | NO | 项目显示名称(如 🎱 中式/追分) |
| 8 | short_name | VARCHAR(10) | NO | 项目简写(如 🎱) |
| 9 | duration_seconds | BIGINT | NO | 该项目总计费时长(秒,来源 ledger_count) |
| 10 | total_seconds | BIGINT | NO | 所有四大项目总时长(秒) |
| 11 | percentage | NUMERIC(5,4) | NO | 占比(0~1,四位小数) |
| 12 | is_tagged | BOOLEAN | NO | 占比≥0.25 时为 TRUE |
| 13 | computed_at | TIMESTAMPTZ | NO | 计算时间 |
| 14 | created_at | TIMESTAMPTZ | NO | 创建时间 |
| 15 | updated_at | TIMESTAMPTZ | NO | 更新时间 |
## 时间窗口
客户看板使用 2 个时间窗口:
| 枚举值 | 说明 |
|--------|------|
| LAST_30_DAYS | 近30天(含今天,base_date-29天 ~ base_date) |
| LAST_60_DAYS | 近60天(含今天,base_date-59天 ~ base_date) |
## 索引
| 索引名 | 字段 | 类型 | 说明 |
|--------|------|------|------|
| pk_dws_member_project_tag | id | 主键 | 自增主键 |
| uk_dws_member_project_tag | (site_id, member_id, time_window, category_code) | 唯一 | 业务唯一键 |
| idx_mpt_site_window_tagged | (site_id, time_window) WHERE is_tagged=TRUE | 部分索引 | 加速看板查询 |
## 数据链路
```
dwd.dwd_table_fee_log (ledger_count, site_table_id)
→ JOIN dwd.dim_table (site_table_id → table_id, scd2_is_current=1)
→ get_area_category(area_name, table_name) -- 通过 cfg_area_category 映射
→ 只保留 BILLIARD/SNOOKER/MAHJONG/KTV
→ 排除散客(member_id IS NULL 或 = 0)
→ 按 (member_id, category_code) 汇总 ledger_count
→ 计算占比 percentage = duration_seconds / total_seconds
→ ≥0.25 标记 is_tagged=TRUE
→ 写入 dws.dws_member_project_tag
```
### 关键规则
1. 数据链路走 `dim_table`(通过 `site_table_id` JOIN),不直接用事实表的 `site_table_area_name`
2. 客户时长使用 `ledger_count`(计费时长),不使用 `income_seconds`(那是助教工作时长)
3. 散客(member_id=0 或 NULL)不参与标签计算
4. 只计算四大项目(BILLIARD/SNOOKER/MAHJONG/KTV)
5. 标签阈值 25%(`TAG_THRESHOLD = 0.25`)
6. 全量删除重建策略:按 `site_id` 删除后重新插入所有时间窗口
7. `COALESCE(is_delete, 0) = 0` 过滤已删除的台费记录
## ETL 任务
| 属性 | 值 |
|------|-----|
| 任务代码 | DWS_MEMBER_PROJECT_TAG |
| Python 类 | MemberProjectTagTask |
| 文件 | tasks/dws/member_project_tag_task.py |
| 依赖 | DWD_LOAD_FROM_ODS |
## 变更记录
| 日期 | 变更 | 说明 |
|------|------|------|
| 2026-03-07 | 新建表 | 支持客户看板按项目类型筛选 |
## 验证 SQL
```sql
-- 1. 确认表存在且有数据
SELECT COUNT(*) AS row_count,
COUNT(DISTINCT member_id) AS member_count,
COUNT(DISTINCT time_window) AS window_count
FROM dws.dws_member_project_tag;
-- 2. 确认无散客数据
SELECT COUNT(*) FROM dws.dws_member_project_tag WHERE member_id = 0 OR member_id IS NULL;
-- 期望0
-- 3. 确认标签与占比一致:查找占比 ≥ 0.25 却未打标签的异常行(calc_pct 列供人工核对占比计算)
SELECT site_id, member_id, time_window, category_code,
duration_seconds, total_seconds, percentage,
ROUND(duration_seconds::numeric / NULLIF(total_seconds, 0), 4) AS calc_pct,
is_tagged,
(percentage >= 0.25) AS should_be_tagged
FROM dws.dws_member_project_tag
WHERE percentage >= 0.25 AND is_tagged = FALSE
LIMIT 10;
-- 期望0 行
-- 4. 确认唯一键无重复
SELECT site_id, member_id, time_window, category_code, COUNT(*)
FROM dws.dws_member_project_tag
GROUP BY site_id, member_id, time_window, category_code
HAVING COUNT(*) > 1;
-- 期望0 行
```
## 回滚策略
```sql
-- 删除表(不影响其他表)
DROP TABLE IF EXISTS dws.dws_member_project_tag CASCADE;
-- 从 task_registry.py 移除 DWS_MEMBER_PROJECT_TAG 注册
-- 从 maintenance_task.py DEFAULT_RETENTION_TABLES 移除对应条目
```

View File

@@ -35,13 +35,14 @@
| 15 | table_fee | NUMERIC(12,2) | NO | 台费 |
| 16 | goods_amount | NUMERIC(12,2) | NO | 商品金额 |
| 17 | assistant_amount | NUMERIC(12,2) | NO | 助教服务金额 |
| 18 | total_consume | NUMERIC(12,2) | NO | 消费总额(正价 |
| 18 | total_consume | NUMERIC(12,2) | NO | 消费总额(基于 `items_sum` 口径,= tc + goods + pd + cx + electricity) |
| 19 | total_discount | NUMERIC(12,2) | NO | 优惠总额 |
| 20 | actual_pay | NUMERIC(12,2) | NO | 实付金额 |
| 21 | cash_pay | NUMERIC(12,2) | NO | 现金/刷卡支付 |
| 22 | cash_card_pay | NUMERIC(12,2) | NO | 储值卡支付 |
| 21 | cash_pay | NUMERIC(12,2) | NO | 收银实付(= `pay_amount`,与 actual_pay 同值) |
| 22 | balance_pay | NUMERIC(12,2) | NO | 储值卡支付(= recharge_card_pay + gift_card_pay) |
| 22a | recharge_card_pay | NUMERIC(12,2) | NO | 现金充值卡支付(balance_pay 的子项) |
| 23 | gift_card_pay | NUMERIC(12,2) | NO | 赠送卡支付 |
| 24 | groupbuy_pay | NUMERIC(12,2) | NO | 团购券支付 |
| 24 | groupbuy_pay | NUMERIC(12,2) | NO | 团购抵消台费金额(= `coupon_amount`) |
| 25 | table_duration_min | INTEGER | NO | 台桌使用时长(分钟,来自台费流水真实秒数) |
| 26 | assistant_duration_min | INTEGER | NO | 助教服务时长(分钟) |
| 27 | assistant_services | JSONB | YES | 助教服务列表 |
@@ -51,28 +52,36 @@
## 数据来源
### 主表来源dwd_settlement_head
> ⚠️ `total_consume` 使用 `items_sum` 口径(全时期一致),不使用 `consume_money`(存在三种历史口径混合)。
```sql
SELECT
site_id,
tenant_id,
member_id,
order_settle_id,
DATE(pay_time) AS visit_date,
create_time AS visit_time,
member_name AS member_nickname,
member_phone AS member_mobile,
table_id,
table_charge_money AS table_fee,
goods_money AS goods_amount,
assistant_pd_money + assistant_cx_money AS assistant_amount,
consume_money AS total_consume,
member_discount_amount + adjust_amount + rounding_amount AS total_discount,
pay_amount AS actual_pay,
balance_amount AS cash_card_pay,
gift_card_amount AS gift_card_pay
FROM dwd.dwd_settlement_head
WHERE member_id != 0
AND settle_type = 1;
sh.site_id,
sh.tenant_id,
sh.member_id,
sh.order_settle_id,
DATE(sh.pay_time) AS visit_date,
sh.create_time AS visit_time,
-- ⚠️ member_nickname/member_mobile 实际从 dim_member 关联获取(nickname/mobile),非结算头表字段
dm.nickname AS member_nickname,
dm.mobile AS member_mobile,
sh.table_id,
sh.table_charge_money AS table_fee,
sh.goods_money AS goods_amount,
sh.assistant_pd_money + sh.assistant_cx_money AS assistant_amount,
-- ✅ 使用 items_sum 口径,不使用 consume_money
sh.table_charge_money + sh.goods_money + sh.assistant_pd_money
+ sh.assistant_cx_money + sh.electricity_money AS total_consume,
sh.member_discount_amount + sh.adjust_amount + sh.rounding_amount AS total_discount,
sh.pay_amount AS actual_pay,
sh.balance_amount AS balance_pay,
sh.recharge_card_amount AS recharge_card_pay,
sh.gift_card_amount AS gift_card_pay
FROM dwd.dwd_settlement_head sh
JOIN dwd.dim_member dm ON sh.tenant_member_id = dm.tenant_member_id AND dm.scd2_is_current = 1
WHERE sh.member_id IS NOT NULL AND sh.member_id != 0
AND sh.settle_type IN (1, 3); -- 仅台桌结账+商城订单,排除退货(6)/退款(7)
```
### 助教服务明细dwd_assistant_service_log
@@ -127,4 +136,4 @@ area_category = COALESCE(
|------|------|
| 可回溯 | ✅ 完全可回溯 |
| 数据范围 | 2025-07-16 ~ 至今 |
| 依赖表 | dwd_settlement_head, dwd_assistant_service_log, dwd_table_fee_log, dim_table, dim_member |
| 依赖表 | dwd_settlement_head, dwd_assistant_service_log, dwd_table_fee_log, dim_table, dim_member, cfg_area_category |

View File

@@ -23,20 +23,20 @@
| 4 | order_date | DATE | NO | 订单日期(优先 pay_time,其次 create_time) |
| 5 | tenant_id | BIGINT | NO | 租户ID |
| 6 | member_id | BIGINT | YES | 会员ID(NULL 或 0 为散客) |
| 7 | member_flag | BOOLEAN | NO | 是否会员订单 |
| 8 | recharge_order_flag | BOOLEAN | NO | 充值订单标记(消费金额=0 且实付>0 |
| 7 | member_flag | BOOLEAN | NO | 是否会员订单(来源:`is_bind_member`) |
| 8 | recharge_order_flag | BOOLEAN | NO | 充值订单标记(`consume_money = 0` 且实付>0;此处 consume_money 仅用于零值判断,不参与金额计算) |
| 9 | item_count | INTEGER | NO | 订单项数 |
| 10 | total_item_quantity | INTEGER | NO | 订单项总数量 |
| 11 | table_fee_amount | NUMERIC | NO | 台费金额 |
| 12 | assistant_service_amount | NUMERIC | NO | 助教服务金额 |
| 12 | assistant_service_amount | NUMERIC | NO | 助教服务金额(= `assistant_pd_money` + `assistant_cx_money`) |
| 13 | goods_amount | NUMERIC | NO | 商品金额 |
| 14 | group_amount | NUMERIC | NO | 团购金额 |
| 15 | total_coupon_deduction | NUMERIC | NO | 优惠券抵扣总额 |
| 16 | member_discount_amount | NUMERIC | NO | 会员折扣金额 |
| 17 | manual_discount_amount | NUMERIC | NO | 手动折扣金额 |
| 18 | order_original_amount | NUMERIC | NO | 原价估算(实付+优惠/抵扣 |
| 18 | order_original_amount | NUMERIC | NO | 原价估算(= `total_paid_amount + total_coupon_deduction + member_discount_amount + manual_discount_amount`) |
| 19 | order_final_amount | NUMERIC | NO | 最终应付金额 |
| 20 | stored_card_deduct | NUMERIC | NO | 储值卡抵扣金额 |
| 20 | stored_card_deduct | NUMERIC | NO | 储值卡抵扣金额(= `balance_amount`,即 `recharge_card_amount + gift_card_amount`) |
| 21 | external_paid_amount | NUMERIC | NO | 外部支付金额(实付-卡类抵扣) |
| 22 | total_paid_amount | NUMERIC | NO | 总实付金额 |
| 23 | book_table_flow | NUMERIC | NO | 台费流水 |
@@ -44,9 +44,9 @@
| 25 | book_goods_flow | NUMERIC | NO | 商品流水 |
| 26 | book_group_flow | NUMERIC | NO | 团购流水 |
| 27 | book_order_flow | NUMERIC | NO | 订单总流水(台费+助教+商品+团购) |
| 28 | order_effective_consume_cash | NUMERIC | NO | 有效消费现金 |
| 29 | order_effective_recharge_cash | NUMERIC | NO | 有效充值现金 |
| 30 | order_effective_flow | NUMERIC | NO | 有效流水 |
| 28 | order_effective_consume_cash | NUMERIC | NO | 有效消费现金(= `GREATEST(total_paid_amount - stored_card_deduct, 0)`,即外部支付金额) |
| 29 | order_effective_recharge_cash | NUMERIC | NO | 有效充值现金(当前硬编码为 0,占位字段,待后续实现) |
| 30 | order_effective_flow | NUMERIC | NO | 有效流水(当前 = `total_paid_amount`) |
| 31 | refund_amount | NUMERIC | NO | 退款金额 |
| 32 | net_income | NUMERIC | NO | 净收入(实付-退款) |
| 33 | created_at | TIMESTAMPTZ | NO | 创建时间 |
@@ -54,10 +54,18 @@
## 业务口径
> ⚠️ 本表金额字段基于 `items_sum` 各分项(`table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money`),
> 不使用 `consume_money`(存在三种历史口径混合)。
- order_date 优先取 pay_time,其次 create_time
- recharge_order_flag消费金额=0 且实付>0 时标记为充值订单
- order_original_amount = 实付 + 优惠/抵扣
- recharge_order_flag:`consume_money = 0` 且实付>0 时标记为充值订单(此处 consume_money 仅用于零值判断,不参与金额计算)
- stored_card_deduct = `balance_amount`(恒等式:`balance_amount = recharge_card_amount + gift_card_amount`,三者不可相加)
- order_original_amount = `total_paid_amount + total_coupon_deduction + member_discount_amount + manual_discount_amount`(实付 + 团购抵扣 + 会员折扣 + 手动调整)
- external_paid_amount = total_paid_amount - stored_card_deduct(外部支付 = 实付 - 储值卡抵扣)
- book_order_flow = 台费 + 助教 + 商品 + 团购
- order_effective_recharge_cash:当前硬编码为 0(占位字段)
- order_effective_consume_cash = `GREATEST(total_paid_amount - stored_card_deduct, 0)`(与 external_paid_amount 同值)
- order_effective_flow = `total_paid_amount`(当前实现)
- net_income = total_paid_amount - refund_amount
## 使用说明
@@ -81,4 +89,4 @@ ORDER BY order_date DESC;
| 项目 | 说明 |
|------|------|
| 可回溯 | ✅ 完全可回溯 |
| 依赖表 | dwd_settlement_head, dwd_table_fee_log, dwd_assistant_service_log, dwd_store_goods_sale, dwd_groupbuy_redemption, dwd_payment, dwd_refund |
| 依赖表 | dwd_settlement_head, dwd_table_fee_log, dwd_assistant_service_log, dwd_store_goods_sale, dwd_groupbuy_redemption, dwd_refund, dwd_refund_ex |

View File

@@ -0,0 +1,87 @@
# group_buy_package_details 团购套餐详情
> 生成时间:2026-03-05
## 表信息
| 属性 | 值 |
|------|-----|
| Schema | ods |
| 表名 | group_buy_package_details |
| 主键 | coupon_id |
| 数据来源 | `QueryPackageCouponInfo` 详情接口(二级拉取) |
| DDL 路径 | `db/etl_feiqiu/ods/group_buy_package_details.sql` |
| 说明 | 团购套餐详情 ODS 层,存储每个 couponId 的详情原始数据 |
## 数据获取方式
本表数据通过 `ODS_GROUP_PACKAGE` 任务的 **detail_endpoint 二级详情拉取** 子流程获取:
1. 主流程先从 `QueryPackageCouponList` 拉取团购列表写入 `ods.group_buy_packages`
2. 子流程遍历列表中每个 `id`,串行调用 `QueryPackageCouponInfo` 获取详情
3. 详情数据写入本表,采用全量快照模式(`SnapshotMode.FULL_TABLE`),UPSERT on `coupon_id`
## 字段说明
| 序号 | 字段名 | 类型 | 可空 | 说明 |
|------|--------|------|------|------|
| 1 | coupon_id | BIGINT | NO(PK) | 团购套餐 ID(= groupPurchasePackage.id) |
| 2 | package_name | TEXT | YES | 团购套餐名称 |
| 3 | duration | INTEGER | YES | 台费计时时长(秒) |
| 4 | start_time | TIMESTAMPTZ | YES | 可用日期开始 |
| 5 | end_time | TIMESTAMPTZ | YES | 可用日期结束 |
| 6 | add_start_clock | TEXT | YES | 可用时段开始(如 "00:00:00") |
| 7 | add_end_clock | TEXT | YES | 可用时段结束(如 "1.00:00:00") |
| 8 | is_enabled | INTEGER | YES | 是否启用(1=启用, 0=禁用) |
| 9 | is_delete | INTEGER | YES | 是否已删除(1=已删除, 0=正常) |
| 10 | site_id | BIGINT | YES | 店铺 ID |
| 11 | tenant_id | BIGINT | YES | 租户 ID |
| 12 | create_time | TIMESTAMPTZ | YES | 创建时间 |
| 13 | creator_name | TEXT | YES | 创建人 |
| 14 | table_area_ids | JSONB | YES | 可用台区 ID 列表(来自 groupPurchasePackage.tableAreaId) |
| 15 | table_area_names | JSONB | YES | 可用台区名称列表(来自 groupPurchasePackage.tableAreaNameList) |
| 16 | assistant_services | JSONB | YES | 助教服务关联数组(来自 packageCouponAssistants) |
| 17 | groupon_site_infos | JSONB | YES | 关联门店信息数组(来自 grouponSiteInfos) |
| 18 | package_services | JSONB | YES | 套餐服务数组(来自 packagePackageService,待调研) |
| 19 | coupon_details_list | JSONB | YES | 券明细数组(来自 packageCouponDetailsList,待调研) |
| 20 | content_hash | TEXT | YES | 业务字段内容哈希,用于变更检测 |
| 21 | payload | JSONB | YES | 详情接口完整原始 JSON 响应 |
| 22 | fetched_at | TIMESTAMPTZ | YES | ETL 拉取时间戳 |
## 与列表表的关系
```
ods.group_buy_packages (列表)
└── ods.group_buy_package_details (详情)
关联字段group_buy_packages.id = group_buy_package_details.coupon_id
关系:1:1(每个列表记录对应一条详情)
```
## 下游消费
DWD 层 `dwd.dim_groupbuy_package_ex` 在加载时通过 LEFT JOIN 本表,将 `table_area_ids`、`table_area_names`、`assistant_services`、`groupon_site_infos` 四个 JSONB 字段合并到扩展表。
## 使用说明
```sql
-- 查询最新入库的详情记录
SELECT coupon_id, package_name, table_area_names, assistant_services
FROM ods.group_buy_package_details
ORDER BY fetched_at DESC
LIMIT 10;
```
```sql
-- 关联列表表查看完整信息
SELECT p.id, p.package_name, p.selling_price,
d.table_area_names, d.assistant_services, d.groupon_site_infos
FROM ods.group_buy_packages p
LEFT JOIN ods.group_buy_package_details d ON p.id = d.coupon_id
WHERE p.is_delete IS DISTINCT FROM 1;
```
## 可回溯性
| 项目 | 说明 |
|------|------|
| 可回溯 | ✅ 完全可回溯(保留 payload 原始 JSON) |
| 数据来源 | `PackageCoupon/QueryPackageCouponInfo` API |

View File

@@ -0,0 +1,89 @@
# 团购套餐详情(QueryPackageCouponInfo)→ group_buy_package_details 字段映射
> 生成时间:2026-03-05
## 端点信息
| 属性 | 值 |
|------|-----|
| 接口路径 | `PackageCoupon/QueryPackageCouponInfo` |
| 请求方法 | POST |
| 请求参数 | `{ "couponId": <id> }`(从 `ods.group_buy_packages.id` 获取) |
| ODS 对应表 | `ods.group_buy_package_details` |
| JSON 数据路径 | `data` |
| 调用方式 | 二级详情拉取(`ODS_GROUP_PACKAGE` 任务的 `detail_endpoint` 子流程) |
## 响应结构
```json
{
"data": {
"groupPurchasePackage": {
"id": 123,
"packageName": "...",
"duration": 3600,
"startTime": "...",
"endTime": "...",
"addStartClock": "00:00:00",
"addEndClock": "1.00:00:00",
"isEnabled": 1,
"isDelete": 0,
"siteId": 456,
"tenantId": 789,
"createTime": "...",
"creatorName": "...",
"tableAreaId": [1, 2, 3],
"tableAreaNameList": ["A区", "B区"]
},
"packageCouponAssistants": [...],
"grouponSiteInfos": [...],
"packagePackageService": [...],
"packageCouponDetailsList": [...]
}
}
```
## 字段映射
### 结构化字段(来自 data.groupPurchasePackage)
| JSON 路径 | ODS 列名 | 类型转换 | 说明 |
|-----------|----------|----------|------|
| data.groupPurchasePackage.id | coupon_id | int→BIGINT | 团购套餐 ID(主键) |
| data.groupPurchasePackage.packageName | package_name | string→TEXT | 套餐名称 |
| data.groupPurchasePackage.duration | duration | int→INTEGER | 台费计时时长(秒) |
| data.groupPurchasePackage.startTime | start_time | string→TIMESTAMPTZ | 可用日期开始 |
| data.groupPurchasePackage.endTime | end_time | string→TIMESTAMPTZ | 可用日期结束 |
| data.groupPurchasePackage.addStartClock | add_start_clock | string→TEXT | 可用时段开始 |
| data.groupPurchasePackage.addEndClock | add_end_clock | string→TEXT | 可用时段结束 |
| data.groupPurchasePackage.isEnabled | is_enabled | int→INTEGER | 是否启用 |
| data.groupPurchasePackage.isDelete | is_delete | int→INTEGER | 是否已删除 |
| data.groupPurchasePackage.siteId | site_id | int→BIGINT | 店铺 ID |
| data.groupPurchasePackage.tenantId | tenant_id | int→BIGINT | 租户 ID |
| data.groupPurchasePackage.createTime | create_time | string→TIMESTAMPTZ | 创建时间 |
| data.groupPurchasePackage.creatorName | creator_name | string→TEXT | 创建人 |
### JSONB 数组字段
| JSON 路径 | ODS 列名 | 类型转换 | 说明 |
|-----------|----------|----------|------|
| data.groupPurchasePackage.tableAreaId | table_area_ids | array→JSONB | 可用台区 ID 列表 |
| data.groupPurchasePackage.tableAreaNameList | table_area_names | array→JSONB | 可用台区名称列表 |
| data.packageCouponAssistants | assistant_services | array→JSONB | 助教服务关联(含 skillId/assistantLevel/assistantDuration) |
| data.grouponSiteInfos | groupon_site_infos | array→JSONB | 关联门店信息(含 siteId/siteName) |
| data.packagePackageService | package_services | array→JSONB | 套餐服务数组(待调研,可能为空) |
| data.packageCouponDetailsList | coupon_details_list | array→JSONB | 券明细数组(待调研,可能为空) |
## ETL 补充字段
| ODS 列名 | 生成逻辑 |
|-----------|----------|
| content_hash | 基于原始 payload + is_delete 计算 SHA-256 |
| payload | 完整原始 JSON 响应(`data` 节点) |
| fetched_at | ETL 拉取时间戳(`DEFAULT now()`) |
## 写入策略
- 全量快照模式(`SnapshotMode.FULL_TABLE`)
- UPSERT on `coupon_id`,每次运行覆盖全部记录
- 通过 `content_hash` 去重,内容未变则跳过写入

View File

@@ -51,7 +51,7 @@ graph LR
| 文档 | 说明 |
|------|------|
| [BaseTask 公共机制](base_task_mechanism.md) | 任务基类模板方法、TaskContext、时间窗口、注册表、Flow 执行 |
| [ODS 层任务](ods_tasks.md) | 23 个通用 ODS 任务的架构、配置结构、API 端点、目标表 |
| [ODS 层任务](ods_tasks.md) | 22 个通用 ODS 任务的架构、配置结构、API 端点、目标表 |
| [DWD 层任务](dwd_tasks.md) | DWD_LOAD_FROM_ODS 核心装载、SCD2 处理、质量校验 |
| [DWS 层任务](dws_tasks.md) | 助教业绩、会员分析、项目标签、财务统计、库存汇总、运维任务共 19 个 DWS 任务 |
| [INDEX 层任务](index_tasks.md) | WBI/NCI/RS/SPI 指数算法 + ML 手动台账导入 |
@@ -69,10 +69,9 @@ graph LR
|----------|-----------|--------|----------|------|
| `ODS_ASSISTANT_ACCOUNT` | `OdsAssistantAccountsTask` | `ods.assistant_accounts_master` | 助教账号档案 | [查看](ods_tasks.md) |
| `ODS_ASSISTANT_LEDGER` | `OdsAssistantLedgerTask` | `ods.assistant_service_records` | 助教服务流水 | [查看](ods_tasks.md) |
| `ODS_ASSISTANT_ABOLISH` | `OdsAssistantAbolishTask` | `ods.assistant_cancellation_records` | 助教废除记录 | [查看](ods_tasks.md) |
| `ODS_INVENTORY_CHANGE` | `OdsInventoryChangeTask` | `ods.goods_stock_movements` | 库存变化记录 | [查看](ods_tasks.md) |
| `ODS_INVENTORY_STOCK` | `OdsInventoryStockTask` | `ods.goods_stock_summary` | 库存汇总 | [查看](ods_tasks.md) |
| `ODS_GROUP_PACKAGE` | `OdsPackageTask` | `ods.group_buy_packages` | 团购套餐定义 | [查看](ods_tasks.md) |
| `ODS_GROUP_PACKAGE` | `OdsPackageTask` | `ods.group_buy_packages` | 团购套餐定义 + 详情子流程(通过 `detail_endpoint` 串行调用 `QueryPackageCouponInfo` 获取每个团购的详情数据,写入 `ods.group_buy_package_details`) | [查看](ods_tasks.md) |
| `ODS_GROUP_BUY_REDEMPTION` | `OdsGroupBuyRedemptionTask` | `ods.group_buy_redemption_records` | 团购套餐核销 | [查看](ods_tasks.md) |
| `ODS_MEMBER` | `OdsMemberTask` | `ods.member_profiles` | 会员档案 | [查看](ods_tasks.md) |
| `ODS_MEMBER_BALANCE` | `OdsMemberBalanceTask` | `ods.member_balance_changes` | 会员余额变动 | [查看](ods_tasks.md) |

View File

@@ -77,13 +77,15 @@ load(extracted, context) → 遍历 TABLE_MAP
| `dwd.dim_goods_category` | `ods.stock_goods_category_tree` | 商品分类维度(含子类展开) |
| `dwd.dim_groupbuy_package` | `ods.group_buy_packages` | 团购套餐维度 |
| `dwd.dim_groupbuy_package_ex` | `ods.group_buy_packages` + `ods.group_buy_package_details` | 团购套餐扩展(LEFT JOIN 详情表,合并台区/助教/门店 JSONB 字段) |
| `dwd.dim_staff` | `ods.staff_info_master` | 员工维度 |
| `dwd.dim_staff_ex` | `ods.staff_info_master` | 员工扩展 |
#### 事实表映射
| DWD 表 | ODS 源表 | 说明 |
|--------|----------|------|
| `dwd.dwd_settlement_head` | `ods.settlement_records` | 结算头(订单结算主记录) |
| `dwd.dwd_settlement_head` | `ods.settlement_records` | 结算头(订单结算主记录)— 详见下方「结算头关键字段口径」 |
| `dwd.dwd_settlement_head_ex` | `ods.settlement_records` | 结算头扩展(支付方式、撤单、促销等) |
| `dwd.dwd_table_fee_log` | `ods.table_fee_transactions` | 台费流水 |
| `dwd.dwd_table_fee_log_ex` | `ods.table_fee_transactions` | 台费流水扩展(销售员、消费类型等) |
@@ -93,8 +95,8 @@ load(extracted, context) → 遍历 TABLE_MAP
| `dwd.dwd_store_goods_sale_ex` | `ods.store_goods_sales_records` | 商品销售扩展 |
| `dwd.dwd_assistant_service_log` | `ods.assistant_service_records` | 助教服务记录 |
| `dwd.dwd_assistant_service_log_ex` | `ods.assistant_service_records` | 助教服务扩展 |
| `dwd.dwd_assistant_trash_event` | `ods.assistant_cancellation_records` | 助教取消/废单事件 |
| `dwd.dwd_assistant_trash_event_ex` | `ods.assistant_cancellation_records` | 助教取消扩展 |
| ~~`dwd.dwd_assistant_trash_event`~~ | ~~`ods.assistant_cancellation_records`~~ | ~~助教取消/废单事件(2026-02-22 DROP,2026-03-01 清理残留)~~ |
| ~~`dwd.dwd_assistant_trash_event_ex`~~ | ~~`ods.assistant_cancellation_records`~~ | ~~助教取消扩展(2026-02-22 DROP,2026-03-01 清理残留)~~ |
| `dwd.dwd_member_balance_change` | `ods.member_balance_changes` | 会员余额变动 |
| `dwd.dwd_member_balance_change_ex` | `ods.member_balance_changes` | 会员余额变动扩展 |
| `dwd.dwd_groupbuy_redemption` | `ods.group_buy_redemption_records` | 团购核销记录 |
@@ -106,8 +108,48 @@ load(extracted, context) → 遍历 TABLE_MAP
| `dwd.dwd_payment` | `ods.payment_transactions` | 支付记录 |
| `dwd.dwd_refund` | `ods.refund_transactions` | 退款记录 |
| `dwd.dwd_refund_ex` | `ods.refund_transactions` | 退款扩展 |
| `dwd.dwd_goods_stock_summary` | `ods.goods_stock_summary` | 库存汇总 |
| `dwd.dwd_goods_stock_movement` | `ods.goods_stock_movements` | 库存变动 |
> 共计 **17 对维度映射**(含 `_ex`+ **23 对事实映射**(含 `_ex`= **40 对**映射。
> 共计 **19 对维度映射**(含 `_ex`)+ **23 对事实映射**(含 `_ex`,已排除 2026-02-22 DROP 的 assistant_trash_event)= **42 对**有效映射。
---
### 结算头关键字段口径
`dwd_settlement_head` 是核心交易事实表,以下字段在下游消费时需特别注意:
#### settle_type 枚举
| 值 | 含义 | 说明 |
|----|------|------|
| 1 | 台桌结账 | 正常台桌消费结账 |
| 3 | 商城订单 | 商品零售订单 |
| 6 | 退货订单 | 商品退货 |
| 7 | 退款订单 | 金额退款 |
> DWS 层计算发生额、收入等指标时,通常只取 `settle_type IN (1, 3)`(正向交易),排除退货/退款。
> 本表无 `is_delete` 字段,不可用 `is_delete` 过滤。
#### consume_money 口径警告
`consume_money` 存在三种历史口径A/B/C**DWS 层不应直接使用**。
应使用 `items_sum` 口径:
```
items_sum = table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money
```
> 详见 [consume_money 口径校准文档](../../../../docs/reports/DWD-DOC/consume/consume-money-caliber.md)
> 及 [BD 手册 dwd_settlement_head](../database/DWD/main/BD_manual_dwd_settlement_head.md)
#### 支付渠道恒等式
```
balance_amount = recharge_card_amount + gift_card_amount -- 储值卡 = 充值卡 + 礼品卡
```
> `balance_amount` 是独立支付渠道,`recharge_card_amount` / `gift_card_amount` 是其分账明细,三者不可重复计算。
---

View File

@@ -8,7 +8,7 @@
## 概述
DWS 层共有 17 个已注册任务(含 DWS_MAINTENANCE按业务域分为组:
DWS 层共有 19 个已注册任务(含 DWS_MAINTENANCE),按业务域分为以下几组:
### 助教业绩域6 个)
@@ -28,6 +28,13 @@ DWS 层共有 17 个已注册任务(含 DWS_MAINTENANCE按业务域分
| `DWS_MEMBER_CONSUMPTION` | `MemberConsumptionTask` | `dws_member_consumption_summary` | 日期+会员 | delete-before-insert |
| `DWS_MEMBER_VISIT` | `MemberVisitTask` | `dws_member_visit_detail` | 日期+会员+结账单 | delete-before-insert |
### 项目标签域2 个)
| 任务代码 | Python 类 | 目标表 | 粒度 | 更新策略 |
|----------|-----------|--------|------|----------|
| `DWS_ASSISTANT_PROJECT_TAG` | `AssistantProjectTagTask` | `dws_assistant_project_tag` | 助教+时间窗口+项目 | 全量删除重建(按 site_id) |
| `DWS_MEMBER_PROJECT_TAG` | `MemberProjectTagTask` | `dws_member_project_tag` | 会员+时间窗口+项目 | 全量删除重建(按 site_id) |
### 财务统计域4 个)
| 任务代码 | Python 类 | 目标表 | 粒度 | 更新策略 |
@@ -373,7 +380,7 @@ DWS 汇总计算涉及历史月份时,不能直接使用维度表的"当前版
```
dwd_assistant_service_log ──┬──► DWS_ASSISTANT_DAILY日度明细
dwd_assistant_trash_event ──┘ │
dwd_assistant_service_log_ex ┘ │
DWS_ASSISTANT_MONTHLY月度汇总+档位+排名)
@@ -448,7 +455,7 @@ dwd_assistant_service_log ────► DWS_ASSISTANT_CUSTOMER客户关系
| 来源表 | Schema | 用途 |
|--------|--------|------|
| `dwd_assistant_service_log` | `dwd` | 助教服务流水(主数据源) |
| `dwd_assistant_trash_event` | `dwd` | 废除记录(排除无效业绩 |
| `dwd_assistant_service_log_ex` | `dwd` | 扩展表(`is_trash` 标记废除记录) |
| `dim_assistant` | `dwd` | 助教维度SCD2获取当日等级 |
| `cfg_skill_type` | `dws` | 技能 → 课程类型映射 |
@@ -459,21 +466,23 @@ dwd_assistant_service_log ────► DWS_ASSISTANT_CUSTOMER客户关系
| 字段分组 | 字段 | 说明 |
|----------|------|------|
| 标识 | `site_id`, `tenant_id`, `assistant_id`, `assistant_nickname`, `stat_date` | 门店、助教、日期 |
| 等级 | `assistant_level_code`, `assistant_level_name` | SCD2 as-of 取值,取统计日当日生效的等级 |
| 等级 | `assistant_level_code`, `assistant_level_name` | SCD2 as-of 取值`level_code``level_name` 由 code 静态映射得出 |
| 服务次数 | `total_service_count`, `base_service_count`, `bonus_service_count`, `room_service_count` | 总/基础课/附加课/包厢课 |
| 计费秒数 | `total_seconds`, `base_seconds`, `bonus_seconds`, `room_seconds` | 原始秒数 |
| 计费小时 | `total_hours`, `base_hours`, `bonus_hours`, `room_hours` | 秒数 ÷ 3600`Decimal` 精度 |
| 计费金额 | `total_ledger_amount`, `base_ledger_amount`, `bonus_ledger_amount`, `room_ledger_amount` | 台账金额 |
| 去重统计 | `unique_customers`, `unique_tables` | 去重客户数(排除散客)、去重台桌数 |
| 废除统计 | `trashed_seconds`, `trashed_count` | 被废除的秒数和次数 |
| 惩罚检测 | `penalty_minutes`, `penalty_reason`, `is_exempt`, `per_hour_contribution` | 惩罚分钟数(公式:`actual_minutes × (1 - per_hour_contribution / 24)`)、惩罚原因、是否豁免、每小时贡献金额(= `base_ledger_amount / base_hours / overlap_count` |
#### 核心业务逻辑
1. **课程类型分类**:通过 `skill_id` 查询 `cfg_skill_type` 映射,分为 `BASE`(基础课)、`BONUS`(附加课)、`ROOM`(包厢课),未匹配默认 `BASE`
2. **废除记录排除**`assistant_service_id` 为键构建废除索引,被废除的服务记录不计入有效业绩(服务次数、时长、金额),但单独统计 `trashed_seconds``trashed_count`
2. **废除记录排除**:通过 JOIN `dwd_assistant_service_log_ex` 的 `is_trash = 1` 标记识别废除记录(`dwd_assistant_trash_event` 已于 2026-02-22 废弃),被废除的服务记录不计入有效业绩(服务次数、时长、金额),但单独统计 `trashed_seconds` 和 `trashed_count`
3. **助教等级 SCD2 取值**:调用 `get_assistant_level_asof(assistant_id, service_date)` 获取统计日当日生效的等级版本,而非当前最新版本
4. **散客过滤**`unique_customers` 统计时排除 `member_id` 为 0 或 None 的散客
5. **客户/台桌去重**:无论服务记录是否被废除,客户和台桌均参与去重统计
6. **定档折算惩罚检测**:聚合完成后检测同一台桌多名助教重叠挂台的违规情况(规则2)。计算 `per_hour_contribution = base_ledger_amount / base_hours / overlap_count`,若低于阈值(默认 24 元/小时)则按比例扣减 `penalty_minutes`。豁免助教(`is_exempt = True`)不参与惩罚计算。
---
@@ -813,6 +822,9 @@ dim_table ────────────────────┘
| 全量累计 | `first_consume_date`, `last_consume_date`, `total_visit_count`, `total_consume_amount`, `total_recharge_amount`, `total_table_fee`, `total_goods_amount`, `total_assistant_amount` | 首次/最近消费日期、累计到店次数、累计消费金额、累计充值金额、累计台费、累计商品金额、累计助教费用 |
| 滚动窗口(次数) | `visit_count_7d`, `visit_count_10d`, `visit_count_15d`, `visit_count_30d`, `visit_count_60d`, `visit_count_90d` | 各窗口到店次数 |
| 滚动窗口(金额) | `consume_amount_7d`, `consume_amount_10d`, `consume_amount_15d`, `consume_amount_30d`, `consume_amount_60d`, `consume_amount_90d` | 各窗口消费金额 |
| 充值窗口(笔数) | `recharge_count_30d`, `recharge_count_60d`, `recharge_count_90d` | 近 30/60/90 天充值笔数来源dwd_recharge_order |
| 充值窗口(金额) | `recharge_amount_30d`, `recharge_amount_60d`, `recharge_amount_90d` | 近 30/60/90 天充值金额(仅 `pay_amount` 现金部分,不含 `point_amount` 赠送) |
| 次均消费 | `avg_ticket_amount` | total_consume_amount / MAX(total_visit_count, 1) |
| 卡余额 | `cash_card_balance`, `gift_card_balance`, `total_card_balance` | 储值卡(现金卡)余额、赠送卡余额、总余额 |
| 活跃度 | `days_since_last`, `is_active_7d`, `is_active_30d`, `is_active_90d` | 距最近消费天数、近 7/30/90 天是否活跃 |
| 客户分层 | `customer_tier` | 分层标签(高价值/中等/低活跃/流失) |
@@ -821,15 +833,17 @@ dim_table ────────────────────┘
**1. 散客排除**
`member_id` 为 0 或 None 的散客不进入此表统计。SQL 层面和 transform 阶段均做过滤。
`member_id` 为 0 或 None 的散客不进入此表统计。SQL 层面和 transform 阶段均做过滤,同时通过 `settle_type IN (1, 3)` 仅保留台桌结账和商城订单(排除退货/退款)
**2. 消费统计来源**
`dwd_settlement_head``member_id` 聚合,消费金额拆分为:
- `consume_money`:总消费金额
`dwd_settlement_head``member_id` 聚合,消费金额使用 `items_sum` 口径拆分为:
- `items_sum`:消费项目合计(= `table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money`
- `table_charge_money`:台费
- `goods_money`:商品金额
- `assistant_pd_money + assistant_cx_money`:助教费用(专业课 + 陪练课合计)
- `assistant_pd_money + assistant_cx_money`:助教费用(陪打 + 超休合计)
> ⚠️ 不使用 `consume_money`(三种历史口径混合),详见 `docs/reports/DWD-DOC/consume/consume-money-caliber.md`
**3. 滚动窗口**
@@ -909,7 +923,7 @@ dim_table ────────────────────┘
| 会员信息 | `member_nickname`, `member_mobile`, `member_birthday` | 昵称、脱敏手机号、生日 |
| 台桌信息 | `table_id`, `table_name`, `area_name`, `area_category` | 台桌 ID、台桌名称、区域名称、区域分类 |
| 消费金额 | `table_fee`, `goods_amount`, `assistant_amount`, `total_consume`, `total_discount`, `actual_pay` | 台费、商品金额、助教费用、总消费、总优惠、实付金额 |
| 支付方式 | `cash_pay`, `cash_card_pay`, `gift_card_pay`, `groupbuy_pay` | 现金/在线支付、储值卡支付、赠送卡支付、团购券支付 |
| 支付方式 | `cash_pay`, `balance_pay`, `recharge_card_pay`, `gift_card_pay`, `groupbuy_pay` | 现金/在线支付、储值卡总支付、现金充值卡支付、赠送卡支付、团购券支付 |
| 时长 | `table_duration_min`, `assistant_duration_min` | 台桌使用时长(分钟)、助教服务时长(分钟) |
| 助教服务 | `assistant_services` | JSON 格式的助教服务明细 |
@@ -917,15 +931,15 @@ dim_table ────────────────────┘
**1. 散客排除**
SQL 层面通过 `member_id IS NOT NULL AND member_id != 0` 过滤transform 阶段通过 `is_guest()` 二次过滤。
SQL 层面通过 `member_id IS NOT NULL AND member_id != 0` 过滤,同时通过 `settle_type IN (1, 3)` 仅保留台桌结账和商城订单(排除退货/退款),transform 阶段通过 `is_guest()` 二次过滤。
**2. 消费金额拆分**
`dwd_settlement_head` 直接读取各金额字段:
- `table_fee``table_charge_money`(台费)
- `goods_amount``goods_money`(商品金额)
- `assistant_amount``assistant_pd_money + assistant_cx_money`专业课 + 陪练课助教费用合计)
- `total_consume``consume_money`(总消费金额
- `assistant_amount``assistant_pd_money + assistant_cx_money`陪打 + 超休助教费用合计)
- `total_consume``items_sum`= `table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money`,不使用 `consume_money`
- `actual_pay``pay_amount`(实付金额)
**3. 总优惠计算**
@@ -943,7 +957,8 @@ total_discount = adjust_amount + member_discount_amount + rounding_amount
| 字段 | 来源字段 | 说明 |
|------|----------|------|
| `cash_pay` | `pay_amount` | 现金/在线支付 |
| `cash_card_pay` | `balance_amount` | 储值卡(现金卡)支付 |
| `balance_pay` | `balance_amount` | 储值卡总支付(= recharge_card_pay + gift_card_pay) |
| `recharge_card_pay` | `recharge_card_amount` | 现金充值卡支付(balance_pay 的子项) |
| `gift_card_pay` | `gift_card_amount` | 赠送卡支付 |
| `groupbuy_pay` | `coupon_amount` | 团购券支付 |
@@ -1046,12 +1061,12 @@ dwd_member_balance_change ────┘
|----------|------|------|
| 标识 | `site_id`, `tenant_id`, `stat_date` | 门店、统计日期 |
| 发生额 | `gross_amount`, `table_fee_amount`, `goods_amount`, `assistant_pd_amount`, `assistant_cx_amount` | 正价总额及按类型拆分(台费/商品/专业课/陪练课) |
| 优惠 | `discount_total`, `discount_groupbuy`, `discount_vip`, `discount_gift_card`, `discount_manual`, `discount_rounding`, `discount_other` | 优惠合计及按类型拆分 |
| 优惠 | `discount_total`, `discount_groupbuy`, `discount_vip`, `discount_gift_card`, `discount_manual`, `discount_rounding`, `discount_other` | 优惠合计及按类型拆分discount_manual=大客户优惠discount_other=其他手动调整,两者互斥) |
| 确认收入 | `confirmed_income` | 发生额 - 优惠合计 |
| 现金流入 | `cash_inflow_total`, `cash_pay_amount`, `groupbuy_pay_amount`, `platform_settlement_amount`, `recharge_cash_inflow` | 现金流入合计及来源拆分 |
| 现金流出 | `cash_outflow_total`, `platform_fee_amount` | 现金流出合计(支出 + 平台费用) |
| 现金净变动 | `cash_balance_change` | 流入 - 流出 |
| 卡消费 | `card_consume_total`, `cash_card_consume`, `gift_card_consume` | 值卡消费 + 赠送卡消费 |
| 卡消费 | `card_consume_total`, `recharge_card_consume`, `gift_card_consume` | 现金充值卡消费(= `recharge_card_amount`)+ 赠送卡消费 |
| 充值统计 | `recharge_count`, `recharge_total`, `recharge_cash`, `recharge_gift`, `first_recharge_count`, `first_recharge_amount`, `renewal_count`, `renewal_amount` | 充值笔数/金额、首充/续充拆分 |
| 订单统计 | `order_count`, `member_order_count`, `guest_order_count`, `avg_order_amount` | 总订单数、会员/散客订单数、客单价 |
@@ -1063,7 +1078,9 @@ dwd_member_balance_change ────┘
gross_amount = table_charge_money + goods_money + assistant_pd_money + assistant_cx_money
```
`dwd_settlement_head``DATE(pay_time)` 聚合分别统计台费、商品、专业课PD、陪练课CX四类收入
> 注意:`gross_amount` 为发生额(正价四项),不含 `electricity_money`。完整消费项目合计(`items_sum`)还需加上 `electricity_money`
从 `dwd_settlement_head` 按 `biz_date(pay_time)` 聚合,通过 `settle_type IN (1, 3)` 仅保留台桌结账和商城订单(排除退货/退款),分别统计台费、商品、陪打(PD)、超休(CX)四类收入。
**2. 团购优惠计算**
@@ -1096,10 +1113,12 @@ discount_other = adjust_amount - big_customer_amount (负值置 0
**5. 优惠合计与确认收入**
```
discount_total = discount_groupbuy + discount_vip + discount_gift_card + discount_manual + discount_rounding
discount_total = discount_groupbuy + discount_vip + discount_gift_card + discount_manual + discount_rounding + discount_other
confirmed_income = gross_amount - discount_total
```
> `discount_manual` = 大客户优惠,`discount_other` = 其他手动调整,两者互斥,之和 = adjust_amount。
**6. 现金流计算**
```
@@ -1123,11 +1142,13 @@ daily_expense = expense_amount / days_in_month
**8. 卡消费统计**
```
cash_card_consume = recharge_card_amount + balance_amount 值卡支付)
recharge_card_consume = recharge_card_amount (现金充值卡支付部分)
gift_card_consume = 赠送卡消费总额 (来自余额变动)
card_consume_total = cash_card_consume + gift_card_consume
card_consume_total = recharge_card_consume + gift_card_consume
```
> 注意:`balance_amount = recharge_card_amount + gift_card_amount`(恒等式),因此 `recharge_card_consume` 只取 `recharge_card_amount`,不可再加 `balance_amount`,否则重复计算。
---
### DWS_FINANCE_RECHARGE — 充值统计
@@ -1170,7 +1191,7 @@ card_consume_total = cash_card_consume + gift_card_consume
每笔充值金额拆分为:
```
充值总额 = pay_money(现金部分)+ gift_money(赠送部分)
充值总额 = pay_amount(现金部分)+ point_amount(赠送部分)
```
**2. 会员去重统计**
@@ -1236,14 +1257,14 @@ total_card_balance = cash_card_balance + gift_card_balance
**维度 1按收入类型`structure_type = 'INCOME_TYPE'`**
`dwd_settlement_head``pay_time::DATE` 聚合,仅统计已结账订单(`settle_status = 1`),每日展开为 4 条记录:
`dwd_settlement_head``pay_time::DATE` 聚合,仅统计已结账订单(`settle_type IN (1, 3)`),每日展开为 4 条记录:
| category_code | category_name | 来源字段 | 说明 |
|---------------|---------------|----------|------|
| `TABLE_FEE` | 台费收入 | `table_charge_money` | 台桌使用费 |
| `GOODS` | 商品收入 | `goods_money` | 商品销售 |
| `ASSISTANT_BASE` | 助教基础课 | `assistant_pd_money` | 专业课PD=陪打) |
| `ASSISTANT_BONUS` | 助教附加课 | `assistant_cx_money` | 附加课CX=超休/促销) |
| `ASSISTANT_PD` | 助教陪打 | `assistant_pd_money` | 陪打收入 |
| `ASSISTANT_CX` | 助教超休 | `assistant_cx_money` | 超休收入 |
占比计算:`income_ratio = 该类型金额 / 当日四类收入总和`
@@ -1327,7 +1348,7 @@ total_card_balance = cash_card_balance + gift_card_balance
团购优惠 = coupon_amount - 团购实付
```
仅统计 `coupon_amount > 0` 的已结账订单(`settle_status = 1`)。
仅统计 `coupon_amount > 0` 的已结账订单(`settle_type IN (1, 3)`)。
**2. 赠送卡消费拆分**
@@ -1406,10 +1427,10 @@ dws_*(所有 DWS 汇总表)──────► DWS_MAINTENANCE统一维
| 商品 | `item_count`, `total_item_quantity` | 商品种类数、商品总数量 |
| 费用明细 | `table_fee_amount`, `assistant_service_amount`, `goods_amount`, `group_amount` | 台费、助教费、商品金额、团购金额 |
| 优惠 | `total_coupon_deduction`, `member_discount_amount`, `manual_discount_amount` | 团购抵扣、会员折扣、手动调整 |
| 金额汇总 | `order_original_amount`, `order_final_amount` | 订单原价、实付金额 |
| 支付方式 | `stored_card_deduct`, `external_paid_amount`, `total_paid_amount` | 储值卡抵扣、外部支付、总支付 |
| 金额汇总 | `order_original_amount`, `order_final_amount` | 订单原价(= `total_paid_amount + total_coupon_deduction + member_discount_amount + manual_discount_amount`)、实付金额 |
| 支付方式 | `stored_card_deduct`, `external_paid_amount`, `total_paid_amount` | 储值卡抵扣(= `balance_amount`)、外部支付、总支付 |
| 台账流水 | `book_table_flow`, `book_assistant_flow`, `book_goods_flow`, `book_group_flow`, `book_order_flow` | 台费/助教/商品/团购/订单台账流水 |
| 有效消费 | `order_effective_consume_cash`, `order_effective_recharge_cash`, `order_effective_flow` | 有效消费现金、有效充值现金、有效流水 |
| 有效消费 | `order_effective_consume_cash`, `order_effective_recharge_cash`, `order_effective_flow` | 有效消费现金、有效充值现金(当前硬编码为 0,占位)、有效流水 |
| 退款 | `refund_amount`, `net_income` | 退款金额、净收入 |
#### 核心业务逻辑
@@ -1463,7 +1484,7 @@ net_income = total_paid_amount - refund_amount
recharge_order_flag = (consume_money = 0 AND pay_amount > 0)
```
消费金额为 0 但有支付金额的订单标记为充值订单。
消费金额为 0 但有支付金额的订单标记为充值订单。此处 `consume_money` 仅用于零值判断(三种口径在 =0 时等价),不涉及金额聚合。
#### 配置参数
@@ -1638,3 +1659,116 @@ dwd_goods_stock_summary ──┬──► DWS_GOODS_STOCK_DAILY日度汇总
- `range_start_stock` 取该月第一条记录的值(期初快照)
- `range_end_stock` / `current_stock` 取该月最后一条记录的值(期末快照)
- `stat_period = 'monthly'`
---
## 项目标签域
项目标签域包含 2 个任务,按时间窗口计算助教和客户在四大项目类型(BILLIARD/SNOOKER/MAHJONG/KTV)的时长占比,占比≥25% 则分配标签。数据流向为:
```
dwd_assistant_service_log (income_seconds) ──┐
├──► dim_table (site_table_id JOIN)
dwd_table_fee_log (ledger_count) ────────────┘ │
cfg_area_category (get_area_category)
┌──────────────────┴──────────────────┐
▼ ▼
DWS_ASSISTANT_PROJECT_TAG DWS_MEMBER_PROJECT_TAG
助教项目标签6 个时间窗口) 客户项目标签2 个时间窗口)
```
### 公共逻辑
1. 数据链路走 `dim_table`(通过 `site_table_id` JOIN,`scd2_is_current=1`),获取 `area_name` 和 `table_name`
2. 通过 `get_area_category(area_name, table_name)` 映射到 `category_code`
3. 只保留四大项目(BILLIARD/SNOOKER/MAHJONG/KTV),排除 SPECIAL/OTHER
4. 标签阈值:`TAG_THRESHOLD = 0.25`(25%)
5. 更新策略:全量删除重建(按 `site_id` 删除后重新插入所有时间窗口)
---
### DWS_ASSISTANT_PROJECT_TAG — 助教项目标签
| 属性 | 值 |
|------|-----|
| 任务代码 | `DWS_ASSISTANT_PROJECT_TAG` |
| Python 类 | `AssistantProjectTagTask``tasks/dws/assistant_project_tag_task.py` |
| 目标表 | `dws.dws_assistant_project_tag` |
| 主键 | `site_id`, `assistant_id`, `time_window`, `category_code` |
| 粒度 | 助教 + 时间窗口 + 项目类型 |
| 更新策略 | 全量删除重建(按 site_id |
| 更新频率 | 每日更新 |
| 依赖 | `DWD_LOAD_FROM_ODS` |
#### 数据来源
| 来源表 | Schema | 用途 |
|--------|--------|------|
| `dwd_assistant_service_log` | `dwd` | 助教服务流水(`income_seconds` 工作时长) |
| `dim_table` | `dwd` | 台桌维度SCD2 当前版本,`area_name` + `table_name` |
| `cfg_area_category` | `dws` | 区域分类映射(通过 ConfigCache 加载) |
#### 时间窗口
| 枚举值 | 说明 |
|--------|------|
| `THIS_MONTH` | 本月(月初 ~ 今天) |
| `THIS_QUARTER` | 本季度季度首月1日 ~ 今天) |
| `LAST_MONTH` | 上月(上月初 ~ 上月末) |
| `LAST_3_MONTHS_EXCL_CURRENT` | 前3个月不含本月 |
| `LAST_QUARTER` | 上季度 |
| `LAST_6_MONTHS` | 最近半年(不含本月) |
#### 核心业务逻辑
1. 从 `dwd_assistant_service_log` 按 `(site_assistant_id, site_table_id)` 聚合 `income_seconds`
2. 通过 `dim_table` JOIN 获取台桌的 `area_name` 和 `table_name`
3. 调用 `get_area_category(area_name, table_name)` 映射到 `category_code`
4. 按 `(assistant_id, category_code)` 汇总各项目时长
5. 计算占比:`percentage = duration_seconds / total_seconds`(四位小数)
6. 占比 ≥ 0.25 标记 `is_tagged = TRUE`
7. 过滤条件:`is_delete = 0`,营业日切点通过 `biz_date_sql_expr` 处理
---
### DWS_MEMBER_PROJECT_TAG — 客户项目标签
| 属性 | 值 |
|------|-----|
| 任务代码 | `DWS_MEMBER_PROJECT_TAG` |
| Python 类 | `MemberProjectTagTask``tasks/dws/member_project_tag_task.py` |
| 目标表 | `dws.dws_member_project_tag` |
| 主键 | `site_id`, `member_id`, `time_window`, `category_code` |
| 粒度 | 会员 + 时间窗口 + 项目类型 |
| 更新策略 | 全量删除重建(按 site_id |
| 更新频率 | 每日更新 |
| 依赖 | `DWD_LOAD_FROM_ODS` |
#### 数据来源
| 来源表 | Schema | 用途 |
|--------|--------|------|
| `dwd_table_fee_log` | `dwd` | 台费流水(`ledger_count` 计费时长) |
| `dim_table` | `dwd` | 台桌维度SCD2 当前版本,`area_name` + `table_name` |
| `cfg_area_category` | `dws` | 区域分类映射(通过 ConfigCache 加载) |
#### 时间窗口
| 枚举值 | 说明 |
|--------|------|
| `LAST_30_DAYS` | 近30天含今天base_date-29天 ~ base_date |
| `LAST_60_DAYS` | 近60天含今天base_date-59天 ~ base_date |
#### 核心业务逻辑
1. 从 `dwd_table_fee_log` 按 `(member_id, site_table_id)` 聚合 `ledger_count`
2. 散客排除:`member_id IS NOT NULL AND member_id != 0`
3. 通过 `dim_table` JOIN 获取台桌的 `area_name` 和 `table_name`
4. 调用 `get_area_category(area_name, table_name)` 映射到 `category_code`
5. 按 `(member_id, category_code)` 汇总各项目时长
6. 计算占比:`percentage = duration_seconds / total_seconds`(四位小数)
7. 占比 ≥ 0.25 标记 `is_tagged = TRUE`
8. 过滤条件:`COALESCE(is_delete, 0) = 0`,营业日切点通过 `biz_date_sql_expr` 处理

View File

@@ -78,7 +78,6 @@ API 返回的 JSON 响应通过两级路径定位数据:先按 `data_path`
| `ODS_SETTLEMENT_RECORDS` | ✅ | `(rangeStartTime, rangeEndTime)` | ❌ | ✅ | ✅ | `NONE` | — |
| `ODS_TABLE_USE` | ❌ | 默认 | ❌ | ✅ | ✅ | `WINDOW` | `create_time` |
| `ODS_ASSISTANT_LEDGER` | ✅ | 默认 | ❌ | ✅ | ✅ | `WINDOW` | `create_time` |
| `ODS_ASSISTANT_ABOLISH` | ✅ | 默认 | ❌ | ✅ | ✅ | `NONE` | — |
| `ODS_STORE_GOODS_SALES` | ❌ | 默认 | ❌ | ✅ | ✅ | `WINDOW` | `create_time` |
| `ODS_PAYMENT` | ❌ | 默认 | ❌ | ✅ | ✅ | `NONE` | — |
| `ODS_REFUND` | ❌ | 默认 | ❌ | ✅ | ✅ | `WINDOW` | `pay_time` |
@@ -88,6 +87,8 @@ API 返回的 JSON 响应通过两级路径定位数据:先按 `data_path`
| `ODS_MEMBER_BALANCE` | ❌ | 默认 | ❌ | ✅ | ✅ | `WINDOW` | `create_time` |
| `ODS_RECHARGE_SETTLE` | ✅ | `(rangeStartTime, rangeEndTime)` | ✅ | ❌ | ✅ | `NONE` | — |
| `ODS_GROUP_PACKAGE` | ❌ | 默认 | ❌ | ✅ | ✅ | `FULL_TABLE` | — |
> `ODS_GROUP_PACKAGE` 额外配置了 `detail_endpoint`,在主流程完成后串行调用 `QueryPackageCouponInfo` 获取每个团购的详情数据,写入 `ods.group_buy_package_details`。
| `ODS_GROUP_BUY_REDEMPTION` | ❌ | 默认 | ❌ | ✅ | ✅ | `WINDOW` | `create_time` |
| `ODS_INVENTORY_STOCK` | ❌ | 默认 | ❌ | ✅ | ✅ | `NONE` | — |
| `ODS_INVENTORY_CHANGE` | ✅ | 默认 | ❌ | ✅ | ✅ | `NONE` | — |

View File

@@ -7,7 +7,7 @@
## 概述
ODS 层采用**声明式配置**驱动的通用任务模式:由 `BaseOdsTask` + `OdsTaskSpec` 配置驱动,通过 `ODS_TASK_CLASSES` 字典动态注册,共 23 个任务。
ODS 层采用**声明式配置**驱动的通用任务模式:由 `BaseOdsTask` + `OdsTaskSpec` 配置驱动,通过 `ODS_TASK_CLASSES` 字典动态注册,共 22 个任务。
所有 ODS 任务写入 `ods.*` 表,原始 API 响应以 JSON 格式存入 `payload` 列,元数据列(`fetched_at``source_file``content_hash` 等)自动填充。
@@ -22,7 +22,6 @@ ODS 层采用**声明式配置**驱动的通用任务模式:由 `BaseOdsTask`
| `ODS_SETTLEMENT_RECORDS` | `OdsOrderSettleTask` | `/Site/GetAllOrderSettleList` | `settlement_records` | 结账记录 |
| `ODS_TABLE_USE` | `OdsTableUseTask` | `/Site/GetSiteTableOrderDetails` | `table_fee_transactions` | 台费计费流水 |
| `ODS_ASSISTANT_LEDGER` | `OdsAssistantLedgerTask` | `/AssistantPerformance/GetOrderAssistantDetails` | `assistant_service_records` | 助教服务流水 |
| `ODS_ASSISTANT_ABOLISH` | `OdsAssistantAbolishTask` | `/AssistantPerformance/GetAbolitionAssistant` | `assistant_cancellation_records` | 助教废除记录 |
| `ODS_STORE_GOODS_SALES` | `OdsGoodsLedgerTask` | `/TenantGoods/GetGoodsSalesList` | `store_goods_sales_records` | 门店商品销售流水 |
| `ODS_PAYMENT` | `OdsPaymentTask` | `/PayLog/GetPayLogListPage` | `payment_transactions` | 支付流水 |
| `ODS_REFUND` | `OdsRefundTask` | `/Order/GetRefundPayLogList` | `refund_transactions` | 退款流水 |
@@ -31,7 +30,7 @@ ODS 层采用**声明式配置**驱动的通用任务模式:由 `BaseOdsTask`
| `ODS_MEMBER_CARD` | `OdsMemberCardTask` | `/MemberProfile/GetTenantMemberCardList` | `member_stored_value_cards` | 会员储值卡 |
| `ODS_MEMBER_BALANCE` | `OdsMemberBalanceTask` | `/MemberProfile/GetMemberCardBalanceChange` | `member_balance_changes` | 会员余额变动 |
| `ODS_RECHARGE_SETTLE` | `OdsRechargeSettleTask` | `/Site/GetRechargeSettleList` | `recharge_settlements` | 充值结算 |
| `ODS_GROUP_PACKAGE` | `OdsPackageTask` | `/PackageCoupon/QueryPackageCouponList` | `group_buy_packages` | 团购套餐定义 |
| `ODS_GROUP_PACKAGE` | `OdsPackageTask` | `/PackageCoupon/QueryPackageCouponList` | `group_buy_packages` | 团购套餐定义(含详情子流程,见下方说明) |
| `ODS_GROUP_BUY_REDEMPTION` | `OdsGroupBuyRedemptionTask` | `/Site/GetSiteTableUseDetails` | `group_buy_redemption_records` | 团购套餐核销 |
| `ODS_INVENTORY_STOCK` | `OdsInventoryStockTask` | `/TenantGoods/GetGoodsStockReport` | `goods_stock_summary` | 库存汇总 |
| `ODS_INVENTORY_CHANGE` | `OdsInventoryChangeTask` | `/GoodsStockManage/QueryGoodsOutboundReceipt` | `goods_stock_movements` | 库存变化记录 |
@@ -44,6 +43,26 @@ ODS 层采用**声明式配置**驱动的通用任务模式:由 `BaseOdsTask`
> 所有目标表均位于 `ods` schema 下。
### ODS_GROUP_PACKAGE 详情子流程
`ODS_GROUP_PACKAGE` 任务通过 `detail_endpoint` 配置启用了二级详情拉取:
| 配置项 | 值 |
|--------|-----|
| `detail_endpoint` | `/PackageCoupon/QueryPackageCouponInfo` |
| `detail_target_table` | `ods.group_buy_package_details` |
| `detail_param_builder` | `lambda rec: {"couponId": rec["id"]}` |
| `detail_data_path` | `("data",)` |
| `detail_id_column` | `id` |
执行流程:
1. 主流程从 `QueryPackageCouponList` 拉取团购列表 → 写入 `ods.group_buy_packages`
2. 子流程从 `ods.group_buy_packages` 提取所有 `id`
3. 串行调用 `QueryPackageCouponInfo`(通过 `UnifiedPipeline` + `RateLimiter`),获取每个团购的详情
4. 详情数据经字段提取后写入 `ods.group_buy_package_details`全量快照UPSERT on `coupon_id`
详情表字段映射见 `docs/database/ODS/mappings/mapping_QueryPackageCouponInfo_group_buy_package_details.md`
---
## 通用 ODS 任务架构BaseOdsTask + OdsTaskSpec 模式)
@@ -228,7 +247,7 @@ execute(cursor_data)
### content_hash 去重机制
`content_hash` 是通用 ODS 任务的核心去重手段,所有 23 个任务默认开启(`skip_unchanged=True`)。
`content_hash` 是通用 ODS 任务的核心去重手段,所有 22 个任务默认开启(`skip_unchanged=True`)。
#### 计算方式
@@ -277,8 +296,7 @@ ORDER BY id, fetched_at DESC;
| `ODS_SETTLEMENT_RECORDS` | 是 | `NONE` | — | 结账记录,按时间窗口增量抓取 |
| `ODS_TABLE_USE` | 否 | `WINDOW` | `create_time` | 台费计费流水 |
| `ODS_ASSISTANT_LEDGER` | 是 | `WINDOW` | `create_time` | 助教服务流水 |
| `ODS_ASSISTANT_ABOLISH` | 是 | `NONE` | — | 助教废除记录 |
| `ODS_STORE_GOODS_SALES` | 否 | `WINDOW` | `create_time` | 门店商品销售流水 |
| `ODS_STORE_GOODS_SALES` | 是 | `WINDOW` | `create_time` | 门店商品销售流水2026-03-01 修复:`requires_window``False` 改为 `True`,新增 `time_fields=("startTime", "endTime")` |
| `ODS_PAYMENT` | 否 | `NONE` | — | 支付流水 |
| `ODS_REFUND` | 否 | `WINDOW` | `pay_time` | 退款流水 |
| `ODS_PLATFORM_COUPON` | 否 | `WINDOW` | `consume_time` | 平台/团购券核销 |
@@ -286,7 +304,7 @@ ORDER BY id, fetched_at DESC;
| `ODS_MEMBER_CARD` | 否 | `FULL_TABLE` | — | 会员储值卡 |
| `ODS_MEMBER_BALANCE` | 否 | `WINDOW` | `create_time` | 会员余额变动 |
| `ODS_RECHARGE_SETTLE` | 是 | `NONE` | — | 充值结算 |
| `ODS_GROUP_PACKAGE` | 否 | `FULL_TABLE` | — | 团购套餐定义 |
| `ODS_GROUP_PACKAGE` | 否 | `FULL_TABLE` | — | 团购套餐定义 + 详情子流程(`detail_endpoint` |
| `ODS_GROUP_BUY_REDEMPTION` | 否 | `WINDOW` | `create_time` | 团购套餐核销 |
| `ODS_INVENTORY_STOCK` | 否 | `NONE` | — | 库存汇总 |
| `ODS_INVENTORY_CHANGE` | 是 | `NONE` | — | 库存变化记录 |
@@ -297,4 +315,4 @@ ORDER BY id, fetched_at DESC;
| `ODS_TENANT_GOODS` | 否 | `FULL_TABLE` | — | 租户商品档案 |
| `ODS_STAFF_INFO` | 否 | `FULL_TABLE` | — | 员工档案,全量快照 |
> 所有 23 个任务默认 `skip_unchanged=True`(去重开启)。
> 所有 22 个任务默认 `skip_unchanged=True`(去重开启)。

View File

@@ -283,7 +283,6 @@ execute()
| `member_stored_value_cards` | `ods.member_stored_value_cards` |
| `recharge_settlements` | `ods.recharge_settlements` |
| `settlement_records` | `ods.settlement_records` |
| `assistant_cancellation_records` | `ods.assistant_cancellation_records` |
| `assistant_accounts_master` | `ods.assistant_accounts_master` |
| `assistant_service_records` | `ods.assistant_service_records` |
| `site_tables_master` | `ods.site_tables_master` |

View File

@@ -247,6 +247,7 @@ class FlowRunner:
"""ETL 完成后运行数据一致性检查,输出黑盒测试报告。
返回报告文件路径,失败时返回 None不阻断主流程
CHANGE 2026-02-26 | 改用 FETCH_ROOT 读取实际抓取数据,替代 API_SAMPLE_CACHE_ROOT
"""
try:
from quality.consistency_checker import (
@@ -259,13 +260,19 @@ class FlowRunner:
timer.start_step("CONSISTENCY_CHECK")
try:
# 优先使用 FETCH_ROOTETL 实际抓取的分页 JSON
fetch_root_str = os.environ.get("FETCH_ROOT")
fetch_root = Path(fetch_root_str) if fetch_root_str else None
# 兼容保留api_sample_dir 作为回退
api_sample_dir_str = os.environ.get("API_SAMPLE_CACHE_ROOT")
api_sample_dir = Path(api_sample_dir_str) if api_sample_dir_str else None
report = run_consistency_check(
self.db_conn,
fetch_root=fetch_root,
api_sample_dir=api_sample_dir,
include_api_vs_ods=bool(api_sample_dir),
include_api_vs_ods=bool(fetch_root or api_sample_dir),
include_ods_vs_dwd=True,
tz=self.tz,
)

View File

@@ -26,6 +26,7 @@ from api.local_json_client import LocalJsonClient
from orchestration.cursor_manager import CursorManager
from orchestration.run_tracker import RunTracker
from orchestration.task_registry import TaskRegistry
from utils.task_log_buffer import TaskLogBuffer
class DataSource(str, Enum):
@@ -90,6 +91,8 @@ class TaskExecutor:
self.logger.info("开始运行任务: %s, run_uuid=%s", task_codes, run_uuid)
for task_code in task_codes:
# 为每个任务创建独立的日志缓冲区,避免多任务日志交叉
task_log_buf = TaskLogBuffer(task_code, self.logger)
try:
task_result = self.run_single_task(
task_code, run_uuid, store_id, data_source=data_source,
@@ -107,6 +110,7 @@ class TaskExecutor:
results.append(result_entry)
except Exception as exc: # noqa: BLE001
self.logger.error("任务 %s 失败: %s", task_code, exc, exc_info=True)
task_log_buf.error("任务失败: %s", exc)
# CHANGE 2026-02-24 | 任务失败后 rollback防止 InFailedSqlTransaction 级联
try:
self.db.rollback()
@@ -119,6 +123,9 @@ class TaskExecutor:
"counts": {},
})
continue
finally:
# 任务完成(无论成功/失败),一次性输出该任务的缓冲日志
task_log_buf.flush()
self.logger.info("所有任务执行完成")
return results

View File

@@ -37,6 +37,8 @@ from tasks.dws import (
AssistantFinanceTask,
MemberConsumptionTask,
MemberVisitTask,
AssistantProjectTagTask,
MemberProjectTagTask,
FinanceDailyTask,
FinanceRechargeTask,
FinanceIncomeStructureTask,
@@ -156,6 +158,9 @@ default_registry.register("DWS_ASSISTANT_SALARY", AssistantSalaryTask, layer="DW
default_registry.register("DWS_ASSISTANT_FINANCE", AssistantFinanceTask, layer="DWS", depends_on=["DWS_ASSISTANT_SALARY"])
default_registry.register("DWS_MEMBER_CONSUMPTION", MemberConsumptionTask, layer="DWS")
default_registry.register("DWS_MEMBER_VISIT", MemberVisitTask, layer="DWS")
# CHANGE [2026-03-07] intent: 注册项目标签任务,依赖 DWD 装载完成
default_registry.register("DWS_ASSISTANT_PROJECT_TAG", AssistantProjectTagTask, layer="DWS", depends_on=["DWD_LOAD_FROM_ODS"])
default_registry.register("DWS_MEMBER_PROJECT_TAG", MemberProjectTagTask, layer="DWS", depends_on=["DWD_LOAD_FROM_ODS"])
default_registry.register("DWS_FINANCE_DAILY", FinanceDailyTask, layer="DWS")
default_registry.register("DWS_FINANCE_RECHARGE", FinanceRechargeTask, layer="DWS")
default_registry.register("DWS_FINANCE_INCOME_STRUCTURE", FinanceIncomeStructureTask, layer="DWS")
@@ -172,6 +177,7 @@ default_registry.register("DWS_MAINTENANCE", DwsMaintenanceTask, layer="DWS", de
"DWS_ASSISTANT_MONTHLY", "DWS_ASSISTANT_CUSTOMER",
"DWS_ASSISTANT_SALARY", "DWS_ASSISTANT_FINANCE",
"DWS_MEMBER_CONSUMPTION", "DWS_MEMBER_VISIT",
"DWS_ASSISTANT_PROJECT_TAG", "DWS_MEMBER_PROJECT_TAG",
"DWS_FINANCE_DAILY", "DWS_FINANCE_RECHARGE",
"DWS_FINANCE_INCOME_STRUCTURE", "DWS_FINANCE_DISCOUNT_DETAIL",
"DWS_BUILD_ORDER_SUMMARY",

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,57 @@
"""管道数据类:请求描述、执行结果、写入结果。"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
@dataclass
class PipelineRequest:
    """Describe one request for the pipeline to issue.

    The request loop hands ``endpoint`` and ``params`` to the API client's
    ``post()`` call, unless ``_prefetched_response`` already carries a
    response, in which case that response is used as-is.
    """

    # Endpoint passed as the first argument of api.post().
    endpoint: str
    # Request parameters passed as the second argument of api.post().
    params: dict
    # Page size for the request (default 200).
    # NOTE(review): the meaning of None is not visible here — confirm with the caller.
    page_size: int | None = 200
    # Path used to locate the data inside the response; defaults to ("data",).
    # NOTE(review): consumed outside this module — confirm exact semantics.
    data_path: tuple[str, ...] = ("data",)
    # Optional key of the record list; presumably applied after data_path — TODO confirm.
    list_key: str | None = None
    segment_index: int = 0  # index of the window segment this request belongs to
    is_detail: bool = False  # whether this is a detail request
    detail_id: Any = None  # ID of the detail request
    # Prefetched API response (used for the BaseOdsTask integration:
    # iter_paginated has already fetched the data, so _request_loop skips
    # api.post() and uses this value directly).
    _prefetched_response: Any = None
@dataclass
class PipelineResult:
    """Aggregated outcome of one pipeline run.

    ``status`` ends up as one of "SUCCESS", "PARTIAL", "FAILED" or
    "CANCELLED"; the remaining fields are counters and diagnostics collected
    while the run is in progress.
    """

    # Final run status; starts as "SUCCESS" and is settled at the end of the run.
    status: str = "SUCCESS"
    # Requests taken from the iterator / requests whose response was enqueued.
    total_requests: int = 0
    completed_requests: int = 0
    # Record counters reported in the run summary.
    # NOTE(review): presumably maintained by the process/write workers — confirm.
    total_fetched: int = 0
    total_inserted: int = 0
    total_updated: int = 0
    total_skipped: int = 0
    total_deleted: int = 0
    # Failure counters per phase; any non-zero value downgrades a run to "PARTIAL".
    request_failures: int = 0
    processing_failures: int = 0
    write_failures: int = 0
    # Set when a cancellation signal stopped the request loop.
    cancelled: bool = False
    # Error entries; request failures are stored as
    # {"phase": "request", "endpoint": ..., "error": ...}.
    errors: list[dict] = field(default_factory=list)
    # Elapsed seconds keyed by "total", "request", "process" and "write".
    timing: dict[str, float] = field(default_factory=dict)
    # Detail_Mode statistics
    detail_success: int = 0
    detail_failure: int = 0
    detail_skipped: int = 0
@dataclass
class WriteResult:
    """Result of a single batch write, as returned by the pipeline's write function."""

    inserted: int = 0  # rows inserted by this batch
    updated: int = 0  # rows updated by this batch
    skipped: int = 0  # rows skipped by this batch
    errors: int = 0  # rows that failed in this batch

View File

@@ -0,0 +1,473 @@
# -*- coding: utf-8 -*-
"""统一管道引擎:串行请求 + 异步处理 + 单线程写库。
核心执行流程:
主线程_request_loop串行发送 API 请求 → processing_queue
→ N 个 worker 线程_process_worker并行处理 → write_queue
→ 1 个 writer 线程_write_worker批量写入数据库
线程安全保证:
- PipelineResult 的计数更新通过 threading.Lock 保护
- 队列通信使用 queue.Queue内置线程安全
- SENTINELNone用于通知线程退出
"""
from __future__ import annotations
import logging
import queue
import threading
import time
from typing import Any, Callable, Iterable
from api.rate_limiter import RateLimiter
from config.pipeline_config import PipelineConfig
from utils.cancellation import CancellationToken
from pipeline.models import PipelineRequest, PipelineResult, WriteResult
# 运行时指标日志间隔(每 N 个请求记录一次队列深度等指标)
_METRICS_LOG_INTERVAL = 10
class UnifiedPipeline:
"""统一管道引擎:串行请求 + 异步处理 + 单线程写库。
Args:
api_client: API 客户端duck typing需有 post 方法)
db_connection: 数据库连接duck typing
logger: 日志记录器
config: 管道配置
cancel_token: 取消令牌None 时自动创建一个不会取消的令牌
etl_timer: 可选的 EtlTimer 实例,用于在 FlowRunner 计时报告中记录阶段耗时
task_code: 任务代码,与 etl_timer 配合使用作为步骤名前缀
"""
def __init__(
    self,
    api_client,  # duck typing: must expose post(endpoint, params)
    db_connection,  # duck typing
    logger: logging.Logger,
    config: PipelineConfig,
    cancel_token: CancellationToken | None = None,
    etl_timer=None,  # optional EtlTimer (duck typing)
    task_code: str | None = None,
) -> None:
    """Store the collaborators and prepare the per-run state.

    Args:
        api_client: API client; only its ``post(endpoint, params)`` method
            is used by the request loop.
        db_connection: Database connection, kept on the instance as ``db``.
        logger: Logger used for progress, error and summary messages.
        config: Pipeline configuration; ``rate_min`` / ``rate_max`` are
            read here to build the rate limiter.
        cancel_token: Cancellation token; a fresh ``CancellationToken`` is
            created when none (or a falsy one) is supplied.
        etl_timer: Optional timer used to record sub-step durations.
        task_code: Task code used as the parent step name for ``etl_timer``.
    """
    self.api = api_client
    self.db = db_connection
    self.logger = logger
    self.config = config
    self.cancel_token = cancel_token or CancellationToken()
    self._rate_limiter = RateLimiter(config.rate_min, config.rate_max)
    self._etl_timer = etl_timer
    self._task_code = task_code
    # Lock protecting concurrent updates of the PipelineResult counters.
    self._lock = threading.Lock()
    # Worker thread references, used by the runtime-metrics log to count
    # active threads.
    self._workers: list[threading.Thread] = []
    # Queue references are only meaningful while run() is executing; run()
    # resets them to None afterwards. Defining them here keeps the instance
    # in that same idle state before the first run instead of leaving the
    # attributes undefined.
    self._processing_queue: queue.Queue | None = None
    self._write_queue: queue.Queue | None = None
def run(
    self,
    requests: Iterable[PipelineRequest],
    process_fn: Callable[[Any], list[dict]],
    write_fn: Callable[[list[dict]], WriteResult],
) -> PipelineResult:
    """Run the pipeline end to end and return the aggregated statistics.

    The calling thread issues the requests serially through
    ``_request_loop``. ``config.workers`` daemon threads run
    ``_process_worker`` and one daemon thread runs ``_write_worker``. When
    the request loop ends, one ``None`` sentinel per worker is put on the
    processing queue and the workers are joined; then a sentinel is put on
    the write queue and the writer is joined.

    The shutdown sequence runs in a ``finally`` block, so an exception that
    escapes the request loop (for example one raised by the ``requests``
    iterable itself) still stops and joins every thread, closes the timer
    sub-steps and clears the per-run state before it propagates.

    Args:
        requests: Iterable of requests to issue, consumed lazily.
        process_fn: Turns one API response into a list of records to write.
        write_fn: Writes a batch of records and reports a ``WriteResult``.

    Returns:
        A ``PipelineResult`` whose ``status`` is "CANCELLED", "FAILED",
        "PARTIAL" (at least one request/processing/write failure) or
        "SUCCESS", and whose ``timing`` holds the "total", "request",
        "process" and "write" durations in seconds.
    """
    # Pre-flight cancellation check: do not start any thread for a run that
    # is already cancelled.
    if self.cancel_token.is_cancelled:
        return PipelineResult(status="CANCELLED", cancelled=True)

    processing_queue: queue.Queue = queue.Queue(
        maxsize=self.config.queue_size,
    )
    write_queue: queue.Queue = queue.Queue(
        maxsize=self.config.queue_size * 2,
    )
    result = PipelineResult()

    # Keep the queue references so the request loop's runtime-metrics log
    # can report on them.
    self._processing_queue = processing_queue
    self._write_queue = write_queue

    start_time = time.monotonic()

    # EtlTimer integration: each phase is recorded as a sub-step of the task.
    timer = self._etl_timer
    step_name = self._task_code

    def start_phase(phase: str) -> None:
        """Open a timer sub-step; no-op without a timer or task code."""
        if timer and step_name:
            try:
                timer.start_sub_step(step_name, phase)
            except KeyError:
                pass  # parent step does not exist: skip silently

    def stop_phase(phase: str) -> None:
        """Close a timer sub-step; no-op without a timer or task code."""
        if timer and step_name:
            try:
                timer.stop_sub_step(step_name, phase)
            except KeyError:
                pass  # parent step does not exist: skip silently

    # Start the N processing threads.
    self._workers = []
    for i in range(self.config.workers):
        t = threading.Thread(
            target=self._process_worker,
            args=(processing_queue, write_queue, process_fn, result),
            name=f"pipeline-worker-{i}",
            daemon=True,
        )
        t.start()
        self._workers.append(t)

    # Start the single writer thread.
    writer = threading.Thread(
        target=self._write_worker,
        args=(write_queue, write_fn, result),
        name="pipeline-writer",
        daemon=True,
    )
    writer.start()

    # Calling thread: serial requests. Everything in the finally block must
    # run even when the request loop raises; otherwise the workers and the
    # writer would never receive their sentinel and would never be joined.
    start_phase("request")
    request_start = time.monotonic()
    try:
        self._request_loop(requests, processing_queue, result)
    finally:
        request_elapsed = time.monotonic() - request_start
        stop_phase("request")

        # One sentinel per worker, then wait for all of them to finish.
        start_phase("process")
        process_start = time.monotonic()
        for _ in self._workers:
            processing_queue.put(None)
        for worker in self._workers:
            worker.join()
        process_elapsed = time.monotonic() - process_start
        stop_phase("process")

        # Workers are done, so no more batches will arrive: stop the writer.
        start_phase("write")
        write_start = time.monotonic()
        write_queue.put(None)
        writer.join()
        write_elapsed = time.monotonic() - write_start
        stop_phase("write")

        # Drop the per-run references.
        self._processing_queue = None
        self._write_queue = None
        self._workers = []

    total_elapsed = time.monotonic() - start_time
    result.timing["total"] = round(total_elapsed, 3)
    result.timing["request"] = round(request_elapsed, 3)
    result.timing["process"] = round(process_elapsed, 3)
    result.timing["write"] = round(write_elapsed, 3)

    # Settle the final status. Cancellation wins; a FAILED status set by the
    # request loop (too many consecutive failures) is kept as-is.
    if result.cancelled:
        result.status = "CANCELLED"
    elif result.status != "FAILED":
        failure_count = (
            result.request_failures
            + result.processing_failures
            + result.write_failures
        )
        result.status = "PARTIAL" if failure_count > 0 else "SUCCESS"

    # Execution summary log (requirement 8.2).
    self.logger.info(
        "管道执行摘要: status=%s, 总耗时=%.1fs "
        "[请求=%.1fs, 处理=%.1fs, 写入=%.1fs], "
        "请求=%d/%d, 获取=%d, "
        "写入(inserted=%d, updated=%d, skipped=%d), "
        "失败(request=%d, process=%d, write=%d)",
        result.status,
        total_elapsed,
        request_elapsed,
        process_elapsed,
        write_elapsed,
        result.completed_requests,
        result.total_requests,
        result.total_fetched,
        result.total_inserted,
        result.total_updated,
        result.total_skipped,
        result.request_failures,
        result.processing_failures,
        result.write_failures,
    )

    return result
def _request_loop(
    self,
    requests: Iterable[PipelineRequest],
    processing_queue: queue.Queue,
    result: PipelineResult,
) -> None:
    """Producer loop: issue requests one by one and hand responses to the workers.

    For each request the loop:
      1. stops (marking ``result.cancelled``) if the cancel token is set;
      2. takes the prefetched response when one is attached to the request,
         otherwise calls ``self.api.post(endpoint, params)``;
      3. puts ``(req, response)`` on ``processing_queue`` with a blocking
         ``put`` (on a bounded queue this throttles the producer);
      4. waits on the rate limiter, stopping if that wait returns a falsy
         value (treated as cancellation).

    A failed request is recorded in ``result.errors``. Once the number of
    failures in a row reaches ``config.max_consecutive_failures`` the loop
    sets ``result.status = "FAILED"`` and stops.
    """
    failure_streak = 0

    for req in requests:
        # Never start a new request once cancellation has been requested.
        if self.cancel_token.is_cancelled:
            with self._lock:
                result.cancelled = True
            self.logger.info("收到取消信号,停止发送新请求")
            break

        with self._lock:
            result.total_requests += 1

        sent_at = time.monotonic()
        try:
            # Prefetch mode: the data was already fetched upstream, reuse it.
            prefetched = req._prefetched_response
            if prefetched is None:
                response = self.api.post(req.endpoint, req.params)
            else:
                response = prefetched

            took = time.monotonic() - sent_at
            self.logger.debug(
                "请求完成: endpoint=%s, 耗时=%.2fs",
                req.endpoint,
                took,
            )

            # Blocking put: the producer stalls here while the queue is full.
            processing_queue.put((req, response))

            with self._lock:
                result.completed_requests += 1
                done = result.completed_requests
                seen = result.total_requests

            # Any success ends the current run of failures.
            failure_streak = 0

            # Emit runtime metrics every _METRICS_LOG_INTERVAL completed requests.
            if done % _METRICS_LOG_INTERVAL == 0:
                self._log_runtime_metrics(result, done, seen)
        except Exception as err:
            took = time.monotonic() - sent_at
            failure_streak += 1
            self.logger.error(
                "请求失败: endpoint=%s, 耗时=%.2fs, 错误=%s",
                req.endpoint,
                took,
                err,
            )
            with self._lock:
                result.request_failures += 1
                result.errors.append({
                    "phase": "request",
                    "endpoint": req.endpoint,
                    "error": str(err),
                })

            # Too many failures in a row: give up on the whole pipeline.
            if failure_streak >= self.config.max_consecutive_failures:
                self.logger.error(
                    "连续失败 %d 次,超过阈值 %d,中断管道",
                    failure_streak,
                    self.config.max_consecutive_failures,
                )
                with self._lock:
                    result.status = "FAILED"
                break

        # Rate-limit pause; it also runs after the final request so the
        # spacing towards the upstream API stays uniform.
        if not self._rate_limiter.wait(self.cancel_token.event):
            with self._lock:
                result.cancelled = True
            self.logger.info("限流等待期间收到取消信号,停止发送新请求")
            break
def _process_worker(
    self,
    processing_queue: queue.Queue,
    write_queue: queue.Queue,
    process_fn: Callable[[Any], list[dict]],
    result: PipelineResult,
) -> None:
    """Worker loop: turn queued responses into records for the writer.

    Takes ``(req, response)`` items from ``processing_queue``, runs
    ``process_fn(response)`` and, when it yields a non-empty list, forwards
    the list to ``write_queue`` and adds its length to
    ``result.total_fetched``. An exception raised while handling one item is
    logged and recorded in ``result.errors``; the worker then continues with
    the next item. A ``None`` item is the shutdown sentinel.
    """
    while (item := processing_queue.get()) is not None:
        req, response = item
        try:
            records = process_fn(response)
            if records:
                # Hand the processed records over to the writer thread.
                write_queue.put(records)
                with self._lock:
                    result.total_fetched += len(records)
        except Exception as err:
            self.logger.error(
                "处理失败: endpoint=%s, 错误=%s",
                req.endpoint,
                err,
            )
            failure = {
                "phase": "processing",
                "endpoint": req.endpoint,
                "error": str(err),
            }
            with self._lock:
                result.processing_failures += 1
                result.errors.append(failure)
        processing_queue.task_done()

    # Acknowledge the sentinel itself so the queue's task accounting balances.
    processing_queue.task_done()
def _write_worker(
    self,
    write_queue: queue.Queue,
    write_fn: Callable[[list[dict]], WriteResult],
    result: PipelineResult,
) -> None:
    """Writer loop: accumulate records from ``write_queue`` and flush them in batches.

    - Each queue item is a ``list[dict]`` (one ``process_fn`` output) that is
      appended to the pending batch.
    - Every full ``batch_size`` chunk of the pending batch is flushed
      immediately through ``self._flush_batch``.
    - If no item arrives within ``batch_timeout`` seconds, whatever is
      pending is flushed even though it is smaller than ``batch_size``.
    - A ``None`` item is the shutdown sentinel: the remainder is flushed and
      the loop exits.

    Write errors are handled inside ``_flush_batch``; this loop keeps going.
    """
    batch: list[dict] = []
    # A non-positive batch_size would make the chunking below unable to make
    # progress (the original ``while len(batch) >= batch_size`` loop spun
    # forever), so fall back to writing one record at a time.
    batch_size = max(1, self.config.batch_size)
    batch_timeout = self.config.batch_timeout

    while True:
        try:
            item = write_queue.get(timeout=batch_timeout)
        except queue.Empty:
            # Quiet period: flush what has accumulated so far.
            if batch:
                self._flush_batch(batch, write_fn, result)
                batch = []
            continue

        # Sentinel: stop consuming; the remainder is flushed after the loop.
        if item is None:
            write_queue.task_done()
            break

        batch.extend(item)
        write_queue.task_done()

        # Warn when the writer is falling behind the processing workers.
        qsize = write_queue.qsize()
        if qsize >= self.config.queue_size * 2:
            self.logger.warning(
                "写入队列积压: qsize=%d, 阈值=%d",
                qsize,
                self.config.queue_size * 2,
            )

        # Flush every complete chunk in one pass and keep only the tail,
        # instead of re-slicing the whole list once per chunk.
        full_end = len(batch) - len(batch) % batch_size
        if full_end:
            for start in range(0, full_end, batch_size):
                self._flush_batch(batch[start:start + batch_size], write_fn, result)
            batch = batch[full_end:]

    # Final flush of the partial batch left when the sentinel arrived.
    if batch:
        self._flush_batch(batch, write_fn, result)
def _flush_batch(
    self,
    batch: list[dict],
    write_fn: Callable[[list[dict]], WriteResult],
    result: PipelineResult,
) -> None:
    """Write one batch through ``write_fn`` and fold the outcome into ``result``.

    An empty batch is a no-op. On success the ``inserted`` / ``updated`` /
    ``skipped`` counts of the returned object are added to the totals. Any
    exception is logged, counted in ``result.write_failures`` and recorded in
    ``result.errors``; it is not re-raised.
    """
    if not batch:
        return

    try:
        outcome = write_fn(batch)
        with self._lock:
            result.total_inserted += outcome.inserted
            result.total_updated += outcome.updated
            result.total_skipped += outcome.skipped
    except Exception as err:
        size = len(batch)
        self.logger.error(
            "批量写入失败: batch_size=%d, 错误=%s",
            size,
            err,
        )
        failure = {
            "phase": "write",
            "batch_size": size,
            "error": str(err),
        }
        with self._lock:
            result.write_failures += 1
            result.errors.append(failure)
def _log_runtime_metrics(
    self,
    result: PipelineResult,
    completed: int,
    total: int,
) -> None:
    """Log a debug snapshot: progress, queue depths and live worker count.

    A queue reference that is unset counts as depth 0. ``result`` is
    accepted but not read here.
    """
    proc_q = self._processing_queue
    write_q = self._write_queue
    proc_depth = proc_q.qsize() if proc_q else 0
    write_depth = write_q.qsize() if write_q else 0

    alive = 0
    for worker in self._workers:
        if worker.is_alive():
            alive += 1

    self.logger.debug(
        "运行时指标: 进度=%d/%d, 处理队列=%d, 活跃线程=%d, 写入队列=%d",
        completed,
        total,
        proc_depth,
        alive,
        write_depth,
    )

View File

@@ -71,7 +71,6 @@ class ConsistencyReport:
ODS_TABLE_TO_JSON_FILE: Dict[str, str] = {
"assistant_accounts_master": "assistant_accounts_master.json",
"assistant_service_records": "assistant_service_records.json",
"assistant_cancellation_records": "assistant_cancellation_records.json",
"member_profiles": "member_profiles.json",
"member_stored_value_cards": "member_stored_value_cards.json",
"member_balance_changes": "member_balance_changes.json",
@@ -93,6 +92,35 @@ ODS_TABLE_TO_JSON_FILE: Dict[str, str] = {
"stock_goods_category_tree": "stock_goods_category_tree.json",
}
# CHANGE 2026-02-26 | Maps each ODS table name to its task_code; used to locate the paged JSON under FETCH_ROOT.
# FETCH_ROOT directory layout: {task_code}/{task_code}-{run_id}-{date}-{time}/{ods_table}.json
ODS_TABLE_TO_TASK_CODE: Dict[str, str] = {
    "assistant_accounts_master": "ODS_ASSISTANT_ACCOUNT",
    "assistant_service_records": "ODS_ASSISTANT_LEDGER",
    "member_profiles": "ODS_MEMBER",
    "member_stored_value_cards": "ODS_MEMBER_CARD",
    "member_balance_changes": "ODS_MEMBER_BALANCE",
    "recharge_settlements": "ODS_RECHARGE_SETTLE",
    "settlement_records": "ODS_SETTLEMENT_RECORDS",
    "table_fee_transactions": "ODS_TABLE_USE",
    "table_fee_discount_records": "ODS_TABLE_FEE_DISCOUNT",
    "store_goods_sales_records": "ODS_STORE_GOODS_SALES",
    "store_goods_master": "ODS_STORE_GOODS",
    "tenant_goods_master": "ODS_TENANT_GOODS",
    "site_tables_master": "ODS_TABLES",
    "group_buy_packages": "ODS_GROUP_PACKAGE",
    "group_buy_redemption_records": "ODS_GROUP_BUY_REDEMPTION",
    "platform_coupon_redemption_records": "ODS_PLATFORM_COUPON",
    "payment_transactions": "ODS_PAYMENT",
    "refund_transactions": "ODS_REFUND",
    "goods_stock_summary": "ODS_INVENTORY_STOCK",
    "goods_stock_movements": "ODS_INVENTORY_CHANGE",
    "stock_goods_category_tree": "ODS_GOODS_CATEGORY",
    "staff_info_master": "ODS_STAFF_INFO",
    "settlement_ticket_records": "ODS_SETTLEMENT_TICKET",
    "json_archive_records": "ODS_JSON_ARCHIVE",
}
# ODS 元数据列——不来自 API,由 ETL 框架自动填充
ODS_META_COLUMNS = frozenset({
"payload", "source_file", "source_endpoint",
@@ -145,6 +173,86 @@ def _extract_records(data: Any) -> list[dict]:
return []
def extract_api_fields_from_fetch_root(
    fetch_root: Path,
    ods_table: str,
) -> set[str] | None:
    """Collect raw API field names from the paged JSON stored under FETCH_ROOT.

    CHANGE 2026-02-26 | Alternative to extract_api_fields_from_json that reads
    the JSON the ETL actually fetched instead of a separately generated cache.

    Expected layout:
        FETCH_ROOT/{task_code}/{task_code}-{run_id}-{date}-{time}/{ods_table}.json
    Expected paged JSON shape:
        { "pages": [{ "response": { "data": { "{listKey}": [...] } } }] }

    Only the run directory whose name sorts last is inspected, and only the
    first 10 extracted records contribute field names.
    NOTE(review): "sorts last" equals "newest" only if run-directory names
    order chronologically as strings — confirm the run_id format.

    Returns:
        The union of keys of the sampled records, or ``None`` when the table
        has no task_code mapping, the directory or file is missing, the file
        cannot be read or parsed, or no records are found.
    """
    task_code = ODS_TABLE_TO_TASK_CODE.get(ods_table)
    if not task_code:
        return None

    task_dir = fetch_root / task_code
    if not task_dir.is_dir():
        return None

    # Pick the run directory with the greatest name.
    latest_run = max(
        (entry for entry in task_dir.iterdir() if entry.is_dir()),
        key=lambda entry: entry.name,
        default=None,
    )
    if latest_run is None:
        return None

    json_file = latest_run / f"{ods_table}.json"
    if not json_file.exists():
        return None

    try:
        payload = json.loads(json_file.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, OSError):
        return None

    records = _extract_records_from_paged_json(payload)
    if not records:
        return None

    # Union of the keys of the first 10 dict records.
    sample = (rec.keys() for rec in records[:10] if isinstance(rec, dict))
    return set().union(*sample)
def _extract_records_from_paged_json(data: Any) -> list[dict]:
    """Extract business records from an ETL paged-JSON document.

    Paged format:
        { "pages": [{ "response": { "data": { "{listKey}": [record, ...] } } }] }

    When ``data`` is not a dict, or has no non-empty ``pages`` list, the whole
    value is passed to ``_extract_records`` (the flat cache format). Otherwise
    the records of the first page whose ``response`` yields any records are
    returned; pages that are not dicts or lack a dict ``response`` are
    ignored. Returns ``[]`` when no page yields records.
    """
    pages = data.get("pages") if isinstance(data, dict) else None
    if not isinstance(pages, list) or not pages:
        # Not paged: fall back to the flat format.
        return _extract_records(data)

    for page in pages:
        response = page.get("response") if isinstance(page, dict) else None
        if isinstance(response, dict):
            found = _extract_records(response)
            if found:
                return found
    return []
def check_api_vs_ods_fields(
api_fields: set[str],
ods_columns: set[str],
@@ -494,6 +602,7 @@ def run_consistency_check(
db_conn,
*,
api_sample_dir: Path | None = None,
fetch_root: Path | None = None,
include_api_vs_ods: bool = True,
include_ods_vs_dwd: bool = True,
sample_limit: int = 5,
@@ -504,7 +613,8 @@ def run_consistency_check(
参数:
db_conn: 数据库连接对象(需有 .conn 属性返回 psycopg2 connection
api_sample_dir: API JSON 缓存目录(用于 API vs ODS 检查
api_sample_dir: API JSON 缓存目录(旧方式,兼容保留
fetch_root: FETCH_ROOT 目录(优先使用,从 ETL 实际抓取的分页 JSON 提取字段)
include_api_vs_ods: 是否执行 API vs ODS 检查
include_ods_vs_dwd: 是否执行 ODS vs DWD 检查
sample_limit: 值不一致时的采样行数
@@ -519,16 +629,28 @@ def run_consistency_check(
with db_conn.conn.cursor() as cur:
# --- 1. API vs ODS 字段完整性检查 ---
if include_api_vs_ods and api_sample_dir:
# CHANGE 2026-02-26 | 优先从 FETCH_ROOT 读取实际抓取数据,回退到 api_sample_dir 缓存
if include_api_vs_ods and (fetch_root or api_sample_dir):
for ods_table, json_file in sorted(ODS_TABLE_TO_JSON_FILE.items()):
json_path = api_sample_dir / json_file
api_fields = extract_api_fields_from_json(json_path)
# 优先尝试 FETCH_ROOTETL 实际抓取的分页 JSON
api_fields = None
source_hint = ""
if fetch_root:
api_fields = extract_api_fields_from_fetch_root(fetch_root, ods_table)
source_hint = "FETCH_ROOT"
# 回退到 api_sample_dirgen_full_dataflow_doc 缓存)
if api_fields is None and api_sample_dir:
json_path = api_sample_dir / json_file
api_fields = extract_api_fields_from_json(json_path)
source_hint = "API_SAMPLE_CACHE"
if api_fields is None:
result = TableCheckResult(
table_name=f"ods.{ods_table}",
check_type="api_vs_ods",
passed=True, # 无 JSON 缓存时跳过,不算失败
error=f"API JSON 缓存不存在: {json_file}",
passed=True, # 无 JSON 数据时跳过,不算失败
error=f"无可用 JSON 数据FETCH_ROOT 和 API 缓存均未找到)",
)
report.api_vs_ods_results.append(result)
continue

View File

@@ -14,6 +14,9 @@ import os
import re
import sys
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
DOCS_DIR = os.path.join("docs", "api-reference")

View File

@@ -11,6 +11,9 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dotenv import load_dotenv
import psycopg2
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
load_dotenv()
PG_DSN = os.getenv("PG_DSN")

View File

@@ -23,6 +23,9 @@ from enum import Enum
from pathlib import Path
from typing import Optional
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
class DiffKind(str, Enum):
"""差异分类枚举。"""

View File

@@ -13,6 +13,9 @@ from pathlib import Path
from dotenv import load_dotenv
import psycopg2
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
load_dotenv()
SUMMARY_DIR = Path("docs/api-reference/summary")

View File

@@ -414,6 +414,7 @@ def _check_ods_vs_dwd(
# ══════════════════════════════════════════════════════════════
# 已知的 DWS→DWD 聚合关系映射
# 营业日口径:使用 dws.biz_date() 替代 ::date 自然日转换
_DWS_DWD_MAP: dict[str, dict] = {
"dws.dws_assistant_daily_detail": {
"dwd_source": "dwd.dwd_assistant_service_log",
@@ -425,28 +426,28 @@ _DWS_DWD_MAP: dict[str, dict] = {
"dwd_source": "dwd.dwd_settlement_head",
"dws_date_col": "stat_date",
"dwd_date_col": "pay_time",
"dwd_date_cast": "::date",
"dwd_date_cast": "dws.biz_date(%col%)",
"description": "财务日度汇总 vs DWD 结账记录",
},
"dws.dws_member_visit_detail": {
"dwd_source": "dwd.dwd_settlement_head",
"dws_date_col": "visit_date",
"dwd_date_col": "pay_time",
"dwd_date_cast": "::date",
"dwd_date_cast": "dws.biz_date(%col%)",
"description": "会员到店明细 vs DWD 结账记录",
},
"dws.dws_member_consumption_summary": {
"dwd_source": "dwd.dwd_settlement_head",
"dws_date_col": "stat_month",
"dwd_date_col": "pay_time",
"dwd_date_cast": "date_trunc('month', %col%)::date",
"dwd_date_cast": "date_trunc('month', dws.biz_date(%col%))::date",
"description": "会员消费汇总 vs DWD 结账记录",
},
"dws.dws_finance_recharge_summary": {
"dwd_source": "dwd.dwd_recharge_order",
"dws_date_col": "stat_date",
"dwd_date_col": "pay_time",
"dwd_date_cast": "::date",
"dwd_date_cast": "dws.biz_date(%col%)",
"description": "充值汇总 vs DWD 充值订单",
},
}

View File

@@ -20,6 +20,9 @@ from datetime import datetime
import requests
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
# ── 配置 ──────────────────────────────────────────────────────────────────
API_BASE = "https://pc.ficoo.vip/apiprod/admin/v1/"
API_TOKEN = os.environ.get("API_TOKEN", "")

View File

@@ -12,6 +12,9 @@ import re
from dataclasses import dataclass, field
from pathlib import Path
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
# ---------------------------------------------------------------------------
# 常量
# ---------------------------------------------------------------------------

View File

@@ -68,27 +68,6 @@
"payload",
"content_hash"
],
"assistant_cancellation_records": [
"id",
"siteid",
"siteprofile",
"assistantname",
"assistantabolishamount",
"assistanton",
"pdchargeminutes",
"tableareaid",
"tablearea",
"tableid",
"tablename",
"trashreason",
"createtime",
"source_file",
"source_endpoint",
"fetched_at",
"payload",
"content_hash",
"tenant_id"
],
"assistant_service_records": [
"id",
"tenant_id",

View File

@@ -15,6 +15,9 @@ import sys
import time
import requests
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
# ── 配置 ──────────────────────────────────────────────────────────────────
API_BASE = "https://pc.ficoo.vip/apiprod/admin/v1/"
API_TOKEN = os.environ.get("API_TOKEN", "")
@@ -58,7 +61,6 @@ CROSS_REF_HEADERS = {"字段名", "类型", "示例值", "说明", "field", "exa
ACTUAL_LIST_KEY = {
"assistant_accounts_master": "assistantInfos",
"assistant_service_records": "orderAssistantDetails",
"assistant_cancellation_records": "abolitionAssistants",
"table_fee_transactions": "siteTableUseDetailsList",
"table_fee_discount_records": "taiFeeAdjustInfos",
"tenant_goods_master": "tenantGoodsList",

View File

@@ -0,0 +1,189 @@
# -*- coding: utf-8 -*-
"""一次性调研脚本:拉取全部团购详情并写入 ods.group_buy_package_details。
用法cwd = C:\\NeoZQYY/
python apps/etl/connectors/feiqiu/scripts/research_coupon_details.py
流程:
1. 从 ods.group_buy_packages 读取所有 coupon_idid 列)
2. 串行调用 QueryPackageCouponInfo 详情接口RateLimiter 5-20s
3. 提取结构化字段 + 计算 content_hash + 保留原始 payload
4. UPSERT 写入 ods.group_buy_package_details
需求覆盖:附录 B 调研 3、4
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
# ── Environment bootstrap ──────────────────────────────────────────────
# Load the repository-root .env; the path is derived from this file's location.
from dotenv import load_dotenv
_SCRIPT_DIR = Path(__file__).resolve().parent  # scripts/
_FEIQIU_DIR = _SCRIPT_DIR.parent  # apps/etl/connectors/feiqiu/
_REPO_ROOT = _FEIQIU_DIR.parents[3]  # up through connectors/ → etl/ → apps/ → repo root
load_dotenv(_REPO_ROOT / ".env")
# Fail fast when any required environment variable is unset or empty.
_REQUIRED_ENV = ("FETCH_ROOT", "EXPORT_ROOT", "PG_DSN", "TEST_DB_DSN")
_missing = [k for k in _REQUIRED_ENV if not os.environ.get(k)]
if _missing:
    sys.exit(f"ERROR: 缺少必需环境变量: {', '.join(_missing)}")
TEST_DB_DSN = os.environ["TEST_DB_DSN"]
# Put the feiqiu directory on sys.path so the local modules import even when
# the script is launched from the repository root.
if str(_FEIQIU_DIR) not in sys.path:
    sys.path.insert(0, str(_FEIQIU_DIR))
# ── 依赖导入 ──────────────────────────────────────────────────────────
from psycopg2.extras import Json # noqa: E402
from config.settings import AppConfig # noqa: E402
from api.client import APIClient # noqa: E402
from api.rate_limiter import RateLimiter # noqa: E402
from database.connection import DatabaseConnection # noqa: E402
# 复用 ods_tasks.py 中的字段提取逻辑
from tasks.ods.ods_tasks import _group_package_detail_process_fn # noqa: E402
def main():
    """Fetch the detail of every group-buy coupon and upsert it into the test DB.

    Steps:
      1. load the app config and connect to the database named by TEST_DB_DSN;
      2. read all distinct ``id`` values from ``ods.group_buy_packages``;
      3. for each id, call the detail endpoint and upsert the first extracted
         record (one commit per coupon; a failed write is rolled back and
         counted as a failure);
      4. pause on the rate limiter between coupons (not after the last one)
         and stop early if the pause returns a falsy value;
      5. print a summary.

    The connection is closed on every exit path, including unexpected
    exceptions.
    """
    # ── 1. Load configuration ────────────────────────────────────────
    config = AppConfig.load()
    print(f"✅ 配置加载完成 (store_id={config.get('app.store_id')})")

    # ── 2. Connect to the test database ──────────────────────────────
    db = DatabaseConnection(
        dsn=TEST_DB_DSN,
        session=config["db"].get("session", {}),
        connect_timeout=config["db"].get("connect_timeout_sec"),
    )
    try:
        # NOTE(review): for a DSN that contains no '@' this prints the DSN
        # unchanged — confirm DSNs are always URL-style before sharing logs.
        print(f"✅ 已连接测试库: {TEST_DB_DSN.split('@')[-1]}")

        # ── 3. Collect every coupon_id ───────────────────────────────
        rows = db.query("SELECT DISTINCT id FROM ods.group_buy_packages ORDER BY id")
        coupon_ids = [r["id"] for r in rows]
        total = len(coupon_ids)
        print(f"📋 共 {total} 个 coupon_id 待拉取")
        if not coupon_ids:
            print("⚠️ 没有找到任何 coupon_id退出")
            return

        # ── 4. API client + rate limiter ─────────────────────────────
        api = APIClient(
            base_url=config["api"]["base_url"],
            token=config["api"]["token"],
            timeout=config.get("api.timeout_sec", 20),
        )
        limiter = RateLimiter(min_interval=5.0, max_interval=20.0)

        # ── 5. Fetch and store serially ──────────────────────────────
        counts = {"success": 0, "fail": 0, "skip": 0}
        for idx, cid in enumerate(coupon_ids, 1):
            print(f"\n[{idx}/{total}] coupon_id={cid} ...", end=" ", flush=True)
            counts[_fetch_and_store_detail(api, db, cid)] += 1

            # Single rate-limit point for every outcome; skipped after the
            # last coupon. A falsy return means the wait was interrupted.
            if idx < total and not limiter.wait():
                print("⚠️ 等待被中断")
                break

        # ── 6. Summary ───────────────────────────────────────────────
        print("\n" + "=" * 50)
        print(
            f"📊 拉取完成: 成功={counts['success']}, 失败={counts['fail']}, "
            f"跳过={counts['skip']}, 总计={total}"
        )
        print("=" * 50)
    finally:
        # Always release the connection, even when an unexpected error aborts the run.
        db.close()


def _fetch_and_store_detail(api, db, coupon_id) -> str:
    """Fetch one coupon's detail and upsert it; return "success", "fail" or "skip"."""
    try:
        resp = api.get(
            "/PackageCoupon/QueryPackageCouponInfo",
            {"couponId": coupon_id},
        )
    except Exception as e:
        print(f"❌ API 错误: {e}")
        return "fail"

    # Field extraction is shared with the regular ODS task.
    records = _group_package_detail_process_fn(resp)
    if not records:
        print("⚠️ 响应无有效数据,跳过")
        return "skip"

    record = records[0]
    try:
        _upsert_detail(db, record)
        db.commit()
        print(f"✅ 写入成功 (hash={record['content_hash'][:8]}...)")
        return "success"
    except Exception as e:
        db.rollback()
        print(f"❌ 写入失败: {e}")
        return "fail"
def _upsert_detail(db: DatabaseConnection, record: dict) -> None:
    """Upsert one detail record into ``ods.group_buy_package_details``.

    Inserts the record keyed by ``coupon_id``; on conflict every other listed
    column is overwritten with the incoming value and ``fetched_at`` is set to
    ``now()``. Columns missing from ``record`` are sent as ``None``. Non-None
    values of the JSONB columns are wrapped in the ``Json`` adapter.
    """
    # Insert order; coupon_id (the conflict key) must stay first.
    columns = (
        "coupon_id", "package_name", "duration", "start_time", "end_time",
        "add_start_clock", "add_end_clock", "is_enabled", "is_delete",
        "site_id", "tenant_id", "create_time", "creator_name",
        "table_area_ids", "table_area_names", "assistant_services",
        "groupon_site_infos", "package_services", "coupon_details_list",
        "content_hash", "payload",
    )
    jsonb_columns = frozenset((
        "table_area_ids", "table_area_names", "assistant_services",
        "groupon_site_infos", "package_services", "coupon_details_list",
        "payload",
    ))

    def adapt(name):
        raw = record.get(name)
        if raw is not None and name in jsonb_columns:
            return Json(raw)
        return raw

    values = [adapt(name) for name in columns]

    # Everything after coupon_id is refreshed when the row already exists.
    assignments = ", ".join(f"{name} = EXCLUDED.{name}" for name in columns[1:])
    statement = " ".join((
        f"INSERT INTO ods.group_buy_package_details ({', '.join(columns)})",
        f"VALUES ({', '.join('%s' for _ in columns)})",
        f"ON CONFLICT (coupon_id) DO UPDATE SET {assignments},",
        "fetched_at = now()",
    ))
    db.execute(statement, values)
if __name__ == "__main__":
main()

View File

@@ -6,6 +6,9 @@ ODS 列数据来自 information_schema.columns WHERE table_schema = 'ods'。
import json
import os
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
SAMPLES_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "api-reference", "samples")
REPORT_DIR = os.environ.get("ETL_REPORT_ROOT")
if not REPORT_DIR:
@@ -16,7 +19,7 @@ NESTED_OBJECTS = {"siteprofile", "tableprofile"}
# 22 张需要比对的表
TABLES = [
"assistant_accounts_master", "settlement_records", "assistant_service_records",
"assistant_cancellation_records", "table_fee_transactions", "table_fee_discount_records",
"table_fee_transactions", "table_fee_discount_records",
"payment_transactions", "refund_transactions", "platform_coupon_redemption_records",
"tenant_goods_master", "store_goods_sales_records", "store_goods_master",
"stock_goods_category_tree", "goods_stock_movements", "member_profiles",

View File

@@ -31,7 +31,7 @@ ODS_META = {"source_file", "source_endpoint", "fetched_at", "payload", "content_
TABLES = [
"assistant_accounts_master", "settlement_records", "assistant_service_records",
"assistant_cancellation_records", "table_fee_transactions", "table_fee_discount_records",
"table_fee_transactions", "table_fee_discount_records",
"payment_transactions", "refund_transactions", "platform_coupon_redemption_records",
"tenant_goods_master", "store_goods_sales_records", "store_goods_master",
"stock_goods_category_tree", "goods_stock_movements", "member_profiles",
@@ -195,7 +195,7 @@ def classify_ods_only(table_name: str, field: str) -> str:
return "ODS 后续版本新增字段(当前使用中的台桌关联订单 ID"
# tenant_id 在某些表中是 ODS 额外添加的
if field == "tenant_id" and table_name in (
"assistant_cancellation_records", "payment_transactions"
"payment_transactions",
):
return "ODS 额外添加的租户 ID 字段API 响应中不含ETL 入库时补充)"
# API 后续版本新增字段(文档快照未覆盖)

View File

@@ -14,7 +14,7 @@ import multiprocessing as mp
import subprocess
import sys
import time as time_mod
from datetime import date, datetime, time, timedelta
from datetime import date, datetime, timedelta
from pathlib import Path
from zoneinfo import ZoneInfo
@@ -27,6 +27,7 @@ from tasks.utility.check_cutoff_task import CheckCutoffTask
from tasks.dwd.dwd_load_task import DwdLoadTask
from tasks.ods.ods_tasks import ENABLED_ODS_CODES
from utils.logging_utils import build_log_path, configure_logging
from neozqyy_shared.datetime_utils import business_date, business_day_range, now_shanghai
STEP_TIMEOUT_SEC = 120
@@ -53,6 +54,7 @@ def _compute_dws_window(
if dws_start and dws_end and dws_end < dws_start:
raise ValueError("dws_end must be >= dws_start")
cutoff = int(cfg.get("app.business_day_start_hour", 8))
store_id = int(cfg.get("app.store_id"))
dsn = cfg["db"]["dsn"]
session = cfg["db"].get("session")
@@ -67,19 +69,22 @@ def _compute_dws_window(
if isinstance(mx, date):
dws_start = mx - timedelta(days=max(0, int(rebuild_days)))
else:
dws_start = (datetime.now(tz).date()) - timedelta(days=max(1, int(bootstrap_days)))
# 营业日口径:用 business_date 计算"今天"
dws_start = business_date(now_shanghai(), cutoff) - timedelta(days=max(1, int(bootstrap_days)))
if dws_end is None:
dws_end = datetime.now(tz).date()
dws_end = business_date(now_shanghai(), cutoff)
finally:
conn.close()
start_dt = datetime.combine(dws_start, time.min).replace(tzinfo=tz)
# end_dt 取到当天 23:59:59避免只跑到“当前时刻”的 date() 导致少一天
end_dt = datetime.combine(dws_end, time.max).replace(tzinfo=tz)
# 营业日口径:窗口边界按 cutoff 小时对齐
start_dt = business_day_range(dws_start, cutoff)[0]
# end_dt 取到营业日结束(即 dws_end 次日 cutoff 前一秒),覆盖完整营业日
end_dt = business_day_range(dws_end, cutoff)[1] - timedelta(seconds=1)
return start_dt, end_dt
def _run_check_cutoff(cfg: AppConfig, logger: logging.Logger):
dsn = cfg["db"]["dsn"]
session = cfg["db"].get("session")
@@ -99,21 +104,21 @@ def _run_check_cutoff(cfg: AppConfig, logger: logging.Logger):
def _iter_daily_windows(window_start: datetime, window_end: datetime) -> list[tuple[datetime, datetime]]:
"""按营业日拆分时间窗口。
window_start/window_end 已按 cutoff 小时对齐(由 _compute_dws_window 保证)。
"""
if window_start > window_end:
return []
tz = window_start.tzinfo
windows: list[tuple[datetime, datetime]] = []
cur = window_start
while cur <= window_end:
day_start = datetime.combine(cur.date(), time.min).replace(tzinfo=tz)
day_end = datetime.combine(cur.date(), time.max).replace(tzinfo=tz)
if day_start < window_start:
day_start = window_start
if day_end > window_end:
day_end = window_end
windows.append((day_start, day_end))
next_day = cur.date() + timedelta(days=1)
cur = datetime.combine(next_day, time.min).replace(tzinfo=tz)
# 从 window_start 开始,每次推进 24 小时(一个营业日)
cur_start = window_start
while cur_start <= window_end:
cur_end = cur_start + timedelta(days=1) - timedelta(seconds=1)
if cur_end > window_end:
cur_end = window_end
windows.append((cur_start, cur_end))
cur_start = cur_start + timedelta(days=1)
return windows

View File

@@ -21,6 +21,9 @@ import sys
from pathlib import Path
from dataclasses import dataclass, field
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
# ---------------------------------------------------------------------------
# 常量
# ---------------------------------------------------------------------------

View File

@@ -20,6 +20,8 @@ import sys
from pathlib import Path
from dotenv import load_dotenv
from neozqyy_shared.repo_root import ensure_repo_root
ensure_repo_root()
# ---------------------------------------------------------------------------
# 1. 加载根 .env遵循 testing-env.md 规范)

View File

@@ -184,11 +184,18 @@ class BaseTask:
if not (override_start and override_end):
raise ValueError("run.window_override.start/end 需要同时提供")
# CHANGE 2026-03-04 | 纯日期字符串按业务日分割start→当天biz_hour, end→次日biz_hour
biz_hour = int(self.config.get("app.business_day_start_hour", 8))
window_start = override_start
if isinstance(window_start, str):
window_start = dtparser.parse(window_start)
if isinstance(window_start, datetime) and window_start.tzinfo is None:
window_start = window_start.replace(tzinfo=self.tz)
# 纯日期(时分秒全零)→ 当天业务日起始时刻
if window_start.hour == 0 and window_start.minute == 0 and window_start.second == 0:
window_start = window_start.replace(hour=biz_hour, tzinfo=self.tz)
else:
window_start = window_start.replace(tzinfo=self.tz)
elif isinstance(window_start, datetime):
window_start = window_start.astimezone(self.tz)
@@ -196,7 +203,11 @@ class BaseTask:
if isinstance(window_end, str):
window_end = dtparser.parse(window_end)
if isinstance(window_end, datetime) and window_end.tzinfo is None:
window_end = window_end.replace(tzinfo=self.tz)
# 纯日期(时分秒全零)→ 次日业务日起始时刻
if window_end.hour == 0 and window_end.minute == 0 and window_end.second == 0:
window_end = (window_end + timedelta(days=1)).replace(hour=biz_hour, tzinfo=self.tz)
else:
window_end = window_end.replace(tzinfo=self.tz)
elif isinstance(window_end, datetime):
window_end = window_end.astimezone(self.tz)

View File

@@ -5,12 +5,14 @@ from __future__ import annotations
import os
import re
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import date, datetime
from decimal import Decimal, InvalidOperation
from typing import Any, Dict, Iterable, List, Sequence
from psycopg2.extras import RealDictCursor, execute_batch, execute_values
from psycopg2.extras import Json, RealDictCursor, execute_batch, execute_values
from database.connection import DatabaseConnection
from tasks.base_task import BaseTask, TaskContext
@@ -70,6 +72,16 @@ class DwdLoadTask(BaseTask):
_NUMERIC_RE = re.compile(r"^[+-]?\d+(?:\.\d+)?$")
_BOOL_STRINGS = {"true", "false", "1", "0", "yes", "no", "y", "n", "t", "f"}
# 详情表 LEFT JOIN 配置:当 DWD 表需要从额外的 ODS 详情表获取字段时使用
# detail_columns 中的列在 FACT_MAPPINGS 中以 detail."col" 形式引用
DETAIL_JOIN_CONFIG: dict[str, dict] = {
"dwd.dim_groupbuy_package_ex": {
"detail_table": "ods.group_buy_package_details",
"join_condition": 'ods_main."id" = detail."coupon_id"',
"detail_columns": ["table_area_ids", "table_area_names", "assistant_services", "groupon_site_infos"],
},
}
def _strip_scd2_keys(self, pk_cols: Sequence[str]) -> list[str]:
return [c for c in pk_cols if c.lower() not in self.SCD_COLS]
@@ -113,7 +125,10 @@ class DwdLoadTask(BaseTask):
) -> str:
if key_exprs and order_col:
distinct_on = ", ".join(key_exprs)
order_by = ", ".join([*key_exprs, f'"{order_col}" DESC NULLS LAST'])
# order_col 可能是预格式化的表达式(如 ods_main."fetched_at"),此时直接使用;
# 否则包裹双引号
order_col_expr = order_col if '"' in order_col else f'"{order_col}"'
order_by = ", ".join([*key_exprs, f'{order_col_expr} DESC NULLS LAST'])
return (
f"SELECT DISTINCT ON ({distinct_on}) {select_cols_sql} "
f"FROM {ods_table_sql} {where_sql} ORDER BY {order_by}"
@@ -303,6 +318,11 @@ class DwdLoadTask(BaseTask):
("table_area_id_list", "table_area_id_list", None),
("package_type", "type", None),
("tenant_coupon_sale_order_item_id", "tenantcouponsaleorderitemid", None),
# CHANGE 2026-03-05: 团购详情字段(来自 ods.group_buy_package_details通过 LEFT JOIN 关联)
("table_area_ids", 'detail."table_area_ids"', None),
("table_area_names", 'detail."table_area_names"', None),
("assistant_services", 'detail."assistant_services"', None),
("groupon_site_infos", 'detail."groupon_site_infos"', None),
],
"dwd.dim_staff": [
("staff_id", "id", None),
@@ -311,16 +331,16 @@ class DwdLoadTask(BaseTask):
],
"dwd.dim_staff_ex": [
("staff_id", "id", None),
("rank_name", "rankname", None),
("cashier_point_id", "cashierpointid", "bigint"),
("cashier_point_name", "cashierpointname", None),
("group_id", "groupid", "bigint"),
("group_name", "groupname", None),
("system_user_id", "systemuserid", "bigint"),
("tenant_org_id", "tenantorgid", "bigint"),
("rank_name", "rank_name", None),
("cashier_point_id", "cashier_point_id", "bigint"),
("cashier_point_name", "cashier_point_name", None),
("group_id", "group_id", "bigint"),
("group_name", "group_name", None),
("system_user_id", "system_user_id", "bigint"),
("tenant_org_id", "tenant_org_id", "bigint"),
("auth_code_create", "auth_code_create", "timestamptz"),
("create_time", "create_time", "timestamptz"),
("user_roles", "userroles", "jsonb"),
("user_roles", "user_roles", "jsonb"),
],
# 事实表主键及关键差异列
"dwd.dwd_table_fee_log": [
@@ -602,6 +622,7 @@ class DwdLoadTask(BaseTask):
],
# 库存汇总goods_stock_summaryODS 列名全小写)
# CHANGE 2026-02-21: BUG 10 fix — ODS 列名是小写sitegoodsid不是驼峰
# CHANGE 2026-03-01: 补 site_id 映射ODS 入库时从 app.store_id 注入 siteid
"dwd.dwd_goods_stock_summary": [
("site_goods_id", '"sitegoodsid"', "bigint"), # 门店商品 IDPK
("goods_name", '"goodsname"', None), # 商品名称
@@ -617,6 +638,7 @@ class DwdLoadTask(BaseTask):
("range_sale_money", '"rangesalemoney"', "numeric"), # 销售金额
("range_inventory", '"rangeinventory"', "numeric"), # 盘点调整量
("current_stock", '"currentstock"', "numeric"), # 当前库存
("site_id", '"siteid"', "bigint"), # 门店 IDODS 入库时注入)
],
# 库存变动流水goods_stock_movementsODS 列名全小写)
# CHANGE 2026-02-21: BUG 10 fix — ODS 列名是小写,不是驼峰
@@ -653,11 +675,12 @@ class DwdLoadTask(BaseTask):
def load(self, extracted: dict[str, Any], context: TaskContext) -> dict[str, Any]:
"""
遍历映射关系,维度执行 SCD2 合并,事实表按时间增量插入。
并行遍历映射关系,维度执行 SCD2 合并,事实表按时间增量插入。
说明:
- 为避免长事务导致锁堆积/中断后遗留 idle-in-tx本任务按“每张表一次事务”提交
- 单表失败会回滚该表并继续后续表,最终在结果中汇总错误信息
- 使用 ThreadPoolExecutor 并行处理多张表,每张表使用独立数据库连接和事务
- 单表失败会回滚该表并继续后续表,最终在结果中汇总错误信息
- 并行线程数通过 AppConfig 的 dwd.parallel_workers 配置(默认 4
"""
now = extracted["now"]
summary: List[Dict[str, Any]] = []
@@ -668,54 +691,109 @@ class DwdLoadTask(BaseTask):
if env_only and not only_tables_cfg:
only_tables_cfg = [t.strip() for t in env_only.split(",") if t.strip()]
only_tables = {str(t).strip().lower() for t in only_tables_cfg if str(t).strip()} if only_tables_cfg else set()
with self.db.conn.cursor(cursor_factory=RealDictCursor) as cur:
for dwd_table, ods_table in self.TABLE_MAP.items():
if only_tables and dwd_table.lower() not in only_tables and self._table_base(dwd_table).lower() not in only_tables:
continue
started = time.monotonic()
self.logger.info("DWD 装载开始:%s <= %s", dwd_table, ods_table)
parallel_workers = int(self.config.get("dwd.parallel_workers", 4))
# 筛选需要处理的表
tables_to_process: list[tuple[str, str]] = []
for dwd_table, ods_table in self.TABLE_MAP.items():
if only_tables and dwd_table.lower() not in only_tables and self._table_base(dwd_table).lower() not in only_tables:
continue
tables_to_process.append((dwd_table, ods_table))
if not tables_to_process:
return {"tables": summary, "errors": 0, "error_details": errors}
# 并行调度:每张表在独立线程中执行,使用独立数据库连接
with ThreadPoolExecutor(max_workers=parallel_workers) as executor:
futures = {}
for dwd_table, ods_table in tables_to_process:
future = executor.submit(
self._process_single_table,
dwd_table, ods_table, now, context,
)
futures[future] = dwd_table
for future in as_completed(futures):
dwd_table = futures[future]
try:
dwd_cols = self._get_columns(cur, dwd_table)
ods_cols = self._get_columns(cur, ods_table)
if not dwd_cols:
self.logger.warning("跳过 %s:未能获取 DWD 列信息", dwd_table)
continue
if self._table_base(dwd_table).startswith("dim_"):
dim_counts = self._merge_dim(cur, dwd_table, ods_table, dwd_cols, ods_cols, now)
self.db.conn.commit()
summary.append({"table": dwd_table, "mode": "SCD2", **dim_counts})
else:
dwd_types = self._get_column_types(cur, dwd_table, "dwd")
ods_types = self._get_column_types(cur, ods_table, "ods")
fact_counts = self._merge_fact_increment(
cur,
dwd_table,
ods_table,
dwd_cols,
ods_cols,
dwd_types,
ods_types,
window_start=context.window_start,
window_end=context.window_end,
)
self.db.conn.commit()
summary.append({"table": dwd_table, "mode": "INCREMENT", **fact_counts})
elapsed = time.monotonic() - started
self.logger.info("DWD 装载完成:%s,用时 %.2fs", dwd_table, elapsed)
table_result = future.result()
summary.append(table_result)
except Exception as exc: # noqa: BLE001
try:
self.db.conn.rollback()
except Exception:
pass
elapsed = time.monotonic() - started
self.logger.exception("DWD 装载失败:%s,用时 %.2fserr=%s", dwd_table, elapsed, exc)
self.logger.error(
"DWD 并行装载失败:%serr=%s", dwd_table, exc,
)
errors.append({"table": dwd_table, "error": str(exc)})
continue
return {"tables": summary, "errors": len(errors), "error_details": errors}
def _process_single_table(
    self,
    dwd_table: str,
    ods_table: str,
    now: datetime,
    context: TaskContext,
) -> Dict[str, Any]:
    """Load a single DWD table on its own connection and transaction.

    A fresh ``DatabaseConnection`` is built from the settings of ``self.db``
    so that the work done here is committed or rolled back independently of
    any other table, and the connection is always closed before returning.

    Args:
        dwd_table: Target DWD table name.
        ods_table: Source ODS table name.
        now: Timestamp handed to the dimension merge.
        context: Task context; ``window_start`` / ``window_end`` bound the
            incremental fact load.

    Returns:
        A summary dict containing ``table``, ``mode`` (``SKIPPED``, ``SCD2``
        or ``INCREMENT``) and the counters reported by the merge routine.

    Raises:
        Exception: Any failure is logged, the transaction is rolled back
            (best effort) and the exception is re-raised to the caller.
    """
    t0 = time.monotonic()
    self.logger.info("DWD 装载开始:%s <= %s", dwd_table, ods_table)

    # Dedicated connection for this call; it mirrors the parent's settings.
    thread_db = DatabaseConnection(
        dsn=self.db._dsn,
        session=self.db._session,
        connect_timeout=self.db._connect_timeout,
    )
    try:
        with thread_db.conn.cursor(cursor_factory=RealDictCursor) as cur:
            dwd_cols = self._get_columns(cur, dwd_table)
            ods_cols = self._get_columns(cur, ods_table)

            # Without DWD column metadata there is nothing to load.
            if not dwd_cols:
                self.logger.warning("跳过 %s:未能获取 DWD 列信息", dwd_table)
                return {"table": dwd_table, "mode": "SKIPPED", "inserted": 0, "updated": 0}

            if self._table_base(dwd_table).startswith("dim_"):
                # Dimension tables go through the SCD2 merge.
                mode = "SCD2"
                counts = self._merge_dim(cur, dwd_table, ods_table, dwd_cols, ods_cols, now)
            else:
                # Everything else is loaded incrementally within the window.
                mode = "INCREMENT"
                dwd_types = self._get_column_types(cur, dwd_table, "dwd")
                ods_types = self._get_column_types(cur, ods_table, "ods")
                counts = self._merge_fact_increment(
                    cur,
                    dwd_table,
                    ods_table,
                    dwd_cols,
                    ods_cols,
                    dwd_types,
                    ods_types,
                    window_start=context.window_start,
                    window_end=context.window_end,
                )

            thread_db.conn.commit()
            outcome = {"table": dwd_table, "mode": mode, **counts}

            self.logger.info(
                "DWD 装载完成:%s,用时 %.2fs", dwd_table, time.monotonic() - t0
            )
            return outcome
    except Exception as exc:
        # Best-effort rollback: a failing rollback must not mask the real error.
        try:
            thread_db.conn.rollback()
        except Exception:
            pass
        self.logger.exception(
            "DWD 装载失败:%s,用时 %.2fserr=%s", dwd_table, time.monotonic() - t0, exc,
        )
        # Propagate so the caller (future.result()) can record the failure.
        raise
    finally:
        thread_db.close()
# ---------------------- 辅助方法 ----------------------
def _get_columns(self, cur, table: str) -> List[str]:
"""获取指定表的列名(小写)。"""
@@ -872,6 +950,17 @@ class DwdLoadTask(BaseTask):
ods_types = self._get_column_types(cur, ods_table, "ods")
ts_types = {"timestamp without time zone", "timestamp with time zone"}
table_sql = self._format_table(ods_table, "ods")
# CHANGE 2026-03-05: 详情表 LEFT JOIN 支持 — 当 DWD 表配置了 DETAIL_JOIN_CONFIG 时,
# 给 ODS 主表加别名 ods_mainLEFT JOIN 详情表为 detail
# 非 detail 列引用加 ods_main. 前缀避免歧义
detail_join = self.DETAIL_JOIN_CONFIG.get(dwd_table)
ods_alias = "ods_main" if detail_join else ""
if detail_join:
detail_table_sql = self._format_table(detail_join["detail_table"], "ods")
table_sql = (
f"{table_sql} AS ods_main "
f'LEFT JOIN {detail_table_sql} AS detail ON {detail_join["join_condition"]}'
)
# 构造 SELECT 表达式,支持 JSON/expression 映射
select_exprs: list[str] = []
added: set[str] = set()
@@ -881,21 +970,26 @@ class DwdLoadTask(BaseTask):
continue
if lc in mapping:
src, cast_type = mapping[lc]
# detail. 前缀的列直接使用(来自详情表),其他列加 ods_main. 前缀
if ods_alias and not src.startswith("detail."):
src = self._qualify_column_ref(src, ods_alias)
select_exprs.append(f"{self._cast_expr(src, cast_type)} AS \"{lc}\"")
added.add(lc)
elif lc in ods_set:
col_ref = f'{ods_alias}."{lc}"' if ods_alias else f'"{lc}"'
# CHANGE 2026-02-22: BUG 12 — 同名列如果是时间类型,加哨兵值过滤
if dwd_types.get(lc) in ts_types and ods_types.get(lc) in ts_types:
select_exprs.append(
f'CASE WHEN "{lc}" >= \'{self._SENTINEL_DATE_THRESHOLD}\'::timestamp '
f'THEN "{lc}" ELSE NULL END AS "{lc}"'
f"CASE WHEN {col_ref} >= '{self._SENTINEL_DATE_THRESHOLD}'::timestamp "
f'THEN {col_ref} ELSE NULL END AS "{lc}"'
)
else:
select_exprs.append(f'"{lc}" AS "{lc}"')
select_exprs.append(f'{col_ref} AS "{lc}"')
added.add(lc)
# 分类维度需要额外读取 categoryboxes 以展开子类
if dwd_table == "dwd.dim_goods_category" and "categoryboxes" not in added and "categoryboxes" in ods_set:
select_exprs.append('"categoryboxes" AS "categoryboxes"')
col_ref = f'{ods_alias}."categoryboxes"' if ods_alias else '"categoryboxes"'
select_exprs.append(f'{col_ref} AS "categoryboxes"')
added.add("categoryboxes")
# 主键兜底确保被选出
for pk in business_keys:
@@ -903,9 +997,12 @@ class DwdLoadTask(BaseTask):
if lc not in added:
if lc in mapping:
src, cast_type = mapping[lc]
if ods_alias and not src.startswith("detail."):
src = self._qualify_column_ref(src, ods_alias)
select_exprs.append(f"{self._cast_expr(src, cast_type)} AS \"{lc}\"")
elif lc in ods_set:
select_exprs.append(f'"{lc}" AS "{lc}"')
col_ref = f'{ods_alias}."{lc}"' if ods_alias else f'"{lc}"'
select_exprs.append(f'{col_ref} AS "{lc}"')
added.add(lc)
if not select_exprs:
@@ -917,14 +1014,19 @@ class DwdLoadTask(BaseTask):
lc = key.lower()
if lc in mapping:
src, cast_type = mapping[lc]
if ods_alias and not src.startswith("detail."):
src = self._qualify_column_ref(src, ods_alias)
key_exprs.append(self._cast_expr(src, cast_type))
elif lc in ods_set:
key_exprs.append(f'"{lc}"')
key_exprs.append(f'{ods_alias}."{lc}"' if ods_alias else f'"{lc}"')
select_cols_sql = ", ".join(select_exprs)
where_sql = self._append_where_condition("", '"fetched_at" IS NOT NULL')
fetched_at_ref = f'{ods_alias}."fetched_at"' if ods_alias else '"fetched_at"'
where_sql = self._append_where_condition("", f'{fetched_at_ref} IS NOT NULL')
# CHANGE 2026-03-05: order_col 也需要加别名前缀
qualified_order_col = f'{ods_alias}."{order_col}"' if ods_alias and order_col else (f'"{order_col}"' if order_col else None)
sql = self._latest_snapshot_select_sql(
select_cols_sql, table_sql, key_exprs, order_col, where_sql
select_cols_sql, table_sql, key_exprs, qualified_order_col, where_sql
)
cur.execute(sql)
rows = [{k.lower(): v for k, v in r.items()} for r in cur.fetchall()]
@@ -1006,7 +1108,7 @@ class DwdLoadTask(BaseTask):
# 批量插入新版本
if to_insert:
self._insert_dim_rows_bulk(cur, dwd_table, dwd_cols, to_insert, now)
self._insert_dim_rows_bulk(cur, dwd_table, dwd_cols, to_insert, now, dwd_types=dwd_types)
processed = len(src_rows_by_pk)
updated = len(to_close)
@@ -1050,11 +1152,16 @@ class DwdLoadTask(BaseTask):
dwd_cols: Sequence[str],
rows_with_version: Sequence[tuple[Dict[str, Any], int]],
now: datetime,
dwd_types: Dict[str, str] | None = None,
) -> None:
"""批量插入新的 SCD2 版本行。"""
sorted_cols = [c.lower() for c in sorted(dwd_cols)]
insert_cols_sql = ", ".join(f'"{c}"' for c in sorted_cols)
table_sql = self._format_table(table, "dwd")
# 预计算数组类型列集合,避免 list 值被误包装为 Json
_array_cols: set[str] = set()
if dwd_types:
_array_cols = {c for c, t in dwd_types.items() if "ARRAY" in t.upper() or "[]" in t}
def build_row(src_row: Dict[str, Any], version: int) -> list[Any]:
values: list[Any] = []
@@ -1068,7 +1175,15 @@ class DwdLoadTask(BaseTask):
elif c == "scd2_version":
values.append(version)
else:
values.append(src_row.get(c))
val = src_row.get(c)
# CHANGE 2026-03-07: 区分数组列和 JSONB 列
# 数组列TEXT[] 等)的 list 值直接传递psycopg2 自动转为 PG 数组格式
# JSONB 列的 dict/list 值需要 Json() 包装
if isinstance(val, list) and c not in _array_cols:
val = Json(val)
elif isinstance(val, dict):
val = Json(val)
values.append(val)
return values
values_rows = [build_row(r, ver) for r, ver in rows_with_version]
@@ -1395,6 +1510,23 @@ class DwdLoadTask(BaseTask):
# CHANGE 2026-02-22: BUG 12 fix — 哨兵日期阈值,上游 API 用 0001-01-01 表示"未设置"
_SENTINEL_DATE_THRESHOLD = "0002-01-01"
@staticmethod
def _qualify_column_ref(src: str, alias: str) -> str:
"""为裸列引用添加表别名前缀。
已包含 detail.、别名前缀、JSON 操作符、表达式CASE/COALESCE 等)的源不做修改。
仅对简单列名(如 "col" 或 col添加 alias."col" 前缀。
"""
# 已有 detail. 或其他表前缀(含 .)→ 不修改
if "." in src:
return src
# JSON 操作符、SQL 表达式 → 不修改
if any(tok in src for tok in ("->", "#>>", "::", "CASE ", "COALESCE", "NULLIF", "(")):
return src
# 裸列名(可能带引号)→ 加别名前缀
bare = src.strip('"')
return f'{alias}."{bare}"'
def _cast_expr(self, col: str, cast_type: str | None) -> str:
"""构造带可选 CAST 的列表达式。

View File

@@ -20,6 +20,8 @@ from .assistant_salary_task import AssistantSalaryTask
from .assistant_finance_task import AssistantFinanceTask
from .member_consumption_task import MemberConsumptionTask
from .member_visit_task import MemberVisitTask
from .assistant_project_tag_task import AssistantProjectTagTask
from .member_project_tag_task import MemberProjectTagTask
from .finance_daily_task import FinanceDailyTask
from .finance_recharge_task import FinanceRechargeTask
from .finance_income_task import FinanceIncomeStructureTask
@@ -56,6 +58,8 @@ __all__ = [
# 客户维度
"MemberConsumptionTask",
"MemberVisitTask",
"AssistantProjectTagTask",
"MemberProjectTagTask",
# 财务维度
"FinanceBaseTask",
"FinanceDailyTask",

View File

@@ -34,6 +34,8 @@ from typing import Any, Dict, List, Optional, Set, Tuple
from .base_dws_task import BaseDwsTask, TaskContext
from .dws_helpers import mask_mobile, calc_days_since
from neozqyy_shared.datetime_utils import biz_date_sql_expr
class AssistantCustomerTask(BaseDwsTask):
"""
@@ -181,13 +183,16 @@ class AssistantCustomerTask(BaseDwsTask):
"""
提取助教-客户服务统计(含滚动窗口)
"""
sql = """
# CHANGE 2026-03-01 | business-day-cutoff 6.3: DATE(start_use_time) → 营业日归属表达式
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("start_use_time", cutoff)
sql = f"""
WITH service_base AS (
SELECT
site_assistant_id AS assistant_id,
nickname AS assistant_nickname,
tenant_member_id AS member_id,
DATE(start_use_time) AS service_date,
{biz_expr} AS service_date,
income_seconds,
ledger_amount
FROM dwd.dwd_assistant_service_log

View File

@@ -34,6 +34,8 @@ from datetime import date, datetime, time, timedelta
from decimal import Decimal, ROUND_HALF_UP
from typing import Any, Dict, List, Optional, Set, Tuple
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import BaseDwsTask, CourseType, TaskContext
# 惩罚区域集合:大厅 A/B/C/S/TV + 麻将房 M1M7
@@ -197,7 +199,12 @@ class AssistantDailyTask(BaseDwsTask):
JOIN _ex 表取 is_trash 字段,用于直接判断服务是否被废除。
"""
sql = """
# CHANGE 2026-02-26: dwd_assistant_service_log 无 table_area_name 列,
# 改为 JOIN dim_table 取 site_table_area_name
# CHANGE 2026-03-01 | business-day-cutoff 6.1: DATE() → 营业日归属表达式
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("asl.start_use_time", cutoff)
sql = f"""
SELECT
asl.assistant_service_id,
asl.order_settle_id,
@@ -214,15 +221,18 @@ class AssistantDailyTask(BaseDwsTask):
asl.ledger_unit_price,
asl.start_use_time,
asl.last_use_time,
asl.table_area_name,
DATE(asl.start_use_time) AS service_date,
COALESCE(dt.site_table_area_name, '') AS table_area_name,
{biz_expr} AS service_date,
COALESCE(ex.is_trash, 0) AS is_trash
FROM dwd.dwd_assistant_service_log asl
LEFT JOIN dwd.dwd_assistant_service_log_ex ex
ON asl.assistant_service_id = ex.assistant_service_id
LEFT JOIN dwd.dim_table dt
ON asl.site_table_id = dt.table_id
AND dt.scd2_is_current = 1
WHERE asl.site_id = %s
AND DATE(asl.start_use_time) >= %s
AND DATE(asl.start_use_time) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND asl.is_delete = 0
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
@@ -258,14 +268,20 @@ class AssistantDailyTask(BaseDwsTask):
# 获取助教当日等级SCD2 as-of
level_info = self.get_assistant_level_asof(assistant_id, service_date)
# CHANGE 2026-02-27 | level_name 始终由 code 静态映射得出
# SCD2 仅用于取历史 level_code等级可能变过
# name 不再依赖 SCD2 返回值,避免 SCD2 缺失时 NULL
level_code = level_info.get('level_code') if level_info else record.get('assistant_level')
level_name = self.level_code_to_name(level_code)
agg_dict[key] = {
'site_id': site_id,
'tenant_id': self.config.get("app.tenant_id", site_id),
'assistant_id': assistant_id,
'assistant_nickname': record.get('assistant_nickname'),
'stat_date': service_date,
'assistant_level_code': level_info.get('level_code') if level_info else record.get('assistant_level'),
'assistant_level_name': level_info.get('level_name') if level_info else None,
'assistant_level_code': level_code,
'assistant_level_name': level_name,
'total_service_count': 0,
'base_service_count': 0,
'bonus_service_count': 0,

View File

@@ -28,6 +28,8 @@ from typing import Any, Dict, List, Optional, Tuple
from .base_dws_task import BaseDwsTask, CourseType, TaskContext
from neozqyy_shared.datetime_utils import biz_date_sql_expr
class AssistantFinanceTask(BaseDwsTask):
"""
@@ -98,6 +100,8 @@ class AssistantFinanceTask(BaseDwsTask):
revenue_total = self.safe_decimal(rev.get('revenue_total', 0))
gross_profit = revenue_total - cost_daily
gross_margin = gross_profit / revenue_total if revenue_total > 0 else Decimal('0')
# 防御clamp 到 numeric(7,4) 安全范围,避免极端值溢出
gross_margin = max(Decimal('-999.9999'), min(Decimal('999.9999'), gross_margin))
record = {
'site_id': site_id,
@@ -125,9 +129,12 @@ class AssistantFinanceTask(BaseDwsTask):
# load() 已移除——使用 BaseDwsTask 默认实现DATE_COL="stat_date"
def _extract_daily_revenue(self, site_id: int, start_date: date, end_date: date) -> List[Dict[str, Any]]:
sql = """
# CHANGE 2026-03-01 | business-day-cutoff 6.5: DATE(start_use_time) → 营业日归属表达式
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("s.start_use_time", cutoff)
sql = f"""
SELECT
DATE(s.start_use_time) AS stat_date,
{biz_expr} AS stat_date,
s.site_assistant_id AS assistant_id,
(ARRAY_AGG(s.nickname ORDER BY s.start_use_time DESC))[1] AS assistant_nickname,
COUNT(*) AS service_count,
@@ -143,10 +150,10 @@ class AssistantFinanceTask(BaseDwsTask):
LEFT JOIN dws.cfg_skill_type st
ON st.skill_id = s.skill_id AND st.is_active = TRUE
WHERE s.site_id = %s
AND DATE(s.start_use_time) >= %s
AND DATE(s.start_use_time) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND s.is_delete = 0
GROUP BY DATE(s.start_use_time), s.site_assistant_id
GROUP BY {biz_expr}, s.site_assistant_id
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []

View File

@@ -35,6 +35,8 @@ from typing import Any, Dict, List, Optional, Set, Tuple
from .base_dws_task import BaseDwsTask, TaskContext
from neozqyy_shared.datetime_utils import biz_date_sql_expr
class AssistantMonthlyTask(BaseDwsTask):
"""
@@ -262,14 +264,18 @@ class AssistantMonthlyTask(BaseDwsTask):
month_where = " OR ".join(month_conditions)
# CHANGE 2026-02-22 | Prompt: 需求 A — 按档位分段统计
# GROUP BY 加入 assistant_level_code/name,使同一助教月内不同档位各自聚合;
# GROUP BY 加入 assistant_level_code使同一助教月内不同档位各自聚合
# nickname 改用 ARRAY_AGG 按时间倒序取最新值,替代 MAX() 的字典序取值。
# 唯一约束已同步变更为 (site_id, assistant_id, stat_month, assistant_level_code)
# CHANGE 2026-02-27 | BUG: assistant_level_name 从 GROUP BY 移到 ARRAY_AGG FILTER
# 同一 level_code 在 daily_detail 中可能有 NULL 和非 NULL 的 name
# GROUP BY 会产生多行导致 UK 冲突
sql = f"""
SELECT
assistant_id,
assistant_level_code,
assistant_level_name,
-- 同一 level_code 可能有 NULL 和非 NULL 的 name取最新非空值避免 UK 冲突
(ARRAY_AGG(assistant_level_name ORDER BY stat_date DESC) FILTER (WHERE assistant_level_name IS NOT NULL))[1] AS assistant_level_name,
(ARRAY_AGG(assistant_nickname ORDER BY stat_date DESC))[1] AS assistant_nickname,
DATE_TRUNC('month', stat_date)::DATE AS stat_month,
COUNT(DISTINCT stat_date) AS work_days,
@@ -291,7 +297,7 @@ class AssistantMonthlyTask(BaseDwsTask):
SUM(trashed_count) AS trashed_count
FROM dws.dws_assistant_daily_detail
WHERE site_id = %s AND ({month_where})
GROUP BY assistant_id, assistant_level_code, assistant_level_name,
GROUP BY assistant_id, assistant_level_code,
DATE_TRUNC('month', stat_date)
"""
@@ -313,10 +319,13 @@ class AssistantMonthlyTask(BaseDwsTask):
end_month = max(months)
next_month = (end_month.replace(day=28) + timedelta(days=4)).replace(day=1)
sql = """
# CHANGE 2026-03-01 | business-day-cutoff 6.4: 使用 Business_Month 口径
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("start_use_time", cutoff)
sql = f"""
SELECT
site_assistant_id AS assistant_id,
DATE_TRUNC('month', start_use_time)::DATE AS stat_month,
DATE_TRUNC('month', {biz_expr}::timestamp)::DATE AS stat_month,
COUNT(DISTINCT CASE WHEN tenant_member_id > 0 THEN tenant_member_id END) AS unique_customers,
COUNT(DISTINCT site_table_id) AS unique_tables
FROM dwd.dwd_assistant_service_log
@@ -324,7 +333,7 @@ class AssistantMonthlyTask(BaseDwsTask):
AND start_use_time >= %s
AND start_use_time < %s
AND is_delete = 0
GROUP BY site_assistant_id, DATE_TRUNC('month', start_use_time)
GROUP BY site_assistant_id, DATE_TRUNC('month', {biz_expr}::timestamp)
"""
rows = self.db.query(sql, (site_id, start_month, next_month))
return [dict(row) for row in rows] if rows else []

View File

@@ -43,6 +43,8 @@ from typing import Any, Dict, List
from .base_dws_task import BaseDwsTask, TaskContext
from neozqyy_shared.datetime_utils import biz_date_sql_expr
# =============================================================================
# 数据结构
@@ -225,19 +227,22 @@ class AssistantOrderContributionTask(BaseDwsTask):
settle_type=1 为台桌结账,包含台费、酒水食品等金额。
"""
sql = """
# CHANGE 2026-03-01 | business-day-cutoff 6.2: DATE(pay_time) → 营业日归属表达式
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
order_settle_id,
site_id,
tenant_id,
table_charge_money,
goods_money,
DATE(pay_time) AS stat_date
{biz_expr} AS stat_date
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND settle_type = 1
AND DATE(pay_time) >= %s
AND DATE(pay_time) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
@@ -250,7 +255,10 @@ class AssistantOrderContributionTask(BaseDwsTask):
每条记录对应一张台桌在一个订单中的台费信息。
real_table_use_seconds 为台桌实际使用时长。
"""
sql = """
# CHANGE 2026-03-01 | business-day-cutoff 6.2: DATE(start_use_time) → 营业日归属表达式
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("tfl.start_use_time", cutoff)
sql = f"""
SELECT
tfl.order_settle_id,
tfl.site_table_id AS table_id,
@@ -259,8 +267,8 @@ class AssistantOrderContributionTask(BaseDwsTask):
COALESCE(tfl.ledger_amount, 0) AS table_fee
FROM dwd.dwd_table_fee_log tfl
WHERE tfl.site_id = %s
AND DATE(tfl.start_use_time) >= %s
AND DATE(tfl.start_use_time) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND COALESCE(tfl.is_delete, 0) = 0
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
@@ -274,7 +282,10 @@ class AssistantOrderContributionTask(BaseDwsTask):
通过 LEFT JOIN cfg_skill_type 获取 course_type_code
real_service_money 为助教分成。
"""
sql = """
# CHANGE 2026-03-01 | business-day-cutoff 6.2: DATE(start_use_time) → 营业日归属表达式
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("asl.start_use_time", cutoff)
sql = f"""
SELECT
asl.order_settle_id,
asl.site_assistant_id AS assistant_id,
@@ -290,8 +301,8 @@ class AssistantOrderContributionTask(BaseDwsTask):
ON asl.skill_id = cst.skill_id
AND cst.is_active = TRUE
WHERE asl.site_id = %s
AND DATE(asl.start_use_time) >= %s
AND DATE(asl.start_use_time) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND COALESCE(asl.is_delete, 0) = 0
"""
rows = self.db.query(sql, (site_id, start_date, end_date))

View File

@@ -0,0 +1,236 @@
# -*- coding: utf-8 -*-
"""
DWS 助教项目标签任务
按时间窗口计算每位助教在四大项目BILLIARD/SNOOKER/MAHJONG/KTV
工作时长占比占比≥25% 则分配标签。
数据链路:
dwd_assistant_service_log (income_seconds)
→ JOIN dim_table (site_table_id → table_id, scd2_is_current=1)
→ get_area_category(area_name, table_name)
→ 按 category_code 汇总 → 计算占比 → 写入 dws_assistant_project_tag
目标表:
dws.dws_assistant_project_tag
更新策略:
全量删除重建(按 site_id 删除后重新插入所有时间窗口)
"""
from __future__ import annotations
from datetime import date
from decimal import Decimal
from typing import Any, Dict, List, Optional
from tasks.dws.base_dws_task import BaseDwsTask, TimeWindow
from neozqyy_shared.datetime_utils import biz_date_sql_expr
# Only the four main projects are counted; SPECIAL/OTHER are excluded.
VALID_CATEGORIES = {"BILLIARD", "SNOOKER", "MAHJONG", "KTV"}
# The six time windows used by the assistant board.
ASSISTANT_WINDOWS = [
TimeWindow.THIS_MONTH,
TimeWindow.THIS_QUARTER,
TimeWindow.LAST_MONTH,
TimeWindow.LAST_3_MONTHS_EXCL_CURRENT,
TimeWindow.LAST_QUARTER,
TimeWindow.LAST_6_MONTHS,
]
# Minimum share (as a fraction, i.e. 25%) a category needs to be tagged.
TAG_THRESHOLD = Decimal("0.25")
class AssistantProjectTagTask(BaseDwsTask):
    """ETL task that derives per-assistant project tags.

    For every window in ``ASSISTANT_WINDOWS`` the task sums each assistant's
    ``income_seconds`` per area category, keeps only the categories listed in
    ``VALID_CATEGORIES`` and stores each category's share of the assistant's
    total. A row is flagged ``is_tagged`` when its share reaches
    ``TAG_THRESHOLD``. ``load`` deletes the site's existing rows and inserts
    the freshly computed ones.
    """

    def get_task_code(self) -> str:
        """Return the task code of this task."""
        return "DWS_ASSISTANT_PROJECT_TAG"

    def get_target_table(self) -> str:
        """Return the target table name (without schema)."""
        return "dws_assistant_project_tag"

    def get_primary_keys(self) -> List[str]:
        """Return the columns that identify one tag row."""
        return ["site_id", "assistant_id", "time_window", "category_code"]

    def extract(self, context) -> Dict[str, Any]:
        """Collect table metadata and per-window service durations for a site.

        Args:
            context: Task context; ``store_id`` is used as the site id.

        Returns:
            Dict with ``window_data`` (window value -> duration rows),
            ``table_info`` (table_id -> dim_table row) and ``site_id``.
        """
        store = context.store_id
        self.logger.info("%s: 提取助教服务数据", self.get_task_code())

        # Load the configuration cache before any category lookups happen.
        self.load_config_cache()

        # Table metadata supplies area/table names for category resolution.
        tables = self._extract_table_info(store)

        per_window: Dict[str, List[Dict]] = {}
        for win in ASSISTANT_WINDOWS:
            span = self.get_time_window_range(win)
            per_window[win.value] = self._extract_assistant_durations(
                store, span.start, span.end
            )

        return {
            "window_data": per_window,
            "table_info": tables,
            "site_id": store,
        }

    def _extract_table_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
        """Return the site's current ``dim_table`` rows keyed by ``table_id``."""
        sql = """
            SELECT table_id, table_name, site_table_area_name AS area_name
            FROM dwd.dim_table
            WHERE site_id = %s AND scd2_is_current = 1
        """
        by_id: Dict[int, Dict[str, Any]] = {}
        for rec in self.db.query(sql, (site_id,)) or []:
            by_id[rec["table_id"]] = dict(rec)
        return by_id

    def _extract_assistant_durations(
        self, site_id: int, start_date: date, end_date: date
    ) -> List[Dict[str, Any]]:
        """Return summed ``income_seconds`` per (assistant, table) for a range.

        The range is inclusive on both ends and is compared against the
        business-date expression built from ``asl.start_use_time`` and the
        configured ``app.business_day_start_hour`` (default 8).
        """
        cutoff = self.config.get("app.business_day_start_hour", 8)
        biz_expr = biz_date_sql_expr("asl.start_use_time", cutoff)
        sql = f"""
            SELECT
            asl.site_assistant_id AS assistant_id,
            asl.site_table_id AS table_id,
            COALESCE(SUM(asl.income_seconds), 0) AS duration_seconds
            FROM dwd.dwd_assistant_service_log asl
            WHERE asl.site_id = %(site_id)s
            AND {biz_expr} >= %(start_date)s
            AND {biz_expr} <= %(end_date)s
            AND asl.is_delete = 0
            GROUP BY asl.site_assistant_id, asl.site_table_id
        """
        params = {
            "site_id": site_id,
            "start_date": start_date,
            "end_date": end_date,
        }
        fetched = self.db.query(sql, params)
        if not fetched:
            return []
        return [dict(rec) for rec in fetched]

    def transform(self, extracted: Dict[str, Any], context) -> List[Dict[str, Any]]:
        """Turn extracted durations into one record per assistant/window/category.

        Args:
            extracted: Output of ``extract``.
            context: Task context; ``tenant_id`` is read if present, else 0.

        Returns:
            List of row dicts ready for ``load``.
        """
        table_info = extracted["table_info"]
        site_id = extracted["site_id"]
        tenant_id = getattr(context, "tenant_id", 0) or 0

        results: List[Dict[str, Any]] = []

        for window_value, rows in extracted["window_data"].items():
            # assistant_id -> category_code -> accumulated seconds
            per_assistant: Dict[int, Dict[str, int]] = {}

            for rec in rows:
                assistant = rec["assistant_id"]
                table_id = rec["table_id"]
                seconds = self.safe_int(rec["duration_seconds"])
                if seconds <= 0:
                    continue

                # Resolve the category from the table's area and table name.
                meta = table_info.get(table_id, {})
                category = self.get_area_category(
                    meta.get("area_name"), meta.get("table_name")
                )
                code = category.get("category_code", "OTHER")

                # Anything outside the four main projects is ignored.
                if code not in VALID_CATEGORIES:
                    continue

                bucket = per_assistant.setdefault(assistant, {})
                bucket[code] = bucket.get(code, 0) + seconds

            # Convert accumulated seconds into shares and emit records.
            for assistant, buckets in per_assistant.items():
                total = sum(buckets.values())
                if total <= 0:
                    continue

                for code, seconds in buckets.items():
                    ratio = Decimal(str(seconds)) / Decimal(str(total))
                    share = ratio.quantize(Decimal("0.0001"))
                    display = self._get_category_display(code)

                    results.append({
                        "site_id": site_id,
                        "tenant_id": tenant_id,
                        "assistant_id": assistant,
                        "time_window": window_value,
                        "category_code": code,
                        "category_name": display["category_name"],
                        "short_name": display["short_name"],
                        "duration_seconds": seconds,
                        "total_seconds": total,
                        "percentage": float(share),
                        "is_tagged": share >= TAG_THRESHOLD,
                    })

        tagged_total = len([r for r in results if r["is_tagged"]])
        self.logger.info(
            "%s: 生成 %d 条标签记录(其中 %d 条达标)",
            self.get_task_code(),
            len(results),
            tagged_total,
        )
        return results

    def _get_category_display(self, code: str) -> Dict[str, str]:
        """Return ``category_name`` and ``short_name`` for a category code.

        The first cached area-category entry with a matching
        ``category_code`` wins; if none matches, a built-in table is used,
        and for unknown codes the code itself and its first character.
        """
        cache = self.load_config_cache()
        matched = next(
            (
                entry
                for entry in cache.area_categories.values()
                if entry.get("category_code") == code
            ),
            None,
        )
        if matched is not None:
            return {
                "category_name": matched.get("display_name") or matched.get("category_name", code),
                "short_name": matched.get("short_name", code[:1]),
            }

        # Built-in defaults used when the configuration has no entry.
        builtin = {
            "BILLIARD": ("🎱 中式/追分", "🎱"),
            "SNOOKER": ("斯诺克", ""),
            "MAHJONG": ("🀄 麻将/棋牌", "🀄"),
            "KTV": ("🎤 团建/K歌", "🎤"),
        }
        name, short = builtin.get(code, (code, code[:1]))
        return {"category_name": name, "short_name": short}

    def load(self, transformed, context) -> dict:
        """Replace the site's tag rows with the transformed records.

        Args:
            transformed: Records produced by ``transform``.
            context: Task context (not used here).

        Returns:
            Dict with ``status`` and ``counts`` (``inserted``, ``deleted``).
        """
        # NOTE(review): an empty result returns before the DELETE, so rows
        # stored earlier for the site stay untouched — confirm this is intended.
        if not transformed:
            return {"status": "SUCCESS", "counts": {"inserted": 0, "deleted": 0}}

        site_id = transformed[0]["site_id"]

        # Wipe every existing tag row of this site before re-inserting.
        delete_sql = "DELETE FROM dws.dws_assistant_project_tag WHERE site_id = %s"
        self.db.execute(delete_sql, (site_id,))
        deleted = 0
        if hasattr(self.db, "cursor"):
            deleted = self.db.cursor.rowcount

        insert_sql = """
            INSERT INTO dws.dws_assistant_project_tag (
            site_id, tenant_id, assistant_id, time_window,
            category_code, category_name, short_name,
            duration_seconds, total_seconds, percentage, is_tagged,
            computed_at, created_at, updated_at
            ) VALUES (
            %(site_id)s, %(tenant_id)s, %(assistant_id)s, %(time_window)s,
            %(category_code)s, %(category_name)s, %(short_name)s,
            %(duration_seconds)s, %(total_seconds)s, %(percentage)s, %(is_tagged)s,
            NOW(), NOW(), NOW()
            )
        """
        for record in transformed:
            self.db.execute(insert_sql, record)

        inserted = len(transformed)
        self.logger.info(
            "%s: 删除 %d 条,插入 %d",
            self.get_task_code(), deleted, inserted,
        )
        return {
            "status": "SUCCESS",
            "counts": {"inserted": inserted, "deleted": deleted},
        }

View File

@@ -27,8 +27,9 @@ DWS层任务基类
- 提供滚动窗口统计方法
时间口径说明:
- 周起始日:周一
- 月/季度起始第一天0点
- 营业日切点BUSINESS_DAY_START_HOUR默认 08:0008:00 前的记录归属前一天
- 周起始日:周一 08:00
- 月/季度起始:第一天 08:00
- 环比规则:对比上一个等长区间
- 前3个月含/不含本月(用于财务筛选)
- 最近半年:不含本月
@@ -52,6 +53,8 @@ from decimal import Decimal, InvalidOperation
from enum import Enum
from typing import Any, Dict, Iterator, List, Optional, Tuple, TypeVar
from neozqyy_shared.datetime_utils import biz_date_sql_expr, business_date, now_shanghai
from ..base_task import BaseTask, TaskContext
# =============================================================================
@@ -81,6 +84,8 @@ class TimeWindow(Enum):
THIS_QUARTER = "THIS_QUARTER" # 本季度
LAST_QUARTER = "LAST_QUARTER" # 上季度
LAST_6_MONTHS = "LAST_6_MONTHS" # 最近半年(不含本月)
LAST_30_DAYS = "LAST_30_DAYS" # 近30天含今天
LAST_60_DAYS = "LAST_60_DAYS" # 近60天含今天
class CourseType(Enum):
@@ -292,18 +297,20 @@ class BaseDwsTask(BaseTask):
获取时间窗口的日期范围(用于财务报表)
时间口径说明:
- 周起始日为周一
- 月/季度起始为第一天0点
- 营业日切点BUSINESS_DAY_START_HOUR默认 08:00
- 周起始日为周一 08:00
- 月/季度起始为第一天 08:00
Args:
window: 时间窗口枚举
base_date: 基准日期,默认为今天
base_date: 基准日期,默认为当前营业日
Returns:
TimeRange对象
"""
if base_date is None:
base_date = date.today()
cutoff = self.config.get("app.business_day_start_hour", 8)
base_date = business_date(now_shanghai(), cutoff)
if window == TimeWindow.THIS_WEEK:
# 本周(周一起始)
@@ -369,6 +376,16 @@ class BaseDwsTask(BaseTask):
start = self.get_month_first_day(self._shift_months(month_start, -6))
return TimeRange(start=start, end=end)
elif window == TimeWindow.LAST_30_DAYS:
# 近30天含今天
start = base_date - timedelta(days=29)
return TimeRange(start=start, end=base_date)
elif window == TimeWindow.LAST_60_DAYS:
# 近60天含今天
start = base_date - timedelta(days=59)
return TimeRange(start=start, end=base_date)
raise ValueError(f"不支持的时间窗口类型: {window}")
def get_comparison_range(self, time_range: TimeRange) -> TimeRange:
@@ -410,9 +427,9 @@ class BaseDwsTask(BaseTask):
def is_new_hire_in_month(self, hire_date: date, stat_month: date) -> bool:
"""
判断是否为新入职月1日0点后入职)
判断是否为新入职月1日8点后入职)
新入职定档规则月1日0点之后入职的,计算为新入职
新入职定档规则月1日8点之后入职的,计算为新入职
Args:
hire_date: 入职日期
@@ -527,10 +544,12 @@ class BaseDwsTask(BaseTask):
return [dict(row) for row in rows] if rows else []
def _load_area_categories(self) -> Dict[str, Dict[str, Any]]:
"""加载区域分类映射"""
"""加载区域分类映射(支持台桌级细分)"""
sql = """
SELECT
source_area_name, category_code, category_name,
source_area_name, source_table_name,
category_code, category_name,
display_name, short_name,
match_type, match_priority
FROM dws.cfg_area_category
WHERE is_active = TRUE
@@ -540,10 +559,15 @@ class BaseDwsTask(BaseTask):
if not rows:
return {}
# 双层索引:(area_name, table_name) → config
# table_name 为 NULL 时用空字符串作 key
result = {}
for row in rows:
row_dict = dict(row)
result[row_dict['source_area_name']] = row_dict
area = row_dict['source_area_name']
table = row_dict.get('source_table_name') or ''
key = f"{area}\x00{table}" # 复合键,\x00 不会出现在正常名称中
result[key] = row_dict
return result
def _load_skill_types(self) -> Dict[int, Dict[str, Any]]:
@@ -709,50 +733,57 @@ class BaseDwsTask(BaseTask):
# 默认为基础课
return CourseType.BASE
def get_area_category(self, area_name: Optional[str], table_name: Optional[str] = None) -> Dict[str, str]:
    """
    Resolve the area category for a table area.

    Lookup order: table-level exact match > area-level exact match >
    LIKE (substring) match > the ``DEFAULT`` row > a hard-coded default.

    Args:
        area_name: Raw area name (dim_table.site_table_area_name).
        table_name: Table name (dim_table.table_name). When given, a
            table-level mapping for this area takes precedence over the
            area-level one. Omitting it keeps the area-only behaviour.

    Returns:
        Dict with ``category_code``, ``category_name``, ``display_name``
        and ``short_name``.
    """
    config = self.load_config_cache()
    default = {'category_code': 'OTHER', 'category_name': '其他', 'display_name': '其他', 'short_name': ''}

    if not area_name:
        # No area name at all: nothing to look up.
        return default

    # Cache keys are "<area>\x00<table>"; an empty table part denotes an
    # area-level row.
    cats = config.area_categories

    def _pick(cat: Dict[str, Any]) -> Dict[str, str]:
        # display_name falls back to category_name, short_name to ''.
        return {
            'category_code': cat['category_code'],
            'category_name': cat['category_name'],
            'display_name': cat.get('display_name') or cat['category_name'],
            'short_name': cat.get('short_name') or '',
        }

    # 1. Table-level exact match (area_name + table_name).
    if table_name:
        hit = cats.get(f"{area_name}\x00{table_name}")
        if hit is not None and hit.get('match_type') == 'EXACT':
            return _pick(hit)

    # 2. Area-level exact match (area_name + empty table_name).
    hit = cats.get(f"{area_name}\x00")
    if hit is not None and hit.get('match_type') == 'EXACT':
        return _pick(hit)

    # 3. LIKE match: '%' wildcards are stripped and the remainder is used as
    #    a plain substring test. Rows are visited in the cache's insertion
    #    order, which is presumably priority order from the loader query —
    #    TODO confirm.
    for cat in cats.values():
        if cat.get('match_type') == 'LIKE':
            pattern = cat['source_area_name'].replace('%', '')
            if pattern and pattern in area_name:
                return _pick(cat)

    # 4. Configured catch-all row, then the hard-coded default.
    fallback = cats.get("DEFAULT\x00")
    if fallback is not None:
        return _pick(fallback)

    return default
def calculate_sprint_bonus(
self,
@@ -908,8 +939,10 @@ class BaseDwsTask(BaseTask):
offset = 0
cols_str = ", ".join(columns)
# 构建WHERE条件
where_parts = [f"DATE({date_col}) >= %s", f"DATE({date_col}) <= %s"]
# 构建WHERE条件 — 使用营业日归属表达式替代 DATE()
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr(date_col, cutoff)
where_parts = [f"{biz_expr} >= %s", f"{biz_expr} <= %s"]
params: List[Any] = [start_date, end_date]
if where_clause:
@@ -972,15 +1005,24 @@ class BaseDwsTask(BaseTask):
获取助教在指定日期的等级SCD2 as-of取值
助教等级是SCD2维度历史月份不能直接用"当前等级"
需要按有效期as-of join取数。
取 scd2_start_time <= asof_date 的最近一条记录:
若 asof_date 落在某条记录的 [scd2_start, scd2_end) 区间内,取到的即该记录;
若区间有间隙,则沿用间隙之前最近的一条,视为从该记录起等级未变。
若服务日期早于首条 SCD2 记录,则无匹配,返回 None。
Args:
assistant_id: 助教ID
asof_date: 取值日期
Returns:
助教等级信息包含level_code和level_name
助教等级信息包含level_code和level_name无记录时返回None
"""
# CHANGE 2026-02-27 | 放宽 SCD2 匹配:去掉 scd2_end_time 条件,
# 改为取 scd2_start_time <= asof_date 的最近一条。
# 原逻辑要求 asof_date 严格落在 [start, end) 区间内,
# 当 SCD2 记录有间隙或服务日期早于首条记录时返回 None,
# 导致 dws_assistant_daily_detail.assistant_level_name 出现 NULL,
# 下游 monthly 聚合时同一 level_code 有 NULL/非NULL 两种值,引发 UK 冲突。
# 注意:放宽后仅解决"区间有间隙"的情况;服务日期早于首条记录时
# 本查询仍无匹配并返回 None,需由调用方另行兜底。
sql = """
SELECT
assistant_id,
@@ -999,13 +1041,30 @@ class BaseDwsTask(BaseTask):
FROM dwd.dim_assistant
WHERE assistant_id = %s
AND scd2_start_time <= %s
AND (scd2_end_time IS NULL OR scd2_end_time > %s)
ORDER BY scd2_start_time DESC
LIMIT 1
"""
rows = self.db.query(sql, (assistant_id, asof_date, asof_date))
rows = self.db.query(sql, (assistant_id, asof_date))
return dict(rows[0]) if rows else None
# CHANGE 2026-02-27 | Added a static level_code -> level_name mapping.
# When the SCD2 record starts later than the service date (dim_assistant only
# began syncing at a later stage), the assistant_level carried on the service
# record itself is mapped through this table as a fallback.
LEVEL_CODE_NAME_MAP: dict[int, str] = {
8: "助教管理",  # assistant management
10: "初级",  # junior
20: "中级",  # intermediate
30: "高级",  # senior
40: "星级",  # star
}
@staticmethod
def level_code_to_name(level_code: int | None) -> str | None:
"""将 assistant_level code 映射为中文名称,无匹配返回 None"""
if level_code is None:
return None
return BaseDwsTask.LEVEL_CODE_NAME_MAP.get(int(level_code))
def get_member_card_balance_asof(
self,
member_id: int,

View File

@@ -22,6 +22,8 @@ from datetime import date
from decimal import Decimal
from typing import Any, Dict, List
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import BaseDwsTask
from .dws_helpers import parse_id_list
@@ -39,9 +41,11 @@ class FinanceBaseTask(BaseDwsTask):
end_date: date,
) -> List[Dict[str, Any]]:
"""结账单日汇总(结算头表按日聚合)"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
DATE(pay_time) AS stat_date,
{biz_expr} AS stat_date,
COUNT(*) AS order_count,
COUNT(CASE WHEN member_id != 0 AND member_id IS NOT NULL THEN 1 END) AS member_order_count,
COUNT(CASE WHEN member_id = 0 OR member_id IS NULL THEN 1 END) AS guest_order_count,
@@ -61,13 +65,17 @@ class FinanceBaseTask(BaseDwsTask):
SUM(member_discount_amount) AS member_discount_amount,
SUM(rounding_amount) AS rounding_amount,
SUM(pl_coupon_sale_amount) AS pl_coupon_sale_amount,
-- 消费金额
SUM(consume_money) AS total_consume
-- CHANGE 2026-03-07 | consume_money → items_sum 口径校准
-- consume_money 存在三种历史口径混合DWS 层统一使用 items_sum
SUM(table_charge_money + goods_money + assistant_pd_money
+ assistant_cx_money + electricity_money) AS items_sum
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND DATE(pay_time) >= %s
AND DATE(pay_time) <= %s
GROUP BY DATE(pay_time)
AND {biz_expr} >= %s
AND {biz_expr} <= %s
-- CHANGE 2026-03-07 | 排除退货(6)/退款(7),仅保留台桌结账(1)+商城订单(3)
AND settle_type IN (1, 3)
GROUP BY {biz_expr}
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
@@ -83,9 +91,11 @@ class FinanceBaseTask(BaseDwsTask):
) -> List[Dict[str, Any]]:
"""充值日汇总(充值订单按日聚合)"""
# CHANGE 2026-02-21 | BUG 8: dwd_recharge_order 无 pay_money/gift_money实际字段为 pay_amount/point_amount
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
DATE(pay_time) AS stat_date,
{biz_expr} AS stat_date,
COUNT(*) AS recharge_count,
SUM(pay_amount + point_amount) AS recharge_total,
SUM(pay_amount) AS recharge_cash,
@@ -101,9 +111,9 @@ class FinanceBaseTask(BaseDwsTask):
COUNT(DISTINCT member_id) AS recharge_member_count
FROM dwd.dwd_recharge_order
WHERE site_id = %s
AND DATE(pay_time) >= %s
AND DATE(pay_time) <= %s
GROUP BY DATE(pay_time)
AND {biz_expr} >= %s
AND {biz_expr} <= %s
GROUP BY {biz_expr}
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
@@ -118,9 +128,11 @@ class FinanceBaseTask(BaseDwsTask):
end_date: date,
) -> List[Dict[str, Any]]:
"""团购核销日汇总(结算头表 + 团购核销表联查)"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("sh.pay_time", cutoff)
sql = f"""
SELECT
sh.pay_time::DATE AS stat_date,
{biz_expr} AS stat_date,
COUNT(CASE WHEN sh.coupon_amount > 0 THEN 1 END) AS groupbuy_count,
SUM(
CASE
@@ -137,9 +149,9 @@ class FinanceBaseTask(BaseDwsTask):
ON gr.order_settle_id = sh.order_settle_id
AND COALESCE(gr.is_delete, 0) = 0
WHERE sh.site_id = %s
AND sh.pay_time >= %s
AND sh.pay_time < %s + INTERVAL '1 day'
GROUP BY sh.pay_time::DATE
AND {biz_expr} >= %s
AND {biz_expr} <= %s
GROUP BY {biz_expr}
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
@@ -188,16 +200,18 @@ class FinanceBaseTask(BaseDwsTask):
if not member_ids and not order_ids:
return []
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
pay_time::DATE AS stat_date,
{biz_expr} AS stat_date,
order_settle_id,
member_id,
adjust_amount
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND pay_time >= %s
AND pay_time < %s + INTERVAL '1 day'
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND adjust_amount != 0
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
@@ -242,20 +256,22 @@ class FinanceBaseTask(BaseDwsTask):
end_date: date,
) -> List[Dict[str, Any]]:
"""赠送卡消费汇总(余额变动按日聚合)"""
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr_change = biz_date_sql_expr("change_time", cutoff)
id_list = ", ".join(str(card_id) for card_id in self.GIFT_CARD_TYPE_IDS)
sql = f"""
SELECT
change_time::DATE AS stat_date,
{biz_expr_change} AS stat_date,
SUM(ABS(change_amount)) AS gift_card_consume
FROM dwd.dwd_member_balance_change
WHERE site_id = %s
AND change_time >= %s
AND change_time < %s + INTERVAL '1 day'
AND {biz_expr_change} >= %s
AND {biz_expr_change} <= %s
AND from_type = 1
AND change_amount < 0
AND COALESCE(is_delete, 0) = 0
AND card_type_id IN ({id_list})
GROUP BY change_time::DATE
GROUP BY {biz_expr_change}
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []

View File

@@ -222,6 +222,8 @@ class FinanceDailyTask(FinanceBaseTask):
member_discount = self.safe_decimal(settle.get('member_discount_amount', 0))
rounding_amount = self.safe_decimal(settle.get('rounding_amount', 0))
big_customer_amount = self.safe_decimal(big_customer.get('big_customer_amount', 0))
# 大客户优惠不超过手动调整总额(大客户是 adjust 的子集)
big_customer_amount = min(big_customer_amount, adjust_amount) if adjust_amount > 0 else Decimal('0')
other_discount = adjust_amount - big_customer_amount
if other_discount < 0:
other_discount = Decimal('0')
@@ -229,8 +231,8 @@ class FinanceDailyTask(FinanceBaseTask):
# 赠送卡消费(来自余额变动)
gift_card_consume_amount = self.safe_decimal(gift_card.get('gift_card_consume', 0))
# 优惠合计
discount_total = discount_groupbuy + member_discount + gift_card_consume_amount + adjust_amount + rounding_amount
# 优惠合计(大客户 + 其他 = adjust_amount互斥拆分
discount_total = discount_groupbuy + member_discount + gift_card_consume_amount + big_customer_amount + other_discount + rounding_amount
# 确认收入
confirmed_income = gross_amount - discount_total
@@ -249,9 +251,12 @@ class FinanceDailyTask(FinanceBaseTask):
cash_balance_change = cash_inflow_total - cash_outflow_total
# 卡消费
cash_card_consume = card_pay_amount + balance_pay_amount
# CHANGE 2026-03-07 | balance 恒等式校准
# balance_amount = recharge_card_amount + gift_card_amount
# recharge_card_consume 只取现金充值部分recharge_card_amount不加 balance_amount 避免重复计算
recharge_card_consume = card_pay_amount
gift_card_consume = gift_card_consume_amount
card_consume_total = cash_card_consume + gift_card_consume
card_consume_total = recharge_card_consume + gift_card_consume
# 充值统计
recharge_count = self.safe_int(recharge.get('recharge_count', 0))
@@ -284,7 +289,8 @@ class FinanceDailyTask(FinanceBaseTask):
'discount_groupbuy': discount_groupbuy,
'discount_vip': member_discount,
'discount_gift_card': gift_card_consume_amount,
'discount_manual': adjust_amount,
# CHANGE 2026-03-07 | discount_manual 语义修正:存储大客户优惠(与 discount_other 互斥,两者之和 = adjust_amount
'discount_manual': big_customer_amount,
'discount_rounding': rounding_amount,
'discount_other': other_discount,
# 确认收入
@@ -297,7 +303,7 @@ class FinanceDailyTask(FinanceBaseTask):
'platform_fee_amount': platform_fee_amount,
'recharge_cash_inflow': recharge_cash_inflow,
'card_consume_total': card_consume_total,
'cash_card_consume': cash_card_consume,
'recharge_card_consume': recharge_card_consume,
'gift_card_consume': gift_card_consume,
'cash_outflow_total': cash_outflow_total,
'cash_balance_change': cash_balance_change,

View File

@@ -35,6 +35,8 @@ from datetime import date
from decimal import Decimal
from typing import Any, Dict, List
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import TaskContext
from .finance_base_task import FinanceBaseTask
@@ -112,9 +114,11 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
- rounding_amount: 抹零金额
- pl_coupon_sale_amount: 平台券销售金额团购实付路径1
"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
pay_time::DATE AS stat_date,
{biz_expr} AS stat_date,
-- 团购相关
COALESCE(SUM(coupon_amount), 0) AS coupon_amount_total,
COALESCE(SUM(pl_coupon_sale_amount), 0) AS pl_coupon_sale_total,
@@ -132,10 +136,10 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
COUNT(*) AS total_orders
FROM dwd.dwd_settlement_head
WHERE site_id = %(site_id)s
AND pay_time >= %(start_date)s
AND pay_time < %(end_date)s + INTERVAL '1 day'
AND settle_status = 1 -- 已结账
GROUP BY pay_time::DATE
AND {biz_expr} >= %(start_date)s
AND {biz_expr} <= %(end_date)s
AND settle_type IN (1, 3) -- 台桌结账 + 商城订单,排除退货/撤销
GROUP BY {biz_expr}
ORDER BY stat_date
"""
rows = self.db.query(sql, {
@@ -160,9 +164,11 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
返回:{日期: 团购实付总额}
"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("sh.pay_time", cutoff)
sql = f"""
SELECT
sh.pay_time::DATE AS stat_date,
{biz_expr} AS stat_date,
SUM(
CASE
WHEN sh.pl_coupon_sale_amount > 0 THEN sh.pl_coupon_sale_amount
@@ -174,11 +180,11 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
ON gr.order_settle_id = sh.order_settle_id
AND COALESCE(gr.is_delete, 0) = 0
WHERE sh.site_id = %(site_id)s
AND sh.pay_time >= %(start_date)s
AND sh.pay_time < %(end_date)s + INTERVAL '1 day'
AND sh.settle_status = 1
AND {biz_expr} >= %(start_date)s
AND {biz_expr} <= %(end_date)s
AND sh.settle_type IN (1, 3) -- 台桌结账 + 商城订单,排除退货/撤销
AND sh.coupon_amount > 0 -- 只统计有团购的订单
GROUP BY sh.pay_time::DATE
GROUP BY {biz_expr}
"""
rows = self.db.query(sql, {
'site_id': site_id,
@@ -206,22 +212,24 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
2794699703437125, # 酒水卡
2793266846533445, # 活动抵用券
)
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("change_time", cutoff)
id_list = ", ".join(str(card_id) for card_id in gift_card_type_ids)
sql = f"""
SELECT
change_time::DATE AS stat_date,
{biz_expr} AS stat_date,
card_type_id,
COUNT(*) AS consume_count,
SUM(ABS(change_amount)) AS consume_amount
FROM dwd.dwd_member_balance_change
WHERE site_id = %(site_id)s
AND change_time >= %(start_date)s
AND change_time < %(end_date)s + INTERVAL '1 day'
AND {biz_expr} >= %(start_date)s
AND {biz_expr} <= %(end_date)s
AND from_type = 1
AND change_amount < 0
AND COALESCE(is_delete, 0) = 0
AND card_type_id IN ({id_list})
GROUP BY change_time::DATE, card_type_id
GROUP BY {biz_expr}, card_type_id
"""
rows = self.db.query(sql, {
'site_id': site_id,

View File

@@ -33,6 +33,8 @@ from datetime import date
from decimal import Decimal
from typing import Any, Dict, List, Optional
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import TaskContext
from .finance_base_task import FinanceBaseTask
@@ -94,32 +96,35 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
收入类型分类:
- TABLE_FEE: 台费收入 (table_charge_money)
- GOODS: 商品收入 (goods_money)
- ASSISTANT_BASE: 助教基础课 (assistant_pd_money)
- ASSISTANT_BONUS: 助教附加课 (assistant_cx_money)
- ASSISTANT_PD: 助教陪打收入 (assistant_pd_money)
- ASSISTANT_CX: 助教超休收入 (assistant_cx_money)
"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
pay_time::DATE AS stat_date,
{biz_expr} AS stat_date,
-- 台费收入
COALESCE(SUM(table_charge_money), 0) AS table_fee_income,
COUNT(CASE WHEN table_charge_money > 0 THEN 1 END) AS table_fee_orders,
-- 商品收入
COALESCE(SUM(goods_money), 0) AS goods_income,
COUNT(CASE WHEN goods_money > 0 THEN 1 END) AS goods_orders,
-- 助教基础课收入PD=陪打)
COALESCE(SUM(assistant_pd_money), 0) AS assistant_base_income,
COUNT(CASE WHEN assistant_pd_money > 0 THEN 1 END) AS assistant_base_orders,
-- 助教附加课收入CX=超休/促销)
COALESCE(SUM(assistant_cx_money), 0) AS assistant_bonus_income,
COUNT(CASE WHEN assistant_cx_money > 0 THEN 1 END) AS assistant_bonus_orders,
-- CHANGE 2026-03-07 | ASSISTANT_BASE/BONUS → PD/CX 命名校准
-- 助教陪打收入
COALESCE(SUM(assistant_pd_money), 0) AS assistant_pd_income,
COUNT(CASE WHEN assistant_pd_money > 0 THEN 1 END) AS assistant_pd_orders,
-- 助教超休收入
COALESCE(SUM(assistant_cx_money), 0) AS assistant_cx_income,
COUNT(CASE WHEN assistant_cx_money > 0 THEN 1 END) AS assistant_cx_orders,
-- 总订单数
COUNT(*) AS total_orders
FROM dwd.dwd_settlement_head
WHERE site_id = %(site_id)s
AND pay_time >= %(start_date)s
AND pay_time < %(end_date)s + INTERVAL '1 day'
AND settle_status = 1 -- 已结账
GROUP BY pay_time::DATE
AND {biz_expr} >= %(start_date)s
AND {biz_expr} <= %(end_date)s
AND settle_type IN (1, 3) -- 台桌结账 + 商城订单,排除退货/撤销
GROUP BY {biz_expr}
ORDER BY stat_date
"""
rows = self.db.query(sql, {
@@ -142,46 +147,57 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
"""
# CHANGE 2026-02-22 | BUG 7 修复 | dim_table 主键是 table_id 而非 site_table_id
# JOIN 条件从 dt.site_table_id → dt.table_id事实表侧 site_table_id 不变)
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("sh.pay_time", cutoff)
sql = f"""
WITH area_orders AS (
SELECT
tfl.pay_time::DATE AS stat_date,
{biz_expr} AS stat_date,
dt.site_table_area_name AS area_name,
dt.table_name AS table_name,
tfl.order_settle_id,
COALESCE(tfl.ledger_amount, 0) AS income_amount,
COALESCE(tfl.ledger_time_seconds, 0) AS duration_seconds
COALESCE(tfl.ledger_count, 0) AS duration_seconds
FROM dwd.dwd_table_fee_log tfl
INNER JOIN dwd.dwd_settlement_head sh
ON sh.order_settle_id = tfl.order_settle_id
LEFT JOIN dwd.dim_table dt
ON dt.table_id = tfl.site_table_id
AND dt.scd2_is_current = 1
WHERE tfl.site_id = %(site_id)s
AND tfl.pay_time >= %(start_date)s
AND tfl.pay_time < %(end_date)s + INTERVAL '1 day'
AND {biz_expr} >= %(start_date)s
AND {biz_expr} <= %(end_date)s
AND COALESCE(tfl.is_delete, 0) = 0
UNION ALL
SELECT
asl.start_use_time::DATE AS stat_date,
{biz_expr} AS stat_date,
dt.site_table_area_name AS area_name,
dt.table_name AS table_name,
asl.order_settle_id,
COALESCE(asl.ledger_amount, 0) AS income_amount,
COALESCE(asl.income_seconds, 0) AS duration_seconds
FROM dwd.dwd_assistant_service_log asl
INNER JOIN dwd.dwd_settlement_head sh
ON sh.order_settle_id = asl.order_settle_id
LEFT JOIN dwd.dim_table dt
ON dt.table_id = asl.site_table_id
AND dt.scd2_is_current = 1
WHERE asl.site_id = %(site_id)s
AND asl.start_use_time >= %(start_date)s
AND asl.start_use_time < %(end_date)s + INTERVAL '1 day'
AND {biz_expr} >= %(start_date)s
AND {biz_expr} <= %(end_date)s
AND asl.is_delete = 0
)
SELECT
stat_date,
area_name,
table_name,
COALESCE(SUM(income_amount), 0) AS income_amount,
COALESCE(SUM(duration_seconds), 0) AS duration_seconds,
COUNT(DISTINCT order_settle_id) AS order_count
FROM area_orders
GROUP BY stat_date, area_name
GROUP BY stat_date, area_name, table_name
ORDER BY stat_date, area_name
"""
rows = self.db.query(sql, {
@@ -232,14 +248,14 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
"""
转换按收入类型的数据
将每日汇总数据展开为4条记录台费/商品/基础课/附加课
将每日汇总数据展开为4条记录台费/商品/陪打/超休
"""
# 收入类型定义
# CHANGE 2026-03-07 | ASSISTANT_BASE/BONUS → PD/CX 命名校准
income_types = [
('TABLE_FEE', '台费收入', 'table_fee_income', 'table_fee_orders'),
('GOODS', '商品收入', 'goods_income', 'goods_orders'),
('ASSISTANT_BASE', '助教基础课', 'assistant_base_income', 'assistant_base_orders'),
('ASSISTANT_BONUS', '助教附加课', 'assistant_bonus_income', 'assistant_bonus_orders'),
('ASSISTANT_PD', '助教陪打收入', 'assistant_pd_income', 'assistant_pd_orders'),
('ASSISTANT_CX', '助教超休收入', 'assistant_cx_income', 'assistant_cx_orders'),
]
records = []
@@ -309,8 +325,8 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
duration_seconds = row.get('duration_seconds', 0) or 0
order_count = row.get('order_count', 0) or 0
# 映射区域名称到分类代码
category = self.get_area_category(area_name)
# CHANGE 2026-03-07 | 传入 table_name 支持台桌级映射VIP包厢 V5→斯诺克
category = self.get_area_category(area_name, row.get('table_name'))
category_code = category.get('category_code', 'OTHER')
category_name = category.get('category_name', '其他区域')
@@ -363,7 +379,7 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
"""
兼容旧逻辑的映射方法(当前使用 get_area_category
"""
return self.get_area_category(area_name)
return self.get_area_category(area_name, None)
def load(self, records: List[Dict[str, Any]], context: TaskContext) -> Dict[str, Any]:
"""

View File

@@ -31,6 +31,8 @@ from datetime import date
from decimal import Decimal
from typing import Any, Dict, List
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import TaskContext
from .finance_base_task import FinanceBaseTask
@@ -111,9 +113,11 @@ class FinanceRechargeTask(FinanceBaseTask):
def _extract_recharge_summary(self, site_id: int, start_date: date, end_date: date) -> List[Dict[str, Any]]:
# CHANGE 2026-02-21 | BUG 8: dwd_recharge_order 无 pay_money/gift_money实际字段为 pay_amount/point_amount
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
DATE(pay_time) AS stat_date,
{biz_expr} AS stat_date,
COUNT(*) AS recharge_count,
SUM(pay_amount + point_amount) AS recharge_total,
SUM(pay_amount) AS recharge_cash,
@@ -129,8 +133,8 @@ class FinanceRechargeTask(FinanceBaseTask):
COUNT(DISTINCT member_id) AS recharge_member_count,
COUNT(DISTINCT CASE WHEN is_first = 1 THEN member_id END) AS new_member_count
FROM dwd.dwd_recharge_order
WHERE site_id = %s AND DATE(pay_time) >= %s AND DATE(pay_time) <= %s
GROUP BY DATE(pay_time)
WHERE site_id = %s AND {biz_expr} >= %s AND {biz_expr} <= %s
GROUP BY {biz_expr}
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []

View File

@@ -29,6 +29,8 @@ from datetime import date
from decimal import Decimal
from typing import Any, Dict, List
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import BaseDwsTask, TaskContext
@@ -74,7 +76,9 @@ class GoodsStockDailyTask(BaseDwsTask):
self.get_task_code(), site_id, start_date, end_date,
)
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("fetched_at", cutoff)
sql = f"""
SELECT
site_goods_id,
goods_name,
@@ -92,11 +96,12 @@ class GoodsStockDailyTask(BaseDwsTask):
current_stock,
site_id,
tenant_id,
fetched_at
fetched_at,
{biz_expr} AS biz_date
FROM dwd.dwd_goods_stock_summary
WHERE site_id = %s
AND DATE(fetched_at) >= %s
AND DATE(fetched_at) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
ORDER BY fetched_at
"""
rows = self.query_dwd(sql, (site_id, start_date, end_date))
@@ -135,11 +140,14 @@ class GoodsStockDailyTask(BaseDwsTask):
fetched_at = row.get("fetched_at")
if fetched_at is None:
continue
stat_date = (
fetched_at.date()
if hasattr(fetched_at, "date")
else fetched_at
)
# 使用 SQL 层计算的营业日归属日期
stat_date = row.get("biz_date")
if stat_date is None:
stat_date = (
fetched_at.date()
if hasattr(fetched_at, "date")
else fetched_at
)
site_goods_id = row.get("site_goods_id")
if site_goods_id is None:
continue

View File

@@ -31,6 +31,8 @@ from datetime import date
from decimal import Decimal
from typing import Any, Dict, List
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import BaseDwsTask, TaskContext
@@ -81,7 +83,9 @@ class GoodsStockMonthlyTask(BaseDwsTask):
self.get_task_code(), site_id, start_date, end_date,
)
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("fetched_at", cutoff)
sql = f"""
SELECT
site_goods_id,
goods_name,
@@ -99,11 +103,12 @@ class GoodsStockMonthlyTask(BaseDwsTask):
current_stock,
site_id,
tenant_id,
fetched_at
fetched_at,
{biz_expr} AS biz_date
FROM dwd.dwd_goods_stock_summary
WHERE site_id = %s
AND DATE(fetched_at) >= %s
AND DATE(fetched_at) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
ORDER BY fetched_at
"""
rows = self.query_dwd(sql, (site_id, start_date, end_date))
@@ -141,12 +146,15 @@ class GoodsStockMonthlyTask(BaseDwsTask):
fetched_at = row.get("fetched_at")
if fetched_at is None:
continue
row_date = (
fetched_at.date()
if hasattr(fetched_at, "date")
else fetched_at
)
# 自然月的第一天作为 stat_date
# 使用 SQL 层计算的营业日归属日期
row_date = row.get("biz_date")
if row_date is None:
row_date = (
fetched_at.date()
if hasattr(fetched_at, "date")
else fetched_at
)
# 营业月的第一天作为 stat_date
first_day = _month_first_day(row_date)
site_goods_id = row.get("site_goods_id")
if site_goods_id is None:

View File

@@ -31,6 +31,8 @@ from datetime import date, timedelta
from decimal import Decimal
from typing import Any, Dict, List
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import BaseDwsTask, TaskContext
@@ -82,7 +84,9 @@ class GoodsStockWeeklyTask(BaseDwsTask):
self.get_task_code(), site_id, start_date, end_date,
)
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("fetched_at", cutoff)
sql = f"""
SELECT
site_goods_id,
goods_name,
@@ -100,11 +104,12 @@ class GoodsStockWeeklyTask(BaseDwsTask):
current_stock,
site_id,
tenant_id,
fetched_at
fetched_at,
{biz_expr} AS biz_date
FROM dwd.dwd_goods_stock_summary
WHERE site_id = %s
AND DATE(fetched_at) >= %s
AND DATE(fetched_at) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
ORDER BY fetched_at
"""
rows = self.query_dwd(sql, (site_id, start_date, end_date))
@@ -142,12 +147,15 @@ class GoodsStockWeeklyTask(BaseDwsTask):
fetched_at = row.get("fetched_at")
if fetched_at is None:
continue
row_date = (
fetched_at.date()
if hasattr(fetched_at, "date")
else fetched_at
)
# ISO 周的周一作为 stat_date
# 使用 SQL 层计算的营业日归属日期
row_date = row.get("biz_date")
if row_date is None:
row_date = (
fetched_at.date()
if hasattr(fetched_at, "date")
else fetched_at
)
# 营业周的周一作为 stat_date
monday = _iso_monday(row_date)
site_goods_id = row.get("site_goods_id")
if site_goods_id is None:

View File

@@ -12,6 +12,8 @@ from typing import Any, Dict, List, Optional, Tuple
from .base_index_task import BaseIndexTask
from ..base_dws_task import TaskContext
from neozqyy_shared.datetime_utils import biz_date_sql_expr
@dataclass
class MemberActivityData:
@@ -238,6 +240,8 @@ class MemberIndexBaseTask(BaseIndexTask):
end_date: date,
) -> List[Dict[str, Any]]:
"""提取到店记录(按天去重)"""
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
condition_sql = self._build_visit_condition_sql()
sql = f"""
WITH visit_source AS (
@@ -258,12 +262,12 @@ class MemberIndexBaseTask(BaseIndexTask):
)
SELECT
canonical_member_id AS member_id,
DATE(pay_time) AS visit_date,
{biz_expr} AS visit_date,
MAX(pay_time) AS last_visit_time,
SUM(COALESCE(pay_amount, 0)) AS day_pay_amount
FROM visit_source
WHERE canonical_member_id > 0
GROUP BY canonical_member_id, DATE(pay_time)
GROUP BY canonical_member_id, {biz_expr}
ORDER BY canonical_member_id, visit_date
"""
rows = self.db.query(sql, (site_id, start_date, end_date))

View File

@@ -214,7 +214,7 @@ class RelationIndexTask(BaseIndexTask):
JOIN dwd.dim_assistant d
ON s.user_id = d.user_id
AND d.scd2_is_current = 1
AND COALESCE(d.is_delete, 0) = 0
AND COALESCE(d.leave_status, 0) = 0
WHERE s.site_id = %s
AND s.tenant_member_id > 0
AND s.user_id > 0

View File

@@ -18,6 +18,8 @@ from typing import Any, Dict, List, Optional
from .base_index_task import BaseIndexTask
from ..base_dws_task import TaskContext
from neozqyy_shared.datetime_utils import biz_date_sql_expr
# =============================================================================
# 数据类定义
@@ -333,6 +335,10 @@ class SpendingPowerIndexTask(BaseIndexTask):
short_days = int(params.get('spend_window_short_days', 30))
long_days = int(params.get('spend_window_long_days', 90))
# CHANGE 2026-03-01 | business-day-cutoff 7.6: DATE(pay_time) → 营业日归属表达式
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
# 单条 SQL 同时聚合 30 天和 90 天窗口,避免两次扫描
# INTERVAL 天数通过 f-string 内嵌整数安全site_id 走参数化
sql = f"""
@@ -357,7 +363,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
-- 90 天窗口
SUM(pay_amount) AS spend_90,
COUNT(*) AS orders_90,
COUNT(DISTINCT DATE(pay_time)) AS visit_days_90,
COUNT(DISTINCT {biz_expr}) AS visit_days_90,
COUNT(DISTINCT EXTRACT(ISOYEAR FROM pay_time)::int * 100
+ EXTRACT(WEEK FROM pay_time)::int) AS active_weeks_90,
-- 30 天窗口(子集过滤)
@@ -366,7 +372,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
THEN 1 ELSE 0 END) AS orders_30,
COUNT(DISTINCT CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
THEN DATE(pay_time) END) AS visit_days_30
THEN {biz_expr} END) AS visit_days_30
FROM consume_source
WHERE canonical_member_id > 0
GROUP BY canonical_member_id
@@ -467,12 +473,15 @@ class SpendingPowerIndexTask(BaseIndexTask):
long_days = int(params.get('spend_window_long_days', 90))
alpha = float(params.get('ewma_alpha_daily_spend', 0.3))
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr_s = biz_date_sql_expr("s.pay_time", cutoff)
sql = f"""
WITH consume_source AS (
SELECT
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
AS canonical_member_id,
DATE(s.pay_time) AS pay_date,
{biz_expr_s} AS pay_date,
COALESCE(s.pay_amount, 0) AS pay_amount
FROM dwd.dwd_settlement_head s
LEFT JOIN dwd.dim_member_card_account mca
@@ -516,12 +525,15 @@ class SpendingPowerIndexTask(BaseIndexTask):
long_days = int(params.get('spend_window_long_days', 90))
alpha = float(params.get('ewma_alpha_daily_spend', 0.3))
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr_s = biz_date_sql_expr("s.pay_time", cutoff)
sql = f"""
WITH consume_source AS (
SELECT
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
AS canonical_member_id,
DATE(s.pay_time) AS pay_date,
{biz_expr_s} AS pay_date,
COALESCE(s.pay_amount, 0) AS pay_amount
FROM dwd.dwd_settlement_head s
LEFT JOIN dwd.dim_member_card_account mca
@@ -572,13 +584,17 @@ class SpendingPowerIndexTask(BaseIndexTask):
)
return result
# CHANGE 2026-03-02 | Base calibration now uses the median of non-zero samples;
# zero-spend members are excluded from calibration.
# Rationale: zero-spend members do not help SPI discriminate between members,
# and including them in the median only drags the base down.
_CALIBRATE_MIN_SAMPLE = 10 # minimum number of non-zero samples; below this, fall back to the default value
def _calibrate_amount_bases(
self, features: Dict[int, SPIMemberFeatures], params: Dict[str, float]
) -> Dict[str, float]:
"""从门店数据计算中位数作为金额压缩基数校准值。
优先级cfg_index_parameters 配置值 > 自动校准中位数 > DEFAULT_PARAMS 默认值。
自动校准中位数 ≤ 0 时回退到 DEFAULT_PARAMS
优先级cfg_index_parameters 配置值 > 非零样本自动校准中位数 > DEFAULT_PARAMS 默认值。
仅使用值 > 0 的样本计算中位数;非零样本数 < _CALIBRATE_MIN_SAMPLE 时回退默认值
"""
# 特征字段 → 对应的 amount_base 参数名
base_extractors: Dict[str, callable] = {
@@ -600,21 +616,23 @@ class SpendingPowerIndexTask(BaseIndexTask):
)
continue
# 从特征数据计算中位数
values = [extractor(f) for f in features.values()]
median_val = self.calculate_median(values)
# 仅取非零样本计算中位数
nonzero_values = [v for v in (extractor(f) for f in features.values()) if v > 0]
if median_val > 0:
if len(nonzero_values) >= self._CALIBRATE_MIN_SAMPLE:
median_val = self.calculate_median(nonzero_values)
calibrated[base_key] = median_val
self.logger.info(
"SPI 基数校准: %s 自动校准为中位数 %.2f", base_key, median_val,
"SPI 基数校准: %s 非零样本 %d/%d,中位数 %.2f",
base_key, len(nonzero_values), len(features), median_val,
)
else:
# 中位数 ≤ 0,回退到 DEFAULT_PARAMS
# 非零样本不足,回退到 DEFAULT_PARAMS
calibrated[base_key] = self.DEFAULT_PARAMS[base_key]
self.logger.warning(
"SPI 基数校准: %s 中位数 %.2f ≤ 0,回退到默认值 %.2f",
base_key, median_val, self.DEFAULT_PARAMS[base_key],
"SPI 基数校准: %s 非零样本 %d 不足(最低 %d,回退到默认值 %.2f",
base_key, len(nonzero_values), self._CALIBRATE_MIN_SAMPLE,
self.DEFAULT_PARAMS[base_key],
)
return calibrated
@@ -747,6 +765,13 @@ class SpendingPowerIndexTask(BaseIndexTask):
)
"""
inserted = 0
# raw score 列为 numeric(10,4)display 列为 numeric(5,2)
# 防止极端数据导致 NumericValueOutOfRange
RAW_MAX = 999999.9999
DISP_MAX = 999.99
def _clamp(v, lo, hi):
return max(lo, min(hi, v))
for f in data_list:
cur.execute(insert_sql, (
f.site_id, f.member_id,
@@ -754,9 +779,14 @@ class SpendingPowerIndexTask(BaseIndexTask):
f.orders_30, f.orders_90,
f.visit_days_30, f.visit_days_90,
f.avg_ticket_90, f.active_weeks_90, f.daily_spend_ewma_90,
f.score_level_raw, f.score_speed_raw, f.score_stability_raw,
f.score_level_display, f.score_speed_display, f.score_stability_display,
f.raw_score, f.display_score,
_clamp(f.score_level_raw, -RAW_MAX, RAW_MAX),
_clamp(f.score_speed_raw, -RAW_MAX, RAW_MAX),
_clamp(f.score_stability_raw, -RAW_MAX, RAW_MAX),
_clamp(f.score_level_display, 0, DISP_MAX),
_clamp(f.score_speed_display, 0, DISP_MAX),
_clamp(f.score_stability_display, 0, DISP_MAX),
_clamp(f.raw_score, -RAW_MAX, RAW_MAX),
_clamp(f.display_score, 0, DISP_MAX),
))
inserted += max(cur.rowcount, 0)

View File

@@ -68,6 +68,10 @@ class DwsMaintenanceTask(BaseDwsTask):
{"table": "dws_finance_recharge_summary", "date_col": "stat_date"},
{"table": "dws_finance_expense_summary", "date_col": "expense_month"},
{"table": "dws_platform_settlement", "date_col": "settlement_date"},
# CHANGE [2026-03-07] intent: 项目标签表纳入历史数据清理范围
# assumptions: computed_at 为清理日期列,与其他表的 stat_date 语义一致
{"table": "dws_assistant_project_tag", "date_col": "computed_at"},
{"table": "dws_member_project_tag", "date_col": "computed_at"},
]
def get_task_code(self) -> str:

View File

@@ -6,9 +6,10 @@
"会员"为粒度,统计消费行为和滚动窗口指标
数据来源:
- dwd_settlement_head: 结账单头表
- dwd_settlement_head: 结账单头表settle_type IN (1,3) 过滤有效订单)
- dim_member: 会员维度
- dim_member_card_account: 会员卡账户
- dwd_recharge_order: 充值订单30/60/90 天窗口统计)
目标表:
dws.dws_member_consumption_summary
@@ -32,6 +33,8 @@ from datetime import date, datetime, timedelta
from decimal import Decimal
from typing import Any, Dict, List, Optional, Set, Tuple
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import BaseDwsTask, TaskContext
from .dws_helpers import mask_mobile, calc_days_since
@@ -209,12 +212,18 @@ class MemberConsumptionTask(BaseDwsTask):
"""
提取会员消费统计(含滚动窗口)
"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
-- CHANGE 2026-03-07 | consume_money → items_sum 口径校准
-- consume_money 存在三种历史口径(A/B/C)混合DWS 层统一使用 items_sum
-- items_sum = table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money
WITH consume_base AS (
SELECT
member_id,
DATE(pay_time) AS consume_date,
consume_money,
{biz_expr} AS consume_date,
table_charge_money + goods_money + assistant_pd_money
+ assistant_cx_money + electricity_money AS items_sum,
table_charge_money,
goods_money,
assistant_pd_money + assistant_cx_money AS assistant_amount
@@ -222,6 +231,9 @@ class MemberConsumptionTask(BaseDwsTask):
WHERE site_id = %s
AND member_id IS NOT NULL
AND member_id != 0
-- CHANGE 2026-03-07 | dwd_settlement_head 无 is_delete 字段,改用 settle_type 过滤
-- settle_type: 1=台桌结账, 3=商城订单; 排除 6=退货, 7=撤销
AND settle_type IN (1, 3)
)
SELECT
member_id,
@@ -229,7 +241,7 @@ class MemberConsumptionTask(BaseDwsTask):
MAX(consume_date) AS last_consume_date,
-- 全量累计
COUNT(*) AS total_visit_count,
SUM(consume_money) AS total_consume_amount,
SUM(items_sum) AS total_consume_amount,
SUM(table_charge_money) AS total_table_fee,
SUM(goods_money) AS total_goods_amount,
SUM(assistant_amount) AS total_assistant_amount,
@@ -240,12 +252,12 @@ class MemberConsumptionTask(BaseDwsTask):
COUNT(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN 1 END) AS visit_count_30d,
COUNT(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN 1 END) AS visit_count_60d,
COUNT(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN 1 END) AS visit_count_90d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '6 days' THEN consume_money ELSE 0 END) AS consume_amount_7d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '9 days' THEN consume_money ELSE 0 END) AS consume_amount_10d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '14 days' THEN consume_money ELSE 0 END) AS consume_amount_15d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN consume_money ELSE 0 END) AS consume_amount_30d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN consume_money ELSE 0 END) AS consume_amount_60d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN consume_money ELSE 0 END) AS consume_amount_90d
SUM(CASE WHEN consume_date >= %s - INTERVAL '6 days' THEN items_sum ELSE 0 END) AS consume_amount_7d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '9 days' THEN items_sum ELSE 0 END) AS consume_amount_10d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '14 days' THEN items_sum ELSE 0 END) AS consume_amount_15d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN items_sum ELSE 0 END) AS consume_amount_30d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN items_sum ELSE 0 END) AS consume_amount_60d,
SUM(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN items_sum ELSE 0 END) AS consume_amount_90d
FROM consume_base
GROUP BY member_id
"""
@@ -257,29 +269,21 @@ class MemberConsumptionTask(BaseDwsTask):
"""
提取会员信息
生日优先级手动补录fdw_app.member_birthday_manual> API 来源dim_member.birthday
FDW 连接失败时降级为仅使用 dim_member.birthday
生日来源dim_member.birthdayAPI 来源
CHANGE 2026-02-26 | 维客线索重构:移除 FDW member_birthday_manual 读取,
生日不再单独补录,归入维客线索"客户基础信息"大类
"""
# CHANGE 2026-02-21 | dim_member 无 site_id 字段,改用 register_site_id
# CHANGE 2026-02-22 | 恢复 birthday 字段C1 迁移已加列),供后续 C2 COALESCE 使用
# CHANGE 2026-02-22 | 需求 B通过事实表反查支持跨店消费会员
# CHANGE 2026-02-22 | 需求 C2COALESCE 优先手动补录生日FDW 失败时降级
sql_with_fdw = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr_create = biz_date_sql_expr("m.create_time", cutoff)
sql = f"""
SELECT
m.member_id,
m.nickname,
m.mobile,
m.member_card_grade_name,
DATE(m.create_time) AS register_date,
{biz_expr_create} AS register_date,
m.recharge_money_sum,
COALESCE(
(SELECT birthday_value
FROM fdw_app.member_birthday_manual
WHERE member_id = m.member_id
ORDER BY recorded_at ASC
LIMIT 1),
m.birthday
) AS birthday
m.birthday
FROM dwd.dim_member m
WHERE m.member_id IN (
SELECT DISTINCT member_id
@@ -289,36 +293,7 @@ class MemberConsumptionTask(BaseDwsTask):
AND member_id != 0
) AND m.scd2_is_current = 1
"""
# CHANGE 2026-02-24 | 修复列名tenant_member_id → member_iddwd_settlement_head 无 tenant_member_id 列)
sql_fallback = """
SELECT
member_id,
nickname,
mobile,
member_card_grade_name,
DATE(create_time) AS register_date,
recharge_money_sum,
birthday
FROM dwd.dim_member
WHERE member_id IN (
SELECT DISTINCT member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND member_id IS NOT NULL
AND member_id != 0
) AND scd2_is_current = 1
"""
try:
rows = self.db.query(sql_with_fdw, (site_id,))
except Exception as exc:
# CHANGE [2026-02-24] FDW 查询失败后事务处于 failed 状态,必须先 rollback 再执行 fallback
self.db.rollback()
# FDW 连接失败,降级为仅使用 dim_member.birthday
self.logger.warning(
"%s: FDW 读取 member_birthday_manual 失败,降级为 dim_member.birthday — %s",
self.get_task_code(), exc,
)
rows = self.db.query(sql_fallback, (site_id,))
rows = self.db.query(sql, (site_id,))
result = {}
for row in (rows or []):
@@ -343,11 +318,11 @@ class MemberConsumptionTask(BaseDwsTask):
balance
FROM dwd.dim_member_card_account
WHERE tenant_member_id IN (
SELECT DISTINCT tenant_member_id
SELECT DISTINCT member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
AND member_id IS NOT NULL
AND member_id != 0
) AND scd2_is_current = 1
AND COALESCE(is_delete, 0) = 0
"""
@@ -390,21 +365,23 @@ class MemberConsumptionTask(BaseDwsTask):
返回: {member_id: {count_30d, count_60d, count_90d,
amount_30d, amount_60d, amount_90d}}
"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
member_id,
COUNT(CASE WHEN DATE(pay_time) >= %s - INTERVAL '29 days' THEN 1 END) AS count_30d,
COUNT(CASE WHEN DATE(pay_time) >= %s - INTERVAL '59 days' THEN 1 END) AS count_60d,
COUNT(CASE WHEN DATE(pay_time) >= %s - INTERVAL '89 days' THEN 1 END) AS count_90d,
COALESCE(SUM(CASE WHEN DATE(pay_time) >= %s - INTERVAL '29 days' THEN pay_amount ELSE 0 END), 0) AS amount_30d,
COALESCE(SUM(CASE WHEN DATE(pay_time) >= %s - INTERVAL '59 days' THEN pay_amount ELSE 0 END), 0) AS amount_60d,
COALESCE(SUM(CASE WHEN DATE(pay_time) >= %s - INTERVAL '89 days' THEN pay_amount ELSE 0 END), 0) AS amount_90d
COUNT(CASE WHEN {biz_expr} >= %s - INTERVAL '29 days' THEN 1 END) AS count_30d,
COUNT(CASE WHEN {biz_expr} >= %s - INTERVAL '59 days' THEN 1 END) AS count_60d,
COUNT(CASE WHEN {biz_expr} >= %s - INTERVAL '89 days' THEN 1 END) AS count_90d,
COALESCE(SUM(CASE WHEN {biz_expr} >= %s - INTERVAL '29 days' THEN pay_amount ELSE 0 END), 0) AS amount_30d,
COALESCE(SUM(CASE WHEN {biz_expr} >= %s - INTERVAL '59 days' THEN pay_amount ELSE 0 END), 0) AS amount_60d,
COALESCE(SUM(CASE WHEN {biz_expr} >= %s - INTERVAL '89 days' THEN pay_amount ELSE 0 END), 0) AS amount_90d
FROM dwd.dwd_recharge_order
WHERE site_id = %s
AND member_id IS NOT NULL
AND member_id != 0
AND pay_time IS NOT NULL
AND DATE(pay_time) <= %s
AND {biz_expr} <= %s
GROUP BY member_id
"""
params = (

View File

@@ -0,0 +1,224 @@
# -*- coding: utf-8 -*-
"""
DWS 客户项目标签任务
按时间窗口计算每位客户在四大项目（BILLIARD/SNOOKER/MAHJONG/KTV）的
消费时长占比，占比≥25% 则分配标签。散客（member_id=0）不参与。
数据链路:
dwd_table_fee_log (ledger_count)
→ JOIN dim_table (site_table_id → table_id, scd2_is_current=1)
→ get_area_category(area_name, table_name)
→ 按 category_code 汇总 → 计算占比 → 写入 dws_member_project_tag
目标表:
dws.dws_member_project_tag
更新策略:
全量删除重建(按 site_id 删除后重新插入所有时间窗口)
"""
from __future__ import annotations
from datetime import date
from decimal import Decimal
from typing import Any, Dict, List, Optional
from tasks.dws.base_dws_task import BaseDwsTask, TimeWindow
from neozqyy_shared.datetime_utils import biz_date_sql_expr
# 只计算四大项目
VALID_CATEGORIES = {"BILLIARD", "SNOOKER", "MAHJONG", "KTV"}
# 客户看板的 2 个时间窗口
MEMBER_WINDOWS = [
TimeWindow.LAST_30_DAYS,
TimeWindow.LAST_60_DAYS,
]
TAG_THRESHOLD = Decimal("0.25")
class MemberProjectTagTask(BaseDwsTask):
    """ETL task that builds per-member project tags for the DWS layer.

    For every window in ``MEMBER_WINDOWS`` the task sums table-fee duration
    per member and per project category (only codes in ``VALID_CATEGORIES``
    are counted), converts the sums into shares of the member's total, and
    flags a category as tagged when its share reaches ``TAG_THRESHOLD``.
    Rows with ``member_id`` NULL or 0 are excluded at extraction time.
    Results are written to ``dws.dws_member_project_tag``.
    """

    def get_task_code(self) -> str:
        """Return the task code identifying this ETL task."""
        return "DWS_MEMBER_PROJECT_TAG"

    def get_target_table(self) -> str:
        """Return the unqualified name of the target table."""
        return "dws_member_project_tag"

    def get_primary_keys(self) -> List[str]:
        """Return the columns that uniquely identify one output row."""
        return ["site_id", "member_id", "time_window", "category_code"]

    def extract(self, context) -> Dict[str, Any]:
        """Collect table metadata and per-window duration rows for one site.

        Args:
            context: Task context; only ``context.store_id`` is read here.

        Returns:
            A dict with ``window_data`` (window value -> list of duration
            rows), ``table_info`` (table_id -> table row) and ``site_id``.
        """
        site_id = context.store_id
        self.logger.info("%s: 提取客户台费时长数据", self.get_task_code())
        # Make sure the config cache is loaded before transform() needs it.
        self.load_config_cache()
        table_info = self._extract_table_info(site_id)
        window_data: Dict[str, List[Dict]] = {}
        for window in MEMBER_WINDOWS:
            time_range = self.get_time_window_range(window)
            rows = self._extract_member_durations(
                site_id, time_range.start, time_range.end
            )
            window_data[window.value] = rows
        return {
            "window_data": window_data,
            "table_info": table_info,
            "site_id": site_id,
        }

    def _extract_table_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
        """Fetch the current dim_table rows of a site, keyed by table_id."""
        sql = """
            SELECT table_id, table_name, site_table_area_name AS area_name
            FROM dwd.dim_table
            WHERE site_id = %s AND scd2_is_current = 1
        """
        rows = self.db.query(sql, (site_id,))
        return {r["table_id"]: dict(r) for r in (rows or [])}

    def _extract_member_durations(
        self, site_id: int, start_date: date, end_date: date
    ) -> List[Dict[str, Any]]:
        """Sum ``ledger_count`` per (member, table) inside a business-date range.

        The business date is derived from ``ledger_end_time`` through
        ``biz_date_sql_expr`` using the configured day-start hour (default 8).
        Soft-deleted rows and rows with ``member_id`` NULL or 0 are excluded.
        The sum is exposed as ``duration_seconds``.
        NOTE(review): presumably ``ledger_count`` is measured in seconds;
        confirm against the DWD schema.
        """
        cutoff = self.config.get("app.business_day_start_hour", 8)
        biz_expr = biz_date_sql_expr("tfl.ledger_end_time", cutoff)
        sql = f"""
            SELECT
                tfl.member_id,
                tfl.site_table_id AS table_id,
                COALESCE(SUM(tfl.ledger_count), 0) AS duration_seconds
            FROM dwd.dwd_table_fee_log tfl
            WHERE tfl.site_id = %(site_id)s
              AND {biz_expr} >= %(start_date)s
              AND {biz_expr} <= %(end_date)s
              AND COALESCE(tfl.is_delete, 0) = 0
              AND tfl.member_id IS NOT NULL
              AND tfl.member_id != 0
            GROUP BY tfl.member_id, tfl.site_table_id
        """
        rows = self.db.query(sql, {
            "site_id": site_id,
            "start_date": start_date,
            "end_date": end_date,
        })
        return [dict(r) for r in rows] if rows else []

    def transform(self, extracted: Dict[str, Any], context) -> List[Dict[str, Any]]:
        """Turn per-table durations into per-category share rows.

        Args:
            extracted: The dict produced by ``extract``.
            context: Task context; ``tenant_id`` is read if present, else 0.

        Returns:
            One dict per (window, member, category) whose category is in
            ``VALID_CATEGORIES`` and whose summed duration is positive.
            ``percentage`` is the category's share of the member's total
            across valid categories, rounded to 4 decimals; ``is_tagged``
            compares that rounded share with ``TAG_THRESHOLD``.
        """
        table_info = extracted["table_info"]
        site_id = extracted["site_id"]
        tenant_id = getattr(context, "tenant_id", 0) or 0
        # Display names depend only on the category code, so resolve each code
        # once per run instead of re-scanning the config cache for every row.
        display_by_code: Dict[str, Dict[str, str]] = {}
        results: List[Dict[str, Any]] = []

        for window_value, rows in extracted["window_data"].items():
            # member_id -> category_code -> accumulated duration
            member_cats: Dict[int, Dict[str, int]] = {}
            for row in rows:
                mid = row["member_id"]
                tid = row["table_id"]
                secs = self.safe_int(row["duration_seconds"])
                if secs <= 0:
                    continue
                # Tables missing from dim_table fall through with None names.
                tinfo = table_info.get(tid, {})
                cat = self.get_area_category(
                    tinfo.get("area_name"), tinfo.get("table_name")
                )
                code = cat.get("category_code", "OTHER")
                if code not in VALID_CATEGORIES:
                    continue
                per_member = member_cats.setdefault(mid, {})
                per_member[code] = per_member.get(code, 0) + secs

            for mid, cats in member_cats.items():
                total = sum(cats.values())
                if total <= 0:
                    continue
                for code, secs in cats.items():
                    pct = Decimal(str(secs)) / Decimal(str(total))
                    pct = pct.quantize(Decimal("0.0001"))
                    cat_info = display_by_code.get(code)
                    if cat_info is None:
                        cat_info = self._get_category_display(code)
                        display_by_code[code] = cat_info
                    results.append({
                        "site_id": site_id,
                        "tenant_id": tenant_id,
                        "member_id": mid,
                        "time_window": window_value,
                        "category_code": code,
                        "category_name": cat_info["category_name"],
                        "short_name": cat_info["short_name"],
                        "duration_seconds": secs,
                        "total_seconds": total,
                        "percentage": float(pct),
                        "is_tagged": pct >= TAG_THRESHOLD,
                    })

        self.logger.info(
            "%s: 生成 %d 条标签记录(其中 %d 条达标)",
            self.get_task_code(),
            len(results),
            sum(1 for r in results if r["is_tagged"]),
        )
        return results

    def _get_category_display(self, code: str) -> Dict[str, str]:
        """Return the display name and short name for a category code.

        The first entry of the config cache's ``area_categories`` whose
        ``category_code`` matches wins; otherwise a built-in fallback table
        is used, and unknown codes fall back to the code itself and its
        first character.
        """
        cache = self.load_config_cache()
        # Only the values are needed; the mapping keys are irrelevant here.
        for cat in cache.area_categories.values():
            if cat.get("category_code") == code:
                return {
                    "category_name": cat.get("display_name") or cat.get("category_name", code),
                    "short_name": cat.get("short_name", code[:1]),
                }
        fallback = {
            "BILLIARD": ("🎱 中式/追分", "🎱"),
            "SNOOKER": ("斯诺克", ""),
            "MAHJONG": ("🀄 麻将/棋牌", "🀄"),
            "KTV": ("🎤 团建/K歌", "🎤"),
        }
        name, short = fallback.get(code, (code, code[:1]))
        return {"category_name": name, "short_name": short}

    def load(self, transformed, context) -> dict:
        """Replace all tag rows of the site with the freshly computed ones.

        Args:
            transformed: Rows produced by ``transform``.
            context: Task context (unused here).

        Returns:
            A dict with ``status`` and ``counts`` (``inserted``, ``deleted``).
        """
        # NOTE(review): an empty result returns before the DELETE, so rows from
        # an earlier run stay in place; confirm this is intended given the
        # delete-and-rebuild strategy.
        if not transformed:
            return {"status": "SUCCESS", "counts": {"inserted": 0, "deleted": 0}}

        site_id = transformed[0]["site_id"]
        delete_sql = "DELETE FROM dws.dws_member_project_tag WHERE site_id = %s"
        self.db.execute(delete_sql, (site_id,))
        # Read the affected-row count defensively: the wrapper may expose
        # `cursor` as a factory method (no `rowcount` attribute), and DB-API
        # drivers report -1 when the count is unknown. Neither case should
        # raise or leak a bogus number into the task statistics.
        rowcount = getattr(getattr(self.db, "cursor", None), "rowcount", None)
        deleted = rowcount if isinstance(rowcount, int) and rowcount > 0 else 0

        insert_sql = """
            INSERT INTO dws.dws_member_project_tag (
                site_id, tenant_id, member_id, time_window,
                category_code, category_name, short_name,
                duration_seconds, total_seconds, percentage, is_tagged,
                computed_at, created_at, updated_at
            ) VALUES (
                %(site_id)s, %(tenant_id)s, %(member_id)s, %(time_window)s,
                %(category_code)s, %(category_name)s, %(short_name)s,
                %(duration_seconds)s, %(total_seconds)s, %(percentage)s, %(is_tagged)s,
                NOW(), NOW(), NOW()
            )
        """
        for row in transformed:
            self.db.execute(insert_sql, row)

        self.logger.info(
            "%s: 删除 %d 条,插入 %d",
            self.get_task_code(), deleted, len(transformed),
        )
        return {
            "status": "SUCCESS",
            "counts": {"inserted": len(transformed), "deleted": deleted},
        }

View File

@@ -35,6 +35,8 @@ from datetime import date, datetime, timedelta
from decimal import Decimal
from typing import Any, Dict, List, Optional, Set, Tuple
from neozqyy_shared.datetime_utils import biz_date_sql_expr
from .base_dws_task import BaseDwsTask, TaskContext
from .dws_helpers import mask_mobile
@@ -152,7 +154,7 @@ class MemberVisitTask(BaseDwsTask):
# 获取区域分类
area_name = tbl_info.get('area_name')
area_cat = self.get_area_category(area_name)
area_cat = self.get_area_category(area_name, tbl_info.get('table_name'))
# 构建助教服务JSON
assistant_services_json = self._build_assistant_services_json(services)
@@ -175,7 +177,7 @@ class MemberVisitTask(BaseDwsTask):
# 会员信息
'member_nickname': memb_info.get('nickname'),
'member_mobile': self._mask_mobile(memb_info.get('mobile')),
# CHANGE 2026-02-22 | 恢复从 dim_member.birthday 读取
# CHANGE 2026-02-26 | 生日仅从 dim_member.birthday 读取API 来源)
'member_birthday': memb_info.get('birthday'),
# 台桌信息
'table_id': table_id,
@@ -187,12 +189,20 @@ class MemberVisitTask(BaseDwsTask):
'goods_amount': self.safe_decimal(settle.get('goods_money', 0)),
'assistant_amount': self.safe_decimal(settle.get('assistant_pd_money', 0)) + \
self.safe_decimal(settle.get('assistant_cx_money', 0)),
'total_consume': self.safe_decimal(settle.get('consume_money', 0)),
# CHANGE 2026-03-07 | consume_money → items_sum 口径校准
'total_consume': (
self.safe_decimal(settle.get('table_charge_money', 0))
+ self.safe_decimal(settle.get('goods_money', 0))
+ self.safe_decimal(settle.get('assistant_pd_money', 0))
+ self.safe_decimal(settle.get('assistant_cx_money', 0))
+ self.safe_decimal(settle.get('electricity_money', 0))
),
'total_discount': self._calc_total_discount(settle),
'actual_pay': self.safe_decimal(settle.get('pay_amount', 0)),
# 支付方式
'cash_pay': self.safe_decimal(settle.get('pay_amount', 0)),
'cash_card_pay': self.safe_decimal(settle.get('balance_amount', 0)),
'balance_pay': self.safe_decimal(settle.get('balance_amount', 0)),
'recharge_card_pay': self.safe_decimal(settle.get('recharge_card_amount', 0)),
'gift_card_pay': self.safe_decimal(settle.get('gift_card_amount', 0)),
'groupbuy_pay': self.safe_decimal(settle.get('coupon_amount', 0)),
# 时长
@@ -205,7 +215,49 @@ class MemberVisitTask(BaseDwsTask):
return results
# load() 已移除——使用 BaseDwsTask 默认实现DATE_COL="visit_date"
# CHANGE 2026-02-27 | bugfix: 覆盖 load(),在标准 delete-by-window 后
# 额外按 order_settle_id 清理旧数据,防止 biz_date 切换后残留记录导致唯一约束冲突。
# 背景visit_date 从 pay_time::date 改为 biz_date_sql_expr 后,凌晨订单的
# visit_date 前移一天,旧数据不在新窗口的 delete 范围内insert 时触发
# uk_dws_member_visit (site_id, member_id, order_settle_id) 冲突。
def load(self, transformed, context: "TaskContext") -> dict:
    """Delete-then-insert load with an extra purge keyed by order_settle_id.

    After the standard window-based delete, rows that share an
    ``order_settle_id`` with the current batch are removed as well, whatever
    date they are stored under, and only then is the batch inserted.

    Args:
        transformed: Rows to write; an empty value short-circuits with
            all-zero counts and touches nothing.
        context: Task context; ``store_id`` is the fallback site id.

    Returns:
        A dict with ``counts`` and, for a non-empty batch, ``extra`` holding
        the two delete counts.
    """
    counts = {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}
    if not transformed:
        return {"counts": counts}

    deleted = self.delete_existing_data(
        context, date_col=self.DATE_COL or "stat_date"
    )

    # Purge leftovers of the same settlements stored outside the delete window.
    settle_ids = [
        rec["order_settle_id"] for rec in transformed if rec.get("order_settle_id")
    ]
    extra_deleted = 0
    if settle_ids:
        target = f"{self.DWS_SCHEMA}.{self.get_target_table()}"
        marks = ",".join("%s" for _ in settle_ids)
        purge_sql = (
            f"DELETE FROM {target} "
            f"WHERE site_id = %s AND order_settle_id IN ({marks})"
        )
        site_id = transformed[0].get("site_id", context.store_id)
        with self.db.conn.cursor() as cur:
            cur.execute(purge_sql, [site_id, *settle_ids])
            extra_deleted = cur.rowcount
        if extra_deleted:
            self.logger.info(
                "%s: 额外清理残留旧数据 %dorder_settle_id 去重)",
                self.get_task_code(), extra_deleted,
            )

    counts["inserted"] = self.bulk_insert(transformed)
    counts["fetched"] = len(transformed)
    return {
        "counts": counts,
        "extra": {"deleted": deleted, "extra_deleted": extra_deleted},
    }
# ==========================================================================
# 数据提取方法
@@ -220,7 +272,9 @@ class MemberVisitTask(BaseDwsTask):
"""
提取结账单
"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("pay_time", cutoff)
sql = f"""
SELECT
order_settle_id,
order_trade_no,
@@ -228,8 +282,9 @@ class MemberVisitTask(BaseDwsTask):
member_id,
create_time,
pay_time,
DATE(pay_time) AS visit_date,
consume_money,
{biz_expr} AS visit_date,
-- CHANGE 2026-03-07 | 新增 electricity_money 用于 items_sum 计算
electricity_money,
pay_amount,
table_charge_money,
goods_money,
@@ -244,10 +299,12 @@ class MemberVisitTask(BaseDwsTask):
recharge_card_amount
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND DATE(pay_time) >= %s
AND DATE(pay_time) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND member_id IS NOT NULL
AND member_id != 0
-- CHANGE 2026-03-07 | 排除退货(6)/退款(7),仅保留台桌结账(1)+商城订单(3)
AND settle_type IN (1, 3)
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
return [dict(row) for row in rows] if rows else []
@@ -261,7 +318,9 @@ class MemberVisitTask(BaseDwsTask):
"""
提取助教服务明细
"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("start_use_time", cutoff)
sql = f"""
SELECT
order_settle_id,
site_assistant_id AS assistant_id,
@@ -270,8 +329,8 @@ class MemberVisitTask(BaseDwsTask):
ledger_amount
FROM dwd.dwd_assistant_service_log
WHERE site_id = %s
AND DATE(start_use_time) >= %s
AND DATE(start_use_time) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND is_delete = 0
"""
rows = self.db.query(sql, (site_id, start_date, end_date))
@@ -286,14 +345,16 @@ class MemberVisitTask(BaseDwsTask):
"""
提取台费时长(真实秒数)
"""
sql = """
cutoff = self.config.get("app.business_day_start_hour", 8)
biz_expr = biz_date_sql_expr("ledger_end_time", cutoff)
sql = f"""
SELECT
order_settle_id,
SUM(COALESCE(real_table_use_seconds, 0)) AS table_use_seconds
FROM dwd.dwd_table_fee_log
WHERE site_id = %s
AND DATE(ledger_end_time) >= %s
AND DATE(ledger_end_time) <= %s
AND {biz_expr} >= %s
AND {biz_expr} <= %s
AND COALESCE(is_delete, 0) = 0
GROUP BY order_settle_id
"""
@@ -304,61 +365,26 @@ class MemberVisitTask(BaseDwsTask):
"""
提取会员信息
生日优先级手动补录fdw_app.member_birthday_manual> API 来源dim_member.birthday
FDW 连接失败时降级为仅使用 dim_member.birthday
生日来源dim_member.birthdayAPI 来源
CHANGE 2026-02-26 | 维客线索重构:移除 FDW member_birthday_manual 读取,
生日不再单独补录,归入维客线索"客户基础信息"大类
"""
# CHANGE 2026-02-21 | dim_member 无 site_id 字段,改用 register_site_id
# CHANGE 2026-02-22 | 恢复 birthday 字段C1 迁移已加列)
# CHANGE 2026-02-22 | 需求 B通过事实表反查支持跨店消费会员
# CHANGE 2026-02-22 | 需求 C2COALESCE 优先手动补录生日FDW 失败时降级
sql_with_fdw = """
sql = """
SELECT
m.member_id,
m.nickname,
m.mobile,
COALESCE(
(SELECT birthday_value
FROM fdw_app.member_birthday_manual
WHERE member_id = m.member_id
ORDER BY recorded_at ASC
LIMIT 1),
m.birthday
) AS birthday
m.birthday
FROM dwd.dim_member m
WHERE m.member_id IN (
SELECT DISTINCT tenant_member_id
SELECT DISTINCT member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
AND member_id IS NOT NULL
AND member_id != 0
) AND m.scd2_is_current = 1
"""
sql_fallback = """
SELECT
member_id,
nickname,
mobile,
birthday
FROM dwd.dim_member
WHERE member_id IN (
SELECT DISTINCT tenant_member_id
FROM dwd.dwd_settlement_head
WHERE site_id = %s
AND tenant_member_id IS NOT NULL
AND tenant_member_id != 0
) AND scd2_is_current = 1
"""
try:
rows = self.db.query(sql_with_fdw, (site_id,))
except Exception as exc:
# CHANGE [2026-02-24] FDW 查询失败后事务处于 failed 状态,必须先 rollback 再执行 fallback
self.db.rollback()
# FDW 连接失败,降级为仅使用 dim_member.birthday
self.logger.warning(
"%s: FDW 读取 member_birthday_manual 失败,降级为 dim_member.birthday — %s",
self.get_task_code(), exc,
)
rows = self.db.query(sql_fallback, (site_id,))
rows = self.db.query(sql, (site_id,))
return {r['member_id']: dict(r) for r in (rows or [])}

View File

@@ -15,6 +15,11 @@ from psycopg2.extras import Json, execute_values
from models.parsers import TypeParser
from tasks.base_task import BaseTask
from utils.windowing import build_window_segments, calc_window_minutes, calc_window_days, format_window_days
from config.pipeline_config import PipelineConfig
from pipeline.models import PipelineRequest, PipelineResult, WriteResult
from pipeline.unified_pipeline import UnifiedPipeline
from utils.cancellation import CancellationToken
from utils.task_log_buffer import TaskLogBuffer
ColumnTransform = Callable[[Any], Any]
@@ -67,6 +72,15 @@ class OdsTaskSpec:
# WINDOW 模式的时间列名
snapshot_time_column: str | None = None
# ── Detail_Mode 可选配置(二级详情拉取)──
detail_endpoint: str | None = None # 详情接口 endpoint
detail_param_builder: Callable[[dict], dict] | None = None # 详情请求参数构造函数
detail_target_table: str | None = None # 详情数据目标表名
detail_data_path: Tuple[str, ...] | None = None # 详情数据的 data_path
detail_list_key: str | None = None # 详情数据的 list_key
detail_id_column: str | None = None # 从列表数据中提取 ID 的列名
detail_process_fn: Callable[[Any], list[dict]] | None = None # 自定义详情处理函数
def __post_init__(self) -> None:
if self.snapshot_mode == SnapshotMode.WINDOW and not self.snapshot_time_column:
raise ValueError(
@@ -88,7 +102,10 @@ class BaseOdsTask(BaseTask):
def execute(self, cursor_data: dict | None = None) -> dict:
spec = self.SPEC
# 创建任务级日志缓冲区,任务完成后一次性输出,避免多任务日志交叉
self._log_buf = TaskLogBuffer(spec.code, self.logger)
self.logger.info("开始执行%s (ODS)", spec.code)
self._log_buf.info("开始执行%s (ODS)", spec.code)
window_start, window_end, window_minutes = self._resolve_window(cursor_data)
segments = build_window_segments(
@@ -111,6 +128,11 @@ class BaseOdsTask(BaseTask):
total_segments,
format_window_days(total_days),
)
self._log_buf.info(
"窗口拆分为 %s 段(共 %s 天)",
total_segments,
format_window_days(total_days),
)
store_id = TypeParser.parse_int(self.config.get("app.store_id"))
if not store_id:
@@ -141,6 +163,10 @@ class BaseOdsTask(BaseTask):
]
has_is_delete = self._table_has_column(spec.table_name, "is_delete")
# 构建 PipelineConfig支持任务级覆盖
pipeline_config = PipelineConfig.from_app_config(self.config, spec.code)
cancel_token = getattr(self, '_cancel_token', None) or CancellationToken()
try:
for idx, (seg_start, seg_end) in enumerate(segments, start=1):
params = self._build_params(
@@ -158,11 +184,12 @@ class BaseOdsTask(BaseTask):
"errors": 0,
"deleted": 0,
}
# 快照软删除需要的共享状态process_fn 闭包写入)
segment_keys: set[tuple] = set()
# CHANGE 2026-02-18 | 收集 WINDOW 模式下 API 返回数据的实际最早时间戳
segment_earliest_time: datetime | None = None
segment_earliest_time: list[datetime | None] = [None]
# CHANGE [2026-02-24] 收集 API 返回数据的实际最晚时间戳,用于 late-cutoff 保护
segment_latest_time: datetime | None = None
segment_latest_time: list[datetime | None] = [None]
self.logger.info(
"%s: 开始执行(%s/%s),窗口[%s ~ %s]",
@@ -172,52 +199,51 @@ class BaseOdsTask(BaseTask):
seg_start,
seg_end,
)
self._log_buf.info(
"开始执行(%s/%s),窗口[%s ~ %s]",
idx, total_segments, seg_start, seg_end,
)
for _, page_records, _, response_payload in self.api.iter_paginated(
endpoint=spec.endpoint,
params=params,
page_size=page_size,
data_path=spec.data_path,
list_key=spec.list_key,
):
if (
snapshot_missing_delete
and has_is_delete
and business_pk_cols
and snapshot_mode != SnapshotMode.NONE
):
segment_keys.update(self._collect_business_keys(page_records, business_pk_cols))
# CHANGE 2026-02-18 | 收集实际最早时间戳,用于 early-cutoff 保护
if (
snapshot_protect_early_cutoff
and snapshot_mode == SnapshotMode.WINDOW
and snapshot_time_column
):
page_earliest = self._collect_earliest_time(
page_records, snapshot_time_column
)
if page_earliest is not None:
if segment_earliest_time is None or page_earliest < segment_earliest_time:
segment_earliest_time = page_earliest
# CHANGE [2026-02-24] 收集实际最晚时间戳,用于 late-cutoff 保护
page_latest = self._collect_latest_time(
page_records, snapshot_time_column
)
if page_latest is not None:
if segment_latest_time is None or page_latest > segment_latest_time:
segment_latest_time = page_latest
inserted, updated, skipped = self._insert_records_schema_aware(
table=spec.table_name,
records=page_records,
response_payload=response_payload,
source_file=source_file,
source_endpoint=spec.endpoint if spec.include_source_endpoint else None,
)
segment_counts["fetched"] += len(page_records)
segment_counts["inserted"] += inserted
segment_counts["updated"] += updated
segment_counts["skipped"] += skipped
# 构建 UnifiedPipeline 并执行当前 segment
pipeline = UnifiedPipeline(
api_client=self.api,
db_connection=self.db,
logger=self.logger,
config=pipeline_config,
cancel_token=cancel_token,
)
requests = self._build_requests(
spec, params, page_size, idx - 1,
)
process_fn = self._build_process_fn(
spec,
snapshot_missing_delete=snapshot_missing_delete,
has_is_delete=has_is_delete,
business_pk_cols=business_pk_cols,
snapshot_mode=snapshot_mode,
snapshot_protect_early_cutoff=snapshot_protect_early_cutoff,
snapshot_time_column=snapshot_time_column,
segment_keys=segment_keys,
segment_earliest_time=segment_earliest_time,
segment_latest_time=segment_latest_time,
)
write_fn = self._build_write_fn(spec, source_file)
pipe_result = pipeline.run(requests, process_fn, write_fn)
# 将 PipelineResult 映射到 segment_counts
segment_counts["fetched"] = pipe_result.total_fetched
segment_counts["inserted"] = pipe_result.total_inserted
segment_counts["updated"] = pipe_result.total_updated
segment_counts["skipped"] = pipe_result.total_skipped
segment_counts["errors"] = (
pipe_result.request_failures
+ pipe_result.processing_failures
+ pipe_result.write_failures
)
# 快照软删除pipeline 完成后执行,保留原有逻辑)
if (
snapshot_missing_delete
and has_is_delete
@@ -230,28 +256,36 @@ class BaseOdsTask(BaseTask):
if (
snapshot_protect_early_cutoff
and snapshot_mode == SnapshotMode.WINDOW
and segment_earliest_time is not None
and segment_earliest_time > seg_start
and segment_earliest_time[0] is not None
and segment_earliest_time[0] > seg_start
):
self.logger.info(
"%s: early-cutoff 保护生效,软删除窗口起点从 %s 收窄至 %s",
spec.code, seg_start, segment_earliest_time,
spec.code, seg_start, segment_earliest_time[0],
)
effective_window_start = segment_earliest_time
self._log_buf.info(
"early-cutoff 保护生效,软删除窗口起点从 %s 收窄至 %s",
seg_start, segment_earliest_time[0],
)
effective_window_start = segment_earliest_time[0]
# CHANGE [2026-02-24] late-cutoff 保护:用 API 实际最晚时间戳收窄软删除范围
# 防止 recent endpoint 数据保留期滚动导致窗口尾部数据消失时误标删除
effective_window_end = seg_end
if (
snapshot_protect_early_cutoff
and snapshot_mode == SnapshotMode.WINDOW
and segment_latest_time is not None
and segment_latest_time < seg_end
and segment_latest_time[0] is not None
and segment_latest_time[0] < seg_end
):
self.logger.info(
"%s: late-cutoff 保护生效,软删除窗口终点从 %s 收窄至 %s",
spec.code, seg_end, segment_latest_time,
spec.code, seg_end, segment_latest_time[0],
)
effective_window_end = segment_latest_time
self._log_buf.info(
"late-cutoff 保护生效,软删除窗口终点从 %s 收窄至 %s",
seg_end, segment_latest_time[0],
)
effective_window_end = segment_latest_time[0]
deleted = self._mark_missing_as_deleted(
table=spec.table_name,
business_pk_cols=business_pk_cols,
@@ -279,6 +313,12 @@ class BaseOdsTask(BaseTask):
format_window_days(processed_days),
format_window_days(total_days),
)
self._log_buf.info(
"完成(%s/%s),已处理 %s/%s",
idx, total_segments,
format_window_days(processed_days),
format_window_days(total_days),
)
if total_segments > 1:
segment_results.append(
{
@@ -291,13 +331,76 @@ class BaseOdsTask(BaseTask):
}
)
# ── Detail_Mode列表拉取全部完成后执行二级详情拉取 ──
detail_counts = {
"detail_success": 0,
"detail_failure": 0,
"detail_skipped": 0,
}
if spec.detail_endpoint:
self.logger.info("%s: 列表阶段完成,进入详情拉取阶段", spec.code)
self._log_buf.info("列表阶段完成,进入详情拉取阶段")
detail_pipeline = UnifiedPipeline(
api_client=self.api,
db_connection=self.db,
logger=self.logger,
config=pipeline_config,
cancel_token=cancel_token,
)
detail_requests = self._build_detail_requests(spec)
detail_process_fn = self._build_detail_process_fn(spec)
detail_write_fn = self._build_detail_write_fn(spec, source_file)
detail_result = detail_pipeline.run(
detail_requests, detail_process_fn, detail_write_fn,
)
self.db.commit()
# 填充详情统计:成功 = 完成的请求数,失败 = 请求失败数,跳过 = 0无跳过逻辑
detail_counts["detail_success"] = detail_result.completed_requests
detail_counts["detail_failure"] = (
detail_result.request_failures
+ detail_result.processing_failures
+ detail_result.write_failures
)
# 记录详情阶段每个失败项的错误日志
for err in detail_result.errors:
self.logger.error(
"%s: 详情请求失败, detail_id=%s, error=%s",
spec.code,
err.get("detail_id", err.get("endpoint", "unknown")),
err.get("error", "unknown"),
)
self._log_buf.error(
"详情请求失败, detail_id=%s, error=%s",
err.get("detail_id", err.get("endpoint", "unknown")),
err.get("error", "unknown"),
)
self.logger.info(
"%s: 详情拉取完成, success=%d, failure=%d, skipped=%d",
spec.code,
detail_counts["detail_success"],
detail_counts["detail_failure"],
detail_counts["detail_skipped"],
)
self._log_buf.info(
"详情拉取完成, success=%d, failure=%d, skipped=%d",
detail_counts["detail_success"],
detail_counts["detail_failure"],
detail_counts["detail_skipped"],
)
self.logger.info("%s ODS 任务完成: %s", spec.code, total_counts)
self._log_buf.info("ODS 任务完成: %s", total_counts)
allow_empty_advance = bool(self.config.get("run.allow_empty_result_advance", False))
status = "SUCCESS"
if total_counts["fetched"] == 0 and not allow_empty_advance:
status = "PARTIAL"
result = self._build_result(status, total_counts)
# 附加详情统计到结果
result["detail"] = detail_counts
overall_start = segments[0][0]
overall_end = segments[-1][1]
result["window"] = {
@@ -311,14 +414,223 @@ class BaseOdsTask(BaseTask):
result["request_params"] = params_list[0]
else:
result["request_params"] = params_list
# 任务完成,将缓冲日志一次性输出到父 logger
self._log_buf.flush()
return result
except Exception:
self.db.rollback()
total_counts["errors"] += 1
self.logger.error("%s ODS 任务失败", spec.code, exc_info=True)
self._log_buf.error("ODS 任务失败")
# 异常时也 flush确保已收集的日志不丢失
self._log_buf.flush()
raise
# ── Pipeline 集成方法 ──
def _build_requests(
    self,
    spec: OdsTaskSpec,
    params: dict,
    page_size: int,
    segment_index: int,
) -> Iterable[PipelineRequest]:
    """Yield one PipelineRequest per page returned by ``api.iter_paginated``.

    Pagination is driven here; each yielded request carries the page's
    records and raw payload in ``_prefetched_response``.
    NOTE(review): per the original author's note, UnifiedPipeline's request
    loop is expected to use that prefetched data instead of issuing its own
    API call; confirm against the pipeline implementation.

    Args:
        spec: Task spec supplying endpoint, data_path and list_key.
        params: Request parameters for the current segment.
        page_size: Page size passed to the paginator.
        segment_index: Zero-based index of the window segment.
    """
    pages = self.api.iter_paginated(
        endpoint=spec.endpoint,
        params=params,
        page_size=page_size,
        data_path=spec.data_path,
        list_key=spec.list_key,
    )
    # Page number and total count are produced by the paginator but unused.
    for _page_no, rows, _total, payload in pages:
        prefetched = {
            "records": rows,
            "response_payload": payload,
        }
        yield PipelineRequest(
            endpoint=spec.endpoint,
            params=params,
            page_size=page_size,
            data_path=spec.data_path,
            list_key=spec.list_key,
            segment_index=segment_index,
            _prefetched_response=prefetched,
        )
def _build_process_fn(
    self,
    spec: OdsTaskSpec,
    *,
    snapshot_missing_delete: bool,
    has_is_delete: bool,
    business_pk_cols: list[str],
    snapshot_mode: SnapshotMode,
    snapshot_protect_early_cutoff: bool,
    snapshot_time_column: str | None,
    segment_keys: set[tuple],
    segment_earliest_time: list[datetime | None],
    segment_latest_time: list[datetime | None],
) -> Callable[[Any], list[dict]]:
    """Build the per-page processing callback handed to the pipeline.

    The callback returns the page's records unchanged. As a side effect it
    fills the caller-owned accumulators used later for snapshot soft-delete:
    ``segment_keys`` (business keys seen) and the one-element lists
    ``segment_earliest_time`` / ``segment_latest_time`` (running min / max of
    the snapshot time column).
    """
    # These inputs are plain values fixed for the closure's lifetime.
    keys_wanted = (
        snapshot_missing_delete
        and has_is_delete
        and snapshot_mode != SnapshotMode.NONE
    )
    bounds_wanted = (
        snapshot_protect_early_cutoff
        and snapshot_mode == SnapshotMode.WINDOW
    )

    def process_fn(response: Any) -> list[dict]:
        # `response` is the _prefetched_response dict attached to the request.
        page = response.get("records", [])
        if not page:
            return []

        # Business keys seen in this segment (snapshot soft-delete).
        if keys_wanted and business_pk_cols:
            segment_keys.update(
                self._collect_business_keys(page, business_pk_cols)
            )

        if bounds_wanted and snapshot_time_column:
            # Running minimum, used by the early-cutoff protection.
            lowest = self._collect_earliest_time(page, snapshot_time_column)
            if lowest is not None:
                known_min = segment_earliest_time[0]
                if known_min is None or lowest < known_min:
                    segment_earliest_time[0] = lowest
            # Running maximum, used by the late-cutoff protection.
            highest = self._collect_latest_time(page, snapshot_time_column)
            if highest is not None:
                known_max = segment_latest_time[0]
                if known_max is None or highest > known_max:
                    segment_latest_time[0] = highest

        return page

    return process_fn
def _build_write_fn(
    self,
    spec: OdsTaskSpec,
    source_file: str | None,
) -> Callable[[list[dict]], WriteResult]:
    """Build the list-phase write function.

    The returned callable forwards a batch of records to
    ``_insert_records_schema_aware`` for ``spec.table_name`` and wraps the
    resulting counters in a ``WriteResult``.

    Args:
        spec: Task spec supplying the target table and the source-endpoint
            settings.
        source_file: Source file name forwarded unchanged to the insert call.

    Returns:
        A callable taking a list of records and returning a ``WriteResult``.
    """

    def write_fn(records: list[dict]) -> WriteResult:
        # The endpoint is only recorded when the spec asks for it.
        endpoint_tag = spec.endpoint if spec.include_source_endpoint else None
        n_inserted, n_updated, n_skipped = self._insert_records_schema_aware(
            table=spec.table_name,
            records=records,
            response_payload=None,
            source_file=source_file,
            source_endpoint=endpoint_tag,
        )
        return WriteResult(
            inserted=n_inserted,
            updated=n_updated,
            skipped=n_skipped,
        )

    return write_fn
# ── Detail_Mode 方法 ──
def _build_detail_requests(
self,
spec: OdsTaskSpec,
) -> Iterable[PipelineRequest]:
"""从已写入 ODS 的记录中提取 ID 列表,生成详情请求序列。
仅在 spec.detail_endpoint 已配置时调用。查询 ODS 目标表获取
detail_id_column 列的值,为每个 ID 生成一个 is_detail=True 的
PipelineRequest。
"""
if not spec.detail_endpoint or not spec.detail_id_column:
return
# 从 ODS 目标表查询刚写入的 ID 列表
id_col = spec.detail_id_column
table = spec.table_name
query = f"SELECT DISTINCT {id_col} FROM {table} WHERE {id_col} IS NOT NULL"
try:
cursor = self.db.cursor()
cursor.execute(query)
rows = cursor.fetchall()
cursor.close()
except Exception:
self.logger.error(
"%s: 查询详情 ID 列表失败, table=%s, column=%s",
spec.code, table, id_col, exc_info=True,
)
return
if not rows:
self.logger.info("%s: 无需拉取详情ID 列表为空", spec.code)
return
self.logger.info(
"%s: 开始详情拉取,共 %d 个 ID", spec.code, len(rows),
)
for (record_id,) in rows:
# 使用 detail_param_builder 构造请求参数,或默认 {"id": record_id}
if spec.detail_param_builder:
params = spec.detail_param_builder({"id": record_id})
else:
params = {"id": record_id}
yield PipelineRequest(
endpoint=spec.detail_endpoint,
params=params,
data_path=spec.detail_data_path or ("data",),
list_key=spec.detail_list_key,
is_detail=True,
detail_id=record_id,
)
def _build_detail_process_fn(
self,
spec: OdsTaskSpec,
) -> Callable[[Any], list[dict]]:
"""构建详情阶段的处理函数:从预取响应中提取记录。
优先使用 spec.detail_process_fn自定义处理函数
否则回退到默认的 response.get("records") 提取。
"""
if spec.detail_process_fn is not None:
return spec.detail_process_fn
def detail_process_fn(response: Any) -> list[dict]:
records = response.get("records", [])
return records
return detail_process_fn
def _build_detail_write_fn(
    self,
    spec: OdsTaskSpec,
    source_file: str | None,
) -> Callable[[list[dict]], WriteResult]:
    """Build the detail-phase write function.

    Records are written to ``spec.detail_target_table``, falling back to
    ``spec.table_name`` when no detail table is configured.

    Args:
        spec: Task spec supplying the target table and the source-endpoint
            settings.
        source_file: Source file name forwarded unchanged to the insert call.

    Returns:
        A callable taking a list of records and returning a ``WriteResult``.
    """
    destination = spec.detail_target_table or spec.table_name

    def detail_write_fn(records: list[dict]) -> WriteResult:
        # The detail endpoint is only recorded when the spec asks for it.
        endpoint_tag = spec.detail_endpoint if spec.include_source_endpoint else None
        n_inserted, n_updated, n_skipped = self._insert_records_schema_aware(
            table=destination,
            records=records,
            response_payload=None,
            source_file=source_file,
            source_endpoint=endpoint_tag,
        )
        return WriteResult(
            inserted=n_inserted,
            updated=n_updated,
            skipped=n_skipped,
        )

    return detail_write_fn
def _resolve_window(self, cursor_data: dict | None) -> tuple[datetime, datetime, int]:
base_start, base_end, base_minutes = self._get_time_window(cursor_data)
@@ -909,6 +1221,18 @@ class BaseOdsTask(BaseTask):
_fill_missing("siteid", [site_profile.get("siteId"), site_profile.get("id")])
_fill_missing("sitename", [site_profile.get("shop_name"), site_profile.get("siteName")])
# 通用 siteid 注入：ODS 表有 siteid 列但 API 记录不含时，从 app.store_id 填充
# 场景：goods_stock_summary 等按门店请求但返回记录不含 siteId 的接口
ods_has_siteid = any(c[0].lower() == "siteid" for c in cols_info)
if ods_has_siteid:
store_id = TypeParser.parse_int(self.config.get("app.store_id"))
if store_id:
for item in merged_records:
merged = item["merged"]
existing = self._get_value_case_insensitive(merged, "siteid")
if existing in (None, "", 0):
merged["siteid"] = store_id
business_keys = [c for c in pk_cols if str(c).lower() != "content_hash"]
# P2(A): 使用 spec 上的显式开关控制去重,不再隐式依赖 has_fetched_at
# CHANGE 2026-02-19 | force_full_update 时仍查最新 hash用于判断是否回退到历史版本
@@ -1240,6 +1564,56 @@ def _bool_col(name: str, *sources: str) -> ColumnSpec:
return ColumnSpec(column=name, sources=sources, transform=_to_bool)
# ── 团购详情接口自定义 process_fn ──
# API 原始响应结构:{"data": {"groupPurchasePackage": {...}, "packageCouponAssistants": [...], ...}, "code": 0}
# detail_mode 下 process_fn 收到的是 api.post() 的原始 JSON 响应
def _group_package_detail_process_fn(response: Any) -> list[dict]:
"""从 QueryPackageCouponInfo 响应中提取字段,组装为一条扁平记录。
匹配 ods.group_buy_package_details 表结构。
"""
data = response.get("data")
if not data:
return []
pkg = data.get("groupPurchasePackage")
if not pkg:
return []
# 结构化字段(来自 data.groupPurchasePackage
record: dict[str, Any] = {
"coupon_id": pkg.get("id"),
"package_name": pkg.get("package_name"),
"duration": pkg.get("duration"),
"start_time": pkg.get("start_time"),
"end_time": pkg.get("end_time"),
"add_start_clock": pkg.get("add_start_clock"),
"add_end_clock": pkg.get("add_end_clock"),
"is_enabled": pkg.get("is_enabled"),
"is_delete": pkg.get("is_delete"),
"site_id": pkg.get("site_id"),
"tenant_id": pkg.get("tenant_id"),
"create_time": pkg.get("create_time"),
"creator_name": pkg.get("creator_name"),
}
# JSONB 数组字段
record["table_area_ids"] = pkg.get("tableAreaId")
record["table_area_names"] = pkg.get("tableAreaNameList")
record["assistant_services"] = data.get("packageCouponAssistants")
record["groupon_site_infos"] = data.get("grouponSiteInfos")
record["package_services"] = data.get("packagePackageService")
record["coupon_details_list"] = data.get("packageCouponDetailsList")
# content_hash对业务字段不含 content_hash、payload、fetched_at计算 SHA256
hash_input = json.dumps(record, sort_keys=True, ensure_ascii=False, default=str)
record["content_hash"] = hashlib.sha256(hash_input.encode("utf-8")).hexdigest()
# payload完整的 data 对象
record["payload"] = data
return [record]
ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
@@ -1251,9 +1625,18 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
data_path=("data",),
list_key="assistantInfos",
pk_columns=(_int_col("id", "id", required=True),),
extra_params={
"workStatusEnum": 0,
"dingTalkSynced": 0,
"leaveId": 0,
"criticismStatus": 0,
"signStatus": -1,
},
include_source_endpoint=False,
include_fetched_at=False,
include_record_index=True,
requires_window=False,
time_fields=None,
snapshot_mode=SnapshotMode.FULL_TABLE,
description="助教账号档案 ODSSearchAssistantInfo -> assistantInfos 原始 JSON",
),
@@ -1314,7 +1697,8 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
include_source_endpoint=False,
include_fetched_at=False,
include_record_index=True,
requires_window=False,
requires_window=True,
time_fields=("startTime", "endTime"),
snapshot_mode=SnapshotMode.WINDOW,
snapshot_time_column="create_time",
description="门店商品销售流水 ODSGetGoodsSalesList -> orderGoodsLedgers 原始 JSON",
@@ -1499,6 +1883,13 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
include_record_index=True,
requires_window=False,
snapshot_mode=SnapshotMode.FULL_TABLE,
# ── Detail_Mode 配置:团购详情接口 ──
detail_endpoint="/PackageCoupon/QueryPackageCouponInfo",
detail_param_builder=lambda rec: {"couponId": rec["id"]},
detail_target_table="ods.group_buy_package_details",
detail_data_path=("data",),
detail_id_column="id",
detail_process_fn=_group_package_detail_process_fn,
description="团购套餐定义 ODSQueryPackageCouponList -> packageCouponList 原始 JSON",
),
OdsTaskSpec(

View File

@@ -24,7 +24,8 @@ WITH base AS (
COALESCE(sh.member_discount_amount, 0) AS member_discount_amount,
COALESCE(sh.adjust_amount, 0) AS manual_discount_amount,
COALESCE(sh.pay_amount, 0) AS total_paid_amount,
COALESCE(sh.balance_amount, 0) + COALESCE(sh.recharge_card_amount, 0) + COALESCE(sh.gift_card_amount, 0) AS stored_card_deduct,
-- balance_amount = recharge_card_amount + gift_card_amount恒等式不可三者相加
COALESCE(sh.balance_amount, 0) AS stored_card_deduct,
COALESCE(sh.coupon_amount, 0) AS total_coupon_deduction,
COALESCE(sh.table_charge_money, 0) AS settle_table_fee_amount,
COALESCE(sh.assistant_pd_money, 0) + COALESCE(sh.assistant_cx_money, 0) AS settle_assistant_service_amount,

View File

@@ -22,7 +22,6 @@ class ManualIngestTask(BaseTask):
(("member_stored_value_cards",), "ods.member_stored_value_cards"),
(("recharge_settlements",), "ods.recharge_settlements"),
(("settlement_records",), "ods.settlement_records"),
(("assistant_cancellation_records",), "ods.assistant_cancellation_records"),
(("assistant_accounts_master",), "ods.assistant_accounts_master"),
(("assistant_service_records",), "ods.assistant_service_records"),
(("site_tables_master",), "ods.site_tables_master"),
@@ -47,7 +46,6 @@ class ManualIngestTask(BaseTask):
"ods.member_stored_value_cards": {"pk": "id"},
"ods.recharge_settlements": {"pk": "id"},
"ods.settlement_records": {"pk": "id"},
"ods.assistant_cancellation_records": {"pk": "id", "json_cols": ["siteProfile"]},
"ods.assistant_accounts_master": {"pk": "id"},
"ods.assistant_service_records": {"pk": "id", "json_cols": ["siteProfile"]},
"ods.site_tables_master": {"pk": "id"},

View File

@@ -51,6 +51,7 @@ class DwsVerifier(BaseVerifier):
"time_column": "stat_date",
"source_table": "dwd.dwd_settlement_head",
"source_time_column": "pay_time",
# CHANGE 2026-03-07 | 补齐 settle_type 过滤,与 finance_base_task 对齐
"agg_sql": """
SELECT
site_id,
@@ -59,9 +60,10 @@ class DwsVerifier(BaseVerifier):
COALESCE(SUM(pay_amount), 0) as cash_pay_amount,
COALESCE(SUM(table_charge_money), 0) as table_fee_amount,
COALESCE(SUM(goods_money), 0) as goods_amount,
COALESCE(SUM(table_charge_money) + SUM(goods_money) + COALESCE(SUM(assistant_pd_money), 0) + COALESCE(SUM(assistant_cx_money), 0), 0) as gross_amount
COALESCE(SUM(table_charge_money + goods_money + assistant_pd_money + assistant_cx_money), 0) as gross_amount
FROM dwd.dwd_settlement_head
WHERE pay_time >= %s AND pay_time < %s
AND settle_type IN (1, 3)
GROUP BY site_id, tenant_id, DATE(pay_time)
""",
"compare_columns": ["cash_pay_amount", "table_fee_amount", "goods_amount", "gross_amount"],

View File

@@ -97,7 +97,7 @@ class IndexVerifier(BaseVerifier):
JOIN dwd.dim_assistant d
ON s.user_id = d.user_id
AND d.scd2_is_current = 1
AND COALESCE(d.is_delete, 0) = 0
AND COALESCE(d.leave_status, 0) = 0
CROSS JOIN params p
WHERE s.last_use_time >= p.start_time
AND s.last_use_time < p.end_time

View File

@@ -22,3 +22,32 @@ def test_config_get_nested():
config = AppConfig.load({"app": {"store_id": 1}})
assert config.get("db.batch_size") == 1000
assert config.get("nonexistent.key", "default") == "default"
def test_business_day_start_hour_default():
    """The default value 8 is present when the key is not supplied."""
    loaded = AppConfig.load({"app": {"store_id": 1}})
    observed = loaded.get("app.business_day_start_hour")
    assert observed == 8
def test_business_day_start_hour_valid_range():
    """Integers within 0–23 load normally and are returned unchanged."""
    for hour in (0, 12, 23):
        settings = {"app": {"store_id": 1, "business_day_start_hour": hour}}
        loaded = AppConfig.load(settings)
        assert loaded.get("app.business_day_start_hour") == hour
def test_business_day_start_hour_out_of_range():
    """Values outside 0–23 make loading raise SystemExit."""
    # One value just above the range, then one just below it.
    for rejected in (24, -1):
        settings = {"app": {"store_id": 1, "business_day_start_hour": rejected}}
        with pytest.raises(SystemExit, match="business_day_start_hour"):
            AppConfig.load(settings)
def test_business_day_start_hour_non_int():
    """Non-integer values make loading raise SystemExit."""
    # A numeric string, then a float equal to a valid hour.
    for rejected in ("8", 8.0):
        settings = {"app": {"store_id": 1, "business_day_start_hour": rejected}}
        with pytest.raises(SystemExit, match="business_day_start_hour"):
            AppConfig.load(settings)

View File

@@ -0,0 +1,50 @@
"""线程安全的取消令牌,用于 ETL 管道的优雅中断。"""
from __future__ import annotations
import threading
class CancellationToken:
    """Cancellation token backed by a ``threading.Event``.

    Cancellation can be requested manually via ``cancel()`` or automatically
    by a timeout timer started at construction. Nothing in this class clears
    the event, so once set ``is_cancelled`` stays ``True``.
    """

    def __init__(self, timeout: float | None = None):
        """Create the token and optionally arm a timeout.

        Args:
            timeout: Seconds until automatic cancellation. A positive value
                starts a daemon ``threading.Timer`` that calls ``cancel()``
                when it fires; ``None`` or a non-positive value starts no
                timer.
        """
        self._event = threading.Event()
        self._timer: threading.Timer | None = None
        wants_timer = timeout is not None and timeout > 0
        if wants_timer:
            watchdog = threading.Timer(timeout, self.cancel)
            self._timer = watchdog
            # Daemon thread: a pending timer never keeps the process alive.
            watchdog.daemon = True
            watchdog.start()

    def cancel(self) -> None:
        """Signal cancellation; calling it repeatedly is harmless."""
        self._event.set()

    @property
    def is_cancelled(self) -> bool:
        """Whether cancellation has been signalled."""
        return self._event.is_set()

    @property
    def event(self) -> threading.Event:
        """The underlying ``Event``, for components that wait or poll on it."""
        return self._event

    def dispose(self) -> None:
        """Cancel and drop the timeout timer, if one was started.

        Intended to be called once the pipeline finishes. The timer thread is
        a daemon, so skipping this call does not block process exit.
        """
        pending = self._timer
        if pending is None:
            return
        pending.cancel()
        self._timer = None

View File

@@ -12,7 +12,6 @@ ENDPOINT_FILENAME_MAP: dict[str, str] = {
"/memberprofile/getmembercardbalancechange": "member_balance_changes.json",
"/memberprofile/gettenantmembercardlist": "member_stored_value_cards.json",
"/site/getrechargesettlelist": "recharge_settlements.json",
"/assistantperformance/getabolitionassistant": "assistant_cancellation_records.json",
"/assistantperformance/getorderassistantdetails": "assistant_service_records.json",
"/personnelmanagement/searchassistantinfo": "assistant_accounts_master.json",
"/table/getsitetables": "site_tables_master.json",

View File

@@ -0,0 +1,101 @@
"""任务级日志缓冲区,收集单个任务的所有日志,任务完成后一次性输出。
解决多任务并行执行时日志行交叉混乱的问题:每个任务维护独立的缓冲区,
任务完成后将完整日志按时间顺序一次性输出到父 logger添加 [task_code] 前缀。
"""
from __future__ import annotations
import logging
import threading
from dataclasses import dataclass, field
from datetime import datetime
@dataclass
class LogEntry:
    """One buffered log record."""

    timestamp: datetime  # time at which the entry was buffered (datetime.now())
    level: int  # logging level, e.g. logging.INFO
    task_code: str  # code of the task that produced the entry
    message: str  # message text after %-formatting


class TaskLogBuffer:
    """Per-task log buffer that is emitted to a parent logger in one go.

    Entries are collected in memory and written out by ``flush()``, each
    prefixed with ``[task_code]``. Access to the internal buffer is guarded
    by a ``threading.Lock``.
    """

    def __init__(self, task_code: str, parent_logger: logging.Logger) -> None:
        """Create an empty buffer.

        Args:
            task_code: Task code used as the prefix of every emitted line.
            parent_logger: Logger that receives the entries on ``flush()``.
        """
        self.task_code = task_code
        self._parent = parent_logger
        self._buffer: list[LogEntry] = []
        self._lock = threading.Lock()

    def log(self, level: int, message: str, *args: object) -> None:
        """Buffer one log entry.

        Args:
            level: Logging level (e.g. ``logging.INFO``).
            message: Message text; %-formatted with ``args`` when any are given.
            *args: Formatting arguments.

        A format string that does not match ``args`` never raises: the entry
        is stored as the raw message followed by ``repr(args)`` instead, so a
        faulty log call cannot abort the task that issued it.
        """
        if args:
            try:
                formatted = message % args
            except (TypeError, ValueError, KeyError):
                # Keep both pieces so the information is not lost.
                formatted = f"{message} {args!r}"
        else:
            formatted = message
        entry = LogEntry(
            timestamp=datetime.now(),
            level=level,
            task_code=self.task_code,
            message=formatted,
        )
        with self._lock:
            self._buffer.append(entry)

    # ---- convenience wrappers ----
    def debug(self, message: str, *args: object) -> None:
        """Buffer a DEBUG entry."""
        self.log(logging.DEBUG, message, *args)

    def info(self, message: str, *args: object) -> None:
        """Buffer an INFO entry."""
        self.log(logging.INFO, message, *args)

    def warning(self, message: str, *args: object) -> None:
        """Buffer a WARNING entry."""
        self.log(logging.WARNING, message, *args)

    def error(self, message: str, *args: object) -> None:
        """Buffer an ERROR entry."""
        self.log(logging.ERROR, message, *args)

    # ---- output ----
    def flush(self) -> list[LogEntry]:
        """Emit all buffered entries to the parent logger and empty the buffer.

        Entries are emitted in ascending timestamp order, each formatted as
        ``"[task_code] message"``.

        Returns:
            The emitted entries, sorted by timestamp, as a new list.
        """
        # Snapshot, emit and clear under the lock so that an entry appended
        # concurrently is never cleared without having been emitted.
        with self._lock:
            entries = sorted(self._buffer, key=lambda e: e.timestamp)
            for entry in entries:
                self._parent.log(
                    entry.level,
                    "[%s] %s",
                    entry.task_code,
                    entry.message,
                )
            self._buffer.clear()
        return entries

    @property
    def entries(self) -> list[LogEntry]:
        """A copy of the entries currently buffered (for tests/inspection)."""
        with self._lock:
            return list(self._buffer)