微信小程序页面迁移校验之前 P5任务处理之前
This commit is contained in:
@@ -26,7 +26,7 @@ SCHEMA_ETL=meta
|
||||
# API 配置(上游 SaaS API)。注意:API_TOKEN 属于敏感凭据,不应以明文提交到版本库;请通过未纳入版本控制的本地 .env 或密钥管理服务注入,已提交过的 token 应视为泄露并尽快轮换。
|
||||
# ------------------------------------------------------------------------------
|
||||
API_BASE=https://pc.ficoo.vip/apiprod/admin/v1/
|
||||
API_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnQtdHlwZSI6IjQiLCJ1c2VyLXR5cGUiOiIxIiwiaHR0cDovL3NjaGVtYXMubWljcm9zb2Z0LmNvbS93cy8yMDA4LzA2L2lkZW50aXR5L2NsYWltcy9yb2xlIjoiMTIiLCJyb2xlLWlkIjoiMTIiLCJ0ZW5hbnQtaWQiOiIyNzkwNjgzMTYwNzA5OTU3Iiwibmlja25hbWUiOiLnp5_miLfnrqHnkIblkZjvvJrmganmgakxIiwic2l0ZS1pZCI6IjAiLCJtb2JpbGUiOiIxMzgxMDUwMjMwNCIsInNpZCI6IjI5NTA0ODk2NTgzOTU4NDUiLCJzdGFmZi1pZCI6IjMwMDk5MTg2OTE1NTkwNDUiLCJvcmctaWQiOiIwIiwicm9sZS10eXBlIjoiMyIsInJlZnJlc2hUb2tlbiI6IjN4d3IwYjNWN01jemlvcFYyZnZibmtpMVg4MEhxNVFvOFRMcHh3RkNkQUk9IiwicmVmcmVzaEV4cGlyeVRpbWUiOiIyMDI2LzMvMSDkuIvljYgxMDo1MDozOCIsIm5lZWRDaGVja1Rva2VuIjoiZmFsc2UiLCJleHAiOjE3NzIzNzY2MzgsImlzcyI6InRlc3QiLCJhdWQiOiJVc2VyIn0.k_f4jnSGKOKPoZC22bVSrAo9A1FfRqvsNiGw-Vmc0qQ
|
||||
API_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnQtdHlwZSI6IjQiLCJ1c2VyLXR5cGUiOiIxIiwiaHR0cDovL3NjaGVtYXMubWljcm9zb2Z0LmNvbS93cy8yMDA4LzA2L2lkZW50aXR5L2NsYWltcy9yb2xlIjoiMTIiLCJyb2xlLWlkIjoiMTIiLCJ0ZW5hbnQtaWQiOiIyNzkwNjgzMTYwNzA5OTU3Iiwibmlja25hbWUiOiLnp5_miLfnrqHnkIblkZjvvJrmganmgakxIiwic2l0ZS1pZCI6IjAiLCJtb2JpbGUiOiIxMzgxMDUwMjMwNCIsInNpZCI6IjI5NTA0ODk2NTgzOTU4NDUiLCJzdGFmZi1pZCI6IjMwMDk5MTg2OTE1NTkwNDUiLCJvcmctaWQiOiIwIiwicm9sZS10eXBlIjoiMyIsInJlZnJlc2hUb2tlbiI6IlI5THQvRkVjSGZubkdiOTZJZ3lmdWhjaXU5WnIwREQrZFh1amhVY1RCSDQ9IiwicmVmcmVzaEV4cGlyeVRpbWUiOiIyMDI2LzMvMTEg5LiL5Y2INjo0MjozMSIsIm5lZWRDaGVja1Rva2VuIjoiZmFsc2UiLCJleHAiOjE3NzMyMjU3NTEsImlzcyI6InRlc3QiLCJhdWQiOiJVc2VyIn0.8H5V3W0NfGJrcYo9Ex-35D-SzxhC2tRaZGrgo2reYr4
|
||||
API_TIMEOUT=20
|
||||
API_PAGE_SIZE=200
|
||||
API_RETRY_MAX=3
|
||||
@@ -45,6 +45,13 @@ WRITE_PRETTY_JSON=true
|
||||
# ------------------------------------------------------------------------------
|
||||
PIPELINE_FLOW=FULL
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# 管道限流配置(RateLimiter 请求间隔,秒)
|
||||
# CHANGE 2026-03-06 | 从默认 5-20s 降至 0.1-2s,大幅缩短 ODS 请求耗时
|
||||
# ------------------------------------------------------------------------------
|
||||
PIPELINE_RATE_MIN=0.1
|
||||
PIPELINE_RATE_MAX=2.0
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# 时间窗口配置
|
||||
# ------------------------------------------------------------------------------
|
||||
@@ -166,7 +173,7 @@ DWD_FACT_UPSERT=true
|
||||
# ------------------------------------------------------------------------------
|
||||
# 任务列表配置
|
||||
# ------------------------------------------------------------------------------
|
||||
RUN_TASKS=PRODUCTS,TABLES,MEMBERS,ASSISTANTS,PACKAGES_DEF,ORDERS,PAYMENTS,REFUNDS,COUPON_USAGE,INVENTORY_CHANGE,TOPUPS,TABLE_DISCOUNT,ASSISTANT_ABOLISH,LEDGER
|
||||
RUN_TASKS=PRODUCTS,TABLES,MEMBERS,ASSISTANTS,PACKAGES_DEF,ORDERS,PAYMENTS,REFUNDS,COUPON_USAGE,INVENTORY_CHANGE,TOPUPS,TABLE_DISCOUNT,LEDGER
|
||||
INDEX_LOOKBACK_DAYS=60
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
@@ -107,6 +107,11 @@ class APIClient:
|
||||
"""
|
||||
return self._post_json(endpoint, params)
|
||||
|
||||
# CHANGE [2026-03-06] intent: 补齐公共 post() 方法,UnifiedPipeline 详情拉取模式需要调用 self.api.post()
|
||||
def post(self, endpoint: str, params: dict | None = None) -> dict:
|
||||
"""发送 POST JSON 请求(与 get 相同,语义更明确的别名)。"""
|
||||
return self._post_json(endpoint, params)
|
||||
|
||||
def _post_json(self, endpoint: str, payload: dict | None = None) -> dict:
|
||||
if not self.base_url:
|
||||
raise ValueError("API base_url 未配置")
|
||||
@@ -292,3 +297,10 @@ class APIClient:
|
||||
return v
|
||||
|
||||
return []
|
||||
|
||||
# AI_CHANGELOG:
|
||||
# - 日期: 2026-03-06 08:37:26
|
||||
# - Prompt: P20260306-083206
|
||||
# - 直接原因: APIClient 缺少公共 post() 方法,UnifiedPipeline 详情拉取模式调用 self.api.post() 失败
|
||||
# - 变更摘要: 新增 post() 作为 _post_json() 的公共别名,与已有 get() 对齐
|
||||
# - 风险与验证: 极低风险,纯别名转发;166 个单元测试通过
|
||||
|
||||
43
apps/etl/connectors/feiqiu/api/rate_limiter.py
Normal file
43
apps/etl/connectors/feiqiu/api/rate_limiter.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""请求间隔控制器,支持取消信号中断等待。"""
|
||||
|
||||
import random
|
||||
import time
|
||||
import threading
|
||||
|
||||
|
||||
class RateLimiter:
|
||||
"""请求间隔控制器,在相邻 API 请求之间插入随机等待时间,防止触发上游风控。
|
||||
|
||||
等待期间以 0.5s 为单位轮询 cancel_event,支持快速响应取消信号。
|
||||
"""
|
||||
|
||||
def __init__(self, min_interval: float = 5.0, max_interval: float = 20.0):
|
||||
if min_interval > max_interval:
|
||||
raise ValueError(
|
||||
f"min_interval({min_interval}) 不能大于 max_interval({max_interval})"
|
||||
)
|
||||
self._min = min_interval
|
||||
self._max = max_interval
|
||||
self._last_interval: float = 0.0
|
||||
|
||||
def wait(self, cancel_event: threading.Event | None = None) -> bool:
|
||||
"""等待随机间隔。返回 False 表示被取消信号中断。
|
||||
|
||||
将等待时间拆分为 0.5s 小段,每段检查 cancel_event,
|
||||
以便在取消信号到达时快速退出(最多延迟 0.5s)。
|
||||
"""
|
||||
interval = random.uniform(self._min, self._max)
|
||||
self._last_interval = interval
|
||||
remaining = interval
|
||||
while remaining > 0:
|
||||
if cancel_event and cancel_event.is_set():
|
||||
return False
|
||||
sleep_time = min(0.5, remaining)
|
||||
time.sleep(sleep_time)
|
||||
remaining -= sleep_time
|
||||
return True
|
||||
|
||||
@property
|
||||
def last_interval(self) -> float:
|
||||
"""最近一次 wait() 生成的随机间隔值。"""
|
||||
return self._last_interval
|
||||
@@ -36,6 +36,11 @@ class RecordingAPIClient:
|
||||
self.last_dump: dict[str, Any] | None = None
|
||||
|
||||
# ------------------------------------------------------------------ 公共 API
|
||||
# CHANGE [2026-03-06] intent: 补齐 post() 代理,使 RecordingAPIClient 完整覆盖 APIClient 公共接口
|
||||
def post(self, endpoint: str, params: dict | None = None) -> dict:
|
||||
"""委托给底层 APIClient 的 post 方法(详情拉取等非分页请求使用)。"""
|
||||
return self.base.post(endpoint, params)
|
||||
|
||||
def get_source_hint(self, endpoint: str) -> str:
    """Return, as a string, the JSON dump path used for ``endpoint``.

    The path is ``self.output_dir`` joined with the file name produced by
    ``endpoint_to_filename(endpoint)``; it is intended for source_file lineage.
    """
    dump_dir = self.output_dir
    dump_path = dump_dir / endpoint_to_filename(endpoint)
    return str(dump_path)
|
||||
@@ -193,6 +198,12 @@ def build_recording_client(
|
||||
|
||||
|
||||
# AI_CHANGELOG:
|
||||
# - 日期: 2026-03-06 08:37:26
|
||||
# - Prompt: P20260306-083206
|
||||
# - 直接原因: RecordingAPIClient 缺少 post() 方法,UnifiedPipeline 详情拉取模式调用失败
|
||||
# - 变更摘要: 新增 post() 方法委托给 self.base.post(),补齐代理接口覆盖
|
||||
# - 风险与验证: 极低风险,纯委托转发;166 个单元测试通过
|
||||
#
|
||||
# - 日期: 2026-02-14
|
||||
# - Prompt: P20260214-040231(审计收口补录)
|
||||
# - 直接原因: 默认时区 Asia/Taipei 与运营地区(中国大陆)不符
|
||||
|
||||
@@ -282,6 +282,32 @@ def parse_args():
|
||||
parser.add_argument("--idle-end", help="闲时窗口结束(HH:MM)")
|
||||
parser.add_argument("--allow-empty-advance", action="store_true", help="允许空结果推进窗口")
|
||||
|
||||
# Pipeline 管道参数(覆盖 PipelineConfig 全局默认值)
|
||||
parser.add_argument(
|
||||
"--pipeline-workers",
|
||||
dest="pipeline_workers",
|
||||
type=int,
|
||||
help="Pipeline 处理线程数(覆盖 pipeline.workers,默认 2)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--pipeline-batch-size",
|
||||
dest="pipeline_batch_size",
|
||||
type=int,
|
||||
help="Pipeline 批量写入阈值(覆盖 pipeline.batch_size,默认 100)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--pipeline-rate-min",
|
||||
dest="pipeline_rate_min",
|
||||
type=float,
|
||||
help="Pipeline 限流最小间隔秒数(覆盖 pipeline.rate_min,默认 5.0)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--pipeline-rate-max",
|
||||
dest="pipeline_rate_max",
|
||||
type=float,
|
||||
help="Pipeline 限流最大间隔秒数(覆盖 pipeline.rate_max,默认 20.0)",
|
||||
)
|
||||
|
||||
# 强制全量更新(跳过 ODS hash 去重 + DWD 变更对比,无条件写入)
|
||||
parser.add_argument(
|
||||
"--force-full",
|
||||
@@ -406,6 +432,16 @@ def build_cli_overrides(args) -> dict:
|
||||
# 强制全量更新
|
||||
if args.force_full:
|
||||
overrides.setdefault("run", {})["force_full_update"] = True
|
||||
|
||||
# Pipeline 管道参数 → pipeline.* 命名空间(供 PipelineConfig.from_app_config() 读取)
|
||||
if getattr(args, "pipeline_workers", None) is not None:
|
||||
overrides.setdefault("pipeline", {})["workers"] = args.pipeline_workers
|
||||
if getattr(args, "pipeline_batch_size", None) is not None:
|
||||
overrides.setdefault("pipeline", {})["batch_size"] = args.pipeline_batch_size
|
||||
if getattr(args, "pipeline_rate_min", None) is not None:
|
||||
overrides.setdefault("pipeline", {})["rate_min"] = args.pipeline_rate_min
|
||||
if getattr(args, "pipeline_rate_max", None) is not None:
|
||||
overrides.setdefault("pipeline", {})["rate_max"] = args.pipeline_rate_max
|
||||
|
||||
# 任务
|
||||
if args.tasks:
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
DEFAULTS = {
|
||||
"app": {
|
||||
"timezone": "Asia/Shanghai",
|
||||
"business_day_start_hour": 8,
|
||||
"store_id": "",
|
||||
# CHANGE 2026-02-15 | 对齐新库 etl_feiqiu 六层架构
|
||||
"schema_oltp": "ods",
|
||||
@@ -52,7 +53,6 @@ DEFAULTS = {
|
||||
"INVENTORY_CHANGE",
|
||||
"TOPUPS",
|
||||
"TABLE_DISCOUNT",
|
||||
"ASSISTANT_ABOLISH",
|
||||
"LEDGER",
|
||||
],
|
||||
"dws_tasks": [],
|
||||
@@ -178,5 +178,4 @@ TASK_TABLES = "TABLES"
|
||||
TASK_PACKAGES_DEF = "PACKAGES_DEF"
|
||||
TASK_TOPUPS = "TOPUPS"
|
||||
TASK_TABLE_DISCOUNT = "TABLE_DISCOUNT"
|
||||
TASK_ASSISTANT_ABOLISH = "ASSISTANT_ABOLISH"
|
||||
TASK_LEDGER = "LEDGER"
|
||||
|
||||
@@ -7,6 +7,7 @@ from copy import deepcopy
|
||||
|
||||
ENV_MAP = {
|
||||
"TIMEZONE": ("app.timezone",),
|
||||
"BUSINESS_DAY_START_HOUR": ("app.business_day_start_hour",),
|
||||
"STORE_ID": ("app.store_id",),
|
||||
"SCHEMA_OLTP": ("app.schema_oltp",),
|
||||
"SCHEMA_ETL": ("app.schema_etl",),
|
||||
@@ -114,6 +115,9 @@ ENV_MAP = {
|
||||
"DATA_SOURCE": ("run.data_source",),
|
||||
# API 额外请求头(JSON 对象格式)
|
||||
"API_HEADERS_EXTRA": ("api.headers_extra",),
|
||||
# Pipeline 管道限流参数
|
||||
"PIPELINE_RATE_MIN": ("pipeline.rate_min",),
|
||||
"PIPELINE_RATE_MAX": ("pipeline.rate_max",),
|
||||
}
|
||||
|
||||
|
||||
|
||||
75
apps/etl/connectors/feiqiu/config/pipeline_config.py
Normal file
75
apps/etl/connectors/feiqiu/config/pipeline_config.py
Normal file
@@ -0,0 +1,75 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""统一管道配置数据类。
|
||||
|
||||
支持全局默认值 + 任务级覆盖的三级回退:
|
||||
pipeline.<task_code>.* → pipeline.* → 硬编码默认值
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .settings import AppConfig
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PipelineConfig:
    """Unified pipeline settings with global defaults and per-task overrides.

    The field defaults below are the single source of truth for fallback
    values; :meth:`from_app_config` falls back to them when neither a
    task-level nor a global configuration key is present.
    """

    workers: int = 2  # ProcessingPool worker thread count
    queue_size: int = 100  # processing queue capacity
    batch_size: int = 100  # WriteWorker batch-write threshold
    batch_timeout: float = 5.0  # WriteWorker wait timeout (seconds)
    rate_min: float = 0.1  # RateLimiter minimum interval (seconds)
    rate_max: float = 2.0  # RateLimiter maximum interval (seconds)
    max_consecutive_failures: int = 10  # abort threshold for consecutive failures

    def __post_init__(self) -> None:
        """Validate field values.

        Raises:
            ValueError: if ``workers``, ``queue_size`` or ``batch_size`` is
                below 1, or if ``rate_min`` exceeds ``rate_max``.
        """
        if self.workers < 1:
            raise ValueError(f"workers 必须 >= 1,当前值: {self.workers}")
        if self.queue_size < 1:
            raise ValueError(f"queue_size 必须 >= 1,当前值: {self.queue_size}")
        if self.batch_size < 1:
            raise ValueError(f"batch_size 必须 >= 1,当前值: {self.batch_size}")
        if self.rate_min > self.rate_max:
            raise ValueError(
                f"rate_min({self.rate_min}) 不能大于 rate_max({self.rate_max})"
            )

    @classmethod
    def from_app_config(
        cls,
        config: AppConfig,
        task_code: str | None = None,
    ) -> PipelineConfig:
        """Build a config from ``config``, honouring per-task overrides.

        Lookup order for every field ``<key>``:
          1. ``pipeline.<task_code lower-cased>.<key>`` (only when
             ``task_code`` is non-empty)
          2. ``pipeline.<key>``
          3. the dataclass field default

        A value found in ``config`` is converted with the type of the field
        default (``int`` or ``float``).

        Args:
            config: object exposing ``get(dotted_key)`` that returns ``None``
                for a missing key.
            task_code: optional task code selecting the task-level namespace.

        Raises:
            ValueError: if the resolved values fail ``__post_init__``
                validation or a configured value cannot be converted.
        """
        # Fallbacks come from the field defaults rather than a second set of
        # literals, so the two can never drift apart (previously rate_min /
        # rate_max fell back to 5.0 / 20.0 here, contradicting the fields).
        defaults = cls()
        task_prefix = f"pipeline.{task_code.lower()}." if task_code else None

        def _get(key: str):
            default = getattr(defaults, key)
            # Task-level override.
            if task_prefix is not None:
                val = config.get(f"{task_prefix}{key}")
                if val is not None:
                    return type(default)(val)
            # Global level.
            val = config.get(f"pipeline.{key}")
            if val is not None:
                return type(default)(val)
            # Field default.
            return default

        return cls(
            workers=_get("workers"),
            queue_size=_get("queue_size"),
            batch_size=_get("batch_size"),
            batch_timeout=_get("batch_timeout"),
            rate_min=_get("rate_min"),
            rate_max=_get("rate_max"),
            max_consecutive_failures=_get("max_consecutive_failures"),
        )
|
||||
@@ -111,6 +111,12 @@ class AppConfig:
|
||||
missing.append("app.store_id")
|
||||
if missing:
|
||||
raise SystemExit("缺少必需配置: " + ", ".join(missing))
|
||||
|
||||
# business_day_start_hour 范围校验(0–23 整数)
|
||||
hour = cfg["app"].get("business_day_start_hour", 8)
|
||||
if not isinstance(hour, int) or not (0 <= hour <= 23):
|
||||
raise SystemExit("app.business_day_start_hour 必须为 0–23 的整数")
|
||||
|
||||
|
||||
def get(self, key: str, default=None):
|
||||
"""获取配置值(支持点号路径)"""
|
||||
|
||||
@@ -20,7 +20,15 @@ class DatabaseConnection:
|
||||
# 生产环境要求:数据库连接超时不得超过 20 秒。
|
||||
timeout_val = max(1, min(int(timeout_val), 20))
|
||||
|
||||
conn = psycopg2.connect(self._dsn, connect_timeout=timeout_val)
|
||||
# CHANGE 2026-03-06 | intent: 修复 Windows GBK 环境下 psycopg2 连接握手的 UnicodeDecodeError
|
||||
# assumptions: libpq 默认使用系统 locale 的 client_encoding,Windows 中文系统为 GBK/CP936
|
||||
# 边界: 显式指定 client_encoding=utf8 确保连接层始终使用 UTF-8,与数据库 server_encoding 一致
|
||||
# 验证: web-admin 手动触发 ETL 全量 flow,不再出现 0xd6 解码错误
|
||||
conn = psycopg2.connect(
|
||||
self._dsn,
|
||||
connect_timeout=timeout_val,
|
||||
options="-c client_encoding=utf8",
|
||||
)
|
||||
conn.autocommit = False
|
||||
|
||||
# 会话参数(时区、语句超时等)
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""数据库批量操作"""
|
||||
"""数据库批量操作
|
||||
|
||||
AI_CHANGELOG
|
||||
- 2026-03-06 09:17:16 | Prompt: P20260306-084752(摘录:DWD 并行装载全部失败 _dsn 属性缺失)| Direct cause:DatabaseOperations 组合模式未透传 _dsn/_session/_connect_timeout | Summary:新增 3 个 property 透传底层 DatabaseConnection 属性 | Verify:334 单元测试通过 + getDiagnostics 无问题
|
||||
"""
|
||||
import psycopg2.extras
|
||||
import re
|
||||
|
||||
@@ -9,6 +13,23 @@ class DatabaseOperations:
|
||||
def __init__(self, connection):
|
||||
self._connection = connection
|
||||
self.conn = connection.conn
|
||||
|
||||
# [CHANGE P20260306-084752] intent: 透传底层 DatabaseConnection 的连接参数,
|
||||
# DwdLoadTask._process_single_table 需要 _dsn/_session/_connect_timeout
|
||||
# 为每个线程创建独立连接
|
||||
# assumptions: _connection 始终是 DatabaseConnection 实例,具有这三个属性
|
||||
# verify: 334 单元测试通过,DWD 并行装载不再 AttributeError
|
||||
@property
|
||||
def _dsn(self):
|
||||
return self._connection._dsn
|
||||
|
||||
@property
|
||||
def _session(self):
|
||||
return self._connection._session
|
||||
|
||||
@property
|
||||
def _connect_timeout(self):
|
||||
return self._connection._connect_timeout
|
||||
|
||||
def batch_execute(self, sql: str, rows: list, page_size: int = 1000):
|
||||
"""批量执行SQL"""
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
### 1.1 助教日报(dws_assistant_daily_detail)
|
||||
|
||||
- 目标表:`dws.dws_assistant_daily_detail`
|
||||
- 数据来源:`dwd_assistant_service_log`、`dwd_assistant_trash_event`、`dim_assistant`(SCD2)
|
||||
- 数据来源:`dwd_assistant_service_log`、`dwd_assistant_service_log_ex`(提供 `is_trash` 标记)、`dim_assistant`(SCD2)
|
||||
- 粒度:门店 × 助教 × 日期
|
||||
- 核心指标:服务次数(总/基础课/附加课/包厢课)、计费秒数与小时数、台账金额、去重客户数与台桌数、废除统计
|
||||
- 课程类型分类:通过 `cfg_skill_type` 映射 `skill_id` → `BASE`/`BONUS`/`ROOM`
|
||||
|
||||
@@ -42,6 +42,12 @@
|
||||
| 23 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 版本失效时间 |
|
||||
| 24 | scd2_is_current | INTEGER | YES | | 当前版本标记 |
|
||||
| 25 | scd2_version | INTEGER | YES | | 版本号 |
|
||||
| 26 | table_area_ids | JSONB | YES | | 可用台区 ID 列表(来自详情接口 tableAreaId) |
|
||||
| 27 | table_area_names | JSONB | YES | | 可用台区名称列表(来自详情接口 tableAreaNameList) |
|
||||
| 28 | assistant_services | JSONB | YES | | 助教服务关联数组(来自详情接口 packageCouponAssistants) |
|
||||
| 29 | groupon_site_infos | JSONB | YES | | 关联门店信息数组(来自详情接口 grouponSiteInfos) |
|
||||
|
||||
> 字段 26-29 由迁移脚本 `db/etl_feiqiu/migrations/2026-03-05__add_detail_fields_to_dim_groupbuy_package_ex.sql` 新增,数据来源为 `ods.group_buy_package_details`(通过 LEFT JOIN `coupon_id = groupbuy_package_id` 合并)。
|
||||
|
||||
## 样本数据
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
| 14 | is_confirm | INTEGER | YES | | 是否确认。**枚举值**: 2(5003)=**[待确认]** |
|
||||
| 15 | is_single_order | INTEGER | YES | | 是否独立订单。**枚举值**: 1(5003)=是 |
|
||||
| 16 | is_not_responding | INTEGER | YES | | 无响应。**枚举值**: 0(5003)=正常 |
|
||||
| 17 | is_trash | INTEGER | YES | | 是否废单。**枚举值**: 0(5003)=正常 |
|
||||
| 17 | is_trash | INTEGER | YES | | 是否废单。**枚举值**: 0=正常, 1=已作废。⚠️ 此字段是判断助教服务是否作废的唯一依据,替代已废弃的 `dwd_assistant_trash_event` 表(2026-02-22 DROP)。DWS 层助教日报等任务通过此字段过滤废单统计。 |
|
||||
| 18 | trash_applicant_id | BIGINT | YES | | 废单申请人 ID(当前数据全为 0) |
|
||||
| 19 | trash_applicant_name | VARCHAR(64) | YES | | 废单申请人姓名(当前数据全为空) |
|
||||
| 20 | trash_reason | VARCHAR(255) | YES | | 废单原因(当前数据全为空) |
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
| 序号 | 表名 | 说明 | 主键 | 扩展表 | 文档链接 |
|
||||
|------|------|------|------|--------|----------|
|
||||
| 1 | dwd_assistant_service_log | 助教服务流水 | assistant_service_id | dwd_assistant_service_log_ex | [主表](BD_manual_dwd_assistant_service_log.md) / [扩展表](BD_manual_dwd_assistant_service_log_ex.md) |
|
||||
| 2 | dwd_assistant_trash_event | 助教服务作废 | assistant_trash_event_id | dwd_assistant_trash_event_ex | [主表](BD_manual_dwd_assistant_trash_event.md) / [扩展表](BD_manual_dwd_assistant_trash_event_ex.md) |
|
||||
| 2 | ~~dwd_assistant_trash_event~~ | ~~助教服务作废~~ | — | — | ⚠️ 已于 2026-02-22 废弃,作废判断改用 `dwd_assistant_service_log_ex.is_trash` |
|
||||
| 3 | dwd_groupbuy_redemption | 团购券核销 | redemption_id | dwd_groupbuy_redemption_ex | [主表](BD_manual_dwd_groupbuy_redemption.md) / [扩展表](BD_manual_dwd_groupbuy_redemption_ex.md) |
|
||||
| 4 | dwd_member_balance_change | 会员余额变动 | balance_change_id | dwd_member_balance_change_ex | [主表](BD_manual_dwd_member_balance_change.md) / [扩展表](BD_manual_dwd_member_balance_change_ex.md) |
|
||||
| 5 | dwd_payment | 支付流水 | payment_id | 无 | [主表](BD_manual_dwd_payment.md) |
|
||||
@@ -118,7 +118,7 @@ SELECT * FROM dwd.dwd_payment ORDER BY pay_time DESC NULLS LAST LIMIT 1;
|
||||
| dwd_table_fee_adjust | 2,849 |
|
||||
| dwd_assistant_service_log | 1,090 |
|
||||
| dwd_recharge_order | 455 |
|
||||
| dwd_assistant_trash_event | 98 |
|
||||
| ~~dwd_assistant_trash_event~~ | ~~98~~ | ⚠️ 已废弃(2026-02-22) |
|
||||
| dwd_refund | 45 |
|
||||
|
||||
---
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
| M7 | 2 | 麻将/麻将棋牌 |
|
||||
| M8 | 1 | 麻将/麻将棋牌 |
|
||||
| K包 | 4 | K包/K歌/KTV |
|
||||
| VIP包厢 | 4 | 台球/打球/中八/追分 (V5为 台球/打球/斯诺克) |
|
||||
| VIP包厢 | 4 | 🎱 中式/追分 (V1-V4)、斯诺克 (V5) |
|
||||
| 斯诺克区 | 4 | 台球/打球/斯诺克 |
|
||||
| 666 | 2 | 麻将/麻将棋牌 |
|
||||
| TV台 | 1 | 台球/打球/中八/追分 |
|
||||
|
||||
@@ -35,7 +35,7 @@
|
||||
| 16 | member_card_type_name | VARCHAR(100) | YES | | 卡类型名称(当前数据全为空) |
|
||||
| 17 | is_bind_member | BOOLEAN | YES | | 是否绑定会员。**枚举值**: False=否 |
|
||||
| 18 | member_discount_amount | NUMERIC(18,2) | YES | | 会员折扣金额 |
|
||||
| 19 | consume_money | NUMERIC(18,2) | YES | | 消费总金额(元) |
|
||||
| 19 | consume_money | NUMERIC(18,2) | YES | | 消费总金额(元)。⚠️ **口径不稳定**:存在三种历史口径(A/B/C),DWS 层不应直接使用,应使用 `items_sum = table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money`。详见 [consume_money 口径](../../../../docs/reports/DWD-DOC/consume/consume-money-caliber.md) |
|
||||
| 20 | table_charge_money | NUMERIC(18,2) | YES | | 台费金额 |
|
||||
| 21 | goods_money | NUMERIC(18,2) | YES | | 商品金额 |
|
||||
| 22 | real_goods_money | NUMERIC(18,2) | YES | | 实收商品金额 |
|
||||
@@ -71,19 +71,30 @@ LIMIT 1;
|
||||
```
|
||||
**使用示例**
|
||||
```sql
|
||||
-- 每日营收统计
|
||||
-- 每日营收统计(使用 items_sum 口径,不使用 consume_money)
|
||||
SELECT
|
||||
DATE(pay_time) AS pay_date,
|
||||
COUNT(*) AS order_count,
|
||||
SUM(consume_money) AS total_consume,
|
||||
SUM(table_charge_money + goods_money + assistant_pd_money
|
||||
+ assistant_cx_money + electricity_money) AS total_items_sum,
|
||||
SUM(pay_amount) AS total_pay
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE settle_type IN (1, 3)
|
||||
GROUP BY DATE(pay_time)
|
||||
ORDER BY pay_date DESC;
|
||||
-- 台费 vs 商品 vs 助教收入
|
||||
SELECT
|
||||
SUM(table_charge_money) AS table_revenue,
|
||||
SUM(goods_money) AS goods_revenue,
|
||||
SUM(assistant_pd_money + assistant_cx_money) AS assistant_revenue
|
||||
FROM dwd.dwd_settlement_head;
|
||||
SUM(assistant_pd_money) AS assistant_pd_revenue,
|
||||
SUM(assistant_cx_money) AS assistant_cx_revenue
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE settle_type IN (1, 3);
|
||||
```
|
||||
|
||||
**支付渠道恒等式(100% 成立)**
|
||||
```
|
||||
balance_amount = recharge_card_amount + gift_card_amount -- 储值卡 = 充值卡 + 礼品卡
|
||||
pay_amount = point_amount + cash_amount -- 实付 = 积分 + 现金(互斥)
|
||||
```
|
||||
> `balance_amount` 是独立支付渠道,`recharge_card_amount`/`gift_card_amount` 是其分账明细,不可重复计算。
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# cfg_area_category 台区分类映射表
|
||||
|
||||
> 生成时间:2026-02-03
|
||||
> 生成时间:2026-02-03 | 更新时间:2026-03-07
|
||||
|
||||
## 表信息
|
||||
|
||||
@@ -9,8 +9,9 @@
|
||||
| Schema | dws |
|
||||
| 表名 | cfg_area_category |
|
||||
| 主键 | category_id |
|
||||
| 唯一约束 | (source_area_name, COALESCE(source_table_name, '')) |
|
||||
| 数据来源 | 手工维护/seed脚本(基于dim_table实际数据) |
|
||||
| 说明 | 将dim_table.site_table_area_name映射到财务报表区域分类 |
|
||||
| 说明 | 将dim_table的台区/台桌映射到项目分类,支持台桌级细分 |
|
||||
|
||||
## 字段说明
|
||||
|
||||
@@ -18,57 +19,47 @@
|
||||
|------|--------|------|------|------|------|
|
||||
| 1 | category_id | SERIAL | NO | PK | 分类ID(自增) |
|
||||
| 2 | source_area_name | VARCHAR(100) | NO | UK | 源区域名称(来自dim_table.site_table_area_name) |
|
||||
| 3 | category_code | VARCHAR(20) | NO | | 分类代码。**枚举值**: BILLIARD, BILLIARD_VIP, SNOOKER, MAHJONG, KTV, SPECIAL, OTHER |
|
||||
| 4 | category_name | VARCHAR(50) | NO | | 分类名称 |
|
||||
| 5 | match_type | VARCHAR(10) | NO | | 匹配类型。**枚举值**: EXACT(精确), LIKE(模糊), DEFAULT(兜底) |
|
||||
| 6 | match_priority | INTEGER | NO | | 匹配优先级(数字越小优先级越高) |
|
||||
| 7 | is_active | BOOLEAN | NO | | 是否启用 |
|
||||
| 8 | description | TEXT | YES | | 说明 |
|
||||
| 9 | created_at | TIMESTAMPTZ | NO | | 创建时间 |
|
||||
| 10 | updated_at | TIMESTAMPTZ | NO | | 更新时间 |
|
||||
| 3 | source_table_name | VARCHAR(100) | YES | UK | 源台桌名称(来自dim_table.table_name),NULL表示区域级映射 |
|
||||
| 4 | category_code | VARCHAR(20) | NO | | 分类代码。**枚举值**: BILLIARD, SNOOKER, MAHJONG, KTV, SPECIAL, OTHER |
|
||||
| 5 | category_name | VARCHAR(50) | NO | | 分类名称(含emoji) |
|
||||
| 6 | display_name | VARCHAR(50) | YES | | 显示名称(用于筛选器) |
|
||||
| 7 | short_name | VARCHAR(20) | YES | | 简写(用于列表标签) |
|
||||
| 8 | match_type | VARCHAR(10) | NO | | 匹配类型。**枚举值**: EXACT(精确), LIKE(模糊), DEFAULT(兜底) |
|
||||
| 9 | match_priority | INTEGER | NO | | 匹配优先级(数字越小优先级越高) |
|
||||
| 10 | is_active | BOOLEAN | NO | | 是否启用 |
|
||||
| 11 | description | TEXT | YES | | 说明 |
|
||||
| 12 | created_at | TIMESTAMPTZ | NO | | 创建时间 |
|
||||
| 13 | updated_at | TIMESTAMPTZ | NO | | 更新时间 |
|
||||
|
||||
## 分类映射示例
|
||||
## 变更说明(2026-03-07)
|
||||
|
||||
| 源区域名称 | 分类代码 | 分类名称 |
|
||||
|------------|----------|----------|
|
||||
| A区 | BILLIARD | 台球散台 |
|
||||
| B区 | BILLIARD | 台球散台 |
|
||||
| C区 | BILLIARD | 台球散台 |
|
||||
| TV台 | BILLIARD | 台球散台 |
|
||||
| VIP包厢 | BILLIARD_VIP | 台球VIP |
|
||||
| 斯诺克区 | SNOOKER | 斯诺克 |
|
||||
| 麻将房 | MAHJONG | 麻将棋牌 |
|
||||
| M7 | MAHJONG | 麻将棋牌 |
|
||||
| M8 | MAHJONG | 麻将棋牌 |
|
||||
| 666 | MAHJONG | 麻将棋牌 |
|
||||
| 发财 | MAHJONG | 麻将棋牌 |
|
||||
| K包 | KTV | K歌娱乐 |
|
||||
| k包活动区 | KTV | K歌娱乐 |
|
||||
| 幸会158 | KTV | K歌娱乐 |
|
||||
| 补时长 | SPECIAL | 补时长 |
|
||||
### 新增字段
|
||||
- `source_table_name`:支持台桌级细分映射(如 VIP包厢 V5 → SNOOKER)
|
||||
- `display_name`:前端筛选器显示名称
|
||||
- `short_name`:列表中的简写标签
|
||||
|
||||
## 使用说明
|
||||
### 删除类型
|
||||
- `BILLIARD_VIP` 已废弃,VIP包厢 V1-V4 归入 `BILLIARD`,V5 归入 `SNOOKER`
|
||||
|
||||
**取值方式**
|
||||
### 唯一约束变更
|
||||
- 从 `(source_area_name)` 改为 `(source_area_name, COALESCE(source_table_name, ''))`
|
||||
|
||||
```sql
|
||||
-- 将台区名称映射到分类
|
||||
SELECT
|
||||
dt.site_table_area_name,
|
||||
COALESCE(ac.category_code, 'OTHER') AS category_code,
|
||||
COALESCE(ac.category_name, '其他') AS category_name
|
||||
FROM dwd.dim_table dt
|
||||
LEFT JOIN dws.cfg_area_category ac
|
||||
ON dt.site_table_area_name = ac.source_area_name
|
||||
AND ac.is_active = TRUE
|
||||
WHERE dt.scd2_is_current = 1;
|
||||
## 匹配优先级
|
||||
|
||||
-- 按分类汇总收入
|
||||
SELECT
|
||||
COALESCE(ac.category_name, '其他') AS category_name,
|
||||
SUM(tfl.ledger_amount) AS total_amount
|
||||
FROM dwd.dwd_table_fee_log tfl
|
||||
LEFT JOIN dwd.dim_table dt ON dt.table_id = tfl.site_table_id
|
||||
LEFT JOIN dws.cfg_area_category ac ON dt.site_table_area_name = ac.source_area_name
|
||||
GROUP BY COALESCE(ac.category_name, '其他');
|
||||
```
|
||||
| 优先级 | 匹配方式 | 说明 |
|
||||
|--------|---------|------|
|
||||
| 5 | 台桌级精确 | source_area_name + source_table_name 都匹配 |
|
||||
| 10 | 区域级精确 | source_area_name 匹配,source_table_name 为 NULL |
|
||||
| 50 | 模糊匹配 | source_area_name 包含模式匹配 |
|
||||
| 999 | 兜底 | 无法匹配的区域归入 OTHER |
|
||||
|
||||
## 分类映射
|
||||
|
||||
| 分类代码 | 显示名称 | 简写 | 源区域 |
|
||||
|----------|---------|------|--------|
|
||||
| BILLIARD | 🎱 中式/追分 | 🎱 | A区、B区、C区、TV台、VIP包厢(V1-V4) |
|
||||
| SNOOKER | 斯诺克 | 斯 | 斯诺克区、VIP包厢(V5) |
|
||||
| MAHJONG | 🀄 麻将/棋牌 | 🀄 | 麻将房、M7、M8、666、发财 |
|
||||
| KTV | 🎤 团建/K歌 | 🎤 | K包、k包活动区、幸会158 |
|
||||
| SPECIAL | 补时长 | 补 | 补时长 |
|
||||
| OTHER | 其他 | 他 | 兜底 |
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
| 表名 | dws_assistant_daily_detail |
|
||||
| 主键 | id |
|
||||
| 唯一键 | (site_id, assistant_id, stat_date) |
|
||||
| 数据来源 | dwd_assistant_service_log + dwd_assistant_trash_event |
|
||||
| 数据来源 | dwd_assistant_service_log + dwd_assistant_service_log_ex |
|
||||
| 更新频率 | 每小时增量更新 |
|
||||
| 说明 | 以"助教+日期"为粒度,汇总每日业绩明细 |
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
| 5 | assistant_nickname | VARCHAR(50) | YES | 助教花名(冗余,便于查询展示) |
|
||||
| 6 | stat_date | DATE | NO | 统计日期 |
|
||||
| 7 | assistant_level_code | INTEGER | YES | 助教等级代码(SCD2口径:取stat_date当日生效的等级) |
|
||||
| 8 | assistant_level_name | VARCHAR(20) | YES | 助教等级名称 |
|
||||
| 8 | assistant_level_name | VARCHAR(20) | YES | 助教等级名称(由 `level_code` 静态映射得出,不依赖 SCD2 返回值) |
|
||||
| 9 | total_service_count | INTEGER | NO | 总服务次数 |
|
||||
| 10 | base_service_count | INTEGER | NO | 基础课服务次数 |
|
||||
| 11 | bonus_service_count | INTEGER | NO | 附加课服务次数 |
|
||||
@@ -46,8 +46,12 @@
|
||||
| 26 | unique_tables | INTEGER | NO | 服务台桌数(去重) |
|
||||
| 27 | trashed_seconds | INTEGER | NO | 被废除的服务时长(秒) |
|
||||
| 28 | trashed_count | INTEGER | NO | 被废除的服务次数 |
|
||||
| 29 | created_at | TIMESTAMPTZ | NO | 创建时间 |
|
||||
| 30 | updated_at | TIMESTAMPTZ | NO | 更新时间 |
|
||||
| 29 | penalty_minutes | NUMERIC(10,2) | YES | 惩罚分钟数(定档折算)。公式:`actual_minutes × (1 - per_hour_contribution / 24)`,per_hour_contribution ≥ 24 时为 0 |
|
||||
| 30 | penalty_reason | TEXT | YES | 惩罚原因描述(NULL=无违规) |
|
||||
| 31 | is_exempt | BOOLEAN | NO | 是否豁免惩罚(豁免助教不计算惩罚) |
|
||||
| 32 | per_hour_contribution | NUMERIC(10,2) | YES | 每小时贡献金额(= `base_ledger_amount / base_hours / overlap_count`,NULL=无违规或豁免) |
|
||||
| 33 | created_at | TIMESTAMPTZ | NO | 创建时间 |
|
||||
| 34 | updated_at | TIMESTAMPTZ | NO | 更新时间 |
|
||||
|
||||
## 数据来源
|
||||
|
||||
@@ -68,17 +72,21 @@ WHERE is_delete = 0
|
||||
GROUP BY site_id, DATE(start_use_time), site_assistant_id, nickname;
|
||||
```
|
||||
|
||||
### 废除记录:dwd_assistant_trash_event
|
||||
### 废除记录:dwd_assistant_service_log_ex
|
||||
|
||||
> ⚠️ `dwd_assistant_trash_event` 已于 2026-02-22 废弃,作废判断改用 `dwd_assistant_service_log_ex.is_trash`(0=正常,1=作废)。
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
site_id,
|
||||
DATE(create_time) AS stat_date,
|
||||
assistant_no,
|
||||
assistant_name,
|
||||
SUM(charge_minutes_raw * 60) AS trashed_seconds,
|
||||
COUNT(*) AS trashed_count
|
||||
FROM dwd.dwd_assistant_trash_event
|
||||
GROUP BY site_id, DATE(create_time), assistant_no, assistant_name;
|
||||
s.site_id,
|
||||
DATE(s.start_use_time) AS stat_date,
|
||||
s.site_assistant_id AS assistant_id,
|
||||
SUM(CASE WHEN ex.is_trash = 1 THEN s.income_seconds ELSE 0 END) AS trashed_seconds,
|
||||
COUNT(CASE WHEN ex.is_trash = 1 THEN 1 END) AS trashed_count
|
||||
FROM dwd.dwd_assistant_service_log s
|
||||
LEFT JOIN dwd.dwd_assistant_service_log_ex ex ON s.assistant_service_id = ex.assistant_service_id
|
||||
WHERE s.is_delete = 0
|
||||
GROUP BY s.site_id, DATE(s.start_use_time), s.site_assistant_id;
|
||||
```
|
||||
|
||||
## 使用说明
|
||||
@@ -115,4 +123,4 @@ GROUP BY assistant_id, DATE_TRUNC('month', stat_date);
|
||||
|------|------|
|
||||
| 可回溯 | ✅ 完全可回溯 |
|
||||
| 数据范围 | 2025-07-21 ~ 至今 |
|
||||
| 依赖表 | dwd_assistant_service_log, dwd_assistant_trash_event, dim_assistant |
|
||||
| 依赖表 | dwd_assistant_service_log, dwd_assistant_service_log_ex, dim_assistant, dim_table, cfg_skill_type |
|
||||
|
||||
@@ -0,0 +1,136 @@
|
||||
# dws_assistant_project_tag 助教项目标签表
|
||||
|
||||
> 生成时间:2026-03-07
|
||||
|
||||
## 表信息
|
||||
|
||||
| 属性 | 值 |
|
||||
|------|-----|
|
||||
| Schema | dws |
|
||||
| 表名 | dws_assistant_project_tag |
|
||||
| 主键 | id |
|
||||
| 唯一键 | (site_id, assistant_id, time_window, category_code) |
|
||||
| 数据来源 | dwd_assistant_service_log + dim_table + cfg_area_category |
|
||||
| 更新频率 | 每日全量重建(按 site_id 删除后重新插入) |
|
||||
| 说明 | 按时间窗口计算助教在四大项目的工作时长占比,≥25% 分配标签 |
|
||||
|
||||
## 字段说明
|
||||
|
||||
| 序号 | 字段名 | 类型 | 可空 | 说明 |
|
||||
|------|--------|------|------|------|
|
||||
| 1 | id | BIGSERIAL | NO | 自增主键 |
|
||||
| 2 | site_id | BIGINT | NO | 门店ID |
|
||||
| 3 | tenant_id | BIGINT | NO | 租户ID |
|
||||
| 4 | assistant_id | BIGINT | NO | 助教ID |
|
||||
| 5 | time_window | VARCHAR(40) | NO | 时间窗口枚举值 |
|
||||
| 6 | category_code | VARCHAR(30) | NO | 项目分类代码(BILLIARD/SNOOKER/MAHJONG/KTV) |
|
||||
| 7 | category_name | VARCHAR(50) | NO | 项目显示名称(如 🎱 中式/追分) |
|
||||
| 8 | short_name | VARCHAR(10) | NO | 项目简写(如 🎱) |
|
||||
| 9 | duration_seconds | BIGINT | NO | 该项目总工作时长(秒) |
|
||||
| 10 | total_seconds | BIGINT | NO | 所有四大项目总时长(秒) |
|
||||
| 11 | percentage | NUMERIC(5,4) | NO | 占比(0~1,四位小数) |
|
||||
| 12 | is_tagged | BOOLEAN | NO | 占比≥0.25 时为 TRUE |
|
||||
| 13 | computed_at | TIMESTAMPTZ | NO | 计算时间 |
|
||||
| 14 | created_at | TIMESTAMPTZ | NO | 创建时间 |
|
||||
| 15 | updated_at | TIMESTAMPTZ | NO | 更新时间 |
|
||||
|
||||
## 时间窗口
|
||||
|
||||
助教看板使用 6 个时间窗口:
|
||||
|
||||
| 枚举值 | 说明 |
|
||||
|--------|------|
|
||||
| THIS_MONTH | 本月(月初 ~ 今天) |
|
||||
| THIS_QUARTER | 本季度(季度首月1日 ~ 今天) |
|
||||
| LAST_MONTH | 上月(上月初 ~ 上月末) |
|
||||
| LAST_3_MONTHS_EXCL_CURRENT | 前3个月不含本月 |
|
||||
| LAST_QUARTER | 上季度 |
|
||||
| LAST_6_MONTHS | 最近半年(不含本月) |
|
||||
|
||||
## 索引
|
||||
|
||||
| 索引名 | 字段 | 类型 | 说明 |
|
||||
|--------|------|------|------|
|
||||
| pk_dws_assistant_project_tag | id | 主键 | 自增主键 |
|
||||
| uk_dws_assistant_project_tag | (site_id, assistant_id, time_window, category_code) | 唯一 | 业务唯一键 |
|
||||
| idx_apt_site_window_tagged | (site_id, time_window) WHERE is_tagged=TRUE | 部分索引 | 加速看板查询 |
|
||||
|
||||
|
||||
## 数据链路
|
||||
|
||||
```
|
||||
dwd.dwd_assistant_service_log (income_seconds, site_table_id)
|
||||
→ JOIN dwd.dim_table (site_table_id → table_id, scd2_is_current=1)
|
||||
→ get_area_category(area_name, table_name) -- 通过 cfg_area_category 映射
|
||||
→ 只保留 BILLIARD/SNOOKER/MAHJONG/KTV
|
||||
→ 按 (assistant_id, category_code) 汇总 income_seconds
|
||||
→ 计算占比 percentage = duration_seconds / total_seconds
|
||||
→ ≥0.25 标记 is_tagged=TRUE
|
||||
→ 写入 dws.dws_assistant_project_tag
|
||||
```
|
||||
|
||||
### 关键规则
|
||||
|
||||
1. 数据链路走 `dim_table`(通过 `site_table_id` JOIN),不直接用事实表的 `site_table_area_name`
|
||||
2. 只计算四大项目(BILLIARD/SNOOKER/MAHJONG/KTV),SPECIAL/OTHER 不参与
|
||||
3. 标签阈值 25%(`TAG_THRESHOLD = 0.25`)
|
||||
4. 全量删除重建策略:按 `site_id` 删除后重新插入所有时间窗口
|
||||
5. `is_delete = 0` 过滤已删除的服务记录
|
||||
|
||||
## ETL 任务
|
||||
|
||||
| 属性 | 值 |
|
||||
|------|-----|
|
||||
| 任务代码 | DWS_ASSISTANT_PROJECT_TAG |
|
||||
| Python 类 | AssistantProjectTagTask |
|
||||
| 文件 | tasks/dws/assistant_project_tag_task.py |
|
||||
| 依赖 | DWD_LOAD_FROM_ODS |
|
||||
|
||||
## 变更记录
|
||||
|
||||
| 日期 | 变更 | 说明 |
|
||||
|------|------|------|
|
||||
| 2026-03-07 | 新建表 | 支持助教看板按项目类型筛选 |
|
||||
|
||||
## 验证 SQL
|
||||
|
||||
```sql
|
||||
-- 1. 确认表存在且有数据
|
||||
SELECT COUNT(*) AS row_count,
|
||||
COUNT(DISTINCT assistant_id) AS assistant_count,
|
||||
COUNT(DISTINCT time_window) AS window_count
|
||||
FROM dws.dws_assistant_project_tag;
|
||||
|
||||
-- 2. 确认 category_code 只有四大项目
|
||||
SELECT DISTINCT category_code
|
||||
FROM dws.dws_assistant_project_tag
|
||||
ORDER BY category_code;
|
||||
-- 期望:BILLIARD, KTV, MAHJONG, SNOOKER
|
||||
|
||||
-- 3. 确认打标与占比一致:percentage ≥ 0.25 的行必须 is_tagged = TRUE(calc_pct 列仅供人工核对 duration_seconds / total_seconds ≈ percentage)
|
||||
SELECT site_id, assistant_id, time_window, category_code,
|
||||
duration_seconds, total_seconds, percentage,
|
||||
ROUND(duration_seconds::numeric / NULLIF(total_seconds, 0), 4) AS calc_pct,
|
||||
is_tagged,
|
||||
(percentage >= 0.25) AS should_be_tagged
|
||||
FROM dws.dws_assistant_project_tag
|
||||
WHERE percentage >= 0.25 AND is_tagged = FALSE
|
||||
LIMIT 10;
|
||||
-- 期望:0 行(所有 ≥25% 的都应标记为 TRUE)
|
||||
|
||||
-- 4. 确认唯一键无重复
|
||||
SELECT site_id, assistant_id, time_window, category_code, COUNT(*)
|
||||
FROM dws.dws_assistant_project_tag
|
||||
GROUP BY site_id, assistant_id, time_window, category_code
|
||||
HAVING COUNT(*) > 1;
|
||||
-- 期望:0 行
|
||||
```
|
||||
|
||||
## 回滚策略
|
||||
|
||||
```sql
|
||||
-- 删除表(不影响其他表)
|
||||
DROP TABLE IF EXISTS dws.dws_assistant_project_tag CASCADE;
|
||||
-- 从 task_registry.py 移除 DWS_ASSISTANT_PROJECT_TAG 注册
|
||||
-- 从 maintenance_task.py DEFAULT_RETENTION_TABLES 移除对应条目
|
||||
```
|
||||
@@ -22,7 +22,7 @@
|
||||
| 2 | site_id | BIGINT | NO | 门店ID |
|
||||
| 3 | tenant_id | BIGINT | NO | 租户ID |
|
||||
| 4 | stat_date | DATE | NO | 统计日期 |
|
||||
| 5 | gross_amount | NUMERIC(14,2) | NO | 发生额合计 |
|
||||
| 5 | gross_amount | NUMERIC(14,2) | NO | 发生额合计(= 四项正价之和:table_fee + goods + assistant_pd + assistant_cx,不含 electricity_money,不使用 `consume_money`) |
|
||||
| 6 | table_fee_amount | NUMERIC(14,2) | NO | 台费正价 |
|
||||
| 7 | goods_amount | NUMERIC(14,2) | NO | 商品正价 |
|
||||
| 8 | assistant_pd_amount | NUMERIC(14,2) | NO | 助教基础课正价(陪打) |
|
||||
@@ -31,9 +31,9 @@
|
||||
| 11 | discount_groupbuy | NUMERIC(14,2) | NO | 团购优惠 |
|
||||
| 12 | discount_vip | NUMERIC(14,2) | NO | 会员折扣 |
|
||||
| 13 | discount_gift_card | NUMERIC(14,2) | NO | 赠送卡抵扣(余额变动) |
|
||||
| 14 | discount_manual | NUMERIC(14,2) | NO | 手动调整 |
|
||||
| 14 | discount_manual | NUMERIC(14,2) | NO | 大客户优惠(从 adjust_amount 中按配置拆出) |
|
||||
| 15 | discount_rounding | NUMERIC(14,2) | NO | 抹零 |
|
||||
| 16 | discount_other | NUMERIC(14,2) | NO | 其他优惠 |
|
||||
| 16 | discount_other | NUMERIC(14,2) | NO | 其他优惠(adjust_amount - 大客户优惠) |
|
||||
| 17 | confirmed_income | NUMERIC(14,2) | NO | 确认收入 = 发生额 - 优惠 |
|
||||
| 18 | cash_inflow_total | NUMERIC(14,2) | NO | 现金流入合计 |
|
||||
| 19 | cash_pay_amount | NUMERIC(14,2) | NO | 收银实付 |
|
||||
@@ -42,7 +42,7 @@
|
||||
| 22 | platform_fee_amount | NUMERIC(14,2) | NO | 平台佣金+服务费(导入) |
|
||||
| 23 | recharge_cash_inflow | NUMERIC(14,2) | NO | 充值现金流入 |
|
||||
| 24 | card_consume_total | NUMERIC(14,2) | NO | 卡消费合计 |
|
||||
| 25 | cash_card_consume | NUMERIC(14,2) | NO | 储值卡消费 |
|
||||
| 25 | recharge_card_consume | NUMERIC(14,2) | NO | 现金充值卡消费(= `recharge_card_amount`,仅现金充值卡支付部分,不含赠送卡) |
|
||||
| 26 | gift_card_consume | NUMERIC(14,2) | NO | 赠送卡消费 |
|
||||
| 27 | cash_outflow_total | NUMERIC(14,2) | NO | 现金流出合计 |
|
||||
| 28 | cash_balance_change | NUMERIC(14,2) | NO | 现金余额变动 |
|
||||
@@ -63,7 +63,14 @@
|
||||
|
||||
## 数据来源
|
||||
|
||||
> ⚠️ **consume_money 口径警告**:飞球上游 `consume_money` 在不同时期存在三种口径(A/B/C),DWS 层不应直接使用。
|
||||
> 应使用 `items_sum = table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money` 作为全时期一致的消费项目合计。
|
||||
> 详见 [consume_money 口径详解](../../../../docs/reports/DWD-DOC/consume/consume-money-caliber.md)。
|
||||
|
||||
### 结账汇总:dwd_settlement_head
|
||||
|
||||
> ⚠️ 以下示例 SQL 使用 `DATE(pay_time)` 简化展示。实际代码使用 `biz_date_sql_expr(pay_time, cutoff_hour)` 进行营业日归属(跨日订单归前一天)。
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
DATE(pay_time) AS stat_date,
|
||||
@@ -82,6 +89,7 @@ SELECT
|
||||
SUM(pl_coupon_sale_amount) AS pl_coupon_sale_amount
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = :site_id
|
||||
AND settle_type IN (1, 3) -- 仅台桌结账+商城订单,排除退货(6)/退款(7)
|
||||
GROUP BY DATE(pay_time);
|
||||
```
|
||||
|
||||
@@ -137,12 +145,49 @@ GROUP BY change_time::DATE;
|
||||
|
||||
**计算公式**
|
||||
```
|
||||
-- gross_amount 基于 items_sum 各分项(全时期一致),不使用 consume_money
|
||||
gross_amount = table_fee_amount + goods_amount + assistant_pd_amount + assistant_cx_amount
|
||||
discount_total = discount_groupbuy + discount_vip + discount_gift_card + discount_manual + discount_rounding + discount_other
|
||||
confirmed_income = gross_amount - discount_total
|
||||
cash_inflow_total = cash_pay_amount + groupbuy_pay_amount + platform_settlement_amount + recharge_cash_inflow
|
||||
cash_inflow_total = cash_pay_amount + platform_inflow + recharge_cash_inflow
|
||||
-- platform_inflow:优先取 platform_settlement_amount(平台回款),为 0 时取 groupbuy_pay_amount(团购支付)
|
||||
-- 两者互斥,不可同时计入
|
||||
```
|
||||
|
||||
> ⚠️ `discount_manual` 存储大客户优惠(从 adjust_amount 中按配置的会员ID/订单ID拆出),`discount_other` 存储其他手动调整(= adjust_amount - 大客户优惠)。两者互斥,之和 = adjust_amount。
|
||||
|
||||
**支付渠道恒等式**
|
||||
```
|
||||
-- 以下恒等式 100% 成立(DWD-DOC 校准确认)
|
||||
balance_amount = recharge_card_amount + gift_card_amount -- 储值卡 = 充值卡 + 赠送卡
|
||||
pay_amount = point_amount + cash_amount -- 实付 = 积分 + 现金(互斥)
|
||||
```
|
||||
|
||||
> ⚠️ `balance_amount`(储值卡支付)是独立支付渠道,`recharge_card_amount` 和 `gift_card_amount` 是其分账明细,不可与 `balance_amount` 重复计算。
|
||||
|
||||
**团购券三层价格体系**
|
||||
```
|
||||
顾客支付价(PCR.sale_price)→ 平台结算价(SH.pl_coupon_sale_amount)→ 门店抵扣价(SH.coupon_amount)
|
||||
门店补贴 = coupon_amount - pl_coupon_sale_amount
|
||||
```
|
||||
- `pl_coupon_sale_amount = SUM(GR.ledger_unit_price)` ✅ 100%
|
||||
- `coupon_amount = SUM(GR.ledger_amount)` ✅ 100%
|
||||
- P1 期间(2025-07~10)`pl_coupon_sale_amount` 恒为 0
|
||||
|
||||
**F2 收支平衡公式(三期差异)**
|
||||
```
|
||||
P1/P2(< 2026-01-15 12:45:59):
|
||||
consume = coupon + pay + balance - rounding + adjust + member_disc + prepay(ex)
|
||||
|
||||
B 类过渡期(2026-01-15 12:46~18:44,约 40 笔):
|
||||
consume = 2*coupon + pay + balance - rounding + adjust + member_disc + prepay(ex)
|
||||
|
||||
P3(≥ 2026-01-15 18:45,当前生效):
|
||||
consume = coupon + pl_coupon + pay + balance - rounding + adjust + member_disc + prepay(ex)
|
||||
```
|
||||
- 通过率:P1/P2 99.24% | B 95.00% | P3 99.87%
|
||||
- 详见 [F2 收支平衡专项](../../../../docs/reports/DWD-DOC/05-f2-balance-audit.md)
|
||||
|
||||
**物化汇总层(可选)**
|
||||
- L1~L4 物化视图:`mv_dws_finance_daily_summary_l1` / `l2` / `l3` / `l4`
|
||||
- 刷新任务:`DWS_MV_REFRESH_FINANCE_DAILY`
|
||||
|
||||
@@ -37,16 +37,17 @@
|
||||
|--------------------|--------------------|----------|
|
||||
| GROUPBUY | 团购优惠 | dwd_settlement_head.coupon_amount - 团购实付 |
|
||||
| VIP | 会员折扣 | dwd_settlement_head.member_discount_amount |
|
||||
| GIFT_CARD_TABLE | 台费卡抵扣 | dwd_member_balance_change |
|
||||
| GIFT_CARD_DRINK | 酒水卡抵扣 | dwd_member_balance_change |
|
||||
| GIFT_CARD_COUPON | 活动抵用券抵扣 | dwd_member_balance_change |
|
||||
| MANUAL | 手动调整 | dwd_settlement_head.adjust_amount |
|
||||
| GIFT_CARD_TABLE | 台费卡抵扣 | dwd_member_balance_change(`card_type_id = 2791990152417157`) |
|
||||
| GIFT_CARD_DRINK | 酒水卡抵扣 | dwd_member_balance_change(`card_type_id = 2794699703437125`) |
|
||||
| GIFT_CARD_COUPON | 活动抵用券抵扣 | dwd_member_balance_change(`card_type_id = 2793266846533445`) |
|
||||
| BIG_CUSTOMER | 大客户优惠 | dwd_settlement_head(big_customer_amount,从 adjust_amount 拆分) |
|
||||
| OTHER | 其他优惠 | adjust_amount - big_customer_amount(其他无法归类的手动调整) |
|
||||
| ROUNDING | 抹零 | dwd_settlement_head.rounding_amount |
|
||||
| BIG_CUSTOMER | 大客户优惠 | dwd_settlement_head(特定会员优惠) |
|
||||
| OTHER | 其他优惠 | 其他无法归类的优惠 |
|
||||
|
||||
## 数据来源
|
||||
|
||||
> ⚠️ 以下示例 SQL 使用 `pay_time::DATE` 简化展示。实际代码使用 `biz_date_sql_expr(pay_time, cutoff_hour)` 进行营业日归属(跨日订单归前一天),详见 ETL 配置 `app.business_day_start_hour`。
|
||||
|
||||
```sql
|
||||
-- 从结账头表提取优惠汇总
|
||||
SELECT
|
||||
@@ -62,7 +63,7 @@ SELECT
|
||||
COUNT(CASE WHEN rounding_amount != 0 THEN 1 END) AS rounding_order_count
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = :site_id
|
||||
AND settle_status = 1
|
||||
AND settle_type IN (1, 3)
|
||||
GROUP BY pay_time::DATE;
|
||||
```
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
| 表名 | dws_finance_income_structure |
|
||||
| 主键 | id |
|
||||
| 唯一键 | (site_id, stat_date, structure_type, category_code) |
|
||||
| 数据来源 | dwd_table_fee_log + dwd_assistant_service_log + cfg_area_category |
|
||||
| 数据来源 | dwd_settlement_head + dwd_table_fee_log + dwd_assistant_service_log + cfg_area_category |
|
||||
| 更新频率 | 每日更新 |
|
||||
| 说明 | 以"日期+区域/类型"为粒度,分析收入结构 |
|
||||
|
||||
@@ -35,23 +35,28 @@
|
||||
## 分类代码说明
|
||||
|
||||
### 按区域分析 (structure_type = 'AREA')
|
||||
| category_code | category_name | 来源 |
|
||||
|---------------|---------------|------|
|
||||
| BILLIARD | 台球散台 | A区/B区/C区/TV台 |
|
||||
| BILLIARD_VIP | 台球VIP | VIP包厢 |
|
||||
| SNOOKER | 斯诺克 | 斯诺克区 |
|
||||
| MAHJONG | 麻将棋牌 | 麻将房/M7/M8/666/发财 |
|
||||
| KTV | K歌娱乐 | K包/k包活动区/幸会158 |
|
||||
| SPECIAL | 补时长 | 补时长 |
|
||||
| OTHER | 其他 | 未映射区域 |
|
||||
| category_code | category_name | display_name | 来源 |
|
||||
|---------------|---------------|--------------|------|
|
||||
| BILLIARD | 🎱 中式/追分 | 🎱 中式/追分 | A区/B区/C区/TV台/VIP包厢(V1-V4) |
|
||||
| SNOOKER | 斯诺克 | 斯诺克 | 斯诺克区/VIP包厢(V5) |
|
||||
| MAHJONG | 🀄 麻将/棋牌 | 🀄 麻将/棋牌 | 麻将房/M7/M8/666/发财 |
|
||||
| KTV | 🎤 团建/K歌 | 🎤 团建/K歌 | K包/k包活动区/幸会158 |
|
||||
| SPECIAL | 补时长 | 补时长 | 补时长 |
|
||||
| OTHER | 其他 | 其他 | 未映射区域 |
|
||||
|
||||
> ⚠️ `BILLIARD_VIP` 已于 2026-03-07 废弃,VIP包厢按台桌级映射拆分至 BILLIARD(V1-V4) 和 SNOOKER(V5)。
|
||||
|
||||
### 按收入类型分析 (structure_type = 'INCOME_TYPE')
|
||||
| category_code | category_name |
|
||||
|---------------|---------------|
|
||||
| TABLE_FEE | 台费收入 |
|
||||
| GOODS | 商品收入 |
|
||||
| ASSISTANT_BASE | 助教基础课收入 |
|
||||
| ASSISTANT_BONUS | 助教附加课收入 |
|
||||
| category_code | category_name | 数据来源字段 |
|
||||
|---------------|---------------|-------------|
|
||||
| TABLE_FEE | 台费收入 | `settlement_head.table_charge_money` |
|
||||
| GOODS | 商品收入 | `settlement_head.goods_money` |
|
||||
| ASSISTANT_PD | 助教陪打收入 | `settlement_head.assistant_pd_money` |
|
||||
| ASSISTANT_CX | 助教超休收入 | `settlement_head.assistant_cx_money` |
|
||||
|
||||
> ⚠️ 历史版本曾使用 `ASSISTANT_BASE`/`ASSISTANT_BONUS`,已更正为 `ASSISTANT_PD`(陪打)/`ASSISTANT_CX`(超休),与 DWD 结算单字段对齐。
|
||||
> 收入金额取自 `items_sum` 各分项(`table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money`),
|
||||
> 不使用 `consume_money`(存在三种历史口径混合,详见 [consume_money 口径](../../../../docs/reports/DWD-DOC/consume/consume-money-caliber.md))。
|
||||
|
||||
## 数据来源
|
||||
|
||||
@@ -85,4 +90,4 @@ income_ratio = income_amount / SUM(income_amount) OVER (PARTITION BY stat_date,
|
||||
|------|------|
|
||||
| 可回溯 | ✅ 完全可回溯 |
|
||||
| 数据范围 | 2025-07-21 ~ 至今 |
|
||||
| 依赖表 | dwd_table_fee_log, dwd_assistant_service_log, dim_table, cfg_area_category |
|
||||
| 依赖表 | dwd_settlement_head, dwd_table_fee_log, dwd_assistant_service_log, dim_table, cfg_area_category |
|
||||
|
||||
@@ -49,16 +49,16 @@
|
||||
SELECT
|
||||
DATE(pay_time) AS stat_date,
|
||||
COUNT(*) AS recharge_count,
|
||||
SUM(pay_money + gift_money) AS recharge_total,
|
||||
SUM(pay_money) AS recharge_cash,
|
||||
SUM(gift_money) AS recharge_gift,
|
||||
SUM(pay_amount + point_amount) AS recharge_total,
|
||||
SUM(pay_amount) AS recharge_cash,
|
||||
SUM(point_amount) AS recharge_gift,
|
||||
-- 首充
|
||||
SUM(CASE WHEN is_first = 1 THEN 1 ELSE 0 END) AS first_recharge_count,
|
||||
SUM(CASE WHEN is_first = 1 THEN pay_money ELSE 0 END) AS first_recharge_cash,
|
||||
SUM(CASE WHEN is_first = 1 THEN gift_money ELSE 0 END) AS first_recharge_gift,
|
||||
SUM(CASE WHEN is_first = 1 THEN pay_amount ELSE 0 END) AS first_recharge_cash,
|
||||
SUM(CASE WHEN is_first = 1 THEN point_amount ELSE 0 END) AS first_recharge_gift,
|
||||
-- 续充
|
||||
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN 1 ELSE 0 END) AS renewal_count,
|
||||
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_money ELSE 0 END) AS renewal_cash,
|
||||
SUM(CASE WHEN is_first != 1 OR is_first IS NULL THEN pay_amount ELSE 0 END) AS renewal_cash,
|
||||
-- 会员数
|
||||
COUNT(DISTINCT member_id) AS recharge_member_count
|
||||
FROM dwd.dwd_recharge_order
|
||||
|
||||
@@ -30,40 +30,50 @@
|
||||
| 10 | first_consume_date | DATE | YES | 首次消费日期 |
|
||||
| 11 | last_consume_date | DATE | YES | 最近消费日期 |
|
||||
| 12 | total_visit_count | INTEGER | NO | 累计到店次数 |
|
||||
| 13 | total_consume_amount | NUMERIC(14,2) | NO | 累计消费金额 |
|
||||
| 14 | total_recharge_amount | NUMERIC(14,2) | NO | 累计充值金额 |
|
||||
| 15 | total_table_fee | NUMERIC(14,2) | NO | 累计台费 |
|
||||
| 16 | total_goods_amount | NUMERIC(14,2) | NO | 累计商品消费 |
|
||||
| 17 | total_assistant_amount | NUMERIC(14,2) | NO | 累计助教服务消费 |
|
||||
| 13 | total_consume_amount | NUMERIC(14,2) | NO | 累计消费金额(基于 `items_sum` 口径,见下方说明) |
|
||||
| 14 | total_recharge_amount | NUMERIC(14,2) | NO | 累计充值金额(来源:`dim_member.recharge_money_sum`,上游 API 同步值) |
|
||||
| 15 | total_table_fee | NUMERIC(14,2) | NO | 累计台费(`table_charge_money`) |
|
||||
| 16 | total_goods_amount | NUMERIC(14,2) | NO | 累计商品消费(`goods_money`) |
|
||||
| 17 | total_assistant_amount | NUMERIC(14,2) | NO | 累计助教服务消费(= `assistant_pd_money` + `assistant_cx_money`) |
|
||||
| 18-23 | visit_count_7d/10d/15d/30d/60d/90d | INTEGER | NO | 近N天到店次数 |
|
||||
| 24-29 | consume_amount_7d/10d/15d/30d/60d/90d | NUMERIC(14,2) | NO | 近N天消费金额 |
|
||||
| 30 | cash_card_balance | NUMERIC(14,2) | NO | 储值卡余额 |
|
||||
| 31 | gift_card_balance | NUMERIC(14,2) | NO | 赠送卡余额 |
|
||||
| 32 | total_card_balance | NUMERIC(14,2) | NO | 总卡余额 |
|
||||
| 33 | days_since_last | INTEGER | YES | 距离最近消费的天数 |
|
||||
| 34 | is_active_7d | BOOLEAN | NO | 近7天是否活跃 |
|
||||
| 35 | is_active_30d | BOOLEAN | NO | 近30天是否活跃 |
|
||||
| 36 | is_active_90d | BOOLEAN | NO | 近90天是否活跃 |
|
||||
| 37 | customer_tier | VARCHAR(20) | YES | 客户分层(高价值/中等/低活跃/流失) |
|
||||
| 38 | created_at | TIMESTAMPTZ | NO | 创建时间 |
|
||||
| 39 | updated_at | TIMESTAMPTZ | NO | 更新时间 |
|
||||
| 30-32 | recharge_count_30d/60d/90d | INTEGER | NO | 近N天充值笔数(来源:dwd_recharge_order) |
|
||||
| 33-35 | recharge_amount_30d/60d/90d | NUMERIC(14,2) | NO | 近N天充值金额(仅 `pay_amount` 现金部分,不含 `point_amount` 赠送,来源:dwd_recharge_order) |
|
||||
| 36 | avg_ticket_amount | NUMERIC(14,2) | NO | 次均消费(= total_consume_amount / MAX(total_visit_count, 1)) |
|
||||
| 37 | cash_card_balance | NUMERIC(14,2) | NO | 储值卡余额 |
|
||||
| 38 | gift_card_balance | NUMERIC(14,2) | NO | 赠送卡余额 |
|
||||
| 39 | total_card_balance | NUMERIC(14,2) | NO | 总卡余额 |
|
||||
| 40 | days_since_last | INTEGER | YES | 距离最近消费的天数 |
|
||||
| 41 | is_active_7d | BOOLEAN | NO | 近7天是否活跃 |
|
||||
| 42 | is_active_30d | BOOLEAN | NO | 近30天是否活跃 |
|
||||
| 43 | is_active_90d | BOOLEAN | NO | 近90天是否活跃 |
|
||||
| 44 | customer_tier | VARCHAR(20) | YES | 客户分层(高价值/中等/低活跃/流失) |
|
||||
| 45 | created_at | TIMESTAMPTZ | NO | 创建时间 |
|
||||
| 46 | updated_at | TIMESTAMPTZ | NO | 更新时间 |
|
||||
|
||||
## 数据来源
|
||||
|
||||
### 消费统计来源:dwd_settlement_head
|
||||
|
||||
> ⚠️ **consume_money 口径警告**:`consume_money` 在不同时期存在三种口径(A/B/C),DWS 层不应直接使用。
|
||||
> 应使用 `items_sum = table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money` 作为全时期一致的消费项目合计。
|
||||
> 详见 [consume_money 口径详解](../../../../docs/reports/DWD-DOC/consume/consume-money-caliber.md)。
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
site_id,
|
||||
member_id,
|
||||
DATE(pay_time) AS consume_date,
|
||||
COUNT(*) AS visit_count,
|
||||
SUM(consume_money) AS consume_amount,
|
||||
-- ✅ 使用 items_sum 口径(全时期一致),不使用 consume_money
|
||||
SUM(table_charge_money + goods_money + assistant_pd_money
|
||||
+ assistant_cx_money + electricity_money) AS consume_amount,
|
||||
SUM(table_charge_money) AS table_fee,
|
||||
SUM(goods_money) AS goods_amount,
|
||||
SUM(assistant_pd_money + assistant_cx_money) AS assistant_amount
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE member_id != 0 -- 排除散客
|
||||
AND settle_type = 1 -- 已结账
|
||||
AND settle_type IN (1, 3) -- 仅台桌结账+商城订单,排除退货(6)/退款(7)
|
||||
GROUP BY site_id, member_id, DATE(pay_time);
|
||||
```
|
||||
|
||||
@@ -84,19 +94,27 @@ GROUP BY tenant_member_id;
|
||||
- member_id=0 的散客不进入此表统计
|
||||
|
||||
**客户分层规则**
|
||||
```sql
|
||||
customer_tier = CASE
|
||||
WHEN consume_amount_30d >= 1000 THEN '高价值'
|
||||
WHEN consume_amount_30d >= 300 THEN '中等'
|
||||
WHEN is_active_30d THEN '低活跃'
|
||||
ELSE '流失'
|
||||
END
|
||||
```python
|
||||
# 基于近 30/90 天到店次数与 90 天消费金额组合判断(代码实际逻辑)
|
||||
if visit_count_90d >= 3 and consume_amount_90d >= 1000:
|
||||
customer_tier = '高价值'
|
||||
elif visit_count_30d > 0:
|
||||
customer_tier = '中等'
|
||||
elif visit_count_90d > 0:
|
||||
customer_tier = '低活跃'
|
||||
else:
|
||||
customer_tier = '流失'
|
||||
```
|
||||
|
||||
**金额口径说明**
|
||||
- `total_consume_amount` 及各滚动窗口 `consume_amount_*d` 均基于 `items_sum` 口径
|
||||
- `items_sum = table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money`
|
||||
- `total_assistant_amount` = `assistant_pd_money`(陪打)+ `assistant_cx_money`(超休),不使用笼统的 `service_fee`
|
||||
|
||||
## 可回溯性
|
||||
|
||||
| 项目 | 说明 |
|
||||
|------|------|
|
||||
| 可回溯 | ✅ 完全可回溯 |
|
||||
| 数据范围 | 2025-07-16 ~ 至今 |
|
||||
| 依赖表 | dwd_settlement_head, dim_member, dim_member_card_account |
|
||||
| 依赖表 | dwd_settlement_head, dwd_recharge_order, dim_member, dim_member_card_account |
|
||||
|
||||
@@ -0,0 +1,133 @@
|
||||
# dws_member_project_tag 客户项目标签表
|
||||
|
||||
> 生成时间:2026-03-07
|
||||
|
||||
## 表信息
|
||||
|
||||
| 属性 | 值 |
|
||||
|------|-----|
|
||||
| Schema | dws |
|
||||
| 表名 | dws_member_project_tag |
|
||||
| 主键 | id |
|
||||
| 唯一键 | (site_id, member_id, time_window, category_code) |
|
||||
| 数据来源 | dwd_table_fee_log + dim_table + cfg_area_category |
|
||||
| 更新频率 | 每日全量重建(按 site_id 删除后重新插入) |
|
||||
| 说明 | 按时间窗口计算客户在四大项目的消费时长占比,≥25% 分配标签。散客不参与。 |
|
||||
|
||||
## 字段说明
|
||||
|
||||
| 序号 | 字段名 | 类型 | 可空 | 说明 |
|
||||
|------|--------|------|------|------|
|
||||
| 1 | id | BIGSERIAL | NO | 自增主键 |
|
||||
| 2 | site_id | BIGINT | NO | 门店ID |
|
||||
| 3 | tenant_id | BIGINT | NO | 租户ID |
|
||||
| 4 | member_id | BIGINT | NO | 会员ID(散客不入此表) |
|
||||
| 5 | time_window | VARCHAR(40) | NO | 时间窗口枚举值 |
|
||||
| 6 | category_code | VARCHAR(30) | NO | 项目分类代码(BILLIARD/SNOOKER/MAHJONG/KTV) |
|
||||
| 7 | category_name | VARCHAR(50) | NO | 项目显示名称(如 🎱 中式/追分) |
|
||||
| 8 | short_name | VARCHAR(10) | NO | 项目简写(如 🎱) |
|
||||
| 9 | duration_seconds | BIGINT | NO | 该项目总计费时长(秒,来源 ledger_count) |
|
||||
| 10 | total_seconds | BIGINT | NO | 所有四大项目总时长(秒) |
|
||||
| 11 | percentage | NUMERIC(5,4) | NO | 占比(0~1,四位小数) |
|
||||
| 12 | is_tagged | BOOLEAN | NO | 占比≥0.25 时为 TRUE |
|
||||
| 13 | computed_at | TIMESTAMPTZ | NO | 计算时间 |
|
||||
| 14 | created_at | TIMESTAMPTZ | NO | 创建时间 |
|
||||
| 15 | updated_at | TIMESTAMPTZ | NO | 更新时间 |
|
||||
|
||||
## 时间窗口
|
||||
|
||||
客户看板使用 2 个时间窗口:
|
||||
|
||||
| 枚举值 | 说明 |
|
||||
|--------|------|
|
||||
| LAST_30_DAYS | 近30天(含今天,base_date-29天 ~ base_date) |
|
||||
| LAST_60_DAYS | 近60天(含今天,base_date-59天 ~ base_date) |
|
||||
|
||||
## 索引
|
||||
|
||||
| 索引名 | 字段 | 类型 | 说明 |
|
||||
|--------|------|------|------|
|
||||
| pk_dws_member_project_tag | id | 主键 | 自增主键 |
|
||||
| uk_dws_member_project_tag | (site_id, member_id, time_window, category_code) | 唯一 | 业务唯一键 |
|
||||
| idx_mpt_site_window_tagged | (site_id, time_window) WHERE is_tagged=TRUE | 部分索引 | 加速看板查询 |
|
||||
|
||||
|
||||
## 数据链路
|
||||
|
||||
```
|
||||
dwd.dwd_table_fee_log (ledger_count, site_table_id)
|
||||
→ JOIN dwd.dim_table (site_table_id → table_id, scd2_is_current=1)
|
||||
→ get_area_category(area_name, table_name) -- 通过 cfg_area_category 映射
|
||||
→ 只保留 BILLIARD/SNOOKER/MAHJONG/KTV
|
||||
→ 排除散客(member_id IS NULL 或 = 0)
|
||||
→ 按 (member_id, category_code) 汇总 ledger_count
|
||||
→ 计算占比 percentage = duration_seconds / total_seconds
|
||||
→ ≥0.25 标记 is_tagged=TRUE
|
||||
→ 写入 dws.dws_member_project_tag
|
||||
```
|
||||
|
||||
### 关键规则
|
||||
|
||||
1. 数据链路走 `dim_table`(通过 `site_table_id` JOIN),不直接用事实表的 `site_table_area_name`
|
||||
2. 客户时长使用 `ledger_count`(计费时长),不使用 `income_seconds`(那是助教工作时长)
|
||||
3. 散客(member_id=0 或 NULL)不参与标签计算
|
||||
4. 只计算四大项目(BILLIARD/SNOOKER/MAHJONG/KTV)
|
||||
5. 标签阈值 25%(`TAG_THRESHOLD = 0.25`)
|
||||
6. 全量删除重建策略:按 `site_id` 删除后重新插入所有时间窗口
|
||||
7. `COALESCE(is_delete, 0) = 0` 过滤已删除的台费记录
|
||||
|
||||
## ETL 任务
|
||||
|
||||
| 属性 | 值 |
|
||||
|------|-----|
|
||||
| 任务代码 | DWS_MEMBER_PROJECT_TAG |
|
||||
| Python 类 | MemberProjectTagTask |
|
||||
| 文件 | tasks/dws/member_project_tag_task.py |
|
||||
| 依赖 | DWD_LOAD_FROM_ODS |
|
||||
|
||||
## 变更记录
|
||||
|
||||
| 日期 | 变更 | 说明 |
|
||||
|------|------|------|
|
||||
| 2026-03-07 | 新建表 | 支持客户看板按项目类型筛选 |
|
||||
|
||||
## 验证 SQL
|
||||
|
||||
```sql
|
||||
-- 1. 确认表存在且有数据
|
||||
SELECT COUNT(*) AS row_count,
|
||||
COUNT(DISTINCT member_id) AS member_count,
|
||||
COUNT(DISTINCT time_window) AS window_count
|
||||
FROM dws.dws_member_project_tag;
|
||||
|
||||
-- 2. 确认无散客数据
|
||||
SELECT COUNT(*) FROM dws.dws_member_project_tag WHERE member_id = 0 OR member_id IS NULL;
|
||||
-- 期望:0
|
||||
|
||||
-- 3. 确认标签与占比一致(percentage ≥ 0.25 的行必须 is_tagged = TRUE;calc_pct 供人工核对占比)
|
||||
SELECT site_id, member_id, time_window, category_code,
|
||||
duration_seconds, total_seconds, percentage,
|
||||
ROUND(duration_seconds::numeric / NULLIF(total_seconds, 0), 4) AS calc_pct,
|
||||
is_tagged,
|
||||
(percentage >= 0.25) AS should_be_tagged
|
||||
FROM dws.dws_member_project_tag
|
||||
WHERE percentage >= 0.25 AND is_tagged = FALSE
|
||||
LIMIT 10;
|
||||
-- 期望:0 行
|
||||
|
||||
-- 4. 确认唯一键无重复
|
||||
SELECT site_id, member_id, time_window, category_code, COUNT(*)
|
||||
FROM dws.dws_member_project_tag
|
||||
GROUP BY site_id, member_id, time_window, category_code
|
||||
HAVING COUNT(*) > 1;
|
||||
-- 期望:0 行
|
||||
```
|
||||
|
||||
## 回滚策略
|
||||
|
||||
```sql
|
||||
-- 删除表(不影响其他表)
|
||||
DROP TABLE IF EXISTS dws.dws_member_project_tag CASCADE;
|
||||
-- 从 task_registry.py 移除 DWS_MEMBER_PROJECT_TAG 注册
|
||||
-- 从 maintenance_task.py DEFAULT_RETENTION_TABLES 移除对应条目
|
||||
```
|
||||
@@ -35,13 +35,14 @@
|
||||
| 15 | table_fee | NUMERIC(12,2) | NO | 台费 |
|
||||
| 16 | goods_amount | NUMERIC(12,2) | NO | 商品金额 |
|
||||
| 17 | assistant_amount | NUMERIC(12,2) | NO | 助教服务金额 |
|
||||
| 18 | total_consume | NUMERIC(12,2) | NO | 消费总额(正价) |
|
||||
| 18 | total_consume | NUMERIC(12,2) | NO | 消费总额(基于 `items_sum` 口径,= tc + goods + pd + cx + electricity) |
|
||||
| 19 | total_discount | NUMERIC(12,2) | NO | 优惠总额 |
|
||||
| 20 | actual_pay | NUMERIC(12,2) | NO | 实付金额 |
|
||||
| 21 | cash_pay | NUMERIC(12,2) | NO | 现金/刷卡支付 |
|
||||
| 22 | cash_card_pay | NUMERIC(12,2) | NO | 储值卡支付 |
|
||||
| 21 | cash_pay | NUMERIC(12,2) | NO | 收银实付(= `pay_amount`,与 actual_pay 同值) |
|
||||
| 22 | balance_pay | NUMERIC(12,2) | NO | 储值卡总支付(= recharge_card_pay + gift_card_pay) |
|
||||
| 22a | recharge_card_pay | NUMERIC(12,2) | NO | 现金充值卡支付(balance_pay 的子项) |
|
||||
| 23 | gift_card_pay | NUMERIC(12,2) | NO | 赠送卡支付 |
|
||||
| 24 | groupbuy_pay | NUMERIC(12,2) | NO | 团购券支付 |
|
||||
| 24 | groupbuy_pay | NUMERIC(12,2) | NO | 团购抵消台费金额(= `coupon_amount`) |
|
||||
| 25 | table_duration_min | INTEGER | NO | 台桌使用时长(分钟,来自台费流水真实秒数) |
|
||||
| 26 | assistant_duration_min | INTEGER | NO | 助教服务时长(分钟) |
|
||||
| 27 | assistant_services | JSONB | YES | 助教服务列表 |
|
||||
@@ -51,28 +52,36 @@
|
||||
## 数据来源
|
||||
|
||||
### 主表来源:dwd_settlement_head
|
||||
|
||||
> ⚠️ `total_consume` 使用 `items_sum` 口径(全时期一致),不使用 `consume_money`(存在三种历史口径混合)。
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
site_id,
|
||||
tenant_id,
|
||||
member_id,
|
||||
order_settle_id,
|
||||
DATE(pay_time) AS visit_date,
|
||||
create_time AS visit_time,
|
||||
member_name AS member_nickname,
|
||||
member_phone AS member_mobile,
|
||||
table_id,
|
||||
table_charge_money AS table_fee,
|
||||
goods_money AS goods_amount,
|
||||
assistant_pd_money + assistant_cx_money AS assistant_amount,
|
||||
consume_money AS total_consume,
|
||||
member_discount_amount + adjust_amount + rounding_amount AS total_discount,
|
||||
pay_amount AS actual_pay,
|
||||
balance_amount AS cash_card_pay,
|
||||
gift_card_amount AS gift_card_pay
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE member_id != 0
|
||||
AND settle_type = 1;
|
||||
sh.site_id,
|
||||
sh.tenant_id,
|
||||
sh.member_id,
|
||||
sh.order_settle_id,
|
||||
DATE(sh.pay_time) AS visit_date,
|
||||
sh.create_time AS visit_time,
|
||||
-- ⚠️ member_nickname/member_mobile 实际从 dim_member 关联获取(nickname/mobile),非结算头表字段
|
||||
dm.nickname AS member_nickname,
|
||||
dm.mobile AS member_mobile,
|
||||
sh.table_id,
|
||||
sh.table_charge_money AS table_fee,
|
||||
sh.goods_money AS goods_amount,
|
||||
sh.assistant_pd_money + sh.assistant_cx_money AS assistant_amount,
|
||||
-- ✅ 使用 items_sum 口径,不使用 consume_money
|
||||
sh.table_charge_money + sh.goods_money + sh.assistant_pd_money
|
||||
+ sh.assistant_cx_money + sh.electricity_money AS total_consume,
|
||||
sh.member_discount_amount + sh.adjust_amount + sh.rounding_amount AS total_discount,
|
||||
sh.pay_amount AS actual_pay,
|
||||
sh.balance_amount AS balance_pay,
|
||||
sh.recharge_card_amount AS recharge_card_pay,
|
||||
sh.gift_card_amount AS gift_card_pay
|
||||
FROM dwd.dwd_settlement_head sh
|
||||
JOIN dwd.dim_member dm ON sh.tenant_member_id = dm.tenant_member_id AND dm.scd2_is_current = 1
|
||||
WHERE sh.member_id IS NOT NULL AND sh.member_id != 0
|
||||
AND sh.settle_type IN (1, 3); -- 仅台桌结账+商城订单,排除退货(6)/退款(7)
|
||||
```
|
||||
|
||||
### 助教服务明细:dwd_assistant_service_log
|
||||
@@ -127,4 +136,4 @@ area_category = COALESCE(
|
||||
|------|------|
|
||||
| 可回溯 | ✅ 完全可回溯 |
|
||||
| 数据范围 | 2025-07-16 ~ 至今 |
|
||||
| 依赖表 | dwd_settlement_head, dwd_assistant_service_log, dwd_table_fee_log, dim_table, dim_member |
|
||||
| 依赖表 | dwd_settlement_head, dwd_assistant_service_log, dwd_table_fee_log, dim_table, dim_member, cfg_area_category |
|
||||
|
||||
@@ -23,20 +23,20 @@
|
||||
| 4 | order_date | DATE | NO | 订单日期(优先 pay_time,其次 create_time) |
|
||||
| 5 | tenant_id | BIGINT | NO | 租户ID |
|
||||
| 6 | member_id | BIGINT | YES | 会员ID(NULL 或 0 为散客) |
|
||||
| 7 | member_flag | BOOLEAN | NO | 是否会员订单 |
|
||||
| 8 | recharge_order_flag | BOOLEAN | NO | 充值订单标记(消费金额=0 且实付>0) |
|
||||
| 7 | member_flag | BOOLEAN | NO | 是否会员订单(来源:`is_bind_member`) |
|
||||
| 8 | recharge_order_flag | BOOLEAN | NO | 充值订单标记(`consume_money = 0` 且实付>0,此处 consume_money 仅用于零值判断,不参与金额计算) |
|
||||
| 9 | item_count | INTEGER | NO | 订单项数 |
|
||||
| 10 | total_item_quantity | INTEGER | NO | 订单项总数量 |
|
||||
| 11 | table_fee_amount | NUMERIC | NO | 台费金额 |
|
||||
| 12 | assistant_service_amount | NUMERIC | NO | 助教服务金额 |
|
||||
| 12 | assistant_service_amount | NUMERIC | NO | 助教服务金额(= `assistant_pd_money` + `assistant_cx_money`) |
|
||||
| 13 | goods_amount | NUMERIC | NO | 商品金额 |
|
||||
| 14 | group_amount | NUMERIC | NO | 团购金额 |
|
||||
| 15 | total_coupon_deduction | NUMERIC | NO | 优惠券抵扣总额 |
|
||||
| 16 | member_discount_amount | NUMERIC | NO | 会员折扣金额 |
|
||||
| 17 | manual_discount_amount | NUMERIC | NO | 手动折扣金额 |
|
||||
| 18 | order_original_amount | NUMERIC | NO | 原价估算(实付+优惠/抵扣) |
|
||||
| 18 | order_original_amount | NUMERIC | NO | 原价估算(= `total_paid_amount + total_coupon_deduction + member_discount_amount + manual_discount_amount`) |
|
||||
| 19 | order_final_amount | NUMERIC | NO | 最终应付金额 |
|
||||
| 20 | stored_card_deduct | NUMERIC | NO | 储值卡抵扣金额 |
|
||||
| 20 | stored_card_deduct | NUMERIC | NO | 储值卡抵扣金额(= `balance_amount`,即 `recharge_card_amount + gift_card_amount`) |
|
||||
| 21 | external_paid_amount | NUMERIC | NO | 外部支付金额(实付-卡类抵扣) |
|
||||
| 22 | total_paid_amount | NUMERIC | NO | 总实付金额 |
|
||||
| 23 | book_table_flow | NUMERIC | NO | 台费流水 |
|
||||
@@ -44,9 +44,9 @@
|
||||
| 25 | book_goods_flow | NUMERIC | NO | 商品流水 |
|
||||
| 26 | book_group_flow | NUMERIC | NO | 团购流水 |
|
||||
| 27 | book_order_flow | NUMERIC | NO | 订单总流水(台费+助教+商品+团购) |
|
||||
| 28 | order_effective_consume_cash | NUMERIC | NO | 有效消费现金 |
|
||||
| 29 | order_effective_recharge_cash | NUMERIC | NO | 有效充值现金 |
|
||||
| 30 | order_effective_flow | NUMERIC | NO | 有效流水 |
|
||||
| 28 | order_effective_consume_cash | NUMERIC | NO | 有效消费现金(= `GREATEST(total_paid_amount - stored_card_deduct, 0)`,即外部支付金额) |
|
||||
| 29 | order_effective_recharge_cash | NUMERIC | NO | 有效充值现金(当前硬编码为 0,占位字段,待后续实现) |
|
||||
| 30 | order_effective_flow | NUMERIC | NO | 有效流水(当前 = `total_paid_amount`) |
|
||||
| 31 | refund_amount | NUMERIC | NO | 退款金额 |
|
||||
| 32 | net_income | NUMERIC | NO | 净收入(实付-退款) |
|
||||
| 33 | created_at | TIMESTAMPTZ | NO | 创建时间 |
|
||||
@@ -54,10 +54,18 @@
|
||||
|
||||
## 业务口径
|
||||
|
||||
> ⚠️ 本表金额字段基于 `items_sum` 各分项(`table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money`),
|
||||
> 不使用 `consume_money`(存在三种历史口径混合)。
|
||||
|
||||
- order_date 优先取 pay_time,其次 create_time
|
||||
- recharge_order_flag:消费金额=0 且实付>0 时标记为充值订单
|
||||
- order_original_amount = 实付 + 优惠/抵扣
|
||||
- recharge_order_flag:`consume_money = 0` 且实付>0 时标记为充值订单(此处 consume_money 仅用于零值判断,不参与金额计算)
|
||||
- stored_card_deduct = `balance_amount`(恒等式:`balance_amount = recharge_card_amount + gift_card_amount`,三者不可相加)
|
||||
- order_original_amount = `total_paid_amount + total_coupon_deduction + member_discount_amount + manual_discount_amount`(实付 + 团购抵扣 + 会员折扣 + 手动调整)
|
||||
- external_paid_amount = total_paid_amount - stored_card_deduct(外部支付 = 实付 - 储值卡抵扣)
|
||||
- book_order_flow = 台费 + 助教 + 商品 + 团购
|
||||
- order_effective_recharge_cash:当前硬编码为 0,占位字段
|
||||
- order_effective_consume_cash = `GREATEST(total_paid_amount - stored_card_deduct, 0)`(与 external_paid_amount 同值)
|
||||
- order_effective_flow = `total_paid_amount`(当前实现)
|
||||
- net_income = total_paid_amount - refund_amount
|
||||
|
||||
## 使用说明
|
||||
@@ -81,4 +89,4 @@ ORDER BY order_date DESC;
|
||||
| 项目 | 说明 |
|
||||
|------|------|
|
||||
| 可回溯 | ✅ 完全可回溯 |
|
||||
| 依赖表 | dwd_settlement_head, dwd_table_fee_log, dwd_assistant_service_log, dwd_store_goods_sale, dwd_groupbuy_redemption, dwd_payment, dwd_refund |
|
||||
| 依赖表 | dwd_settlement_head, dwd_table_fee_log, dwd_assistant_service_log, dwd_store_goods_sale, dwd_groupbuy_redemption, dwd_refund, dwd_refund_ex |
|
||||
|
||||
@@ -0,0 +1,87 @@
|
||||
# group_buy_package_details 团购套餐详情
|
||||
|
||||
> 生成时间:2026-03-05
|
||||
|
||||
## 表信息
|
||||
|
||||
| 属性 | 值 |
|
||||
|------|-----|
|
||||
| Schema | ods |
|
||||
| 表名 | group_buy_package_details |
|
||||
| 主键 | coupon_id |
|
||||
| 数据来源 | `QueryPackageCouponInfo` 详情接口(二级拉取) |
|
||||
| DDL 路径 | `db/etl_feiqiu/ods/group_buy_package_details.sql` |
|
||||
| 说明 | 团购套餐详情 ODS 层,存储每个 couponId 的详情原始数据 |
|
||||
|
||||
## 数据获取方式
|
||||
|
||||
本表数据通过 `ODS_GROUP_PACKAGE` 任务的 **detail_endpoint 二级详情拉取** 子流程获取:
|
||||
1. 主流程先从 `QueryPackageCouponList` 拉取团购列表写入 `ods.group_buy_packages`
|
||||
2. 子流程遍历列表中每个 `id`,串行调用 `QueryPackageCouponInfo` 获取详情
|
||||
3. 详情数据写入本表,采用全量快照模式(`SnapshotMode.FULL_TABLE`),UPSERT on `coupon_id`
|
||||
|
||||
## 字段说明
|
||||
|
||||
| 序号 | 字段名 | 类型 | 可空 | 说明 |
|
||||
|------|--------|------|------|------|
|
||||
| 1 | coupon_id | BIGINT | NO(PK) | 团购套餐 ID(= groupPurchasePackage.id) |
|
||||
| 2 | package_name | TEXT | YES | 团购套餐名称 |
|
||||
| 3 | duration | INTEGER | YES | 台费计时时长(秒) |
|
||||
| 4 | start_time | TIMESTAMPTZ | YES | 可用日期开始 |
|
||||
| 5 | end_time | TIMESTAMPTZ | YES | 可用日期结束 |
|
||||
| 6 | add_start_clock | TEXT | YES | 可用时段开始(如 "00:00:00") |
|
||||
| 7 | add_end_clock | TEXT | YES | 可用时段结束(如 "1.00:00:00") |
|
||||
| 8 | is_enabled | INTEGER | YES | 是否启用(1=启用, 0=禁用) |
|
||||
| 9 | is_delete | INTEGER | YES | 是否已删除(1=已删除, 0=正常) |
|
||||
| 10 | site_id | BIGINT | YES | 店铺 ID |
|
||||
| 11 | tenant_id | BIGINT | YES | 租户 ID |
|
||||
| 12 | create_time | TIMESTAMPTZ | YES | 创建时间 |
|
||||
| 13 | creator_name | TEXT | YES | 创建人 |
|
||||
| 14 | table_area_ids | JSONB | YES | 可用台区 ID 列表(来自 groupPurchasePackage.tableAreaId) |
|
||||
| 15 | table_area_names | JSONB | YES | 可用台区名称列表(来自 groupPurchasePackage.tableAreaNameList) |
|
||||
| 16 | assistant_services | JSONB | YES | 助教服务关联数组(来自 packageCouponAssistants) |
|
||||
| 17 | groupon_site_infos | JSONB | YES | 关联门店信息数组(来自 grouponSiteInfos) |
|
||||
| 18 | package_services | JSONB | YES | 套餐服务数组(来自 packagePackageService,待调研) |
|
||||
| 19 | coupon_details_list | JSONB | YES | 券明细数组(来自 packageCouponDetailsList,待调研) |
|
||||
| 20 | content_hash | TEXT | YES | 业务字段内容哈希,用于变更检测 |
|
||||
| 21 | payload | JSONB | YES | 详情接口完整原始 JSON 响应 |
|
||||
| 22 | fetched_at | TIMESTAMPTZ | YES | ETL 拉取时间戳 |
|
||||
|
||||
## 与列表表的关系
|
||||
|
||||
```
|
||||
ods.group_buy_packages (列表)
|
||||
└── ods.group_buy_package_details (详情)
|
||||
关联字段:group_buy_packages.id = group_buy_package_details.coupon_id
|
||||
关系:1:1(每个列表记录对应一条详情)
|
||||
```
|
||||
|
||||
## 下游消费
|
||||
|
||||
DWD 层 `dwd.dim_groupbuy_package_ex` 在加载时通过 LEFT JOIN 本表,将 `table_area_ids`、`table_area_names`、`assistant_services`、`groupon_site_infos` 四个 JSONB 字段合并到扩展表。
|
||||
|
||||
## 使用说明
|
||||
|
||||
```sql
|
||||
-- 查询最新入库的详情记录
|
||||
SELECT coupon_id, package_name, table_area_names, assistant_services
|
||||
FROM ods.group_buy_package_details
|
||||
ORDER BY fetched_at DESC
|
||||
LIMIT 10;
|
||||
```
|
||||
|
||||
```sql
|
||||
-- 关联列表表查看完整信息
|
||||
SELECT p.id, p.package_name, p.selling_price,
|
||||
d.table_area_names, d.assistant_services, d.groupon_site_infos
|
||||
FROM ods.group_buy_packages p
|
||||
LEFT JOIN ods.group_buy_package_details d ON p.id = d.coupon_id
|
||||
WHERE p.is_delete IS DISTINCT FROM 1;
|
||||
```
|
||||
|
||||
## 可回溯性
|
||||
|
||||
| 项目 | 说明 |
|
||||
|------|------|
|
||||
| 可回溯 | ✅ 完全可回溯(保留 payload 原始 JSON) |
|
||||
| 数据来源 | `PackageCoupon/QueryPackageCouponInfo` API |
|
||||
@@ -0,0 +1,89 @@
|
||||
# 团购套餐详情(QueryPackageCouponInfo) → group_buy_package_details 字段映射
|
||||
|
||||
> 生成时间:2026-03-05
|
||||
|
||||
## 端点信息
|
||||
|
||||
| 属性 | 值 |
|
||||
|------|-----|
|
||||
| 接口路径 | `PackageCoupon/QueryPackageCouponInfo` |
|
||||
| 请求方法 | POST |
|
||||
| 请求参数 | `{ "couponId": <id> }`(从 `ods.group_buy_packages.id` 获取) |
|
||||
| ODS 对应表 | `ods.group_buy_package_details` |
|
||||
| JSON 数据路径 | `data` |
|
||||
| 调用方式 | 二级详情拉取(`ODS_GROUP_PACKAGE` 任务的 `detail_endpoint` 子流程) |
|
||||
|
||||
## 响应结构
|
||||
|
||||
```json
|
||||
{
|
||||
"data": {
|
||||
"groupPurchasePackage": {
|
||||
"id": 123,
|
||||
"packageName": "...",
|
||||
"duration": 3600,
|
||||
"startTime": "...",
|
||||
"endTime": "...",
|
||||
"addStartClock": "00:00:00",
|
||||
"addEndClock": "1.00:00:00",
|
||||
"isEnabled": 1,
|
||||
"isDelete": 0,
|
||||
"siteId": 456,
|
||||
"tenantId": 789,
|
||||
"createTime": "...",
|
||||
"creatorName": "...",
|
||||
"tableAreaId": [1, 2, 3],
|
||||
"tableAreaNameList": ["A区", "B区"]
|
||||
},
|
||||
"packageCouponAssistants": [...],
|
||||
"grouponSiteInfos": [...],
|
||||
"packagePackageService": [...],
|
||||
"packageCouponDetailsList": [...]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 字段映射
|
||||
|
||||
### 结构化字段(来自 data.groupPurchasePackage)
|
||||
|
||||
| JSON 路径 | ODS 列名 | 类型转换 | 说明 |
|
||||
|-----------|----------|----------|------|
|
||||
| data.groupPurchasePackage.id | coupon_id | int→BIGINT | 团购套餐 ID,主键 |
|
||||
| data.groupPurchasePackage.packageName | package_name | string→TEXT | 套餐名称 |
|
||||
| data.groupPurchasePackage.duration | duration | int→INTEGER | 台费计时时长(秒) |
|
||||
| data.groupPurchasePackage.startTime | start_time | string→TIMESTAMPTZ | 可用日期开始 |
|
||||
| data.groupPurchasePackage.endTime | end_time | string→TIMESTAMPTZ | 可用日期结束 |
|
||||
| data.groupPurchasePackage.addStartClock | add_start_clock | string→TEXT | 可用时段开始 |
|
||||
| data.groupPurchasePackage.addEndClock | add_end_clock | string→TEXT | 可用时段结束 |
|
||||
| data.groupPurchasePackage.isEnabled | is_enabled | int→INTEGER | 是否启用 |
|
||||
| data.groupPurchasePackage.isDelete | is_delete | int→INTEGER | 是否已删除 |
|
||||
| data.groupPurchasePackage.siteId | site_id | int→BIGINT | 店铺 ID |
|
||||
| data.groupPurchasePackage.tenantId | tenant_id | int→BIGINT | 租户 ID |
|
||||
| data.groupPurchasePackage.createTime | create_time | string→TIMESTAMPTZ | 创建时间 |
|
||||
| data.groupPurchasePackage.creatorName | creator_name | string→TEXT | 创建人 |
|
||||
|
||||
### JSONB 数组字段
|
||||
|
||||
| JSON 路径 | ODS 列名 | 类型转换 | 说明 |
|
||||
|-----------|----------|----------|------|
|
||||
| data.groupPurchasePackage.tableAreaId | table_area_ids | array→JSONB | 可用台区 ID 列表 |
|
||||
| data.groupPurchasePackage.tableAreaNameList | table_area_names | array→JSONB | 可用台区名称列表 |
|
||||
| data.packageCouponAssistants | assistant_services | array→JSONB | 助教服务关联(含 skillId/assistantLevel/assistantDuration) |
|
||||
| data.grouponSiteInfos | groupon_site_infos | array→JSONB | 关联门店信息(含 siteId/siteName) |
|
||||
| data.packagePackageService | package_services | array→JSONB | 套餐服务数组(待调研,可能为空) |
|
||||
| data.packageCouponDetailsList | coupon_details_list | array→JSONB | 券明细数组(待调研,可能为空) |
|
||||
|
||||
## ETL 补充字段
|
||||
|
||||
| ODS 列名 | 生成逻辑 |
|
||||
|-----------|----------|
|
||||
| content_hash | 基于原始 payload + is_delete 计算 SHA-256 |
|
||||
| payload | 完整原始 JSON 响应(`data` 节点) |
|
||||
| fetched_at | ETL 拉取时间戳(`DEFAULT now()`) |
|
||||
|
||||
## 写入策略
|
||||
|
||||
- 全量快照模式(`SnapshotMode.FULL_TABLE`)
|
||||
- UPSERT on `coupon_id`,每次运行全量处理所有记录(内容有变化的记录被覆盖)
|
||||
- 通过 `content_hash` 去重,内容未变则跳过写入
|
||||
@@ -51,7 +51,7 @@ graph LR
|
||||
| 文档 | 说明 |
|
||||
|------|------|
|
||||
| [BaseTask 公共机制](base_task_mechanism.md) | 任务基类模板方法、TaskContext、时间窗口、注册表、Flow 执行 |
|
||||
| [ODS 层任务](ods_tasks.md) | 23 个通用 ODS 任务的架构、配置结构、API 端点、目标表 |
|
||||
| [ODS 层任务](ods_tasks.md) | 22 个通用 ODS 任务的架构、配置结构、API 端点、目标表 |
|
||||
| [DWD 层任务](dwd_tasks.md) | DWD_LOAD_FROM_ODS 核心装载、SCD2 处理、质量校验 |
|
||||
| [DWS 层任务](dws_tasks.md) | 助教业绩、会员分析、项目标签、财务统计、库存汇总、运维任务共 19 个 DWS 任务 |
|
||||
| [INDEX 层任务](index_tasks.md) | WBI/NCI/RS/SPI 指数算法 + ML 手动台账导入 |
|
||||
@@ -69,10 +69,9 @@ graph LR
|
||||
|----------|-----------|--------|----------|------|
|
||||
| `ODS_ASSISTANT_ACCOUNT` | `OdsAssistantAccountsTask` | `ods.assistant_accounts_master` | 助教账号档案 | [查看](ods_tasks.md) |
|
||||
| `ODS_ASSISTANT_LEDGER` | `OdsAssistantLedgerTask` | `ods.assistant_service_records` | 助教服务流水 | [查看](ods_tasks.md) |
|
||||
| `ODS_ASSISTANT_ABOLISH` | `OdsAssistantAbolishTask` | `ods.assistant_cancellation_records` | 助教废除记录 | [查看](ods_tasks.md) |
|
||||
| `ODS_INVENTORY_CHANGE` | `OdsInventoryChangeTask` | `ods.goods_stock_movements` | 库存变化记录 | [查看](ods_tasks.md) |
|
||||
| `ODS_INVENTORY_STOCK` | `OdsInventoryStockTask` | `ods.goods_stock_summary` | 库存汇总 | [查看](ods_tasks.md) |
|
||||
| `ODS_GROUP_PACKAGE` | `OdsPackageTask` | `ods.group_buy_packages` | 团购套餐定义 | [查看](ods_tasks.md) |
|
||||
| `ODS_GROUP_PACKAGE` | `OdsPackageTask` | `ods.group_buy_packages` | 团购套餐定义 + 详情子流程(通过 `detail_endpoint` 串行调用 `QueryPackageCouponInfo` 获取每个团购的详情数据,写入 `ods.group_buy_package_details`) | [查看](ods_tasks.md) |
|
||||
| `ODS_GROUP_BUY_REDEMPTION` | `OdsGroupBuyRedemptionTask` | `ods.group_buy_redemption_records` | 团购套餐核销 | [查看](ods_tasks.md) |
|
||||
| `ODS_MEMBER` | `OdsMemberTask` | `ods.member_profiles` | 会员档案 | [查看](ods_tasks.md) |
|
||||
| `ODS_MEMBER_BALANCE` | `OdsMemberBalanceTask` | `ods.member_balance_changes` | 会员余额变动 | [查看](ods_tasks.md) |
|
||||
|
||||
@@ -77,13 +77,15 @@ load(extracted, context) → 遍历 TABLE_MAP
|
||||
| `dwd.dim_goods_category` | `ods.stock_goods_category_tree` | 商品分类维度(含子类展开) |
|
||||
| `dwd.dim_groupbuy_package` | `ods.group_buy_packages` | 团购套餐维度 |
|
||||
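| `dwd.dim_groupbuy_package_ex` | `ods.group_buy_packages` | 团购套餐扩展(加载时 LEFT JOIN `ods.group_buy_package_details`,合并 `table_area_ids`、`table_area_names`、`assistant_services`、`groupon_site_infos` 四个 JSONB 字段) |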
|
||||
| `dwd.dim_staff` | `ods.staff_info_master` | 员工维度 |
|
||||
| `dwd.dim_staff_ex` | `ods.staff_info_master` | 员工扩展 |
|
||||
|
||||
|
||||
#### 事实表映射
|
||||
|
||||
| DWD 表 | ODS 源表 | 说明 |
|
||||
|--------|----------|------|
|
||||
| `dwd.dwd_settlement_head` | `ods.settlement_records` | 结算头(订单结算主记录) |
|
||||
| `dwd.dwd_settlement_head` | `ods.settlement_records` | 结算头(订单结算主记录)— 详见下方「结算头关键字段口径」 |
|
||||
| `dwd.dwd_settlement_head_ex` | `ods.settlement_records` | 结算头扩展(支付方式、撤单、促销等) |
|
||||
| `dwd.dwd_table_fee_log` | `ods.table_fee_transactions` | 台费流水 |
|
||||
| `dwd.dwd_table_fee_log_ex` | `ods.table_fee_transactions` | 台费流水扩展(销售员、消费类型等) |
|
||||
@@ -93,8 +95,8 @@ load(extracted, context) → 遍历 TABLE_MAP
|
||||
| `dwd.dwd_store_goods_sale_ex` | `ods.store_goods_sales_records` | 商品销售扩展 |
|
||||
| `dwd.dwd_assistant_service_log` | `ods.assistant_service_records` | 助教服务记录 |
|
||||
| `dwd.dwd_assistant_service_log_ex` | `ods.assistant_service_records` | 助教服务扩展 |
|
||||
| `dwd.dwd_assistant_trash_event` | `ods.assistant_cancellation_records` | 助教取消/废单事件 |
|
||||
| `dwd.dwd_assistant_trash_event_ex` | `ods.assistant_cancellation_records` | 助教取消扩展 |
|
||||
| ~~`dwd.dwd_assistant_trash_event`~~ | ~~`ods.assistant_cancellation_records`~~ | ~~助教取消/废单事件(2026-02-22 DROP,2026-03-01 清理残留)~~ |
|
||||
| ~~`dwd.dwd_assistant_trash_event_ex`~~ | ~~`ods.assistant_cancellation_records`~~ | ~~助教取消扩展(2026-02-22 DROP,2026-03-01 清理残留)~~ |
|
||||
| `dwd.dwd_member_balance_change` | `ods.member_balance_changes` | 会员余额变动 |
|
||||
| `dwd.dwd_member_balance_change_ex` | `ods.member_balance_changes` | 会员余额变动扩展 |
|
||||
| `dwd.dwd_groupbuy_redemption` | `ods.group_buy_redemption_records` | 团购核销记录 |
|
||||
@@ -106,8 +108,48 @@ load(extracted, context) → 遍历 TABLE_MAP
|
||||
| `dwd.dwd_payment` | `ods.payment_transactions` | 支付记录 |
|
||||
| `dwd.dwd_refund` | `ods.refund_transactions` | 退款记录 |
|
||||
| `dwd.dwd_refund_ex` | `ods.refund_transactions` | 退款扩展 |
|
||||
| `dwd.dwd_goods_stock_summary` | `ods.goods_stock_summary` | 库存汇总 |
|
||||
| `dwd.dwd_goods_stock_movement` | `ods.goods_stock_movements` | 库存变动 |
|
||||
|
||||
> 共计 **17 对维度映射**(含 `_ex`)+ **23 对事实映射**(含 `_ex`)= **40 对**映射。
|
||||
> 共计 **19 对维度映射**(含 `_ex`)+ **23 对事实映射**(含 `_ex`,已排除 2026-02-22 DROP 的 assistant_trash_event)= **42 对**有效映射。
|
||||
|
||||
---
|
||||
|
||||
### 结算头关键字段口径
|
||||
|
||||
`dwd_settlement_head` 是核心交易事实表,以下字段在下游消费时需特别注意:
|
||||
|
||||
#### settle_type 枚举
|
||||
|
||||
| 值 | 含义 | 说明 |
|
||||
|----|------|------|
|
||||
| 1 | 台桌结账 | 正常台桌消费结账 |
|
||||
| 3 | 商城订单 | 商品零售订单 |
|
||||
| 6 | 退货订单 | 商品退货 |
|
||||
| 7 | 退款订单 | 金额退款 |
|
||||
|
||||
> DWS 层计算发生额、收入等指标时,通常只取 `settle_type IN (1, 3)`(正向交易),排除退货/退款。
|
||||
> 本表无 `is_delete` 字段,不可用 `is_delete` 过滤。
|
||||
|
||||
#### consume_money 口径警告
|
||||
|
||||
`consume_money` 存在三种历史口径(A/B/C),**DWS 层不应直接使用**。
|
||||
应使用 `items_sum` 口径:
|
||||
|
||||
```
|
||||
items_sum = table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money
|
||||
```
|
||||
|
||||
> 详见 [consume_money 口径校准文档](../../../../docs/reports/DWD-DOC/consume/consume-money-caliber.md)
|
||||
> 及 [BD 手册 dwd_settlement_head](../database/DWD/main/BD_manual_dwd_settlement_head.md)
|
||||
|
||||
#### 支付渠道恒等式
|
||||
|
||||
```
|
||||
balance_amount = recharge_card_amount + gift_card_amount   -- 储值卡 = 现金充值卡 + 赠送卡
|
||||
```
|
||||
|
||||
> `balance_amount` 是独立支付渠道,`recharge_card_amount` / `gift_card_amount` 是其分账明细,三者不可重复计算。
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
## 概述
|
||||
|
||||
DWS 层共有 17 个已注册任务(含 DWS_MAINTENANCE),按业务域分为五组:
|
||||
DWS 层共有 19 个已注册任务(含 DWS_MAINTENANCE),按业务域分为六组:
|
||||
|
||||
### 助教业绩域(6 个)
|
||||
|
||||
@@ -28,6 +28,13 @@ DWS 层共有 17 个已注册任务(含 DWS_MAINTENANCE),按业务域分
|
||||
| `DWS_MEMBER_CONSUMPTION` | `MemberConsumptionTask` | `dws_member_consumption_summary` | 日期+会员 | delete-before-insert |
|
||||
| `DWS_MEMBER_VISIT` | `MemberVisitTask` | `dws_member_visit_detail` | 日期+会员+结账单 | delete-before-insert |
|
||||
|
||||
### 项目标签域(2 个)
|
||||
|
||||
| 任务代码 | Python 类 | 目标表 | 粒度 | 更新策略 |
|
||||
|----------|-----------|--------|------|----------|
|
||||
| `DWS_ASSISTANT_PROJECT_TAG` | `AssistantProjectTagTask` | `dws_assistant_project_tag` | 助教+时间窗口+项目 | 全量删除重建(按 site_id) |
|
||||
| `DWS_MEMBER_PROJECT_TAG` | `MemberProjectTagTask` | `dws_member_project_tag` | 会员+时间窗口+项目 | 全量删除重建(按 site_id) |
|
||||
|
||||
### 财务统计域(4 个)
|
||||
|
||||
| 任务代码 | Python 类 | 目标表 | 粒度 | 更新策略 |
|
||||
@@ -373,7 +380,7 @@ DWS 汇总计算涉及历史月份时,不能直接使用维度表的"当前版
|
||||
|
||||
```
|
||||
dwd_assistant_service_log ──┬──► DWS_ASSISTANT_DAILY(日度明细)
|
||||
dwd_assistant_trash_event ──┘ │
|
||||
dwd_assistant_service_log_ex ┘ │
|
||||
▼
|
||||
DWS_ASSISTANT_MONTHLY(月度汇总+档位+排名)
|
||||
│
|
||||
@@ -448,7 +455,7 @@ dwd_assistant_service_log ────► DWS_ASSISTANT_CUSTOMER(客户关系
|
||||
| 来源表 | Schema | 用途 |
|
||||
|--------|--------|------|
|
||||
| `dwd_assistant_service_log` | `dwd` | 助教服务流水(主数据源) |
|
||||
| `dwd_assistant_trash_event` | `dwd` | 废除记录(排除无效业绩) |
|
||||
| `dwd_assistant_service_log_ex` | `dwd` | 扩展表(`is_trash` 标记废除记录) |
|
||||
| `dim_assistant` | `dwd` | 助教维度(SCD2,获取当日等级) |
|
||||
| `cfg_skill_type` | `dws` | 技能 → 课程类型映射 |
|
||||
|
||||
@@ -459,21 +466,23 @@ dwd_assistant_service_log ────► DWS_ASSISTANT_CUSTOMER(客户关系
|
||||
| 字段分组 | 字段 | 说明 |
|
||||
|----------|------|------|
|
||||
| 标识 | `site_id`, `tenant_id`, `assistant_id`, `assistant_nickname`, `stat_date` | 门店、助教、日期 |
|
||||
| 等级 | `assistant_level_code`, `assistant_level_name` | SCD2 as-of 取值,取统计日当日生效的等级 |
|
||||
| 等级 | `assistant_level_code`, `assistant_level_name` | SCD2 as-of 取值(`level_code`),`level_name` 由 code 静态映射得出 |
|
||||
| 服务次数 | `total_service_count`, `base_service_count`, `bonus_service_count`, `room_service_count` | 总/基础课/附加课/包厢课 |
|
||||
| 计费秒数 | `total_seconds`, `base_seconds`, `bonus_seconds`, `room_seconds` | 原始秒数 |
|
||||
| 计费小时 | `total_hours`, `base_hours`, `bonus_hours`, `room_hours` | 秒数 ÷ 3600,`Decimal` 精度 |
|
||||
| 计费金额 | `total_ledger_amount`, `base_ledger_amount`, `bonus_ledger_amount`, `room_ledger_amount` | 台账金额 |
|
||||
| 去重统计 | `unique_customers`, `unique_tables` | 去重客户数(排除散客)、去重台桌数 |
|
||||
| 废除统计 | `trashed_seconds`, `trashed_count` | 被废除的秒数和次数 |
|
||||
| 惩罚检测 | `penalty_minutes`, `penalty_reason`, `is_exempt`, `per_hour_contribution` | 惩罚分钟数(公式:`actual_minutes × (1 - per_hour_contribution / 24)`)、惩罚原因、是否豁免、每小时贡献金额(= `base_ledger_amount / base_hours / overlap_count`) |
|
||||
|
||||
#### 核心业务逻辑
|
||||
|
||||
1. **课程类型分类**:通过 `skill_id` 查询 `cfg_skill_type` 映射,分为 `BASE`(基础课)、`BONUS`(附加课)、`ROOM`(包厢课),未匹配默认 `BASE`
|
||||
2. **废除记录排除**:以 `assistant_service_id` 为键构建废除索引,被废除的服务记录不计入有效业绩(服务次数、时长、金额),但单独统计 `trashed_seconds` 和 `trashed_count`
|
||||
2. **废除记录排除**:通过 JOIN `dwd_assistant_service_log_ex` 的 `is_trash = 1` 标记识别废除记录(`dwd_assistant_trash_event` 已于 2026-02-22 废弃),被废除的服务记录不计入有效业绩(服务次数、时长、金额),但单独统计 `trashed_seconds` 和 `trashed_count`
|
||||
3. **助教等级 SCD2 取值**:调用 `get_assistant_level_asof(assistant_id, service_date)` 获取统计日当日生效的等级版本,而非当前最新版本
|
||||
4. **散客过滤**:`unique_customers` 统计时排除 `member_id` 为 0 或 None 的散客
|
||||
5. **客户/台桌去重**:无论服务记录是否被废除,客户和台桌均参与去重统计
|
||||
6. **定档折算惩罚检测**:聚合完成后,检测同一台桌多名助教重叠挂台的违规情况(规则2)。计算 `per_hour_contribution = base_ledger_amount / base_hours / overlap_count`,若低于阈值(默认 24 元/小时)则按比例扣减 `penalty_minutes`。豁免助教(`is_exempt = True`)不参与惩罚计算。
|
||||
|
||||
---
|
||||
|
||||
@@ -813,6 +822,9 @@ dim_table ────────────────────┘
|
||||
| 全量累计 | `first_consume_date`, `last_consume_date`, `total_visit_count`, `total_consume_amount`, `total_recharge_amount`, `total_table_fee`, `total_goods_amount`, `total_assistant_amount` | 首次/最近消费日期、累计到店次数、累计消费金额、累计充值金额、累计台费、累计商品金额、累计助教费用 |
|
||||
| 滚动窗口(次数) | `visit_count_7d`, `visit_count_10d`, `visit_count_15d`, `visit_count_30d`, `visit_count_60d`, `visit_count_90d` | 各窗口到店次数 |
|
||||
| 滚动窗口(金额) | `consume_amount_7d`, `consume_amount_10d`, `consume_amount_15d`, `consume_amount_30d`, `consume_amount_60d`, `consume_amount_90d` | 各窗口消费金额 |
|
||||
| 充值窗口(笔数) | `recharge_count_30d`, `recharge_count_60d`, `recharge_count_90d` | 近 30/60/90 天充值笔数(来源:dwd_recharge_order) |
|
||||
| 充值窗口(金额) | `recharge_amount_30d`, `recharge_amount_60d`, `recharge_amount_90d` | 近 30/60/90 天充值金额(仅 `pay_amount` 现金部分,不含 `point_amount` 赠送) |
|
||||
| 次均消费 | `avg_ticket_amount` | total_consume_amount / MAX(total_visit_count, 1) |
|
||||
| 卡余额 | `cash_card_balance`, `gift_card_balance`, `total_card_balance` | 储值卡(现金卡)余额、赠送卡余额、总余额 |
|
||||
| 活跃度 | `days_since_last`, `is_active_7d`, `is_active_30d`, `is_active_90d` | 距最近消费天数、近 7/30/90 天是否活跃 |
|
||||
| 客户分层 | `customer_tier` | 分层标签(高价值/中等/低活跃/流失) |
|
||||
@@ -821,15 +833,17 @@ dim_table ────────────────────┘
|
||||
|
||||
**1. 散客排除**
|
||||
|
||||
`member_id` 为 0 或 None 的散客不进入此表统计。SQL 层面和 transform 阶段均做过滤。
|
||||
`member_id` 为 0 或 None 的散客不进入此表统计。SQL 层面和 transform 阶段均做过滤,同时通过 `settle_type IN (1, 3)` 仅保留台桌结账和商城订单(排除退货/退款)。
|
||||
|
||||
**2. 消费统计来源**
|
||||
|
||||
从 `dwd_settlement_head` 按 `member_id` 聚合,消费金额拆分为:
|
||||
- `consume_money`:总消费金额
|
||||
从 `dwd_settlement_head` 按 `member_id` 聚合,消费金额使用 `items_sum` 口径拆分为:
|
||||
- `items_sum`:消费项目合计(= `table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money`)
|
||||
- `table_charge_money`:台费
|
||||
- `goods_money`:商品金额
|
||||
- `assistant_pd_money + assistant_cx_money`:助教费用(专业课 + 陪练课合计)
|
||||
- `assistant_pd_money + assistant_cx_money`:助教费用(陪打 + 超休合计)
|
||||
|
||||
> ⚠️ 不使用 `consume_money`(三种历史口径混合),详见 `docs/reports/DWD-DOC/consume/consume-money-caliber.md`
|
||||
|
||||
**3. 滚动窗口**
|
||||
|
||||
@@ -909,7 +923,7 @@ dim_table ────────────────────┘
|
||||
| 会员信息 | `member_nickname`, `member_mobile`, `member_birthday` | 昵称、脱敏手机号、生日 |
|
||||
| 台桌信息 | `table_id`, `table_name`, `area_name`, `area_category` | 台桌 ID、台桌名称、区域名称、区域分类 |
|
||||
| 消费金额 | `table_fee`, `goods_amount`, `assistant_amount`, `total_consume`, `total_discount`, `actual_pay` | 台费、商品金额、助教费用、总消费、总优惠、实付金额 |
|
||||
| 支付方式 | `cash_pay`, `cash_card_pay`, `gift_card_pay`, `groupbuy_pay` | 现金/在线支付、储值卡支付、赠送卡支付、团购券支付 |
|
||||
| 支付方式 | `cash_pay`, `balance_pay`, `recharge_card_pay`, `gift_card_pay`, `groupbuy_pay` | 现金/在线支付、储值卡总支付、现金充值卡支付、赠送卡支付、团购券支付 |
|
||||
| 时长 | `table_duration_min`, `assistant_duration_min` | 台桌使用时长(分钟)、助教服务时长(分钟) |
|
||||
| 助教服务 | `assistant_services` | JSON 格式的助教服务明细 |
|
||||
|
||||
@@ -917,15 +931,15 @@ dim_table ────────────────────┘
|
||||
|
||||
**1. 散客排除**
|
||||
|
||||
SQL 层面通过 `member_id IS NOT NULL AND member_id != 0` 过滤,transform 阶段通过 `is_guest()` 二次过滤。
|
||||
SQL 层面通过 `member_id IS NOT NULL AND member_id != 0` 过滤,同时通过 `settle_type IN (1, 3)` 仅保留台桌结账和商城订单(排除退货/退款),transform 阶段通过 `is_guest()` 二次过滤。
|
||||
|
||||
**2. 消费金额拆分**
|
||||
|
||||
从 `dwd_settlement_head` 直接读取各金额字段:
|
||||
- `table_fee`:`table_charge_money`(台费)
|
||||
- `goods_amount`:`goods_money`(商品金额)
|
||||
- `assistant_amount`:`assistant_pd_money + assistant_cx_money`(专业课 + 陪练课助教费用合计)
|
||||
- `total_consume`:`consume_money`(总消费金额)
|
||||
- `assistant_amount`:`assistant_pd_money + assistant_cx_money`(陪打 + 超休助教费用合计)
|
||||
- `total_consume`:`items_sum`(= `table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money`,不使用 `consume_money`)
|
||||
- `actual_pay`:`pay_amount`(实付金额)
|
||||
|
||||
**3. 总优惠计算**
|
||||
@@ -943,7 +957,8 @@ total_discount = adjust_amount + member_discount_amount + rounding_amount
|
||||
| 字段 | 来源字段 | 说明 |
|
||||
|------|----------|------|
|
||||
| `cash_pay` | `pay_amount` | 现金/在线支付 |
|
||||
| `cash_card_pay` | `balance_amount` | 储值卡(现金卡)支付 |
|
||||
| `balance_pay` | `balance_amount` | 储值卡总支付(= recharge_card_pay + gift_card_pay) |
|
||||
| `recharge_card_pay` | `recharge_card_amount` | 现金充值卡支付(balance_pay 的子项) |
|
||||
| `gift_card_pay` | `gift_card_amount` | 赠送卡支付 |
|
||||
| `groupbuy_pay` | `coupon_amount` | 团购券支付 |
|
||||
|
||||
@@ -1046,12 +1061,12 @@ dwd_member_balance_change ────┘
|
||||
|----------|------|------|
|
||||
| 标识 | `site_id`, `tenant_id`, `stat_date` | 门店、统计日期 |
|
||||
| 发生额 | `gross_amount`, `table_fee_amount`, `goods_amount`, `assistant_pd_amount`, `assistant_cx_amount` | 正价总额及按类型拆分(台费/商品/陪打/超休) |
|
||||
| 优惠 | `discount_total`, `discount_groupbuy`, `discount_vip`, `discount_gift_card`, `discount_manual`, `discount_rounding`, `discount_other` | 优惠合计及按类型拆分 |
|
||||
| 优惠 | `discount_total`, `discount_groupbuy`, `discount_vip`, `discount_gift_card`, `discount_manual`, `discount_rounding`, `discount_other` | 优惠合计及按类型拆分(discount_manual=大客户优惠,discount_other=其他手动调整,两者互斥) |
|
||||
| 确认收入 | `confirmed_income` | 发生额 - 优惠合计 |
|
||||
| 现金流入 | `cash_inflow_total`, `cash_pay_amount`, `groupbuy_pay_amount`, `platform_settlement_amount`, `recharge_cash_inflow` | 现金流入合计及来源拆分 |
|
||||
| 现金流出 | `cash_outflow_total`, `platform_fee_amount` | 现金流出合计(支出 + 平台费用) |
|
||||
| 现金净变动 | `cash_balance_change` | 流入 - 流出 |
|
||||
| 卡消费 | `card_consume_total`, `cash_card_consume`, `gift_card_consume` | 储值卡消费 + 赠送卡消费 |
|
||||
| 卡消费 | `card_consume_total`, `recharge_card_consume`, `gift_card_consume` | 现金充值卡消费(= `recharge_card_amount`)+ 赠送卡消费 |
|
||||
| 充值统计 | `recharge_count`, `recharge_total`, `recharge_cash`, `recharge_gift`, `first_recharge_count`, `first_recharge_amount`, `renewal_count`, `renewal_amount` | 充值笔数/金额、首充/续充拆分 |
|
||||
| 订单统计 | `order_count`, `member_order_count`, `guest_order_count`, `avg_order_amount` | 总订单数、会员/散客订单数、客单价 |
|
||||
|
||||
@@ -1063,7 +1078,9 @@ dwd_member_balance_change ────┘
|
||||
gross_amount = table_charge_money + goods_money + assistant_pd_money + assistant_cx_money
|
||||
```
|
||||
|
||||
从 `dwd_settlement_head` 按 `DATE(pay_time)` 聚合,分别统计台费、商品、专业课(PD)、陪练课(CX)四类收入。
|
||||
> 注意:`gross_amount` 为发生额(正价四项),不含 `electricity_money`。完整消费项目合计(`items_sum`)还需加上 `electricity_money`。
|
||||
|
||||
从 `dwd_settlement_head` 按 `biz_date(pay_time)` 聚合,通过 `settle_type IN (1, 3)` 仅保留台桌结账和商城订单(排除退货/退款),分别统计台费、商品、陪打(PD)、超休(CX)四类收入。
|
||||
|
||||
**2. 团购优惠计算**
|
||||
|
||||
@@ -1096,10 +1113,12 @@ discount_other = adjust_amount - big_customer_amount (负值置 0)
|
||||
**5. 优惠合计与确认收入**
|
||||
|
||||
```
|
||||
discount_total = discount_groupbuy + discount_vip + discount_gift_card + discount_manual + discount_rounding
|
||||
discount_total = discount_groupbuy + discount_vip + discount_gift_card + discount_manual + discount_rounding + discount_other
|
||||
confirmed_income = gross_amount - discount_total
|
||||
```
|
||||
|
||||
> `discount_manual` = 大客户优惠,`discount_other` = 其他手动调整,两者互斥,之和 = adjust_amount。
|
||||
|
||||
**6. 现金流计算**
|
||||
|
||||
```
|
||||
@@ -1123,11 +1142,13 @@ daily_expense = expense_amount / days_in_month
|
||||
**8. 卡消费统计**
|
||||
|
||||
```
|
||||
cash_card_consume = recharge_card_amount + balance_amount (储值卡支付)
|
||||
recharge_card_consume = recharge_card_amount (现金充值卡支付部分)
|
||||
gift_card_consume = 赠送卡消费总额 (来自余额变动)
|
||||
card_consume_total = cash_card_consume + gift_card_consume
|
||||
card_consume_total = recharge_card_consume + gift_card_consume
|
||||
```
|
||||
|
||||
> 注意:`balance_amount = recharge_card_amount + gift_card_amount`(恒等式),因此 `recharge_card_consume` 只取 `recharge_card_amount`,不可再加 `balance_amount`,否则重复计算。
|
||||
|
||||
---
|
||||
|
||||
### DWS_FINANCE_RECHARGE — 充值统计
|
||||
@@ -1170,7 +1191,7 @@ card_consume_total = cash_card_consume + gift_card_consume
|
||||
|
||||
每笔充值金额拆分为:
|
||||
```
|
||||
充值总额 = pay_money(现金部分)+ gift_money(赠送部分)
|
||||
充值总额 = pay_amount(现金部分)+ point_amount(赠送部分)
|
||||
```
|
||||
|
||||
**2. 会员去重统计**
|
||||
@@ -1236,14 +1257,14 @@ total_card_balance = cash_card_balance + gift_card_balance
|
||||
|
||||
**维度 1:按收入类型(`structure_type = 'INCOME_TYPE'`)**
|
||||
|
||||
从 `dwd_settlement_head` 按 `pay_time::DATE` 聚合,仅统计已结账订单(`settle_status = 1`),每日展开为 4 条记录:
|
||||
从 `dwd_settlement_head` 按 `pay_time::DATE` 聚合,仅统计正向交易订单(`settle_type IN (1, 3)`,即台桌结账和商城订单,排除退货/退款),每日展开为 4 条记录:
|
||||
|
||||
| category_code | category_name | 来源字段 | 说明 |
|
||||
|---------------|---------------|----------|------|
|
||||
| `TABLE_FEE` | 台费收入 | `table_charge_money` | 台桌使用费 |
|
||||
| `GOODS` | 商品收入 | `goods_money` | 商品销售 |
|
||||
| `ASSISTANT_BASE` | 助教基础课 | `assistant_pd_money` | 专业课(PD=陪打) |
|
||||
| `ASSISTANT_BONUS` | 助教附加课 | `assistant_cx_money` | 附加课(CX=超休/促销) |
|
||||
| `ASSISTANT_PD` | 助教陪打 | `assistant_pd_money` | 陪打收入 |
|
||||
| `ASSISTANT_CX` | 助教超休 | `assistant_cx_money` | 超休收入 |
|
||||
|
||||
占比计算:`income_ratio = 该类型金额 / 当日四类收入总和`
|
||||
|
||||
@@ -1327,7 +1348,7 @@ total_card_balance = cash_card_balance + gift_card_balance
|
||||
团购优惠 = coupon_amount - 团购实付
|
||||
```
|
||||
|
||||
仅统计 `coupon_amount > 0` 的已结账订单(`settle_status = 1`)。
|
||||
仅统计 `coupon_amount > 0` 的正向交易订单(`settle_type IN (1, 3)`,即台桌结账和商城订单,排除退货/退款)。
|
||||
|
||||
**2. 赠送卡消费拆分**
|
||||
|
||||
@@ -1406,10 +1427,10 @@ dws_*(所有 DWS 汇总表)──────► DWS_MAINTENANCE(统一维
|
||||
| 商品 | `item_count`, `total_item_quantity` | 商品种类数、商品总数量 |
|
||||
| 费用明细 | `table_fee_amount`, `assistant_service_amount`, `goods_amount`, `group_amount` | 台费、助教费、商品金额、团购金额 |
|
||||
| 优惠 | `total_coupon_deduction`, `member_discount_amount`, `manual_discount_amount` | 团购抵扣、会员折扣、手动调整 |
|
||||
| 金额汇总 | `order_original_amount`, `order_final_amount` | 订单原价、实付金额 |
|
||||
| 支付方式 | `stored_card_deduct`, `external_paid_amount`, `total_paid_amount` | 储值卡抵扣、外部支付、总支付 |
|
||||
| 金额汇总 | `order_original_amount`, `order_final_amount` | 订单原价(= `total_paid_amount + total_coupon_deduction + member_discount_amount + manual_discount_amount`)、实付金额 |
|
||||
| 支付方式 | `stored_card_deduct`, `external_paid_amount`, `total_paid_amount` | 储值卡抵扣(= `balance_amount`)、外部支付、总支付 |
|
||||
| 台账流水 | `book_table_flow`, `book_assistant_flow`, `book_goods_flow`, `book_group_flow`, `book_order_flow` | 台费/助教/商品/团购/订单台账流水 |
|
||||
| 有效消费 | `order_effective_consume_cash`, `order_effective_recharge_cash`, `order_effective_flow` | 有效消费现金、有效充值现金、有效流水 |
|
||||
| 有效消费 | `order_effective_consume_cash`, `order_effective_recharge_cash`, `order_effective_flow` | 有效消费现金、有效充值现金(当前硬编码为 0,占位)、有效流水 |
|
||||
| 退款 | `refund_amount`, `net_income` | 退款金额、净收入 |
|
||||
|
||||
#### 核心业务逻辑
|
||||
@@ -1463,7 +1484,7 @@ net_income = total_paid_amount - refund_amount
|
||||
recharge_order_flag = (consume_money = 0 AND pay_amount > 0)
|
||||
```
|
||||
|
||||
消费金额为 0 但有支付金额的订单标记为充值订单。
|
||||
消费金额为 0 但有支付金额的订单标记为充值订单。此处 `consume_money` 仅用于零值判断(三种口径在 =0 时等价),不涉及金额聚合。
|
||||
|
||||
#### 配置参数
|
||||
|
||||
@@ -1638,3 +1659,116 @@ dwd_goods_stock_summary ──┬──► DWS_GOODS_STOCK_DAILY(日度汇总
|
||||
- `range_start_stock` 取该月第一条记录的值(期初快照)
|
||||
- `range_end_stock` / `current_stock` 取该月最后一条记录的值(期末快照)
|
||||
- `stat_period = 'monthly'`
|
||||
|
||||
|
||||
---
|
||||
|
||||
## 项目标签域
|
||||
|
||||
项目标签域包含 2 个任务,按时间窗口计算助教和客户在四大项目类型(BILLIARD/SNOOKER/MAHJONG/KTV)的时长占比,占比≥25% 则分配标签。数据流向为:
|
||||
|
||||
```
|
||||
dwd_assistant_service_log (income_seconds) ──┐
|
||||
├──► dim_table (site_table_id JOIN)
|
||||
dwd_table_fee_log (ledger_count) ────────────┘ │
|
||||
▼
|
||||
cfg_area_category (get_area_category)
|
||||
│
|
||||
┌──────────────────┴──────────────────┐
|
||||
▼ ▼
|
||||
DWS_ASSISTANT_PROJECT_TAG DWS_MEMBER_PROJECT_TAG
|
||||
(助教项目标签,6 个时间窗口) (客户项目标签,2 个时间窗口)
|
||||
```
|
||||
|
||||
### 公共逻辑
|
||||
|
||||
1. 数据链路走 `dim_table`(通过 `site_table_id` JOIN,`scd2_is_current=1`),获取 `area_name` 和 `table_name`
|
||||
2. 通过 `get_area_category(area_name, table_name)` 映射到 `category_code`
|
||||
3. 只保留四大项目(BILLIARD/SNOOKER/MAHJONG/KTV),排除 SPECIAL/OTHER
|
||||
4. 标签阈值:`TAG_THRESHOLD = 0.25`(25%)
|
||||
5. 更新策略:全量删除重建(按 `site_id` 删除后重新插入所有时间窗口)
|
||||
|
||||
---
|
||||
|
||||
### DWS_ASSISTANT_PROJECT_TAG — 助教项目标签
|
||||
|
||||
| 属性 | 值 |
|
||||
|------|-----|
|
||||
| 任务代码 | `DWS_ASSISTANT_PROJECT_TAG` |
|
||||
| Python 类 | `AssistantProjectTagTask`(`tasks/dws/assistant_project_tag_task.py`) |
|
||||
| 目标表 | `dws.dws_assistant_project_tag` |
|
||||
| 主键 | `site_id`, `assistant_id`, `time_window`, `category_code` |
|
||||
| 粒度 | 助教 + 时间窗口 + 项目类型 |
|
||||
| 更新策略 | 全量删除重建(按 site_id) |
|
||||
| 更新频率 | 每日更新 |
|
||||
| 依赖 | `DWD_LOAD_FROM_ODS` |
|
||||
|
||||
#### 数据来源
|
||||
|
||||
| 来源表 | Schema | 用途 |
|
||||
|--------|--------|------|
|
||||
| `dwd_assistant_service_log` | `dwd` | 助教服务流水(`income_seconds` 工作时长) |
|
||||
| `dim_table` | `dwd` | 台桌维度(SCD2 当前版本,`area_name` + `table_name`) |
|
||||
| `cfg_area_category` | `dws` | 区域分类映射(通过 ConfigCache 加载) |
|
||||
|
||||
#### 时间窗口
|
||||
|
||||
| 枚举值 | 说明 |
|
||||
|--------|------|
|
||||
| `THIS_MONTH` | 本月(月初 ~ 今天) |
|
||||
| `THIS_QUARTER` | 本季度(季度首月1日 ~ 今天) |
|
||||
| `LAST_MONTH` | 上月(上月初 ~ 上月末) |
|
||||
| `LAST_3_MONTHS_EXCL_CURRENT` | 前3个月不含本月 |
|
||||
| `LAST_QUARTER` | 上季度 |
|
||||
| `LAST_6_MONTHS` | 最近半年(不含本月) |
|
||||
|
||||
#### 核心业务逻辑
|
||||
|
||||
1. 从 `dwd_assistant_service_log` 按 `(site_assistant_id, site_table_id)` 聚合 `income_seconds`
|
||||
2. 通过 `dim_table` JOIN 获取台桌的 `area_name` 和 `table_name`
|
||||
3. 调用 `get_area_category(area_name, table_name)` 映射到 `category_code`
|
||||
4. 按 `(assistant_id, category_code)` 汇总各项目时长
|
||||
5. 计算占比:`percentage = duration_seconds / total_seconds`(四位小数)
|
||||
6. 占比 ≥ 0.25 标记 `is_tagged = TRUE`
|
||||
7. 过滤条件:`is_delete = 0`,营业日切点通过 `biz_date_sql_expr` 处理
|
||||
|
||||
---
|
||||
|
||||
### DWS_MEMBER_PROJECT_TAG — 客户项目标签
|
||||
|
||||
| 属性 | 值 |
|
||||
|------|-----|
|
||||
| 任务代码 | `DWS_MEMBER_PROJECT_TAG` |
|
||||
| Python 类 | `MemberProjectTagTask`(`tasks/dws/member_project_tag_task.py`) |
|
||||
| 目标表 | `dws.dws_member_project_tag` |
|
||||
| 主键 | `site_id`, `member_id`, `time_window`, `category_code` |
|
||||
| 粒度 | 会员 + 时间窗口 + 项目类型 |
|
||||
| 更新策略 | 全量删除重建(按 site_id) |
|
||||
| 更新频率 | 每日更新 |
|
||||
| 依赖 | `DWD_LOAD_FROM_ODS` |
|
||||
|
||||
#### 数据来源
|
||||
|
||||
| 来源表 | Schema | 用途 |
|
||||
|--------|--------|------|
|
||||
| `dwd_table_fee_log` | `dwd` | 台费流水(`ledger_count` 计费时长) |
|
||||
| `dim_table` | `dwd` | 台桌维度(SCD2 当前版本,`area_name` + `table_name`) |
|
||||
| `cfg_area_category` | `dws` | 区域分类映射(通过 ConfigCache 加载) |
|
||||
|
||||
#### 时间窗口
|
||||
|
||||
| 枚举值 | 说明 |
|
||||
|--------|------|
|
||||
| `LAST_30_DAYS` | 近30天(含今天,base_date-29天 ~ base_date) |
|
||||
| `LAST_60_DAYS` | 近60天(含今天,base_date-59天 ~ base_date) |
|
||||
|
||||
#### 核心业务逻辑
|
||||
|
||||
1. 从 `dwd_table_fee_log` 按 `(member_id, site_table_id)` 聚合 `ledger_count`
|
||||
2. 散客排除:`member_id IS NOT NULL AND member_id != 0`
|
||||
3. 通过 `dim_table` JOIN 获取台桌的 `area_name` 和 `table_name`
|
||||
4. 调用 `get_area_category(area_name, table_name)` 映射到 `category_code`
|
||||
5. 按 `(member_id, category_code)` 汇总各项目时长
|
||||
6. 计算占比:`percentage = duration_seconds / total_seconds`(四位小数)
|
||||
7. 占比 ≥ 0.25 标记 `is_tagged = TRUE`
|
||||
8. 过滤条件:`COALESCE(is_delete, 0) = 0`,营业日切点通过 `biz_date_sql_expr` 处理
|
||||
|
||||
@@ -78,7 +78,6 @@ API 返回的 JSON 响应通过两级路径定位数据:先按 `data_path`(
|
||||
| `ODS_SETTLEMENT_RECORDS` | ✅ | `(rangeStartTime, rangeEndTime)` | ❌ | ✅ | ✅ | `NONE` | — |
|
||||
| `ODS_TABLE_USE` | ❌ | 默认 | ❌ | ✅ | ✅ | `WINDOW` | `create_time` |
|
||||
| `ODS_ASSISTANT_LEDGER` | ✅ | 默认 | ❌ | ✅ | ✅ | `WINDOW` | `create_time` |
|
||||
| `ODS_ASSISTANT_ABOLISH` | ✅ | 默认 | ❌ | ✅ | ✅ | `NONE` | — |
|
||||
| `ODS_STORE_GOODS_SALES` | ✅ | `(startTime, endTime)` | ❌ | ✅ | ✅ | `WINDOW` | `create_time` |
|
||||
| `ODS_PAYMENT` | ❌ | 默认 | ❌ | ✅ | ✅ | `NONE` | — |
|
||||
| `ODS_REFUND` | ❌ | 默认 | ❌ | ✅ | ✅ | `WINDOW` | `pay_time` |
|
||||
@@ -88,6 +87,8 @@ API 返回的 JSON 响应通过两级路径定位数据:先按 `data_path`(
|
||||
| `ODS_MEMBER_BALANCE` | ❌ | 默认 | ❌ | ✅ | ✅ | `WINDOW` | `create_time` |
|
||||
| `ODS_RECHARGE_SETTLE` | ✅ | `(rangeStartTime, rangeEndTime)` | ✅ | ❌ | ✅ | `NONE` | — |
|
||||
| `ODS_GROUP_PACKAGE` | ❌ | 默认 | ❌ | ✅ | ✅ | `FULL_TABLE` | — |
|
||||
|
||||
> `ODS_GROUP_PACKAGE` 额外配置了 `detail_endpoint`,在主流程完成后串行调用 `QueryPackageCouponInfo` 获取每个团购的详情数据,写入 `ods.group_buy_package_details`。
|
||||
| `ODS_GROUP_BUY_REDEMPTION` | ❌ | 默认 | ❌ | ✅ | ✅ | `WINDOW` | `create_time` |
|
||||
| `ODS_INVENTORY_STOCK` | ❌ | 默认 | ❌ | ✅ | ✅ | `NONE` | — |
|
||||
| `ODS_INVENTORY_CHANGE` | ✅ | 默认 | ❌ | ✅ | ✅ | `NONE` | — |
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
## 概述
|
||||
|
||||
ODS 层采用**声明式配置**驱动的通用任务模式:由 `BaseOdsTask` + `OdsTaskSpec` 配置驱动,通过 `ODS_TASK_CLASSES` 字典动态注册,共 23 个任务。
|
||||
ODS 层采用**声明式配置**驱动的通用任务模式:由 `BaseOdsTask` + `OdsTaskSpec` 配置驱动,通过 `ODS_TASK_CLASSES` 字典动态注册,共 22 个任务。
|
||||
|
||||
所有 ODS 任务写入 `ods.*` 表,原始 API 响应以 JSON 格式存入 `payload` 列,元数据列(`fetched_at`、`source_file`、`content_hash` 等)自动填充。
|
||||
|
||||
@@ -22,7 +22,6 @@ ODS 层采用**声明式配置**驱动的通用任务模式:由 `BaseOdsTask`
|
||||
| `ODS_SETTLEMENT_RECORDS` | `OdsOrderSettleTask` | `/Site/GetAllOrderSettleList` | `settlement_records` | 结账记录 |
|
||||
| `ODS_TABLE_USE` | `OdsTableUseTask` | `/Site/GetSiteTableOrderDetails` | `table_fee_transactions` | 台费计费流水 |
|
||||
| `ODS_ASSISTANT_LEDGER` | `OdsAssistantLedgerTask` | `/AssistantPerformance/GetOrderAssistantDetails` | `assistant_service_records` | 助教服务流水 |
|
||||
| `ODS_ASSISTANT_ABOLISH` | `OdsAssistantAbolishTask` | `/AssistantPerformance/GetAbolitionAssistant` | `assistant_cancellation_records` | 助教废除记录 |
|
||||
| `ODS_STORE_GOODS_SALES` | `OdsGoodsLedgerTask` | `/TenantGoods/GetGoodsSalesList` | `store_goods_sales_records` | 门店商品销售流水 |
|
||||
| `ODS_PAYMENT` | `OdsPaymentTask` | `/PayLog/GetPayLogListPage` | `payment_transactions` | 支付流水 |
|
||||
| `ODS_REFUND` | `OdsRefundTask` | `/Order/GetRefundPayLogList` | `refund_transactions` | 退款流水 |
|
||||
@@ -31,7 +30,7 @@ ODS 层采用**声明式配置**驱动的通用任务模式:由 `BaseOdsTask`
|
||||
| `ODS_MEMBER_CARD` | `OdsMemberCardTask` | `/MemberProfile/GetTenantMemberCardList` | `member_stored_value_cards` | 会员储值卡 |
|
||||
| `ODS_MEMBER_BALANCE` | `OdsMemberBalanceTask` | `/MemberProfile/GetMemberCardBalanceChange` | `member_balance_changes` | 会员余额变动 |
|
||||
| `ODS_RECHARGE_SETTLE` | `OdsRechargeSettleTask` | `/Site/GetRechargeSettleList` | `recharge_settlements` | 充值结算 |
|
||||
| `ODS_GROUP_PACKAGE` | `OdsPackageTask` | `/PackageCoupon/QueryPackageCouponList` | `group_buy_packages` | 团购套餐定义 |
|
||||
| `ODS_GROUP_PACKAGE` | `OdsPackageTask` | `/PackageCoupon/QueryPackageCouponList` | `group_buy_packages` | 团购套餐定义(含详情子流程,见下方说明) |
|
||||
| `ODS_GROUP_BUY_REDEMPTION` | `OdsGroupBuyRedemptionTask` | `/Site/GetSiteTableUseDetails` | `group_buy_redemption_records` | 团购套餐核销 |
|
||||
| `ODS_INVENTORY_STOCK` | `OdsInventoryStockTask` | `/TenantGoods/GetGoodsStockReport` | `goods_stock_summary` | 库存汇总 |
|
||||
| `ODS_INVENTORY_CHANGE` | `OdsInventoryChangeTask` | `/GoodsStockManage/QueryGoodsOutboundReceipt` | `goods_stock_movements` | 库存变化记录 |
|
||||
@@ -44,6 +43,26 @@ ODS 层采用**声明式配置**驱动的通用任务模式:由 `BaseOdsTask`
|
||||
|
||||
> 所有目标表均位于 `ods` schema 下。
|
||||
|
||||
### ODS_GROUP_PACKAGE 详情子流程
|
||||
|
||||
`ODS_GROUP_PACKAGE` 任务通过 `detail_endpoint` 配置启用了二级详情拉取:
|
||||
|
||||
| 配置项 | 值 |
|
||||
|--------|-----|
|
||||
| `detail_endpoint` | `/PackageCoupon/QueryPackageCouponInfo` |
|
||||
| `detail_target_table` | `ods.group_buy_package_details` |
|
||||
| `detail_param_builder` | `lambda rec: {"couponId": rec["id"]}` |
|
||||
| `detail_data_path` | `("data",)` |
|
||||
| `detail_id_column` | `id` |
|
||||
|
||||
执行流程:
|
||||
1. 主流程从 `QueryPackageCouponList` 拉取团购列表 → 写入 `ods.group_buy_packages`
|
||||
2. 子流程从 `ods.group_buy_packages` 提取所有 `id`
|
||||
3. 串行调用 `QueryPackageCouponInfo`(通过 `UnifiedPipeline` + `RateLimiter`),获取每个团购的详情
|
||||
4. 详情数据经字段提取后写入 `ods.group_buy_package_details`(全量快照,UPSERT on `coupon_id`)
|
||||
|
||||
详情表字段映射见 `docs/database/ODS/mappings/mapping_QueryPackageCouponInfo_group_buy_package_details.md`。
|
||||
|
||||
---
|
||||
|
||||
## 通用 ODS 任务架构(BaseOdsTask + OdsTaskSpec 模式)
|
||||
@@ -228,7 +247,7 @@ execute(cursor_data)
|
||||
|
||||
### content_hash 去重机制
|
||||
|
||||
`content_hash` 是通用 ODS 任务的核心去重手段,所有 23 个任务默认开启(`skip_unchanged=True`)。
|
||||
`content_hash` 是通用 ODS 任务的核心去重手段,所有 22 个任务默认开启(`skip_unchanged=True`)。
|
||||
|
||||
#### 计算方式
|
||||
|
||||
@@ -277,8 +296,7 @@ ORDER BY id, fetched_at DESC;
|
||||
| `ODS_SETTLEMENT_RECORDS` | 是 | `NONE` | — | 结账记录,按时间窗口增量抓取 |
|
||||
| `ODS_TABLE_USE` | 否 | `WINDOW` | `create_time` | 台费计费流水 |
|
||||
| `ODS_ASSISTANT_LEDGER` | 是 | `WINDOW` | `create_time` | 助教服务流水 |
|
||||
| `ODS_ASSISTANT_ABOLISH` | 是 | `NONE` | — | 助教废除记录 |
|
||||
| `ODS_STORE_GOODS_SALES` | 否 | `WINDOW` | `create_time` | 门店商品销售流水 |
|
||||
| `ODS_STORE_GOODS_SALES` | 是 | `WINDOW` | `create_time` | 门店商品销售流水(2026-03-01 修复:`requires_window` 从 `False` 改为 `True`,新增 `time_fields=("startTime", "endTime")`) |
|
||||
| `ODS_PAYMENT` | 否 | `NONE` | — | 支付流水 |
|
||||
| `ODS_REFUND` | 否 | `WINDOW` | `pay_time` | 退款流水 |
|
||||
| `ODS_PLATFORM_COUPON` | 否 | `WINDOW` | `consume_time` | 平台/团购券核销 |
|
||||
@@ -286,7 +304,7 @@ ORDER BY id, fetched_at DESC;
|
||||
| `ODS_MEMBER_CARD` | 否 | `FULL_TABLE` | — | 会员储值卡 |
|
||||
| `ODS_MEMBER_BALANCE` | 否 | `WINDOW` | `create_time` | 会员余额变动 |
|
||||
| `ODS_RECHARGE_SETTLE` | 是 | `NONE` | — | 充值结算 |
|
||||
| `ODS_GROUP_PACKAGE` | 否 | `FULL_TABLE` | — | 团购套餐定义 |
|
||||
| `ODS_GROUP_PACKAGE` | 否 | `FULL_TABLE` | — | 团购套餐定义 + 详情子流程(`detail_endpoint`) |
|
||||
| `ODS_GROUP_BUY_REDEMPTION` | 否 | `WINDOW` | `create_time` | 团购套餐核销 |
|
||||
| `ODS_INVENTORY_STOCK` | 否 | `NONE` | — | 库存汇总 |
|
||||
| `ODS_INVENTORY_CHANGE` | 是 | `NONE` | — | 库存变化记录 |
|
||||
@@ -297,4 +315,4 @@ ORDER BY id, fetched_at DESC;
|
||||
| `ODS_TENANT_GOODS` | 否 | `FULL_TABLE` | — | 租户商品档案 |
|
||||
| `ODS_STAFF_INFO` | 否 | `FULL_TABLE` | — | 员工档案,全量快照 |
|
||||
|
||||
> 所有 23 个任务默认 `skip_unchanged=True`(去重开启)。
|
||||
> 所有 22 个任务默认 `skip_unchanged=True`(去重开启)。
|
||||
|
||||
@@ -283,7 +283,6 @@ execute()
|
||||
| `member_stored_value_cards` | `ods.member_stored_value_cards` |
|
||||
| `recharge_settlements` | `ods.recharge_settlements` |
|
||||
| `settlement_records` | `ods.settlement_records` |
|
||||
| `assistant_cancellation_records` | `ods.assistant_cancellation_records` |
|
||||
| `assistant_accounts_master` | `ods.assistant_accounts_master` |
|
||||
| `assistant_service_records` | `ods.assistant_service_records` |
|
||||
| `site_tables_master` | `ods.site_tables_master` |
|
||||
|
||||
@@ -247,6 +247,7 @@ class FlowRunner:
|
||||
"""ETL 完成后运行数据一致性检查,输出黑盒测试报告。
|
||||
|
||||
返回报告文件路径,失败时返回 None(不阻断主流程)。
|
||||
CHANGE 2026-02-26 | 改用 FETCH_ROOT 读取实际抓取数据,替代 API_SAMPLE_CACHE_ROOT
|
||||
"""
|
||||
try:
|
||||
from quality.consistency_checker import (
|
||||
@@ -259,13 +260,19 @@ class FlowRunner:
|
||||
|
||||
timer.start_step("CONSISTENCY_CHECK")
|
||||
try:
|
||||
# 优先使用 FETCH_ROOT(ETL 实际抓取的分页 JSON)
|
||||
fetch_root_str = os.environ.get("FETCH_ROOT")
|
||||
fetch_root = Path(fetch_root_str) if fetch_root_str else None
|
||||
|
||||
# 兼容保留:api_sample_dir 作为回退
|
||||
api_sample_dir_str = os.environ.get("API_SAMPLE_CACHE_ROOT")
|
||||
api_sample_dir = Path(api_sample_dir_str) if api_sample_dir_str else None
|
||||
|
||||
report = run_consistency_check(
|
||||
self.db_conn,
|
||||
fetch_root=fetch_root,
|
||||
api_sample_dir=api_sample_dir,
|
||||
include_api_vs_ods=bool(api_sample_dir),
|
||||
include_api_vs_ods=bool(fetch_root or api_sample_dir),
|
||||
include_ods_vs_dwd=True,
|
||||
tz=self.tz,
|
||||
)
|
||||
|
||||
@@ -26,6 +26,7 @@ from api.local_json_client import LocalJsonClient
|
||||
from orchestration.cursor_manager import CursorManager
|
||||
from orchestration.run_tracker import RunTracker
|
||||
from orchestration.task_registry import TaskRegistry
|
||||
from utils.task_log_buffer import TaskLogBuffer
|
||||
|
||||
|
||||
class DataSource(str, Enum):
|
||||
@@ -90,6 +91,8 @@ class TaskExecutor:
|
||||
self.logger.info("开始运行任务: %s, run_uuid=%s", task_codes, run_uuid)
|
||||
|
||||
for task_code in task_codes:
|
||||
# 为每个任务创建独立的日志缓冲区,避免多任务日志交叉
|
||||
task_log_buf = TaskLogBuffer(task_code, self.logger)
|
||||
try:
|
||||
task_result = self.run_single_task(
|
||||
task_code, run_uuid, store_id, data_source=data_source,
|
||||
@@ -107,6 +110,7 @@ class TaskExecutor:
|
||||
results.append(result_entry)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
self.logger.error("任务 %s 失败: %s", task_code, exc, exc_info=True)
|
||||
task_log_buf.error("任务失败: %s", exc)
|
||||
# CHANGE 2026-02-24 | 任务失败后 rollback,防止 InFailedSqlTransaction 级联
|
||||
try:
|
||||
self.db.rollback()
|
||||
@@ -119,6 +123,9 @@ class TaskExecutor:
|
||||
"counts": {},
|
||||
})
|
||||
continue
|
||||
finally:
|
||||
# 任务完成(无论成功/失败),一次性输出该任务的缓冲日志
|
||||
task_log_buf.flush()
|
||||
|
||||
self.logger.info("所有任务执行完成")
|
||||
return results
|
||||
|
||||
@@ -37,6 +37,8 @@ from tasks.dws import (
|
||||
AssistantFinanceTask,
|
||||
MemberConsumptionTask,
|
||||
MemberVisitTask,
|
||||
AssistantProjectTagTask,
|
||||
MemberProjectTagTask,
|
||||
FinanceDailyTask,
|
||||
FinanceRechargeTask,
|
||||
FinanceIncomeStructureTask,
|
||||
@@ -156,6 +158,9 @@ default_registry.register("DWS_ASSISTANT_SALARY", AssistantSalaryTask, layer="DW
|
||||
default_registry.register("DWS_ASSISTANT_FINANCE", AssistantFinanceTask, layer="DWS", depends_on=["DWS_ASSISTANT_SALARY"])
|
||||
default_registry.register("DWS_MEMBER_CONSUMPTION", MemberConsumptionTask, layer="DWS")
|
||||
default_registry.register("DWS_MEMBER_VISIT", MemberVisitTask, layer="DWS")
|
||||
# CHANGE [2026-03-07] intent: 注册项目标签任务,依赖 DWD 装载完成
|
||||
default_registry.register("DWS_ASSISTANT_PROJECT_TAG", AssistantProjectTagTask, layer="DWS", depends_on=["DWD_LOAD_FROM_ODS"])
|
||||
default_registry.register("DWS_MEMBER_PROJECT_TAG", MemberProjectTagTask, layer="DWS", depends_on=["DWD_LOAD_FROM_ODS"])
|
||||
default_registry.register("DWS_FINANCE_DAILY", FinanceDailyTask, layer="DWS")
|
||||
default_registry.register("DWS_FINANCE_RECHARGE", FinanceRechargeTask, layer="DWS")
|
||||
default_registry.register("DWS_FINANCE_INCOME_STRUCTURE", FinanceIncomeStructureTask, layer="DWS")
|
||||
@@ -172,6 +177,7 @@ default_registry.register("DWS_MAINTENANCE", DwsMaintenanceTask, layer="DWS", de
|
||||
"DWS_ASSISTANT_MONTHLY", "DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_SALARY", "DWS_ASSISTANT_FINANCE",
|
||||
"DWS_MEMBER_CONSUMPTION", "DWS_MEMBER_VISIT",
|
||||
"DWS_ASSISTANT_PROJECT_TAG", "DWS_MEMBER_PROJECT_TAG",
|
||||
"DWS_FINANCE_DAILY", "DWS_FINANCE_RECHARGE",
|
||||
"DWS_FINANCE_INCOME_STRUCTURE", "DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"DWS_BUILD_ORDER_SUMMARY",
|
||||
|
||||
1
apps/etl/connectors/feiqiu/pipeline/__init__.py
Normal file
1
apps/etl/connectors/feiqiu/pipeline/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
57
apps/etl/connectors/feiqiu/pipeline/models.py
Normal file
57
apps/etl/connectors/feiqiu/pipeline/models.py
Normal file
@@ -0,0 +1,57 @@
|
||||
"""管道数据类:请求描述、执行结果、写入结果。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass
class PipelineRequest:
    """Description of a single request handled by the pipeline.

    Only ``endpoint`` and ``params`` are required; every other field has a
    default. Field order is part of the positional constructor interface.
    """

    endpoint: str
    params: dict
    page_size: int | None = 200
    data_path: tuple[str, ...] = ("data",)
    list_key: str | None = None

    # Index of the window segment this request belongs to.
    segment_index: int = 0

    # True when this is a detail request.
    is_detail: bool = False

    # ID associated with a detail request.
    detail_id: Any = None

    # API response fetched ahead of time. Used for the BaseOdsTask
    # integration: when iter_paginated has already obtained the data, the
    # request loop uses this value directly instead of calling api.post().
    _prefetched_response: Any = None
|
||||
|
||||
|
||||
@dataclass
class PipelineResult:
    """Aggregated outcome of one pipeline execution.

    All fields default to an "empty success": status ``"SUCCESS"``, zeroed
    counters, and fresh (per-instance) ``errors`` / ``timing`` containers.
    """

    status: str = "SUCCESS"

    # Request / record counters.
    total_requests: int = 0
    completed_requests: int = 0
    total_fetched: int = 0
    total_inserted: int = 0
    total_updated: int = 0
    total_skipped: int = 0
    total_deleted: int = 0

    # Failure counters, one per pipeline phase.
    request_failures: int = 0
    processing_failures: int = 0
    write_failures: int = 0

    cancelled: bool = False
    errors: list[dict] = field(default_factory=list)
    timing: dict[str, float] = field(default_factory=dict)

    # Detail-mode statistics.
    detail_success: int = 0
    detail_failure: int = 0
    detail_skipped: int = 0
|
||||
|
||||
|
||||
@dataclass
class WriteResult:
    """Outcome of one batched database write.

    Each counter defaults to zero.
    """

    inserted: int = 0
    updated: int = 0
    skipped: int = 0
    errors: int = 0
|
||||
473
apps/etl/connectors/feiqiu/pipeline/unified_pipeline.py
Normal file
473
apps/etl/connectors/feiqiu/pipeline/unified_pipeline.py
Normal file
@@ -0,0 +1,473 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""统一管道引擎:串行请求 + 异步处理 + 单线程写库。
|
||||
|
||||
核心执行流程:
|
||||
主线程(_request_loop)串行发送 API 请求 → processing_queue
|
||||
→ N 个 worker 线程(_process_worker)并行处理 → write_queue
|
||||
→ 1 个 writer 线程(_write_worker)批量写入数据库
|
||||
|
||||
线程安全保证:
|
||||
- PipelineResult 的计数更新通过 threading.Lock 保护
|
||||
- 队列通信使用 queue.Queue(内置线程安全)
|
||||
- SENTINEL(None)用于通知线程退出
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import queue
|
||||
import threading
|
||||
import time
|
||||
from typing import Any, Callable, Iterable
|
||||
|
||||
from api.rate_limiter import RateLimiter
|
||||
from config.pipeline_config import PipelineConfig
|
||||
from utils.cancellation import CancellationToken
|
||||
from pipeline.models import PipelineRequest, PipelineResult, WriteResult
|
||||
|
||||
# Interval, in completed requests, between runtime-metrics log lines
# (queue depths, live worker count, progress).
_METRICS_LOG_INTERVAL = 10


class UnifiedPipeline:
    """Unified pipeline engine: serial requests, threaded processing, one writer.

    Data flow set up by :meth:`run`:

    * the calling thread issues API requests one at a time and puts each
      ``(request, response)`` pair on a bounded processing queue;
    * ``config.workers`` worker threads turn responses into record lists via
      ``process_fn`` and put them on a bounded write queue;
    * a single writer thread accumulates records and hands them to
      ``write_fn`` in batches.

    Counters on the shared result object are updated under ``self._lock``.
    ``None`` is the sentinel that tells a worker or the writer to exit.

    Args:
        api_client: API client (duck typed; needs ``post(endpoint, params)``).
        db_connection: Database connection (duck typed); stored as ``self.db``.
        logger: Logger used for all pipeline messages.
        config: Pipeline configuration. This class reads ``rate_min``,
            ``rate_max``, ``queue_size``, ``workers``, ``batch_size``,
            ``batch_timeout`` and ``max_consecutive_failures``.
        cancel_token: Cancellation token; a fresh ``CancellationToken()`` is
            created when omitted.
        etl_timer: Optional timer object exposing ``start_sub_step`` and
            ``stop_sub_step`` (duck typed), used to record per-phase timing.
        task_code: Task code passed to ``etl_timer`` as the parent step name.
    """

    def __init__(
        self,
        api_client,  # duck typing: has post(endpoint, params)
        db_connection,  # duck typing
        logger: logging.Logger,
        config: PipelineConfig,
        cancel_token: CancellationToken | None = None,
        etl_timer=None,  # optional EtlTimer, duck typing
        task_code: str | None = None,
    ) -> None:
        self.api = api_client
        self.db = db_connection
        self.logger = logger
        self.config = config
        self.cancel_token = cancel_token or CancellationToken()
        self._rate_limiter = RateLimiter(config.rate_min, config.rate_max)
        self._etl_timer = etl_timer
        self._task_code = task_code
        # Guards concurrent updates of the shared result counters.
        self._lock = threading.Lock()
        # Worker thread references, used by the runtime-metrics log to count
        # live threads.
        self._workers: list[threading.Thread] = []

    def _start_sub_step(self, phase: str) -> None:
        """Start timer sub-step ``phase`` when both a timer and a task code are set."""
        if self._etl_timer and self._task_code:
            try:
                self._etl_timer.start_sub_step(self._task_code, phase)
            except KeyError:
                pass  # parent step does not exist: skip silently

    def _stop_sub_step(self, phase: str) -> None:
        """Stop timer sub-step ``phase`` when both a timer and a task code are set."""
        if self._etl_timer and self._task_code:
            try:
                self._etl_timer.stop_sub_step(self._task_code, phase)
            except KeyError:
                pass  # parent step does not exist: skip silently

    def _shutdown_threads(
        self,
        processing_queue: queue.Queue,
        write_queue: queue.Queue,
        writer: threading.Thread,
    ) -> tuple[float, float]:
        """Send sentinels, join workers then the writer; return (process, write) seconds."""
        self._start_sub_step("process")
        process_start = time.monotonic()
        # One sentinel per worker so that every worker thread exits.
        for _ in self._workers:
            processing_queue.put(None)
        for worker in self._workers:
            worker.join()
        process_elapsed = time.monotonic() - process_start
        self._stop_sub_step("process")

        # Workers are done, so nothing else can reach the write queue: the
        # writer flushes what it still holds when it sees the sentinel.
        self._start_sub_step("write")
        write_start = time.monotonic()
        write_queue.put(None)
        writer.join()
        write_elapsed = time.monotonic() - write_start
        self._stop_sub_step("write")

        return process_elapsed, write_elapsed

    def run(
        self,
        requests: Iterable[PipelineRequest],
        process_fn: Callable[[Any], list[dict]],
        write_fn: Callable[[list[dict]], WriteResult],
    ) -> PipelineResult:
        """Execute the pipeline.

        Args:
            requests: Iterable of requests to issue, in order.
            process_fn: Converts one API response into a list of records.
            write_fn: Writes one batch of records and returns a write result.

        Returns:
            A ``PipelineResult`` with per-phase counters, timings and the
            final status: ``CANCELLED``, ``FAILED`` (consecutive request
            failures reached the threshold), ``PARTIAL`` (any request,
            processing or write failure) or ``SUCCESS``. ``timing["process"]``
            and ``timing["write"]`` measure only the drain time after the
            request loop ended, since those phases overlap with requesting.

        Raises:
            Exception: Anything raised while iterating ``requests`` propagates
                to the caller, but only after the worker and writer threads
                have been told to exit and joined, so no thread is left
                blocked and already-queued records are still flushed.
                Errors raised by ``api.post`` are not propagated; the request
                loop records them on the result.
        """
        # Pre-check: an already-cancelled token returns an empty result.
        if self.cancel_token.is_cancelled:
            return PipelineResult(status="CANCELLED", cancelled=True)

        processing_queue: queue.Queue = queue.Queue(
            maxsize=self.config.queue_size,
        )
        write_queue: queue.Queue = queue.Queue(
            maxsize=self.config.queue_size * 2,
        )
        result = PipelineResult()

        # Keep queue references for the runtime-metrics log.
        self._processing_queue = processing_queue
        self._write_queue = write_queue

        start_time = time.monotonic()
        request_elapsed = process_elapsed = write_elapsed = 0.0

        # Start the processing threads.
        self._workers = []
        for i in range(self.config.workers):
            t = threading.Thread(
                target=self._process_worker,
                args=(processing_queue, write_queue, process_fn, result),
                name=f"pipeline-worker-{i}",
                daemon=True,
            )
            t.start()
            self._workers.append(t)

        # Start the single writer thread.
        writer = threading.Thread(
            target=self._write_worker,
            args=(write_queue, write_fn, result),
            name="pipeline-writer",
            daemon=True,
        )
        writer.start()

        try:
            # Calling thread: serial requests.
            self._start_sub_step("request")
            request_start = time.monotonic()
            try:
                self._request_loop(requests, processing_queue, result)
            finally:
                request_elapsed = time.monotonic() - request_start
                self._stop_sub_step("request")
        finally:
            # Always shut the threads down, even when the request phase
            # raised; otherwise they would stay blocked on their queues.
            try:
                process_elapsed, write_elapsed = self._shutdown_threads(
                    processing_queue, write_queue, writer,
                )
            finally:
                self._processing_queue = None
                self._write_queue = None
                self._workers = []

        total_elapsed = time.monotonic() - start_time
        result.timing["total"] = round(total_elapsed, 3)
        result.timing["request"] = round(request_elapsed, 3)
        result.timing["process"] = round(process_elapsed, 3)
        result.timing["write"] = round(write_elapsed, 3)

        # Decide the final status.
        if result.cancelled:
            result.status = "CANCELLED"
        elif result.status == "FAILED":
            pass  # set by the request loop on consecutive failures; keep it
        elif (
            result.request_failures
            + result.processing_failures
            + result.write_failures
            > 0
        ):
            result.status = "PARTIAL"
        else:
            result.status = "SUCCESS"

        # Execution summary log.
        self.logger.info(
            "管道执行摘要: status=%s, 总耗时=%.1fs "
            "[请求=%.1fs, 处理=%.1fs, 写入=%.1fs], "
            "请求=%d/%d, 获取=%d, "
            "写入(inserted=%d, updated=%d, skipped=%d), "
            "失败(request=%d, process=%d, write=%d)",
            result.status,
            total_elapsed,
            request_elapsed,
            process_elapsed,
            write_elapsed,
            result.completed_requests,
            result.total_requests,
            result.total_fetched,
            result.total_inserted,
            result.total_updated,
            result.total_skipped,
            result.request_failures,
            result.processing_failures,
            result.write_failures,
        )

        return result

    def _request_loop(
        self,
        requests: Iterable[PipelineRequest],
        processing_queue: queue.Queue,
        result: PipelineResult,
    ) -> None:
        """Issue requests serially on the calling thread.

        For each request: check for cancellation, obtain the response (the
        prefetched one when present, otherwise ``api.post``), put it on
        ``processing_queue`` (blocking when full, which is the back-pressure
        mechanism), then wait on the rate limiter. The loop stops on
        cancellation, or sets ``status="FAILED"`` and stops once
        ``config.max_consecutive_failures`` consecutive requests have failed.
        """
        consecutive_failures = 0

        for req in requests:
            # Cancellation check.
            if self.cancel_token.is_cancelled:
                with self._lock:
                    result.cancelled = True
                self.logger.info("收到取消信号,停止发送新请求")
                break

            with self._lock:
                result.total_requests += 1

            req_start = time.monotonic()
            try:
                # Prefetch mode: the data was already fetched, use it as is.
                if req._prefetched_response is not None:
                    response = req._prefetched_response
                else:
                    response = self.api.post(req.endpoint, req.params)
                elapsed = time.monotonic() - req_start

                self.logger.debug(
                    "请求完成: endpoint=%s, 耗时=%.2fs",
                    req.endpoint,
                    elapsed,
                )

                # Blocks while the queue is full (back-pressure).
                processing_queue.put((req, response))

                with self._lock:
                    result.completed_requests += 1
                    completed = result.completed_requests
                    total = result.total_requests

                # A success resets the consecutive-failure counter.
                consecutive_failures = 0

                # Periodic runtime metrics: queue depths and progress.
                if completed % _METRICS_LOG_INTERVAL == 0:
                    self._log_runtime_metrics(result, completed, total)

            except Exception as exc:
                elapsed = time.monotonic() - req_start
                consecutive_failures += 1

                self.logger.error(
                    "请求失败: endpoint=%s, 耗时=%.2fs, 错误=%s",
                    req.endpoint,
                    elapsed,
                    exc,
                )

                with self._lock:
                    result.request_failures += 1
                    result.errors.append({
                        "phase": "request",
                        "endpoint": req.endpoint,
                        "error": str(exc),
                    })

                # Abort once the consecutive-failure threshold is reached.
                if consecutive_failures >= self.config.max_consecutive_failures:
                    self.logger.error(
                        "连续失败 %d 次,超过阈值 %d,中断管道",
                        consecutive_failures,
                        self.config.max_consecutive_failures,
                    )
                    with self._lock:
                        result.status = "FAILED"
                    break

            # Rate-limit wait. It also runs after the last request, and after
            # failed or prefetched ones, so request spacing stays uniform.
            if not self._rate_limiter.wait(self.cancel_token.event):
                with self._lock:
                    result.cancelled = True
                self.logger.info("限流等待期间收到取消信号,停止发送新请求")
                break

    def _process_worker(
        self,
        processing_queue: queue.Queue,
        write_queue: queue.Queue,
        process_fn: Callable[[Any], list[dict]],
        result: PipelineResult,
    ) -> None:
        """Worker thread body: responses in, record lists out.

        Takes ``(request, response)`` pairs from ``processing_queue``, calls
        ``process_fn(response)`` and puts any non-empty record list on
        ``write_queue``. Exits on the ``None`` sentinel. An exception while
        handling one response is logged and counted in
        ``processing_failures``; the worker then continues with the next item.
        """
        while True:
            item = processing_queue.get()

            # Sentinel: exit signal.
            if item is None:
                processing_queue.task_done()
                break

            req, response = item
            try:
                records = process_fn(response)

                if records:
                    # Hand the processed records to the writer.
                    write_queue.put(records)

                    with self._lock:
                        result.total_fetched += len(records)

            except Exception as exc:
                self.logger.error(
                    "处理失败: endpoint=%s, 错误=%s",
                    req.endpoint,
                    exc,
                )
                with self._lock:
                    result.processing_failures += 1
                    result.errors.append({
                        "phase": "processing",
                        "endpoint": req.endpoint,
                        "error": str(exc),
                    })

            processing_queue.task_done()

    def _write_worker(
        self,
        write_queue: queue.Queue,
        write_fn: Callable[[list[dict]], WriteResult],
        result: PipelineResult,
    ) -> None:
        """Writer thread body: accumulate records and write them in batches.

        - Writes as soon as ``batch_size`` records have accumulated.
        - Writes whatever has accumulated when no new item arrives within
          ``batch_timeout`` seconds.
        - A failed write is recorded by ``_flush_batch``; later batches are
          still processed.
        - On the ``None`` sentinel, flushes the remainder and exits.
        """
        batch: list[dict] = []
        # Clamp to 1: with a non-positive batch size the slicing loop below
        # would never shrink the batch and would spin forever.
        batch_size = max(1, self.config.batch_size)
        batch_timeout = self.config.batch_timeout

        while True:
            try:
                item = write_queue.get(timeout=batch_timeout)
            except queue.Empty:
                # Idle timeout: write what has accumulated so far.
                if batch:
                    self._flush_batch(batch, write_fn, result)
                    batch = []
                continue

            # Sentinel: exit signal.
            if item is None:
                write_queue.task_done()
                break

            # item is a list[dict]: the output of one process_fn call.
            batch.extend(item)
            write_queue.task_done()

            # Backlog warning.
            qsize = write_queue.qsize()
            if qsize >= self.config.queue_size * 2:
                self.logger.warning(
                    "写入队列积压: qsize=%d, 阈值=%d",
                    qsize,
                    self.config.queue_size * 2,
                )

            # Write full batches as soon as enough records are available.
            while len(batch) >= batch_size:
                chunk = batch[:batch_size]
                batch = batch[batch_size:]
                self._flush_batch(chunk, write_fn, result)

        # Flush the remainder before exiting.
        if batch:
            self._flush_batch(batch, write_fn, result)

    def _flush_batch(
        self,
        batch: list[dict],
        write_fn: Callable[[list[dict]], WriteResult],
        result: PipelineResult,
    ) -> None:
        """Run one batched write and fold its counts into ``result``.

        An exception from ``write_fn`` is logged and counted as one
        ``write_failures`` entry; it is not re-raised. A non-zero ``errors``
        count on the returned write result is logged as a warning so it is
        not lost silently; it does not change any counter.
        """
        if not batch:
            return

        try:
            wr = write_fn(batch)
            with self._lock:
                result.total_inserted += wr.inserted
                result.total_updated += wr.updated
                result.total_skipped += wr.skipped
            # write_fn is duck typed, so tolerate results without ``errors``.
            reported_errors = getattr(wr, "errors", 0)
            if reported_errors:
                self.logger.warning(
                    "批量写入存在错误记录: batch_size=%d, errors=%d",
                    len(batch),
                    reported_errors,
                )
        except Exception as exc:
            self.logger.error(
                "批量写入失败: batch_size=%d, 错误=%s",
                len(batch),
                exc,
            )
            with self._lock:
                result.write_failures += 1
                result.errors.append({
                    "phase": "write",
                    "batch_size": len(batch),
                    "error": str(exc),
                })

    def _log_runtime_metrics(
        self,
        result: PipelineResult,
        completed: int,
        total: int,
    ) -> None:
        """Log runtime metrics: progress, queue depths and live worker count."""
        pq_depth = self._processing_queue.qsize() if self._processing_queue else 0
        wq_depth = self._write_queue.qsize() if self._write_queue else 0
        active_workers = sum(1 for w in self._workers if w.is_alive())

        self.logger.debug(
            "运行时指标: 进度=%d/%d, 处理队列=%d, 活跃线程=%d, 写入队列=%d",
            completed,
            total,
            pq_depth,
            active_workers,
            wq_depth,
        )
|
||||
@@ -71,7 +71,6 @@ class ConsistencyReport:
|
||||
ODS_TABLE_TO_JSON_FILE: Dict[str, str] = {
|
||||
"assistant_accounts_master": "assistant_accounts_master.json",
|
||||
"assistant_service_records": "assistant_service_records.json",
|
||||
"assistant_cancellation_records": "assistant_cancellation_records.json",
|
||||
"member_profiles": "member_profiles.json",
|
||||
"member_stored_value_cards": "member_stored_value_cards.json",
|
||||
"member_balance_changes": "member_balance_changes.json",
|
||||
@@ -93,6 +92,35 @@ ODS_TABLE_TO_JSON_FILE: Dict[str, str] = {
|
||||
"stock_goods_category_tree": "stock_goods_category_tree.json",
|
||||
}
|
||||
|
||||
# CHANGE 2026-02-26 | ODS 表名 → task_code 映射,用于从 FETCH_ROOT 定位分页 JSON
|
||||
# FETCH_ROOT 目录结构:{task_code}/{task_code}-{run_id}-{date}-{time}/{ods_table}.json
|
||||
ODS_TABLE_TO_TASK_CODE: Dict[str, str] = {
|
||||
"assistant_accounts_master": "ODS_ASSISTANT_ACCOUNT",
|
||||
"assistant_service_records": "ODS_ASSISTANT_LEDGER",
|
||||
"member_profiles": "ODS_MEMBER",
|
||||
"member_stored_value_cards": "ODS_MEMBER_CARD",
|
||||
"member_balance_changes": "ODS_MEMBER_BALANCE",
|
||||
"recharge_settlements": "ODS_RECHARGE_SETTLE",
|
||||
"settlement_records": "ODS_SETTLEMENT_RECORDS",
|
||||
"table_fee_transactions": "ODS_TABLE_USE",
|
||||
"table_fee_discount_records": "ODS_TABLE_FEE_DISCOUNT",
|
||||
"store_goods_sales_records": "ODS_STORE_GOODS_SALES",
|
||||
"store_goods_master": "ODS_STORE_GOODS",
|
||||
"tenant_goods_master": "ODS_TENANT_GOODS",
|
||||
"site_tables_master": "ODS_TABLES",
|
||||
"group_buy_packages": "ODS_GROUP_PACKAGE",
|
||||
"group_buy_redemption_records": "ODS_GROUP_BUY_REDEMPTION",
|
||||
"platform_coupon_redemption_records": "ODS_PLATFORM_COUPON",
|
||||
"payment_transactions": "ODS_PAYMENT",
|
||||
"refund_transactions": "ODS_REFUND",
|
||||
"goods_stock_summary": "ODS_INVENTORY_STOCK",
|
||||
"goods_stock_movements": "ODS_INVENTORY_CHANGE",
|
||||
"stock_goods_category_tree": "ODS_GOODS_CATEGORY",
|
||||
"staff_info_master": "ODS_STAFF_INFO",
|
||||
"settlement_ticket_records": "ODS_SETTLEMENT_TICKET",
|
||||
"json_archive_records": "ODS_JSON_ARCHIVE",
|
||||
}
|
||||
|
||||
# ODS 元数据列——不来自 API,由 ETL 框架自动填充
|
||||
ODS_META_COLUMNS = frozenset({
|
||||
"payload", "source_file", "source_endpoint",
|
||||
@@ -145,6 +173,86 @@ def _extract_records(data: Any) -> list[dict]:
|
||||
return []
|
||||
|
||||
|
||||
def extract_api_fields_from_fetch_root(
    fetch_root: Path,
    ods_table: str,
) -> set[str] | None:
    """Collect raw API field names for ``ods_table`` from paged JSON under FETCH_ROOT.

    CHANGE 2026-02-26 | Replaces the API_SAMPLE_CACHE_ROOT dependency of
    extract_api_fields_from_json: the paged JSON actually fetched by the ETL
    is read directly, so no separately generated cache is needed.

    Expected layout:
        FETCH_ROOT/{task_code}/{task_code}-{run_id}-{date}-{time}/{ods_table}.json
    Expected paged JSON shape:
        { "pages": [{ "response": { "data": { "{listKey}": [...] } } }] }

    Returns:
        The union of keys of the first ten extracted records, or ``None``
        when the table has no task-code mapping, the task directory, run
        directory or JSON file is missing, the file cannot be read or parsed,
        or no records are found.
    """
    task_code = ODS_TABLE_TO_TASK_CODE.get(ods_table)
    if not task_code:
        return None

    task_dir = fetch_root / task_code
    if not task_dir.is_dir():
        return None

    # Run directory names embed a timestamp; the greatest name is taken as
    # the newest run.
    newest_run = max(
        (entry for entry in task_dir.iterdir() if entry.is_dir()),
        key=lambda entry: entry.name,
        default=None,
    )
    if newest_run is None:
        return None

    # Look for {ods_table}.json inside the newest run directory only.
    json_file = newest_run / f"{ods_table}.json"
    if not json_file.exists():
        return None

    try:
        with json_file.open("r", encoding="utf-8") as handle:
            payload = json.load(handle)
    except (json.JSONDecodeError, OSError):
        return None

    records = _extract_records_from_paged_json(payload)
    if not records:
        return None

    # Only the first ten records are sampled for field names.
    field_names: set[str] = set()
    for record in records[:10]:
        if isinstance(record, dict):
            field_names |= record.keys()
    return field_names
|
||||
|
||||
|
||||
def _extract_records_from_paged_json(data: Any) -> list[dict]:
    """Pull business records out of an ETL paged-JSON document.

    Paged JSON shape:
        { "pages": [{ "response": { "data": { "{listKey}": [record, ...] } } }] }

    Input without a non-empty ``pages`` list (including non-dict input) is
    passed straight to ``_extract_records``, which keeps the flat cache format
    of gen_full_dataflow_doc (a bare list / ``{"data": [...]}``) working.

    Returns:
        The records of the first page whose ``response`` yields any, or an
        empty list when no page does.
    """
    pages = data.get("pages") if isinstance(data, dict) else None
    if not isinstance(pages, list) or not pages:
        # Fall back to the flat format.
        return _extract_records(data)

    # Stop at the first page that actually carries records.
    for page in pages:
        response = page.get("response") if isinstance(page, dict) else None
        if not isinstance(response, dict):
            continue
        found = _extract_records(response)
        if found:
            return found

    return []
|
||||
|
||||
|
||||
def check_api_vs_ods_fields(
|
||||
api_fields: set[str],
|
||||
ods_columns: set[str],
|
||||
@@ -494,6 +602,7 @@ def run_consistency_check(
|
||||
db_conn,
|
||||
*,
|
||||
api_sample_dir: Path | None = None,
|
||||
fetch_root: Path | None = None,
|
||||
include_api_vs_ods: bool = True,
|
||||
include_ods_vs_dwd: bool = True,
|
||||
sample_limit: int = 5,
|
||||
@@ -504,7 +613,8 @@ def run_consistency_check(
|
||||
|
||||
参数:
|
||||
db_conn: 数据库连接对象(需有 .conn 属性返回 psycopg2 connection)
|
||||
api_sample_dir: API JSON 缓存目录(用于 API vs ODS 检查)
|
||||
api_sample_dir: API JSON 缓存目录(旧方式,兼容保留)
|
||||
fetch_root: FETCH_ROOT 目录(优先使用,从 ETL 实际抓取的分页 JSON 提取字段)
|
||||
include_api_vs_ods: 是否执行 API vs ODS 检查
|
||||
include_ods_vs_dwd: 是否执行 ODS vs DWD 检查
|
||||
sample_limit: 值不一致时的采样行数
|
||||
@@ -519,16 +629,28 @@ def run_consistency_check(
|
||||
|
||||
with db_conn.conn.cursor() as cur:
|
||||
# --- 1. API vs ODS 字段完整性检查 ---
|
||||
if include_api_vs_ods and api_sample_dir:
|
||||
# CHANGE 2026-02-26 | 优先从 FETCH_ROOT 读取实际抓取数据,回退到 api_sample_dir 缓存
|
||||
if include_api_vs_ods and (fetch_root or api_sample_dir):
|
||||
for ods_table, json_file in sorted(ODS_TABLE_TO_JSON_FILE.items()):
|
||||
json_path = api_sample_dir / json_file
|
||||
api_fields = extract_api_fields_from_json(json_path)
|
||||
# 优先尝试 FETCH_ROOT(ETL 实际抓取的分页 JSON)
|
||||
api_fields = None
|
||||
source_hint = ""
|
||||
if fetch_root:
|
||||
api_fields = extract_api_fields_from_fetch_root(fetch_root, ods_table)
|
||||
source_hint = "FETCH_ROOT"
|
||||
|
||||
# 回退到 api_sample_dir(gen_full_dataflow_doc 缓存)
|
||||
if api_fields is None and api_sample_dir:
|
||||
json_path = api_sample_dir / json_file
|
||||
api_fields = extract_api_fields_from_json(json_path)
|
||||
source_hint = "API_SAMPLE_CACHE"
|
||||
|
||||
if api_fields is None:
|
||||
result = TableCheckResult(
|
||||
table_name=f"ods.{ods_table}",
|
||||
check_type="api_vs_ods",
|
||||
passed=True, # 无 JSON 缓存时跳过,不算失败
|
||||
error=f"API JSON 缓存不存在: {json_file}",
|
||||
passed=True, # 无 JSON 数据时跳过,不算失败
|
||||
error=f"无可用 JSON 数据(FETCH_ROOT 和 API 缓存均未找到)",
|
||||
)
|
||||
report.api_vs_ods_results.append(result)
|
||||
continue
|
||||
|
||||
@@ -14,6 +14,9 @@ import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
SAMPLES_DIR = os.path.join("docs", "api-reference", "samples")
|
||||
DOCS_DIR = os.path.join("docs", "api-reference")
|
||||
|
||||
|
||||
@@ -11,6 +11,9 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from dotenv import load_dotenv
|
||||
import psycopg2
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
load_dotenv()
|
||||
|
||||
PG_DSN = os.getenv("PG_DSN")
|
||||
|
||||
@@ -23,6 +23,9 @@ from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
|
||||
class DiffKind(str, Enum):
|
||||
"""差异分类枚举。"""
|
||||
|
||||
@@ -13,6 +13,9 @@ from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
import psycopg2
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
load_dotenv()
|
||||
|
||||
SUMMARY_DIR = Path("docs/api-reference/summary")
|
||||
|
||||
@@ -414,6 +414,7 @@ def _check_ods_vs_dwd(
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
|
||||
# 已知的 DWS→DWD 聚合关系映射
|
||||
# 营业日口径:使用 dws.biz_date() 替代 ::date 自然日转换
|
||||
_DWS_DWD_MAP: dict[str, dict] = {
|
||||
"dws.dws_assistant_daily_detail": {
|
||||
"dwd_source": "dwd.dwd_assistant_service_log",
|
||||
@@ -425,28 +426,28 @@ _DWS_DWD_MAP: dict[str, dict] = {
|
||||
"dwd_source": "dwd.dwd_settlement_head",
|
||||
"dws_date_col": "stat_date",
|
||||
"dwd_date_col": "pay_time",
|
||||
"dwd_date_cast": "::date",
|
||||
"dwd_date_cast": "dws.biz_date(%col%)",
|
||||
"description": "财务日度汇总 vs DWD 结账记录",
|
||||
},
|
||||
"dws.dws_member_visit_detail": {
|
||||
"dwd_source": "dwd.dwd_settlement_head",
|
||||
"dws_date_col": "visit_date",
|
||||
"dwd_date_col": "pay_time",
|
||||
"dwd_date_cast": "::date",
|
||||
"dwd_date_cast": "dws.biz_date(%col%)",
|
||||
"description": "会员到店明细 vs DWD 结账记录",
|
||||
},
|
||||
"dws.dws_member_consumption_summary": {
|
||||
"dwd_source": "dwd.dwd_settlement_head",
|
||||
"dws_date_col": "stat_month",
|
||||
"dwd_date_col": "pay_time",
|
||||
"dwd_date_cast": "date_trunc('month', %col%)::date",
|
||||
"dwd_date_cast": "date_trunc('month', dws.biz_date(%col%))::date",
|
||||
"description": "会员消费汇总 vs DWD 结账记录",
|
||||
},
|
||||
"dws.dws_finance_recharge_summary": {
|
||||
"dwd_source": "dwd.dwd_recharge_order",
|
||||
"dws_date_col": "stat_date",
|
||||
"dwd_date_col": "pay_time",
|
||||
"dwd_date_cast": "::date",
|
||||
"dwd_date_cast": "dws.biz_date(%col%)",
|
||||
"description": "充值汇总 vs DWD 充值订单",
|
||||
},
|
||||
}
|
||||
|
||||
@@ -20,6 +20,9 @@ from datetime import datetime
|
||||
|
||||
import requests
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
# ── 配置 ──────────────────────────────────────────────────────────────────
|
||||
API_BASE = "https://pc.ficoo.vip/apiprod/admin/v1/"
|
||||
API_TOKEN = os.environ.get("API_TOKEN", "")
|
||||
|
||||
@@ -12,6 +12,9 @@ import re
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 常量
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -68,27 +68,6 @@
|
||||
"payload",
|
||||
"content_hash"
|
||||
],
|
||||
"assistant_cancellation_records": [
|
||||
"id",
|
||||
"siteid",
|
||||
"siteprofile",
|
||||
"assistantname",
|
||||
"assistantabolishamount",
|
||||
"assistanton",
|
||||
"pdchargeminutes",
|
||||
"tableareaid",
|
||||
"tablearea",
|
||||
"tableid",
|
||||
"tablename",
|
||||
"trashreason",
|
||||
"createtime",
|
||||
"source_file",
|
||||
"source_endpoint",
|
||||
"fetched_at",
|
||||
"payload",
|
||||
"content_hash",
|
||||
"tenant_id"
|
||||
],
|
||||
"assistant_service_records": [
|
||||
"id",
|
||||
"tenant_id",
|
||||
|
||||
@@ -15,6 +15,9 @@ import sys
|
||||
import time
|
||||
import requests
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
# ── 配置 ──────────────────────────────────────────────────────────────────
|
||||
API_BASE = "https://pc.ficoo.vip/apiprod/admin/v1/"
|
||||
API_TOKEN = os.environ.get("API_TOKEN", "")
|
||||
@@ -58,7 +61,6 @@ CROSS_REF_HEADERS = {"字段名", "类型", "示例值", "说明", "field", "exa
|
||||
ACTUAL_LIST_KEY = {
|
||||
"assistant_accounts_master": "assistantInfos",
|
||||
"assistant_service_records": "orderAssistantDetails",
|
||||
"assistant_cancellation_records": "abolitionAssistants",
|
||||
"table_fee_transactions": "siteTableUseDetailsList",
|
||||
"table_fee_discount_records": "taiFeeAdjustInfos",
|
||||
"tenant_goods_master": "tenantGoodsList",
|
||||
|
||||
189
apps/etl/connectors/feiqiu/scripts/research_coupon_details.py
Normal file
189
apps/etl/connectors/feiqiu/scripts/research_coupon_details.py
Normal file
@@ -0,0 +1,189 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""一次性调研脚本:拉取全部团购详情并写入 ods.group_buy_package_details。
|
||||
|
||||
用法(cwd = C:\\NeoZQYY/):
|
||||
python apps/etl/connectors/feiqiu/scripts/research_coupon_details.py
|
||||
|
||||
流程:
|
||||
1. 从 ods.group_buy_packages 读取所有 coupon_id(id 列)
|
||||
2. 串行调用 QueryPackageCouponInfo 详情接口(RateLimiter 5-20s)
|
||||
3. 提取结构化字段 + 计算 content_hash + 保留原始 payload
|
||||
4. UPSERT 写入 ods.group_buy_package_details
|
||||
|
||||
需求覆盖:附录 B 调研 3、4
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# ── 环境初始化 ──────────────────────────────────────────────────────────
|
||||
# Load the repository-root .env; the path is derived from __file__, so it does not depend on the current working directory.
|
||||
from dotenv import load_dotenv
|
||||
|
||||
_SCRIPT_DIR = Path(__file__).resolve().parent # scripts/
|
||||
_FEIQIU_DIR = _SCRIPT_DIR.parent # apps/etl/connectors/feiqiu/
|
||||
_REPO_ROOT = _FEIQIU_DIR.parents[3] # → connectors/ → etl/ → apps/ → root
|
||||
|
||||
load_dotenv(_REPO_ROOT / ".env")
|
||||
|
||||
# 必需环境变量校验
|
||||
_REQUIRED_ENV = ("FETCH_ROOT", "EXPORT_ROOT", "PG_DSN", "TEST_DB_DSN")
|
||||
_missing = [k for k in _REQUIRED_ENV if not os.environ.get(k)]
|
||||
if _missing:
|
||||
sys.exit(f"ERROR: 缺少必需环境变量: {', '.join(_missing)}")
|
||||
|
||||
TEST_DB_DSN = os.environ["TEST_DB_DSN"]
|
||||
|
||||
# 确保 feiqiu 目录在 sys.path 中,以便从仓库根目录运行时也能 import 本地模块
|
||||
if str(_FEIQIU_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(_FEIQIU_DIR))
|
||||
|
||||
# ── 依赖导入 ──────────────────────────────────────────────────────────
|
||||
from psycopg2.extras import Json # noqa: E402
|
||||
|
||||
from config.settings import AppConfig # noqa: E402
|
||||
from api.client import APIClient # noqa: E402
|
||||
from api.rate_limiter import RateLimiter # noqa: E402
|
||||
from database.connection import DatabaseConnection # noqa: E402
|
||||
|
||||
# 复用 ods_tasks.py 中的字段提取逻辑
|
||||
from tasks.ods.ods_tasks import _group_package_detail_process_fn # noqa: E402
|
||||
|
||||
|
||||
def main():
    """Fetch every group-buy package detail and upsert it into the test database.

    Steps:
      1. Load AppConfig.
      2. Connect to the database named by TEST_DB_DSN.
      3. Read the distinct ids from ods.group_buy_packages.
      4. Call the QueryPackageCouponInfo endpoint once per id, serially, with a
         rate-limit wait between consecutive requests.
      5. Upsert the first extracted record of each response into
         ods.group_buy_package_details, committing per record.
      6. Print a summary.

    The database connection is closed on every exit path, including when an
    unexpected exception propagates. An interrupted rate-limit wait stops the
    run regardless of whether the preceding coupon succeeded, failed or was
    skipped.
    """
    # 1. Load configuration
    config = AppConfig.load()
    print(f"✅ 配置加载完成 (store_id={config.get('app.store_id')})")

    # 2. Connect to the test database
    db = DatabaseConnection(
        dsn=TEST_DB_DSN,
        session=config["db"].get("session", {}),
        connect_timeout=config["db"].get("connect_timeout_sec"),
    )
    try:
        # Only the part after '@' is printed so credentials stay out of the log.
        print(f"✅ 已连接测试库: {TEST_DB_DSN.split('@')[-1]}")

        # 3. Collect every coupon id
        rows = db.query("SELECT DISTINCT id FROM ods.group_buy_packages ORDER BY id")
        coupon_ids = [r["id"] for r in rows]
        total = len(coupon_ids)
        print(f"📋 共 {total} 个 coupon_id 待拉取")

        if not coupon_ids:
            print("⚠️ 没有找到任何 coupon_id,退出")
            return

        # 4. API client and rate limiter
        api = APIClient(
            base_url=config["api"]["base_url"],
            token=config["api"]["token"],
            timeout=config.get("api.timeout_sec", 20),
        )
        limiter = RateLimiter(min_interval=5.0, max_interval=20.0)

        # 5. Fetch and store the details one by one
        counts = {"success": 0, "fail": 0, "skip": 0}
        for idx, cid in enumerate(coupon_ids, 1):
            print(f"\n[{idx}/{total}] coupon_id={cid} ...", end=" ", flush=True)
            counts[_store_coupon_detail(api, db, cid)] += 1

            # Rate-limit between requests (no wait after the last id). A falsy
            # result means the wait was interrupted, so stop the whole run.
            if idx < total and not limiter.wait():
                print("⚠️ 等待被中断")
                break

        # 6. Summary
        print("\n" + "=" * 50)
        print(
            f"📊 拉取完成: 成功={counts['success']}, 失败={counts['fail']}, "
            f"跳过={counts['skip']}, 总计={total}"
        )
        print("=" * 50)
    finally:
        db.close()


def _store_coupon_detail(api, db, cid) -> str:
    """Fetch one coupon's detail and upsert it; return "success", "fail" or "skip".

    Prints the per-coupon outcome. API errors and write errors are reported and
    returned as "fail" so that one bad coupon does not abort the batch.
    """
    try:
        resp = api.get(
            "/PackageCoupon/QueryPackageCouponInfo",
            {"couponId": cid},
        )
    except Exception as e:  # noqa: BLE001 - best effort per coupon
        print(f"❌ API 错误: {e}")
        return "fail"

    # Field extraction is shared with the regular ODS task.
    records = _group_package_detail_process_fn(resp)
    if not records:
        print("⚠️ 响应无有效数据,跳过")
        return "skip"

    record = records[0]
    # Keep the try body to the write itself so that a reporting problem is
    # never mistaken for a failed write.
    try:
        _upsert_detail(db, record)
        db.commit()
    except Exception as e:  # noqa: BLE001 - best effort per coupon
        db.rollback()
        print(f"❌ 写入失败: {e}")
        return "fail"

    content_hash = str(record.get("content_hash") or "")
    print(f"✅ 写入成功 (hash={content_hash[:8]}...)")
    return "success"
|
||||
|
||||
|
||||
def _upsert_detail(db: DatabaseConnection, record: dict) -> None:
|
||||
"""UPSERT 单条详情记录到 ods.group_buy_package_details。
|
||||
|
||||
ON CONFLICT (coupon_id) 时更新所有字段。
|
||||
"""
|
||||
columns = [
|
||||
"coupon_id", "package_name", "duration", "start_time", "end_time",
|
||||
"add_start_clock", "add_end_clock", "is_enabled", "is_delete",
|
||||
"site_id", "tenant_id", "create_time", "creator_name",
|
||||
"table_area_ids", "table_area_names", "assistant_services",
|
||||
"groupon_site_infos", "package_services", "coupon_details_list",
|
||||
"content_hash", "payload",
|
||||
]
|
||||
|
||||
# JSONB 字段需要用 Json 适配器
|
||||
_JSONB_COLS = {
|
||||
"table_area_ids", "table_area_names", "assistant_services",
|
||||
"groupon_site_infos", "package_services", "coupon_details_list",
|
||||
"payload",
|
||||
}
|
||||
|
||||
values = []
|
||||
for col in columns:
|
||||
val = record.get(col)
|
||||
if col in _JSONB_COLS and val is not None:
|
||||
val = Json(val)
|
||||
values.append(val)
|
||||
|
||||
col_list = ", ".join(columns)
|
||||
placeholders = ", ".join(["%s"] * len(columns))
|
||||
|
||||
# 除 coupon_id 外的所有列用于 UPDATE
|
||||
update_cols = [c for c in columns if c != "coupon_id"]
|
||||
update_set = ", ".join(f"{c} = EXCLUDED.{c}" for c in update_cols)
|
||||
|
||||
sql = (
|
||||
f"INSERT INTO ods.group_buy_package_details ({col_list}) "
|
||||
f"VALUES ({placeholders}) "
|
||||
f"ON CONFLICT (coupon_id) DO UPDATE SET {update_set}, "
|
||||
f"fetched_at = now()"
|
||||
)
|
||||
|
||||
db.execute(sql, values)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -6,6 +6,9 @@ ODS 列数据来自 information_schema.columns WHERE table_schema = 'ods'。
|
||||
import json
|
||||
import os
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
SAMPLES_DIR = os.path.join(os.path.dirname(__file__), "..", "docs", "api-reference", "samples")
|
||||
REPORT_DIR = os.environ.get("ETL_REPORT_ROOT")
|
||||
if not REPORT_DIR:
|
||||
@@ -16,7 +19,7 @@ NESTED_OBJECTS = {"siteprofile", "tableprofile"}
|
||||
# 22 张需要比对的表
|
||||
TABLES = [
|
||||
"assistant_accounts_master", "settlement_records", "assistant_service_records",
|
||||
"assistant_cancellation_records", "table_fee_transactions", "table_fee_discount_records",
|
||||
"table_fee_transactions", "table_fee_discount_records",
|
||||
"payment_transactions", "refund_transactions", "platform_coupon_redemption_records",
|
||||
"tenant_goods_master", "store_goods_sales_records", "store_goods_master",
|
||||
"stock_goods_category_tree", "goods_stock_movements", "member_profiles",
|
||||
|
||||
@@ -31,7 +31,7 @@ ODS_META = {"source_file", "source_endpoint", "fetched_at", "payload", "content_
|
||||
|
||||
TABLES = [
|
||||
"assistant_accounts_master", "settlement_records", "assistant_service_records",
|
||||
"assistant_cancellation_records", "table_fee_transactions", "table_fee_discount_records",
|
||||
"table_fee_transactions", "table_fee_discount_records",
|
||||
"payment_transactions", "refund_transactions", "platform_coupon_redemption_records",
|
||||
"tenant_goods_master", "store_goods_sales_records", "store_goods_master",
|
||||
"stock_goods_category_tree", "goods_stock_movements", "member_profiles",
|
||||
@@ -195,7 +195,7 @@ def classify_ods_only(table_name: str, field: str) -> str:
|
||||
return "ODS 后续版本新增字段(当前使用中的台桌关联订单 ID)"
|
||||
# tenant_id 在某些表中是 ODS 额外添加的
|
||||
if field == "tenant_id" and table_name in (
|
||||
"assistant_cancellation_records", "payment_transactions"
|
||||
"payment_transactions",
|
||||
):
|
||||
return "ODS 额外添加的租户 ID 字段(API 响应中不含,ETL 入库时补充)"
|
||||
# API 后续版本新增字段(文档快照未覆盖)
|
||||
|
||||
@@ -14,7 +14,7 @@ import multiprocessing as mp
|
||||
import subprocess
|
||||
import sys
|
||||
import time as time_mod
|
||||
from datetime import date, datetime, time, timedelta
|
||||
from datetime import date, datetime, timedelta
|
||||
from pathlib import Path
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
@@ -27,6 +27,7 @@ from tasks.utility.check_cutoff_task import CheckCutoffTask
|
||||
from tasks.dwd.dwd_load_task import DwdLoadTask
|
||||
from tasks.ods.ods_tasks import ENABLED_ODS_CODES
|
||||
from utils.logging_utils import build_log_path, configure_logging
|
||||
from neozqyy_shared.datetime_utils import business_date, business_day_range, now_shanghai
|
||||
|
||||
STEP_TIMEOUT_SEC = 120
|
||||
|
||||
@@ -53,6 +54,7 @@ def _compute_dws_window(
|
||||
if dws_start and dws_end and dws_end < dws_start:
|
||||
raise ValueError("dws_end must be >= dws_start")
|
||||
|
||||
cutoff = int(cfg.get("app.business_day_start_hour", 8))
|
||||
store_id = int(cfg.get("app.store_id"))
|
||||
dsn = cfg["db"]["dsn"]
|
||||
session = cfg["db"].get("session")
|
||||
@@ -67,19 +69,22 @@ def _compute_dws_window(
|
||||
if isinstance(mx, date):
|
||||
dws_start = mx - timedelta(days=max(0, int(rebuild_days)))
|
||||
else:
|
||||
dws_start = (datetime.now(tz).date()) - timedelta(days=max(1, int(bootstrap_days)))
|
||||
# 营业日口径:用 business_date 计算"今天"
|
||||
dws_start = business_date(now_shanghai(), cutoff) - timedelta(days=max(1, int(bootstrap_days)))
|
||||
|
||||
if dws_end is None:
|
||||
dws_end = datetime.now(tz).date()
|
||||
dws_end = business_date(now_shanghai(), cutoff)
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
start_dt = datetime.combine(dws_start, time.min).replace(tzinfo=tz)
|
||||
# end_dt 取到当天 23:59:59,避免只跑到“当前时刻”的 date() 导致少一天
|
||||
end_dt = datetime.combine(dws_end, time.max).replace(tzinfo=tz)
|
||||
# 营业日口径:窗口边界按 cutoff 小时对齐
|
||||
start_dt = business_day_range(dws_start, cutoff)[0]
|
||||
# end_dt 取到营业日结束(即 dws_end 次日 cutoff 前一秒),覆盖完整营业日
|
||||
end_dt = business_day_range(dws_end, cutoff)[1] - timedelta(seconds=1)
|
||||
return start_dt, end_dt
|
||||
|
||||
|
||||
|
||||
def _run_check_cutoff(cfg: AppConfig, logger: logging.Logger):
|
||||
dsn = cfg["db"]["dsn"]
|
||||
session = cfg["db"].get("session")
|
||||
@@ -99,21 +104,21 @@ def _run_check_cutoff(cfg: AppConfig, logger: logging.Logger):
|
||||
|
||||
|
||||
def _iter_daily_windows(window_start: datetime, window_end: datetime) -> list[tuple[datetime, datetime]]:
|
||||
"""按营业日拆分时间窗口。
|
||||
|
||||
window_start/window_end 已按 cutoff 小时对齐(由 _compute_dws_window 保证)。
|
||||
"""
|
||||
if window_start > window_end:
|
||||
return []
|
||||
tz = window_start.tzinfo
|
||||
windows: list[tuple[datetime, datetime]] = []
|
||||
cur = window_start
|
||||
while cur <= window_end:
|
||||
day_start = datetime.combine(cur.date(), time.min).replace(tzinfo=tz)
|
||||
day_end = datetime.combine(cur.date(), time.max).replace(tzinfo=tz)
|
||||
if day_start < window_start:
|
||||
day_start = window_start
|
||||
if day_end > window_end:
|
||||
day_end = window_end
|
||||
windows.append((day_start, day_end))
|
||||
next_day = cur.date() + timedelta(days=1)
|
||||
cur = datetime.combine(next_day, time.min).replace(tzinfo=tz)
|
||||
# 从 window_start 开始,每次推进 24 小时(一个营业日)
|
||||
cur_start = window_start
|
||||
while cur_start <= window_end:
|
||||
cur_end = cur_start + timedelta(days=1) - timedelta(seconds=1)
|
||||
if cur_end > window_end:
|
||||
cur_end = window_end
|
||||
windows.append((cur_start, cur_end))
|
||||
cur_start = cur_start + timedelta(days=1)
|
||||
return windows
|
||||
|
||||
|
||||
|
||||
@@ -21,6 +21,9 @@ import sys
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 常量
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -20,6 +20,8 @@ import sys
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from neozqyy_shared.repo_root import ensure_repo_root
|
||||
ensure_repo_root()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. 加载根 .env(遵循 testing-env.md 规范)
|
||||
|
||||
@@ -184,11 +184,18 @@ class BaseTask:
|
||||
if not (override_start and override_end):
|
||||
raise ValueError("run.window_override.start/end 需要同时提供")
|
||||
|
||||
# CHANGE 2026-03-04 | 纯日期字符串按业务日分割:start→当天biz_hour, end→次日biz_hour
|
||||
biz_hour = int(self.config.get("app.business_day_start_hour", 8))
|
||||
|
||||
window_start = override_start
|
||||
if isinstance(window_start, str):
|
||||
window_start = dtparser.parse(window_start)
|
||||
if isinstance(window_start, datetime) and window_start.tzinfo is None:
|
||||
window_start = window_start.replace(tzinfo=self.tz)
|
||||
# 纯日期(时分秒全零)→ 当天业务日起始时刻
|
||||
if window_start.hour == 0 and window_start.minute == 0 and window_start.second == 0:
|
||||
window_start = window_start.replace(hour=biz_hour, tzinfo=self.tz)
|
||||
else:
|
||||
window_start = window_start.replace(tzinfo=self.tz)
|
||||
elif isinstance(window_start, datetime):
|
||||
window_start = window_start.astimezone(self.tz)
|
||||
|
||||
@@ -196,7 +203,11 @@ class BaseTask:
|
||||
if isinstance(window_end, str):
|
||||
window_end = dtparser.parse(window_end)
|
||||
if isinstance(window_end, datetime) and window_end.tzinfo is None:
|
||||
window_end = window_end.replace(tzinfo=self.tz)
|
||||
# 纯日期(时分秒全零)→ 次日业务日起始时刻
|
||||
if window_end.hour == 0 and window_end.minute == 0 and window_end.second == 0:
|
||||
window_end = (window_end + timedelta(days=1)).replace(hour=biz_hour, tzinfo=self.tz)
|
||||
else:
|
||||
window_end = window_end.replace(tzinfo=self.tz)
|
||||
elif isinstance(window_end, datetime):
|
||||
window_end = window_end.astimezone(self.tz)
|
||||
|
||||
|
||||
@@ -5,12 +5,14 @@ from __future__ import annotations
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from datetime import date, datetime
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from typing import Any, Dict, Iterable, List, Sequence
|
||||
|
||||
from psycopg2.extras import RealDictCursor, execute_batch, execute_values
|
||||
from psycopg2.extras import Json, RealDictCursor, execute_batch, execute_values
|
||||
|
||||
from database.connection import DatabaseConnection
|
||||
from tasks.base_task import BaseTask, TaskContext
|
||||
|
||||
|
||||
@@ -70,6 +72,16 @@ class DwdLoadTask(BaseTask):
|
||||
_NUMERIC_RE = re.compile(r"^[+-]?\d+(?:\.\d+)?$")
|
||||
_BOOL_STRINGS = {"true", "false", "1", "0", "yes", "no", "y", "n", "t", "f"}
|
||||
|
||||
# 详情表 LEFT JOIN 配置:当 DWD 表需要从额外的 ODS 详情表获取字段时使用
|
||||
# detail_columns 中的列在 FACT_MAPPINGS 中以 detail."col" 形式引用
|
||||
DETAIL_JOIN_CONFIG: dict[str, dict] = {
|
||||
"dwd.dim_groupbuy_package_ex": {
|
||||
"detail_table": "ods.group_buy_package_details",
|
||||
"join_condition": 'ods_main."id" = detail."coupon_id"',
|
||||
"detail_columns": ["table_area_ids", "table_area_names", "assistant_services", "groupon_site_infos"],
|
||||
},
|
||||
}
|
||||
|
||||
def _strip_scd2_keys(self, pk_cols: Sequence[str]) -> list[str]:
|
||||
return [c for c in pk_cols if c.lower() not in self.SCD_COLS]
|
||||
|
||||
@@ -113,7 +125,10 @@ class DwdLoadTask(BaseTask):
|
||||
) -> str:
|
||||
if key_exprs and order_col:
|
||||
distinct_on = ", ".join(key_exprs)
|
||||
order_by = ", ".join([*key_exprs, f'"{order_col}" DESC NULLS LAST'])
|
||||
# order_col 可能是预格式化的表达式(如 ods_main."fetched_at"),此时直接使用;
|
||||
# 否则包裹双引号
|
||||
order_col_expr = order_col if '"' in order_col else f'"{order_col}"'
|
||||
order_by = ", ".join([*key_exprs, f'{order_col_expr} DESC NULLS LAST'])
|
||||
return (
|
||||
f"SELECT DISTINCT ON ({distinct_on}) {select_cols_sql} "
|
||||
f"FROM {ods_table_sql} {where_sql} ORDER BY {order_by}"
|
||||
@@ -303,6 +318,11 @@ class DwdLoadTask(BaseTask):
|
||||
("table_area_id_list", "table_area_id_list", None),
|
||||
("package_type", "type", None),
|
||||
("tenant_coupon_sale_order_item_id", "tenantcouponsaleorderitemid", None),
|
||||
# CHANGE 2026-03-05: 团购详情字段(来自 ods.group_buy_package_details,通过 LEFT JOIN 关联)
|
||||
("table_area_ids", 'detail."table_area_ids"', None),
|
||||
("table_area_names", 'detail."table_area_names"', None),
|
||||
("assistant_services", 'detail."assistant_services"', None),
|
||||
("groupon_site_infos", 'detail."groupon_site_infos"', None),
|
||||
],
|
||||
"dwd.dim_staff": [
|
||||
("staff_id", "id", None),
|
||||
@@ -311,16 +331,16 @@ class DwdLoadTask(BaseTask):
|
||||
],
|
||||
"dwd.dim_staff_ex": [
|
||||
("staff_id", "id", None),
|
||||
("rank_name", "rankname", None),
|
||||
("cashier_point_id", "cashierpointid", "bigint"),
|
||||
("cashier_point_name", "cashierpointname", None),
|
||||
("group_id", "groupid", "bigint"),
|
||||
("group_name", "groupname", None),
|
||||
("system_user_id", "systemuserid", "bigint"),
|
||||
("tenant_org_id", "tenantorgid", "bigint"),
|
||||
("rank_name", "rank_name", None),
|
||||
("cashier_point_id", "cashier_point_id", "bigint"),
|
||||
("cashier_point_name", "cashier_point_name", None),
|
||||
("group_id", "group_id", "bigint"),
|
||||
("group_name", "group_name", None),
|
||||
("system_user_id", "system_user_id", "bigint"),
|
||||
("tenant_org_id", "tenant_org_id", "bigint"),
|
||||
("auth_code_create", "auth_code_create", "timestamptz"),
|
||||
("create_time", "create_time", "timestamptz"),
|
||||
("user_roles", "userroles", "jsonb"),
|
||||
("user_roles", "user_roles", "jsonb"),
|
||||
],
|
||||
# 事实表主键及关键差异列
|
||||
"dwd.dwd_table_fee_log": [
|
||||
@@ -602,6 +622,7 @@ class DwdLoadTask(BaseTask):
|
||||
],
|
||||
# 库存汇总:goods_stock_summary(ODS 列名全小写)
|
||||
# CHANGE 2026-02-21: BUG 10 fix — ODS 列名是小写(sitegoodsid),不是驼峰
|
||||
# CHANGE 2026-03-01: 补 site_id 映射(ODS 入库时从 app.store_id 注入 siteid)
|
||||
"dwd.dwd_goods_stock_summary": [
|
||||
("site_goods_id", '"sitegoodsid"', "bigint"), # 门店商品 ID(PK)
|
||||
("goods_name", '"goodsname"', None), # 商品名称
|
||||
@@ -617,6 +638,7 @@ class DwdLoadTask(BaseTask):
|
||||
("range_sale_money", '"rangesalemoney"', "numeric"), # 销售金额
|
||||
("range_inventory", '"rangeinventory"', "numeric"), # 盘点调整量
|
||||
("current_stock", '"currentstock"', "numeric"), # 当前库存
|
||||
("site_id", '"siteid"', "bigint"), # 门店 ID(ODS 入库时注入)
|
||||
],
|
||||
# 库存变动流水:goods_stock_movements(ODS 列名全小写)
|
||||
# CHANGE 2026-02-21: BUG 10 fix — ODS 列名是小写,不是驼峰
|
||||
@@ -653,11 +675,12 @@ class DwdLoadTask(BaseTask):
|
||||
|
||||
def load(self, extracted: dict[str, Any], context: TaskContext) -> dict[str, Any]:
|
||||
"""
|
||||
遍历映射关系,维度执行 SCD2 合并,事实表按时间增量插入。
|
||||
并行遍历映射关系,维度执行 SCD2 合并,事实表按时间增量插入。
|
||||
|
||||
说明:
|
||||
- 为避免长事务导致锁堆积/中断后遗留 idle-in-tx,本任务按“每张表一次事务”提交;
|
||||
- 单表失败会回滚该表并继续后续表,最终在结果中汇总错误信息。
|
||||
- 使用 ThreadPoolExecutor 并行处理多张表,每张表使用独立数据库连接和事务;
|
||||
- 单表失败会回滚该表并继续后续表,最终在结果中汇总错误信息;
|
||||
- 并行线程数通过 AppConfig 的 dwd.parallel_workers 配置(默认 4)。
|
||||
"""
|
||||
now = extracted["now"]
|
||||
summary: List[Dict[str, Any]] = []
|
||||
@@ -668,54 +691,109 @@ class DwdLoadTask(BaseTask):
|
||||
if env_only and not only_tables_cfg:
|
||||
only_tables_cfg = [t.strip() for t in env_only.split(",") if t.strip()]
|
||||
only_tables = {str(t).strip().lower() for t in only_tables_cfg if str(t).strip()} if only_tables_cfg else set()
|
||||
with self.db.conn.cursor(cursor_factory=RealDictCursor) as cur:
|
||||
for dwd_table, ods_table in self.TABLE_MAP.items():
|
||||
if only_tables and dwd_table.lower() not in only_tables and self._table_base(dwd_table).lower() not in only_tables:
|
||||
continue
|
||||
started = time.monotonic()
|
||||
self.logger.info("DWD 装载开始:%s <= %s", dwd_table, ods_table)
|
||||
|
||||
parallel_workers = int(self.config.get("dwd.parallel_workers", 4))
|
||||
|
||||
# 筛选需要处理的表
|
||||
tables_to_process: list[tuple[str, str]] = []
|
||||
for dwd_table, ods_table in self.TABLE_MAP.items():
|
||||
if only_tables and dwd_table.lower() not in only_tables and self._table_base(dwd_table).lower() not in only_tables:
|
||||
continue
|
||||
tables_to_process.append((dwd_table, ods_table))
|
||||
|
||||
if not tables_to_process:
|
||||
return {"tables": summary, "errors": 0, "error_details": errors}
|
||||
|
||||
# 并行调度:每张表在独立线程中执行,使用独立数据库连接
|
||||
with ThreadPoolExecutor(max_workers=parallel_workers) as executor:
|
||||
futures = {}
|
||||
for dwd_table, ods_table in tables_to_process:
|
||||
future = executor.submit(
|
||||
self._process_single_table,
|
||||
dwd_table, ods_table, now, context,
|
||||
)
|
||||
futures[future] = dwd_table
|
||||
|
||||
for future in as_completed(futures):
|
||||
dwd_table = futures[future]
|
||||
try:
|
||||
dwd_cols = self._get_columns(cur, dwd_table)
|
||||
ods_cols = self._get_columns(cur, ods_table)
|
||||
if not dwd_cols:
|
||||
self.logger.warning("跳过 %s:未能获取 DWD 列信息", dwd_table)
|
||||
continue
|
||||
|
||||
if self._table_base(dwd_table).startswith("dim_"):
|
||||
dim_counts = self._merge_dim(cur, dwd_table, ods_table, dwd_cols, ods_cols, now)
|
||||
self.db.conn.commit()
|
||||
summary.append({"table": dwd_table, "mode": "SCD2", **dim_counts})
|
||||
else:
|
||||
dwd_types = self._get_column_types(cur, dwd_table, "dwd")
|
||||
ods_types = self._get_column_types(cur, ods_table, "ods")
|
||||
fact_counts = self._merge_fact_increment(
|
||||
cur,
|
||||
dwd_table,
|
||||
ods_table,
|
||||
dwd_cols,
|
||||
ods_cols,
|
||||
dwd_types,
|
||||
ods_types,
|
||||
window_start=context.window_start,
|
||||
window_end=context.window_end,
|
||||
)
|
||||
self.db.conn.commit()
|
||||
summary.append({"table": dwd_table, "mode": "INCREMENT", **fact_counts})
|
||||
|
||||
elapsed = time.monotonic() - started
|
||||
self.logger.info("DWD 装载完成:%s,用时 %.2fs", dwd_table, elapsed)
|
||||
table_result = future.result()
|
||||
summary.append(table_result)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
try:
|
||||
self.db.conn.rollback()
|
||||
except Exception:
|
||||
pass
|
||||
elapsed = time.monotonic() - started
|
||||
self.logger.exception("DWD 装载失败:%s,用时 %.2fs,err=%s", dwd_table, elapsed, exc)
|
||||
self.logger.error(
|
||||
"DWD 并行装载失败:%s,err=%s", dwd_table, exc,
|
||||
)
|
||||
errors.append({"table": dwd_table, "error": str(exc)})
|
||||
continue
|
||||
|
||||
return {"tables": summary, "errors": len(errors), "error_details": errors}
|
||||
|
||||
def _process_single_table(
    self,
    dwd_table: str,
    ods_table: str,
    now: datetime,
    context: TaskContext,
) -> Dict[str, Any]:
    """Load one DWD table on a dedicated database connection.

    A fresh DatabaseConnection is opened from the settings of self.db and is
    always closed before returning. Tables whose base name starts with "dim_"
    go through the SCD2 merge; all others through the incremental fact merge
    bounded by the context window. The work is committed once per table.

    Returns a dict with "table", "mode" ("SCD2", "INCREMENT" or "SKIPPED") and
    the counters produced by the merge. On failure the transaction is rolled
    back, the error is logged, and the exception is re-raised so the caller
    sees it through future.result().
    """
    t0 = time.monotonic()
    self.logger.info("DWD 装载开始:%s <= %s", dwd_table, ods_table)

    # One connection per call, so each table gets its own transaction.
    worker_db = DatabaseConnection(
        dsn=self.db._dsn,
        session=self.db._session,
        connect_timeout=self.db._connect_timeout,
    )
    try:
        with worker_db.conn.cursor(cursor_factory=RealDictCursor) as cursor:
            dwd_cols = self._get_columns(cursor, dwd_table)
            ods_cols = self._get_columns(cursor, ods_table)
            if not dwd_cols:
                self.logger.warning("跳过 %s:未能获取 DWD 列信息", dwd_table)
                return {"table": dwd_table, "mode": "SKIPPED", "inserted": 0, "updated": 0}

            if self._table_base(dwd_table).startswith("dim_"):
                mode = "SCD2"
                counts = self._merge_dim(
                    cursor, dwd_table, ods_table, dwd_cols, ods_cols, now
                )
            else:
                mode = "INCREMENT"
                dwd_types = self._get_column_types(cursor, dwd_table, "dwd")
                ods_types = self._get_column_types(cursor, ods_table, "ods")
                counts = self._merge_fact_increment(
                    cursor,
                    dwd_table,
                    ods_table,
                    dwd_cols,
                    ods_cols,
                    dwd_types,
                    ods_types,
                    window_start=context.window_start,
                    window_end=context.window_end,
                )
            worker_db.conn.commit()
            outcome = {"table": dwd_table, "mode": mode, **counts}

        self.logger.info(
            "DWD 装载完成:%s,用时 %.2fs", dwd_table, time.monotonic() - t0
        )
        return outcome
    except Exception as exc:
        # Best-effort rollback: the connection may already be unusable.
        try:
            worker_db.conn.rollback()
        except Exception:
            pass
        self.logger.exception(
            "DWD 装载失败:%s,用时 %.2fs,err=%s", dwd_table, time.monotonic() - t0, exc,
        )
        # Re-raise so the submitting thread receives it from future.result().
        raise
    finally:
        worker_db.close()
|
||||
|
||||
# ---------------------- 辅助方法 ----------------------
|
||||
def _get_columns(self, cur, table: str) -> List[str]:
|
||||
"""获取指定表的列名(小写)。"""
|
||||
@@ -872,6 +950,17 @@ class DwdLoadTask(BaseTask):
|
||||
ods_types = self._get_column_types(cur, ods_table, "ods")
|
||||
ts_types = {"timestamp without time zone", "timestamp with time zone"}
|
||||
table_sql = self._format_table(ods_table, "ods")
|
||||
# CHANGE 2026-03-05: 详情表 LEFT JOIN 支持 — 当 DWD 表配置了 DETAIL_JOIN_CONFIG 时,
|
||||
# 给 ODS 主表加别名 ods_main,LEFT JOIN 详情表为 detail,
|
||||
# 非 detail 列引用加 ods_main. 前缀避免歧义
|
||||
detail_join = self.DETAIL_JOIN_CONFIG.get(dwd_table)
|
||||
ods_alias = "ods_main" if detail_join else ""
|
||||
if detail_join:
|
||||
detail_table_sql = self._format_table(detail_join["detail_table"], "ods")
|
||||
table_sql = (
|
||||
f"{table_sql} AS ods_main "
|
||||
f'LEFT JOIN {detail_table_sql} AS detail ON {detail_join["join_condition"]}'
|
||||
)
|
||||
# 构造 SELECT 表达式,支持 JSON/expression 映射
|
||||
select_exprs: list[str] = []
|
||||
added: set[str] = set()
|
||||
@@ -881,21 +970,26 @@ class DwdLoadTask(BaseTask):
|
||||
continue
|
||||
if lc in mapping:
|
||||
src, cast_type = mapping[lc]
|
||||
# detail. 前缀的列直接使用(来自详情表),其他列加 ods_main. 前缀
|
||||
if ods_alias and not src.startswith("detail."):
|
||||
src = self._qualify_column_ref(src, ods_alias)
|
||||
select_exprs.append(f"{self._cast_expr(src, cast_type)} AS \"{lc}\"")
|
||||
added.add(lc)
|
||||
elif lc in ods_set:
|
||||
col_ref = f'{ods_alias}."{lc}"' if ods_alias else f'"{lc}"'
|
||||
# CHANGE 2026-02-22: BUG 12 — 同名列如果是时间类型,加哨兵值过滤
|
||||
if dwd_types.get(lc) in ts_types and ods_types.get(lc) in ts_types:
|
||||
select_exprs.append(
|
||||
f'CASE WHEN "{lc}" >= \'{self._SENTINEL_DATE_THRESHOLD}\'::timestamp '
|
||||
f'THEN "{lc}" ELSE NULL END AS "{lc}"'
|
||||
f"CASE WHEN {col_ref} >= '{self._SENTINEL_DATE_THRESHOLD}'::timestamp "
|
||||
f'THEN {col_ref} ELSE NULL END AS "{lc}"'
|
||||
)
|
||||
else:
|
||||
select_exprs.append(f'"{lc}" AS "{lc}"')
|
||||
select_exprs.append(f'{col_ref} AS "{lc}"')
|
||||
added.add(lc)
|
||||
# 分类维度需要额外读取 categoryboxes 以展开子类
|
||||
if dwd_table == "dwd.dim_goods_category" and "categoryboxes" not in added and "categoryboxes" in ods_set:
|
||||
select_exprs.append('"categoryboxes" AS "categoryboxes"')
|
||||
col_ref = f'{ods_alias}."categoryboxes"' if ods_alias else '"categoryboxes"'
|
||||
select_exprs.append(f'{col_ref} AS "categoryboxes"')
|
||||
added.add("categoryboxes")
|
||||
# 主键兜底确保被选出
|
||||
for pk in business_keys:
|
||||
@@ -903,9 +997,12 @@ class DwdLoadTask(BaseTask):
|
||||
if lc not in added:
|
||||
if lc in mapping:
|
||||
src, cast_type = mapping[lc]
|
||||
if ods_alias and not src.startswith("detail."):
|
||||
src = self._qualify_column_ref(src, ods_alias)
|
||||
select_exprs.append(f"{self._cast_expr(src, cast_type)} AS \"{lc}\"")
|
||||
elif lc in ods_set:
|
||||
select_exprs.append(f'"{lc}" AS "{lc}"')
|
||||
col_ref = f'{ods_alias}."{lc}"' if ods_alias else f'"{lc}"'
|
||||
select_exprs.append(f'{col_ref} AS "{lc}"')
|
||||
added.add(lc)
|
||||
|
||||
if not select_exprs:
|
||||
@@ -917,14 +1014,19 @@ class DwdLoadTask(BaseTask):
|
||||
lc = key.lower()
|
||||
if lc in mapping:
|
||||
src, cast_type = mapping[lc]
|
||||
if ods_alias and not src.startswith("detail."):
|
||||
src = self._qualify_column_ref(src, ods_alias)
|
||||
key_exprs.append(self._cast_expr(src, cast_type))
|
||||
elif lc in ods_set:
|
||||
key_exprs.append(f'"{lc}"')
|
||||
key_exprs.append(f'{ods_alias}."{lc}"' if ods_alias else f'"{lc}"')
|
||||
|
||||
select_cols_sql = ", ".join(select_exprs)
|
||||
where_sql = self._append_where_condition("", '"fetched_at" IS NOT NULL')
|
||||
fetched_at_ref = f'{ods_alias}."fetched_at"' if ods_alias else '"fetched_at"'
|
||||
where_sql = self._append_where_condition("", f'{fetched_at_ref} IS NOT NULL')
|
||||
# CHANGE 2026-03-05: order_col 也需要加别名前缀
|
||||
qualified_order_col = f'{ods_alias}."{order_col}"' if ods_alias and order_col else (f'"{order_col}"' if order_col else None)
|
||||
sql = self._latest_snapshot_select_sql(
|
||||
select_cols_sql, table_sql, key_exprs, order_col, where_sql
|
||||
select_cols_sql, table_sql, key_exprs, qualified_order_col, where_sql
|
||||
)
|
||||
cur.execute(sql)
|
||||
rows = [{k.lower(): v for k, v in r.items()} for r in cur.fetchall()]
|
||||
@@ -1006,7 +1108,7 @@ class DwdLoadTask(BaseTask):
|
||||
|
||||
# 批量插入新版本
|
||||
if to_insert:
|
||||
self._insert_dim_rows_bulk(cur, dwd_table, dwd_cols, to_insert, now)
|
||||
self._insert_dim_rows_bulk(cur, dwd_table, dwd_cols, to_insert, now, dwd_types=dwd_types)
|
||||
|
||||
processed = len(src_rows_by_pk)
|
||||
updated = len(to_close)
|
||||
@@ -1050,11 +1152,16 @@ class DwdLoadTask(BaseTask):
|
||||
dwd_cols: Sequence[str],
|
||||
rows_with_version: Sequence[tuple[Dict[str, Any], int]],
|
||||
now: datetime,
|
||||
dwd_types: Dict[str, str] | None = None,
|
||||
) -> None:
|
||||
"""批量插入新的 SCD2 版本行。"""
|
||||
sorted_cols = [c.lower() for c in sorted(dwd_cols)]
|
||||
insert_cols_sql = ", ".join(f'"{c}"' for c in sorted_cols)
|
||||
table_sql = self._format_table(table, "dwd")
|
||||
# 预计算数组类型列集合,避免 list 值被误包装为 Json
|
||||
_array_cols: set[str] = set()
|
||||
if dwd_types:
|
||||
_array_cols = {c for c, t in dwd_types.items() if "ARRAY" in t.upper() or "[]" in t}
|
||||
|
||||
def build_row(src_row: Dict[str, Any], version: int) -> list[Any]:
|
||||
values: list[Any] = []
|
||||
@@ -1068,7 +1175,15 @@ class DwdLoadTask(BaseTask):
|
||||
elif c == "scd2_version":
|
||||
values.append(version)
|
||||
else:
|
||||
values.append(src_row.get(c))
|
||||
val = src_row.get(c)
|
||||
# CHANGE 2026-03-07: 区分数组列和 JSONB 列
|
||||
# 数组列(TEXT[] 等)的 list 值直接传递,psycopg2 自动转为 PG 数组格式
|
||||
# JSONB 列的 dict/list 值需要 Json() 包装
|
||||
if isinstance(val, list) and c not in _array_cols:
|
||||
val = Json(val)
|
||||
elif isinstance(val, dict):
|
||||
val = Json(val)
|
||||
values.append(val)
|
||||
return values
|
||||
|
||||
values_rows = [build_row(r, ver) for r, ver in rows_with_version]
|
||||
@@ -1395,6 +1510,23 @@ class DwdLoadTask(BaseTask):
|
||||
# CHANGE 2026-02-22: BUG 12 fix — 哨兵日期阈值,上游 API 用 0001-01-01 表示"未设置"
|
||||
_SENTINEL_DATE_THRESHOLD = "0002-01-01"
|
||||
|
||||
@staticmethod
def _qualify_column_ref(src: str, alias: str) -> str:
    """Prefix a bare column reference with a table alias.

    Only a plain column name (``col`` or ``"col"``) is rewritten to
    ``alias."col"``. Everything else is returned unchanged: references
    that already contain a dot (``detail.x``, ``t.x``), JSON operators,
    casts, function calls, CASE/COALESCE expressions in any letter case,
    operator expressions, literals and keyword constants such as NULL.

    Args:
        src: Source expression taken from the column mapping.
        alias: Table alias to prepend to simple column names.

    Returns:
        ``alias."name"`` for a simple column name, otherwise ``src`` as-is.
    """
    ref = src.strip()

    # Already carries a table/schema prefix (or is a dotted expression).
    if "." in ref:
        return src

    # Keyword constants look like identifiers but must never be quoted.
    if ref.upper() in (
        "NULL", "TRUE", "FALSE",
        "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP",
        "LOCALTIME", "LOCALTIMESTAMP",
    ):
        return src

    # Double-quoted identifier: reuse the quotes that are already there.
    if len(ref) > 2 and ref[0] == '"' and ref[-1] == '"' and '"' not in ref[1:-1]:
        return f"{alias}.{ref}"

    # Unquoted identifier; "$" is legal inside PostgreSQL identifiers, so it is
    # mapped to "_" purely for the identifier test.
    if ref.replace("$", "_").isidentifier():
        return f'{alias}."{ref}"'

    # Anything else is an expression or literal, which must not be wrapped.
    return src
|
||||
|
||||
def _cast_expr(self, col: str, cast_type: str | None) -> str:
|
||||
"""构造带可选 CAST 的列表达式。
|
||||
|
||||
|
||||
@@ -20,6 +20,8 @@ from .assistant_salary_task import AssistantSalaryTask
|
||||
from .assistant_finance_task import AssistantFinanceTask
|
||||
from .member_consumption_task import MemberConsumptionTask
|
||||
from .member_visit_task import MemberVisitTask
|
||||
from .assistant_project_tag_task import AssistantProjectTagTask
|
||||
from .member_project_tag_task import MemberProjectTagTask
|
||||
from .finance_daily_task import FinanceDailyTask
|
||||
from .finance_recharge_task import FinanceRechargeTask
|
||||
from .finance_income_task import FinanceIncomeStructureTask
|
||||
@@ -56,6 +58,8 @@ __all__ = [
|
||||
# 客户维度
|
||||
"MemberConsumptionTask",
|
||||
"MemberVisitTask",
|
||||
"AssistantProjectTagTask",
|
||||
"MemberProjectTagTask",
|
||||
# 财务维度
|
||||
"FinanceBaseTask",
|
||||
"FinanceDailyTask",
|
||||
|
||||
@@ -34,6 +34,8 @@ from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
from .dws_helpers import mask_mobile, calc_days_since
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
|
||||
class AssistantCustomerTask(BaseDwsTask):
|
||||
"""
|
||||
@@ -181,13 +183,16 @@ class AssistantCustomerTask(BaseDwsTask):
|
||||
"""
|
||||
提取助教-客户服务统计(含滚动窗口)
|
||||
"""
|
||||
sql = """
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 6.3: DATE(start_use_time) → 营业日归属表达式
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("start_use_time", cutoff)
|
||||
sql = f"""
|
||||
WITH service_base AS (
|
||||
SELECT
|
||||
site_assistant_id AS assistant_id,
|
||||
nickname AS assistant_nickname,
|
||||
tenant_member_id AS member_id,
|
||||
DATE(start_use_time) AS service_date,
|
||||
{biz_expr} AS service_date,
|
||||
income_seconds,
|
||||
ledger_amount
|
||||
FROM dwd.dwd_assistant_service_log
|
||||
|
||||
@@ -34,6 +34,8 @@ from datetime import date, datetime, time, timedelta
|
||||
from decimal import Decimal, ROUND_HALF_UP
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import BaseDwsTask, CourseType, TaskContext
|
||||
|
||||
# 惩罚区域集合:大厅 A/B/C/S/TV + 麻将房 M1–M7
|
||||
@@ -197,7 +199,12 @@ class AssistantDailyTask(BaseDwsTask):
|
||||
|
||||
JOIN _ex 表取 is_trash 字段,用于直接判断服务是否被废除。
|
||||
"""
|
||||
sql = """
|
||||
# CHANGE 2026-02-26: dwd_assistant_service_log 无 table_area_name 列,
|
||||
# 改为 JOIN dim_table 取 site_table_area_name
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 6.1: DATE() → 营业日归属表达式
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("asl.start_use_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
asl.assistant_service_id,
|
||||
asl.order_settle_id,
|
||||
@@ -214,15 +221,18 @@ class AssistantDailyTask(BaseDwsTask):
|
||||
asl.ledger_unit_price,
|
||||
asl.start_use_time,
|
||||
asl.last_use_time,
|
||||
asl.table_area_name,
|
||||
DATE(asl.start_use_time) AS service_date,
|
||||
COALESCE(dt.site_table_area_name, '') AS table_area_name,
|
||||
{biz_expr} AS service_date,
|
||||
COALESCE(ex.is_trash, 0) AS is_trash
|
||||
FROM dwd.dwd_assistant_service_log asl
|
||||
LEFT JOIN dwd.dwd_assistant_service_log_ex ex
|
||||
ON asl.assistant_service_id = ex.assistant_service_id
|
||||
LEFT JOIN dwd.dim_table dt
|
||||
ON asl.site_table_id = dt.table_id
|
||||
AND dt.scd2_is_current = 1
|
||||
WHERE asl.site_id = %s
|
||||
AND DATE(asl.start_use_time) >= %s
|
||||
AND DATE(asl.start_use_time) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND asl.is_delete = 0
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
@@ -258,14 +268,20 @@ class AssistantDailyTask(BaseDwsTask):
|
||||
# 获取助教当日等级(SCD2 as-of)
|
||||
level_info = self.get_assistant_level_asof(assistant_id, service_date)
|
||||
|
||||
# CHANGE 2026-02-27 | level_name 始终由 code 静态映射得出
|
||||
# SCD2 仅用于取历史 level_code(等级可能变过),
|
||||
# name 不再依赖 SCD2 返回值,避免 SCD2 缺失时 NULL
|
||||
level_code = level_info.get('level_code') if level_info else record.get('assistant_level')
|
||||
level_name = self.level_code_to_name(level_code)
|
||||
|
||||
agg_dict[key] = {
|
||||
'site_id': site_id,
|
||||
'tenant_id': self.config.get("app.tenant_id", site_id),
|
||||
'assistant_id': assistant_id,
|
||||
'assistant_nickname': record.get('assistant_nickname'),
|
||||
'stat_date': service_date,
|
||||
'assistant_level_code': level_info.get('level_code') if level_info else record.get('assistant_level'),
|
||||
'assistant_level_name': level_info.get('level_name') if level_info else None,
|
||||
'assistant_level_code': level_code,
|
||||
'assistant_level_name': level_name,
|
||||
'total_service_count': 0,
|
||||
'base_service_count': 0,
|
||||
'bonus_service_count': 0,
|
||||
|
||||
@@ -28,6 +28,8 @@ from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from .base_dws_task import BaseDwsTask, CourseType, TaskContext
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
|
||||
class AssistantFinanceTask(BaseDwsTask):
|
||||
"""
|
||||
@@ -98,6 +100,8 @@ class AssistantFinanceTask(BaseDwsTask):
|
||||
revenue_total = self.safe_decimal(rev.get('revenue_total', 0))
|
||||
gross_profit = revenue_total - cost_daily
|
||||
gross_margin = gross_profit / revenue_total if revenue_total > 0 else Decimal('0')
|
||||
# 防御:clamp 到 numeric(7,4) 安全范围,避免极端值溢出
|
||||
gross_margin = max(Decimal('-999.9999'), min(Decimal('999.9999'), gross_margin))
|
||||
|
||||
record = {
|
||||
'site_id': site_id,
|
||||
@@ -125,9 +129,12 @@ class AssistantFinanceTask(BaseDwsTask):
|
||||
# load() 已移除——使用 BaseDwsTask 默认实现(DATE_COL="stat_date")
|
||||
|
||||
def _extract_daily_revenue(self, site_id: int, start_date: date, end_date: date) -> List[Dict[str, Any]]:
|
||||
sql = """
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 6.5: DATE(start_use_time) → 营业日归属表达式
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("s.start_use_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
DATE(s.start_use_time) AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
s.site_assistant_id AS assistant_id,
|
||||
(ARRAY_AGG(s.nickname ORDER BY s.start_use_time DESC))[1] AS assistant_nickname,
|
||||
COUNT(*) AS service_count,
|
||||
@@ -143,10 +150,10 @@ class AssistantFinanceTask(BaseDwsTask):
|
||||
LEFT JOIN dws.cfg_skill_type st
|
||||
ON st.skill_id = s.skill_id AND st.is_active = TRUE
|
||||
WHERE s.site_id = %s
|
||||
AND DATE(s.start_use_time) >= %s
|
||||
AND DATE(s.start_use_time) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND s.is_delete = 0
|
||||
GROUP BY DATE(s.start_use_time), s.site_assistant_id
|
||||
GROUP BY {biz_expr}, s.site_assistant_id
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
@@ -35,6 +35,8 @@ from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
|
||||
class AssistantMonthlyTask(BaseDwsTask):
|
||||
"""
|
||||
@@ -262,14 +264,18 @@ class AssistantMonthlyTask(BaseDwsTask):
|
||||
month_where = " OR ".join(month_conditions)
|
||||
|
||||
# CHANGE 2026-02-22 | Prompt: 需求 A — 按档位分段统计
|
||||
# GROUP BY 加入 assistant_level_code/name,使同一助教月内不同档位各自聚合;
|
||||
# GROUP BY 加入 assistant_level_code,使同一助教月内不同档位各自聚合;
|
||||
# nickname 改用 ARRAY_AGG 按时间倒序取最新值,替代 MAX() 的字典序取值。
|
||||
# 唯一约束已同步变更为 (site_id, assistant_id, stat_month, assistant_level_code)
|
||||
# CHANGE 2026-02-27 | BUG: assistant_level_name 从 GROUP BY 移到 ARRAY_AGG FILTER
|
||||
# 同一 level_code 在 daily_detail 中可能有 NULL 和非 NULL 的 name,
|
||||
# GROUP BY 会产生多行导致 UK 冲突
|
||||
sql = f"""
|
||||
SELECT
|
||||
assistant_id,
|
||||
assistant_level_code,
|
||||
assistant_level_name,
|
||||
-- 同一 level_code 可能有 NULL 和非 NULL 的 name,取最新非空值避免 UK 冲突
|
||||
(ARRAY_AGG(assistant_level_name ORDER BY stat_date DESC) FILTER (WHERE assistant_level_name IS NOT NULL))[1] AS assistant_level_name,
|
||||
(ARRAY_AGG(assistant_nickname ORDER BY stat_date DESC))[1] AS assistant_nickname,
|
||||
DATE_TRUNC('month', stat_date)::DATE AS stat_month,
|
||||
COUNT(DISTINCT stat_date) AS work_days,
|
||||
@@ -291,7 +297,7 @@ class AssistantMonthlyTask(BaseDwsTask):
|
||||
SUM(trashed_count) AS trashed_count
|
||||
FROM dws.dws_assistant_daily_detail
|
||||
WHERE site_id = %s AND ({month_where})
|
||||
GROUP BY assistant_id, assistant_level_code, assistant_level_name,
|
||||
GROUP BY assistant_id, assistant_level_code,
|
||||
DATE_TRUNC('month', stat_date)
|
||||
"""
|
||||
|
||||
@@ -313,10 +319,13 @@ class AssistantMonthlyTask(BaseDwsTask):
|
||||
end_month = max(months)
|
||||
next_month = (end_month.replace(day=28) + timedelta(days=4)).replace(day=1)
|
||||
|
||||
sql = """
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 6.4: 使用 Business_Month 口径
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("start_use_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
site_assistant_id AS assistant_id,
|
||||
DATE_TRUNC('month', start_use_time)::DATE AS stat_month,
|
||||
DATE_TRUNC('month', {biz_expr}::timestamp)::DATE AS stat_month,
|
||||
COUNT(DISTINCT CASE WHEN tenant_member_id > 0 THEN tenant_member_id END) AS unique_customers,
|
||||
COUNT(DISTINCT site_table_id) AS unique_tables
|
||||
FROM dwd.dwd_assistant_service_log
|
||||
@@ -324,7 +333,7 @@ class AssistantMonthlyTask(BaseDwsTask):
|
||||
AND start_use_time >= %s
|
||||
AND start_use_time < %s
|
||||
AND is_delete = 0
|
||||
GROUP BY site_assistant_id, DATE_TRUNC('month', start_use_time)
|
||||
GROUP BY site_assistant_id, DATE_TRUNC('month', {biz_expr}::timestamp)
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_month, next_month))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
@@ -43,6 +43,8 @@ from typing import Any, Dict, List
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 数据结构
|
||||
@@ -225,19 +227,22 @@ class AssistantOrderContributionTask(BaseDwsTask):
|
||||
|
||||
settle_type=1 为台桌结账,包含台费、酒水食品等金额。
|
||||
"""
|
||||
sql = """
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 6.2: DATE(pay_time) → 营业日归属表达式
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
order_settle_id,
|
||||
site_id,
|
||||
tenant_id,
|
||||
table_charge_money,
|
||||
goods_money,
|
||||
DATE(pay_time) AS stat_date
|
||||
{biz_expr} AS stat_date
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND settle_type = 1
|
||||
AND DATE(pay_time) >= %s
|
||||
AND DATE(pay_time) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
@@ -250,7 +255,10 @@ class AssistantOrderContributionTask(BaseDwsTask):
|
||||
每条记录对应一张台桌在一个订单中的台费信息。
|
||||
real_table_use_seconds 为台桌实际使用时长。
|
||||
"""
|
||||
sql = """
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 6.2: DATE(start_use_time) → 营业日归属表达式
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("tfl.start_use_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
tfl.order_settle_id,
|
||||
tfl.site_table_id AS table_id,
|
||||
@@ -259,8 +267,8 @@ class AssistantOrderContributionTask(BaseDwsTask):
|
||||
COALESCE(tfl.ledger_amount, 0) AS table_fee
|
||||
FROM dwd.dwd_table_fee_log tfl
|
||||
WHERE tfl.site_id = %s
|
||||
AND DATE(tfl.start_use_time) >= %s
|
||||
AND DATE(tfl.start_use_time) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND COALESCE(tfl.is_delete, 0) = 0
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
@@ -274,7 +282,10 @@ class AssistantOrderContributionTask(BaseDwsTask):
|
||||
通过 LEFT JOIN cfg_skill_type 获取 course_type_code,
|
||||
real_service_money 为助教分成。
|
||||
"""
|
||||
sql = """
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 6.2: DATE(start_use_time) → 营业日归属表达式
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("asl.start_use_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
asl.order_settle_id,
|
||||
asl.site_assistant_id AS assistant_id,
|
||||
@@ -290,8 +301,8 @@ class AssistantOrderContributionTask(BaseDwsTask):
|
||||
ON asl.skill_id = cst.skill_id
|
||||
AND cst.is_active = TRUE
|
||||
WHERE asl.site_id = %s
|
||||
AND DATE(asl.start_use_time) >= %s
|
||||
AND DATE(asl.start_use_time) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND COALESCE(asl.is_delete, 0) = 0
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
|
||||
@@ -0,0 +1,236 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
DWS 助教项目标签任务
|
||||
|
||||
按时间窗口计算每位助教在四大项目(BILLIARD/SNOOKER/MAHJONG/KTV)的
|
||||
工作时长占比,占比≥25% 则分配标签。
|
||||
|
||||
数据链路:
|
||||
dwd_assistant_service_log (income_seconds)
|
||||
→ JOIN dim_table (site_table_id → table_id, scd2_is_current=1)
|
||||
→ get_area_category(area_name, table_name)
|
||||
→ 按 category_code 汇总 → 计算占比 → 写入 dws_assistant_project_tag
|
||||
|
||||
目标表:
|
||||
dws.dws_assistant_project_tag
|
||||
|
||||
更新策略:
|
||||
全量删除重建(按 site_id 删除后重新插入所有时间窗口)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from tasks.dws.base_dws_task import BaseDwsTask, TimeWindow
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
# Only the four main projects are counted; SPECIAL/OTHER are excluded.
|
||||
VALID_CATEGORIES = {"BILLIARD", "SNOOKER", "MAHJONG", "KTV"}
|
||||
|
||||
# The six time windows used by the assistant dashboard.
|
||||
ASSISTANT_WINDOWS = [
|
||||
TimeWindow.THIS_MONTH,
|
||||
TimeWindow.THIS_QUARTER,
|
||||
TimeWindow.LAST_MONTH,
|
||||
TimeWindow.LAST_3_MONTHS_EXCL_CURRENT,
|
||||
TimeWindow.LAST_QUARTER,
|
||||
TimeWindow.LAST_6_MONTHS,
|
||||
]
|
||||
|
||||
# Minimum share of an assistant's working time (25%) for a project to be tagged.
TAG_THRESHOLD = Decimal("0.25")
|
||||
|
||||
|
||||
class AssistantProjectTagTask(BaseDwsTask):
    """DWS task that derives per-assistant project tags from service durations.

    For every window in ``ASSISTANT_WINDOWS`` the task sums
    ``income_seconds`` per assistant and table, maps each table to a project
    category through ``get_area_category``, keeps only the categories in
    ``VALID_CATEGORIES`` and computes each category's share of the
    assistant's total. Shares at or above ``TAG_THRESHOLD`` are flagged.
    The target table is deleted and rebuilt for the site on every run.
    """

    def get_task_code(self) -> str:
        """Return the task code."""
        return "DWS_ASSISTANT_PROJECT_TAG"

    def get_target_table(self) -> str:
        """Return the unqualified name of the target table."""
        return "dws_assistant_project_tag"

    def get_primary_keys(self) -> List[str]:
        """Return the columns that identify one row of the target table."""
        return ["site_id", "assistant_id", "time_window", "category_code"]

    def extract(self, context) -> Dict[str, Any]:
        """Collect table dimension data and per-window service durations.

        Args:
            context: Task context; ``context.store_id`` is used as the site id.

        Returns:
            Dict with ``window_data`` (window value -> duration rows),
            ``table_info`` (table id -> dimension row) and ``site_id``.
        """
        site_id = context.store_id
        self.logger.info("%s: 提取助教服务数据", self.get_task_code())

        # Load the configuration cache (cfg_area_category etc.) up front.
        self.load_config_cache()

        # Table dimension rows supply area_name / table_name for categorisation.
        table_info = self._extract_table_info(site_id)

        window_data: Dict[str, List[Dict]] = {}
        for window in ASSISTANT_WINDOWS:
            time_range = self.get_time_window_range(window)
            window_data[window.value] = self._extract_assistant_durations(
                site_id, time_range.start, time_range.end
            )

        return {
            "window_data": window_data,
            "table_info": table_info,
            "site_id": site_id,
        }

    def _extract_table_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
        """Return current ``dim_table`` rows of the site keyed by ``table_id``."""
        sql = """
            SELECT table_id, table_name, site_table_area_name AS area_name
            FROM dwd.dim_table
            WHERE site_id = %s AND scd2_is_current = 1
        """
        rows = self.db.query(sql, (site_id,))
        return {r["table_id"]: dict(r) for r in (rows or [])}

    def _extract_assistant_durations(
        self, site_id: int, start_date: date, end_date: date
    ) -> List[Dict[str, Any]]:
        """Sum service seconds per assistant and table within a date range.

        The range is applied to the business-date expression built by
        ``biz_date_sql_expr`` from ``asl.start_use_time`` and the configured
        ``app.business_day_start_hour`` (default 8); both bounds are inclusive.
        """
        cutoff = self.config.get("app.business_day_start_hour", 8)
        biz_expr = biz_date_sql_expr("asl.start_use_time", cutoff)
        sql = f"""
            SELECT
                asl.site_assistant_id AS assistant_id,
                asl.site_table_id AS table_id,
                COALESCE(SUM(asl.income_seconds), 0) AS duration_seconds
            FROM dwd.dwd_assistant_service_log asl
            WHERE asl.site_id = %(site_id)s
              AND {biz_expr} >= %(start_date)s
              AND {biz_expr} <= %(end_date)s
              AND asl.is_delete = 0
            GROUP BY asl.site_assistant_id, asl.site_table_id
        """
        rows = self.db.query(sql, {
            "site_id": site_id,
            "start_date": start_date,
            "end_date": end_date,
        })
        return [dict(r) for r in rows] if rows else []

    def transform(self, extracted: Dict[str, Any], context) -> List[Dict[str, Any]]:
        """Turn per-table durations into per-category share records.

        Args:
            extracted: Result of :meth:`extract`.
            context: Task context; ``tenant_id`` is read from it when present.

        Returns:
            One dict per (assistant, window, category) ready for insertion.
        """
        table_info = extracted["table_info"]
        site_id = extracted["site_id"]
        # Prefer the context's tenant id; otherwise use the configured one
        # (same config key the daily task reads) instead of writing 0.
        tenant_id = (
            getattr(context, "tenant_id", 0)
            or self.config.get("app.tenant_id", 0)
            or 0
        )
        results: List[Dict[str, Any]] = []
        # Display names are resolved once per category code, not once per row.
        display_by_code: Dict[str, Dict[str, str]] = {}

        for window_value, rows in extracted["window_data"].items():
            # assistant_id -> category_code -> seconds
            assistant_cats: Dict[int, Dict[str, int]] = {}

            for row in rows:
                secs = self.safe_int(row["duration_seconds"])
                if secs <= 0:
                    continue

                # Area and table name come from dim_table; unknown tables
                # yield None for both and fall through to the default category.
                tinfo = table_info.get(row["table_id"], {})
                cat = self.get_area_category(
                    tinfo.get("area_name"), tinfo.get("table_name")
                )
                code = cat.get("category_code", "OTHER")
                if code not in VALID_CATEGORIES:
                    continue

                per_cat = assistant_cats.setdefault(row["assistant_id"], {})
                per_cat[code] = per_cat.get(code, 0) + secs

            for aid, cats in assistant_cats.items():
                total = sum(cats.values())
                if total <= 0:
                    continue

                for code, secs in cats.items():
                    pct = (Decimal(str(secs)) / Decimal(str(total))).quantize(
                        Decimal("0.0001")
                    )
                    if code not in display_by_code:
                        display_by_code[code] = self._get_category_display(code)
                    cat_info = display_by_code[code]

                    results.append({
                        "site_id": site_id,
                        "tenant_id": tenant_id,
                        "assistant_id": aid,
                        "time_window": window_value,
                        "category_code": code,
                        "category_name": cat_info["category_name"],
                        "short_name": cat_info["short_name"],
                        "duration_seconds": secs,
                        "total_seconds": total,
                        "percentage": float(pct),
                        "is_tagged": pct >= TAG_THRESHOLD,
                    })

        self.logger.info(
            "%s: 生成 %d 条标签记录(其中 %d 条达标)",
            self.get_task_code(),
            len(results),
            sum(1 for r in results if r["is_tagged"]),
        )
        return results

    def _get_category_display(self, code: str) -> Dict[str, str]:
        """Return the display name and short name for a category code.

        The first configured area category with a matching ``category_code``
        wins. NULL/empty configuration values fall back to the next candidate
        instead of being returned as ``None``.
        """
        cache = self.load_config_cache()
        for cat in cache.area_categories.values():
            if cat.get("category_code") == code:
                return {
                    "category_name": (
                        cat.get("display_name") or cat.get("category_name") or code
                    ),
                    # dict.get's default is not used when the key exists with
                    # a None value, hence the explicit ``or``.
                    "short_name": cat.get("short_name") or code[:1],
                }

        # Built-in fallback when the configuration has no entry for the code.
        fallback = {
            "BILLIARD": ("🎱 中式/追分", "🎱"),
            "SNOOKER": ("斯诺克", "斯"),
            "MAHJONG": ("🀄 麻将/棋牌", "🀄"),
            "KTV": ("🎤 团建/K歌", "🎤"),
        }
        name, short = fallback.get(code, (code, code[:1]))
        return {"category_name": name, "short_name": short}

    def load(self, transformed, context) -> dict:
        """Replace the site's rows in ``dws.dws_assistant_project_tag``.

        All existing rows of the site are deleted and ``transformed`` is
        inserted row by row. An empty ``transformed`` still clears the site's
        rows so that tags from an earlier run do not linger; the site id is
        then taken from ``context.store_id``.

        Returns:
            Dict with ``status`` and ``counts`` (``inserted``, ``deleted``).
        """
        if transformed:
            site_id = transformed[0]["site_id"]
        else:
            site_id = getattr(context, "store_id", None)
            if site_id is None:
                # No way to scope the delete; nothing to do.
                return {"status": "SUCCESS", "counts": {"inserted": 0, "deleted": 0}}

        # Full rebuild: drop everything for this site first.
        delete_sql = "DELETE FROM dws.dws_assistant_project_tag WHERE site_id = %s"
        self.db.execute(delete_sql, (site_id,))
        deleted = self.db.cursor.rowcount if hasattr(self.db, "cursor") else 0

        insert_sql = """
            INSERT INTO dws.dws_assistant_project_tag (
                site_id, tenant_id, assistant_id, time_window,
                category_code, category_name, short_name,
                duration_seconds, total_seconds, percentage, is_tagged,
                computed_at, created_at, updated_at
            ) VALUES (
                %(site_id)s, %(tenant_id)s, %(assistant_id)s, %(time_window)s,
                %(category_code)s, %(category_name)s, %(short_name)s,
                %(duration_seconds)s, %(total_seconds)s, %(percentage)s, %(is_tagged)s,
                NOW(), NOW(), NOW()
            )
        """
        for row in transformed:
            self.db.execute(insert_sql, row)

        self.logger.info(
            "%s: 删除 %d 条,插入 %d 条",
            self.get_task_code(), deleted, len(transformed),
        )
        return {
            "status": "SUCCESS",
            "counts": {"inserted": len(transformed), "deleted": deleted},
        }
|
||||
@@ -27,8 +27,9 @@ DWS层任务基类
|
||||
- 提供滚动窗口统计方法
|
||||
|
||||
时间口径说明:
|
||||
- 周起始日:周一
|
||||
- 月/季度起始:第一天0点
|
||||
- 营业日切点:BUSINESS_DAY_START_HOUR(默认 08:00),08:00 前的记录归属前一天
|
||||
- 周起始日:周一 08:00
|
||||
- 月/季度起始:第一天 08:00
|
||||
- 环比规则:对比上一个等长区间
|
||||
- 前3个月:含/不含本月(用于财务筛选)
|
||||
- 最近半年:不含本月
|
||||
@@ -52,6 +53,8 @@ from decimal import Decimal, InvalidOperation
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Iterator, List, Optional, Tuple, TypeVar
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr, business_date, now_shanghai
|
||||
|
||||
from ..base_task import BaseTask, TaskContext
|
||||
|
||||
# =============================================================================
|
||||
@@ -81,6 +84,8 @@ class TimeWindow(Enum):
|
||||
THIS_QUARTER = "THIS_QUARTER" # 本季度
|
||||
LAST_QUARTER = "LAST_QUARTER" # 上季度
|
||||
LAST_6_MONTHS = "LAST_6_MONTHS" # 最近半年(不含本月)
|
||||
LAST_30_DAYS = "LAST_30_DAYS" # 近30天(含今天)
|
||||
LAST_60_DAYS = "LAST_60_DAYS" # 近60天(含今天)
|
||||
|
||||
|
||||
class CourseType(Enum):
|
||||
@@ -292,18 +297,20 @@ class BaseDwsTask(BaseTask):
|
||||
获取时间窗口的日期范围(用于财务报表)
|
||||
|
||||
时间口径说明:
|
||||
- 周起始日为周一
|
||||
- 月/季度起始为第一天0点
|
||||
- 营业日切点:BUSINESS_DAY_START_HOUR(默认 08:00)
|
||||
- 周起始日为周一 08:00
|
||||
- 月/季度起始为第一天 08:00
|
||||
|
||||
Args:
|
||||
window: 时间窗口枚举
|
||||
base_date: 基准日期,默认为今天
|
||||
base_date: 基准日期,默认为当前营业日
|
||||
|
||||
Returns:
|
||||
TimeRange对象
|
||||
"""
|
||||
if base_date is None:
|
||||
base_date = date.today()
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
base_date = business_date(now_shanghai(), cutoff)
|
||||
|
||||
if window == TimeWindow.THIS_WEEK:
|
||||
# 本周(周一起始)
|
||||
@@ -369,6 +376,16 @@ class BaseDwsTask(BaseTask):
|
||||
start = self.get_month_first_day(self._shift_months(month_start, -6))
|
||||
return TimeRange(start=start, end=end)
|
||||
|
||||
elif window == TimeWindow.LAST_30_DAYS:
|
||||
# 近30天(含今天)
|
||||
start = base_date - timedelta(days=29)
|
||||
return TimeRange(start=start, end=base_date)
|
||||
|
||||
elif window == TimeWindow.LAST_60_DAYS:
|
||||
# 近60天(含今天)
|
||||
start = base_date - timedelta(days=59)
|
||||
return TimeRange(start=start, end=base_date)
|
||||
|
||||
raise ValueError(f"不支持的时间窗口类型: {window}")
|
||||
|
||||
def get_comparison_range(self, time_range: TimeRange) -> TimeRange:
|
||||
@@ -410,9 +427,9 @@ class BaseDwsTask(BaseTask):
|
||||
|
||||
def is_new_hire_in_month(self, hire_date: date, stat_month: date) -> bool:
|
||||
"""
|
||||
判断是否为新入职(月1日0点后入职)
|
||||
判断是否为新入职(月1日8点后入职)
|
||||
|
||||
新入职定档规则:月1日0点之后入职的,计算为新入职
|
||||
新入职定档规则:月1日8点之后入职的,计算为新入职
|
||||
|
||||
Args:
|
||||
hire_date: 入职日期
|
||||
@@ -527,10 +544,12 @@ class BaseDwsTask(BaseTask):
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
def _load_area_categories(self) -> Dict[str, Dict[str, Any]]:
|
||||
"""加载区域分类映射"""
|
||||
"""加载区域分类映射(支持台桌级细分)"""
|
||||
sql = """
|
||||
SELECT
|
||||
source_area_name, category_code, category_name,
|
||||
source_area_name, source_table_name,
|
||||
category_code, category_name,
|
||||
display_name, short_name,
|
||||
match_type, match_priority
|
||||
FROM dws.cfg_area_category
|
||||
WHERE is_active = TRUE
|
||||
@@ -540,10 +559,15 @@ class BaseDwsTask(BaseTask):
|
||||
if not rows:
|
||||
return {}
|
||||
|
||||
# 双层索引:(area_name, table_name) → config
|
||||
# table_name 为 NULL 时用空字符串作 key
|
||||
result = {}
|
||||
for row in rows:
|
||||
row_dict = dict(row)
|
||||
result[row_dict['source_area_name']] = row_dict
|
||||
area = row_dict['source_area_name']
|
||||
table = row_dict.get('source_table_name') or ''
|
||||
key = f"{area}\x00{table}" # 复合键,\x00 不会出现在正常名称中
|
||||
result[key] = row_dict
|
||||
return result
|
||||
|
||||
def _load_skill_types(self) -> Dict[int, Dict[str, Any]]:
|
||||
@@ -709,50 +733,57 @@ class BaseDwsTask(BaseTask):
|
||||
# 默认为基础课
|
||||
return CourseType.BASE
|
||||
|
||||
def get_area_category(self, area_name: Optional[str]) -> Dict[str, str]:
|
||||
def get_area_category(self, area_name: Optional[str], table_name: Optional[str] = None) -> Dict[str, str]:
|
||||
"""
|
||||
获取区域分类(支持精确匹配、模糊匹配、兜底)
|
||||
获取区域分类(支持台桌级精确 > 区域精确 > 模糊 > 兜底)
|
||||
|
||||
Args:
|
||||
area_name: 原始区域名称
|
||||
area_name: 原始区域名称(dim_table.site_table_area_name)
|
||||
table_name: 台桌名称(dim_table.table_name),用于台桌级细分映射
|
||||
|
||||
Returns:
|
||||
包含 category_code 和 category_name 的字典
|
||||
包含 category_code, category_name, display_name, short_name 的字典
|
||||
"""
|
||||
config = self.load_config_cache()
|
||||
default = {'category_code': 'OTHER', 'category_name': '其他', 'display_name': '其他', 'short_name': '他'}
|
||||
|
||||
if not area_name:
|
||||
# 无区域名称,返回默认
|
||||
return {'category_code': 'OTHER', 'category_name': '其他区域'}
|
||||
return default
|
||||
|
||||
# 1. 精确匹配
|
||||
if area_name in config.area_categories:
|
||||
cat = config.area_categories[area_name]
|
||||
if cat.get('match_type') == 'EXACT':
|
||||
return {
|
||||
'category_code': cat['category_code'],
|
||||
'category_name': cat['category_name']
|
||||
}
|
||||
cats = config.area_categories
|
||||
|
||||
# 2. 模糊匹配(按优先级)
|
||||
for key, cat in config.area_categories.items():
|
||||
if cat.get('match_type') == 'LIKE':
|
||||
pattern = key.replace('%', '')
|
||||
if pattern and pattern in area_name:
|
||||
return {
|
||||
'category_code': cat['category_code'],
|
||||
'category_name': cat['category_name']
|
||||
}
|
||||
|
||||
# 3. 兜底
|
||||
if 'DEFAULT' in config.area_categories:
|
||||
cat = config.area_categories['DEFAULT']
|
||||
def _pick(cat: Dict[str, Any]) -> Dict[str, str]:
|
||||
return {
|
||||
'category_code': cat['category_code'],
|
||||
'category_name': cat['category_name']
|
||||
'category_name': cat['category_name'],
|
||||
'display_name': cat.get('display_name') or cat['category_name'],
|
||||
'short_name': cat.get('short_name') or '',
|
||||
}
|
||||
|
||||
return {'category_code': 'OTHER', 'category_name': '其他区域'}
|
||||
# 1. 台桌级精确匹配(area_name + table_name)
|
||||
if table_name:
|
||||
key = f"{area_name}\x00{table_name}"
|
||||
if key in cats and cats[key].get('match_type') == 'EXACT':
|
||||
return _pick(cats[key])
|
||||
|
||||
# 2. 区域级精确匹配(area_name + 空 table_name)
|
||||
key = f"{area_name}\x00"
|
||||
if key in cats and cats[key].get('match_type') == 'EXACT':
|
||||
return _pick(cats[key])
|
||||
|
||||
# 3. 模糊匹配(按优先级,已排序)
|
||||
for k, cat in cats.items():
|
||||
if cat.get('match_type') == 'LIKE':
|
||||
pattern = cat['source_area_name'].replace('%', '')
|
||||
if pattern and pattern in area_name:
|
||||
return _pick(cat)
|
||||
|
||||
# 4. 兜底
|
||||
fallback_key = f"DEFAULT\x00"
|
||||
if fallback_key in cats:
|
||||
return _pick(cats[fallback_key])
|
||||
|
||||
return default
|
||||
|
||||
def calculate_sprint_bonus(
|
||||
self,
|
||||
@@ -908,8 +939,10 @@ class BaseDwsTask(BaseTask):
|
||||
offset = 0
|
||||
cols_str = ", ".join(columns)
|
||||
|
||||
# 构建WHERE条件
|
||||
where_parts = [f"DATE({date_col}) >= %s", f"DATE({date_col}) <= %s"]
|
||||
# 构建WHERE条件 — 使用营业日归属表达式替代 DATE()
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr(date_col, cutoff)
|
||||
where_parts = [f"{biz_expr} >= %s", f"{biz_expr} <= %s"]
|
||||
params: List[Any] = [start_date, end_date]
|
||||
|
||||
if where_clause:
|
||||
@@ -972,15 +1005,24 @@ class BaseDwsTask(BaseTask):
|
||||
获取助教在指定日期的等级(SCD2 as-of取值)
|
||||
|
||||
助教等级是SCD2维度,历史月份不能直接用"当前等级"。
|
||||
需要按有效期as-of join取数。
|
||||
取 scd2_start_time <= asof_date 的最近一条记录,
|
||||
不再要求 asof_date 落在 [scd2_start, scd2_end) 区间内,
|
||||
因此 SCD2 区间存在间隙时仍能取到间隙之前最近一条记录的等级;
|
||||
若 asof_date 早于首条 SCD2 记录的 scd2_start_time,则查不到记录,返回 None。
|
||||
|
||||
Args:
|
||||
assistant_id: 助教ID
|
||||
asof_date: 取值日期
|
||||
|
||||
Returns:
|
||||
助教等级信息,包含level_code和level_name
|
||||
助教等级信息,包含level_code和level_name;无记录时返回None
|
||||
"""
|
||||
# CHANGE 2026-02-27 | 放宽 SCD2 匹配:去掉 scd2_end_time 条件,
|
||||
# 改为取 scd2_start_time <= asof_date 的最近一条。
|
||||
# 原逻辑要求 asof_date 严格落在 [start, end) 区间内,
|
||||
# 当 SCD2 记录有间隙或服务日期早于首条记录时返回 None,
|
||||
# 导致 dws_assistant_daily_detail.assistant_level_name 出现 NULL,
|
||||
# 下游 monthly 聚合时同一 level_code 有 NULL/非NULL 两种值引发 UK 冲突。
|
||||
sql = """
|
||||
SELECT
|
||||
assistant_id,
|
||||
@@ -999,13 +1041,30 @@ class BaseDwsTask(BaseTask):
|
||||
FROM dwd.dim_assistant
|
||||
WHERE assistant_id = %s
|
||||
AND scd2_start_time <= %s
|
||||
AND (scd2_end_time IS NULL OR scd2_end_time > %s)
|
||||
ORDER BY scd2_start_time DESC
|
||||
LIMIT 1
|
||||
"""
|
||||
rows = self.db.query(sql, (assistant_id, asof_date, asof_date))
|
||||
rows = self.db.query(sql, (assistant_id, asof_date))
|
||||
return dict(rows[0]) if rows else None
|
||||
|
||||
|
||||
# CHANGE 2026-02-27 | Added a static level_code -> level_name mapping.
|
||||
# When the SCD2 records start later than the service date (dim_assistant only began syncing at a later point),
|
||||
# the assistant_level carried on the service record itself is mapped through this table as a fallback.
|
||||
LEVEL_CODE_NAME_MAP: dict[int, str] = {
|
||||
8: "助教管理",
|
||||
10: "初级",
|
||||
20: "中级",
|
||||
30: "高级",
|
||||
40: "星级",
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def level_code_to_name(level_code: int | None) -> str | None:
|
||||
"""Map an assistant_level code to its Chinese display name; return None for a None code or a code absent from LEVEL_CODE_NAME_MAP."""
|
||||
if level_code is None:
|
||||
return None
|
||||
# The code is coerced with int() before the lookup; a value int() cannot convert raises instead of returning None.
|
||||
|
|
||||
return BaseDwsTask.LEVEL_CODE_NAME_MAP.get(int(level_code))
|
||||
|
||||
def get_member_card_balance_asof(
|
||||
self,
|
||||
member_id: int,
|
||||
|
||||
@@ -22,6 +22,8 @@ from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import BaseDwsTask
|
||||
from .dws_helpers import parse_id_list
|
||||
|
||||
@@ -39,9 +41,11 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
end_date: date,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""结账单日汇总(结算头表按日聚合)"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
DATE(pay_time) AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
COUNT(*) AS order_count,
|
||||
COUNT(CASE WHEN member_id != 0 AND member_id IS NOT NULL THEN 1 END) AS member_order_count,
|
||||
COUNT(CASE WHEN member_id = 0 OR member_id IS NULL THEN 1 END) AS guest_order_count,
|
||||
@@ -61,13 +65,17 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
SUM(member_discount_amount) AS member_discount_amount,
|
||||
SUM(rounding_amount) AS rounding_amount,
|
||||
SUM(pl_coupon_sale_amount) AS pl_coupon_sale_amount,
|
||||
-- 消费金额
|
||||
SUM(consume_money) AS total_consume
|
||||
-- CHANGE 2026-03-07 | consume_money → items_sum 口径校准
|
||||
-- consume_money 存在三种历史口径混合,DWS 层统一使用 items_sum
|
||||
SUM(table_charge_money + goods_money + assistant_pd_money
|
||||
+ assistant_cx_money + electricity_money) AS items_sum
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND DATE(pay_time) >= %s
|
||||
AND DATE(pay_time) <= %s
|
||||
GROUP BY DATE(pay_time)
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
-- CHANGE 2026-03-07 | 排除退货(6)/退款(7),仅保留台桌结账(1)+商城订单(3)
|
||||
AND settle_type IN (1, 3)
|
||||
GROUP BY {biz_expr}
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
@@ -83,9 +91,11 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""充值日汇总(充值订单按日聚合)"""
|
||||
# CHANGE 2026-02-21 | BUG 8: dwd_recharge_order 无 pay_money/gift_money,实际字段为 pay_amount/point_amount
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
DATE(pay_time) AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
COUNT(*) AS recharge_count,
|
||||
SUM(pay_amount + point_amount) AS recharge_total,
|
||||
SUM(pay_amount) AS recharge_cash,
|
||||
@@ -101,9 +111,9 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
COUNT(DISTINCT member_id) AS recharge_member_count
|
||||
FROM dwd.dwd_recharge_order
|
||||
WHERE site_id = %s
|
||||
AND DATE(pay_time) >= %s
|
||||
AND DATE(pay_time) <= %s
|
||||
GROUP BY DATE(pay_time)
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
GROUP BY {biz_expr}
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
@@ -118,9 +128,11 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
end_date: date,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""团购核销日汇总(结算头表 + 团购核销表联查)"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("sh.pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
sh.pay_time::DATE AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
COUNT(CASE WHEN sh.coupon_amount > 0 THEN 1 END) AS groupbuy_count,
|
||||
SUM(
|
||||
CASE
|
||||
@@ -137,9 +149,9 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
ON gr.order_settle_id = sh.order_settle_id
|
||||
AND COALESCE(gr.is_delete, 0) = 0
|
||||
WHERE sh.site_id = %s
|
||||
AND sh.pay_time >= %s
|
||||
AND sh.pay_time < %s + INTERVAL '1 day'
|
||||
GROUP BY sh.pay_time::DATE
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
GROUP BY {biz_expr}
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
@@ -188,16 +200,18 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
if not member_ids and not order_ids:
|
||||
return []
|
||||
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
pay_time::DATE AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
order_settle_id,
|
||||
member_id,
|
||||
adjust_amount
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND pay_time >= %s
|
||||
AND pay_time < %s + INTERVAL '1 day'
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND adjust_amount != 0
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
@@ -242,20 +256,22 @@ class FinanceBaseTask(BaseDwsTask):
|
||||
end_date: date,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""赠送卡消费汇总(余额变动按日聚合)"""
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr_change = biz_date_sql_expr("change_time", cutoff)
|
||||
id_list = ", ".join(str(card_id) for card_id in self.GIFT_CARD_TYPE_IDS)
|
||||
sql = f"""
|
||||
SELECT
|
||||
change_time::DATE AS stat_date,
|
||||
{biz_expr_change} AS stat_date,
|
||||
SUM(ABS(change_amount)) AS gift_card_consume
|
||||
FROM dwd.dwd_member_balance_change
|
||||
WHERE site_id = %s
|
||||
AND change_time >= %s
|
||||
AND change_time < %s + INTERVAL '1 day'
|
||||
AND {biz_expr_change} >= %s
|
||||
AND {biz_expr_change} <= %s
|
||||
AND from_type = 1
|
||||
AND change_amount < 0
|
||||
AND COALESCE(is_delete, 0) = 0
|
||||
AND card_type_id IN ({id_list})
|
||||
GROUP BY change_time::DATE
|
||||
GROUP BY {biz_expr_change}
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
@@ -222,6 +222,8 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
member_discount = self.safe_decimal(settle.get('member_discount_amount', 0))
|
||||
rounding_amount = self.safe_decimal(settle.get('rounding_amount', 0))
|
||||
big_customer_amount = self.safe_decimal(big_customer.get('big_customer_amount', 0))
|
||||
# 大客户优惠不超过手动调整总额(大客户是 adjust 的子集)
|
||||
big_customer_amount = min(big_customer_amount, adjust_amount) if adjust_amount > 0 else Decimal('0')
|
||||
other_discount = adjust_amount - big_customer_amount
|
||||
if other_discount < 0:
|
||||
other_discount = Decimal('0')
|
||||
@@ -229,8 +231,8 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
# 赠送卡消费(来自余额变动)
|
||||
gift_card_consume_amount = self.safe_decimal(gift_card.get('gift_card_consume', 0))
|
||||
|
||||
# 优惠合计
|
||||
discount_total = discount_groupbuy + member_discount + gift_card_consume_amount + adjust_amount + rounding_amount
|
||||
# 优惠合计(大客户 + 其他互斥拆分;adjust_amount >= 0 时两者之和 = adjust_amount,adjust_amount < 0 时两者均记为 0)
|
||||
discount_total = discount_groupbuy + member_discount + gift_card_consume_amount + big_customer_amount + other_discount + rounding_amount
|
||||
|
||||
# 确认收入
|
||||
confirmed_income = gross_amount - discount_total
|
||||
@@ -249,9 +251,12 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
cash_balance_change = cash_inflow_total - cash_outflow_total
|
||||
|
||||
# 卡消费
|
||||
cash_card_consume = card_pay_amount + balance_pay_amount
|
||||
# CHANGE 2026-03-07 | balance 恒等式校准
|
||||
# balance_amount = recharge_card_amount + gift_card_amount
|
||||
# recharge_card_consume 只取现金充值部分(recharge_card_amount),不加 balance_amount 避免重复计算
|
||||
recharge_card_consume = card_pay_amount
|
||||
gift_card_consume = gift_card_consume_amount
|
||||
card_consume_total = cash_card_consume + gift_card_consume
|
||||
card_consume_total = recharge_card_consume + gift_card_consume
|
||||
|
||||
# 充值统计
|
||||
recharge_count = self.safe_int(recharge.get('recharge_count', 0))
|
||||
@@ -284,7 +289,8 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
'discount_groupbuy': discount_groupbuy,
|
||||
'discount_vip': member_discount,
|
||||
'discount_gift_card': gift_card_consume_amount,
|
||||
'discount_manual': adjust_amount,
|
||||
# CHANGE 2026-03-07 | discount_manual 语义修正:存储大客户优惠(与 discount_other 互斥,两者之和 = adjust_amount)
|
||||
'discount_manual': big_customer_amount,
|
||||
'discount_rounding': rounding_amount,
|
||||
'discount_other': other_discount,
|
||||
# 确认收入
|
||||
@@ -297,7 +303,7 @@ class FinanceDailyTask(FinanceBaseTask):
|
||||
'platform_fee_amount': platform_fee_amount,
|
||||
'recharge_cash_inflow': recharge_cash_inflow,
|
||||
'card_consume_total': card_consume_total,
|
||||
'cash_card_consume': cash_card_consume,
|
||||
'recharge_card_consume': recharge_card_consume,
|
||||
'gift_card_consume': gift_card_consume,
|
||||
'cash_outflow_total': cash_outflow_total,
|
||||
'cash_balance_change': cash_balance_change,
|
||||
|
||||
@@ -35,6 +35,8 @@ from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import TaskContext
|
||||
from .finance_base_task import FinanceBaseTask
|
||||
|
||||
@@ -112,9 +114,11 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
|
||||
- rounding_amount: 抹零金额
|
||||
- pl_coupon_sale_amount: 平台券销售金额(团购实付路径1)
|
||||
"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
pay_time::DATE AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
-- 团购相关
|
||||
COALESCE(SUM(coupon_amount), 0) AS coupon_amount_total,
|
||||
COALESCE(SUM(pl_coupon_sale_amount), 0) AS pl_coupon_sale_total,
|
||||
@@ -132,10 +136,10 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
|
||||
COUNT(*) AS total_orders
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %(site_id)s
|
||||
AND pay_time >= %(start_date)s
|
||||
AND pay_time < %(end_date)s + INTERVAL '1 day'
|
||||
AND settle_status = 1 -- 已结账
|
||||
GROUP BY pay_time::DATE
|
||||
AND {biz_expr} >= %(start_date)s
|
||||
AND {biz_expr} <= %(end_date)s
|
||||
AND settle_type IN (1, 3) -- 台桌结账 + 商城订单,排除退货/撤销
|
||||
GROUP BY {biz_expr}
|
||||
ORDER BY stat_date
|
||||
"""
|
||||
rows = self.db.query(sql, {
|
||||
@@ -160,9 +164,11 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
|
||||
|
||||
返回:{日期: 团购实付总额}
|
||||
"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("sh.pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
sh.pay_time::DATE AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
SUM(
|
||||
CASE
|
||||
WHEN sh.pl_coupon_sale_amount > 0 THEN sh.pl_coupon_sale_amount
|
||||
@@ -174,11 +180,11 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
|
||||
ON gr.order_settle_id = sh.order_settle_id
|
||||
AND COALESCE(gr.is_delete, 0) = 0
|
||||
WHERE sh.site_id = %(site_id)s
|
||||
AND sh.pay_time >= %(start_date)s
|
||||
AND sh.pay_time < %(end_date)s + INTERVAL '1 day'
|
||||
AND sh.settle_status = 1
|
||||
AND {biz_expr} >= %(start_date)s
|
||||
AND {biz_expr} <= %(end_date)s
|
||||
AND sh.settle_type IN (1, 3) -- 台桌结账 + 商城订单,排除退货/撤销
|
||||
AND sh.coupon_amount > 0 -- 只统计有团购的订单
|
||||
GROUP BY sh.pay_time::DATE
|
||||
GROUP BY {biz_expr}
|
||||
"""
|
||||
rows = self.db.query(sql, {
|
||||
'site_id': site_id,
|
||||
@@ -206,22 +212,24 @@ class FinanceDiscountDetailTask(FinanceBaseTask):
|
||||
2794699703437125, # 酒水卡
|
||||
2793266846533445, # 活动抵用券
|
||||
)
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("change_time", cutoff)
|
||||
id_list = ", ".join(str(card_id) for card_id in gift_card_type_ids)
|
||||
sql = f"""
|
||||
SELECT
|
||||
change_time::DATE AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
card_type_id,
|
||||
COUNT(*) AS consume_count,
|
||||
SUM(ABS(change_amount)) AS consume_amount
|
||||
FROM dwd.dwd_member_balance_change
|
||||
WHERE site_id = %(site_id)s
|
||||
AND change_time >= %(start_date)s
|
||||
AND change_time < %(end_date)s + INTERVAL '1 day'
|
||||
AND {biz_expr} >= %(start_date)s
|
||||
AND {biz_expr} <= %(end_date)s
|
||||
AND from_type = 1
|
||||
AND change_amount < 0
|
||||
AND COALESCE(is_delete, 0) = 0
|
||||
AND card_type_id IN ({id_list})
|
||||
GROUP BY change_time::DATE, card_type_id
|
||||
GROUP BY {biz_expr}, card_type_id
|
||||
"""
|
||||
rows = self.db.query(sql, {
|
||||
'site_id': site_id,
|
||||
|
||||
@@ -33,6 +33,8 @@ from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import TaskContext
|
||||
from .finance_base_task import FinanceBaseTask
|
||||
|
||||
@@ -94,32 +96,35 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
|
||||
收入类型分类:
|
||||
- TABLE_FEE: 台费收入 (table_charge_money)
|
||||
- GOODS: 商品收入 (goods_money)
|
||||
- ASSISTANT_BASE: 助教基础课 (assistant_pd_money)
|
||||
- ASSISTANT_BONUS: 助教附加课 (assistant_cx_money)
|
||||
- ASSISTANT_PD: 助教陪打收入 (assistant_pd_money)
|
||||
- ASSISTANT_CX: 助教超休收入 (assistant_cx_money)
|
||||
"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
pay_time::DATE AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
-- 台费收入
|
||||
COALESCE(SUM(table_charge_money), 0) AS table_fee_income,
|
||||
COUNT(CASE WHEN table_charge_money > 0 THEN 1 END) AS table_fee_orders,
|
||||
-- 商品收入
|
||||
COALESCE(SUM(goods_money), 0) AS goods_income,
|
||||
COUNT(CASE WHEN goods_money > 0 THEN 1 END) AS goods_orders,
|
||||
-- 助教基础课收入(PD=陪打)
|
||||
COALESCE(SUM(assistant_pd_money), 0) AS assistant_base_income,
|
||||
COUNT(CASE WHEN assistant_pd_money > 0 THEN 1 END) AS assistant_base_orders,
|
||||
-- 助教附加课收入(CX=超休/促销)
|
||||
COALESCE(SUM(assistant_cx_money), 0) AS assistant_bonus_income,
|
||||
COUNT(CASE WHEN assistant_cx_money > 0 THEN 1 END) AS assistant_bonus_orders,
|
||||
-- CHANGE 2026-03-07 | ASSISTANT_BASE/BONUS → PD/CX 命名校准
|
||||
-- 助教陪打收入
|
||||
COALESCE(SUM(assistant_pd_money), 0) AS assistant_pd_income,
|
||||
COUNT(CASE WHEN assistant_pd_money > 0 THEN 1 END) AS assistant_pd_orders,
|
||||
-- 助教超休收入
|
||||
COALESCE(SUM(assistant_cx_money), 0) AS assistant_cx_income,
|
||||
COUNT(CASE WHEN assistant_cx_money > 0 THEN 1 END) AS assistant_cx_orders,
|
||||
-- 总订单数
|
||||
COUNT(*) AS total_orders
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %(site_id)s
|
||||
AND pay_time >= %(start_date)s
|
||||
AND pay_time < %(end_date)s + INTERVAL '1 day'
|
||||
AND settle_status = 1 -- 已结账
|
||||
GROUP BY pay_time::DATE
|
||||
AND {biz_expr} >= %(start_date)s
|
||||
AND {biz_expr} <= %(end_date)s
|
||||
AND settle_type IN (1, 3) -- 台桌结账 + 商城订单,排除退货/撤销
|
||||
GROUP BY {biz_expr}
|
||||
ORDER BY stat_date
|
||||
"""
|
||||
rows = self.db.query(sql, {
|
||||
@@ -142,46 +147,57 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
|
||||
"""
|
||||
# CHANGE 2026-02-22 | BUG 7 修复 | dim_table 主键是 table_id 而非 site_table_id,
|
||||
# JOIN 条件从 dt.site_table_id → dt.table_id(事实表侧 site_table_id 不变)
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("sh.pay_time", cutoff)
|
||||
sql = f"""
|
||||
WITH area_orders AS (
|
||||
SELECT
|
||||
tfl.pay_time::DATE AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
dt.site_table_area_name AS area_name,
|
||||
dt.table_name AS table_name,
|
||||
tfl.order_settle_id,
|
||||
COALESCE(tfl.ledger_amount, 0) AS income_amount,
|
||||
COALESCE(tfl.ledger_time_seconds, 0) AS duration_seconds
|
||||
COALESCE(tfl.ledger_count, 0) AS duration_seconds
|
||||
FROM dwd.dwd_table_fee_log tfl
|
||||
INNER JOIN dwd.dwd_settlement_head sh
|
||||
ON sh.order_settle_id = tfl.order_settle_id
|
||||
LEFT JOIN dwd.dim_table dt
|
||||
ON dt.table_id = tfl.site_table_id
|
||||
AND dt.scd2_is_current = 1
|
||||
WHERE tfl.site_id = %(site_id)s
|
||||
AND tfl.pay_time >= %(start_date)s
|
||||
AND tfl.pay_time < %(end_date)s + INTERVAL '1 day'
|
||||
AND {biz_expr} >= %(start_date)s
|
||||
AND {biz_expr} <= %(end_date)s
|
||||
AND COALESCE(tfl.is_delete, 0) = 0
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT
|
||||
asl.start_use_time::DATE AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
dt.site_table_area_name AS area_name,
|
||||
dt.table_name AS table_name,
|
||||
asl.order_settle_id,
|
||||
COALESCE(asl.ledger_amount, 0) AS income_amount,
|
||||
COALESCE(asl.income_seconds, 0) AS duration_seconds
|
||||
FROM dwd.dwd_assistant_service_log asl
|
||||
INNER JOIN dwd.dwd_settlement_head sh
|
||||
ON sh.order_settle_id = asl.order_settle_id
|
||||
LEFT JOIN dwd.dim_table dt
|
||||
ON dt.table_id = asl.site_table_id
|
||||
AND dt.scd2_is_current = 1
|
||||
WHERE asl.site_id = %(site_id)s
|
||||
AND asl.start_use_time >= %(start_date)s
|
||||
AND asl.start_use_time < %(end_date)s + INTERVAL '1 day'
|
||||
AND {biz_expr} >= %(start_date)s
|
||||
AND {biz_expr} <= %(end_date)s
|
||||
AND asl.is_delete = 0
|
||||
)
|
||||
SELECT
|
||||
stat_date,
|
||||
area_name,
|
||||
table_name,
|
||||
COALESCE(SUM(income_amount), 0) AS income_amount,
|
||||
COALESCE(SUM(duration_seconds), 0) AS duration_seconds,
|
||||
COUNT(DISTINCT order_settle_id) AS order_count
|
||||
FROM area_orders
|
||||
GROUP BY stat_date, area_name
|
||||
GROUP BY stat_date, area_name, table_name
|
||||
ORDER BY stat_date, area_name
|
||||
"""
|
||||
rows = self.db.query(sql, {
|
||||
@@ -232,14 +248,14 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
|
||||
"""
|
||||
转换按收入类型的数据
|
||||
|
||||
将每日汇总数据展开为4条记录(台费/商品/基础课/附加课)
|
||||
将每日汇总数据展开为4条记录(台费/商品/陪打/超休)
|
||||
"""
|
||||
# 收入类型定义
|
||||
# CHANGE 2026-03-07 | ASSISTANT_BASE/BONUS → PD/CX 命名校准
|
||||
income_types = [
|
||||
('TABLE_FEE', '台费收入', 'table_fee_income', 'table_fee_orders'),
|
||||
('GOODS', '商品收入', 'goods_income', 'goods_orders'),
|
||||
('ASSISTANT_BASE', '助教基础课', 'assistant_base_income', 'assistant_base_orders'),
|
||||
('ASSISTANT_BONUS', '助教附加课', 'assistant_bonus_income', 'assistant_bonus_orders'),
|
||||
('ASSISTANT_PD', '助教陪打收入', 'assistant_pd_income', 'assistant_pd_orders'),
|
||||
('ASSISTANT_CX', '助教超休收入', 'assistant_cx_income', 'assistant_cx_orders'),
|
||||
]
|
||||
|
||||
records = []
|
||||
@@ -309,8 +325,8 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
|
||||
duration_seconds = row.get('duration_seconds', 0) or 0
|
||||
order_count = row.get('order_count', 0) or 0
|
||||
|
||||
# 映射区域名称到分类代码
|
||||
category = self.get_area_category(area_name)
|
||||
# CHANGE 2026-03-07 | 传入 table_name 支持台桌级映射(VIP包厢 V5→斯诺克)
|
||||
category = self.get_area_category(area_name, row.get('table_name'))
|
||||
category_code = category.get('category_code', 'OTHER')
|
||||
category_name = category.get('category_name', '其他区域')
|
||||
|
||||
@@ -363,7 +379,7 @@ class FinanceIncomeStructureTask(FinanceBaseTask):
|
||||
"""
|
||||
兼容旧逻辑的映射方法(当前使用 get_area_category)
|
||||
"""
|
||||
return self.get_area_category(area_name)
|
||||
return self.get_area_category(area_name, None)
|
||||
|
||||
def load(self, records: List[Dict[str, Any]], context: TaskContext) -> Dict[str, Any]:
|
||||
"""
|
||||
|
||||
@@ -31,6 +31,8 @@ from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import TaskContext
|
||||
from .finance_base_task import FinanceBaseTask
|
||||
|
||||
@@ -111,9 +113,11 @@ class FinanceRechargeTask(FinanceBaseTask):
|
||||
|
||||
def _extract_recharge_summary(self, site_id: int, start_date: date, end_date: date) -> List[Dict[str, Any]]:
|
||||
# CHANGE 2026-02-21 | BUG 8: dwd_recharge_order 无 pay_money/gift_money,实际字段为 pay_amount/point_amount
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
DATE(pay_time) AS stat_date,
|
||||
{biz_expr} AS stat_date,
|
||||
COUNT(*) AS recharge_count,
|
||||
SUM(pay_amount + point_amount) AS recharge_total,
|
||||
SUM(pay_amount) AS recharge_cash,
|
||||
@@ -129,8 +133,8 @@ class FinanceRechargeTask(FinanceBaseTask):
|
||||
COUNT(DISTINCT member_id) AS recharge_member_count,
|
||||
COUNT(DISTINCT CASE WHEN is_first = 1 THEN member_id END) AS new_member_count
|
||||
FROM dwd.dwd_recharge_order
|
||||
WHERE site_id = %s AND DATE(pay_time) >= %s AND DATE(pay_time) <= %s
|
||||
GROUP BY DATE(pay_time)
|
||||
WHERE site_id = %s AND {biz_expr} >= %s AND {biz_expr} <= %s
|
||||
GROUP BY {biz_expr}
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
|
||||
@@ -29,6 +29,8 @@ from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
@@ -74,7 +76,9 @@ class GoodsStockDailyTask(BaseDwsTask):
|
||||
self.get_task_code(), site_id, start_date, end_date,
|
||||
)
|
||||
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("fetched_at", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
site_goods_id,
|
||||
goods_name,
|
||||
@@ -92,11 +96,12 @@ class GoodsStockDailyTask(BaseDwsTask):
|
||||
current_stock,
|
||||
site_id,
|
||||
tenant_id,
|
||||
fetched_at
|
||||
fetched_at,
|
||||
{biz_expr} AS biz_date
|
||||
FROM dwd.dwd_goods_stock_summary
|
||||
WHERE site_id = %s
|
||||
AND DATE(fetched_at) >= %s
|
||||
AND DATE(fetched_at) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
ORDER BY fetched_at
|
||||
"""
|
||||
rows = self.query_dwd(sql, (site_id, start_date, end_date))
|
||||
@@ -135,11 +140,14 @@ class GoodsStockDailyTask(BaseDwsTask):
|
||||
fetched_at = row.get("fetched_at")
|
||||
if fetched_at is None:
|
||||
continue
|
||||
stat_date = (
|
||||
fetched_at.date()
|
||||
if hasattr(fetched_at, "date")
|
||||
else fetched_at
|
||||
)
|
||||
# 使用 SQL 层计算的营业日归属日期
|
||||
stat_date = row.get("biz_date")
|
||||
if stat_date is None:
|
||||
stat_date = (
|
||||
fetched_at.date()
|
||||
if hasattr(fetched_at, "date")
|
||||
else fetched_at
|
||||
)
|
||||
site_goods_id = row.get("site_goods_id")
|
||||
if site_goods_id is None:
|
||||
continue
|
||||
|
||||
@@ -31,6 +31,8 @@ from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
@@ -81,7 +83,9 @@ class GoodsStockMonthlyTask(BaseDwsTask):
|
||||
self.get_task_code(), site_id, start_date, end_date,
|
||||
)
|
||||
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("fetched_at", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
site_goods_id,
|
||||
goods_name,
|
||||
@@ -99,11 +103,12 @@ class GoodsStockMonthlyTask(BaseDwsTask):
|
||||
current_stock,
|
||||
site_id,
|
||||
tenant_id,
|
||||
fetched_at
|
||||
fetched_at,
|
||||
{biz_expr} AS biz_date
|
||||
FROM dwd.dwd_goods_stock_summary
|
||||
WHERE site_id = %s
|
||||
AND DATE(fetched_at) >= %s
|
||||
AND DATE(fetched_at) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
ORDER BY fetched_at
|
||||
"""
|
||||
rows = self.query_dwd(sql, (site_id, start_date, end_date))
|
||||
@@ -141,12 +146,15 @@ class GoodsStockMonthlyTask(BaseDwsTask):
|
||||
fetched_at = row.get("fetched_at")
|
||||
if fetched_at is None:
|
||||
continue
|
||||
row_date = (
|
||||
fetched_at.date()
|
||||
if hasattr(fetched_at, "date")
|
||||
else fetched_at
|
||||
)
|
||||
# 自然月的第一天作为 stat_date
|
||||
# 使用 SQL 层计算的营业日归属日期
|
||||
row_date = row.get("biz_date")
|
||||
if row_date is None:
|
||||
row_date = (
|
||||
fetched_at.date()
|
||||
if hasattr(fetched_at, "date")
|
||||
else fetched_at
|
||||
)
|
||||
# 营业月的第一天作为 stat_date
|
||||
first_day = _month_first_day(row_date)
|
||||
site_goods_id = row.get("site_goods_id")
|
||||
if site_goods_id is None:
|
||||
|
||||
@@ -31,6 +31,8 @@ from datetime import date, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
|
||||
|
||||
@@ -82,7 +84,9 @@ class GoodsStockWeeklyTask(BaseDwsTask):
|
||||
self.get_task_code(), site_id, start_date, end_date,
|
||||
)
|
||||
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("fetched_at", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
site_goods_id,
|
||||
goods_name,
|
||||
@@ -100,11 +104,12 @@ class GoodsStockWeeklyTask(BaseDwsTask):
|
||||
current_stock,
|
||||
site_id,
|
||||
tenant_id,
|
||||
fetched_at
|
||||
fetched_at,
|
||||
{biz_expr} AS biz_date
|
||||
FROM dwd.dwd_goods_stock_summary
|
||||
WHERE site_id = %s
|
||||
AND DATE(fetched_at) >= %s
|
||||
AND DATE(fetched_at) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
ORDER BY fetched_at
|
||||
"""
|
||||
rows = self.query_dwd(sql, (site_id, start_date, end_date))
|
||||
@@ -142,12 +147,15 @@ class GoodsStockWeeklyTask(BaseDwsTask):
|
||||
fetched_at = row.get("fetched_at")
|
||||
if fetched_at is None:
|
||||
continue
|
||||
row_date = (
|
||||
fetched_at.date()
|
||||
if hasattr(fetched_at, "date")
|
||||
else fetched_at
|
||||
)
|
||||
# ISO 周的周一作为 stat_date
|
||||
# 使用 SQL 层计算的营业日归属日期
|
||||
row_date = row.get("biz_date")
|
||||
if row_date is None:
|
||||
row_date = (
|
||||
fetched_at.date()
|
||||
if hasattr(fetched_at, "date")
|
||||
else fetched_at
|
||||
)
|
||||
# 营业周的周一作为 stat_date
|
||||
monday = _iso_monday(row_date)
|
||||
site_goods_id = row.get("site_goods_id")
|
||||
if site_goods_id is None:
|
||||
|
||||
@@ -12,6 +12,8 @@ from typing import Any, Dict, List, Optional, Tuple
|
||||
from .base_index_task import BaseIndexTask
|
||||
from ..base_dws_task import TaskContext
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
|
||||
@dataclass
|
||||
class MemberActivityData:
|
||||
@@ -238,6 +240,8 @@ class MemberIndexBaseTask(BaseIndexTask):
|
||||
end_date: date,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""提取到店记录(按天去重)"""
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
condition_sql = self._build_visit_condition_sql()
|
||||
sql = f"""
|
||||
WITH visit_source AS (
|
||||
@@ -258,12 +262,12 @@ class MemberIndexBaseTask(BaseIndexTask):
|
||||
)
|
||||
SELECT
|
||||
canonical_member_id AS member_id,
|
||||
DATE(pay_time) AS visit_date,
|
||||
{biz_expr} AS visit_date,
|
||||
MAX(pay_time) AS last_visit_time,
|
||||
SUM(COALESCE(pay_amount, 0)) AS day_pay_amount
|
||||
FROM visit_source
|
||||
WHERE canonical_member_id > 0
|
||||
GROUP BY canonical_member_id, DATE(pay_time)
|
||||
GROUP BY canonical_member_id, {biz_expr}
|
||||
ORDER BY canonical_member_id, visit_date
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
|
||||
@@ -214,7 +214,7 @@ class RelationIndexTask(BaseIndexTask):
|
||||
JOIN dwd.dim_assistant d
|
||||
ON s.user_id = d.user_id
|
||||
AND d.scd2_is_current = 1
|
||||
AND COALESCE(d.is_delete, 0) = 0
|
||||
AND COALESCE(d.leave_status, 0) = 0
|
||||
WHERE s.site_id = %s
|
||||
AND s.tenant_member_id > 0
|
||||
AND s.user_id > 0
|
||||
|
||||
@@ -18,6 +18,8 @@ from typing import Any, Dict, List, Optional
|
||||
from .base_index_task import BaseIndexTask
|
||||
from ..base_dws_task import TaskContext
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 数据类定义
|
||||
@@ -333,6 +335,10 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
short_days = int(params.get('spend_window_short_days', 30))
|
||||
long_days = int(params.get('spend_window_long_days', 90))
|
||||
|
||||
# CHANGE 2026-03-01 | business-day-cutoff 7.6: DATE(pay_time) → 营业日归属表达式
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
|
||||
# 单条 SQL 同时聚合 30 天和 90 天窗口,避免两次扫描
|
||||
# INTERVAL 天数通过 f-string 内嵌(整数,安全);site_id 走参数化
|
||||
sql = f"""
|
||||
@@ -357,7 +363,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
-- 90 天窗口
|
||||
SUM(pay_amount) AS spend_90,
|
||||
COUNT(*) AS orders_90,
|
||||
COUNT(DISTINCT DATE(pay_time)) AS visit_days_90,
|
||||
COUNT(DISTINCT {biz_expr}) AS visit_days_90,
|
||||
COUNT(DISTINCT EXTRACT(ISOYEAR FROM pay_time)::int * 100
|
||||
+ EXTRACT(WEEK FROM pay_time)::int) AS active_weeks_90,
|
||||
-- 30 天窗口(子集过滤)
|
||||
@@ -366,7 +372,7 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
SUM(CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
|
||||
THEN 1 ELSE 0 END) AS orders_30,
|
||||
COUNT(DISTINCT CASE WHEN pay_time >= NOW() - INTERVAL '{short_days} days'
|
||||
THEN DATE(pay_time) END) AS visit_days_30
|
||||
THEN {biz_expr} END) AS visit_days_30
|
||||
FROM consume_source
|
||||
WHERE canonical_member_id > 0
|
||||
GROUP BY canonical_member_id
|
||||
@@ -467,12 +473,15 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
long_days = int(params.get('spend_window_long_days', 90))
|
||||
alpha = float(params.get('ewma_alpha_daily_spend', 0.3))
|
||||
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr_s = biz_date_sql_expr("s.pay_time", cutoff)
|
||||
|
||||
sql = f"""
|
||||
WITH consume_source AS (
|
||||
SELECT
|
||||
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
|
||||
AS canonical_member_id,
|
||||
DATE(s.pay_time) AS pay_date,
|
||||
{biz_expr_s} AS pay_date,
|
||||
COALESCE(s.pay_amount, 0) AS pay_amount
|
||||
FROM dwd.dwd_settlement_head s
|
||||
LEFT JOIN dwd.dim_member_card_account mca
|
||||
@@ -516,12 +525,15 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
long_days = int(params.get('spend_window_long_days', 90))
|
||||
alpha = float(params.get('ewma_alpha_daily_spend', 0.3))
|
||||
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr_s = biz_date_sql_expr("s.pay_time", cutoff)
|
||||
|
||||
sql = f"""
|
||||
WITH consume_source AS (
|
||||
SELECT
|
||||
COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id)
|
||||
AS canonical_member_id,
|
||||
DATE(s.pay_time) AS pay_date,
|
||||
{biz_expr_s} AS pay_date,
|
||||
COALESCE(s.pay_amount, 0) AS pay_amount
|
||||
FROM dwd.dwd_settlement_head s
|
||||
LEFT JOIN dwd.dim_member_card_account mca
|
||||
@@ -572,13 +584,17 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
)
|
||||
return result
|
||||
|
||||
# CHANGE 2026-03-02 | 基数校准改用非零样本中位数,零消费会员不参与校准
|
||||
# 原因:零消费会员不参与 SPI 有效区分,纳入中位数只会拉低基数
|
||||
_CALIBRATE_MIN_SAMPLE = 10 # 非零样本最小数量,低于此值回退默认值
|
||||
|
||||
def _calibrate_amount_bases(
|
||||
self, features: Dict[int, SPIMemberFeatures], params: Dict[str, float]
|
||||
) -> Dict[str, float]:
|
||||
"""从门店数据计算中位数作为金额压缩基数校准值。
|
||||
|
||||
优先级:cfg_index_parameters 配置值 > 自动校准中位数 > DEFAULT_PARAMS 默认值。
|
||||
自动校准中位数 ≤ 0 时回退到 DEFAULT_PARAMS。
|
||||
优先级:cfg_index_parameters 配置值 > 非零样本自动校准中位数 > DEFAULT_PARAMS 默认值。
|
||||
仅使用值 > 0 的样本计算中位数;非零样本数 < _CALIBRATE_MIN_SAMPLE 时回退默认值。
|
||||
"""
|
||||
# 特征字段 → 对应的 amount_base 参数名
|
||||
base_extractors: Dict[str, callable] = {
|
||||
@@ -600,21 +616,23 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
)
|
||||
continue
|
||||
|
||||
# 从特征数据计算中位数
|
||||
values = [extractor(f) for f in features.values()]
|
||||
median_val = self.calculate_median(values)
|
||||
# 仅取非零样本计算中位数
|
||||
nonzero_values = [v for v in (extractor(f) for f in features.values()) if v > 0]
|
||||
|
||||
if median_val > 0:
|
||||
if len(nonzero_values) >= self._CALIBRATE_MIN_SAMPLE:
|
||||
median_val = self.calculate_median(nonzero_values)
|
||||
calibrated[base_key] = median_val
|
||||
self.logger.info(
|
||||
"SPI 基数校准: %s 自动校准为中位数 %.2f", base_key, median_val,
|
||||
"SPI 基数校准: %s 非零样本 %d/%d,中位数 %.2f",
|
||||
base_key, len(nonzero_values), len(features), median_val,
|
||||
)
|
||||
else:
|
||||
# 中位数 ≤ 0,回退到 DEFAULT_PARAMS
|
||||
# 非零样本不足,回退到 DEFAULT_PARAMS
|
||||
calibrated[base_key] = self.DEFAULT_PARAMS[base_key]
|
||||
self.logger.warning(
|
||||
"SPI 基数校准: %s 中位数 %.2f ≤ 0,回退到默认值 %.2f",
|
||||
base_key, median_val, self.DEFAULT_PARAMS[base_key],
|
||||
"SPI 基数校准: %s 非零样本 %d 不足(最低 %d),回退到默认值 %.2f",
|
||||
base_key, len(nonzero_values), self._CALIBRATE_MIN_SAMPLE,
|
||||
self.DEFAULT_PARAMS[base_key],
|
||||
)
|
||||
|
||||
return calibrated
|
||||
@@ -747,6 +765,13 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
)
|
||||
"""
|
||||
inserted = 0
|
||||
# raw score 列为 numeric(10,4),display 列为 numeric(5,2)
|
||||
# 防止极端数据导致 NumericValueOutOfRange
|
||||
RAW_MAX = 999999.9999
|
||||
DISP_MAX = 999.99
|
||||
def _clamp(v, lo, hi):
|
||||
return max(lo, min(hi, v))
|
||||
|
||||
for f in data_list:
|
||||
cur.execute(insert_sql, (
|
||||
f.site_id, f.member_id,
|
||||
@@ -754,9 +779,14 @@ class SpendingPowerIndexTask(BaseIndexTask):
|
||||
f.orders_30, f.orders_90,
|
||||
f.visit_days_30, f.visit_days_90,
|
||||
f.avg_ticket_90, f.active_weeks_90, f.daily_spend_ewma_90,
|
||||
f.score_level_raw, f.score_speed_raw, f.score_stability_raw,
|
||||
f.score_level_display, f.score_speed_display, f.score_stability_display,
|
||||
f.raw_score, f.display_score,
|
||||
_clamp(f.score_level_raw, -RAW_MAX, RAW_MAX),
|
||||
_clamp(f.score_speed_raw, -RAW_MAX, RAW_MAX),
|
||||
_clamp(f.score_stability_raw, -RAW_MAX, RAW_MAX),
|
||||
_clamp(f.score_level_display, 0, DISP_MAX),
|
||||
_clamp(f.score_speed_display, 0, DISP_MAX),
|
||||
_clamp(f.score_stability_display, 0, DISP_MAX),
|
||||
_clamp(f.raw_score, -RAW_MAX, RAW_MAX),
|
||||
_clamp(f.display_score, 0, DISP_MAX),
|
||||
))
|
||||
inserted += max(cur.rowcount, 0)
|
||||
|
||||
|
||||
@@ -68,6 +68,10 @@ class DwsMaintenanceTask(BaseDwsTask):
|
||||
{"table": "dws_finance_recharge_summary", "date_col": "stat_date"},
|
||||
{"table": "dws_finance_expense_summary", "date_col": "expense_month"},
|
||||
{"table": "dws_platform_settlement", "date_col": "settlement_date"},
|
||||
# CHANGE [2026-03-07] intent: 项目标签表纳入历史数据清理范围
|
||||
# assumptions: computed_at 为清理日期列,与其他表的 stat_date 语义一致
|
||||
{"table": "dws_assistant_project_tag", "date_col": "computed_at"},
|
||||
{"table": "dws_member_project_tag", "date_col": "computed_at"},
|
||||
]
|
||||
|
||||
def get_task_code(self) -> str:
|
||||
|
||||
@@ -6,9 +6,10 @@
|
||||
以"会员"为粒度,统计消费行为和滚动窗口指标
|
||||
|
||||
数据来源:
|
||||
- dwd_settlement_head: 结账单头表
|
||||
- dwd_settlement_head: 结账单头表(settle_type IN (1,3) 过滤有效订单)
|
||||
- dim_member: 会员维度
|
||||
- dim_member_card_account: 会员卡账户
|
||||
- dwd_recharge_order: 充值订单(30/60/90 天窗口统计)
|
||||
|
||||
目标表:
|
||||
dws.dws_member_consumption_summary
|
||||
@@ -32,6 +33,8 @@ from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
from .dws_helpers import mask_mobile, calc_days_since
|
||||
|
||||
@@ -209,12 +212,18 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
"""
|
||||
提取会员消费统计(含滚动窗口)
|
||||
"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
-- CHANGE 2026-03-07 | consume_money → items_sum 口径校准
|
||||
-- consume_money 存在三种历史口径(A/B/C)混合,DWS 层统一使用 items_sum
|
||||
-- items_sum = table_charge_money + goods_money + assistant_pd_money + assistant_cx_money + electricity_money
|
||||
WITH consume_base AS (
|
||||
SELECT
|
||||
member_id,
|
||||
DATE(pay_time) AS consume_date,
|
||||
consume_money,
|
||||
{biz_expr} AS consume_date,
|
||||
table_charge_money + goods_money + assistant_pd_money
|
||||
+ assistant_cx_money + electricity_money AS items_sum,
|
||||
table_charge_money,
|
||||
goods_money,
|
||||
assistant_pd_money + assistant_cx_money AS assistant_amount
|
||||
@@ -222,6 +231,9 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
WHERE site_id = %s
|
||||
AND member_id IS NOT NULL
|
||||
AND member_id != 0
|
||||
-- CHANGE 2026-03-07 | dwd_settlement_head 无 is_delete 字段,改用 settle_type 过滤
|
||||
-- settle_type: 1=台桌结账, 3=商城订单; 排除 6=退货, 7=撤销
|
||||
AND settle_type IN (1, 3)
|
||||
)
|
||||
SELECT
|
||||
member_id,
|
||||
@@ -229,7 +241,7 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
MAX(consume_date) AS last_consume_date,
|
||||
-- 全量累计
|
||||
COUNT(*) AS total_visit_count,
|
||||
SUM(consume_money) AS total_consume_amount,
|
||||
SUM(items_sum) AS total_consume_amount,
|
||||
SUM(table_charge_money) AS total_table_fee,
|
||||
SUM(goods_money) AS total_goods_amount,
|
||||
SUM(assistant_amount) AS total_assistant_amount,
|
||||
@@ -240,12 +252,12 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
COUNT(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN 1 END) AS visit_count_30d,
|
||||
COUNT(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN 1 END) AS visit_count_60d,
|
||||
COUNT(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN 1 END) AS visit_count_90d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '6 days' THEN consume_money ELSE 0 END) AS consume_amount_7d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '9 days' THEN consume_money ELSE 0 END) AS consume_amount_10d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '14 days' THEN consume_money ELSE 0 END) AS consume_amount_15d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN consume_money ELSE 0 END) AS consume_amount_30d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN consume_money ELSE 0 END) AS consume_amount_60d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN consume_money ELSE 0 END) AS consume_amount_90d
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '6 days' THEN items_sum ELSE 0 END) AS consume_amount_7d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '9 days' THEN items_sum ELSE 0 END) AS consume_amount_10d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '14 days' THEN items_sum ELSE 0 END) AS consume_amount_15d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '29 days' THEN items_sum ELSE 0 END) AS consume_amount_30d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '59 days' THEN items_sum ELSE 0 END) AS consume_amount_60d,
|
||||
SUM(CASE WHEN consume_date >= %s - INTERVAL '89 days' THEN items_sum ELSE 0 END) AS consume_amount_90d
|
||||
FROM consume_base
|
||||
GROUP BY member_id
|
||||
"""
|
||||
@@ -257,29 +269,21 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
"""
|
||||
提取会员信息
|
||||
|
||||
生日优先级:手动补录(fdw_app.member_birthday_manual)> API 来源(dim_member.birthday)
|
||||
FDW 连接失败时降级为仅使用 dim_member.birthday
|
||||
生日来源:dim_member.birthday(API 来源)
|
||||
CHANGE 2026-02-26 | 维客线索重构:移除 FDW member_birthday_manual 读取,
|
||||
生日不再单独补录,归入维客线索"客户基础信息"大类
|
||||
"""
|
||||
# CHANGE 2026-02-21 | dim_member 无 site_id 字段,改用 register_site_id
|
||||
# CHANGE 2026-02-22 | 恢复 birthday 字段(C1 迁移已加列),供后续 C2 COALESCE 使用
|
||||
# CHANGE 2026-02-22 | 需求 B:通过事实表反查,支持跨店消费会员
|
||||
# CHANGE 2026-02-22 | 需求 C2:COALESCE 优先手动补录生日,FDW 失败时降级
|
||||
sql_with_fdw = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr_create = biz_date_sql_expr("m.create_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
m.member_id,
|
||||
m.nickname,
|
||||
m.mobile,
|
||||
m.member_card_grade_name,
|
||||
DATE(m.create_time) AS register_date,
|
||||
{biz_expr_create} AS register_date,
|
||||
m.recharge_money_sum,
|
||||
COALESCE(
|
||||
(SELECT birthday_value
|
||||
FROM fdw_app.member_birthday_manual
|
||||
WHERE member_id = m.member_id
|
||||
ORDER BY recorded_at ASC
|
||||
LIMIT 1),
|
||||
m.birthday
|
||||
) AS birthday
|
||||
m.birthday
|
||||
FROM dwd.dim_member m
|
||||
WHERE m.member_id IN (
|
||||
SELECT DISTINCT member_id
|
||||
@@ -289,36 +293,7 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
AND member_id != 0
|
||||
) AND m.scd2_is_current = 1
|
||||
"""
|
||||
# CHANGE 2026-02-24 | 修复列名:tenant_member_id → member_id(dwd_settlement_head 无 tenant_member_id 列)
|
||||
sql_fallback = """
|
||||
SELECT
|
||||
member_id,
|
||||
nickname,
|
||||
mobile,
|
||||
member_card_grade_name,
|
||||
DATE(create_time) AS register_date,
|
||||
recharge_money_sum,
|
||||
birthday
|
||||
FROM dwd.dim_member
|
||||
WHERE member_id IN (
|
||||
SELECT DISTINCT member_id
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND member_id IS NOT NULL
|
||||
AND member_id != 0
|
||||
) AND scd2_is_current = 1
|
||||
"""
|
||||
try:
|
||||
rows = self.db.query(sql_with_fdw, (site_id,))
|
||||
except Exception as exc:
|
||||
# CHANGE [2026-02-24] FDW 查询失败后事务处于 failed 状态,必须先 rollback 再执行 fallback
|
||||
self.db.rollback()
|
||||
# FDW 连接失败,降级为仅使用 dim_member.birthday
|
||||
self.logger.warning(
|
||||
"%s: FDW 读取 member_birthday_manual 失败,降级为 dim_member.birthday — %s",
|
||||
self.get_task_code(), exc,
|
||||
)
|
||||
rows = self.db.query(sql_fallback, (site_id,))
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
|
||||
result = {}
|
||||
for row in (rows or []):
|
||||
@@ -343,11 +318,11 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
balance
|
||||
FROM dwd.dim_member_card_account
|
||||
WHERE tenant_member_id IN (
|
||||
SELECT DISTINCT tenant_member_id
|
||||
SELECT DISTINCT member_id
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND tenant_member_id IS NOT NULL
|
||||
AND tenant_member_id != 0
|
||||
AND member_id IS NOT NULL
|
||||
AND member_id != 0
|
||||
) AND scd2_is_current = 1
|
||||
AND COALESCE(is_delete, 0) = 0
|
||||
"""
|
||||
@@ -390,21 +365,23 @@ class MemberConsumptionTask(BaseDwsTask):
|
||||
返回: {member_id: {count_30d, count_60d, count_90d,
|
||||
amount_30d, amount_60d, amount_90d}}
|
||||
"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
member_id,
|
||||
COUNT(CASE WHEN DATE(pay_time) >= %s - INTERVAL '29 days' THEN 1 END) AS count_30d,
|
||||
COUNT(CASE WHEN DATE(pay_time) >= %s - INTERVAL '59 days' THEN 1 END) AS count_60d,
|
||||
COUNT(CASE WHEN DATE(pay_time) >= %s - INTERVAL '89 days' THEN 1 END) AS count_90d,
|
||||
COALESCE(SUM(CASE WHEN DATE(pay_time) >= %s - INTERVAL '29 days' THEN pay_amount ELSE 0 END), 0) AS amount_30d,
|
||||
COALESCE(SUM(CASE WHEN DATE(pay_time) >= %s - INTERVAL '59 days' THEN pay_amount ELSE 0 END), 0) AS amount_60d,
|
||||
COALESCE(SUM(CASE WHEN DATE(pay_time) >= %s - INTERVAL '89 days' THEN pay_amount ELSE 0 END), 0) AS amount_90d
|
||||
COUNT(CASE WHEN {biz_expr} >= %s - INTERVAL '29 days' THEN 1 END) AS count_30d,
|
||||
COUNT(CASE WHEN {biz_expr} >= %s - INTERVAL '59 days' THEN 1 END) AS count_60d,
|
||||
COUNT(CASE WHEN {biz_expr} >= %s - INTERVAL '89 days' THEN 1 END) AS count_90d,
|
||||
COALESCE(SUM(CASE WHEN {biz_expr} >= %s - INTERVAL '29 days' THEN pay_amount ELSE 0 END), 0) AS amount_30d,
|
||||
COALESCE(SUM(CASE WHEN {biz_expr} >= %s - INTERVAL '59 days' THEN pay_amount ELSE 0 END), 0) AS amount_60d,
|
||||
COALESCE(SUM(CASE WHEN {biz_expr} >= %s - INTERVAL '89 days' THEN pay_amount ELSE 0 END), 0) AS amount_90d
|
||||
FROM dwd.dwd_recharge_order
|
||||
WHERE site_id = %s
|
||||
AND member_id IS NOT NULL
|
||||
AND member_id != 0
|
||||
AND pay_time IS NOT NULL
|
||||
AND DATE(pay_time) <= %s
|
||||
AND {biz_expr} <= %s
|
||||
GROUP BY member_id
|
||||
"""
|
||||
params = (
|
||||
|
||||
224
apps/etl/connectors/feiqiu/tasks/dws/member_project_tag_task.py
Normal file
224
apps/etl/connectors/feiqiu/tasks/dws/member_project_tag_task.py
Normal file
@@ -0,0 +1,224 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
DWS 客户项目标签任务
|
||||
|
||||
按时间窗口计算每位客户在四大项目(BILLIARD/SNOOKER/MAHJONG/KTV)的
|
||||
消费时长占比,占比≥25% 则分配标签。散客(member_id=0)不参与。
|
||||
|
||||
数据链路:
|
||||
dwd_table_fee_log (ledger_count)
|
||||
→ JOIN dim_table (site_table_id → table_id, scd2_is_current=1)
|
||||
→ get_area_category(area_name, table_name)
|
||||
→ 按 category_code 汇总 → 计算占比 → 写入 dws_member_project_tag
|
||||
|
||||
目标表:
|
||||
dws.dws_member_project_tag
|
||||
|
||||
更新策略:
|
||||
全量删除重建(按 site_id 删除后重新插入所有时间窗口)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from tasks.dws.base_dws_task import BaseDwsTask, TimeWindow
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
# Only the four main project categories take part in tagging.
VALID_CATEGORIES = {"BILLIARD", "SNOOKER", "MAHJONG", "KTV"}

# The two time windows used by the customer dashboard.
MEMBER_WINDOWS = [TimeWindow.LAST_30_DAYS, TimeWindow.LAST_60_DAYS]

# Minimum share of a member's total duration for a category to be tagged.
TAG_THRESHOLD = Decimal("0.25")
|
||||
|
||||
|
||||
class MemberProjectTagTask(BaseDwsTask):
    """DWS ETL task that derives per-member project tags.

    For every window in ``MEMBER_WINDOWS`` the task sums ``ledger_count``
    from ``dwd.dwd_table_fee_log`` per member and table, maps each table to
    a category through ``get_area_category``, keeps only the codes listed in
    ``VALID_CATEGORIES`` and stores each category's share of the member's
    total. A share at or above ``TAG_THRESHOLD`` sets ``is_tagged``.
    Rows whose ``member_id`` is NULL or 0 (walk-in customers) are excluded.

    Target table: ``dws.dws_member_project_tag``.
    Load strategy: delete all rows of the site, then re-insert every window.
    """

    def get_task_code(self) -> str:
        """Return the task code of this task."""
        return "DWS_MEMBER_PROJECT_TAG"

    def get_target_table(self) -> str:
        """Return the target table name (without schema)."""
        return "dws_member_project_tag"

    def get_primary_keys(self) -> List[str]:
        """Return the columns that identify one output row."""
        return ["site_id", "member_id", "time_window", "category_code"]

    def extract(self, context) -> Dict[str, Any]:
        """Collect table dimension data and per-window duration rows.

        Args:
            context: task context; only ``store_id`` is read here.

        Returns:
            A dict with ``window_data`` (window value -> duration rows),
            ``table_info`` (table_id -> dimension row) and ``site_id``.
        """
        site_id = context.store_id
        self.logger.info("%s: 提取客户台费时长数据", self.get_task_code())

        # Return value intentionally unused; presumably warms the config
        # cache before transform() needs it — TODO confirm in BaseDwsTask.
        self.load_config_cache()
        table_info = self._extract_table_info(site_id)

        window_data: Dict[str, List[Dict]] = {}
        for window in MEMBER_WINDOWS:
            time_range = self.get_time_window_range(window)
            window_data[window.value] = self._extract_member_durations(
                site_id, time_range.start, time_range.end
            )

        return {
            "window_data": window_data,
            "table_info": table_info,
            "site_id": site_id,
        }

    def _extract_table_info(self, site_id: int) -> Dict[int, Dict[str, Any]]:
        """Load current table dimension rows of a site, keyed by ``table_id``."""
        sql = """
            SELECT table_id, table_name, site_table_area_name AS area_name
            FROM dwd.dim_table
            WHERE site_id = %s AND scd2_is_current = 1
        """
        rows = self.db.query(sql, (site_id,))
        return {r["table_id"]: dict(r) for r in (rows or [])}

    def _extract_member_durations(
        self, site_id: int, start_date: date, end_date: date
    ) -> List[Dict[str, Any]]:
        """Load summed ``ledger_count`` per member and table for a date range.

        The range is inclusive on both ends and is compared against the
        business-date expression built from ``ledger_end_time``. Deleted
        rows and rows without a real member (NULL or 0) are filtered out.

        Returns:
            Dicts with ``member_id``, ``table_id`` and ``duration_seconds``.
        """
        cutoff = self.config.get("app.business_day_start_hour", 8)
        biz_expr = biz_date_sql_expr("tfl.ledger_end_time", cutoff)
        # biz_expr is generated SQL and is embedded via the f-string; every
        # caller-supplied value goes through named query parameters.
        sql = f"""
            SELECT
                tfl.member_id,
                tfl.site_table_id AS table_id,
                COALESCE(SUM(tfl.ledger_count), 0) AS duration_seconds
            FROM dwd.dwd_table_fee_log tfl
            WHERE tfl.site_id = %(site_id)s
              AND {biz_expr} >= %(start_date)s
              AND {biz_expr} <= %(end_date)s
              AND COALESCE(tfl.is_delete, 0) = 0
              AND tfl.member_id IS NOT NULL
              AND tfl.member_id != 0
            GROUP BY tfl.member_id, tfl.site_table_id
        """
        rows = self.db.query(sql, {
            "site_id": site_id,
            "start_date": start_date,
            "end_date": end_date,
        })
        return [dict(r) for r in rows] if rows else []

    def transform(self, extracted: Dict[str, Any], context) -> List[Dict[str, Any]]:
        """Turn duration rows into one output row per member, window and category.

        Args:
            extracted: the dict produced by ``extract``.
            context: task context; ``tenant_id`` is read if present, else 0.

        Returns:
            Rows ready for ``load``; ``percentage`` is rounded to four
            decimals and ``is_tagged`` compares that rounded value with
            ``TAG_THRESHOLD``.
        """
        table_info = extracted["table_info"]
        site_id = extracted["site_id"]
        tenant_id = getattr(context, "tenant_id", 0) or 0
        results: List[Dict[str, Any]] = []

        four_places = Decimal("0.0001")
        # Display info depends only on the category code, so resolve it once
        # per code instead of re-scanning the config cache for every row.
        display_by_code: Dict[str, Dict[str, str]] = {}

        for window_value, rows in extracted["window_data"].items():
            # member_id -> category_code -> accumulated seconds
            member_cats: Dict[int, Dict[str, int]] = {}

            for row in rows:
                mid = row["member_id"]
                tid = row["table_id"]
                secs = self.safe_int(row["duration_seconds"])
                if secs <= 0:
                    continue

                # Unknown tables yield area_name/table_name of None.
                tinfo = table_info.get(tid, {})
                cat = self.get_area_category(
                    tinfo.get("area_name"), tinfo.get("table_name")
                )
                code = cat.get("category_code", "OTHER")
                if code not in VALID_CATEGORIES:
                    continue

                by_code = member_cats.setdefault(mid, {})
                by_code[code] = by_code.get(code, 0) + secs

            for mid, cats in member_cats.items():
                total = sum(cats.values())
                if total <= 0:
                    continue

                for code, secs in cats.items():
                    pct = Decimal(str(secs)) / Decimal(str(total))
                    pct = pct.quantize(four_places)

                    if code not in display_by_code:
                        display_by_code[code] = self._get_category_display(code)
                    cat_info = display_by_code[code]

                    results.append({
                        "site_id": site_id,
                        "tenant_id": tenant_id,
                        "member_id": mid,
                        "time_window": window_value,
                        "category_code": code,
                        "category_name": cat_info["category_name"],
                        "short_name": cat_info["short_name"],
                        "duration_seconds": secs,
                        "total_seconds": total,
                        "percentage": float(pct),
                        "is_tagged": pct >= TAG_THRESHOLD,
                    })

        self.logger.info(
            "%s: 生成 %d 条标签记录(其中 %d 条达标)",
            self.get_task_code(),
            len(results),
            sum(1 for r in results if r["is_tagged"]),
        )
        return results

    def _get_category_display(self, code: str) -> Dict[str, str]:
        """Return ``category_name`` and ``short_name`` for a category code.

        The first config-cache entry whose ``category_code`` matches wins;
        otherwise a built-in fallback is used, and for unknown codes the
        code itself and its first character.
        """
        cache = self.load_config_cache()
        for cat in cache.area_categories.values():
            if cat.get("category_code") == code:
                return {
                    "category_name": cat.get("display_name") or cat.get("category_name", code),
                    "short_name": cat.get("short_name", code[:1]),
                }
        fallback = {
            "BILLIARD": ("🎱 中式/追分", "🎱"),
            "SNOOKER": ("斯诺克", "斯"),
            "MAHJONG": ("🀄 麻将/棋牌", "🀄"),
            "KTV": ("🎤 团建/K歌", "🎤"),
        }
        name, short = fallback.get(code, (code, code[:1]))
        return {"category_name": name, "short_name": short}

    def load(self, transformed, context) -> dict:
        """Replace all rows of the site with *transformed*.

        Args:
            transformed: rows produced by ``transform``.
            context: task context (not read here).

        Returns:
            ``{"status": "SUCCESS", "counts": {"inserted": n, "deleted": m}}``.
        """
        # NOTE(review): with no rows nothing is deleted either, so rows from
        # an earlier run stay in the table — confirm this is intended.
        if not transformed:
            return {"status": "SUCCESS", "counts": {"inserted": 0, "deleted": 0}}

        site_id = transformed[0]["site_id"]

        delete_sql = "DELETE FROM dws.dws_member_project_tag WHERE site_id = %s"
        self.db.execute(delete_sql, (site_id,))
        # The deleted count is only available when the db wrapper exposes a
        # ``cursor`` attribute; otherwise it is reported as 0.
        deleted = self.db.cursor.rowcount if hasattr(self.db, "cursor") else 0

        insert_sql = """
            INSERT INTO dws.dws_member_project_tag (
                site_id, tenant_id, member_id, time_window,
                category_code, category_name, short_name,
                duration_seconds, total_seconds, percentage, is_tagged,
                computed_at, created_at, updated_at
            ) VALUES (
                %(site_id)s, %(tenant_id)s, %(member_id)s, %(time_window)s,
                %(category_code)s, %(category_name)s, %(short_name)s,
                %(duration_seconds)s, %(total_seconds)s, %(percentage)s, %(is_tagged)s,
                NOW(), NOW(), NOW()
            )
        """
        for row in transformed:
            self.db.execute(insert_sql, row)

        self.logger.info(
            "%s: 删除 %d 条,插入 %d 条",
            self.get_task_code(), deleted, len(transformed),
        )
        return {
            "status": "SUCCESS",
            "counts": {"inserted": len(transformed), "deleted": deleted},
        }
|
||||
@@ -35,6 +35,8 @@ from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from neozqyy_shared.datetime_utils import biz_date_sql_expr
|
||||
|
||||
from .base_dws_task import BaseDwsTask, TaskContext
|
||||
from .dws_helpers import mask_mobile
|
||||
|
||||
@@ -152,7 +154,7 @@ class MemberVisitTask(BaseDwsTask):
|
||||
|
||||
# 获取区域分类
|
||||
area_name = tbl_info.get('area_name')
|
||||
area_cat = self.get_area_category(area_name)
|
||||
area_cat = self.get_area_category(area_name, tbl_info.get('table_name'))
|
||||
|
||||
# 构建助教服务JSON
|
||||
assistant_services_json = self._build_assistant_services_json(services)
|
||||
@@ -175,7 +177,7 @@ class MemberVisitTask(BaseDwsTask):
|
||||
# 会员信息
|
||||
'member_nickname': memb_info.get('nickname'),
|
||||
'member_mobile': self._mask_mobile(memb_info.get('mobile')),
|
||||
# CHANGE 2026-02-22 | 恢复从 dim_member.birthday 读取
|
||||
# CHANGE 2026-02-26 | 生日仅从 dim_member.birthday 读取(API 来源)
|
||||
'member_birthday': memb_info.get('birthday'),
|
||||
# 台桌信息
|
||||
'table_id': table_id,
|
||||
@@ -187,12 +189,20 @@ class MemberVisitTask(BaseDwsTask):
|
||||
'goods_amount': self.safe_decimal(settle.get('goods_money', 0)),
|
||||
'assistant_amount': self.safe_decimal(settle.get('assistant_pd_money', 0)) + \
|
||||
self.safe_decimal(settle.get('assistant_cx_money', 0)),
|
||||
'total_consume': self.safe_decimal(settle.get('consume_money', 0)),
|
||||
# CHANGE 2026-03-07 | consume_money → items_sum 口径校准
|
||||
'total_consume': (
|
||||
self.safe_decimal(settle.get('table_charge_money', 0))
|
||||
+ self.safe_decimal(settle.get('goods_money', 0))
|
||||
+ self.safe_decimal(settle.get('assistant_pd_money', 0))
|
||||
+ self.safe_decimal(settle.get('assistant_cx_money', 0))
|
||||
+ self.safe_decimal(settle.get('electricity_money', 0))
|
||||
),
|
||||
'total_discount': self._calc_total_discount(settle),
|
||||
'actual_pay': self.safe_decimal(settle.get('pay_amount', 0)),
|
||||
# 支付方式
|
||||
'cash_pay': self.safe_decimal(settle.get('pay_amount', 0)),
|
||||
'cash_card_pay': self.safe_decimal(settle.get('balance_amount', 0)),
|
||||
'balance_pay': self.safe_decimal(settle.get('balance_amount', 0)),
|
||||
'recharge_card_pay': self.safe_decimal(settle.get('recharge_card_amount', 0)),
|
||||
'gift_card_pay': self.safe_decimal(settle.get('gift_card_amount', 0)),
|
||||
'groupbuy_pay': self.safe_decimal(settle.get('coupon_amount', 0)),
|
||||
# 时长
|
||||
@@ -205,7 +215,49 @@ class MemberVisitTask(BaseDwsTask):
|
||||
|
||||
return results
|
||||
|
||||
# load() 已移除——使用 BaseDwsTask 默认实现(DATE_COL="visit_date")
|
||||
# CHANGE 2026-02-27 | bugfix: 覆盖 load(),在标准 delete-by-window 后
|
||||
# 额外按 order_settle_id 清理旧数据,防止 biz_date 切换后残留记录导致唯一约束冲突。
|
||||
# 背景:visit_date 从 pay_time::date 改为 biz_date_sql_expr 后,凌晨订单的
|
||||
# visit_date 前移一天,旧数据不在新窗口的 delete 范围内,insert 时触发
|
||||
# uk_dws_member_visit (site_id, member_id, order_settle_id) 冲突。
|
||||
def load(self, transformed, context: "TaskContext") -> dict:
    """Delete the current window, purge stale duplicates, then insert.

    After the standard delete-by-window, rows that share an
    ``order_settle_id`` with this batch are also removed for the site, so
    records left under a different date cannot collide with the insert.

    Returns:
        A dict with ``counts`` and, when rows were supplied, ``extra``
        holding the two delete counts.
    """
    if not transformed:
        zero_counts = dict.fromkeys(
            ("fetched", "inserted", "updated", "skipped", "errors"), 0
        )
        return {"counts": zero_counts}

    window_deleted = self.delete_existing_data(
        context, date_col=self.DATE_COL or "stat_date"
    )

    # Only truthy ids take part in the extra cleanup.
    settle_ids = [
        rec["order_settle_id"] for rec in transformed if rec.get("order_settle_id")
    ]
    stale_deleted = 0
    if settle_ids:
        target = f"{self.DWS_SCHEMA}.{self.get_target_table()}"
        marks = ",".join("%s" for _ in settle_ids)
        cleanup_sql = (
            f"DELETE FROM {target} "
            f"WHERE site_id = %s AND order_settle_id IN ({marks})"
        )
        owner_site = transformed[0].get("site_id", context.store_id)
        with self.db.conn.cursor() as cur:
            cur.execute(cleanup_sql, [owner_site, *settle_ids])
            stale_deleted = cur.rowcount
        if stale_deleted:
            self.logger.info(
                "%s: 额外清理残留旧数据 %d 行(order_settle_id 去重)",
                self.get_task_code(), stale_deleted,
            )

    inserted = self.bulk_insert(transformed)
    counts = {
        "fetched": len(transformed),
        "inserted": inserted,
        "updated": 0,
        "skipped": 0,
        "errors": 0,
    }
    return {
        "counts": counts,
        "extra": {"deleted": window_deleted, "extra_deleted": stale_deleted},
    }
|
||||
|
||||
# ==========================================================================
|
||||
# 数据提取方法
|
||||
@@ -220,7 +272,9 @@ class MemberVisitTask(BaseDwsTask):
|
||||
"""
|
||||
提取结账单
|
||||
"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("pay_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
order_settle_id,
|
||||
order_trade_no,
|
||||
@@ -228,8 +282,9 @@ class MemberVisitTask(BaseDwsTask):
|
||||
member_id,
|
||||
create_time,
|
||||
pay_time,
|
||||
DATE(pay_time) AS visit_date,
|
||||
consume_money,
|
||||
{biz_expr} AS visit_date,
|
||||
-- CHANGE 2026-03-07 | 新增 electricity_money 用于 items_sum 计算
|
||||
electricity_money,
|
||||
pay_amount,
|
||||
table_charge_money,
|
||||
goods_money,
|
||||
@@ -244,10 +299,12 @@ class MemberVisitTask(BaseDwsTask):
|
||||
recharge_card_amount
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND DATE(pay_time) >= %s
|
||||
AND DATE(pay_time) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND member_id IS NOT NULL
|
||||
AND member_id != 0
|
||||
-- CHANGE 2026-03-07 | 排除退货(6)/退款(7),仅保留台桌结账(1)+商城订单(3)
|
||||
AND settle_type IN (1, 3)
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
return [dict(row) for row in rows] if rows else []
|
||||
@@ -261,7 +318,9 @@ class MemberVisitTask(BaseDwsTask):
|
||||
"""
|
||||
提取助教服务明细
|
||||
"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("start_use_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
order_settle_id,
|
||||
site_assistant_id AS assistant_id,
|
||||
@@ -270,8 +329,8 @@ class MemberVisitTask(BaseDwsTask):
|
||||
ledger_amount
|
||||
FROM dwd.dwd_assistant_service_log
|
||||
WHERE site_id = %s
|
||||
AND DATE(start_use_time) >= %s
|
||||
AND DATE(start_use_time) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND is_delete = 0
|
||||
"""
|
||||
rows = self.db.query(sql, (site_id, start_date, end_date))
|
||||
@@ -286,14 +345,16 @@ class MemberVisitTask(BaseDwsTask):
|
||||
"""
|
||||
提取台费时长(真实秒数)
|
||||
"""
|
||||
sql = """
|
||||
cutoff = self.config.get("app.business_day_start_hour", 8)
|
||||
biz_expr = biz_date_sql_expr("ledger_end_time", cutoff)
|
||||
sql = f"""
|
||||
SELECT
|
||||
order_settle_id,
|
||||
SUM(COALESCE(real_table_use_seconds, 0)) AS table_use_seconds
|
||||
FROM dwd.dwd_table_fee_log
|
||||
WHERE site_id = %s
|
||||
AND DATE(ledger_end_time) >= %s
|
||||
AND DATE(ledger_end_time) <= %s
|
||||
AND {biz_expr} >= %s
|
||||
AND {biz_expr} <= %s
|
||||
AND COALESCE(is_delete, 0) = 0
|
||||
GROUP BY order_settle_id
|
||||
"""
|
||||
@@ -304,61 +365,26 @@ class MemberVisitTask(BaseDwsTask):
|
||||
"""
|
||||
提取会员信息
|
||||
|
||||
生日优先级:手动补录(fdw_app.member_birthday_manual)> API 来源(dim_member.birthday)
|
||||
FDW 连接失败时降级为仅使用 dim_member.birthday
|
||||
生日来源:dim_member.birthday(API 来源)
|
||||
CHANGE 2026-02-26 | 维客线索重构:移除 FDW member_birthday_manual 读取,
|
||||
生日不再单独补录,归入维客线索"客户基础信息"大类
|
||||
"""
|
||||
# CHANGE 2026-02-21 | dim_member 无 site_id 字段,改用 register_site_id
|
||||
# CHANGE 2026-02-22 | 恢复 birthday 字段(C1 迁移已加列)
|
||||
# CHANGE 2026-02-22 | 需求 B:通过事实表反查,支持跨店消费会员
|
||||
# CHANGE 2026-02-22 | 需求 C2:COALESCE 优先手动补录生日,FDW 失败时降级
|
||||
sql_with_fdw = """
|
||||
sql = """
|
||||
SELECT
|
||||
m.member_id,
|
||||
m.nickname,
|
||||
m.mobile,
|
||||
COALESCE(
|
||||
(SELECT birthday_value
|
||||
FROM fdw_app.member_birthday_manual
|
||||
WHERE member_id = m.member_id
|
||||
ORDER BY recorded_at ASC
|
||||
LIMIT 1),
|
||||
m.birthday
|
||||
) AS birthday
|
||||
m.birthday
|
||||
FROM dwd.dim_member m
|
||||
WHERE m.member_id IN (
|
||||
SELECT DISTINCT tenant_member_id
|
||||
SELECT DISTINCT member_id
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND tenant_member_id IS NOT NULL
|
||||
AND tenant_member_id != 0
|
||||
AND member_id IS NOT NULL
|
||||
AND member_id != 0
|
||||
) AND m.scd2_is_current = 1
|
||||
"""
|
||||
sql_fallback = """
|
||||
SELECT
|
||||
member_id,
|
||||
nickname,
|
||||
mobile,
|
||||
birthday
|
||||
FROM dwd.dim_member
|
||||
WHERE member_id IN (
|
||||
SELECT DISTINCT tenant_member_id
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE site_id = %s
|
||||
AND tenant_member_id IS NOT NULL
|
||||
AND tenant_member_id != 0
|
||||
) AND scd2_is_current = 1
|
||||
"""
|
||||
try:
|
||||
rows = self.db.query(sql_with_fdw, (site_id,))
|
||||
except Exception as exc:
|
||||
# CHANGE [2026-02-24] FDW 查询失败后事务处于 failed 状态,必须先 rollback 再执行 fallback
|
||||
self.db.rollback()
|
||||
# FDW 连接失败,降级为仅使用 dim_member.birthday
|
||||
self.logger.warning(
|
||||
"%s: FDW 读取 member_birthday_manual 失败,降级为 dim_member.birthday — %s",
|
||||
self.get_task_code(), exc,
|
||||
)
|
||||
rows = self.db.query(sql_fallback, (site_id,))
|
||||
rows = self.db.query(sql, (site_id,))
|
||||
return {r['member_id']: dict(r) for r in (rows or [])}
|
||||
|
||||
|
||||
|
||||
@@ -15,6 +15,11 @@ from psycopg2.extras import Json, execute_values
|
||||
from models.parsers import TypeParser
|
||||
from tasks.base_task import BaseTask
|
||||
from utils.windowing import build_window_segments, calc_window_minutes, calc_window_days, format_window_days
|
||||
from config.pipeline_config import PipelineConfig
|
||||
from pipeline.models import PipelineRequest, PipelineResult, WriteResult
|
||||
from pipeline.unified_pipeline import UnifiedPipeline
|
||||
from utils.cancellation import CancellationToken
|
||||
from utils.task_log_buffer import TaskLogBuffer
|
||||
|
||||
|
||||
ColumnTransform = Callable[[Any], Any]
|
||||
@@ -67,6 +72,15 @@ class OdsTaskSpec:
|
||||
# WINDOW 模式的时间列名
|
||||
snapshot_time_column: str | None = None
|
||||
|
||||
# ── Detail_Mode 可选配置(二级详情拉取)──
|
||||
detail_endpoint: str | None = None # 详情接口 endpoint
|
||||
detail_param_builder: Callable[[dict], dict] | None = None # 详情请求参数构造函数
|
||||
detail_target_table: str | None = None # 详情数据目标表名
|
||||
detail_data_path: Tuple[str, ...] | None = None # 详情数据的 data_path
|
||||
detail_list_key: str | None = None # 详情数据的 list_key
|
||||
detail_id_column: str | None = None # 从列表数据中提取 ID 的列名
|
||||
detail_process_fn: Callable[[Any], list[dict]] | None = None # 自定义详情处理函数
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if self.snapshot_mode == SnapshotMode.WINDOW and not self.snapshot_time_column:
|
||||
raise ValueError(
|
||||
@@ -88,7 +102,10 @@ class BaseOdsTask(BaseTask):
|
||||
|
||||
def execute(self, cursor_data: dict | None = None) -> dict:
|
||||
spec = self.SPEC
|
||||
# 创建任务级日志缓冲区,任务完成后一次性输出,避免多任务日志交叉
|
||||
self._log_buf = TaskLogBuffer(spec.code, self.logger)
|
||||
self.logger.info("开始执行%s (ODS)", spec.code)
|
||||
self._log_buf.info("开始执行%s (ODS)", spec.code)
|
||||
|
||||
window_start, window_end, window_minutes = self._resolve_window(cursor_data)
|
||||
segments = build_window_segments(
|
||||
@@ -111,6 +128,11 @@ class BaseOdsTask(BaseTask):
|
||||
total_segments,
|
||||
format_window_days(total_days),
|
||||
)
|
||||
self._log_buf.info(
|
||||
"窗口拆分为 %s 段(共 %s 天)",
|
||||
total_segments,
|
||||
format_window_days(total_days),
|
||||
)
|
||||
|
||||
store_id = TypeParser.parse_int(self.config.get("app.store_id"))
|
||||
if not store_id:
|
||||
@@ -141,6 +163,10 @@ class BaseOdsTask(BaseTask):
|
||||
]
|
||||
has_is_delete = self._table_has_column(spec.table_name, "is_delete")
|
||||
|
||||
# 构建 PipelineConfig(支持任务级覆盖)
|
||||
pipeline_config = PipelineConfig.from_app_config(self.config, spec.code)
|
||||
cancel_token = getattr(self, '_cancel_token', None) or CancellationToken()
|
||||
|
||||
try:
|
||||
for idx, (seg_start, seg_end) in enumerate(segments, start=1):
|
||||
params = self._build_params(
|
||||
@@ -158,11 +184,12 @@ class BaseOdsTask(BaseTask):
|
||||
"errors": 0,
|
||||
"deleted": 0,
|
||||
}
|
||||
# 快照软删除需要的共享状态(process_fn 闭包写入)
|
||||
segment_keys: set[tuple] = set()
|
||||
# CHANGE 2026-02-18 | 收集 WINDOW 模式下 API 返回数据的实际最早时间戳
|
||||
segment_earliest_time: datetime | None = None
|
||||
segment_earliest_time: list[datetime | None] = [None]
|
||||
# CHANGE [2026-02-24] 收集 API 返回数据的实际最晚时间戳,用于 late-cutoff 保护
|
||||
segment_latest_time: datetime | None = None
|
||||
segment_latest_time: list[datetime | None] = [None]
|
||||
|
||||
self.logger.info(
|
||||
"%s: 开始执行(%s/%s),窗口[%s ~ %s]",
|
||||
@@ -172,52 +199,51 @@ class BaseOdsTask(BaseTask):
|
||||
seg_start,
|
||||
seg_end,
|
||||
)
|
||||
self._log_buf.info(
|
||||
"开始执行(%s/%s),窗口[%s ~ %s]",
|
||||
idx, total_segments, seg_start, seg_end,
|
||||
)
|
||||
|
||||
for _, page_records, _, response_payload in self.api.iter_paginated(
|
||||
endpoint=spec.endpoint,
|
||||
params=params,
|
||||
page_size=page_size,
|
||||
data_path=spec.data_path,
|
||||
list_key=spec.list_key,
|
||||
):
|
||||
if (
|
||||
snapshot_missing_delete
|
||||
and has_is_delete
|
||||
and business_pk_cols
|
||||
and snapshot_mode != SnapshotMode.NONE
|
||||
):
|
||||
segment_keys.update(self._collect_business_keys(page_records, business_pk_cols))
|
||||
# CHANGE 2026-02-18 | 收集实际最早时间戳,用于 early-cutoff 保护
|
||||
if (
|
||||
snapshot_protect_early_cutoff
|
||||
and snapshot_mode == SnapshotMode.WINDOW
|
||||
and snapshot_time_column
|
||||
):
|
||||
page_earliest = self._collect_earliest_time(
|
||||
page_records, snapshot_time_column
|
||||
)
|
||||
if page_earliest is not None:
|
||||
if segment_earliest_time is None or page_earliest < segment_earliest_time:
|
||||
segment_earliest_time = page_earliest
|
||||
# CHANGE [2026-02-24] 收集实际最晚时间戳,用于 late-cutoff 保护
|
||||
page_latest = self._collect_latest_time(
|
||||
page_records, snapshot_time_column
|
||||
)
|
||||
if page_latest is not None:
|
||||
if segment_latest_time is None or page_latest > segment_latest_time:
|
||||
segment_latest_time = page_latest
|
||||
inserted, updated, skipped = self._insert_records_schema_aware(
|
||||
table=spec.table_name,
|
||||
records=page_records,
|
||||
response_payload=response_payload,
|
||||
source_file=source_file,
|
||||
source_endpoint=spec.endpoint if spec.include_source_endpoint else None,
|
||||
)
|
||||
segment_counts["fetched"] += len(page_records)
|
||||
segment_counts["inserted"] += inserted
|
||||
segment_counts["updated"] += updated
|
||||
segment_counts["skipped"] += skipped
|
||||
# 构建 UnifiedPipeline 并执行当前 segment
|
||||
pipeline = UnifiedPipeline(
|
||||
api_client=self.api,
|
||||
db_connection=self.db,
|
||||
logger=self.logger,
|
||||
config=pipeline_config,
|
||||
cancel_token=cancel_token,
|
||||
)
|
||||
|
||||
requests = self._build_requests(
|
||||
spec, params, page_size, idx - 1,
|
||||
)
|
||||
process_fn = self._build_process_fn(
|
||||
spec,
|
||||
snapshot_missing_delete=snapshot_missing_delete,
|
||||
has_is_delete=has_is_delete,
|
||||
business_pk_cols=business_pk_cols,
|
||||
snapshot_mode=snapshot_mode,
|
||||
snapshot_protect_early_cutoff=snapshot_protect_early_cutoff,
|
||||
snapshot_time_column=snapshot_time_column,
|
||||
segment_keys=segment_keys,
|
||||
segment_earliest_time=segment_earliest_time,
|
||||
segment_latest_time=segment_latest_time,
|
||||
)
|
||||
write_fn = self._build_write_fn(spec, source_file)
|
||||
|
||||
pipe_result = pipeline.run(requests, process_fn, write_fn)
|
||||
|
||||
# 将 PipelineResult 映射到 segment_counts
|
||||
segment_counts["fetched"] = pipe_result.total_fetched
|
||||
segment_counts["inserted"] = pipe_result.total_inserted
|
||||
segment_counts["updated"] = pipe_result.total_updated
|
||||
segment_counts["skipped"] = pipe_result.total_skipped
|
||||
segment_counts["errors"] = (
|
||||
pipe_result.request_failures
|
||||
+ pipe_result.processing_failures
|
||||
+ pipe_result.write_failures
|
||||
)
|
||||
|
||||
# 快照软删除(pipeline 完成后执行,保留原有逻辑)
|
||||
if (
|
||||
snapshot_missing_delete
|
||||
and has_is_delete
|
||||
@@ -230,28 +256,36 @@ class BaseOdsTask(BaseTask):
|
||||
if (
|
||||
snapshot_protect_early_cutoff
|
||||
and snapshot_mode == SnapshotMode.WINDOW
|
||||
and segment_earliest_time is not None
|
||||
and segment_earliest_time > seg_start
|
||||
and segment_earliest_time[0] is not None
|
||||
and segment_earliest_time[0] > seg_start
|
||||
):
|
||||
self.logger.info(
|
||||
"%s: early-cutoff 保护生效,软删除窗口起点从 %s 收窄至 %s",
|
||||
spec.code, seg_start, segment_earliest_time,
|
||||
spec.code, seg_start, segment_earliest_time[0],
|
||||
)
|
||||
effective_window_start = segment_earliest_time
|
||||
self._log_buf.info(
|
||||
"early-cutoff 保护生效,软删除窗口起点从 %s 收窄至 %s",
|
||||
seg_start, segment_earliest_time[0],
|
||||
)
|
||||
effective_window_start = segment_earliest_time[0]
|
||||
# CHANGE [2026-02-24] late-cutoff 保护:用 API 实际最晚时间戳收窄软删除范围
|
||||
# 防止 recent endpoint 数据保留期滚动导致窗口尾部数据消失时误标删除
|
||||
effective_window_end = seg_end
|
||||
if (
|
||||
snapshot_protect_early_cutoff
|
||||
and snapshot_mode == SnapshotMode.WINDOW
|
||||
and segment_latest_time is not None
|
||||
and segment_latest_time < seg_end
|
||||
and segment_latest_time[0] is not None
|
||||
and segment_latest_time[0] < seg_end
|
||||
):
|
||||
self.logger.info(
|
||||
"%s: late-cutoff 保护生效,软删除窗口终点从 %s 收窄至 %s",
|
||||
spec.code, seg_end, segment_latest_time,
|
||||
spec.code, seg_end, segment_latest_time[0],
|
||||
)
|
||||
effective_window_end = segment_latest_time
|
||||
self._log_buf.info(
|
||||
"late-cutoff 保护生效,软删除窗口终点从 %s 收窄至 %s",
|
||||
seg_end, segment_latest_time[0],
|
||||
)
|
||||
effective_window_end = segment_latest_time[0]
|
||||
deleted = self._mark_missing_as_deleted(
|
||||
table=spec.table_name,
|
||||
business_pk_cols=business_pk_cols,
|
||||
@@ -279,6 +313,12 @@ class BaseOdsTask(BaseTask):
|
||||
format_window_days(processed_days),
|
||||
format_window_days(total_days),
|
||||
)
|
||||
self._log_buf.info(
|
||||
"完成(%s/%s),已处理 %s/%s 天",
|
||||
idx, total_segments,
|
||||
format_window_days(processed_days),
|
||||
format_window_days(total_days),
|
||||
)
|
||||
if total_segments > 1:
|
||||
segment_results.append(
|
||||
{
|
||||
@@ -291,13 +331,76 @@ class BaseOdsTask(BaseTask):
|
||||
}
|
||||
)
|
||||
|
||||
# ── Detail_Mode:列表拉取全部完成后,执行二级详情拉取 ──
|
||||
detail_counts = {
|
||||
"detail_success": 0,
|
||||
"detail_failure": 0,
|
||||
"detail_skipped": 0,
|
||||
}
|
||||
if spec.detail_endpoint:
|
||||
self.logger.info("%s: 列表阶段完成,进入详情拉取阶段", spec.code)
|
||||
self._log_buf.info("列表阶段完成,进入详情拉取阶段")
|
||||
detail_pipeline = UnifiedPipeline(
|
||||
api_client=self.api,
|
||||
db_connection=self.db,
|
||||
logger=self.logger,
|
||||
config=pipeline_config,
|
||||
cancel_token=cancel_token,
|
||||
)
|
||||
detail_requests = self._build_detail_requests(spec)
|
||||
detail_process_fn = self._build_detail_process_fn(spec)
|
||||
detail_write_fn = self._build_detail_write_fn(spec, source_file)
|
||||
|
||||
detail_result = detail_pipeline.run(
|
||||
detail_requests, detail_process_fn, detail_write_fn,
|
||||
)
|
||||
self.db.commit()
|
||||
|
||||
# 填充详情统计:成功 = 完成的请求数,失败 = 请求失败数,跳过 = 0(无跳过逻辑)
|
||||
detail_counts["detail_success"] = detail_result.completed_requests
|
||||
detail_counts["detail_failure"] = (
|
||||
detail_result.request_failures
|
||||
+ detail_result.processing_failures
|
||||
+ detail_result.write_failures
|
||||
)
|
||||
# 记录详情阶段每个失败项的错误日志
|
||||
for err in detail_result.errors:
|
||||
self.logger.error(
|
||||
"%s: 详情请求失败, detail_id=%s, error=%s",
|
||||
spec.code,
|
||||
err.get("detail_id", err.get("endpoint", "unknown")),
|
||||
err.get("error", "unknown"),
|
||||
)
|
||||
self._log_buf.error(
|
||||
"详情请求失败, detail_id=%s, error=%s",
|
||||
err.get("detail_id", err.get("endpoint", "unknown")),
|
||||
err.get("error", "unknown"),
|
||||
)
|
||||
|
||||
self.logger.info(
|
||||
"%s: 详情拉取完成, success=%d, failure=%d, skipped=%d",
|
||||
spec.code,
|
||||
detail_counts["detail_success"],
|
||||
detail_counts["detail_failure"],
|
||||
detail_counts["detail_skipped"],
|
||||
)
|
||||
self._log_buf.info(
|
||||
"详情拉取完成, success=%d, failure=%d, skipped=%d",
|
||||
detail_counts["detail_success"],
|
||||
detail_counts["detail_failure"],
|
||||
detail_counts["detail_skipped"],
|
||||
)
|
||||
|
||||
self.logger.info("%s ODS 任务完成: %s", spec.code, total_counts)
|
||||
self._log_buf.info("ODS 任务完成: %s", total_counts)
|
||||
allow_empty_advance = bool(self.config.get("run.allow_empty_result_advance", False))
|
||||
status = "SUCCESS"
|
||||
if total_counts["fetched"] == 0 and not allow_empty_advance:
|
||||
status = "PARTIAL"
|
||||
|
||||
result = self._build_result(status, total_counts)
|
||||
# 附加详情统计到结果
|
||||
result["detail"] = detail_counts
|
||||
overall_start = segments[0][0]
|
||||
overall_end = segments[-1][1]
|
||||
result["window"] = {
|
||||
@@ -311,14 +414,223 @@ class BaseOdsTask(BaseTask):
|
||||
result["request_params"] = params_list[0]
|
||||
else:
|
||||
result["request_params"] = params_list
|
||||
# 任务完成,将缓冲日志一次性输出到父 logger
|
||||
self._log_buf.flush()
|
||||
return result
|
||||
|
||||
except Exception:
|
||||
self.db.rollback()
|
||||
total_counts["errors"] += 1
|
||||
self.logger.error("%s ODS 任务失败", spec.code, exc_info=True)
|
||||
self._log_buf.error("ODS 任务失败")
|
||||
# 异常时也 flush,确保已收集的日志不丢失
|
||||
self._log_buf.flush()
|
||||
raise
|
||||
|
||||
# ── Pipeline 集成方法 ──
|
||||
|
||||
def _build_requests(
    self,
    spec: OdsTaskSpec,
    params: dict,
    page_size: int,
    segment_index: int,
) -> Iterable[PipelineRequest]:
    """Yield one PipelineRequest per page returned by ``self.api.iter_paginated``.

    Pagination is driven here, so every request already carries the page it
    describes in ``_prefetched_response`` (keys ``records`` and
    ``response_payload``). Per the original design note, UnifiedPipeline's
    request loop is expected to consume that prefetched data instead of
    calling the API again.

    Args:
        spec: Task spec supplying endpoint, data_path and list_key.
        params: Request parameters for the current window segment.
        page_size: Page size forwarded to the paginator and to each request.
        segment_index: Index of the window segment, stored on each request.

    Yields:
        A PipelineRequest for every page, in the order the paginator
        produces them.
    """
    # The same five values describe both the paginated call and each request.
    paging = {
        "endpoint": spec.endpoint,
        "params": params,
        "page_size": page_size,
        "data_path": spec.data_path,
        "list_key": spec.list_key,
    }
    # Page number and total count from the paginator are not needed here.
    for _page_no, records, _total, payload in self.api.iter_paginated(**paging):
        prefetched = {
            "records": records,
            "response_payload": payload,
        }
        yield PipelineRequest(
            **paging,
            segment_index=segment_index,
            _prefetched_response=prefetched,
        )
|
||||
|
||||
def _build_process_fn(
    self,
    spec: OdsTaskSpec,
    *,
    snapshot_missing_delete: bool,
    has_is_delete: bool,
    business_pk_cols: list[str],
    snapshot_mode: SnapshotMode,
    snapshot_protect_early_cutoff: bool,
    snapshot_time_column: str | None,
    segment_keys: set[tuple],
    segment_earliest_time: list[datetime | None],
    segment_latest_time: list[datetime | None],
) -> Callable[[Any], list[dict]]:
    """Build the per-page processing callback for the list phase.

    The returned function pulls ``records`` out of a prefetched response and,
    as a side effect, updates the caller-owned accumulators used later for
    snapshot soft-deletion:

    * ``segment_keys`` receives the business keys seen on the page;
    * ``segment_earliest_time[0]`` / ``segment_latest_time[0]`` track the
      minimum / maximum value of ``snapshot_time_column`` seen so far
      (single-element lists are used so the closure can write to them).

    Returns:
        A callable mapping a prefetched-response dict to its record list
        (an empty list when the page has no records).
    """

    def process_fn(response: Any) -> list[dict]:
        # ``response`` is the ``_prefetched_response`` dict of a request.
        page = response.get("records", [])
        if not page:
            return []

        # Business keys are only needed when missing rows will be soft-deleted.
        wants_keys = (
            snapshot_missing_delete
            and has_is_delete
            and business_pk_cols
            and snapshot_mode != SnapshotMode.NONE
        )
        if wants_keys:
            page_keys = self._collect_business_keys(page, business_pk_cols)
            segment_keys.update(page_keys)

        # Time bounds are tracked for the early-/late-cutoff protection,
        # which only applies to WINDOW snapshots with a time column.
        tracks_bounds = (
            snapshot_protect_early_cutoff
            and snapshot_mode == SnapshotMode.WINDOW
            and snapshot_time_column
        )
        if not tracks_bounds:
            return page

        lowest = self._collect_earliest_time(page, snapshot_time_column)
        if lowest is not None:
            known_low = segment_earliest_time[0]
            if known_low is None or lowest < known_low:
                segment_earliest_time[0] = lowest

        highest = self._collect_latest_time(page, snapshot_time_column)
        if highest is not None:
            known_high = segment_latest_time[0]
            if known_high is None or highest > known_high:
                segment_latest_time[0] = highest

        return page

    return process_fn
|
||||
|
||||
def _build_write_fn(
    self,
    spec: OdsTaskSpec,
    source_file: str | None,
) -> Callable[[list[dict]], WriteResult]:
    """Build the list-phase write callback.

    The returned function forwards a batch of records to
    ``_insert_records_schema_aware`` for ``spec.table_name`` and wraps the
    resulting (inserted, updated, skipped) counts in a WriteResult. No raw
    response payload is passed along (``response_payload=None``).
    """

    def write_fn(records: list[dict]) -> WriteResult:
        # The source endpoint is recorded only when the spec asks for it.
        endpoint_tag = spec.endpoint if spec.include_source_endpoint else None
        counts = self._insert_records_schema_aware(
            table=spec.table_name,
            records=records,
            response_payload=None,
            source_file=source_file,
            source_endpoint=endpoint_tag,
        )
        n_inserted, n_updated, n_skipped = counts
        return WriteResult(
            inserted=n_inserted,
            updated=n_updated,
            skipped=n_skipped,
        )

    return write_fn
|
||||
|
||||
# ── Detail_Mode 方法 ──
|
||||
|
||||
def _build_detail_requests(
    self,
    spec: OdsTaskSpec,
) -> Iterable[PipelineRequest]:
    """Yield one detail PipelineRequest per ID found in the ODS list table.

    Runs ``SELECT DISTINCT <detail_id_column> FROM <table_name>`` (NULLs
    excluded) and emits a request with ``is_detail=True`` for each value.
    Note that the query covers every row currently in the table, not only
    rows written by the current run.

    Nothing is yielded when the spec has no ``detail_endpoint`` or
    ``detail_id_column``, when the ID query fails (the error is logged and
    swallowed), or when the table holds no IDs.

    Args:
        spec: Task spec carrying the ``detail_*`` configuration.

    Yields:
        PipelineRequest objects targeting ``spec.detail_endpoint``.
    """
    if not spec.detail_endpoint or not spec.detail_id_column:
        return

    id_col = spec.detail_id_column
    table = spec.table_name
    # Identifiers come from the task spec (code-level configuration), so they
    # are interpolated directly; SQL placeholders cannot bind identifiers.
    query = f"SELECT DISTINCT {id_col} FROM {table} WHERE {id_col} IS NOT NULL"
    try:
        cursor = self.db.cursor()
        try:
            cursor.execute(query)
            rows = cursor.fetchall()
        finally:
            # Release the cursor even when execute()/fetchall() raises;
            # previously a failed query leaked the open cursor.
            cursor.close()
    except Exception:
        self.logger.error(
            "%s: 查询详情 ID 列表失败, table=%s, column=%s",
            spec.code, table, id_col, exc_info=True,
        )
        return

    if not rows:
        self.logger.info("%s: 无需拉取详情,ID 列表为空", spec.code)
        return

    self.logger.info(
        "%s: 开始详情拉取,共 %d 个 ID", spec.code, len(rows),
    )

    for (record_id,) in rows:
        # The builder always receives the ID under the key "id", whatever
        # detail_id_column is named; without a builder that dict is sent as-is.
        if spec.detail_param_builder:
            params = spec.detail_param_builder({"id": record_id})
        else:
            params = {"id": record_id}

        yield PipelineRequest(
            endpoint=spec.detail_endpoint,
            params=params,
            data_path=spec.detail_data_path or ("data",),
            list_key=spec.detail_list_key,
            is_detail=True,
            detail_id=record_id,
        )
|
||||
|
||||
def _build_detail_process_fn(
    self,
    spec: OdsTaskSpec,
) -> Callable[[Any], list[dict]]:
    """Return the processing callback for the detail phase.

    A custom ``spec.detail_process_fn`` wins when one is configured;
    otherwise the fallback simply returns ``response.get("records", [])``.

    NOTE(review): the fallback assumes the response is a dict carrying a
    ``records`` key - confirm that this matches what the pipeline hands to
    detail callbacks.
    """
    custom = spec.detail_process_fn
    if custom is not None:
        return custom

    def detail_process_fn(response: Any) -> list[dict]:
        return response.get("records", [])

    return detail_process_fn
|
||||
|
||||
def _build_detail_write_fn(
    self,
    spec: OdsTaskSpec,
    source_file: str | None,
) -> Callable[[list[dict]], WriteResult]:
    """Build the detail-phase write callback.

    Records go to ``spec.detail_target_table`` when it is set and to
    ``spec.table_name`` otherwise; the destination is resolved once, when
    the callback is built.
    """
    destination = spec.detail_target_table or spec.table_name

    def detail_write_fn(records: list[dict]) -> WriteResult:
        # The detail endpoint is recorded only when the spec asks for it.
        endpoint_tag = spec.detail_endpoint if spec.include_source_endpoint else None
        counts = self._insert_records_schema_aware(
            table=destination,
            records=records,
            response_payload=None,
            source_file=source_file,
            source_endpoint=endpoint_tag,
        )
        n_inserted, n_updated, n_skipped = counts
        return WriteResult(
            inserted=n_inserted,
            updated=n_updated,
            skipped=n_skipped,
        )

    return detail_write_fn
|
||||
|
||||
|
||||
def _resolve_window(self, cursor_data: dict | None) -> tuple[datetime, datetime, int]:
|
||||
base_start, base_end, base_minutes = self._get_time_window(cursor_data)
|
||||
|
||||
@@ -909,6 +1221,18 @@ class BaseOdsTask(BaseTask):
|
||||
_fill_missing("siteid", [site_profile.get("siteId"), site_profile.get("id")])
|
||||
_fill_missing("sitename", [site_profile.get("shop_name"), site_profile.get("siteName")])
|
||||
|
||||
# 通用 siteid 注入:ODS 表有 siteid 列但 API 记录不含时,从 app.store_id 填充
|
||||
# 场景:goods_stock_summary 等按门店请求但返回记录不含 siteId 的接口
|
||||
ods_has_siteid = any(c[0].lower() == "siteid" for c in cols_info)
|
||||
if ods_has_siteid:
|
||||
store_id = TypeParser.parse_int(self.config.get("app.store_id"))
|
||||
if store_id:
|
||||
for item in merged_records:
|
||||
merged = item["merged"]
|
||||
existing = self._get_value_case_insensitive(merged, "siteid")
|
||||
if existing in (None, "", 0):
|
||||
merged["siteid"] = store_id
|
||||
|
||||
business_keys = [c for c in pk_cols if str(c).lower() != "content_hash"]
|
||||
# P2(A): 使用 spec 上的显式开关控制去重,不再隐式依赖 has_fetched_at
|
||||
# CHANGE 2026-02-19 | force_full_update 时仍查最新 hash(用于判断是否回退到历史版本),
|
||||
@@ -1240,6 +1564,56 @@ def _bool_col(name: str, *sources: str) -> ColumnSpec:
|
||||
return ColumnSpec(column=name, sources=sources, transform=_to_bool)
|
||||
|
||||
|
||||
# ── 团购详情接口自定义 process_fn ──
|
||||
# API 原始响应结构:{"data": {"groupPurchasePackage": {...}, "packageCouponAssistants": [...], ...}, "code": 0}
|
||||
# detail_mode 下 process_fn 收到的是 api.post() 的原始 JSON 响应
|
||||
|
||||
def _group_package_detail_process_fn(response: Any) -> list[dict]:
    """Flatten a QueryPackageCouponInfo response into a single record.

    The record combines scalar fields from ``data.groupPurchasePackage``,
    several list-valued fields from ``groupPurchasePackage`` and ``data``, a
    ``content_hash`` over those business fields, and the complete ``data``
    object as ``payload``. The column set is meant to match
    ``ods.group_buy_package_details``.

    Args:
        response: Decoded JSON response, expected to look like
            ``{"data": {"groupPurchasePackage": {...}, ...}, "code": 0}``.

    Returns:
        A one-element list holding the flat record, or an empty list when
        the response, its ``data`` or its ``groupPurchasePackage`` is
        missing, empty, or not a mapping.
    """
    from collections.abc import Mapping

    # A malformed response (None, list, string, ...) is treated like a
    # missing one instead of raising AttributeError on .get().
    if not isinstance(response, Mapping):
        return []

    data = response.get("data")
    if not data or not isinstance(data, Mapping):
        return []

    pkg = data.get("groupPurchasePackage")
    if not pkg or not isinstance(pkg, Mapping):
        return []

    # Scalar fields copied from data.groupPurchasePackage under the same name.
    same_name_fields = (
        "package_name",
        "duration",
        "start_time",
        "end_time",
        "add_start_clock",
        "add_end_clock",
        "is_enabled",
        "is_delete",
        "site_id",
        "tenant_id",
        "create_time",
        "creator_name",
    )
    record: dict[str, Any] = {"coupon_id": pkg.get("id")}
    for name in same_name_fields:
        record[name] = pkg.get(name)

    # List-valued fields (stored as JSONB according to the original notes).
    record["table_area_ids"] = pkg.get("tableAreaId")
    record["table_area_names"] = pkg.get("tableAreaNameList")
    record["assistant_services"] = data.get("packageCouponAssistants")
    record["groupon_site_infos"] = data.get("grouponSiteInfos")
    record["package_services"] = data.get("packagePackageService")
    record["coupon_details_list"] = data.get("packageCouponDetailsList")

    # content_hash is computed before payload is attached, so it covers the
    # business fields only. sort_keys makes it independent of key order;
    # default=str stringifies values json cannot encode natively.
    hash_input = json.dumps(record, sort_keys=True, ensure_ascii=False, default=str)
    record["content_hash"] = hashlib.sha256(hash_input.encode("utf-8")).hexdigest()

    # payload keeps the complete data object.
    record["payload"] = data

    return [record]
|
||||
|
||||
|
||||
ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
@@ -1251,9 +1625,18 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
data_path=("data",),
|
||||
list_key="assistantInfos",
|
||||
pk_columns=(_int_col("id", "id", required=True),),
|
||||
extra_params={
|
||||
"workStatusEnum": 0,
|
||||
"dingTalkSynced": 0,
|
||||
"leaveId": 0,
|
||||
"criticismStatus": 0,
|
||||
"signStatus": -1,
|
||||
},
|
||||
include_source_endpoint=False,
|
||||
include_fetched_at=False,
|
||||
include_record_index=True,
|
||||
requires_window=False,
|
||||
time_fields=None,
|
||||
snapshot_mode=SnapshotMode.FULL_TABLE,
|
||||
description="助教账号档案 ODS:SearchAssistantInfo -> assistantInfos 原始 JSON",
|
||||
),
|
||||
@@ -1314,7 +1697,8 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_source_endpoint=False,
|
||||
include_fetched_at=False,
|
||||
include_record_index=True,
|
||||
requires_window=False,
|
||||
requires_window=True,
|
||||
time_fields=("startTime", "endTime"),
|
||||
snapshot_mode=SnapshotMode.WINDOW,
|
||||
snapshot_time_column="create_time",
|
||||
description="门店商品销售流水 ODS:GetGoodsSalesList -> orderGoodsLedgers 原始 JSON",
|
||||
@@ -1499,6 +1883,13 @@ ODS_TASK_SPECS: Tuple[OdsTaskSpec, ...] = (
|
||||
include_record_index=True,
|
||||
requires_window=False,
|
||||
snapshot_mode=SnapshotMode.FULL_TABLE,
|
||||
# ── Detail_Mode 配置:团购详情接口 ──
|
||||
detail_endpoint="/PackageCoupon/QueryPackageCouponInfo",
|
||||
detail_param_builder=lambda rec: {"couponId": rec["id"]},
|
||||
detail_target_table="ods.group_buy_package_details",
|
||||
detail_data_path=("data",),
|
||||
detail_id_column="id",
|
||||
detail_process_fn=_group_package_detail_process_fn,
|
||||
description="团购套餐定义 ODS:QueryPackageCouponList -> packageCouponList 原始 JSON",
|
||||
),
|
||||
OdsTaskSpec(
|
||||
|
||||
@@ -24,7 +24,8 @@ WITH base AS (
|
||||
COALESCE(sh.member_discount_amount, 0) AS member_discount_amount,
|
||||
COALESCE(sh.adjust_amount, 0) AS manual_discount_amount,
|
||||
COALESCE(sh.pay_amount, 0) AS total_paid_amount,
|
||||
COALESCE(sh.balance_amount, 0) + COALESCE(sh.recharge_card_amount, 0) + COALESCE(sh.gift_card_amount, 0) AS stored_card_deduct,
|
||||
-- balance_amount = recharge_card_amount + gift_card_amount(恒等式),不可三者相加
|
||||
COALESCE(sh.balance_amount, 0) AS stored_card_deduct,
|
||||
COALESCE(sh.coupon_amount, 0) AS total_coupon_deduction,
|
||||
COALESCE(sh.table_charge_money, 0) AS settle_table_fee_amount,
|
||||
COALESCE(sh.assistant_pd_money, 0) + COALESCE(sh.assistant_cx_money, 0) AS settle_assistant_service_amount,
|
||||
|
||||
@@ -22,7 +22,6 @@ class ManualIngestTask(BaseTask):
|
||||
(("member_stored_value_cards",), "ods.member_stored_value_cards"),
|
||||
(("recharge_settlements",), "ods.recharge_settlements"),
|
||||
(("settlement_records",), "ods.settlement_records"),
|
||||
(("assistant_cancellation_records",), "ods.assistant_cancellation_records"),
|
||||
(("assistant_accounts_master",), "ods.assistant_accounts_master"),
|
||||
(("assistant_service_records",), "ods.assistant_service_records"),
|
||||
(("site_tables_master",), "ods.site_tables_master"),
|
||||
@@ -47,7 +46,6 @@ class ManualIngestTask(BaseTask):
|
||||
"ods.member_stored_value_cards": {"pk": "id"},
|
||||
"ods.recharge_settlements": {"pk": "id"},
|
||||
"ods.settlement_records": {"pk": "id"},
|
||||
"ods.assistant_cancellation_records": {"pk": "id", "json_cols": ["siteProfile"]},
|
||||
"ods.assistant_accounts_master": {"pk": "id"},
|
||||
"ods.assistant_service_records": {"pk": "id", "json_cols": ["siteProfile"]},
|
||||
"ods.site_tables_master": {"pk": "id"},
|
||||
|
||||
@@ -51,6 +51,7 @@ class DwsVerifier(BaseVerifier):
|
||||
"time_column": "stat_date",
|
||||
"source_table": "dwd.dwd_settlement_head",
|
||||
"source_time_column": "pay_time",
|
||||
# CHANGE 2026-03-07 | 补齐 settle_type 过滤,与 finance_base_task 对齐
|
||||
"agg_sql": """
|
||||
SELECT
|
||||
site_id,
|
||||
@@ -59,9 +60,10 @@ class DwsVerifier(BaseVerifier):
|
||||
COALESCE(SUM(pay_amount), 0) as cash_pay_amount,
|
||||
COALESCE(SUM(table_charge_money), 0) as table_fee_amount,
|
||||
COALESCE(SUM(goods_money), 0) as goods_amount,
|
||||
COALESCE(SUM(table_charge_money) + SUM(goods_money) + COALESCE(SUM(assistant_pd_money), 0) + COALESCE(SUM(assistant_cx_money), 0), 0) as gross_amount
|
||||
COALESCE(SUM(table_charge_money + goods_money + assistant_pd_money + assistant_cx_money), 0) as gross_amount
|
||||
FROM dwd.dwd_settlement_head
|
||||
WHERE pay_time >= %s AND pay_time < %s
|
||||
AND settle_type IN (1, 3)
|
||||
GROUP BY site_id, tenant_id, DATE(pay_time)
|
||||
""",
|
||||
"compare_columns": ["cash_pay_amount", "table_fee_amount", "goods_amount", "gross_amount"],
|
||||
|
||||
@@ -97,7 +97,7 @@ class IndexVerifier(BaseVerifier):
|
||||
JOIN dwd.dim_assistant d
|
||||
ON s.user_id = d.user_id
|
||||
AND d.scd2_is_current = 1
|
||||
AND COALESCE(d.is_delete, 0) = 0
|
||||
AND COALESCE(d.leave_status, 0) = 0
|
||||
CROSS JOIN params p
|
||||
WHERE s.last_use_time >= p.start_time
|
||||
AND s.last_use_time < p.end_time
|
||||
|
||||
@@ -22,3 +22,32 @@ def test_config_get_nested():
|
||||
config = AppConfig.load({"app": {"store_id": 1}})
|
||||
assert config.get("db.batch_size") == 1000
|
||||
assert config.get("nonexistent.key", "default") == "default"
|
||||
|
||||
|
||||
def test_business_day_start_hour_default():
    """Without an explicit setting, app.business_day_start_hour loads as 8."""
    loaded = AppConfig.load({"app": {"store_id": 1}})
    start_hour = loaded.get("app.business_day_start_hour")
    assert start_hour == 8
|
||||
|
||||
|
||||
def test_business_day_start_hour_valid_range():
    """Integer hours inside 0-23 are accepted and returned unchanged."""
    for hour in (0, 12, 23):
        app_section = {"store_id": 1, "business_day_start_hour": hour}
        loaded = AppConfig.load({"app": app_section})
        assert loaded.get("app.business_day_start_hour") == hour
|
||||
|
||||
|
||||
def test_business_day_start_hour_out_of_range():
    """Hours outside 0-23 make AppConfig.load raise SystemExit."""
    # One value just above the range, then one just below it.
    for bad_hour in (24, -1):
        app_section = {"store_id": 1, "business_day_start_hour": bad_hour}
        with pytest.raises(SystemExit, match="business_day_start_hour"):
            AppConfig.load({"app": app_section})
|
||||
|
||||
|
||||
def test_business_day_start_hour_non_int():
    """Non-integer values (a str, then a float) make AppConfig.load raise SystemExit."""
    for bad_value in ("8", 8.0):
        app_section = {"store_id": 1, "business_day_start_hour": bad_value}
        with pytest.raises(SystemExit, match="business_day_start_hour"):
            AppConfig.load({"app": app_section})
|
||||
|
||||
50
apps/etl/connectors/feiqiu/utils/cancellation.py
Normal file
50
apps/etl/connectors/feiqiu/utils/cancellation.py
Normal file
@@ -0,0 +1,50 @@
|
||||
"""线程安全的取消令牌,用于 ETL 管道的优雅中断。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
|
||||
|
||||
class CancellationToken:
    """Cancellation token for ETL pipelines, built on ``threading.Event``.

    A token can be cancelled manually via ``cancel()`` or automatically by a
    timeout timer. The class itself offers no way to reset the signal: once
    ``cancel()`` has run, ``is_cancelled`` stays True.
    """

    def __init__(self, timeout: float | None = None):
        """Create the token and optionally arm a timeout.

        Args:
            timeout: Seconds until automatic cancellation. A positive value
                starts a daemon ``threading.Timer`` that calls ``cancel()``
                when it fires; ``None`` or a value <= 0 starts no timer.
        """
        self._event = threading.Event()
        self._timer: threading.Timer | None = None

        wants_timer = timeout is not None and timeout > 0
        if not wants_timer:
            return

        watchdog = threading.Timer(timeout, self.cancel)
        self._timer = watchdog
        # Daemon thread: a pending timer must not keep the process alive.
        watchdog.daemon = True
        watchdog.start()

    def cancel(self) -> None:
        """Raise the cancellation signal; calling it again has no further effect."""
        self._event.set()

    @property
    def is_cancelled(self) -> bool:
        """True once the cancellation signal has been raised."""
        return self._event.is_set()

    @property
    def event(self) -> threading.Event:
        """The underlying Event, for components that wait on or poll it."""
        return self._event

    def dispose(self) -> None:
        """Stop and drop the timeout timer, if one was started.

        Intended to be called when the pipeline finishes. Because the timer
        thread is a daemon, skipping this call does not block interpreter exit.
        """
        timer = self._timer
        if timer is None:
            return
        timer.cancel()
        self._timer = None
|
||||
@@ -12,7 +12,6 @@ ENDPOINT_FILENAME_MAP: dict[str, str] = {
|
||||
"/memberprofile/getmembercardbalancechange": "member_balance_changes.json",
|
||||
"/memberprofile/gettenantmembercardlist": "member_stored_value_cards.json",
|
||||
"/site/getrechargesettlelist": "recharge_settlements.json",
|
||||
"/assistantperformance/getabolitionassistant": "assistant_cancellation_records.json",
|
||||
"/assistantperformance/getorderassistantdetails": "assistant_service_records.json",
|
||||
"/personnelmanagement/searchassistantinfo": "assistant_accounts_master.json",
|
||||
"/table/getsitetables": "site_tables_master.json",
|
||||
|
||||
101
apps/etl/connectors/feiqiu/utils/task_log_buffer.py
Normal file
101
apps/etl/connectors/feiqiu/utils/task_log_buffer.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""任务级日志缓冲区,收集单个任务的所有日志,任务完成后一次性输出。
|
||||
|
||||
解决多任务并行执行时日志行交叉混乱的问题:每个任务维护独立的缓冲区,
|
||||
任务完成后将完整日志按时间顺序一次性输出到父 logger,添加 [task_code] 前缀。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
@dataclass
class LogEntry:
    """A single buffered log line."""

    # Moment the entry was buffered (naive local time from datetime.now()).
    timestamp: datetime
    # Numeric logging level, e.g. logging.INFO.
    level: int
    # Code of the task that produced the entry; used as the output prefix.
    task_code: str
    # Fully formatted message text.
    message: str


class TaskLogBuffer:
    """Per-task log buffer that is emitted in one go when the task finishes.

    Collecting a task's log lines and flushing them together keeps the output
    of tasks that run in parallel from interleaving. Appending, flushing and
    reading the buffer are all guarded by an internal ``threading.Lock``.
    """

    def __init__(self, task_code: str, parent_logger: logging.Logger) -> None:
        """Create an empty buffer.

        Args:
            task_code: Task code used as the ``[task_code]`` prefix on flush.
            parent_logger: Logger that receives the entries on ``flush()``.
        """
        self.task_code = task_code
        self._parent = parent_logger
        self._buffer: list[LogEntry] = []
        self._lock = threading.Lock()

    def log(self, level: int, message: str, *args: object) -> None:
        """Buffer one log line.

        Args:
            level: Logging level (e.g. ``logging.INFO``).
            message: Message text; %-formatted with ``args`` when any are given.
            *args: Formatting arguments.

        A message whose placeholders do not match ``args`` is buffered
        unformatted, followed by ``repr(args)``, rather than raising: a bad
        log call must not abort the task that issued it.
        """
        if args:
            try:
                formatted = message % args
            except (TypeError, ValueError, KeyError):
                formatted = f"{message} {args!r}"
        else:
            # Without args the message is taken literally, so a bare "%" is safe.
            formatted = message

        entry = LogEntry(
            timestamp=datetime.now(),
            level=level,
            task_code=self.task_code,
            message=formatted,
        )
        with self._lock:
            self._buffer.append(entry)

    # ---- convenience wrappers ----

    def debug(self, message: str, *args: object) -> None:
        """Buffer a DEBUG line."""
        self.log(logging.DEBUG, message, *args)

    def info(self, message: str, *args: object) -> None:
        """Buffer an INFO line."""
        self.log(logging.INFO, message, *args)

    def warning(self, message: str, *args: object) -> None:
        """Buffer a WARNING line."""
        self.log(logging.WARNING, message, *args)

    def error(self, message: str, *args: object) -> None:
        """Buffer an ERROR line."""
        self.log(logging.ERROR, message, *args)

    # ---- output ----

    def flush(self) -> list[LogEntry]:
        """Emit all buffered entries to the parent logger and empty the buffer.

        Entries are written in ascending timestamp order, each prefixed with
        ``[task_code]`` so its origin stays identifiable.

        Returns:
            A new list with the emitted entries, sorted by timestamp.
        """
        with self._lock:
            # sorted() is stable, so entries with equal timestamps keep
            # their insertion order.
            entries = sorted(self._buffer, key=lambda e: e.timestamp)
            for entry in entries:
                self._parent.log(
                    entry.level,
                    "[%s] %s",
                    entry.task_code,
                    entry.message,
                )
            self._buffer.clear()
        return list(entries)

    @property
    def entries(self) -> list[LogEntry]:
        """Copy of the entries currently buffered (for tests and inspection)."""
        with self._lock:
            return list(self._buffer)
|
||||
Reference in New Issue
Block a user