在准备环境前提交此次全部更改。

This commit is contained in:
Neo
2026-02-19 08:35:13 +08:00
parent ded6dfb9d8
commit 4eac07da47
1387 changed files with 6107191 additions and 33002 deletions

View File

@@ -0,0 +1 @@
# -*- coding: utf-8 -*-

View File

@@ -0,0 +1,158 @@
# -*- coding: utf-8 -*-
"""CLI command builder.

Migrated from gui/utils/cli_builder.py and adapted to the backend
TaskConfigSchema. Converts a TaskConfigSchema into an argument list for
the ETL CLI.

Supports:
- 7 flows: api_ods / api_ods_dwd / api_full / ods_dwd / dwd_dws /
  dwd_dws_index / dwd_index
- 3 processing modes: increment_only / verify_only / increment_verify
- automatic injection of the --store-id argument
"""
from typing import Any
from ..schemas.tasks import TaskConfigSchema
# Set of flow ids accepted by the CLI's --flow argument
VALID_FLOWS: set[str] = {
    "api_ods",
    "api_ods_dwd",
    "api_full",
    "ods_dwd",
    "dwd_dws",
    "dwd_dws_index",
    "dwd_index",
}
# Set of valid processing modes
VALID_PROCESSING_MODES: set[str] = {
    "increment_only",
    "verify_only",
    "increment_verify",
}
# extra_args keys the CLI understands (value-style arguments + boolean flags)
CLI_SUPPORTED_ARGS: set[str] = {
    # value-style arguments
    "pg_dsn", "pg_host", "pg_port", "pg_name",
    "pg_user", "pg_password", "api_base", "api_token", "api_timeout",
    "api_page_size", "api_retry_max",
    "export_root", "log_root", "fetch_root",
    "ingest_source", "idle_start", "idle_end",
    "data_source", "pipeline_flow",
    "window_split_unit",
    # boolean flags (emitted without a value)
    "force_window_override", "write_pretty_json", "allow_empty_advance",
}
class CLIBuilder:
    """Converts a TaskConfigSchema into an ETL CLI argument list."""

    def build_command(
        self,
        config: TaskConfigSchema,
        etl_project_path: str,
        python_executable: str = "python",
    ) -> list[str]:
        """Build the full CLI argument list.

        Produces:
            [python, -m, cli.main, --flow, {flow_id}, --tasks, ...,
             --store-id, {site_id}, ...]

        Args:
            config: task configuration (Pydantic model)
            etl_project_path: ETL project root; used as the subprocess
                cwd only — intentionally NOT part of the command itself
            python_executable: Python interpreter path, default "python"

        Returns:
            Command line argument list.
        """
        cmd: list[str] = [python_executable, "-m", "cli.main"]
        # -- flow (execution pipeline) --
        cmd.extend(["--flow", config.pipeline])
        # -- processing mode --
        if config.processing_mode:
            cmd.extend(["--processing-mode", config.processing_mode])
        # -- task list --
        if config.tasks:
            cmd.extend(["--tasks", ",".join(config.tasks)])
        # -- fetch from the API before verification (verify_only mode only) --
        if config.fetch_before_verify and config.processing_mode == "verify_only":
            cmd.append("--fetch-before-verify")
        # -- time window --
        if config.window_mode == "lookback":
            # lookback mode
            if config.lookback_hours is not None:
                cmd.extend(["--lookback-hours", str(config.lookback_hours)])
            if config.overlap_seconds is not None:
                cmd.extend(["--overlap-seconds", str(config.overlap_seconds)])
        else:
            # explicit window mode
            if config.window_start:
                cmd.extend(["--window-start", config.window_start])
            if config.window_end:
                cmd.extend(["--window-end", config.window_end])
        # -- window splitting --
        if config.window_split and config.window_split != "none":
            cmd.extend(["--window-split", config.window_split])
            if config.window_split_days is not None:
                cmd.extend(["--window-split-days", str(config.window_split_days)])
        # -- dry-run --
        if config.dry_run:
            cmd.append("--dry-run")
        # -- force full reprocessing --
        if config.force_full:
            cmd.append("--force-full")
        # -- local JSON mode → --data-source offline --
        if config.ods_use_local_json:
            cmd.extend(["--data-source", "offline"])
        # -- store id (auto-injected) --
        if config.store_id is not None:
            cmd.extend(["--store-id", str(config.store_id)])
        # -- extra args (only keys the CLI understands are forwarded) --
        for key, value in config.extra_args.items():
            if value is not None and key in CLI_SUPPORTED_ARGS:
                arg_name = f"--{key.replace('_', '-')}"
                if isinstance(value, bool):
                    # boolean flags carry no value and are emitted only when truthy
                    if value:
                        cmd.append(arg_name)
                else:
                    cmd.extend([arg_name, str(value)])
        return cmd

    def build_command_string(
        self,
        config: TaskConfigSchema,
        etl_project_path: str,
        python_executable: str = "python",
    ) -> str:
        """Build a printable command string (for display / logging).

        Arguments containing whitespace or double quotes are wrapped in
        double quotes. FIX: embedded double quotes are now
        backslash-escaped — the previous code wrapped such arguments in
        quotes without escaping, producing a malformed rendered string.
        """
        cmd = self.build_command(config, etl_project_path, python_executable)
        quoted: list[str] = []
        for arg in cmd:
            if " " in arg or '"' in arg:
                quoted.append('"{}"'.format(arg.replace('"', '\\"')))
            else:
                quoted.append(arg)
        return " ".join(quoted)
# Module-level singleton shared by the API layer
cli_builder = CLIBuilder()

View File

@@ -0,0 +1,303 @@
# -*- coding: utf-8 -*-
"""Scheduler service.

Background asyncio loop that checks for due scheduled tasks every 30
seconds and enqueues their TaskConfig into the TaskQueue.

Core logic:
- check_and_enqueue(): query scheduled tasks with enabled=true and
  next_run_at <= now, and enqueue them
- start() / stop(): manage the background loop lifecycle
- calculate_next_run(): compute the next run time from a ScheduleConfig
"""
from __future__ import annotations
import asyncio
import json
import logging
from datetime import datetime, timedelta, timezone
from ..database import get_connection
from ..schemas.schedules import ScheduleConfigSchema
from ..schemas.tasks import TaskConfigSchema
from .task_queue import task_queue
logger = logging.getLogger(__name__)

# Scheduler polling interval, in seconds
SCHEDULER_POLL_INTERVAL = 30
def _parse_time(time_str: str) -> tuple[int, int]:
    """Parse an "HH:MM" string and return (hour, minute)."""
    parts = time_str.split(":")
    return int(parts[0]), int(parts[1])


def calculate_next_run(
    schedule_config: ScheduleConfigSchema,
    now: datetime | None = None,
) -> datetime | None:
    """Compute the next run time for a schedule configuration.

    Args:
        schedule_config: schedule configuration
        now: current time (defaults to UTC now; injectable for tests)

    Returns:
        Next run time (UTC). None means the task should not run again
        (the "once" type).
    """
    if now is None:
        now = datetime.now(timezone.utc)
    stype = schedule_config.schedule_type
    if stype == "once":
        # One-shot tasks are never rescheduled.
        return None
    if stype == "interval":
        unit_map = {
            "minutes": timedelta(minutes=schedule_config.interval_value),
            "hours": timedelta(hours=schedule_config.interval_value),
            "days": timedelta(days=schedule_config.interval_value),
        }
        delta = unit_map.get(schedule_config.interval_unit)
        if delta is None:
            logger.warning("未知的 interval_unit: %s", schedule_config.interval_unit)
            return None
        return now + delta
    if stype == "daily":
        hour, minute = _parse_time(schedule_config.daily_time)
        candidate = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
        # FIX: run today if today's slot has not passed yet; the previous
        # code unconditionally scheduled for tomorrow, which skipped a
        # same-day slot (the cron branch already did this check).
        if candidate > now:
            return candidate
        return candidate + timedelta(days=1)
    if stype == "weekly":
        hour, minute = _parse_time(schedule_config.weekly_time)
        days = sorted(schedule_config.weekly_days) if schedule_config.weekly_days else [1]
        # ISO weekday: 1=Monday ... 7=Sunday
        current_weekday = now.isoweekday()
        # Find the next matching weekday; FIX: also consider today's
        # weekday when its time slot has not passed yet.
        for day in days:
            if day < current_weekday:
                continue
            candidate = (now + timedelta(days=day - current_weekday)).replace(
                hour=hour, minute=minute, second=0, microsecond=0
            )
            if candidate > now:
                return candidate
        # No later slot this week: jump to the first configured weekday
        # of next week.
        first_day = days[0]
        delta_days = 7 - current_weekday + first_day
        next_dt = now + timedelta(days=delta_days)
        return next_dt.replace(hour=hour, minute=minute, second=0, microsecond=0)
    if stype == "cron":
        # Minimal cron parsing: only "minute hour * * *"-style expressions
        # (daily schedule). Full cron support would require croniter.
        return _parse_simple_cron(schedule_config.cron_expression, now)
    logger.warning("未知的 schedule_type: %s", stype)
    return None
def _parse_simple_cron(expression: str, now: datetime) -> datetime | None:
    """Minimal parser for basic 5-field cron expressions.

    Supported forms:
    - "M H * * *" -> every day at H:M
    - "M H * * D" -> weekly on D at H:M (D is 0-6, 0 = Sunday)
    - anything else falls back to tomorrow at 04:00

    Ranges, lists and step syntax are not supported; add the croniter
    dependency to pyproject.toml for full cron semantics.
    """

    def at_time(base: datetime, h: int, m: int) -> datetime:
        # Normalise *base* to the given wall-clock time.
        return base.replace(hour=h, minute=m, second=0, microsecond=0)

    fields = expression.strip().split()
    if len(fields) != 5:
        logger.warning("无法解析 cron 表达式: %s,回退到明天 04:00", expression)
        return at_time(now + timedelta(days=1), 4, 0)
    minute_field, hour_field, _dom, _month, dow_field = fields
    try:
        minute = 0 if minute_field == "*" else int(minute_field)
        hour = 0 if hour_field == "*" else int(hour_field)
    except ValueError:
        logger.warning("cron 表达式时间字段无法解析: %s,回退到明天 04:00", expression)
        return at_time(now + timedelta(days=1), 4, 0)
    if dow_field == "*":
        # Daily schedule (dom=* month=* dow=*): tomorrow at H:M.
        return at_time(now + timedelta(days=1), hour, minute)
    try:
        cron_dow = int(dow_field)  # 0=Sunday, 1=Monday, ..., 6=Saturday
    except ValueError:
        return at_time(now + timedelta(days=1), hour, minute)
    # Map cron's Sunday-as-0 onto ISO weekday (1=Monday, 7=Sunday).
    target_iso = cron_dow or 7
    today_iso = now.isoweekday()
    if target_iso != today_iso:
        # Modular distance forward to the target weekday (1..6 days).
        gap_days = (target_iso - today_iso) % 7
    else:
        # Same weekday: today if the slot is still ahead, else next week.
        gap_days = 0 if now < at_time(now, hour, minute) else 7
    return at_time(now + timedelta(days=gap_days), hour, minute)
class Scheduler:
    """PostgreSQL-backed periodic scheduler.

    A background asyncio loop checks for due tasks every
    SCHEDULER_POLL_INTERVAL seconds and enqueues their TaskConfig into
    the TaskQueue.
    """

    def __init__(self) -> None:
        self._running = False
        self._loop_task: asyncio.Task | None = None

    # ------------------------------------------------------------------
    # Core: check due tasks and enqueue them
    # ------------------------------------------------------------------
    def check_and_enqueue(self) -> int:
        """Enqueue scheduled tasks with enabled=true and next_run_at <= now.

        Returns:
            Number of tasks enqueued in this pass.
        """
        conn = get_connection()
        enqueued = 0
        try:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    SELECT id, site_id, task_config, schedule_config
                    FROM scheduled_tasks
                    WHERE enabled = TRUE
                      AND next_run_at IS NOT NULL
                      AND next_run_at <= NOW()
                    ORDER BY next_run_at ASC
                    """
                )
                rows = cur.fetchall()
            for row in rows:
                task_id = str(row[0])
                site_id = row[1]
                # Drivers may return JSON columns as dict or str.
                task_config_raw = row[2] if isinstance(row[2], dict) else json.loads(row[2])
                schedule_config_raw = row[3] if isinstance(row[3], dict) else json.loads(row[3])
                try:
                    config = TaskConfigSchema(**task_config_raw)
                    schedule_cfg = ScheduleConfigSchema(**schedule_config_raw)
                except Exception:
                    # FIX: a broken config used to be merely skipped, so the
                    # row stayed due and was retried (and error-logged) on
                    # every poll forever. Clear next_run_at and record the
                    # failure so the row is no longer picked up.
                    logger.exception("调度任务 [%s] 配置反序列化失败,已暂停调度", task_id)
                    with conn.cursor() as cur:
                        cur.execute(
                            """
                            UPDATE scheduled_tasks
                            SET next_run_at = NULL,
                                last_status = 'config_error',
                                updated_at = NOW()
                            WHERE id = %s
                            """,
                            (task_id,),
                        )
                    conn.commit()
                    continue
                # Enqueue into the task queue.
                try:
                    queue_id = task_queue.enqueue(config, site_id)
                    logger.info(
                        "调度任务 [%s] 入队成功 → queue_id=%s site_id=%s",
                        task_id, queue_id, site_id,
                    )
                    enqueued += 1
                except Exception:
                    # Enqueue failures may be transient (e.g. DB hiccup);
                    # leave next_run_at untouched so the task is retried on
                    # the next poll.
                    logger.exception("调度任务 [%s] 入队失败", task_id)
                    continue
                # Update the schedule row's bookkeeping and next run time.
                now = datetime.now(timezone.utc)
                next_run = calculate_next_run(schedule_cfg, now)
                with conn.cursor() as cur:
                    cur.execute(
                        """
                        UPDATE scheduled_tasks
                        SET last_run_at = NOW(),
                            run_count = run_count + 1,
                            next_run_at = %s,
                            last_status = 'enqueued',
                            updated_at = NOW()
                        WHERE id = %s
                        """,
                        (next_run, task_id),
                    )
                conn.commit()
        except Exception:
            logger.exception("check_and_enqueue 执行异常")
            try:
                conn.rollback()
            except Exception:
                pass
        finally:
            conn.close()
        if enqueued > 0:
            logger.info("本轮调度检查:%d 个任务入队", enqueued)
        return enqueued

    # ------------------------------------------------------------------
    # Background loop
    # ------------------------------------------------------------------
    async def _loop(self) -> None:
        """Background loop: one check every SCHEDULER_POLL_INTERVAL seconds."""
        self._running = True
        logger.info("Scheduler 后台循环启动(间隔 %ds)", SCHEDULER_POLL_INTERVAL)
        while self._running:
            try:
                # Run the synchronous DB work in the default thread pool so
                # the event loop is not blocked.
                loop = asyncio.get_running_loop()
                await loop.run_in_executor(None, self.check_and_enqueue)
            except Exception:
                logger.exception("Scheduler 循环迭代异常")
            await asyncio.sleep(SCHEDULER_POLL_INTERVAL)
        logger.info("Scheduler 后台循环停止")

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------
    def start(self) -> None:
        """Start the background loop (called from the FastAPI lifespan)."""
        if self._loop_task is None or self._loop_task.done():
            self._loop_task = asyncio.create_task(self._loop())
            logger.info("Scheduler 已启动")

    async def stop(self) -> None:
        """Stop the background loop and wait for it to finish."""
        self._running = False
        if self._loop_task and not self._loop_task.done():
            self._loop_task.cancel()
            try:
                await self._loop_task
            except asyncio.CancelledError:
                pass
        self._loop_task = None
        logger.info("Scheduler 已停止")
# Module-level singleton started from the FastAPI lifespan
scheduler = Scheduler()

View File

@@ -0,0 +1,391 @@
# -*- coding: utf-8 -*-
"""ETL task executor.

Runs the ETL CLI as a subprocess, streams stdout/stderr line by line to
WebSocket subscribers, and writes the final result to the
task_execution_log table.

Design notes:
- each execution_id maps to one subprocess stored in _processes
- log lines are kept in the in-memory _log_buffers
- WebSocket subscribers receive live log lines via asyncio.Queue
- Windows compatibility: cancellation uses process.terminate() rather
  than SIGTERM
"""
from __future__ import annotations
import asyncio
import logging
import subprocess
import sys
import threading
import time
from datetime import datetime, timezone
from typing import Any
from ..config import ETL_PROJECT_PATH
from ..database import get_connection
from ..schemas.tasks import TaskConfigSchema
from ..services.cli_builder import cli_builder
logger = logging.getLogger(__name__)
class TaskExecutor:
    """Manages the lifecycle of ETL CLI subprocesses.

    One subprocess per execution_id (kept in _processes); log lines are
    buffered in memory (_log_buffers) and fanned out to WebSocket
    subscribers through per-subscriber asyncio.Queue objects.
    """

    def __init__(self) -> None:
        # execution_id → subprocess.Popen
        self._processes: dict[str, subprocess.Popen] = {}
        # execution_id → list[str] (interleaved stdout + stderr lines)
        self._log_buffers: dict[str, list[str]] = {}
        # execution_id → set[asyncio.Queue] (WebSocket subscribers)
        self._subscribers: dict[str, set[asyncio.Queue[str | None]]] = {}

    # ------------------------------------------------------------------
    # WebSocket subscription management
    # ------------------------------------------------------------------
    def subscribe(self, execution_id: str) -> asyncio.Queue[str | None]:
        """Register a WebSocket subscriber and return its log Queue.

        A str pushed to the Queue is a log line; None signals the end of
        the execution.
        """
        if execution_id not in self._subscribers:
            self._subscribers[execution_id] = set()
        queue: asyncio.Queue[str | None] = asyncio.Queue()
        self._subscribers[execution_id].add(queue)
        return queue

    def unsubscribe(self, execution_id: str, queue: asyncio.Queue[str | None]) -> None:
        """Remove a WebSocket subscriber, dropping the empty set if last."""
        subs = self._subscribers.get(execution_id)
        if subs:
            subs.discard(queue)
            if not subs:
                del self._subscribers[execution_id]

    def _broadcast(self, execution_id: str, line: str) -> None:
        """Push one log line to every subscriber.

        NOTE(review): this is called from the reader threads in
        _run_subprocess; asyncio.Queue is not formally thread-safe, so a
        loop.call_soon_threadsafe hand-off would be more robust — confirm
        before relying on it under load.
        """
        subs = self._subscribers.get(execution_id)
        if subs:
            for q in subs:
                q.put_nowait(line)

    def _broadcast_end(self, execution_id: str) -> None:
        """Tell every subscriber the execution finished (None sentinel)."""
        subs = self._subscribers.get(execution_id)
        if subs:
            for q in subs:
                q.put_nowait(None)

    # ------------------------------------------------------------------
    # Log buffer
    # ------------------------------------------------------------------
    def get_logs(self, execution_id: str) -> list[str]:
        """Return a copy of the in-memory log buffer for this execution."""
        return list(self._log_buffers.get(execution_id, []))

    # ------------------------------------------------------------------
    # Execution state queries
    # ------------------------------------------------------------------
    def is_running(self, execution_id: str) -> bool:
        """Return True if the execution's subprocess is still alive."""
        proc = self._processes.get(execution_id)
        if proc is None:
            return False
        return proc.poll() is None

    def get_running_ids(self) -> list[str]:
        """Return the execution_ids of all live subprocesses.

        FIX: uses poll() (like is_running) instead of reading returncode
        directly — returncode is only refreshed by poll()/wait(), so an
        exited process could otherwise still be reported as running.
        """
        return [eid for eid, p in self._processes.items() if p.poll() is None]

    # ------------------------------------------------------------------
    # Core execution
    # ------------------------------------------------------------------
    async def execute(
        self,
        config: TaskConfigSchema,
        execution_id: str,
        queue_id: str | None = None,
        site_id: int | None = None,
    ) -> None:
        """Invoke the ETL CLI as a subprocess for this execution.

        Uses subprocess.Popen plus reader threads for Windows
        compatibility (asyncio.create_subprocess_exec raises
        NotImplementedError with the default Windows event loop).
        """
        cmd = cli_builder.build_command(
            config, ETL_PROJECT_PATH, python_executable=sys.executable
        )
        command_str = " ".join(cmd)
        effective_site_id = site_id or config.store_id
        logger.info(
            "启动 ETL 子进程 [%s]: %s (cwd=%s)",
            execution_id, command_str, ETL_PROJECT_PATH,
        )
        self._log_buffers[execution_id] = []
        started_at = datetime.now(timezone.utc)
        t0 = time.monotonic()
        # Record the run as 'running' before the subprocess starts.
        self._write_execution_log(
            execution_id=execution_id,
            queue_id=queue_id,
            site_id=effective_site_id,
            task_codes=config.tasks,
            status="running",
            started_at=started_at,
            command=command_str,
        )
        exit_code: int | None = None
        status = "running"
        stdout_lines: list[str] = []
        stderr_lines: list[str] = []
        try:
            # Extra environment variables (the DWD table filter is injected
            # through the environment rather than the command line).
            extra_env: dict[str, str] = {}
            if config.dwd_only_tables:
                extra_env["DWD_ONLY_TABLES"] = ",".join(config.dwd_only_tables)
            # Run the subprocess in the thread pool, Windows-compatible.
            # FIX: get_running_loop() — get_event_loop() inside a coroutine
            # is deprecated since Python 3.10.
            exit_code = await asyncio.get_running_loop().run_in_executor(
                None,
                self._run_subprocess,
                cmd,
                execution_id,
                stdout_lines,
                stderr_lines,
                extra_env or None,
            )
            status = "success" if exit_code == 0 else "failed"
            logger.info(
                "ETL 子进程 [%s] 退出exit_code=%s, status=%s",
                execution_id, exit_code, status,
            )
        except asyncio.CancelledError:
            status = "cancelled"
            logger.info("ETL 子进程 [%s] 已取消", execution_id)
            # Best-effort termination of the child process.
            proc = self._processes.get(execution_id)
            if proc and proc.poll() is None:
                proc.terminate()
        except Exception as exc:
            status = "failed"
            import traceback
            tb = traceback.format_exc()
            stderr_lines.append(f"[task_executor] 子进程启动/执行异常: {exc}")
            stderr_lines.append(tb)
            logger.exception("ETL 子进程 [%s] 执行异常", execution_id)
        finally:
            elapsed_ms = int((time.monotonic() - t0) * 1000)
            finished_at = datetime.now(timezone.utc)
            self._broadcast_end(execution_id)
            self._processes.pop(execution_id, None)
            self._update_execution_log(
                execution_id=execution_id,
                status=status,
                finished_at=finished_at,
                exit_code=exit_code,
                duration_ms=elapsed_ms,
                output_log="\n".join(stdout_lines),
                error_log="\n".join(stderr_lines),
            )

    def _run_subprocess(
        self,
        cmd: list[str],
        execution_id: str,
        stdout_lines: list[str],
        stderr_lines: list[str],
        extra_env: dict[str, str] | None = None,
    ) -> int:
        """Run the subprocess in a worker thread, streaming its output."""
        import os
        env = os.environ.copy()
        # Force UTF-8 output in the child to avoid GBK mojibake on Windows.
        env["PYTHONIOENCODING"] = "utf-8"
        if extra_env:
            env.update(extra_env)
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=ETL_PROJECT_PATH,
            env=env,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
        self._processes[execution_id] = proc

        def read_stream(
            stream, stream_name: str, collector: list[str],
        ) -> None:
            """Read a pipe line by line; buffer, collect and broadcast each line."""
            for raw_line in stream:
                line = raw_line.rstrip("\n").rstrip("\r")
                tagged = f"[{stream_name}] {line}"
                buf = self._log_buffers.get(execution_id)
                if buf is not None:
                    buf.append(tagged)
                collector.append(line)
                self._broadcast(execution_id, tagged)

        t_out = threading.Thread(
            target=read_stream, args=(proc.stdout, "stdout", stdout_lines),
            daemon=True,
        )
        t_err = threading.Thread(
            target=read_stream, args=(proc.stderr, "stderr", stderr_lines),
            daemon=True,
        )
        t_out.start()
        t_err.start()
        proc.wait()
        # Give the reader threads a bounded window to drain the pipes.
        t_out.join(timeout=5)
        t_err.join(timeout=5)
        return proc.returncode

    # ------------------------------------------------------------------
    # Cancellation
    # ------------------------------------------------------------------
    async def cancel(self, execution_id: str) -> bool:
        """Send a termination signal to the subprocess.

        Returns:
            True if a signal was sent; False if the process does not
            exist or has already exited.
        """
        proc = self._processes.get(execution_id)
        if proc is None:
            return False
        # Popen.poll() returns None while the process is still running.
        if proc.poll() is not None:
            return False
        logger.info("取消 ETL 子进程 [%s], pid=%s", execution_id, proc.pid)
        try:
            proc.terminate()
        except ProcessLookupError:
            return False
        return True

    # ------------------------------------------------------------------
    # Database operations (synchronous; simple direct connections)
    # ------------------------------------------------------------------
    @staticmethod
    def _write_execution_log(
        *,
        execution_id: str,
        queue_id: str | None,
        site_id: int | None,
        task_codes: list[str],
        status: str,
        started_at: datetime,
        command: str,
    ) -> None:
        """Insert one execution-log row (in 'running' state)."""
        try:
            conn = get_connection()
            try:
                with conn.cursor() as cur:
                    cur.execute(
                        """
                        INSERT INTO task_execution_log
                            (id, queue_id, site_id, task_codes, status,
                             started_at, command)
                        VALUES (%s, %s, %s, %s, %s, %s, %s)
                        """,
                        (
                            execution_id,
                            queue_id,
                            site_id or 0,
                            task_codes,
                            status,
                            started_at,
                            command,
                        ),
                    )
                conn.commit()
            finally:
                conn.close()
        except Exception:
            # Best-effort: never let logging bring down the execution.
            logger.exception("写入 execution_log 失败 [%s]", execution_id)

    @staticmethod
    def _update_execution_log(
        *,
        execution_id: str,
        status: str,
        finished_at: datetime,
        exit_code: int | None,
        duration_ms: int,
        output_log: str,
        error_log: str,
    ) -> None:
        """Update the execution-log row with the final result."""
        try:
            conn = get_connection()
            try:
                with conn.cursor() as cur:
                    cur.execute(
                        """
                        UPDATE task_execution_log
                        SET status = %s,
                            finished_at = %s,
                            exit_code = %s,
                            duration_ms = %s,
                            output_log = %s,
                            error_log = %s
                        WHERE id = %s
                        """,
                        (
                            status,
                            finished_at,
                            exit_code,
                            duration_ms,
                            output_log,
                            error_log,
                            execution_id,
                        ),
                    )
                conn.commit()
            finally:
                conn.close()
        except Exception:
            logger.exception("更新 execution_log 失败 [%s]", execution_id)

    # ------------------------------------------------------------------
    # Cleanup
    # ------------------------------------------------------------------
    def cleanup(self, execution_id: str) -> None:
        """Release in-memory resources (log buffer and subscribers).

        Normally called once the logs have been persisted.
        """
        self._log_buffers.pop(execution_id, None)
        self._subscribers.pop(execution_id, None)
# Module-level singleton shared across the backend
task_executor = TaskExecutor()

View File

@@ -0,0 +1,486 @@
# -*- coding: utf-8 -*-
"""Task queue service.

FIFO queue backed by the PostgreSQL task_queue table. Supports:
- enqueue: insert with an auto-assigned position (current max + 1)
- dequeue: pop the pending task with the smallest position
- reorder: move a task to a new position within the queue
- delete: remove a pending task
- process_loop: background coroutine that auto-dequeues and runs tasks
  whenever a queue is non-empty and nothing is running for that store

All operations filter by site_id for per-store isolation.
"""
from __future__ import annotations
import asyncio
import json
import logging
import uuid
from dataclasses import dataclass, field
from typing import Any
from ..database import get_connection
from ..schemas.tasks import TaskConfigSchema
logger = logging.getLogger(__name__)

# Polling interval of the background loop, in seconds
POLL_INTERVAL_SECONDS = 2
@dataclass
class QueuedTask:
    """Data object for one queued task (mirrors a task_queue row)."""
    id: str
    site_id: int
    config: dict[str, Any]
    status: str
    position: int
    # Timestamps come straight from the DB driver; typed Any here.
    created_at: Any = None
    started_at: Any = None
    finished_at: Any = None
    exit_code: int | None = None
    error_message: str | None = None
class TaskQueue:
    """PostgreSQL-backed FIFO task queue."""

    def __init__(self) -> None:
        self._running = False
        self._loop_task: asyncio.Task | None = None
        # FIX: strong references to fire-and-forget execution tasks —
        # asyncio only keeps weak references to tasks, so an unreferenced
        # task can be garbage-collected mid-flight.
        self._bg_tasks: set[asyncio.Task] = set()

    # ------------------------------------------------------------------
    # Enqueue
    # ------------------------------------------------------------------
    def enqueue(self, config: TaskConfigSchema, site_id: int) -> str:
        """Insert a task config at the tail of the store's queue.

        Args:
            config: task configuration
            site_id: store id (per-store isolation)

        Returns:
            Id of the newly created queue row (UUID string).
        """
        task_id = str(uuid.uuid4())
        config_json = config.model_dump(mode="json")
        conn = get_connection()
        try:
            with conn.cursor() as cur:
                # Tail position = current max position of this store's
                # pending tasks + 1.
                cur.execute(
                    """
                    SELECT COALESCE(MAX(position), 0)
                    FROM task_queue
                    WHERE site_id = %s AND status = 'pending'
                    """,
                    (site_id,),
                )
                max_pos = cur.fetchone()[0]
                new_pos = max_pos + 1
                cur.execute(
                    """
                    INSERT INTO task_queue (id, site_id, config, status, position)
                    VALUES (%s, %s, %s, 'pending', %s)
                    """,
                    (task_id, site_id, json.dumps(config_json), new_pos),
                )
            conn.commit()
        finally:
            conn.close()
        logger.info("任务入队 [%s] site_id=%s position=%s", task_id, site_id, new_pos)
        return task_id

    # ------------------------------------------------------------------
    # Dequeue
    # ------------------------------------------------------------------
    def dequeue(self, site_id: int) -> QueuedTask | None:
        """Pop the pending task with the smallest position, mark it running.

        Args:
            site_id: store id

        Returns:
            QueuedTask, or None when the queue is empty.
        """
        conn = get_connection()
        try:
            with conn.cursor() as cur:
                # Lock the lowest-position pending row; SKIP LOCKED keeps
                # concurrent workers from fighting over the same row.
                cur.execute(
                    """
                    SELECT id, site_id, config, status, position,
                           created_at, started_at, finished_at,
                           exit_code, error_message
                    FROM task_queue
                    WHERE site_id = %s AND status = 'pending'
                    ORDER BY position ASC
                    LIMIT 1
                    FOR UPDATE SKIP LOCKED
                    """,
                    (site_id,),
                )
                row = cur.fetchone()
                if row is None:
                    conn.commit()
                    return None
                task = QueuedTask(
                    id=str(row[0]),
                    site_id=row[1],
                    config=row[2] if isinstance(row[2], dict) else json.loads(row[2]),
                    status=row[3],
                    position=row[4],
                    created_at=row[5],
                    started_at=row[6],
                    finished_at=row[7],
                    exit_code=row[8],
                    error_message=row[9],
                )
                # Flip the row to running before releasing the lock.
                cur.execute(
                    """
                    UPDATE task_queue
                    SET status = 'running', started_at = NOW()
                    WHERE id = %s
                    """,
                    (task.id,),
                )
            conn.commit()
        finally:
            conn.close()
        task.status = "running"
        logger.info("任务出队 [%s] site_id=%s", task.id, site_id)
        return task

    # ------------------------------------------------------------------
    # Reorder
    # ------------------------------------------------------------------
    def reorder(self, task_id: str, new_position: int, site_id: int) -> None:
        """Move a pending task to a new position in its store's queue.

        Only pending tasks may be reordered. The target task is moved to
        new_position and the remaining pending tasks keep their relative
        order under a fresh contiguous 1-based numbering.

        Args:
            task_id: id of the task to move
            new_position: target position (1-based)
            site_id: store id
        """
        conn = get_connection()
        try:
            with conn.cursor() as cur:
                # All pending tasks of this store, in queue order.
                cur.execute(
                    """
                    SELECT id FROM task_queue
                    WHERE site_id = %s AND status = 'pending'
                    ORDER BY position ASC
                    """,
                    (site_id,),
                )
                rows = cur.fetchall()
                task_ids = [str(r[0]) for r in rows]
                if task_id not in task_ids:
                    conn.commit()
                    return
                # Remove the target and re-insert it at the new slot.
                task_ids.remove(task_id)
                # new_position is 1-based; convert to 0-based and clamp.
                insert_idx = max(0, min(new_position - 1, len(task_ids)))
                task_ids.insert(insert_idx, task_id)
                # Renumber contiguously (1-based) in the new order.
                for idx, tid in enumerate(task_ids, start=1):
                    cur.execute(
                        "UPDATE task_queue SET position = %s WHERE id = %s",
                        (idx, tid),
                    )
            conn.commit()
        finally:
            conn.close()
        logger.info(
            "任务重排 [%s] → position=%s site_id=%s",
            task_id, new_position, site_id,
        )

    # ------------------------------------------------------------------
    # Delete
    # ------------------------------------------------------------------
    def delete(self, task_id: str, site_id: int) -> bool:
        """Delete a pending task.

        Args:
            task_id: task id
            site_id: store id

        Returns:
            True on success; False when the task does not exist or is
            not in pending state.
        """
        conn = get_connection()
        try:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    DELETE FROM task_queue
                    WHERE id = %s AND site_id = %s AND status = 'pending'
                    """,
                    (task_id, site_id),
                )
                deleted = cur.rowcount > 0
            conn.commit()
        finally:
            conn.close()
        if deleted:
            logger.info("任务删除 [%s] site_id=%s", task_id, site_id)
        else:
            logger.warning(
                "任务删除失败 [%s] site_id=%s(不存在或非 pending",
                task_id, site_id,
            )
        return deleted

    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------
    def list_pending(self, site_id: int) -> list[QueuedTask]:
        """List a store's pending tasks ordered by position ascending."""
        conn = get_connection()
        try:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    SELECT id, site_id, config, status, position,
                           created_at, started_at, finished_at,
                           exit_code, error_message
                    FROM task_queue
                    WHERE site_id = %s AND status = 'pending'
                    ORDER BY position ASC
                    """,
                    (site_id,),
                )
                rows = cur.fetchall()
            conn.commit()
        finally:
            conn.close()
        return [
            QueuedTask(
                id=str(r[0]),
                site_id=r[1],
                config=r[2] if isinstance(r[2], dict) else json.loads(r[2]),
                status=r[3],
                position=r[4],
                created_at=r[5],
                started_at=r[6],
                finished_at=r[7],
                exit_code=r[8],
                error_message=r[9],
            )
            for r in rows
        ]

    def has_running(self, site_id: int) -> bool:
        """Return True if the store has any task in running state."""
        conn = get_connection()
        try:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    SELECT EXISTS(
                        SELECT 1 FROM task_queue
                        WHERE site_id = %s AND status = 'running'
                    )
                    """,
                    (site_id,),
                )
                result = cur.fetchone()[0]
            conn.commit()
        finally:
            conn.close()
        return result

    # ------------------------------------------------------------------
    # Background processing loop
    # ------------------------------------------------------------------
    async def process_loop(self) -> None:
        """Background coroutine: auto-dequeue and run when a store is idle.

        Loop:
        1. find every site_id with pending tasks
        2. for each, dequeue and launch if nothing is running for it
        3. sleep POLL_INTERVAL_SECONDS and repeat
        """
        # Deferred import to avoid a circular dependency.
        from .task_executor import task_executor
        self._running = True
        logger.info("TaskQueue process_loop 启动")
        while self._running:
            try:
                await self._process_once(task_executor)
            except Exception:
                logger.exception("process_loop 迭代异常")
            await asyncio.sleep(POLL_INTERVAL_SECONDS)
        logger.info("TaskQueue process_loop 停止")

    async def _process_once(self, executor: Any) -> None:
        """Single pass: scan every store's pending queue and launch work."""
        site_ids = self._get_pending_site_ids()
        for site_id in site_ids:
            if self.has_running(site_id):
                continue
            task = self.dequeue(site_id)
            if task is None:
                continue
            try:
                config = TaskConfigSchema(**task.config)
            except Exception:
                # FIX: a malformed config used to raise out of this loop
                # AFTER dequeue() had flipped the row to 'running', leaving
                # it stuck in that state forever.
                logger.exception("队列任务 [%s] 配置反序列化失败", task.id)
                self._mark_failed(task.id, "任务配置反序列化失败")
                continue
            execution_id = str(uuid.uuid4())
            logger.info(
                "process_loop 自动执行 [%s] queue_id=%s site_id=%s",
                execution_id, task.id, site_id,
            )
            # Launch asynchronously so the scan loop is not blocked; keep a
            # strong reference so the task cannot be garbage-collected.
            job = asyncio.create_task(
                self._execute_and_update(
                    executor, config, execution_id, task.id, site_id,
                )
            )
            self._bg_tasks.add(job)
            job.add_done_callback(self._bg_tasks.discard)

    async def _execute_and_update(
        self,
        executor: Any,
        config: TaskConfigSchema,
        execution_id: str,
        queue_id: str,
        site_id: int,
    ) -> None:
        """Run one task and sync its final state back to the queue row."""
        try:
            await executor.execute(
                config=config,
                execution_id=execution_id,
                queue_id=queue_id,
                site_id=site_id,
            )
            # Propagate the executor's recorded result to task_queue.
            self._update_queue_status_from_log(queue_id)
        except Exception:
            logger.exception("队列任务执行异常 [%s]", queue_id)
            self._mark_failed(queue_id, "执行过程中发生未捕获异常")

    def _get_pending_site_ids(self) -> list[int]:
        """Return every site_id that currently has pending tasks."""
        conn = get_connection()
        try:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    SELECT DISTINCT site_id FROM task_queue
                    WHERE status = 'pending'
                    """
                )
                rows = cur.fetchall()
            conn.commit()
        finally:
            conn.close()
        return [r[0] for r in rows]

    def _update_queue_status_from_log(self, queue_id: str) -> None:
        """Copy the latest execution result onto the task_queue row."""
        conn = get_connection()
        try:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    SELECT status, finished_at, exit_code, error_log
                    FROM task_execution_log
                    WHERE queue_id = %s
                    ORDER BY started_at DESC
                    LIMIT 1
                    """,
                    (queue_id,),
                )
                row = cur.fetchone()
                if row:
                    cur.execute(
                        """
                        UPDATE task_queue
                        SET status = %s, finished_at = %s,
                            exit_code = %s, error_message = %s
                        WHERE id = %s
                        """,
                        (row[0], row[1], row[2], row[3], queue_id),
                    )
            conn.commit()
        finally:
            conn.close()

    def _mark_failed(self, queue_id: str, error_message: str) -> None:
        """Mark a queue row as failed with the given error message."""
        conn = get_connection()
        try:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    UPDATE task_queue
                    SET status = 'failed', finished_at = NOW(),
                        error_message = %s
                    WHERE id = %s
                    """,
                    (error_message, queue_id),
                )
            conn.commit()
        finally:
            conn.close()

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------
    def start(self) -> None:
        """Start the background loop (called from the FastAPI lifespan)."""
        if self._loop_task is None or self._loop_task.done():
            self._loop_task = asyncio.create_task(self.process_loop())
            logger.info("TaskQueue 后台循环已启动")

    async def stop(self) -> None:
        """Stop the background loop and wait for it to finish."""
        self._running = False
        if self._loop_task and not self._loop_task.done():
            self._loop_task.cancel()
            try:
                await self._loop_task
            except asyncio.CancelledError:
                pass
        self._loop_task = None
        logger.info("TaskQueue 后台循环已停止")
# Module-level singleton shared across the backend
task_queue = TaskQueue()

View File

@@ -0,0 +1,221 @@
# -*- coding: utf-8 -*-
"""Static task registry.

Hard-coded copy of the task metadata extracted from the ETL project's
orchestration/task_registry.py. The backend deliberately does not import
ETL code, to avoid pulling in its heavy dependency chain.

Domain grouping follows task-code prefix / target-table semantics and is
kept consistent with the GUI.
"""
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass(frozen=True)
class TaskDefinition:
    """Metadata for a single ETL task."""
    code: str
    name: str
    description: str
    domain: str  # Business domain: member / settlement / assistant / goods / table / group-buy / inventory / finance / index / utility
    layer: str  # ODS / DWD / DWS / INDEX / UTILITY
    requires_window: bool = True
    is_ods: bool = False
    is_dimension: bool = False
    default_enabled: bool = True
    is_common: bool = True  # Common-task flag; False marks utility/manual tasks
@dataclass(frozen=True)
class DwdTableDefinition:
    """Metadata for one DWD table."""
    table_name: str  # Fully-qualified table name (including schema)
    display_name: str
    domain: str
    ods_source: str  # Corresponding ODS source table
    is_dimension: bool = False
# ── ODS task definitions ──────────────────────────────────────
# NOTE: name/description strings below are runtime data shown in the GUI and
# are intentionally kept in Chinese. Tasks extracting master data (no natural
# time axis) set requires_window=False.
ODS_TASKS: list[TaskDefinition] = [
    TaskDefinition("ODS_ASSISTANT_ACCOUNT", "助教账号", "抽取助教账号主数据", "助教", "ODS", is_ods=True),
    TaskDefinition("ODS_ASSISTANT_LEDGER", "助教服务记录", "抽取助教服务流水", "助教", "ODS", is_ods=True),
    TaskDefinition("ODS_ASSISTANT_ABOLISH", "助教取消记录", "抽取助教取消/作废记录", "助教", "ODS", is_ods=True),
    TaskDefinition("ODS_SETTLEMENT_RECORDS", "结算记录", "抽取订单结算记录", "结算", "ODS", is_ods=True),
    TaskDefinition("ODS_SETTLEMENT_TICKET", "结账小票", "抽取结账小票明细", "结算", "ODS", is_ods=True),
    TaskDefinition("ODS_TABLE_USE", "台费流水", "抽取台费使用流水", "台桌", "ODS", is_ods=True),
    TaskDefinition("ODS_TABLE_FEE_DISCOUNT", "台费折扣", "抽取台费折扣记录", "台桌", "ODS", is_ods=True),
    TaskDefinition("ODS_TABLES", "台桌主数据", "抽取门店台桌信息", "台桌", "ODS", is_ods=True, requires_window=False),
    TaskDefinition("ODS_PAYMENT", "支付流水", "抽取支付交易记录", "结算", "ODS", is_ods=True),
    TaskDefinition("ODS_REFUND", "退款流水", "抽取退款交易记录", "结算", "ODS", is_ods=True),
    TaskDefinition("ODS_PLATFORM_COUPON", "平台券核销", "抽取平台优惠券核销记录", "团购", "ODS", is_ods=True),
    TaskDefinition("ODS_MEMBER", "会员主数据", "抽取会员档案", "会员", "ODS", is_ods=True),
    TaskDefinition("ODS_MEMBER_CARD", "会员储值卡", "抽取会员储值卡信息", "会员", "ODS", is_ods=True),
    TaskDefinition("ODS_MEMBER_BALANCE", "会员余额变动", "抽取会员余额变动记录", "会员", "ODS", is_ods=True),
    TaskDefinition("ODS_RECHARGE_SETTLE", "充值结算", "抽取充值结算记录", "会员", "ODS", is_ods=True),
    TaskDefinition("ODS_GROUP_PACKAGE", "团购套餐", "抽取团购套餐定义", "团购", "ODS", is_ods=True, requires_window=False),
    TaskDefinition("ODS_GROUP_BUY_REDEMPTION", "团购核销", "抽取团购核销记录", "团购", "ODS", is_ods=True),
    TaskDefinition("ODS_INVENTORY_STOCK", "库存快照", "抽取商品库存汇总", "库存", "ODS", is_ods=True, requires_window=False),
    TaskDefinition("ODS_INVENTORY_CHANGE", "库存变动", "抽取库存出入库记录", "库存", "ODS", is_ods=True),
    TaskDefinition("ODS_GOODS_CATEGORY", "商品分类", "抽取商品分类树", "商品", "ODS", is_ods=True, requires_window=False),
    TaskDefinition("ODS_STORE_GOODS", "门店商品", "抽取门店商品主数据", "商品", "ODS", is_ods=True, requires_window=False),
    TaskDefinition("ODS_STORE_GOODS_SALES", "商品销售", "抽取门店商品销售记录", "商品", "ODS", is_ods=True),
    TaskDefinition("ODS_TENANT_GOODS", "租户商品", "抽取租户级商品主数据", "商品", "ODS", is_ods=True, requires_window=False),
]
# ── DWD 任务定义 ──────────────────────────────────────────────
DWD_TASKS: list[TaskDefinition] = [
TaskDefinition("DWD_LOAD_FROM_ODS", "DWD 装载", "从 ODS 装载至 DWD维度 SCD2 + 事实增量)", "通用", "DWD", requires_window=False),
TaskDefinition("DWD_QUALITY_CHECK", "DWD 质量检查", "对 DWD 层数据执行质量校验", "通用", "DWD", requires_window=False, is_common=False),
]
# ── DWS task definitions ──────────────────────────────────────
DWS_TASKS: list[TaskDefinition] = [
    TaskDefinition("DWS_BUILD_ORDER_SUMMARY", "订单汇总构建", "构建订单汇总宽表", "结算", "DWS"),
    TaskDefinition("DWS_ASSISTANT_DAILY", "助教日报", "汇总助教每日业绩", "助教", "DWS"),
    TaskDefinition("DWS_ASSISTANT_MONTHLY", "助教月报", "汇总助教月度业绩", "助教", "DWS"),
    TaskDefinition("DWS_ASSISTANT_CUSTOMER", "助教客户分析", "汇总助教-客户关系", "助教", "DWS"),
    TaskDefinition("DWS_ASSISTANT_SALARY", "助教工资计算", "计算助教工资", "助教", "DWS"),
    TaskDefinition("DWS_ASSISTANT_FINANCE", "助教财务汇总", "汇总助教财务数据", "助教", "DWS"),
    TaskDefinition("DWS_MEMBER_CONSUMPTION", "会员消费分析", "汇总会员消费数据", "会员", "DWS"),
    TaskDefinition("DWS_MEMBER_VISIT", "会员到店分析", "汇总会员到店频次", "会员", "DWS"),
    TaskDefinition("DWS_FINANCE_DAILY", "财务日报", "汇总每日财务数据", "财务", "DWS"),
    TaskDefinition("DWS_FINANCE_RECHARGE", "充值汇总", "汇总充值数据", "财务", "DWS"),
    TaskDefinition("DWS_FINANCE_INCOME_STRUCTURE", "收入结构", "分析收入结构", "财务", "DWS"),
    TaskDefinition("DWS_FINANCE_DISCOUNT_DETAIL", "折扣明细", "汇总折扣明细", "财务", "DWS"),
    # CHANGE [2026-02-19] intent: sync with ETL-side merge — the former
    # DWS_RETENTION_CLEANUP / DWS_MV_REFRESH_* tasks were merged into DWS_MAINTENANCE.
    TaskDefinition("DWS_MAINTENANCE", "DWS 维护", "刷新物化视图 + 清理过期留存数据", "通用", "DWS", requires_window=False, is_common=False),
]
# ── INDEX task definitions ────────────────────────────────────
INDEX_TASKS: list[TaskDefinition] = [
    TaskDefinition("DWS_WINBACK_INDEX", "回流指数 (WBI)", "计算会员回流指数", "指数", "INDEX"),
    TaskDefinition("DWS_NEWCONV_INDEX", "新客转化指数 (NCI)", "计算新客转化指数", "指数", "INDEX"),
    TaskDefinition("DWS_ML_MANUAL_IMPORT", "手动导入 (ML)", "手动导入机器学习数据", "指数", "INDEX", requires_window=False, is_common=False),
    TaskDefinition("DWS_RELATION_INDEX", "关系指数 (RS)", "计算助教-客户关系指数", "指数", "INDEX"),
]
# ── Utility task definitions ──────────────────────────────────
# All utility tasks are manual/maintenance actions: no time window, not "common".
UTILITY_TASKS: list[TaskDefinition] = [
    TaskDefinition("MANUAL_INGEST", "手动导入", "从本地 JSON 文件手动导入数据", "工具", "UTILITY", requires_window=False, is_common=False),
    TaskDefinition("INIT_ODS_SCHEMA", "初始化 ODS Schema", "创建 ODS 层表结构", "工具", "UTILITY", requires_window=False, is_common=False),
    TaskDefinition("INIT_DWD_SCHEMA", "初始化 DWD Schema", "创建 DWD 层表结构", "工具", "UTILITY", requires_window=False, is_common=False),
    TaskDefinition("INIT_DWS_SCHEMA", "初始化 DWS Schema", "创建 DWS 层表结构", "工具", "UTILITY", requires_window=False, is_common=False),
    TaskDefinition("ODS_JSON_ARCHIVE", "ODS JSON 归档", "归档 ODS 原始 JSON 文件", "工具", "UTILITY", requires_window=False, is_common=False),
    TaskDefinition("CHECK_CUTOFF", "游标检查", "检查各任务数据游标截止点", "工具", "UTILITY", requires_window=False, is_common=False),
    TaskDefinition("SEED_DWS_CONFIG", "DWS 配置种子", "初始化 DWS 配置数据", "工具", "UTILITY", requires_window=False, is_common=False),
    TaskDefinition("DATA_INTEGRITY_CHECK", "数据完整性校验", "校验跨层数据完整性", "工具", "UTILITY", requires_window=False, is_common=False),
]
# ── Full task list ────────────────────────────────────────────
ALL_TASKS: list[TaskDefinition] = ODS_TASKS + DWD_TASKS + DWS_TASKS + INDEX_TASKS + UTILITY_TASKS

# Index by task code for O(1) lookup (see get_task_by_code).
_TASK_BY_CODE: dict[str, TaskDefinition] = {t.code: t for t in ALL_TASKS}
def get_all_tasks() -> list[TaskDefinition]:
    """Return the full task list (the shared module-level ALL_TASKS object — do not mutate)."""
    return ALL_TASKS
def get_task_by_code(code: str) -> TaskDefinition | None:
    """Look up a task definition by its code (case-insensitive); None if unknown."""
    normalized = code.upper()
    return _TASK_BY_CODE.get(normalized)
def get_tasks_grouped_by_domain() -> dict[str, list[TaskDefinition]]:
    """Group all tasks by business domain, preserving definition order within each group."""
    grouped: dict[str, list[TaskDefinition]] = {}
    for task in ALL_TASKS:
        bucket = grouped.get(task.domain)
        if bucket is None:
            bucket = grouped[task.domain] = []
        bucket.append(task)
    return grouped
def get_tasks_by_layer(layer: str) -> list[TaskDefinition]:
    """Return every task belonging to the given layer (case-insensitive match)."""
    wanted = layer.upper()
    return [task for task in ALL_TASKS if task.layer == wanted]
# ── Flow → layer mapping ─────────────────────────────────────
# Layers covered by each flow; the frontend uses this to filter selectable
# tasks per flow (see get_compatible_tasks).
FLOW_LAYER_MAP: dict[str, list[str]] = {
    "api_ods": ["ODS"],
    "api_ods_dwd": ["ODS", "DWD"],
    "api_full": ["ODS", "DWD", "DWS", "INDEX"],
    "ods_dwd": ["DWD"],
    "dwd_dws": ["DWS"],
    "dwd_dws_index": ["DWS", "INDEX"],
    "dwd_index": ["INDEX"],
}
def get_compatible_tasks(flow_id: str) -> list[TaskDefinition]:
    """Return the tasks compatible with *flow_id*, i.e. tasks in any of its layers.

    Unknown flow ids yield an empty list.
    """
    allowed_layers = set(FLOW_LAYER_MAP.get(flow_id, []))
    return [task for task in ALL_TASKS if task.layer in allowed_layers]
# ── DWD table definitions ─────────────────────────────────────
# Tables with an "_ex" suffix are extended variants of the base table;
# note dwd.dwd_payment has no "_ex" variant here — confirm against ETL schema.
DWD_TABLES: list[DwdTableDefinition] = [
    # Dimension tables
    DwdTableDefinition("dwd.dim_site", "门店维度", "台桌", "ods.table_fee_transactions", is_dimension=True),
    DwdTableDefinition("dwd.dim_site_ex", "门店维度(扩展)", "台桌", "ods.table_fee_transactions", is_dimension=True),
    DwdTableDefinition("dwd.dim_table", "台桌维度", "台桌", "ods.site_tables_master", is_dimension=True),
    DwdTableDefinition("dwd.dim_table_ex", "台桌维度(扩展)", "台桌", "ods.site_tables_master", is_dimension=True),
    DwdTableDefinition("dwd.dim_assistant", "助教维度", "助教", "ods.assistant_accounts_master", is_dimension=True),
    DwdTableDefinition("dwd.dim_assistant_ex", "助教维度(扩展)", "助教", "ods.assistant_accounts_master", is_dimension=True),
    DwdTableDefinition("dwd.dim_member", "会员维度", "会员", "ods.member_profiles", is_dimension=True),
    DwdTableDefinition("dwd.dim_member_ex", "会员维度(扩展)", "会员", "ods.member_profiles", is_dimension=True),
    DwdTableDefinition("dwd.dim_member_card_account", "会员储值卡维度", "会员", "ods.member_stored_value_cards", is_dimension=True),
    DwdTableDefinition("dwd.dim_member_card_account_ex", "会员储值卡维度(扩展)", "会员", "ods.member_stored_value_cards", is_dimension=True),
    DwdTableDefinition("dwd.dim_tenant_goods", "租户商品维度", "商品", "ods.tenant_goods_master", is_dimension=True),
    DwdTableDefinition("dwd.dim_tenant_goods_ex", "租户商品维度(扩展)", "商品", "ods.tenant_goods_master", is_dimension=True),
    DwdTableDefinition("dwd.dim_store_goods", "门店商品维度", "商品", "ods.store_goods_master", is_dimension=True),
    DwdTableDefinition("dwd.dim_store_goods_ex", "门店商品维度(扩展)", "商品", "ods.store_goods_master", is_dimension=True),
    DwdTableDefinition("dwd.dim_goods_category", "商品分类维度", "商品", "ods.stock_goods_category_tree", is_dimension=True),
    DwdTableDefinition("dwd.dim_groupbuy_package", "团购套餐维度", "团购", "ods.group_buy_packages", is_dimension=True),
    DwdTableDefinition("dwd.dim_groupbuy_package_ex", "团购套餐维度(扩展)", "团购", "ods.group_buy_packages", is_dimension=True),
    # Fact tables
    DwdTableDefinition("dwd.dwd_settlement_head", "结算主表", "结算", "ods.settlement_records"),
    DwdTableDefinition("dwd.dwd_settlement_head_ex", "结算主表(扩展)", "结算", "ods.settlement_records"),
    DwdTableDefinition("dwd.dwd_table_fee_log", "台费流水", "台桌", "ods.table_fee_transactions"),
    DwdTableDefinition("dwd.dwd_table_fee_log_ex", "台费流水(扩展)", "台桌", "ods.table_fee_transactions"),
    DwdTableDefinition("dwd.dwd_table_fee_adjust", "台费折扣", "台桌", "ods.table_fee_discount_records"),
    DwdTableDefinition("dwd.dwd_table_fee_adjust_ex", "台费折扣(扩展)", "台桌", "ods.table_fee_discount_records"),
    DwdTableDefinition("dwd.dwd_store_goods_sale", "商品销售", "商品", "ods.store_goods_sales_records"),
    DwdTableDefinition("dwd.dwd_store_goods_sale_ex", "商品销售(扩展)", "商品", "ods.store_goods_sales_records"),
    DwdTableDefinition("dwd.dwd_assistant_service_log", "助教服务流水", "助教", "ods.assistant_service_records"),
    DwdTableDefinition("dwd.dwd_assistant_service_log_ex", "助教服务流水(扩展)", "助教", "ods.assistant_service_records"),
    DwdTableDefinition("dwd.dwd_assistant_trash_event", "助教取消事件", "助教", "ods.assistant_cancellation_records"),
    DwdTableDefinition("dwd.dwd_assistant_trash_event_ex", "助教取消事件(扩展)", "助教", "ods.assistant_cancellation_records"),
    DwdTableDefinition("dwd.dwd_member_balance_change", "会员余额变动", "会员", "ods.member_balance_changes"),
    DwdTableDefinition("dwd.dwd_member_balance_change_ex", "会员余额变动(扩展)", "会员", "ods.member_balance_changes"),
    DwdTableDefinition("dwd.dwd_groupbuy_redemption", "团购核销", "团购", "ods.group_buy_redemption_records"),
    DwdTableDefinition("dwd.dwd_groupbuy_redemption_ex", "团购核销(扩展)", "团购", "ods.group_buy_redemption_records"),
    DwdTableDefinition("dwd.dwd_platform_coupon_redemption", "平台券核销", "团购", "ods.platform_coupon_redemption_records"),
    DwdTableDefinition("dwd.dwd_platform_coupon_redemption_ex", "平台券核销(扩展)", "团购", "ods.platform_coupon_redemption_records"),
    DwdTableDefinition("dwd.dwd_recharge_order", "充值订单", "会员", "ods.recharge_settlements"),
    DwdTableDefinition("dwd.dwd_recharge_order_ex", "充值订单(扩展)", "会员", "ods.recharge_settlements"),
    DwdTableDefinition("dwd.dwd_payment", "支付流水", "结算", "ods.payment_transactions"),
    DwdTableDefinition("dwd.dwd_refund", "退款流水", "结算", "ods.refund_transactions"),
    DwdTableDefinition("dwd.dwd_refund_ex", "退款流水(扩展)", "结算", "ods.refund_transactions"),
]
def get_dwd_tables_grouped_by_domain() -> dict[str, list[DwdTableDefinition]]:
    """Group DWD table definitions by business domain, preserving definition order."""
    grouped: dict[str, list[DwdTableDefinition]] = {}
    for table in DWD_TABLES:
        bucket = grouped.get(table.domain)
        if bucket is None:
            bucket = grouped[table.domain] = []
        bucket.append(table)
    return grouped