在准备环境前提交一次全部更改。
This commit is contained in:
1
apps/backend/app/services/__init__.py
Normal file
1
apps/backend/app/services/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
158
apps/backend/app/services/cli_builder.py
Normal file
158
apps/backend/app/services/cli_builder.py
Normal file
@@ -0,0 +1,158 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""CLI 命令构建器
|
||||
|
||||
从 gui/utils/cli_builder.py 迁移,适配后端 TaskConfigSchema。
|
||||
将 TaskConfigSchema 转换为 ETL CLI 命令行参数列表。
|
||||
|
||||
支持:
|
||||
- 7 种 Flow(api_ods / api_ods_dwd / api_full / ods_dwd / dwd_dws / dwd_dws_index / dwd_index)
|
||||
- 3 种处理模式(increment_only / verify_only / increment_verify)
|
||||
- 自动注入 --store-id 参数
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from ..schemas.tasks import TaskConfigSchema
|
||||
|
||||
# Set of valid Flow IDs (kept in alphabetical order).
VALID_FLOWS: set[str] = {
    "api_full",
    "api_ods",
    "api_ods_dwd",
    "dwd_dws",
    "dwd_dws_index",
    "dwd_index",
    "ods_dwd",
}
|
||||
|
||||
# Set of valid processing modes.
VALID_PROCESSING_MODES: set[str] = {
    "increment_verify",
    "increment_only",
    "verify_only",
}
|
||||
|
||||
# extra_args keys that may be forwarded to the CLI (value-taking options plus
# boolean flags); any other key is ignored when the command is built.
CLI_SUPPORTED_ARGS: set[str] = {
    # --- options that take a value ---
    # database connection
    "pg_dsn",
    "pg_host",
    "pg_port",
    "pg_name",
    "pg_user",
    "pg_password",
    # upstream API
    "api_base",
    "api_token",
    "api_timeout",
    "api_page_size",
    "api_retry_max",
    # directories
    "export_root",
    "log_root",
    "fetch_root",
    # miscellaneous
    "ingest_source",
    "idle_start",
    "idle_end",
    "data_source",
    "pipeline_flow",
    "window_split_unit",
    # --- boolean flags ---
    "force_window_override",
    "write_pretty_json",
    "allow_empty_advance",
}
|
||||
|
||||
|
||||
class CLIBuilder:
    """Translate a TaskConfigSchema into an ETL CLI argument list."""

    def build_command(
        self,
        config: TaskConfigSchema,
        etl_project_path: str,
        python_executable: str = "python",
    ) -> list[str]:
        """Build the full CLI argument list for one ETL run.

        The result has the shape:
            [python, -m, cli.main, --flow, {flow_id}, --tasks, ..., --store-id, {id}, ...]

        Args:
            config: Task configuration object (Pydantic model).
            etl_project_path: ETL project root. It is meant to be used as the
                subprocess cwd by the caller and is NOT put into the command.
            python_executable: Python interpreter to invoke, default "python".

        Returns:
            The argument list, suitable for subprocess execution without a shell.
        """
        cmd: list[str] = [python_executable, "-m", "cli.main"]

        # Flow (execution pipeline)
        cmd.extend(["--flow", config.pipeline])

        # Processing mode
        if config.processing_mode:
            cmd.extend(["--processing-mode", config.processing_mode])

        # Task list, passed as one comma-separated value
        if config.tasks:
            cmd.extend(["--tasks", ",".join(config.tasks)])

        # Fetch from the API before verifying: only emitted in verify_only mode
        if config.fetch_before_verify and config.processing_mode == "verify_only":
            cmd.append("--fetch-before-verify")

        # Time window: look-back mode or an explicit start/end window
        if config.window_mode == "lookback":
            if config.lookback_hours is not None:
                cmd.extend(["--lookback-hours", str(config.lookback_hours)])
            if config.overlap_seconds is not None:
                cmd.extend(["--overlap-seconds", str(config.overlap_seconds)])
        else:
            if config.window_start:
                cmd.extend(["--window-start", config.window_start])
            if config.window_end:
                cmd.extend(["--window-end", config.window_end])

        # Window splitting ("none" means no splitting, so nothing is emitted)
        if config.window_split and config.window_split != "none":
            cmd.extend(["--window-split", config.window_split])
            if config.window_split_days is not None:
                cmd.extend(["--window-split-days", str(config.window_split_days)])

        # Dry run
        if config.dry_run:
            cmd.append("--dry-run")

        # Force full processing
        if config.force_full:
            cmd.append("--force-full")

        # Local JSON mode maps to "--data-source offline"
        if config.ods_use_local_json:
            cmd.extend(["--data-source", "offline"])

        # Store ID (injected automatically when present)
        if config.store_id is not None:
            cmd.extend(["--store-id", str(config.store_id)])

        # Extra arguments: only whitelisted keys are forwarded. A missing/None
        # extra_args is treated as "no extra arguments" instead of crashing.
        for key, value in (config.extra_args or {}).items():
            if value is None or key not in CLI_SUPPORTED_ARGS:
                continue
            arg_name = f"--{key.replace('_', '-')}"
            if isinstance(value, bool):
                # Boolean flags are emitted bare, and only when true.
                if value:
                    cmd.append(arg_name)
            else:
                cmd.extend([arg_name, str(value)])

        return cmd

    def build_command_string(
        self,
        config: TaskConfigSchema,
        etl_project_path: str,
        python_executable: str = "python",
    ) -> str:
        """Build a single command-line string for display / logging.

        Arguments that are empty, contain whitespace, or contain a double
        quote are wrapped in double quotes; embedded double quotes are
        escaped as \\" so argument boundaries stay unambiguous.

        NOTE(review): whitelisted extra_args such as pg_password / api_token
        end up verbatim in this string — consider masking before logging.
        """
        cmd = self.build_command(config, etl_project_path, python_executable)
        return " ".join(self._quote_for_display(arg) for arg in cmd)

    @staticmethod
    def _quote_for_display(arg: str) -> str:
        """Quote one argument for human-readable display."""
        needs_quotes = (
            not arg
            or '"' in arg
            or any(ch.isspace() for ch in arg)
        )
        if not needs_quotes:
            return arg
        escaped = arg.replace('"', '\\"')
        return f'"{escaped}"'
|
||||
|
||||
|
||||
# Module-level singleton instance
cli_builder = CLIBuilder()
|
||||
303
apps/backend/app/services/scheduler.py
Normal file
303
apps/backend/app/services/scheduler.py
Normal file
@@ -0,0 +1,303 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""调度器服务
|
||||
|
||||
后台 asyncio 循环,每 30 秒检查一次到期的调度任务,
|
||||
将其 TaskConfig 入队到 TaskQueue。
|
||||
|
||||
核心逻辑:
|
||||
- check_and_enqueue():查询 enabled=true 且 next_run_at <= now 的调度任务
|
||||
- start() / stop():管理后台循环生命周期
|
||||
- calculate_next_run():根据 ScheduleConfig 计算下次执行时间
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from ..database import get_connection
|
||||
from ..schemas.schedules import ScheduleConfigSchema
|
||||
from ..schemas.tasks import TaskConfigSchema
|
||||
from .task_queue import task_queue
|
||||
|
||||
# Module-level logger
logger = logging.getLogger(__name__)

# Scheduler polling interval, in seconds
SCHEDULER_POLL_INTERVAL = 30
|
||||
|
||||
|
||||
def _parse_time(time_str: str) -> tuple[int, int]:
    """Split an "HH:MM" string into an ``(hour, minute)`` pair of ints.

    Only the first two colon-separated fields are read; no range check is
    performed here.
    """
    pieces = time_str.split(":")
    hour = int(pieces[0])
    minute = int(pieces[1])
    return hour, minute
|
||||
|
||||
|
||||
def calculate_next_run(
    schedule_config: ScheduleConfigSchema,
    now: datetime | None = None,
) -> datetime | None:
    """Work out when a schedule should fire next.

    Args:
        schedule_config: The schedule definition (type plus type-specific fields).
        now: Reference time; defaults to the current UTC time. Injectable for tests.

    Returns:
        The next run time, or None when the schedule should not run again
        ("once" schedules, unknown interval units, unknown schedule types).
    """
    reference = datetime.now(timezone.utc) if now is None else now
    kind = schedule_config.schedule_type

    def _on_day(offset_days: int, hour: int, minute: int) -> datetime:
        # Shift by whole days, then pin the wall-clock time.
        shifted = reference + timedelta(days=offset_days)
        return shifted.replace(hour=hour, minute=minute, second=0, microsecond=0)

    # One-shot schedules are never rescheduled.
    if kind == "once":
        return None

    if kind == "interval":
        amount = schedule_config.interval_value
        # All three candidates are built up front, then one is picked by unit.
        steps = {
            unit: timedelta(**{unit: amount})
            for unit in ("minutes", "hours", "days")
        }
        step = steps.get(schedule_config.interval_unit)
        if step is None:
            logger.warning("未知的 interval_unit: %s", schedule_config.interval_unit)
            return None
        return reference + step

    if kind == "daily":
        hour, minute = _parse_time(schedule_config.daily_time)
        # Always tomorrow at daily_time, regardless of the current time of day.
        return _on_day(1, hour, minute)

    if kind == "weekly":
        hour, minute = _parse_time(schedule_config.weekly_time)
        if schedule_config.weekly_days:
            weekdays = sorted(schedule_config.weekly_days)
        else:
            weekdays = [1]
        # ISO weekday numbering: 1 = Monday ... 7 = Sunday
        today = reference.isoweekday()

        # First configured weekday strictly later in the current week, if any.
        upcoming = next((d for d in weekdays if d > today), None)
        if upcoming is not None:
            offset = upcoming - today
        else:
            # Nothing later this week: wrap to the first configured day next week.
            offset = 7 - today + weekdays[0]
        return _on_day(offset, hour, minute)

    if kind == "cron":
        # Only a small cron subset is understood; see _parse_simple_cron.
        return _parse_simple_cron(schedule_config.cron_expression, reference)

    logger.warning("未知的 schedule_type: %s", kind)
    return None
|
||||
|
||||
|
||||
def _parse_simple_cron(expression: str, now: datetime) -> datetime | None:
    """Minimal cron parser for plain 5-field expressions.

    Supported forms:
    - "M H * * *" -> tomorrow at H:M (i.e. daily)
    - "M H * * D" -> next weekday D at H:M (D is 0-7; 0 and 7 both mean Sunday)

    A "*" in the minute or hour field is treated as 0. The day-of-month and
    month fields are ignored. Ranges, lists and steps are not supported.

    Fallbacks (each is logged as a warning):
    - wrong field count, or a minute/hour that is non-numeric or out of
      range (minute 0-59, hour 0-23) -> tomorrow at 04:00
    - a day-of-week that is non-numeric or outside 0-7 -> tomorrow at H:M

    Args:
        expression: The cron expression.
        now: Reference time the result is computed from.

    Returns:
        The next run time, carrying the same tzinfo as ``now``.
    """

    def _on_day(offset_days: int, hour: int, minute: int) -> datetime:
        shifted = now + timedelta(days=offset_days)
        return shifted.replace(hour=hour, minute=minute, second=0, microsecond=0)

    parts = expression.strip().split()
    if len(parts) != 5:
        logger.warning("无法解析 cron 表达式: %s,回退到明天 04:00", expression)
        return _on_day(1, 4, 0)

    minute_str, hour_str, _dom, _month, dow = parts

    try:
        minute = int(minute_str) if minute_str != "*" else 0
        hour = int(hour_str) if hour_str != "*" else 0
        # Reject out-of-range values here; otherwise datetime.replace() would
        # raise later and the caller would never get a next-run time.
        if not (0 <= minute <= 59 and 0 <= hour <= 23):
            raise ValueError("cron minute/hour out of range")
    except ValueError:
        logger.warning("cron 表达式时间字段无法解析: %s,回退到明天 04:00", expression)
        return _on_day(1, 4, 0)

    # No day-of-week restriction: run tomorrow at H:M.
    if dow == "*":
        return _on_day(1, hour, minute)

    try:
        cron_dow = int(dow)  # 0=Sunday, 1=Monday, ..., 6=Saturday, 7=Sunday
        if not 0 <= cron_dow <= 7:
            raise ValueError("cron day-of-week out of range")
    except ValueError:
        logger.warning("cron 表达式星期字段无法解析: %s,回退到明天同一时间", expression)
        return _on_day(1, hour, minute)

    # Convert to ISO weekday (1=Monday ... 7=Sunday).
    iso_dow = 7 if cron_dow == 0 else cron_dow
    current_iso = now.isoweekday()

    if iso_dow != current_iso:
        # Days until the next occurrence of that weekday (1..6).
        delta_days = (iso_dow - current_iso) % 7
    elif now < _on_day(0, hour, minute):
        # Same weekday and the time is still ahead of us today.
        delta_days = 0
    else:
        delta_days = 7

    return _on_day(delta_days, hour, minute)
|
||||
|
||||
|
||||
class Scheduler:
    """PostgreSQL-backed periodic scheduler.

    A background asyncio task wakes up every SCHEDULER_POLL_INTERVAL seconds,
    looks for due rows in scheduled_tasks and enqueues their task config
    into the task queue.
    """

    def __init__(self) -> None:
        self._running = False
        self._loop_task: asyncio.Task | None = None

    # ------------------------------------------------------------------
    # Core: find due schedules and enqueue them
    # ------------------------------------------------------------------

    def check_and_enqueue(self) -> int:
        """Enqueue every schedule with enabled = TRUE and next_run_at <= NOW().

        Each row is handled independently: a row whose config cannot be
        parsed, whose next run time cannot be computed, or whose enqueue
        fails is logged and skipped without affecting the other rows.

        Returns:
            Number of tasks enqueued in this pass.
        """
        conn = get_connection()
        enqueued = 0
        try:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    SELECT id, site_id, task_config, schedule_config
                    FROM scheduled_tasks
                    WHERE enabled = TRUE
                      AND next_run_at IS NOT NULL
                      AND next_run_at <= NOW()
                    ORDER BY next_run_at ASC
                    """
                )
                rows = cur.fetchall()

            for row in rows:
                if self._process_due_row(conn, row):
                    enqueued += 1

        except Exception:
            logger.exception("check_and_enqueue 执行异常")
            try:
                conn.rollback()
            except Exception:
                # Best effort: the connection may already be unusable.
                logger.debug("check_and_enqueue rollback 失败", exc_info=True)
        finally:
            conn.close()

        if enqueued > 0:
            logger.info("本轮调度检查:%d 个任务入队", enqueued)
        return enqueued

    def _process_due_row(self, conn, row) -> bool:
        """Enqueue one due schedule row and advance its next_run_at.

        Returns True when the task was enqueued, False when the row was skipped.
        """
        task_id = str(row[0])
        site_id = row[1]

        # Parsing is inside the guard so one malformed row cannot abort the batch.
        try:
            task_config_raw = row[2] if isinstance(row[2], dict) else json.loads(row[2])
            schedule_config_raw = row[3] if isinstance(row[3], dict) else json.loads(row[3])
            config = TaskConfigSchema(**task_config_raw)
            schedule_cfg = ScheduleConfigSchema(**schedule_config_raw)
        except Exception:
            logger.exception("调度任务 [%s] 配置反序列化失败,跳过", task_id)
            return False

        # Compute the next run BEFORE enqueueing. If this failed after the
        # enqueue, next_run_at would stay in the past and the same task would
        # be enqueued again on every poll.
        try:
            next_run = calculate_next_run(schedule_cfg, datetime.now(timezone.utc))
        except Exception:
            logger.exception("调度任务 [%s] 计算下次执行时间失败,跳过", task_id)
            return False

        try:
            queue_id = task_queue.enqueue(config, site_id)
            logger.info(
                "调度任务 [%s] 入队成功 → queue_id=%s site_id=%s",
                task_id, queue_id, site_id,
            )
        except Exception:
            logger.exception("调度任务 [%s] 入队失败", task_id)
            return False

        # Record the run and move next_run_at forward; committed per row so a
        # later row's failure cannot undo this bookkeeping.
        try:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    UPDATE scheduled_tasks
                    SET last_run_at = NOW(),
                        run_count = run_count + 1,
                        next_run_at = %s,
                        last_status = 'enqueued',
                        updated_at = NOW()
                    WHERE id = %s
                    """,
                    (next_run, task_id),
                )
            conn.commit()
        except Exception:
            logger.exception("调度任务 [%s] 已入队但状态更新失败", task_id)
            # Clear the failed transaction so the remaining rows can proceed.
            conn.rollback()
        return True

    # ------------------------------------------------------------------
    # Background loop
    # ------------------------------------------------------------------

    async def _loop(self) -> None:
        """Background loop: run one check every SCHEDULER_POLL_INTERVAL seconds."""
        self._running = True
        logger.info("Scheduler 后台循环启动(间隔 %ds)", SCHEDULER_POLL_INTERVAL)
        loop = asyncio.get_running_loop()

        while self._running:
            try:
                # The check does synchronous DB work; run it in the default
                # executor so the event loop is not blocked.
                await loop.run_in_executor(None, self.check_and_enqueue)
            except Exception:
                logger.exception("Scheduler 循环迭代异常")

            await asyncio.sleep(SCHEDULER_POLL_INTERVAL)

        logger.info("Scheduler 后台循环停止")

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def start(self) -> None:
        """Start the background loop; must be called from a running event loop.

        Does nothing if the loop task is already running.
        """
        if self._loop_task is None or self._loop_task.done():
            self._loop_task = asyncio.create_task(self._loop())
            logger.info("Scheduler 已启动")

    async def stop(self) -> None:
        """Stop the background loop by cancelling its task and awaiting it."""
        self._running = False
        if self._loop_task and not self._loop_task.done():
            self._loop_task.cancel()
            try:
                await self._loop_task
            except asyncio.CancelledError:
                # Expected: this is the cancellation we just requested.
                pass
        self._loop_task = None
        logger.info("Scheduler 已停止")
|
||||
|
||||
|
||||
# Module-level singleton instance
scheduler = Scheduler()
|
||||
391
apps/backend/app/services/task_executor.py
Normal file
391
apps/backend/app/services/task_executor.py
Normal file
@@ -0,0 +1,391 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""ETL 任务执行器
|
||||
|
||||
通过 subprocess.Popen(在线程池中运行)启动 ETL CLI 子进程,
|
||||
逐行读取 stdout/stderr 并广播到 WebSocket 订阅者,
|
||||
执行完成后将结果写入 task_execution_log 表。
|
||||
|
||||
设计要点:
|
||||
- 每个 execution_id 对应一个子进程,存储在 _processes 字典中
|
||||
- 日志行存储在内存缓冲区 _log_buffers 中
|
||||
- WebSocket 订阅者通过 asyncio.Queue 接收实时日志
|
||||
- Windows 兼容:取消时使用 process.terminate() 而非 SIGTERM
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from ..config import ETL_PROJECT_PATH
|
||||
from ..database import get_connection
|
||||
from ..schemas.tasks import TaskConfigSchema
|
||||
from ..services.cli_builder import cli_builder
|
||||
|
||||
# Module-level logger
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TaskExecutor:
    """Manage the lifecycle of ETL CLI subprocesses.

    Each execution_id maps to one subprocess. Output lines are kept in an
    in-memory buffer and pushed to subscriber queues (used by WebSocket
    handlers). The subprocess is run with subprocess.Popen inside a worker
    thread; all subscriber-queue access is marshalled back onto the event
    loop because asyncio.Queue is not thread-safe.
    """

    def __init__(self) -> None:
        # execution_id -> subprocess.Popen
        self._processes: dict[str, subprocess.Popen] = {}
        # execution_id -> list[str] (stdout + stderr interleaved, tagged)
        self._log_buffers: dict[str, list[str]] = {}
        # execution_id -> set[asyncio.Queue] (subscribers)
        self._subscribers: dict[str, set[asyncio.Queue[str | None]]] = {}
        # execution_ids for which cancel() sent a terminate signal
        self._cancel_requested: set[str] = set()
        # Event loop that owns the subscriber queues (recorded lazily)
        self._loop: asyncio.AbstractEventLoop | None = None

    # ------------------------------------------------------------------
    # Subscription management
    # ------------------------------------------------------------------

    def _remember_loop(self) -> None:
        """Record the running event loop, if this is called from one."""
        try:
            self._loop = asyncio.get_running_loop()
        except RuntimeError:
            # Called outside an event loop: keep whatever was recorded before.
            pass

    def subscribe(self, execution_id: str) -> asyncio.Queue[str | None]:
        """Register a subscriber and return the queue it should read from.

        The queue receives str items for log lines and None once the
        execution has finished.
        """
        self._remember_loop()
        queue: asyncio.Queue[str | None] = asyncio.Queue()
        self._subscribers.setdefault(execution_id, set()).add(queue)
        return queue

    def unsubscribe(self, execution_id: str, queue: asyncio.Queue[str | None]) -> None:
        """Remove a subscriber; drops the execution's entry when it becomes empty."""
        subs = self._subscribers.get(execution_id)
        if subs:
            subs.discard(queue)
            if not subs:
                del self._subscribers[execution_id]

    def _deliver(self, execution_id: str, item: str | None) -> None:
        """Put ``item`` on every subscriber queue of ``execution_id``."""
        # Iterate over a snapshot so (un)subscribe during delivery is harmless.
        for q in tuple(self._subscribers.get(execution_id, ())):
            q.put_nowait(item)

    def _publish(self, execution_id: str, item: str | None) -> None:
        """Deliver ``item``, hopping onto the event loop when called off-loop."""
        loop = self._loop
        try:
            current = asyncio.get_running_loop()
        except RuntimeError:
            current = None

        if loop is None or loop is current:
            self._deliver(execution_id, item)
            return
        try:
            # Reader threads land here; only the loop thread touches the queues.
            loop.call_soon_threadsafe(self._deliver, execution_id, item)
        except RuntimeError:
            # The loop has been closed (shutdown); nobody is left to notify.
            logger.debug("事件循环已关闭,丢弃广播 [%s]", execution_id)

    def _broadcast(self, execution_id: str, line: str) -> None:
        """Broadcast one log line to all subscribers."""
        self._publish(execution_id, line)

    def _broadcast_end(self, execution_id: str) -> None:
        """Tell all subscribers the execution is over (None sentinel)."""
        self._publish(execution_id, None)

    # ------------------------------------------------------------------
    # Log buffer
    # ------------------------------------------------------------------

    def get_logs(self, execution_id: str) -> list[str]:
        """Return a copy of the in-memory log buffer for an execution."""
        return list(self._log_buffers.get(execution_id, []))

    # ------------------------------------------------------------------
    # Status queries
    # ------------------------------------------------------------------

    def is_running(self, execution_id: str) -> bool:
        """Return True while the subprocess of ``execution_id`` is alive."""
        proc = self._processes.get(execution_id)
        if proc is None:
            return False
        return proc.poll() is None

    def get_running_ids(self) -> list[str]:
        """Return the execution_ids whose subprocess is still alive."""
        # poll() refreshes the exit status, matching is_running().
        return [eid for eid, p in list(self._processes.items()) if p.poll() is None]

    # ------------------------------------------------------------------
    # Core execution
    # ------------------------------------------------------------------

    async def execute(
        self,
        config: TaskConfigSchema,
        execution_id: str,
        queue_id: str | None = None,
        site_id: int | None = None,
    ) -> None:
        """Run the ETL CLI as a subprocess and record the outcome.

        The subprocess runs via subprocess.Popen in the default executor
        (worker thread), which also works on Windows. A "running" row is
        inserted into task_execution_log first and updated on completion
        with status, exit code, duration and captured output.

        Final status: "success" for exit code 0; "cancelled" when cancel()
        terminated the process or this coroutine was cancelled; otherwise
        "failed". A cancellation of this coroutine is not re-raised.

        Args:
            config: Task configuration.
            execution_id: ID of this execution (primary key of the log row).
            queue_id: ID of the originating queue entry, if any.
            site_id: Store ID; falls back to config.store_id when falsy.
        """
        loop = asyncio.get_running_loop()
        self._loop = loop

        cmd = cli_builder.build_command(
            config, ETL_PROJECT_PATH, python_executable=sys.executable
        )
        # NOTE(review): the joined command is logged and stored in the DB;
        # it can contain credentials passed through extra_args
        # (e.g. pg_password / api_token). Consider masking.
        command_str = " ".join(cmd)
        effective_site_id = site_id or config.store_id

        logger.info(
            "启动 ETL 子进程 [%s]: %s (cwd=%s)",
            execution_id, command_str, ETL_PROJECT_PATH,
        )

        self._log_buffers[execution_id] = []
        started_at = datetime.now(timezone.utc)
        t0 = time.monotonic()

        self._write_execution_log(
            execution_id=execution_id,
            queue_id=queue_id,
            site_id=effective_site_id,
            task_codes=config.tasks,
            status="running",
            started_at=started_at,
            command=command_str,
        )

        exit_code: int | None = None
        status = "running"
        stdout_lines: list[str] = []
        stderr_lines: list[str] = []

        try:
            # Extra environment variables (DWD table filter is passed via env).
            extra_env: dict[str, str] = {}
            if config.dwd_only_tables:
                extra_env["DWD_ONLY_TABLES"] = ",".join(config.dwd_only_tables)

            exit_code = await loop.run_in_executor(
                None,
                self._run_subprocess,
                cmd,
                execution_id,
                stdout_lines,
                stderr_lines,
                extra_env or None,
            )

            if exit_code == 0:
                status = "success"
            elif execution_id in self._cancel_requested:
                # Terminated on request: a user cancel is not a failure.
                status = "cancelled"
            else:
                status = "failed"

            logger.info(
                "ETL 子进程 [%s] 退出,exit_code=%s, status=%s",
                execution_id, exit_code, status,
            )

        except asyncio.CancelledError:
            status = "cancelled"
            logger.info("ETL 子进程 [%s] 已取消", execution_id)
            # Try to stop the subprocess that is still running in the worker thread.
            proc = self._processes.get(execution_id)
            if proc and proc.poll() is None:
                proc.terminate()
        except Exception as exc:
            status = "failed"
            import traceback
            tb = traceback.format_exc()
            stderr_lines.append(f"[task_executor] 子进程启动/执行异常: {exc}")
            stderr_lines.append(tb)
            logger.exception("ETL 子进程 [%s] 执行异常", execution_id)
        finally:
            elapsed_ms = int((time.monotonic() - t0) * 1000)
            finished_at = datetime.now(timezone.utc)

            self._broadcast_end(execution_id)
            self._processes.pop(execution_id, None)
            self._cancel_requested.discard(execution_id)

            self._update_execution_log(
                execution_id=execution_id,
                status=status,
                finished_at=finished_at,
                exit_code=exit_code,
                duration_ms=elapsed_ms,
                output_log="\n".join(stdout_lines),
                error_log="\n".join(stderr_lines),
            )

    def _run_subprocess(
        self,
        cmd: list[str],
        execution_id: str,
        stdout_lines: list[str],
        stderr_lines: list[str],
        extra_env: dict[str, str] | None = None,
    ) -> int:
        """Run the subprocess (blocking) and stream its output line by line.

        Intended to run in a worker thread. stdout and stderr are each read
        by their own thread; every line is appended to the execution's log
        buffer (tagged with the stream name), to the matching collector
        list (untagged) and broadcast to subscribers.

        Returns:
            The subprocess exit code.
        """
        import os

        env = os.environ.copy()
        # Force UTF-8 output from the child to avoid GBK mojibake on Windows.
        env["PYTHONIOENCODING"] = "utf-8"
        if extra_env:
            env.update(extra_env)

        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=ETL_PROJECT_PATH,
            env=env,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
        self._processes[execution_id] = proc

        def read_stream(
            stream, stream_name: str, collector: list[str],
        ) -> None:
            """Read a pipe line by line, buffering and broadcasting each line."""
            for raw_line in stream:
                line = raw_line.rstrip("\n").rstrip("\r")
                tagged = f"[{stream_name}] {line}"
                buf = self._log_buffers.get(execution_id)
                if buf is not None:
                    buf.append(tagged)
                collector.append(line)
                self._broadcast(execution_id, tagged)

        readers = [
            (
                threading.Thread(
                    target=read_stream, args=(proc.stdout, "stdout", stdout_lines),
                    daemon=True,
                ),
                proc.stdout,
            ),
            (
                threading.Thread(
                    target=read_stream, args=(proc.stderr, "stderr", stderr_lines),
                    daemon=True,
                ),
                proc.stderr,
            ),
        ]
        for thread, _stream in readers:
            thread.start()

        proc.wait()
        for thread, stream in readers:
            # Bounded wait: a grandchild may keep the pipe open after exit.
            thread.join(timeout=5)
            if not thread.is_alive() and stream is not None:
                # Reader hit EOF; release the pipe handle right away.
                stream.close()

        return proc.returncode

    # ------------------------------------------------------------------
    # Cancellation
    # ------------------------------------------------------------------

    async def cancel(self, execution_id: str) -> bool:
        """Send a terminate signal to the subprocess of ``execution_id``.

        Returns:
            True if the signal was sent; False if there is no such process
            or it has already exited.
        """
        proc = self._processes.get(execution_id)
        if proc is None:
            return False
        # Popen.poll() returns None while the process is still running.
        if proc.poll() is not None:
            return False

        logger.info("取消 ETL 子进程 [%s], pid=%s", execution_id, proc.pid)
        # Remember the request so execute() records "cancelled", not "failed".
        self._cancel_requested.add(execution_id)
        try:
            proc.terminate()
        except ProcessLookupError:
            self._cancel_requested.discard(execution_id)
            return False
        return True

    # ------------------------------------------------------------------
    # Database operations (synchronous, direct connection)
    # ------------------------------------------------------------------

    @staticmethod
    def _write_execution_log(
        *,
        execution_id: str,
        queue_id: str | None,
        site_id: int | None,
        task_codes: list[str],
        status: str,
        started_at: datetime,
        command: str,
    ) -> None:
        """Insert the initial task_execution_log row for an execution.

        A falsy site_id is stored as 0. Failures are logged, never raised.
        """
        try:
            conn = get_connection()
            try:
                with conn.cursor() as cur:
                    cur.execute(
                        """
                        INSERT INTO task_execution_log
                            (id, queue_id, site_id, task_codes, status,
                             started_at, command)
                        VALUES (%s, %s, %s, %s, %s, %s, %s)
                        """,
                        (
                            execution_id,
                            queue_id,
                            site_id or 0,
                            task_codes,
                            status,
                            started_at,
                            command,
                        ),
                    )
                conn.commit()
            finally:
                conn.close()
        except Exception:
            logger.exception("写入 execution_log 失败 [%s]", execution_id)

    @staticmethod
    def _update_execution_log(
        *,
        execution_id: str,
        status: str,
        finished_at: datetime,
        exit_code: int | None,
        duration_ms: int,
        output_log: str,
        error_log: str,
    ) -> None:
        """Update the task_execution_log row with the final outcome.

        Failures are logged, never raised.
        """
        try:
            conn = get_connection()
            try:
                with conn.cursor() as cur:
                    cur.execute(
                        """
                        UPDATE task_execution_log
                        SET status = %s,
                            finished_at = %s,
                            exit_code = %s,
                            duration_ms = %s,
                            output_log = %s,
                            error_log = %s
                        WHERE id = %s
                        """,
                        (
                            status,
                            finished_at,
                            exit_code,
                            duration_ms,
                            output_log,
                            error_log,
                            execution_id,
                        ),
                    )
                conn.commit()
            finally:
                conn.close()
        except Exception:
            logger.exception("更新 execution_log 失败 [%s]", execution_id)

    # ------------------------------------------------------------------
    # Cleanup
    # ------------------------------------------------------------------

    def cleanup(self, execution_id: str) -> None:
        """Drop the in-memory log buffer and subscribers of an execution.

        Usually called once the logs are known to be persisted.
        """
        self._log_buffers.pop(execution_id, None)
        self._subscribers.pop(execution_id, None)
|
||||
|
||||
|
||||
# Module-level singleton instance
task_executor = TaskExecutor()
|
||||
486
apps/backend/app/services/task_queue.py
Normal file
486
apps/backend/app/services/task_queue.py
Normal file
@@ -0,0 +1,486 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""任务队列服务
|
||||
|
||||
基于 PostgreSQL task_queue 表实现 FIFO 队列,支持:
|
||||
- enqueue:入队,自动分配 position(当前最大 + 1)
|
||||
- dequeue:取出 position 最小的 pending 任务
|
||||
- reorder:调整任务在队列中的位置
|
||||
- delete:删除 pending 任务
|
||||
- process_loop:后台协程,队列非空且无运行中任务时自动取出执行
|
||||
|
||||
所有操作按 site_id 过滤,实现门店隔离。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from ..database import get_connection
|
||||
from ..schemas.tasks import TaskConfigSchema
|
||||
|
||||
# Module-level logger
logger = logging.getLogger(__name__)

# Polling interval of the background loop, in seconds
POLL_INTERVAL_SECONDS = 2
|
||||
|
||||
|
||||
@dataclass
class QueuedTask:
    """Plain data object for one task_queue row."""

    # --- required columns ---
    id: str
    site_id: int
    config: dict[str, Any]
    status: str
    position: int
    # --- optional columns, None when not provided ---
    created_at: Any = field(default=None)
    started_at: Any = field(default=None)
    finished_at: Any = field(default=None)
    exit_code: int | None = field(default=None)
    error_message: str | None = field(default=None)
|
||||
|
||||
|
||||
class TaskQueue:
|
||||
"""基于 PostgreSQL 的任务队列"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._running = False
|
||||
self._loop_task: asyncio.Task | None = None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 入队
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def enqueue(self, config: TaskConfigSchema, site_id: int) -> str:
    """Append a task configuration to a store's queue.

    The new row gets status 'pending' and a position one greater than the
    highest position among that store's pending tasks (1 for an empty queue).

    Args:
        config: Task configuration to store.
        site_id: Store ID the task belongs to (store isolation).

    Returns:
        The ID of the new queue entry (a UUID string).
    """
    new_task_id = str(uuid.uuid4())
    payload = config.model_dump(mode="json")

    conn = get_connection()
    try:
        with conn.cursor() as cur:
            # The tail of the queue is max(position) over pending rows.
            cur.execute(
                """
                SELECT COALESCE(MAX(position), 0)
                FROM task_queue
                WHERE site_id = %s AND status = 'pending'
                """,
                (site_id,),
            )
            tail_position = cur.fetchone()[0]
            next_position = tail_position + 1

            insert_params = (new_task_id, site_id, json.dumps(payload), next_position)
            cur.execute(
                """
                INSERT INTO task_queue (id, site_id, config, status, position)
                VALUES (%s, %s, %s, 'pending', %s)
                """,
                insert_params,
            )
        conn.commit()
    finally:
        conn.close()

    logger.info(
        "任务入队 [%s] site_id=%s position=%s", new_task_id, site_id, next_position
    )
    return new_task_id
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 出队
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def dequeue(self, site_id: int) -> QueuedTask | None:
    """Claim the pending task with the lowest position and mark it running.

    Args:
        site_id: Store ID whose queue is read.

    Returns:
        The claimed task (status "running"), or None when the store has no
        pending task available.
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            # Lock the head of the queue; rows locked by others are skipped.
            cur.execute(
                """
                SELECT id, site_id, config, status, position,
                       created_at, started_at, finished_at,
                       exit_code, error_message
                FROM task_queue
                WHERE site_id = %s AND status = 'pending'
                ORDER BY position ASC
                LIMIT 1
                FOR UPDATE SKIP LOCKED
                """,
                (site_id,),
            )
            head = cur.fetchone()
            if head is None:
                conn.commit()
                return None

            raw_config = head[2]
            if not isinstance(raw_config, dict):
                raw_config = json.loads(raw_config)

            claimed = QueuedTask(
                id=str(head[0]),
                site_id=head[1],
                config=raw_config,
                status=head[3],
                position=head[4],
                created_at=head[5],
                started_at=head[6],
                finished_at=head[7],
                exit_code=head[8],
                error_message=head[9],
            )

            # Flip the row to running within the same transaction.
            cur.execute(
                """
                UPDATE task_queue
                SET status = 'running', started_at = NOW()
                WHERE id = %s
                """,
                (claimed.id,),
            )
        conn.commit()
    finally:
        conn.close()

    # Mirror the DB state on the returned object.
    claimed.status = "running"
    logger.info("任务出队 [%s] site_id=%s", claimed.id, site_id)
    return claimed
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 重排
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def reorder(self, task_id: str, new_position: int, site_id: int) -> None:
    """Move a pending task to a new position in its store's queue.

    Only pending tasks can be moved. The other pending tasks keep their
    relative order, and all pending tasks are renumbered 1..n. If the task
    is not among the store's pending tasks, nothing is changed.

    Args:
        task_id: ID of the task to move.
        new_position: Target position (1-based); clamped to the queue bounds.
        site_id: Store ID.
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            # Current pending order for this store.
            cur.execute(
                """
                SELECT id FROM task_queue
                WHERE site_id = %s AND status = 'pending'
                ORDER BY position ASC
                """,
                (site_id,),
            )
            pending_ids = [str(record[0]) for record in cur.fetchall()]

            if task_id not in pending_ids:
                conn.commit()
                return

            # Take the task out, then splice it back in at the clamped
            # 0-based slot derived from the 1-based target position.
            pending_ids.remove(task_id)
            slot = new_position - 1
            slot = min(slot, len(pending_ids))
            slot = max(0, slot)
            pending_ids.insert(slot, task_id)

            # Renumber contiguously, starting at 1.
            position = 0
            for queued_id in pending_ids:
                position += 1
                cur.execute(
                    "UPDATE task_queue SET position = %s WHERE id = %s",
                    (position, queued_id),
                )
        conn.commit()
    finally:
        conn.close()

    logger.info(
        "任务重排 [%s] → position=%s site_id=%s",
        task_id, new_position, site_id,
    )
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 删除
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def delete(self, task_id: str, site_id: int) -> bool:
    """Remove a task from the queue if it is still pending.

    Args:
        task_id: ID of the task to delete.
        site_id: Store (site) the task must belong to.

    Returns:
        True when a row was deleted; False when no row matched (the task
        does not exist for this site or is no longer ``pending``).
    """
    delete_sql = """
        DELETE FROM task_queue
        WHERE id = %s AND site_id = %s AND status = 'pending'
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(delete_sql, (task_id, site_id))
            # rowcount tells us whether the guarded DELETE matched anything.
            removed = cur.rowcount > 0
        conn.commit()
    finally:
        conn.close()

    if not removed:
        logger.warning(
            "任务删除失败 [%s] site_id=%s(不存在或非 pending)",
            task_id, site_id,
        )
        return False

    logger.info("任务删除 [%s] site_id=%s", task_id, site_id)
    return True
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 查询
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def list_pending(self, site_id: int) -> list[QueuedTask]:
    """Return all pending tasks of a site, ordered by ascending position.

    Args:
        site_id: Store (site) whose pending queue is listed.

    Returns:
        One ``QueuedTask`` per pending row. ``config`` is passed through
        when the driver already returned a dict and JSON-decoded otherwise.
    """
    select_sql = """
        SELECT id, site_id, config, status, position,
               created_at, started_at, finished_at,
               exit_code, error_message
        FROM task_queue
        WHERE site_id = %s AND status = 'pending'
        ORDER BY position ASC
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(select_sql, (site_id,))
            records = cur.fetchall()
        conn.commit()
    finally:
        conn.close()

    pending: list[QueuedTask] = []
    for rec in records:
        raw_config = rec[2]
        # The config column may arrive as a dict or as JSON text.
        if not isinstance(raw_config, dict):
            raw_config = json.loads(raw_config)
        pending.append(
            QueuedTask(
                id=str(rec[0]),
                site_id=rec[1],
                config=raw_config,
                status=rec[3],
                position=rec[4],
                created_at=rec[5],
                started_at=rec[6],
                finished_at=rec[7],
                exit_code=rec[8],
                error_message=rec[9],
            )
        )
    return pending
|
||||
|
||||
def has_running(self, site_id: int) -> bool:
    """Tell whether the given site has any task in ``running`` status.

    Args:
        site_id: Store (site) to check.

    Returns:
        The value of the SQL ``EXISTS`` probe for that site.
    """
    exists_sql = """
        SELECT EXISTS(
            SELECT 1 FROM task_queue
            WHERE site_id = %s AND status = 'running'
        )
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(exists_sql, (site_id,))
            first_row = cur.fetchone()
            is_running = first_row[0]
        conn.commit()
    finally:
        conn.close()
    return is_running
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 后台处理循环
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def process_loop(self) -> None:
    """Background coroutine that keeps draining the queue.

    Each iteration delegates to ``_process_once`` (which looks at every
    site with pending tasks and starts one when nothing is running there),
    then sleeps ``POLL_INTERVAL_SECONDS``. The loop runs until
    ``self._running`` is cleared; an exception raised by one iteration is
    logged and does not end the loop.
    """
    # Imported lazily to avoid a circular import with the executor module.
    from .task_executor import task_executor

    self._running = True
    logger.info("TaskQueue process_loop 启动")

    while True:
        if not self._running:
            break
        try:
            await self._process_once(task_executor)
        except Exception:
            # Keep polling even if a single pass blows up.
            logger.exception("process_loop 迭代异常")

        await asyncio.sleep(POLL_INTERVAL_SECONDS)

    logger.info("TaskQueue process_loop 停止")
|
||||
|
||||
async def _process_once(self, executor: Any) -> None:
    """Run one scheduling pass over every site that has pending tasks.

    For each such site: skip it when a task is already running there,
    otherwise dequeue the next task, validate its stored config and launch
    the execution as a background asyncio task so the pass is not blocked
    by the run itself.

    A task whose stored config cannot be turned into a ``TaskConfigSchema``
    is marked failed and the pass moves on to the next site, instead of
    aborting the whole pass and leaving the dequeued row unfinalised.

    Args:
        executor: Object exposing an awaitable ``execute(...)``; it is
            handed to ``_execute_and_update`` unchanged.
    """
    for site_id in self._get_pending_site_ids():
        if self.has_running(site_id):
            continue

        task = self.dequeue(site_id)
        if task is None:
            continue

        try:
            config = TaskConfigSchema(**task.config)
        except (TypeError, ValueError):
            # TypeError: config is not a str-keyed mapping.
            # ValueError: schema validation errors (presumably pydantic's
            # ValidationError, which derives from ValueError — confirm).
            logger.exception(
                "队列任务配置无效 [%s] site_id=%s", task.id, site_id,
            )
            self._mark_failed(task.id, "任务配置无效,无法执行")
            continue

        execution_id = str(uuid.uuid4())

        logger.info(
            "process_loop 自动执行 [%s] queue_id=%s site_id=%s",
            execution_id, task.id, site_id,
        )

        # Fire and forget: the run must not block the scheduling pass.
        background = asyncio.create_task(
            self._execute_and_update(
                executor, config, execution_id, task.id, site_id,
            )
        )
        # The event loop keeps only weak references to tasks, so hold a
        # strong one until the task finishes or it may be collected mid-run.
        in_flight = getattr(self, "_background_tasks", None)
        if in_flight is None:
            in_flight = self._background_tasks = set()
        in_flight.add(background)
        background.add_done_callback(in_flight.discard)
|
||||
|
||||
async def _execute_and_update(
    self,
    executor: Any,
    config: TaskConfigSchema,
    execution_id: str,
    queue_id: str,
    site_id: int,
) -> None:
    """Run one queued task and write the outcome back to the queue row.

    Awaits ``executor.execute`` and then copies the result recorded in the
    execution log onto the ``task_queue`` row. Any exception raised along
    the way is logged and the queue row is marked ``failed`` instead.

    Args:
        executor: Object exposing an awaitable ``execute(...)``.
        config: Validated task configuration.
        execution_id: Identifier generated for this run.
        queue_id: ID of the ``task_queue`` row being executed.
        site_id: Store (site) the task belongs to.
    """
    run_kwargs = {
        "config": config,
        "execution_id": execution_id,
        "queue_id": queue_id,
        "site_id": site_id,
    }
    try:
        await executor.execute(**run_kwargs)
        # Sync the queue row with whatever the executor logged for this run.
        self._update_queue_status_from_log(queue_id)
    except Exception:
        logger.exception("队列任务执行异常 [%s]", queue_id)
        self._mark_failed(queue_id, "执行过程中发生未捕获异常")
|
||||
|
||||
def _get_pending_site_ids(self) -> list[int]:
    """Return the distinct ``site_id`` values that have pending tasks."""
    distinct_sql = """
        SELECT DISTINCT site_id FROM task_queue
        WHERE status = 'pending'
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(distinct_sql)
            records = cur.fetchall()
        conn.commit()
    finally:
        conn.close()

    site_ids = []
    for rec in records:
        site_ids.append(rec[0])
    return site_ids
|
||||
|
||||
def _update_queue_status_from_log(self, queue_id: str) -> None:
    """Copy the latest execution-log result onto the queue row.

    Reads the most recently started ``task_execution_log`` entry for
    ``queue_id`` and writes its status, finish time, exit code and error
    log into the matching ``task_queue`` row. When no log entry exists the
    queue row is left untouched.

    Args:
        queue_id: ID of the ``task_queue`` row to synchronise.
    """
    latest_log_sql = """
        SELECT status, finished_at, exit_code, error_log
        FROM task_execution_log
        WHERE queue_id = %s
        ORDER BY started_at DESC
        LIMIT 1
    """
    sync_sql = """
        UPDATE task_queue
        SET status = %s, finished_at = %s,
            exit_code = %s, error_message = %s
        WHERE id = %s
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(latest_log_sql, (queue_id,))
            latest = cur.fetchone()
            if latest:
                status, finished_at, exit_code, error_log = (
                    latest[0], latest[1], latest[2], latest[3],
                )
                cur.execute(
                    sync_sql,
                    (status, finished_at, exit_code, error_log, queue_id),
                )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
def _mark_failed(self, queue_id: str, error_message: str) -> None:
    """Set a queue row to ``failed`` with the given error message.

    The row's ``finished_at`` is stamped with the database's ``NOW()``.

    Args:
        queue_id: ID of the ``task_queue`` row to update.
        error_message: Text stored in the row's ``error_message`` column.
    """
    fail_sql = """
        UPDATE task_queue
        SET status = 'failed', finished_at = NOW(),
            error_message = %s
        WHERE id = %s
    """
    params = (error_message, queue_id)
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(fail_sql, params)
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 生命周期
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def start(self) -> None:
    """Launch the background processing loop unless one is already active.

    Intended to be called from the FastAPI lifespan hook. A new asyncio
    task running ``process_loop`` is created only when there is no loop
    task yet or the previous one has finished.
    """
    existing = self._loop_task
    if existing is not None and not existing.done():
        # A loop is already alive; nothing to do.
        return

    self._loop_task = asyncio.create_task(self.process_loop())
    logger.info("TaskQueue 后台循环已启动")
|
||||
|
||||
async def stop(self) -> None:
    """Stop the background processing loop and wait for it to unwind.

    Clears the ``_running`` flag, cancels the loop task if it is still
    active and awaits it, swallowing the resulting ``CancelledError``.
    The stored task handle is then reset to ``None``.
    """
    self._running = False

    loop_task = self._loop_task
    if loop_task and not loop_task.done():
        loop_task.cancel()
        try:
            await loop_task
        except asyncio.CancelledError:
            # Expected: we just cancelled it ourselves.
            pass

    self._loop_task = None
    logger.info("TaskQueue 后台循环已停止")
|
||||
|
||||
|
||||
# 全局单例
|
||||
task_queue = TaskQueue()
|
||||
221
apps/backend/app/services/task_registry.py
Normal file
221
apps/backend/app/services/task_registry.py
Normal file
@@ -0,0 +1,221 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""静态任务注册表
|
||||
|
||||
从 ETL orchestration/task_registry.py 提取的任务元数据硬编码副本。
|
||||
后端不直接导入 ETL 代码,避免引入重量级依赖链。
|
||||
|
||||
业务域分组逻辑:按任务代码前缀 / 目标表语义归类,与 GUI 保持一致。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class TaskDefinition:
    """Immutable metadata record describing a single ETL task."""

    # Unique task code, e.g. "ODS_MEMBER".
    code: str
    # Human-readable task name.
    name: str
    # Short description of what the task does.
    description: str
    # Business domain: 会员 / 结算 / 助教 / 商品 / 台桌 / 团购 / 库存 / 财务 / 指数 / 工具
    domain: str
    # Warehouse layer: ODS / DWD / DWS / INDEX / UTILITY
    layer: str
    requires_window: bool = True
    is_ods: bool = False
    is_dimension: bool = False
    default_enabled: bool = True
    # Marks commonly used tasks; False means a utility / manually run task.
    is_common: bool = True
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DwdTableDefinition:
    """Immutable metadata record describing a single DWD table."""

    # Fully qualified table name, including the schema.
    table_name: str
    # Human-readable table name.
    display_name: str
    # Business domain the table belongs to.
    domain: str
    # ODS source table this DWD table is loaded from.
    ods_source: str
    is_dimension: bool = False
|
||||
|
||||
|
||||
# ── ODS 任务定义 ──────────────────────────────────────────────
|
||||
|
||||
ODS_TASKS: list[TaskDefinition] = [
|
||||
TaskDefinition("ODS_ASSISTANT_ACCOUNT", "助教账号", "抽取助教账号主数据", "助教", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_ASSISTANT_LEDGER", "助教服务记录", "抽取助教服务流水", "助教", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_ASSISTANT_ABOLISH", "助教取消记录", "抽取助教取消/作废记录", "助教", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_SETTLEMENT_RECORDS", "结算记录", "抽取订单结算记录", "结算", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_SETTLEMENT_TICKET", "结账小票", "抽取结账小票明细", "结算", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_TABLE_USE", "台费流水", "抽取台费使用流水", "台桌", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_TABLE_FEE_DISCOUNT", "台费折扣", "抽取台费折扣记录", "台桌", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_TABLES", "台桌主数据", "抽取门店台桌信息", "台桌", "ODS", is_ods=True, requires_window=False),
|
||||
TaskDefinition("ODS_PAYMENT", "支付流水", "抽取支付交易记录", "结算", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_REFUND", "退款流水", "抽取退款交易记录", "结算", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_PLATFORM_COUPON", "平台券核销", "抽取平台优惠券核销记录", "团购", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_MEMBER", "会员主数据", "抽取会员档案", "会员", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_MEMBER_CARD", "会员储值卡", "抽取会员储值卡信息", "会员", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_MEMBER_BALANCE", "会员余额变动", "抽取会员余额变动记录", "会员", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_RECHARGE_SETTLE", "充值结算", "抽取充值结算记录", "会员", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_GROUP_PACKAGE", "团购套餐", "抽取团购套餐定义", "团购", "ODS", is_ods=True, requires_window=False),
|
||||
TaskDefinition("ODS_GROUP_BUY_REDEMPTION", "团购核销", "抽取团购核销记录", "团购", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_INVENTORY_STOCK", "库存快照", "抽取商品库存汇总", "库存", "ODS", is_ods=True, requires_window=False),
|
||||
TaskDefinition("ODS_INVENTORY_CHANGE", "库存变动", "抽取库存出入库记录", "库存", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_GOODS_CATEGORY", "商品分类", "抽取商品分类树", "商品", "ODS", is_ods=True, requires_window=False),
|
||||
TaskDefinition("ODS_STORE_GOODS", "门店商品", "抽取门店商品主数据", "商品", "ODS", is_ods=True, requires_window=False),
|
||||
TaskDefinition("ODS_STORE_GOODS_SALES", "商品销售", "抽取门店商品销售记录", "商品", "ODS", is_ods=True),
|
||||
TaskDefinition("ODS_TENANT_GOODS", "租户商品", "抽取租户级商品主数据", "商品", "ODS", is_ods=True, requires_window=False),
|
||||
]
|
||||
|
||||
# ── DWD 任务定义 ──────────────────────────────────────────────
|
||||
|
||||
DWD_TASKS: list[TaskDefinition] = [
|
||||
TaskDefinition("DWD_LOAD_FROM_ODS", "DWD 装载", "从 ODS 装载至 DWD(维度 SCD2 + 事实增量)", "通用", "DWD", requires_window=False),
|
||||
TaskDefinition("DWD_QUALITY_CHECK", "DWD 质量检查", "对 DWD 层数据执行质量校验", "通用", "DWD", requires_window=False, is_common=False),
|
||||
]
|
||||
|
||||
# ── DWS 任务定义 ──────────────────────────────────────────────
|
||||
|
||||
DWS_TASKS: list[TaskDefinition] = [
|
||||
TaskDefinition("DWS_BUILD_ORDER_SUMMARY", "订单汇总构建", "构建订单汇总宽表", "结算", "DWS"),
|
||||
TaskDefinition("DWS_ASSISTANT_DAILY", "助教日报", "汇总助教每日业绩", "助教", "DWS"),
|
||||
TaskDefinition("DWS_ASSISTANT_MONTHLY", "助教月报", "汇总助教月度业绩", "助教", "DWS"),
|
||||
TaskDefinition("DWS_ASSISTANT_CUSTOMER", "助教客户分析", "汇总助教-客户关系", "助教", "DWS"),
|
||||
TaskDefinition("DWS_ASSISTANT_SALARY", "助教工资计算", "计算助教工资", "助教", "DWS"),
|
||||
TaskDefinition("DWS_ASSISTANT_FINANCE", "助教财务汇总", "汇总助教财务数据", "助教", "DWS"),
|
||||
TaskDefinition("DWS_MEMBER_CONSUMPTION", "会员消费分析", "汇总会员消费数据", "会员", "DWS"),
|
||||
TaskDefinition("DWS_MEMBER_VISIT", "会员到店分析", "汇总会员到店频次", "会员", "DWS"),
|
||||
TaskDefinition("DWS_FINANCE_DAILY", "财务日报", "汇总每日财务数据", "财务", "DWS"),
|
||||
TaskDefinition("DWS_FINANCE_RECHARGE", "充值汇总", "汇总充值数据", "财务", "DWS"),
|
||||
TaskDefinition("DWS_FINANCE_INCOME_STRUCTURE", "收入结构", "分析收入结构", "财务", "DWS"),
|
||||
TaskDefinition("DWS_FINANCE_DISCOUNT_DETAIL", "折扣明细", "汇总折扣明细", "财务", "DWS"),
|
||||
# CHANGE [2026-02-19] intent: 同步 ETL 侧合并——原 DWS_RETENTION_CLEANUP / DWS_MV_REFRESH_* 已合并为 DWS_MAINTENANCE
|
||||
TaskDefinition("DWS_MAINTENANCE", "DWS 维护", "刷新物化视图 + 清理过期留存数据", "通用", "DWS", requires_window=False, is_common=False),
|
||||
]
|
||||
|
||||
# ── INDEX 任务定义 ────────────────────────────────────────────
|
||||
|
||||
INDEX_TASKS: list[TaskDefinition] = [
|
||||
TaskDefinition("DWS_WINBACK_INDEX", "回流指数 (WBI)", "计算会员回流指数", "指数", "INDEX"),
|
||||
TaskDefinition("DWS_NEWCONV_INDEX", "新客转化指数 (NCI)", "计算新客转化指数", "指数", "INDEX"),
|
||||
TaskDefinition("DWS_ML_MANUAL_IMPORT", "手动导入 (ML)", "手动导入机器学习数据", "指数", "INDEX", requires_window=False, is_common=False),
|
||||
TaskDefinition("DWS_RELATION_INDEX", "关系指数 (RS)", "计算助教-客户关系指数", "指数", "INDEX"),
|
||||
]
|
||||
|
||||
# ── 工具类任务定义 ────────────────────────────────────────────
|
||||
|
||||
UTILITY_TASKS: list[TaskDefinition] = [
|
||||
TaskDefinition("MANUAL_INGEST", "手动导入", "从本地 JSON 文件手动导入数据", "工具", "UTILITY", requires_window=False, is_common=False),
|
||||
TaskDefinition("INIT_ODS_SCHEMA", "初始化 ODS Schema", "创建 ODS 层表结构", "工具", "UTILITY", requires_window=False, is_common=False),
|
||||
TaskDefinition("INIT_DWD_SCHEMA", "初始化 DWD Schema", "创建 DWD 层表结构", "工具", "UTILITY", requires_window=False, is_common=False),
|
||||
TaskDefinition("INIT_DWS_SCHEMA", "初始化 DWS Schema", "创建 DWS 层表结构", "工具", "UTILITY", requires_window=False, is_common=False),
|
||||
TaskDefinition("ODS_JSON_ARCHIVE", "ODS JSON 归档", "归档 ODS 原始 JSON 文件", "工具", "UTILITY", requires_window=False, is_common=False),
|
||||
TaskDefinition("CHECK_CUTOFF", "游标检查", "检查各任务数据游标截止点", "工具", "UTILITY", requires_window=False, is_common=False),
|
||||
TaskDefinition("SEED_DWS_CONFIG", "DWS 配置种子", "初始化 DWS 配置数据", "工具", "UTILITY", requires_window=False, is_common=False),
|
||||
TaskDefinition("DATA_INTEGRITY_CHECK", "数据完整性校验", "校验跨层数据完整性", "工具", "UTILITY", requires_window=False, is_common=False),
|
||||
]
|
||||
|
||||
# ── 全量任务列表 ──────────────────────────────────────────────
|
||||
|
||||
ALL_TASKS: list[TaskDefinition] = ODS_TASKS + DWD_TASKS + DWS_TASKS + INDEX_TASKS + UTILITY_TASKS
|
||||
|
||||
# 按 code 索引,便于快速查找
|
||||
_TASK_BY_CODE: dict[str, TaskDefinition] = {t.code: t for t in ALL_TASKS}
|
||||
|
||||
|
||||
def get_all_tasks() -> list[TaskDefinition]:
    """Return the full task registry.

    The module-level ``ALL_TASKS`` list itself is returned, not a copy.
    """
    registry = ALL_TASKS
    return registry
|
||||
|
||||
|
||||
def get_task_by_code(code: str) -> TaskDefinition | None:
    """Look up a task by its code; the code is upper-cased before lookup.

    Returns:
        The matching ``TaskDefinition``, or ``None`` when the code is unknown.
    """
    normalized = code.upper()
    return _TASK_BY_CODE.get(normalized)
|
||||
|
||||
|
||||
def get_tasks_grouped_by_domain() -> dict[str, list[TaskDefinition]]:
    """Group every registered task by its business domain.

    Returns:
        A dict keyed by domain; domains and the tasks inside each keep the
        order in which they appear in ``ALL_TASKS``.
    """
    by_domain: dict[str, list[TaskDefinition]] = {}
    for task in ALL_TASKS:
        if task.domain not in by_domain:
            by_domain[task.domain] = []
        by_domain[task.domain].append(task)
    return by_domain
|
||||
|
||||
|
||||
def get_tasks_by_layer(layer: str) -> list[TaskDefinition]:
    """Return all registered tasks belonging to the given layer.

    The ``layer`` argument is upper-cased before being compared with each
    task's ``layer`` field.
    """
    wanted = layer.upper()
    matches: list[TaskDefinition] = []
    for task in ALL_TASKS:
        if task.layer == wanted:
            matches.append(task)
    return matches
|
||||
|
||||
|
||||
# ── Flow → 层映射 ────────────────────────────────────────────
|
||||
# 每种 Flow 包含的层,用于前端按 Flow 过滤可选任务
|
||||
|
||||
# Layers covered by each Flow ID; used to filter the tasks selectable for a Flow.
FLOW_LAYER_MAP: dict[str, list[str]] = {
    "api_ods": ["ODS"],
    "api_ods_dwd": ["ODS", "DWD"],
    "api_full": ["ODS", "DWD", "DWS", "INDEX"],
    "ods_dwd": ["DWD"],
    "dwd_dws": ["DWS"],
    "dwd_dws_index": ["DWS", "INDEX"],
    "dwd_index": ["INDEX"],
}
|
||||
|
||||
|
||||
def get_compatible_tasks(flow_id: str) -> list[TaskDefinition]:
    """Return the tasks whose layer is covered by the given Flow.

    An unknown ``flow_id`` maps to no layers, so the result is empty.
    """
    allowed_layers = FLOW_LAYER_MAP.get(flow_id, [])
    compatible: list[TaskDefinition] = []
    for task in ALL_TASKS:
        if task.layer in allowed_layers:
            compatible.append(task)
    return compatible
|
||||
|
||||
|
||||
# ── DWD 表定义 ────────────────────────────────────────────────
|
||||
|
||||
DWD_TABLES: list[DwdTableDefinition] = [
|
||||
# 维度表
|
||||
DwdTableDefinition("dwd.dim_site", "门店维度", "台桌", "ods.table_fee_transactions", is_dimension=True),
|
||||
DwdTableDefinition("dwd.dim_site_ex", "门店维度(扩展)", "台桌", "ods.table_fee_transactions", is_dimension=True),
|
||||
DwdTableDefinition("dwd.dim_table", "台桌维度", "台桌", "ods.site_tables_master", is_dimension=True),
|
||||
DwdTableDefinition("dwd.dim_table_ex", "台桌维度(扩展)", "台桌", "ods.site_tables_master", is_dimension=True),
|
||||
DwdTableDefinition("dwd.dim_assistant", "助教维度", "助教", "ods.assistant_accounts_master", is_dimension=True),
|
||||
DwdTableDefinition("dwd.dim_assistant_ex", "助教维度(扩展)", "助教", "ods.assistant_accounts_master", is_dimension=True),
|
||||
DwdTableDefinition("dwd.dim_member", "会员维度", "会员", "ods.member_profiles", is_dimension=True),
|
||||
DwdTableDefinition("dwd.dim_member_ex", "会员维度(扩展)", "会员", "ods.member_profiles", is_dimension=True),
|
||||
DwdTableDefinition("dwd.dim_member_card_account", "会员储值卡维度", "会员", "ods.member_stored_value_cards", is_dimension=True),
|
||||
DwdTableDefinition("dwd.dim_member_card_account_ex", "会员储值卡维度(扩展)", "会员", "ods.member_stored_value_cards", is_dimension=True),
|
||||
DwdTableDefinition("dwd.dim_tenant_goods", "租户商品维度", "商品", "ods.tenant_goods_master", is_dimension=True),
|
||||
DwdTableDefinition("dwd.dim_tenant_goods_ex", "租户商品维度(扩展)", "商品", "ods.tenant_goods_master", is_dimension=True),
|
||||
DwdTableDefinition("dwd.dim_store_goods", "门店商品维度", "商品", "ods.store_goods_master", is_dimension=True),
|
||||
DwdTableDefinition("dwd.dim_store_goods_ex", "门店商品维度(扩展)", "商品", "ods.store_goods_master", is_dimension=True),
|
||||
DwdTableDefinition("dwd.dim_goods_category", "商品分类维度", "商品", "ods.stock_goods_category_tree", is_dimension=True),
|
||||
DwdTableDefinition("dwd.dim_groupbuy_package", "团购套餐维度", "团购", "ods.group_buy_packages", is_dimension=True),
|
||||
DwdTableDefinition("dwd.dim_groupbuy_package_ex", "团购套餐维度(扩展)", "团购", "ods.group_buy_packages", is_dimension=True),
|
||||
# 事实表
|
||||
DwdTableDefinition("dwd.dwd_settlement_head", "结算主表", "结算", "ods.settlement_records"),
|
||||
DwdTableDefinition("dwd.dwd_settlement_head_ex", "结算主表(扩展)", "结算", "ods.settlement_records"),
|
||||
DwdTableDefinition("dwd.dwd_table_fee_log", "台费流水", "台桌", "ods.table_fee_transactions"),
|
||||
DwdTableDefinition("dwd.dwd_table_fee_log_ex", "台费流水(扩展)", "台桌", "ods.table_fee_transactions"),
|
||||
DwdTableDefinition("dwd.dwd_table_fee_adjust", "台费折扣", "台桌", "ods.table_fee_discount_records"),
|
||||
DwdTableDefinition("dwd.dwd_table_fee_adjust_ex", "台费折扣(扩展)", "台桌", "ods.table_fee_discount_records"),
|
||||
DwdTableDefinition("dwd.dwd_store_goods_sale", "商品销售", "商品", "ods.store_goods_sales_records"),
|
||||
DwdTableDefinition("dwd.dwd_store_goods_sale_ex", "商品销售(扩展)", "商品", "ods.store_goods_sales_records"),
|
||||
DwdTableDefinition("dwd.dwd_assistant_service_log", "助教服务流水", "助教", "ods.assistant_service_records"),
|
||||
DwdTableDefinition("dwd.dwd_assistant_service_log_ex", "助教服务流水(扩展)", "助教", "ods.assistant_service_records"),
|
||||
DwdTableDefinition("dwd.dwd_assistant_trash_event", "助教取消事件", "助教", "ods.assistant_cancellation_records"),
|
||||
DwdTableDefinition("dwd.dwd_assistant_trash_event_ex", "助教取消事件(扩展)", "助教", "ods.assistant_cancellation_records"),
|
||||
DwdTableDefinition("dwd.dwd_member_balance_change", "会员余额变动", "会员", "ods.member_balance_changes"),
|
||||
DwdTableDefinition("dwd.dwd_member_balance_change_ex", "会员余额变动(扩展)", "会员", "ods.member_balance_changes"),
|
||||
DwdTableDefinition("dwd.dwd_groupbuy_redemption", "团购核销", "团购", "ods.group_buy_redemption_records"),
|
||||
DwdTableDefinition("dwd.dwd_groupbuy_redemption_ex", "团购核销(扩展)", "团购", "ods.group_buy_redemption_records"),
|
||||
DwdTableDefinition("dwd.dwd_platform_coupon_redemption", "平台券核销", "团购", "ods.platform_coupon_redemption_records"),
|
||||
DwdTableDefinition("dwd.dwd_platform_coupon_redemption_ex", "平台券核销(扩展)", "团购", "ods.platform_coupon_redemption_records"),
|
||||
DwdTableDefinition("dwd.dwd_recharge_order", "充值订单", "会员", "ods.recharge_settlements"),
|
||||
DwdTableDefinition("dwd.dwd_recharge_order_ex", "充值订单(扩展)", "会员", "ods.recharge_settlements"),
|
||||
DwdTableDefinition("dwd.dwd_payment", "支付流水", "结算", "ods.payment_transactions"),
|
||||
DwdTableDefinition("dwd.dwd_refund", "退款流水", "结算", "ods.refund_transactions"),
|
||||
DwdTableDefinition("dwd.dwd_refund_ex", "退款流水(扩展)", "结算", "ods.refund_transactions"),
|
||||
]
|
||||
|
||||
|
||||
def get_dwd_tables_grouped_by_domain() -> dict[str, list[DwdTableDefinition]]:
    """Group every DWD table definition by its business domain.

    Returns:
        A dict keyed by domain; domains and the tables inside each keep the
        order in which they appear in ``DWD_TABLES``.
    """
    by_domain: dict[str, list[DwdTableDefinition]] = {}
    for table in DWD_TABLES:
        if table.domain not in by_domain:
            by_domain[table.domain] = []
        by_domain[table.domain].append(table)
    return by_domain
|
||||
Reference in New Issue
Block a user