Files
Neo-ZQYY/apps/backend/app/routers/tasks.py

267 lines
8.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""任务注册表 & 配置 API
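
Provides 5 endpoints:
- GET /api/tasks/registry — task list grouped by business domain
- GET /api/tasks/dwd-tables — DWD table definitions grouped by business domain
- GET /api/tasks/flows — 7 flows + 4 processing modes
- POST /api/tasks/validate — validate a TaskConfig and return a CLI command preview
- GET /api/tasks/sync-check — compare the backend task list with the ETL registry

All endpoints require JWT authentication. The validate endpoint injects store_id from the JWT.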
"""
from __future__ import annotations
from typing import Any
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from app.auth.dependencies import CurrentUser, get_current_user
from app.config import ETL_PROJECT_PATH
from app.schemas.tasks import (
FlowDefinition,
ProcessingModeDefinition,
TaskConfigSchema,
)
from app.services.cli_builder import cli_builder
from app.services.task_registry import (
DWD_TABLES,
FLOW_LAYER_MAP,
get_dwd_tables_grouped_by_domain,
get_tasks_grouped_by_domain,
)
# Router collecting the task registry / configuration endpoints defined in
# this module; every route registered on it is served under /api/tasks.
router = APIRouter(prefix="/api/tasks", tags=["任务配置"])
# ── 响应模型 ──────────────────────────────────────────────────
class TaskItem(BaseModel):
    # Response item describing one registered task. get_task_registry() fills
    # every field from the same-named attribute of a task entry returned by
    # get_tasks_grouped_by_domain().

    # Identity and display text.
    code: str
    name: str
    description: str

    # Classification; ``domain`` is also the key the item is grouped under.
    domain: str
    layer: str

    # Boolean flags copied verbatim from the registry entry.
    requires_window: bool
    is_ods: bool
    is_dimension: bool
    default_enabled: bool
    is_common: bool
class DwdTableItem(BaseModel):
    # Response item describing one DWD table. get_dwd_tables() fills every
    # field from the same-named attribute of a table entry returned by
    # get_dwd_tables_grouped_by_domain().

    # Table identity and display text.
    table_name: str
    display_name: str

    # ``domain`` is also the key the item is grouped under.
    domain: str

    # Copied verbatim from the table entry.
    ods_source: str
    is_dimension: bool
class TaskRegistryResponse(BaseModel):
    """Task list grouped by business domain."""

    # Domain key -> tasks belonging to that domain.
    groups: dict[str, list[TaskItem]]
class DwdTablesResponse(BaseModel):
    """DWD table definitions grouped by business domain."""

    # Domain key -> DWD tables belonging to that domain.
    groups: dict[str, list[DwdTableItem]]
class FlowsResponse(BaseModel):
    """Flow definitions plus processing-mode definitions."""

    # Available flows (served from FLOW_DEFINITIONS by get_flows()).
    flows: list[FlowDefinition]

    # Available processing modes (served from PROCESSING_MODE_DEFINITIONS).
    processing_modes: list[ProcessingModeDefinition]
class ValidateRequest(BaseModel):
    """Validation request body.

    Reuses TaskConfigSchema, but store_id is injected by the backend.
    """

    # Task configuration to validate; validate_task_config() overrides its
    # store_id with the authenticated user's site_id before validating.
    config: TaskConfigSchema
class ValidateResponse(BaseModel):
    """Validation result plus CLI command preview."""

    # True when no validation error was found.
    valid: bool

    # Command preview as a single string; validate_task_config() sends ""
    # when validation fails.
    command: str

    # The same command as an argument list; empty when validation fails.
    command_args: list[str]

    # Validation error messages; empty when ``valid`` is true.
    errors: list[str]
# ── Flow 定义(静态) ────────────────────────────────────────
# Static catalogue of flows returned by GET /api/tasks/flows. Each row is
# (id, display name, layers) and becomes one FlowDefinition, in this order.
FLOW_DEFINITIONS: list[FlowDefinition] = [
    FlowDefinition(id=flow_id, name=label, layers=layer_names)
    for flow_id, label, layer_names in (
        ("api_ods", "API → ODS", ["ODS"]),
        ("api_ods_dwd", "API → ODS → DWD", ["ODS", "DWD"]),
        ("api_full", "API → ODS → DWD → DWS汇总 → DWS指数", ["ODS", "DWD", "DWS", "INDEX"]),
        ("ods_dwd", "ODS → DWD", ["DWD"]),
        ("dwd_dws", "DWD → DWS汇总", ["DWS"]),
        ("dwd_dws_index", "DWD → DWS汇总 → DWS指数", ["DWS", "INDEX"]),
        ("dwd_index", "DWD → DWS指数", ["INDEX"]),
    )
]
# Static catalogue of processing modes returned by GET /api/tasks/flows. Each
# row is (id, display name, description) and becomes one
# ProcessingModeDefinition, in this order.
PROCESSING_MODE_DEFINITIONS: list[ProcessingModeDefinition] = [
    ProcessingModeDefinition(id=mode_id, name=label, description=summary)
    for mode_id, label, summary in (
        ("increment_only", "仅增量处理", "只处理新增和变更的数据"),
        ("verify_only", "仅校验修复", "校验现有数据并修复不一致"),
        ("increment_verify", "增量 + 校验修复", "先增量处理,再校验并修复"),
        ("full_window", "全窗口处理", "用 API 返回数据的实际时间范围处理全部层,无需校验"),
    )
]
# ── 端点 ──────────────────────────────────────────────────────
@router.get("/registry", response_model=TaskRegistryResponse)
async def get_task_registry(
    user: CurrentUser = Depends(get_current_user),
) -> TaskRegistryResponse:
    """Return the registered tasks grouped by business domain.

    ``user`` is not read in the body; it is declared so that the
    ``get_current_user`` dependency runs for this endpoint.
    """
    groups: dict[str, list[TaskItem]] = {}
    for domain, tasks in get_tasks_grouped_by_domain().items():
        items: list[TaskItem] = []
        for task in tasks:
            # Copy each registry attribute onto the response model explicitly.
            items.append(
                TaskItem(
                    code=task.code,
                    name=task.name,
                    description=task.description,
                    domain=task.domain,
                    layer=task.layer,
                    requires_window=task.requires_window,
                    is_ods=task.is_ods,
                    is_dimension=task.is_dimension,
                    default_enabled=task.default_enabled,
                    is_common=task.is_common,
                )
            )
        groups[domain] = items
    return TaskRegistryResponse(groups=groups)
@router.get("/dwd-tables", response_model=DwdTablesResponse)
async def get_dwd_tables(
    user: CurrentUser = Depends(get_current_user),
) -> DwdTablesResponse:
    """Return the DWD table definitions grouped by business domain.

    ``user`` is not read in the body; it is declared so that the
    ``get_current_user`` dependency runs for this endpoint.
    """
    groups: dict[str, list[DwdTableItem]] = {}
    for domain, tables in get_dwd_tables_grouped_by_domain().items():
        # Copy each table attribute onto the response model explicitly.
        groups[domain] = [
            DwdTableItem(
                table_name=table.table_name,
                display_name=table.display_name,
                domain=table.domain,
                ods_source=table.ods_source,
                is_dimension=table.is_dimension,
            )
            for table in tables
        ]
    return DwdTablesResponse(groups=groups)
@router.get("/flows", response_model=FlowsResponse)
async def get_flows(
    user: CurrentUser = Depends(get_current_user),
) -> FlowsResponse:
    """Return the static flow definitions and processing-mode definitions.

    ``user`` is not read in the body; it is declared so that the
    ``get_current_user`` dependency runs for this endpoint.
    """
    payload = {
        "flows": FLOW_DEFINITIONS,
        "processing_modes": PROCESSING_MODE_DEFINITIONS,
    }
    return FlowsResponse(**payload)
@router.post("/validate", response_model=ValidateResponse)
async def validate_task_config(
    body: ValidateRequest,
    user: CurrentUser = Depends(get_current_user),
) -> ValidateResponse:
    """Validate a TaskConfig and return a preview of the generated CLI command.

    ``store_id`` is taken from the authenticated user's ``site_id``, so the
    frontend does not need to send it.

    Returns:
        ``valid=False`` with the collected error messages (and an empty
        command) when the flow id is unknown or the task list is empty;
        otherwise ``valid=True`` with the command string and argument list
        produced by ``cli_builder``.
    """
    # Override whatever store_id the client sent with the one from the token.
    config = body.config.model_copy(update={"store_id": user.site_id})

    problems: list[str] = []

    # CHANGE [2026-02-20] intent: naming unified from "pipeline" to "flow".
    flow_is_known = config.flow in FLOW_LAYER_MAP
    if not flow_is_known:
        problems.append(f"无效的执行流程: {config.flow}")

    # The task list must not be empty.
    if not config.tasks:
        problems.append("任务列表不能为空")

    if problems:
        return ValidateResponse(valid=False, command="", command_args=[], errors=problems)

    # Only build the command once the configuration has passed both checks.
    argv = cli_builder.build_command(config, ETL_PROJECT_PATH)
    command_line = cli_builder.build_command_string(config, ETL_PROJECT_PATH)
    return ValidateResponse(
        valid=True,
        command=command_line,
        command_args=argv,
        errors=[],
    )
# ── GET /api/tasks/sync-check — 对比 ETL 真实注册表 ──────────
class SyncCheckResponse(BaseModel):
    """Result of the sync check."""

    # True only when neither side has task codes the other lacks.
    in_sync: bool

    # Task codes known to the backend but not reported by the ETL registry.
    backend_only: list[str]

    # Task codes reported by the ETL registry but unknown to the backend.
    etl_only: list[str]

    # Failure description when the ETL registry could not be queried.
    error: str | None = None
@router.get("/sync-check", response_model=SyncCheckResponse)
async def sync_check(
    user: CurrentUser = Depends(get_current_user),
) -> SyncCheckResponse:
    """Compare the backend's hard-coded task list with the ETL registry.

    The ETL task codes are obtained by running a short Python snippet in a
    child process (working directory ``ETL_PROJECT_PATH``) rather than by
    importing ETL code into this process.

    ``user`` is not read in the body; it is declared so that the
    ``get_current_user`` dependency runs for this endpoint.

    Returns:
        A ``SyncCheckResponse``. On success ``backend_only`` and ``etl_only``
        hold the sorted codes present on one side only, and ``in_sync`` is
        true when both are empty. If the child process cannot be started,
        times out, or exits non-zero, ``in_sync`` is false, both lists are
        empty and ``error`` describes the failure.
    """
    import asyncio
    import subprocess
    import sys

    from app.services.task_registry import ALL_TASKS

    backend_codes = {t.code for t in ALL_TASKS}

    # Snippet executed by the child interpreter: prints the ETL task codes as
    # one comma-separated line.
    probe_cmd = [
        sys.executable, "-c",
        "from orchestration.task_registry import default_registry; "
        "print(','.join(sorted(default_registry.get_all_task_codes())))",
    ]

    try:
        # subprocess.run() blocks for up to ``timeout`` seconds. Calling it
        # directly inside this coroutine would stall the event loop (and every
        # other request) for that long, so run it in a worker thread.
        result = await asyncio.to_thread(
            subprocess.run,
            probe_cmd,
            capture_output=True, text=True, timeout=15,
            cwd=ETL_PROJECT_PATH, encoding="utf-8", errors="replace",
        )
    except Exception as exc:
        # Deliberately broad: this is a best-effort diagnostic endpoint, so
        # spawn failures and timeouts are reported in the payload instead of
        # surfacing as a server error.
        return SyncCheckResponse(
            in_sync=False, backend_only=[], etl_only=[],
            error=f"无法连接 ETL: {exc}",
        )

    if result.returncode != 0:
        # Cap stderr at 200 characters to keep the response small.
        return SyncCheckResponse(
            in_sync=False, backend_only=[], etl_only=[],
            error=f"ETL 子进程失败: {result.stderr.strip()[:200]}",
        )

    # Ignore empty fragments so that blank output yields an empty set.
    etl_codes = {c.strip() for c in result.stdout.strip().split(",") if c.strip()}

    backend_only = sorted(backend_codes - etl_codes)
    etl_only = sorted(etl_codes - backend_codes)
    return SyncCheckResponse(
        in_sync=not backend_only and not etl_only,
        backend_only=backend_only,
        etl_only=etl_only,
    )