# -*- coding: utf-8 -*-
"""任务注册表 & 配置 API
|
||
|
||
提供 4 个端点:
|
||
- GET /api/tasks/registry — 按业务域分组的任务列表
|
||
- GET /api/tasks/dwd-tables — 按业务域分组的 DWD 表定义
|
||
- GET /api/tasks/flows — 7 种 Flow + 4 种处理模式
|
||
- POST /api/tasks/validate — 验证 TaskConfig 并返回 CLI 命令预览
|
||
|
||
所有端点需要 JWT 认证。validate 端点从 JWT 注入 store_id。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from typing import Any
|
||
|
||
from fastapi import APIRouter, Depends
|
||
from pydantic import BaseModel
|
||
|
||
from app.auth.dependencies import CurrentUser, get_current_user
|
||
from app.config import ETL_PROJECT_PATH
|
||
from app.schemas.tasks import (
|
||
FlowDefinition,
|
||
ProcessingModeDefinition,
|
||
TaskConfigSchema,
|
||
)
|
||
from app.services.cli_builder import cli_builder
|
||
from app.services.task_registry import (
|
||
DWD_TABLES,
|
||
FLOW_LAYER_MAP,
|
||
get_dwd_tables_grouped_by_domain,
|
||
get_tasks_grouped_by_domain,
|
||
)
|
||
|
||
router = APIRouter(prefix="/api/tasks", tags=["任务配置"])
|
||
|
||
|
||
# ── 响应模型 ──────────────────────────────────────────────────
|
||
|
||
class TaskItem(BaseModel):
    # Response schema for a single task entry. Instances are built in
    # get_task_registry by copying the same-named attributes off each
    # registry object, so the field names here must match those attributes.
    code: str
    name: str
    description: str
    domain: str
    layer: str
    requires_window: bool
    is_ods: bool
    is_dimension: bool
    default_enabled: bool
    is_common: bool
|
||
|
||
|
||
class DwdTableItem(BaseModel):
    # Response schema for a single DWD table entry. Instances are built in
    # get_dwd_tables by copying the same-named attributes off each table
    # definition object.
    table_name: str
    display_name: str
    domain: str
    ods_source: str
    is_dimension: bool
|
||
|
||
|
||
class TaskRegistryResponse(BaseModel):
    """Task list grouped by business domain."""

    # Maps each group key to the list of tasks in that group.
    groups: dict[str, list[TaskItem]]
|
||
|
||
|
||
class DwdTablesResponse(BaseModel):
    """DWD table definitions grouped by business domain."""

    # Maps each group key to the list of DWD tables in that group.
    groups: dict[str, list[DwdTableItem]]
|
||
|
||
|
||
class FlowsResponse(BaseModel):
    """Flow definitions plus processing-mode definitions."""

    flows: list[FlowDefinition]
    processing_modes: list[ProcessingModeDefinition]
|
||
|
||
|
||
class ValidateRequest(BaseModel):
    """Validation request body.

    Reuses TaskConfigSchema; store_id is injected by the backend rather than
    taken from the client.
    """

    config: TaskConfigSchema
|
||
|
||
|
||
class ValidateResponse(BaseModel):
    """Validation result plus CLI command preview."""

    valid: bool
    # Command rendered as a single string; empty when validation fails.
    command: str
    # Command as an argument list; empty when validation fails.
    command_args: list[str]
    # Validation messages; empty when the config is valid.
    errors: list[str]
|
||
|
||
|
||
# ── Flow 定义(静态) ────────────────────────────────────────
|
||
|
||
# Static list of the seven flows, in the order they are returned to clients.
# Each row is (flow id, display name, layer list).
FLOW_DEFINITIONS: list[FlowDefinition] = [
    FlowDefinition(id=flow_id, name=label, layers=layer_list)
    for flow_id, label, layer_list in (
        ("api_ods", "API → ODS", ["ODS"]),
        ("api_ods_dwd", "API → ODS → DWD", ["ODS", "DWD"]),
        (
            "api_full",
            "API → ODS → DWD → DWS汇总 → DWS指数",
            ["ODS", "DWD", "DWS", "INDEX"],
        ),
        ("ods_dwd", "ODS → DWD", ["DWD"]),
        ("dwd_dws", "DWD → DWS汇总", ["DWS"]),
        ("dwd_dws_index", "DWD → DWS汇总 → DWS指数", ["DWS", "INDEX"]),
        ("dwd_index", "DWD → DWS指数", ["INDEX"]),
    )
]
|
||
|
||
# Static list of the four processing modes, in the order they are returned.
# Each row is (mode id, display name, description).
PROCESSING_MODE_DEFINITIONS: list[ProcessingModeDefinition] = [
    ProcessingModeDefinition(id=mode_id, name=label, description=summary)
    for mode_id, label, summary in (
        ("increment_only", "仅增量处理", "只处理新增和变更的数据"),
        ("verify_only", "仅校验修复", "校验现有数据并修复不一致"),
        ("increment_verify", "增量 + 校验修复", "先增量处理,再校验并修复"),
        (
            "full_window",
            "全窗口处理",
            "用 API 返回数据的实际时间范围处理全部层,无需校验",
        ),
    )
]
|
||
|
||
|
||
# ── 端点 ──────────────────────────────────────────────────────
|
||
|
||
@router.get("/registry", response_model=TaskRegistryResponse)
async def get_task_registry(
    user: CurrentUser = Depends(get_current_user),
) -> TaskRegistryResponse:
    """Return the task list grouped by business domain.

    ``user`` is not read in the body; declaring it makes the route depend on
    ``get_current_user``.
    """
    groups: dict[str, list[TaskItem]] = {}
    for domain_key, domain_tasks in get_tasks_grouped_by_domain().items():
        # Copy each registry entry field-by-field into the response schema.
        groups[domain_key] = [
            TaskItem(
                code=task.code,
                name=task.name,
                description=task.description,
                domain=task.domain,
                layer=task.layer,
                requires_window=task.requires_window,
                is_ods=task.is_ods,
                is_dimension=task.is_dimension,
                default_enabled=task.default_enabled,
                is_common=task.is_common,
            )
            for task in domain_tasks
        ]
    return TaskRegistryResponse(groups=groups)
|
||
|
||
|
||
@router.get("/dwd-tables", response_model=DwdTablesResponse)
async def get_dwd_tables(
    user: CurrentUser = Depends(get_current_user),
) -> DwdTablesResponse:
    """Return the DWD table definitions grouped by business domain.

    ``user`` is not read in the body; declaring it makes the route depend on
    ``get_current_user``.
    """
    groups: dict[str, list[DwdTableItem]] = {}
    for domain_key, domain_tables in get_dwd_tables_grouped_by_domain().items():
        # Copy each table definition field-by-field into the response schema.
        groups[domain_key] = [
            DwdTableItem(
                table_name=table.table_name,
                display_name=table.display_name,
                domain=table.domain,
                ods_source=table.ods_source,
                is_dimension=table.is_dimension,
            )
            for table in domain_tables
        ]
    return DwdTablesResponse(groups=groups)
|
||
|
||
|
||
@router.get("/flows", response_model=FlowsResponse)
async def get_flows(
    user: CurrentUser = Depends(get_current_user),
) -> FlowsResponse:
    """Return the 7 flow definitions and the 4 processing-mode definitions.

    Both lists are the module-level static constants; nothing is computed
    per request.
    """
    payload = FlowsResponse(
        flows=FLOW_DEFINITIONS,
        processing_modes=PROCESSING_MODE_DEFINITIONS,
    )
    return payload
|
||
|
||
|
||
@router.post("/validate", response_model=ValidateResponse)
async def validate_task_config(
    body: ValidateRequest,
    user: CurrentUser = Depends(get_current_user),
) -> ValidateResponse:
    """Validate a TaskConfig and return a preview of the generated CLI command.

    ``store_id`` is overwritten with ``user.site_id`` from the authenticated
    user, so the front end does not need to send it.

    Returns:
        ``valid=False`` with the collected messages (and an empty command)
        when any check fails; otherwise ``valid=True`` with the command
        string and argument list produced by ``cli_builder``.
    """
    effective = body.config.model_copy(update={"store_id": user.site_id})
    problems: list[str] = []

    # Check the flow ID.
    # CHANGE [2026-02-20] intent: pipeline → flow, unified naming
    if effective.flow not in FLOW_LAYER_MAP:
        problems.append(f"无效的执行流程: {effective.flow}")

    # The task list must not be empty.
    if not effective.tasks:
        problems.append("任务列表不能为空")

    if problems:
        # Report every failed check at once; no command is built.
        return ValidateResponse(
            valid=False, command="", command_args=[], errors=problems
        )

    arg_list = cli_builder.build_command(effective, ETL_PROJECT_PATH)
    rendered = cli_builder.build_command_string(effective, ETL_PROJECT_PATH)
    return ValidateResponse(
        valid=True, command=rendered, command_args=arg_list, errors=[]
    )
|
||
|
||
|
||
# ── GET /api/tasks/sync-check — 对比 ETL 真实注册表 ──────────
|
||
|
||
class SyncCheckResponse(BaseModel):
    """Result of the sync check."""

    # True only when both difference lists below are empty.
    in_sync: bool
    # Task codes known to the backend but not reported by the ETL registry.
    backend_only: list[str]
    # Task codes reported by the ETL registry but not known to the backend.
    etl_only: list[str]
    # Set when the ETL subprocess could not be run or exited non-zero.
    error: str | None = None
|
||
|
||
|
||
@router.get("/sync-check", response_model=SyncCheckResponse)
async def sync_check(
    user: CurrentUser = Depends(get_current_user),
) -> SyncCheckResponse:
    """Compare the backend's task list with the ETL project's registry.

    The ETL task codes are obtained by running a short Python snippet in a
    child interpreter (cwd = ``ETL_PROJECT_PATH``) instead of importing ETL
    code into this process.

    Returns:
        A response listing the codes found on only one side. If the child
        process cannot be started, times out, or exits non-zero, the response
        has ``in_sync=False``, empty difference lists, and ``error`` set.
    """
    import asyncio
    import subprocess
    import sys

    from app.services.task_registry import ALL_TASKS

    backend_codes = {t.code for t in ALL_TASKS}

    probe_cmd = [
        sys.executable,
        "-c",
        "from orchestration.task_registry import default_registry; "
        "print(','.join(sorted(default_registry.get_all_task_codes())))",
    ]

    try:
        # subprocess.run blocks for up to the 15 s timeout. Calling it directly
        # in this coroutine would stall the event loop for every other request,
        # so it is run in a worker thread instead.
        result = await asyncio.to_thread(
            subprocess.run,
            probe_cmd,
            capture_output=True,
            text=True,
            timeout=15,
            cwd=ETL_PROJECT_PATH,
            encoding="utf-8",
            errors="replace",
        )
    except Exception as exc:
        # Deliberately broad: any failure to run the probe (missing cwd,
        # timeout, ...) is reported in the response instead of raising.
        return SyncCheckResponse(
            in_sync=False, backend_only=[], etl_only=[],
            error=f"无法连接 ETL: {exc}",
        )

    if result.returncode != 0:
        return SyncCheckResponse(
            in_sync=False, backend_only=[], etl_only=[],
            # Only the first 200 characters of stderr are returned.
            error=f"ETL 子进程失败: {result.stderr.strip()[:200]}",
        )

    # The probe prints one comma-separated line; drop blanks and whitespace.
    etl_codes = {c.strip() for c in result.stdout.strip().split(",") if c.strip()}

    backend_only = sorted(backend_codes - etl_codes)
    etl_only = sorted(etl_codes - backend_codes)

    return SyncCheckResponse(
        in_sync=not backend_only and not etl_only,
        backend_only=backend_only,
        etl_only=etl_only,
    )
|