# -*- coding: utf-8 -*- """任务注册表 & 配置 API 提供 4 个端点: - GET /api/tasks/registry — 按业务域分组的任务列表 - GET /api/tasks/dwd-tables — 按业务域分组的 DWD 表定义 - GET /api/tasks/flows — 7 种 Flow + 3 种处理模式 - POST /api/tasks/validate — 验证 TaskConfig 并返回 CLI 命令预览 所有端点需要 JWT 认证。validate 端点从 JWT 注入 store_id。 """ from __future__ import annotations from typing import Any from fastapi import APIRouter, Depends from pydantic import BaseModel from app.auth.dependencies import CurrentUser, get_current_user from app.config import ETL_PROJECT_PATH from app.schemas.tasks import ( FlowDefinition, ProcessingModeDefinition, TaskConfigSchema, ) from app.services.cli_builder import cli_builder from app.services.task_registry import ( DWD_TABLES, FLOW_LAYER_MAP, get_dwd_tables_grouped_by_domain, get_tasks_grouped_by_domain, ) router = APIRouter(prefix="/api/tasks", tags=["任务配置"]) # ── 响应模型 ────────────────────────────────────────────────── class TaskItem(BaseModel): code: str name: str description: str domain: str layer: str requires_window: bool is_ods: bool is_dimension: bool default_enabled: bool is_common: bool class DwdTableItem(BaseModel): table_name: str display_name: str domain: str ods_source: str is_dimension: bool class TaskRegistryResponse(BaseModel): """按业务域分组的任务列表""" groups: dict[str, list[TaskItem]] class DwdTablesResponse(BaseModel): """按业务域分组的 DWD 表定义""" groups: dict[str, list[DwdTableItem]] class FlowsResponse(BaseModel): """Flow 定义 + 处理模式定义""" flows: list[FlowDefinition] processing_modes: list[ProcessingModeDefinition] class ValidateRequest(BaseModel): """验证请求体 — 复用 TaskConfigSchema,但 store_id 由后端注入""" config: TaskConfigSchema class ValidateResponse(BaseModel): """验证结果 + CLI 命令预览""" valid: bool command: str command_args: list[str] errors: list[str] # ── Flow 定义(静态) ──────────────────────────────────────── FLOW_DEFINITIONS: list[FlowDefinition] = [ FlowDefinition(id="api_ods", name="API → ODS", layers=["ODS"]), FlowDefinition(id="api_ods_dwd", name="API → ODS → DWD", layers=["ODS", "DWD"]), FlowDefinition(id="api_full", name="API → ODS → DWD → DWS汇总 → DWS指数", layers=["ODS", "DWD", "DWS", "INDEX"]), FlowDefinition(id="ods_dwd", name="ODS → DWD", layers=["DWD"]), FlowDefinition(id="dwd_dws", name="DWD → DWS汇总", layers=["DWS"]), FlowDefinition(id="dwd_dws_index", name="DWD → DWS汇总 → DWS指数", layers=["DWS", "INDEX"]), FlowDefinition(id="dwd_index", name="DWD → DWS指数", layers=["INDEX"]), ] PROCESSING_MODE_DEFINITIONS: list[ProcessingModeDefinition] = [ ProcessingModeDefinition(id="increment_only", name="仅增量处理", description="只处理新增和变更的数据"), ProcessingModeDefinition(id="verify_only", name="仅校验修复", description="校验现有数据并修复不一致"), ProcessingModeDefinition(id="increment_verify", name="增量 + 校验修复", description="先增量处理,再校验并修复"), ] # ── 端点 ────────────────────────────────────────────────────── @router.get("/registry", response_model=TaskRegistryResponse) async def get_task_registry( user: CurrentUser = Depends(get_current_user), ) -> TaskRegistryResponse: """返回按业务域分组的任务列表""" grouped = get_tasks_grouped_by_domain() return TaskRegistryResponse( groups={ domain: [ TaskItem( code=t.code, name=t.name, description=t.description, domain=t.domain, layer=t.layer, requires_window=t.requires_window, is_ods=t.is_ods, is_dimension=t.is_dimension, default_enabled=t.default_enabled, is_common=t.is_common, ) for t in tasks ] for domain, tasks in grouped.items() } ) @router.get("/dwd-tables", response_model=DwdTablesResponse) async def get_dwd_tables( user: CurrentUser = Depends(get_current_user), ) -> DwdTablesResponse: """返回按业务域分组的 DWD 表定义""" grouped = get_dwd_tables_grouped_by_domain() return DwdTablesResponse( groups={ domain: [ DwdTableItem( table_name=t.table_name, display_name=t.display_name, domain=t.domain, ods_source=t.ods_source, is_dimension=t.is_dimension, ) for t in tables ] for domain, tables in grouped.items() } ) @router.get("/flows", response_model=FlowsResponse) async def get_flows( user: CurrentUser = Depends(get_current_user), ) -> FlowsResponse: """返回 7 种 Flow 定义和 3 种处理模式定义""" return FlowsResponse( flows=FLOW_DEFINITIONS, processing_modes=PROCESSING_MODE_DEFINITIONS, ) @router.post("/validate", response_model=ValidateResponse) async def validate_task_config( body: ValidateRequest, user: CurrentUser = Depends(get_current_user), ) -> ValidateResponse: """验证 TaskConfig 并返回生成的 CLI 命令预览 从 JWT 注入 store_id,前端无需传递。 """ config = body.config.model_copy(update={"store_id": user.site_id}) errors: list[str] = [] # 验证 Flow ID if config.pipeline not in FLOW_LAYER_MAP: errors.append(f"无效的执行流程: {config.pipeline}") # 验证任务列表非空 if not config.tasks: errors.append("任务列表不能为空") if errors: return ValidateResponse( valid=False, command="", command_args=[], errors=errors, ) cmd_args = cli_builder.build_command(config, ETL_PROJECT_PATH) cmd_str = cli_builder.build_command_string(config, ETL_PROJECT_PATH) return ValidateResponse( valid=True, command=cmd_str, command_args=cmd_args, errors=[], ) # ── GET /api/tasks/sync-check — 对比 ETL 真实注册表 ────────── class SyncCheckResponse(BaseModel): """同步检查结果""" in_sync: bool backend_only: list[str] etl_only: list[str] error: str | None = None @router.get("/sync-check", response_model=SyncCheckResponse) async def sync_check( user: CurrentUser = Depends(get_current_user), ) -> SyncCheckResponse: """对比后端硬编码任务列表与 ETL 真实注册表,返回差异。 通过子进程调用 ETL CLI 获取真实任务列表,避免直接导入 ETL 代码。 """ import subprocess import sys from app.services.task_registry import ALL_TASKS backend_codes = {t.code for t in ALL_TASKS} try: result = subprocess.run( [sys.executable, "-c", "from orchestration.task_registry import default_registry; " "print(','.join(sorted(default_registry.get_all_task_codes())))"], capture_output=True, text=True, timeout=15, cwd=ETL_PROJECT_PATH, encoding="utf-8", errors="replace", ) if result.returncode != 0: return SyncCheckResponse( in_sync=False, backend_only=[], etl_only=[], error=f"ETL 子进程失败: {result.stderr.strip()[:200]}", ) etl_codes = {c.strip() for c in result.stdout.strip().split(",") if c.strip()} except Exception as exc: return SyncCheckResponse( in_sync=False, backend_only=[], etl_only=[], error=f"无法连接 ETL: {exc}", ) backend_only = sorted(backend_codes - etl_codes) etl_only = sorted(etl_codes - backend_codes) return SyncCheckResponse( in_sync=len(backend_only) == 0 and len(etl_only) == 0, backend_only=backend_only, etl_only=etl_only, )