feat: TaskSelector v2 全链路展示 + 同步检查 + MCP Server + 服务器 Git 排除

- admin-web: TaskSelector 重构为按域+层全链路展示,新增同步检查功能
- admin-web: TaskConfig 动态加载 Flow/处理模式定义,DWD 表过滤内嵌域面板
- admin-web: App hydrate 完成前显示 loading,避免误跳 /login
- backend: 新增 /tasks/sync-check 对比后端与 ETL 真实注册表
- backend: 新增 /tasks/flows 返回 Flow 和处理模式定义
- apps/mcp-server: 新增 MCP Server 模块(百炼 AI PostgreSQL 只读查询)
- scripts/server: 新增 setup-server-git.py + server-exclude.txt
- docs: 更新 LAUNCH-CHECKLIST 添加 Git 排除配置步骤
- pyproject.toml: workspace members 新增 mcp-server
This commit is contained in:
Neo
2026-02-19 10:31:16 +08:00
parent 4eac07da47
commit 254ccb1e77
16 changed files with 2375 additions and 1285 deletions

View File

@@ -1,209 +1,264 @@
# -*- coding: utf-8 -*-
"""任务注册表 & 配置 API
提供 4 个端点:
- GET /api/tasks/registry — 按业务域分组的任务列表
- GET /api/tasks/dwd-tables — 按业务域分组的 DWD 表定义
- GET /api/tasks/flows — 7 种 Flow + 3 种处理模式
- POST /api/tasks/validate — 验证 TaskConfig 并返回 CLI 命令预览
所有端点需要 JWT 认证。validate 端点从 JWT 注入 store_id。
"""
from __future__ import annotations
from typing import Any
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from app.auth.dependencies import CurrentUser, get_current_user
from app.config import ETL_PROJECT_PATH
from app.schemas.tasks import (
FlowDefinition,
ProcessingModeDefinition,
TaskConfigSchema,
)
from app.services.cli_builder import cli_builder
from app.services.task_registry import (
DWD_TABLES,
FLOW_LAYER_MAP,
get_dwd_tables_grouped_by_domain,
get_tasks_grouped_by_domain,
)
router = APIRouter(prefix="/api/tasks", tags=["任务配置"])
# ── 响应模型 ──────────────────────────────────────────────────
class TaskItem(BaseModel):
    """One task entry as serialized in the task-registry response.

    Every field is copied one-to-one from the registry objects returned by
    ``get_tasks_grouped_by_domain()``.
    """

    # Identification and display text.
    code: str
    name: str
    description: str

    # Grouping keys. ``domain`` is the business domain the response is grouped
    # by; ``layer`` is presumably the warehouse layer (ODS/DWD/DWS/INDEX) —
    # confirm against the task registry.
    domain: str
    layer: str

    # Boolean flags forwarded unchanged from the registry entry.
    requires_window: bool
    is_ods: bool
    is_dimension: bool
    default_enabled: bool
    is_common: bool
class DwdTableItem(BaseModel):
    """One DWD table definition as serialized in the DWD-tables response.

    Every field is copied one-to-one from the objects returned by
    ``get_dwd_tables_grouped_by_domain()``.
    """

    # Table identifier and its human-readable label.
    table_name: str
    display_name: str

    # Business domain used as the grouping key in the response.
    domain: str

    # Name of the ODS source for this table, as recorded in the registry.
    ods_source: str

    # Flag forwarded unchanged from the registry entry.
    is_dimension: bool
class TaskRegistryResponse(BaseModel):
    """Task list grouped by business domain."""

    # Maps a business-domain name to the tasks that belong to it.
    groups: dict[str, list[TaskItem]]
class DwdTablesResponse(BaseModel):
    """DWD table definitions grouped by business domain."""

    # Maps a business-domain name to the DWD tables that belong to it.
    groups: dict[str, list[DwdTableItem]]
class FlowsResponse(BaseModel):
    """Flow definitions plus processing-mode definitions."""

    # Available Flows.
    flows: list[FlowDefinition]

    # Available processing modes.
    processing_modes: list[ProcessingModeDefinition]
class ValidateRequest(BaseModel):
    """Validation request body.

    Reuses ``TaskConfigSchema``; the ``store_id`` is injected by the backend
    rather than taken from the client.
    """

    # Task configuration to validate.
    config: TaskConfigSchema
class ValidateResponse(BaseModel):
    """Validation result plus a preview of the CLI command."""

    # True when no validation errors were found.
    valid: bool

    # Command rendered as a single string; empty when validation failed.
    command: str

    # Command as an argument list; empty when validation failed.
    command_args: list[str]

    # Validation error messages; empty when ``valid`` is True.
    errors: list[str]
# ── Flow 定义(静态) ────────────────────────────────────────
# Static catalogue of the seven Flows. Each spec row is
# (id, display name, layers) and is expanded into a FlowDefinition.
FLOW_DEFINITIONS: list[FlowDefinition] = [
    FlowDefinition(id=flow_id, name=label, layers=layers)
    for flow_id, label, layers in (
        ("api_ods", "API → ODS", ["ODS"]),
        ("api_ods_dwd", "API → ODS → DWD", ["ODS", "DWD"]),
        (
            "api_full",
            "API → ODS → DWD → DWS汇总 → DWS指数",
            ["ODS", "DWD", "DWS", "INDEX"],
        ),
        ("ods_dwd", "ODS → DWD", ["DWD"]),
        ("dwd_dws", "DWD → DWS汇总", ["DWS"]),
        ("dwd_dws_index", "DWD → DWS汇总 → DWS指数", ["DWS", "INDEX"]),
        ("dwd_index", "DWD → DWS指数", ["INDEX"]),
    )
]
# Static catalogue of the three processing modes. Each spec row is
# (id, display name, description) and is expanded into a
# ProcessingModeDefinition.
PROCESSING_MODE_DEFINITIONS: list[ProcessingModeDefinition] = [
    ProcessingModeDefinition(id=mode_id, name=label, description=summary)
    for mode_id, label, summary in (
        ("increment_only", "仅增量处理", "只处理新增和变更的数据"),
        (
            "verify_only",
            "仅校验修复",
            "校验现有数据并修复不一致(可选'校验前从 API 获取'",
        ),
        ("increment_verify", "增量 + 校验修复", "先增量处理,再校验并修复"),
    )
]
# ── 端点 ──────────────────────────────────────────────────────
@router.get("/registry", response_model=TaskRegistryResponse)
async def get_task_registry(
    user: CurrentUser = Depends(get_current_user),
) -> TaskRegistryResponse:
    """Return the task list grouped by business domain.

    Args:
        user: Authenticated user resolved by ``get_current_user``. It is not
            read in the body; the dependency only enforces authentication.

    Returns:
        A ``TaskRegistryResponse`` whose ``groups`` maps each domain to its
        tasks, in the order given by ``get_tasks_grouped_by_domain()``.
    """
    groups: dict[str, list[TaskItem]] = {}
    for domain, tasks in get_tasks_grouped_by_domain().items():
        items: list[TaskItem] = []
        for task in tasks:
            # Copy the registry entry field by field into the response model.
            items.append(
                TaskItem(
                    code=task.code,
                    name=task.name,
                    description=task.description,
                    domain=task.domain,
                    layer=task.layer,
                    requires_window=task.requires_window,
                    is_ods=task.is_ods,
                    is_dimension=task.is_dimension,
                    default_enabled=task.default_enabled,
                    is_common=task.is_common,
                )
            )
        groups[domain] = items
    return TaskRegistryResponse(groups=groups)
@router.get("/dwd-tables", response_model=DwdTablesResponse)
async def get_dwd_tables(
    user: CurrentUser = Depends(get_current_user),
) -> DwdTablesResponse:
    """Return the DWD table definitions grouped by business domain.

    Args:
        user: Authenticated user resolved by ``get_current_user``. It is not
            read in the body; the dependency only enforces authentication.

    Returns:
        A ``DwdTablesResponse`` whose ``groups`` maps each domain to its DWD
        tables, in the order given by ``get_dwd_tables_grouped_by_domain()``.
    """
    groups: dict[str, list[DwdTableItem]] = {}
    for domain, tables in get_dwd_tables_grouped_by_domain().items():
        items: list[DwdTableItem] = []
        for table in tables:
            # Copy the registry entry field by field into the response model.
            items.append(
                DwdTableItem(
                    table_name=table.table_name,
                    display_name=table.display_name,
                    domain=table.domain,
                    ods_source=table.ods_source,
                    is_dimension=table.is_dimension,
                )
            )
        groups[domain] = items
    return DwdTablesResponse(groups=groups)
@router.get("/flows", response_model=FlowsResponse)
async def get_flows(
    user: CurrentUser = Depends(get_current_user),
) -> FlowsResponse:
    """Return the 7 Flow definitions and the 3 processing-mode definitions.

    Args:
        user: Authenticated user resolved by ``get_current_user``. It is not
            read in the body; the dependency only enforces authentication.

    Returns:
        A ``FlowsResponse`` wrapping the module-level static catalogues.
    """
    response = FlowsResponse(
        flows=FLOW_DEFINITIONS,
        processing_modes=PROCESSING_MODE_DEFINITIONS,
    )
    return response
@router.post("/validate", response_model=ValidateResponse)
async def validate_task_config(
    body: ValidateRequest,
    user: CurrentUser = Depends(get_current_user),
) -> ValidateResponse:
    """Validate a TaskConfig and return a preview of the generated CLI command.

    The ``store_id`` is injected from the authenticated user's ``site_id``, so
    the frontend does not need to send it.

    Args:
        body: Request body carrying the task configuration.
        user: Authenticated user resolved by ``get_current_user``.

    Returns:
        A ``ValidateResponse``. When any check fails, ``valid`` is False, the
        command fields are empty, and ``errors`` lists every failed check.
        Otherwise it carries the command as both a string and an argument list.
    """
    config = body.config.model_copy(update={"store_id": user.site_id})

    # Each row pairs "did this check fail?" with the message to report.
    checks = (
        # The Flow ID must be one the backend knows about.
        (config.pipeline not in FLOW_LAYER_MAP, f"无效的执行流程: {config.pipeline}"),
        # The task list must not be empty.
        (not config.tasks, "任务列表不能为空"),
    )
    problems: list[str] = [message for failed, message in checks if failed]

    if problems:
        return ValidateResponse(
            valid=False,
            command="",
            command_args=[],
            errors=problems,
        )

    # Build the argument list first, then the string form.
    argv = cli_builder.build_command(config, ETL_PROJECT_PATH)
    rendered = cli_builder.build_command_string(config, ETL_PROJECT_PATH)
    return ValidateResponse(
        valid=True,
        command=rendered,
        command_args=argv,
        errors=[],
    )
# -*- coding: utf-8 -*-
"""任务注册表 & 配置 API
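Provides 5 endpoints:
- GET /api/tasks/registry — task list grouped by business domain
- GET /api/tasks/dwd-tables — DWD table definitions grouped by business domain
- GET /api/tasks/flows — 7 Flows + 3 processing modes
- POST /api/tasks/validate — validate a TaskConfig and return a CLI command preview
- GET /api/tasks/sync-check — compare the backend task list with the real ETL registry
All endpoints require JWT authentication. The validate endpoint injects store_id from the JWT.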
"""
from __future__ import annotations
from typing import Any
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from app.auth.dependencies import CurrentUser, get_current_user
from app.config import ETL_PROJECT_PATH
from app.schemas.tasks import (
FlowDefinition,
ProcessingModeDefinition,
TaskConfigSchema,
)
from app.services.cli_builder import cli_builder
from app.services.task_registry import (
DWD_TABLES,
FLOW_LAYER_MAP,
get_dwd_tables_grouped_by_domain,
get_tasks_grouped_by_domain,
)
router = APIRouter(prefix="/api/tasks", tags=["任务配置"])
# ── 响应模型 ──────────────────────────────────────────────────
class TaskItem(BaseModel):
    """One task entry as serialized in the task-registry response.

    Every field is copied one-to-one from the registry objects returned by
    ``get_tasks_grouped_by_domain()``.
    """

    # Identification and display text.
    code: str
    name: str
    description: str

    # Grouping keys. ``domain`` is the business domain the response is grouped
    # by; ``layer`` is presumably the warehouse layer (ODS/DWD/DWS/INDEX) —
    # confirm against the task registry.
    domain: str
    layer: str

    # Boolean flags forwarded unchanged from the registry entry.
    requires_window: bool
    is_ods: bool
    is_dimension: bool
    default_enabled: bool
    is_common: bool
class DwdTableItem(BaseModel):
    """One DWD table definition as serialized in the DWD-tables response.

    Every field is copied one-to-one from the objects returned by
    ``get_dwd_tables_grouped_by_domain()``.
    """

    # Table identifier and its human-readable label.
    table_name: str
    display_name: str

    # Business domain used as the grouping key in the response.
    domain: str

    # Name of the ODS source for this table, as recorded in the registry.
    ods_source: str

    # Flag forwarded unchanged from the registry entry.
    is_dimension: bool
class TaskRegistryResponse(BaseModel):
    """Task list grouped by business domain."""

    # Maps a business-domain name to the tasks that belong to it.
    groups: dict[str, list[TaskItem]]
class DwdTablesResponse(BaseModel):
    """DWD table definitions grouped by business domain."""

    # Maps a business-domain name to the DWD tables that belong to it.
    groups: dict[str, list[DwdTableItem]]
class FlowsResponse(BaseModel):
    """Flow definitions plus processing-mode definitions."""

    # Available Flows.
    flows: list[FlowDefinition]

    # Available processing modes.
    processing_modes: list[ProcessingModeDefinition]
class ValidateRequest(BaseModel):
    """Validation request body.

    Reuses ``TaskConfigSchema``; the ``store_id`` is injected by the backend
    rather than taken from the client.
    """

    # Task configuration to validate.
    config: TaskConfigSchema
class ValidateResponse(BaseModel):
    """Validation result plus a preview of the CLI command."""

    # True when no validation errors were found.
    valid: bool

    # Command rendered as a single string; empty when validation failed.
    command: str

    # Command as an argument list; empty when validation failed.
    command_args: list[str]

    # Validation error messages; empty when ``valid`` is True.
    errors: list[str]
# ── Flow 定义(静态) ────────────────────────────────────────
# Static catalogue of the seven Flows. Each spec row is
# (id, display name, layers) and is expanded into a FlowDefinition.
FLOW_DEFINITIONS: list[FlowDefinition] = [
    FlowDefinition(id=flow_id, name=label, layers=layers)
    for flow_id, label, layers in (
        ("api_ods", "API → ODS", ["ODS"]),
        ("api_ods_dwd", "API → ODS → DWD", ["ODS", "DWD"]),
        (
            "api_full",
            "API → ODS → DWD → DWS汇总 → DWS指数",
            ["ODS", "DWD", "DWS", "INDEX"],
        ),
        ("ods_dwd", "ODS → DWD", ["DWD"]),
        ("dwd_dws", "DWD → DWS汇总", ["DWS"]),
        ("dwd_dws_index", "DWD → DWS汇总 → DWS指数", ["DWS", "INDEX"]),
        ("dwd_index", "DWD → DWS指数", ["INDEX"]),
    )
]
# Static catalogue of the three processing modes. Each spec row is
# (id, display name, description) and is expanded into a
# ProcessingModeDefinition.
PROCESSING_MODE_DEFINITIONS: list[ProcessingModeDefinition] = [
    ProcessingModeDefinition(id=mode_id, name=label, description=summary)
    for mode_id, label, summary in (
        ("increment_only", "仅增量处理", "只处理新增和变更的数据"),
        ("verify_only", "仅校验修复", "校验现有数据并修复不一致"),
        ("increment_verify", "增量 + 校验修复", "先增量处理,再校验并修复"),
    )
]
# ── 端点 ──────────────────────────────────────────────────────
@router.get("/registry", response_model=TaskRegistryResponse)
async def get_task_registry(
    user: CurrentUser = Depends(get_current_user),
) -> TaskRegistryResponse:
    """Return the task list grouped by business domain.

    Args:
        user: Authenticated user resolved by ``get_current_user``. It is not
            read in the body; the dependency only enforces authentication.

    Returns:
        A ``TaskRegistryResponse`` whose ``groups`` maps each domain to its
        tasks, in the order given by ``get_tasks_grouped_by_domain()``.
    """
    groups: dict[str, list[TaskItem]] = {}
    for domain, tasks in get_tasks_grouped_by_domain().items():
        items: list[TaskItem] = []
        for task in tasks:
            # Copy the registry entry field by field into the response model.
            items.append(
                TaskItem(
                    code=task.code,
                    name=task.name,
                    description=task.description,
                    domain=task.domain,
                    layer=task.layer,
                    requires_window=task.requires_window,
                    is_ods=task.is_ods,
                    is_dimension=task.is_dimension,
                    default_enabled=task.default_enabled,
                    is_common=task.is_common,
                )
            )
        groups[domain] = items
    return TaskRegistryResponse(groups=groups)
@router.get("/dwd-tables", response_model=DwdTablesResponse)
async def get_dwd_tables(
    user: CurrentUser = Depends(get_current_user),
) -> DwdTablesResponse:
    """Return the DWD table definitions grouped by business domain.

    Args:
        user: Authenticated user resolved by ``get_current_user``. It is not
            read in the body; the dependency only enforces authentication.

    Returns:
        A ``DwdTablesResponse`` whose ``groups`` maps each domain to its DWD
        tables, in the order given by ``get_dwd_tables_grouped_by_domain()``.
    """
    groups: dict[str, list[DwdTableItem]] = {}
    for domain, tables in get_dwd_tables_grouped_by_domain().items():
        items: list[DwdTableItem] = []
        for table in tables:
            # Copy the registry entry field by field into the response model.
            items.append(
                DwdTableItem(
                    table_name=table.table_name,
                    display_name=table.display_name,
                    domain=table.domain,
                    ods_source=table.ods_source,
                    is_dimension=table.is_dimension,
                )
            )
        groups[domain] = items
    return DwdTablesResponse(groups=groups)
@router.get("/flows", response_model=FlowsResponse)
async def get_flows(
    user: CurrentUser = Depends(get_current_user),
) -> FlowsResponse:
    """Return the 7 Flow definitions and the 3 processing-mode definitions.

    Args:
        user: Authenticated user resolved by ``get_current_user``. It is not
            read in the body; the dependency only enforces authentication.

    Returns:
        A ``FlowsResponse`` wrapping the module-level static catalogues.
    """
    response = FlowsResponse(
        flows=FLOW_DEFINITIONS,
        processing_modes=PROCESSING_MODE_DEFINITIONS,
    )
    return response
@router.post("/validate", response_model=ValidateResponse)
async def validate_task_config(
    body: ValidateRequest,
    user: CurrentUser = Depends(get_current_user),
) -> ValidateResponse:
    """Validate a TaskConfig and return a preview of the generated CLI command.

    The ``store_id`` is injected from the authenticated user's ``site_id``, so
    the frontend does not need to send it.

    Args:
        body: Request body carrying the task configuration.
        user: Authenticated user resolved by ``get_current_user``.

    Returns:
        A ``ValidateResponse``. When any check fails, ``valid`` is False, the
        command fields are empty, and ``errors`` lists every failed check.
        Otherwise it carries the command as both a string and an argument list.
    """
    config = body.config.model_copy(update={"store_id": user.site_id})

    # Each row pairs "did this check fail?" with the message to report.
    checks = (
        # The Flow ID must be one the backend knows about.
        (config.pipeline not in FLOW_LAYER_MAP, f"无效的执行流程: {config.pipeline}"),
        # The task list must not be empty.
        (not config.tasks, "任务列表不能为空"),
    )
    problems: list[str] = [message for failed, message in checks if failed]

    if problems:
        return ValidateResponse(
            valid=False,
            command="",
            command_args=[],
            errors=problems,
        )

    # Build the argument list first, then the string form.
    argv = cli_builder.build_command(config, ETL_PROJECT_PATH)
    rendered = cli_builder.build_command_string(config, ETL_PROJECT_PATH)
    return ValidateResponse(
        valid=True,
        command=rendered,
        command_args=argv,
        errors=[],
    )
# ── GET /api/tasks/sync-check — 对比 ETL 真实注册表 ──────────
class SyncCheckResponse(BaseModel):
    """Result of the backend-vs-ETL task registry sync check."""

    # True only when both difference lists below are empty.
    in_sync: bool

    # Task codes known to the backend but missing from the ETL registry.
    backend_only: list[str]

    # Task codes registered in the ETL but missing from the backend.
    etl_only: list[str]

    # Failure description when the ETL registry could not be read, else None.
    error: str | None = None
@router.get("/sync-check", response_model=SyncCheckResponse)
async def sync_check(
    user: CurrentUser = Depends(get_current_user),
) -> SyncCheckResponse:
    """Compare the backend's hard-coded task list with the real ETL registry.

    The ETL task codes are read by running a short Python snippet in a child
    process with ``ETL_PROJECT_PATH`` as its working directory, which avoids
    importing ETL code into the backend process.

    Args:
        user: Authenticated user resolved by ``get_current_user``. It is not
            read in the body; the dependency only enforces authentication.

    Returns:
        A ``SyncCheckResponse``. On success it lists the task codes present on
        only one side, and ``in_sync`` is True when both lists are empty. If
        the child process cannot be started, times out, or exits non-zero,
        ``in_sync`` is False, both lists are empty, and ``error`` describes
        the failure.
    """
    import asyncio
    import subprocess
    import sys

    from app.services.task_registry import ALL_TASKS

    backend_codes = {t.code for t in ALL_TASKS}

    # Snippet run inside the ETL project: prints all registered task codes as
    # one comma-separated line.
    probe = (
        "from orchestration.task_registry import default_registry; "
        "print(','.join(sorted(default_registry.get_all_task_codes())))"
    )

    try:
        # subprocess.run blocks for up to `timeout` seconds. Run it in a
        # worker thread so this coroutine does not stall the event loop (and
        # with it every other in-flight request) while the child runs.
        result = await asyncio.to_thread(
            subprocess.run,
            [sys.executable, "-c", probe],
            capture_output=True,
            text=True,
            timeout=15,
            cwd=ETL_PROJECT_PATH,
            encoding="utf-8",
            errors="replace",
        )
    except Exception as exc:
        # Deliberately broad: any launch failure or timeout is reported to the
        # client as a structured error instead of surfacing as an HTTP 500.
        return SyncCheckResponse(
            in_sync=False, backend_only=[], etl_only=[],
            error=f"无法连接 ETL: {exc}",
        )

    if result.returncode != 0:
        # Cap the stderr excerpt so a long traceback does not bloat the response.
        return SyncCheckResponse(
            in_sync=False, backend_only=[], etl_only=[],
            error=f"ETL 子进程失败: {result.stderr.strip()[:200]}",
        )

    # Empty output yields an empty set rather than {""}.
    etl_codes = {c.strip() for c in result.stdout.strip().split(",") if c.strip()}

    backend_only = sorted(backend_codes - etl_codes)
    etl_only = sorted(etl_codes - backend_codes)
    return SyncCheckResponse(
        in_sync=not backend_only and not etl_only,
        backend_only=backend_only,
        etl_only=etl_only,
    )