feat: TaskSelector v2 全链路展示 + 同步检查 + MCP Server + 服务器 Git 排除

- admin-web: TaskSelector 重构为按域+层全链路展示,新增同步检查功能
- admin-web: TaskConfig 动态加载 Flow/处理模式定义,DWD 表过滤内嵌域面板
- admin-web: App hydrate 完成前显示 loading,避免误跳 /login
- backend: 新增 /tasks/sync-check 对比后端与 ETL 真实注册表
- backend: 新增 /tasks/flows 返回 Flow 和处理模式定义
- apps/mcp-server: 新增 MCP Server 模块(百炼 AI PostgreSQL 只读查询)
- scripts/server: 新增 setup-server-git.py + server-exclude.txt
- docs: 更新 LAUNCH-CHECKLIST 添加 Git 排除配置步骤
- pyproject.toml: workspace members 新增 mcp-server
This commit is contained in:
Neo
2026-02-19 10:31:16 +08:00
parent 4eac07da47
commit 254ccb1e77
16 changed files with 2375 additions and 1285 deletions

View File

@@ -1,17 +1,18 @@
# apps/
## 作用说明
应用项目顶层目录,存放所有可独立部署/运行的子项目。当前包含 ETL Connector、FastAPI 后端、微信小程序前端,以及预留的管理后台。
## 内部结构
- `etl/pipelines/feiqiu/` — 飞球 Connector数据源连接器抽取→清洗→汇总全流程
- `backend/` — FastAPI 后端(小程序 API、权限、审批
- `miniprogram/` — 微信小程序前端Donut + TDesign
- `admin-web/` — 管理后台(预留,暂未实施)
## Roadmap
- 新增更多 Connector 时,在 `etl/pipelines/` 下按平台名创建子目录
- `admin-web/` 待产品需求确认后启动
# apps/
## 作用说明
应用项目顶层目录,存放所有可独立部署/运行的子项目。当前包含 ETL Connector、FastAPI 后端、微信小程序前端、MCP Server,以及预留的管理后台。
## 内部结构
- `etl/pipelines/feiqiu/` — 飞球 Connector(数据源连接器,抽取→清洗→汇总全流程)
- `backend/` — FastAPI 后端(小程序 API、权限、审批)
- `miniprogram/` — 微信小程序前端(Donut + TDesign)
- `admin-web/` — 管理后台(预留,暂未实施)
- `mcp-server/` — MCP Server(为百炼 AI 应用提供 PostgreSQL 只读查询)
## Roadmap
- 新增更多 Connector 时,在 `etl/pipelines/` 下按平台名创建子目录
- `admin-web/` 待产品需求确认后启动

View File

@@ -175,8 +175,15 @@ const AppLayout: React.FC = () => {
const App: React.FC = () => {
const hydrate = useAuthStore((s) => s.hydrate);
const [hydrated, setHydrated] = useState(false);
useEffect(() => { hydrate(); }, [hydrate]);
useEffect(() => {
hydrate();
setHydrated(true);
}, [hydrate]);
/* hydrate 完成前不渲染路由,避免 PrivateRoute 误判跳转到 /login */
if (!hydrated) return <Spin style={{ display: "flex", justifyContent: "center", marginTop: 120 }} />;
return (
<Routes>

View File

@@ -1,32 +1,77 @@
/**
* 任务相关 API 调用。
*
* - fetchTaskRegistry获取按业务域分组的任务注册表
*/
import { apiClient } from './client';
import type { TaskConfig, TaskDefinition } from '../types';
/** 获取按业务域分组的任务注册表 */
export async function fetchTaskRegistry(): Promise<Record<string, TaskDefinition[]>> {
// 后端返回 { groups: { 域名: [TaskItem] } },需要解包
const { data } = await apiClient.get<{ groups: Record<string, TaskDefinition[]> }>('/tasks/registry');
return data.groups;
}
/** 获取按业务域分组的 DWD 表定义 */
export async function fetchDwdTables(): Promise<Record<string, string[]>> {
// 后端返回 { groups: { 域名: [DwdTableItem] } },需要解包并提取 table_name
const { data } = await apiClient.get<{ groups: Record<string, { table_name: string }[]> }>('/tasks/dwd-tables');
const result: Record<string, string[]> = {};
for (const [domain, items] of Object.entries(data.groups)) {
result[domain] = items.map((item) => item.table_name);
}
return result;
}
/** 验证任务配置并返回生成的 CLI 命令预览 */
export async function validateTaskConfig(config: TaskConfig): Promise<{ command: string }> {
const { data } = await apiClient.post<{ command: string }>('/tasks/validate', { config });
return data;
}
/**
* 任务相关 API 调用。
*/
import { apiClient } from './client';
import type { TaskConfig, TaskDefinition } from '../types';
/**
 * One DWD table entry, in the raw shape returned by the backend
 * (element type of the groups in the `/tasks/dwd-tables` response).
 */
export interface DwdTableItem {
/** Table identifier; `fetchDwdTables` extracts exactly this field for its names-only view. */
table_name: string;
/** Display label for the table. */
display_name: string;
/** Business domain of the table. NOTE(review): presumably equal to the group key it is returned under — confirm. */
domain: string;
/** NOTE(review): presumably the ODS source this DWD table is loaded from — confirm against the backend schema. */
ods_source: string;
/** Flag marking the table as a dimension table (exact semantics are defined by the backend). */
is_dimension: boolean;
}
/** Flow definition, as returned in the `flows` array of the `/tasks/flows` response. */
export interface FlowDef {
/** Flow identifier. */
id: string;
/** Display name of the flow. */
name: string;
/** Layer codes that belong to this flow. NOTE(review): presumably values such as "ODS" / "DWD" — confirm with the backend. */
layers: string[];
}
/** Processing-mode definition, as returned in the `processing_modes` array of the `/tasks/flows` response. */
export interface ProcessingModeDef {
/** Processing-mode identifier. */
id: string;
/** Display name of the mode. */
name: string;
/** Longer description of the mode. */
description: string;
}
/** Result of the sync check returned by `/tasks/sync-check`. */
export interface SyncCheckResult {
/** Whether the two task lists were reported as being in sync. */
in_sync: boolean;
/** NOTE(review): presumably task codes known only to the backend registry — confirm with the endpoint. */
backend_only: string[];
/** NOTE(review): presumably task codes known only to the ETL registry — confirm with the endpoint. */
etl_only: string[];
/** Error message when the check could not be completed, otherwise `null`. */
error: string | null;
}
/**
 * Fetch the task registry grouped by business domain.
 *
 * @returns The `groups` map unwrapped from the `/tasks/registry` response.
 */
export async function fetchTaskRegistry(): Promise<Record<string, TaskDefinition[]>> {
  type RegistryPayload = { groups: Record<string, TaskDefinition[]> };
  const response = await apiClient.get<RegistryPayload>('/tasks/registry');
  return response.data.groups;
}
/**
 * Fetch the DWD table definitions grouped by business domain, keeping the
 * full per-table structure.
 *
 * @returns The `groups` map unwrapped from the `/tasks/dwd-tables` response.
 */
export async function fetchDwdTablesRich(): Promise<Record<string, DwdTableItem[]>> {
  type DwdTablesPayload = { groups: Record<string, DwdTableItem[]> };
  const response = await apiClient.get<DwdTablesPayload>('/tasks/dwd-tables');
  return response.data.groups;
}
/**
 * Fetch the DWD table definitions grouped by business domain, reduced to
 * table names only (kept for callers written against the older API shape).
 *
 * @returns A map from domain to the `table_name` of every table in that domain.
 */
export async function fetchDwdTables(): Promise<Record<string, string[]>> {
  const richGroups = await fetchDwdTablesRich();
  const namesByDomain: Record<string, string[]> = {};
  Object.entries(richGroups).forEach(([domain, tables]) => {
    namesByDomain[domain] = tables.map(({ table_name }) => table_name);
  });
  return namesByDomain;
}
/**
 * Fetch the Flow definitions together with the processing-mode definitions.
 *
 * @returns The `/tasks/flows` response body as-is.
 */
export async function fetchFlows(): Promise<{ flows: FlowDef[]; processing_modes: ProcessingModeDef[] }> {
  type FlowsPayload = { flows: FlowDef[]; processing_modes: ProcessingModeDef[] };
  const response = await apiClient.get<FlowsPayload>('/tasks/flows');
  return response.data;
}
/**
 * Validate a task configuration and get back a preview of the generated CLI command.
 *
 * @param config Task configuration to validate; sent as `{ config }` in the POST body.
 * @returns The `/tasks/validate` response body, carrying the `command` string.
 */
export async function validateTaskConfig(config: TaskConfig): Promise<{ command: string }> {
  type ValidatePayload = { command: string };
  const response = await apiClient.post<ValidatePayload>('/tasks/validate', { config });
  return response.data;
}
/**
 * Ask the backend to compare its task list against the real ETL registry.
 *
 * @returns The `/tasks/sync-check` response body as-is.
 */
export async function checkTaskSync(): Promise<SyncCheckResult> {
  const response = await apiClient.get<SyncCheckResult>('/tasks/sync-check');
  return response.data;
}

View File

@@ -1,307 +1,445 @@
/**
* 按业务域分组的任务选择器。
* 按业务域全链路展示的任务选择器v2
*
* 从 /api/tasks/registry 获取任务注册表,按业务域折叠展示,
* 支持全选/反选和按 Flow 层级过滤。
* 当 Flow 包含 DWD 层时,在 DWD 任务下方内嵌表过滤子选项。
* 每个业务域一个折叠面板,内部按层分组展示完整链路:
* ODS 任务 → DWD 表(该域的) → DWS/INDEX 任务
*
* 功能:
* - 同步检查:工具栏右侧 Badge 指示,点击展示差异 Modal
* - 全选常用 / 全选 / 反选 / 清空 按钮
* - DWD 表选中 = 过滤 DWD_LOAD_FROM_ODS 的装载范围
*/
import React, { useEffect, useState, useMemo, useCallback } from "react";
import {
Collapse,
Checkbox,
Spin,
Alert,
Button,
Space,
Typography,
Tag,
Divider,
Collapse, Checkbox, Spin, Alert, Button, Space, Typography,
Tag, Badge, Modal, Tooltip, Divider,
} from "antd";
import {
CheckCircleOutlined, WarningOutlined, SyncOutlined, TableOutlined,
} from "@ant-design/icons";
import type { CheckboxChangeEvent } from "antd/es/checkbox";
import { fetchTaskRegistry, fetchDwdTables } from "../api/tasks";
import type { TaskDefinition } from "../types";
import { fetchTaskRegistry, fetchDwdTablesRich, checkTaskSync } from "../api/tasks";
import type { DwdTableItem as ApiDwdTableItem, SyncCheckResult } from "../api/tasks";
import type { TaskDefinition, DwdTableItem } from "../types";
const { Text } = Typography;
/* ------------------------------------------------------------------ */
/* Props */
/* ------------------------------------------------------------------ */
/* Per-layer lookup tables: sort rank, display label, and tag color. */
/* Sort rank of each layer; buildDomainGroups falls back to 99 for layers not listed here. */
const LAYER_ORDER: Record<string, number> = { ODS: 0, DWD: 1, DWS: 2, INDEX: 3, UTILITY: 4 };
/* Label shown for each layer; the render code falls back to the raw layer code when a layer is missing. */
const LAYER_LABELS: Record<string, string> = {
ODS: "ODS 抽取", DWD: "DWD 装载", DWS: "DWS 汇总", INDEX: "DWS 指数", UTILITY: "工具",
};
/* antd Tag color used for each layer; the render code falls back to "default". */
const LAYER_COLORS: Record<string, string> = {
ODS: "blue", DWD: "green", DWS: "orange", INDEX: "purple", UTILITY: "default",
};
/* Sort rank of each business domain; buildDomainGroups falls back to 99 for domains not listed here. */
const DOMAIN_ORDER: Record<string, number> = {
助教: 0, 结算: 1, 台桌: 2, 会员: 3, 商品: 4, 团购: 5, 库存: 6, 财务: 7, 指数: 8, 通用: 9, 工具: 10,
};
/** Props accepted by the TaskSelector component. */
export interface TaskSelectorProps {
/** Layers included in the current Flow (e.g. ["ODS", "DWD"]). */
layers: string[];
/** Codes of the currently selected tasks. */
selectedTasks: string[];
/** Callback invoked with the new list of task codes when the selection changes. */
onTasksChange: (tasks: string[]) => void;
/** DWD table filter: names of the currently selected tables. */
selectedDwdTables?: string[];
/** Callback invoked with the new list of table names when the DWD table filter changes. */
onDwdTablesChange?: (tables: string[]) => void;
}
/* ------------------------------------------------------------------ */
/* 过滤逻辑 */
/* ------------------------------------------------------------------ */
export function filterTasksByLayers(
tasks: TaskDefinition[],
layers: string[],
): TaskDefinition[] {
if (layers.length === 0) return [];
return tasks;
interface DomainGroup {
domain: string;
layerTasks: { layer: string; tasks: TaskDefinition[] }[];
dwdTables: DwdTableItem[];
}
/* ------------------------------------------------------------------ */
/* 组件 */
/* ------------------------------------------------------------------ */
/**
 * Task codes that are not listed as separate entries when `layers` contains DWD:
 * DWD_LOAD_FROM_ODS is implied by the DWD table filter section instead.
 */
const HIDDEN_WHEN_DWD_VISIBLE = new Set(["DWD_LOAD_FROM_ODS"]);
/**
 * Group the task registry by business domain and, inside each domain, by layer.
 *
 * Rules applied:
 * - a task is kept only when its `layer` is listed in `layers`;
 * - when `layers` contains "DWD", codes in HIDDEN_WHEN_DWD_VISIBLE are dropped and
 *   every domain gets its DWD tables attached (domains that only have DWD tables
 *   still produce a group);
 * - layers inside a domain are ordered by LAYER_ORDER, domains by DOMAIN_ORDER
 *   (unknown keys rank 99, ties keep first-seen order);
 * - inside a layer, tasks with a truthy `is_common` come first, otherwise the
 *   registry order is kept.
 *
 * @param registry       Task definitions keyed by registry group; grouping uses each task's own `domain`.
 * @param dwdTableGroups DWD tables keyed by domain.
 * @param layers         Layers of the current Flow.
 * @returns One entry per domain that has at least one visible task or DWD table.
 */
function buildDomainGroups(
  registry: Record<string, TaskDefinition[]>,
  dwdTableGroups: Record<string, DwdTableItem[]>,
  layers: string[],
): DomainGroup[] {
  const activeLayers = new Set(layers);
  const dwdVisible = activeLayers.has("DWD");

  // Compare by truthiness: a missing `is_common` ranks like `false`, which keeps the
  // comparator consistent (the raw `===` form returned 1 for both undefined/false orders).
  const commonFirst = (a: TaskDefinition, b: TaskDefinition): number =>
    Number(Boolean(b.is_common)) - Number(Boolean(a.is_common));

  // Map iteration follows insertion order, so domains and layers keep first-seen order.
  const tasksByDomainLayer = new Map<string, Map<string, TaskDefinition[]>>();
  for (const tasks of Object.values(registry)) {
    for (const task of tasks) {
      if (!activeLayers.has(task.layer)) continue;
      if (dwdVisible && HIDDEN_WHEN_DWD_VISIBLE.has(task.code)) continue;

      let layerMap = tasksByDomainLayer.get(task.domain);
      if (layerMap === undefined) {
        layerMap = new Map<string, TaskDefinition[]>();
        tasksByDomainLayer.set(task.domain, layerMap);
      }
      const bucket = layerMap.get(task.layer);
      if (bucket === undefined) layerMap.set(task.layer, [task]);
      else bucket.push(task);
    }
  }

  // Domains that have tasks come first; DWD-only domains are appended afterwards.
  const domains = new Set<string>(tasksByDomainLayer.keys());
  if (dwdVisible) {
    for (const domain of Object.keys(dwdTableGroups)) domains.add(domain);
  }

  const groups: DomainGroup[] = [];
  for (const domain of domains) {
    const layerMap = tasksByDomainLayer.get(domain);
    const layerEntries: [string, TaskDefinition[]][] = layerMap ? [...layerMap.entries()] : [];
    layerEntries.sort(([a], [b]) => (LAYER_ORDER[a] ?? 99) - (LAYER_ORDER[b] ?? 99));

    // The buckets are arrays created above, so sorting them in place touches no caller data.
    const layerTasks = layerEntries.map(([layer, tasks]) => ({ layer, tasks: tasks.sort(commonFirst) }));
    const dwdTables = dwdVisible ? (dwdTableGroups[domain] ?? []) : [];

    if (layerTasks.length > 0 || dwdTables.length > 0) {
      groups.push({ domain, layerTasks, dwdTables });
    }
  }

  groups.sort((a, b) => (DOMAIN_ORDER[a.domain] ?? 99) - (DOMAIN_ORDER[b.domain] ?? 99));
  return groups;
}
const TaskSelector: React.FC<TaskSelectorProps> = ({
layers,
selectedTasks,
onTasksChange,
selectedDwdTables = [],
onDwdTablesChange,
layers, selectedTasks, onTasksChange,
selectedDwdTables = [], onDwdTablesChange,
}) => {
const [registry, setRegistry] = useState<Record<string, TaskDefinition[]>>({});
const [dwdTableGroups, setDwdTableGroups] = useState<Record<string, DwdTableItem[]>>({});
const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null);
const [syncResult, setSyncResult] = useState<SyncCheckResult | null>(null);
const [syncLoading, setSyncLoading] = useState(false);
const [syncModalOpen, setSyncModalOpen] = useState(false);
// DWD 表定义(按域分组)
const [dwdTableGroups, setDwdTableGroups] = useState<Record<string, string[]>>({});
const showDwdFilter = layers.includes("DWD") && !!onDwdTablesChange;
/* ---------- 加载任务注册表 ---------- */
/* 加载数据 */
useEffect(() => {
let cancelled = false;
setLoading(true);
setError(null);
const promises: Promise<void>[] = [
fetchTaskRegistry()
.then((data) => { if (!cancelled) setRegistry(data); })
.catch((err) => { if (!cancelled) setError(err?.message ?? "获取任务列表失败"); }),
];
// 如果包含 DWD 层,同时加载 DWD 表定义
if (layers.includes("DWD")) {
promises.push(
fetchDwdTables()
.then((data) => { if (!cancelled) setDwdTableGroups(data); })
.catch(() => { /* DWD 表加载失败不阻塞任务列表 */ }),
fetchDwdTablesRich()
.then((data) => {
if (cancelled) return;
const converted: Record<string, DwdTableItem[]> = {};
for (const [domain, items] of Object.entries(data)) {
converted[domain] = items.map((item: ApiDwdTableItem) => ({
table_name: item.table_name, display_name: item.display_name,
domain: item.domain, ods_source: item.ods_source, is_dimension: item.is_dimension,
}));
}
setDwdTableGroups(converted);
})
.catch(() => {}),
);
}
Promise.all(promises).finally(() => { if (!cancelled) setLoading(false); });
return () => { cancelled = true; };
}, [layers]);
/* ---------- 按 layers 过滤后的分组 ---------- */
const filteredGroups = useMemo(() => {
const result: Record<string, TaskDefinition[]> = {};
for (const [domain, tasks] of Object.entries(registry)) {
const visible = filterTasksByLayers(tasks, layers);
if (visible.length > 0) {
result[domain] = [...visible].sort((a, b) => {
if (a.is_common === b.is_common) return 0;
return a.is_common ? -1 : 1;
});
}
}
return result;
}, [registry, layers]);
/* 首次加载后自动同步检查 */
useEffect(() => {
if (Object.keys(registry).length > 0) handleSyncCheck();
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [registry]);
const domainGroups = useMemo(
() => buildDomainGroups(registry, dwdTableGroups, layers),
[registry, dwdTableGroups, layers],
);
const allVisibleCodes = useMemo(
() => Object.values(filteredGroups).flatMap((t) => t.map((d) => d.code)),
[filteredGroups],
() => domainGroups.flatMap((g) => g.layerTasks.flatMap((lt) => lt.tasks.map((t) => t.code))),
[domainGroups],
);
const allCommonCodes = useMemo(
() => domainGroups.flatMap((g) =>
g.layerTasks.flatMap((lt) => lt.tasks.filter((t) => t.is_common).map((t) => t.code)),
),
[domainGroups],
);
// DWD 表扁平列表
const allDwdTableNames = useMemo(
() => Object.values(dwdTableGroups).flat(),
[dwdTableGroups],
);
/* 同步检查 */
const handleSyncCheck = useCallback(async () => {
setSyncLoading(true);
try { setSyncResult(await checkTaskSync()); }
catch { setSyncResult({ in_sync: false, backend_only: [], etl_only: [], error: "检查失败" }); }
finally { setSyncLoading(false); }
}, []);
/* ---------- 事件处理 ---------- */
const handleDomainChange = useCallback(
(domain: string, checkedCodes: string[]) => {
const otherDomainCodes = selectedTasks.filter(
(code) => !filteredGroups[domain]?.some((t) => t.code === code),
);
onTasksChange([...otherDomainCodes, ...checkedCodes]);
},
[selectedTasks, filteredGroups, onTasksChange],
);
const handleSelectAll = useCallback(() => {
onTasksChange(allVisibleCodes);
}, [allVisibleCodes, onTasksChange]);
const handleInvertSelection = useCallback(() => {
const currentSet = new Set(selectedTasks);
const inverted = allVisibleCodes.filter((code) => !currentSet.has(code));
onTasksChange(inverted);
/* 任务选择 */
const handleSelectAll = useCallback(() => onTasksChange(allVisibleCodes), [allVisibleCodes, onTasksChange]);
const handleSelectCommon = useCallback(() => onTasksChange(allCommonCodes), [allCommonCodes, onTasksChange]);
const handleInvert = useCallback(() => {
const set = new Set(selectedTasks);
onTasksChange(allVisibleCodes.filter((c) => !set.has(c)));
}, [allVisibleCodes, selectedTasks, onTasksChange]);
const handleClear = useCallback(() => onTasksChange([]), [onTasksChange]);
/* ---------- DWD 表过滤事件 ---------- */
const handleDwdDomainTableChange = useCallback(
(domain: string, checked: string[]) => {
if (!onDwdTablesChange) return;
const domainTables = new Set(dwdTableGroups[domain] ?? []);
const otherSelected = selectedDwdTables.filter((t) => !domainTables.has(t));
onDwdTablesChange([...otherSelected, ...checked]);
const handleDomainToggle = useCallback(
(group: DomainGroup, checked: boolean) => {
const codes = new Set(group.layerTasks.flatMap((lt) => lt.tasks.map((t) => t.code)));
if (checked) {
const merged = new Set(selectedTasks);
codes.forEach((c) => merged.add(c));
onTasksChange([...merged]);
} else {
onTasksChange(selectedTasks.filter((c) => !codes.has(c)));
}
},
[selectedDwdTables, dwdTableGroups, onDwdTablesChange],
[selectedTasks, onTasksChange],
);
const handleDwdSelectAll = useCallback(() => {
onDwdTablesChange?.(allDwdTableNames);
}, [allDwdTableNames, onDwdTablesChange]);
const handleTaskToggle = useCallback(
(code: string, checked: boolean) => {
onTasksChange(checked ? [...selectedTasks, code] : selectedTasks.filter((c) => c !== code));
},
[selectedTasks, onTasksChange],
);
const handleDwdClearAll = useCallback(() => {
onDwdTablesChange?.([]);
}, [onDwdTablesChange]);
/* DWD 表选择 */
const handleDwdTableToggle = useCallback(
(tableName: string, checked: boolean) => {
if (!onDwdTablesChange) return;
onDwdTablesChange(checked
? [...selectedDwdTables, tableName]
: selectedDwdTables.filter((t) => t !== tableName));
},
[selectedDwdTables, onDwdTablesChange],
);
/* ---------- 渲染 ---------- */
const handleDwdDomainToggle = useCallback(
(tables: DwdTableItem[], checked: boolean) => {
if (!onDwdTablesChange) return;
const names = new Set(tables.map((t) => t.table_name));
if (checked) {
const merged = new Set(selectedDwdTables);
names.forEach((n) => merged.add(n));
onDwdTablesChange([...merged]);
} else {
onDwdTablesChange(selectedDwdTables.filter((t) => !names.has(t)));
}
},
[selectedDwdTables, onDwdTablesChange],
);
/* 渲染 */
if (loading) return <Spin tip="加载任务列表…" />;
if (error) return <Alert type="error" message="加载失败" description={error} />;
const domainEntries = Object.entries(filteredGroups);
if (domainEntries.length === 0) return <Text type="secondary"> Flow </Text>;
if (domainGroups.length === 0) return <Text type="secondary"> Flow </Text>;
const selectedCount = selectedTasks.filter((c) => allVisibleCodes.includes(c)).length;
// DWD 装载任务是否被选中
const dwdLoadSelected = selectedTasks.includes("DWD_LOAD_FROM_ODS");
const showDwdFilter = layers.includes("DWD") && !!onDwdTablesChange;
/** 渲染某个域下的 DWD 表过滤区块 */
const renderDwdTableFilter = (dwdTables: DwdTableItem[]) => {
if (!showDwdFilter || dwdTables.length === 0) return null;
const domainDwdSelected = selectedDwdTables.filter((t) => dwdTables.some((d) => d.table_name === t));
return (
<div style={{
marginTop: 6, marginLeft: 4, padding: "6px 8px",
background: "#f6ffed", borderRadius: 4, border: "1px solid #d9f7be",
}}>
<div style={{ display: "flex", justifyContent: "space-between", alignItems: "center", marginBottom: 4 }}>
<Space size={4}>
<TableOutlined style={{ color: "#52c41a", fontSize: 12 }} />
<Text style={{ fontSize: 12, fontWeight: 500 }}>DWD </Text>
<Text type="secondary" style={{ fontSize: 11 }}>
{domainDwdSelected.length === 0 ? "(未选 = 全部装载)" : `${domainDwdSelected.length}/${dwdTables.length}`}
</Text>
</Space>
<Space size={4}>
<Button size="small" type="link" style={{ padding: 0, fontSize: 11, height: "auto" }}
onClick={() => handleDwdDomainToggle(dwdTables, true)}></Button>
<Button size="small" type="link" style={{ padding: 0, fontSize: 11, height: "auto" }}
onClick={() => handleDwdDomainToggle(dwdTables, false)}></Button>
</Space>
</div>
{dwdTables.map((dt) => (
<div key={dt.table_name} style={{ padding: "1px 0" }}>
<Checkbox
checked={selectedDwdTables.includes(dt.table_name)}
onChange={(e) => handleDwdTableToggle(dt.table_name, e.target.checked)}
>
<Text style={{ fontSize: 12 }}>{dt.table_name}</Text>
<Text type="secondary" style={{ marginLeft: 6, fontSize: 11 }}>{dt.display_name}</Text>
{dt.is_dimension && <Tag color="cyan" style={{ marginLeft: 4, fontSize: 10, lineHeight: "16px" }}></Tag>}
</Checkbox>
</div>
))}
</div>
);
};
return (
<div>
<Space style={{ marginBottom: 8 }}>
<Button size="small" onClick={handleSelectAll}></Button>
<Button size="small" onClick={handleInvertSelection}></Button>
<Text type="secondary"> {selectedCount} / {allVisibleCodes.length}</Text>
</Space>
{/* 工具栏 */}
<div style={{ display: "flex", justifyContent: "space-between", alignItems: "center", marginBottom: 8 }}>
<Space size={4} wrap>
<Button size="small" onClick={handleSelectCommon}></Button>
<Button size="small" onClick={handleSelectAll}></Button>
<Button size="small" onClick={handleInvert}></Button>
<Button size="small" onClick={handleClear}></Button>
<Text type="secondary" style={{ marginLeft: 4 }}> {selectedCount} / {allVisibleCodes.length}</Text>
</Space>
<Tooltip title="对比后端注册表与 ETL 真实任务列表">
{syncLoading ? (
<Button size="small" icon={<SyncOutlined spin />} disabled></Button>
) : syncResult === null ? (
<Button size="small" icon={<SyncOutlined />} onClick={handleSyncCheck}></Button>
) : syncResult.in_sync ? (
<Button size="small" icon={<CheckCircleOutlined />} style={{ color: "#52c41a", borderColor: "#b7eb8f" }} onClick={handleSyncCheck}></Button>
) : (
<Badge dot>
<Button size="small" danger icon={<WarningOutlined />} onClick={() => setSyncModalOpen(true)}></Button>
</Badge>
)}
</Tooltip>
</div>
{/* 域折叠面板 */}
<Collapse
defaultActiveKey={domainEntries.map(([d]) => d)}
items={domainEntries.map(([domain, tasks]) => {
const domainCodes = tasks.map((t) => t.code);
defaultActiveKey={domainGroups.filter((g) => g.domain !== "工具" && g.domain !== "通用").map((g) => g.domain)}
items={domainGroups.map((group) => {
const domainCodes = group.layerTasks.flatMap((lt) => lt.tasks.map((t) => t.code));
const domainSelected = selectedTasks.filter((c) => domainCodes.includes(c));
const allChecked = domainSelected.length === domainCodes.length;
const allChecked = domainCodes.length > 0 && domainSelected.length === domainCodes.length;
const indeterminate = domainSelected.length > 0 && !allChecked;
const handleDomainCheckAll = (e: CheckboxChangeEvent) => {
handleDomainChange(domain, e.target.checked ? domainCodes : []);
};
return {
key: domain,
key: group.domain,
label: (
<span onClick={(e) => e.stopPropagation()}>
<Checkbox
indeterminate={indeterminate}
checked={allChecked}
onChange={handleDomainCheckAll}
indeterminate={indeterminate} checked={allChecked}
onChange={(e: CheckboxChangeEvent) => handleDomainToggle(group, e.target.checked)}
style={{ marginRight: 8 }}
/>
{domain}
<Text type="secondary" style={{ marginLeft: 4 }}>
({domainSelected.length}/{domainCodes.length})
</Text>
{group.domain}
<Text type="secondary" style={{ marginLeft: 4 }}>({domainSelected.length}/{domainCodes.length})</Text>
</span>
),
children: (
<Checkbox.Group
value={domainSelected}
onChange={(checked) => handleDomainChange(domain, checked as string[])}
>
<Space direction="vertical" style={{ width: "100%" }}>
{tasks.map((t) => (
<Checkbox key={t.code} value={t.code}>
<Text strong style={t.is_common === false ? { color: "#999" } : undefined}>{t.code}</Text>
<Text type="secondary" style={{ marginLeft: 8 }}>{t.name}</Text>
{t.is_common === false && (
<Tag color="default" style={{ marginLeft: 6, fontSize: 11 }}></Tag>
)}
</Checkbox>
))}
</Space>
</Checkbox.Group>
<div>
{(() => {
/* 找到 DWD 表过滤应插入的位置ODS 之后、DWS/INDEX 之前 */
const hasDwdLayer = group.layerTasks.some((lt) => lt.layer === "DWD");
const shouldInsertDwd = !hasDwdLayer && group.dwdTables.length > 0 && showDwdFilter;
/* 插入点:第一个 DWS/INDEX/UTILITY 层之前,若全是 ODS 则在末尾 */
const insertIdx = shouldInsertDwd
? group.layerTasks.findIndex((lt) => (LAYER_ORDER[lt.layer] ?? 99) >= (LAYER_ORDER["DWS"] ?? 2))
: -1;
const effectiveInsertIdx = shouldInsertDwd && insertIdx === -1 ? group.layerTasks.length : insertIdx;
const elements: React.ReactNode[] = [];
group.layerTasks.forEach((lt, idx) => {
/* 在此位置插入 DWD 表过滤 */
if (shouldInsertDwd && idx === effectiveInsertIdx) {
elements.push(
<div key="__dwd_filter__">
{elements.length > 0 && <Divider style={{ margin: "6px 0" }} />}
<div style={{ marginBottom: 4 }}>
<Tag color="green" style={{ fontSize: 11 }}>DWD </Tag>
</div>
{renderDwdTableFilter(group.dwdTables)}
</div>,
);
}
elements.push(
<div key={lt.layer}>
{elements.length > 0 && <Divider style={{ margin: "6px 0" }} />}
<div style={{ marginBottom: 4 }}>
<Tag color={LAYER_COLORS[lt.layer] ?? "default"} style={{ fontSize: 11 }}>
{LAYER_LABELS[lt.layer] ?? lt.layer}
</Tag>
</div>
<div style={{ paddingLeft: 4 }}>
{lt.tasks.map((t) => (
<div key={t.code} style={{ padding: "2px 0" }}>
<Checkbox
checked={selectedTasks.includes(t.code)}
onChange={(e) => handleTaskToggle(t.code, e.target.checked)}
>
<Text strong style={!t.is_common ? { color: "#999" } : undefined}>{t.code}</Text>
<Text type="secondary" style={{ marginLeft: 8 }}>{t.name}</Text>
{!t.is_common && <Tag color="default" style={{ marginLeft: 6, fontSize: 11 }}></Tag>}
</Checkbox>
</div>
))}
</div>
{/* DWD 表过滤紧跟 DWD 层任务 */}
{lt.layer === "DWD" && renderDwdTableFilter(group.dwdTables)}
</div>,
);
});
/* 所有层遍历完后,若插入点在末尾 */
if (shouldInsertDwd && effectiveInsertIdx >= group.layerTasks.length) {
elements.push(
<div key="__dwd_filter__">
{elements.length > 0 && <Divider style={{ margin: "6px 0" }} />}
<div style={{ marginBottom: 4 }}>
<Tag color="green" style={{ fontSize: 11 }}>DWD </Tag>
</div>
{renderDwdTableFilter(group.dwdTables)}
</div>,
);
}
return elements;
})()}
</div>
),
};
})}
/>
{/* DWD 表过滤:仅在 DWD 层且 DWD_LOAD_FROM_ODS 被选中时显示 */}
{showDwdFilter && dwdLoadSelected && allDwdTableNames.length > 0 && (
<>
<Divider style={{ margin: "12px 0 8px" }} />
<div style={{ padding: "0 4px" }}>
<Space style={{ marginBottom: 6 }}>
<Text strong style={{ fontSize: 13 }}>DWD </Text>
<Text type="secondary" style={{ fontSize: 12 }}>
{selectedDwdTables.length === 0
? "(未选择 = 全部装载)"
: `已选 ${selectedDwdTables.length} / ${allDwdTableNames.length}`}
</Text>
</Space>
<div style={{ marginBottom: 6 }}>
<Space size={4}>
<Button size="small" type="link" style={{ padding: 0, fontSize: 12 }} onClick={handleDwdSelectAll}>
</Button>
<Button size="small" type="link" style={{ padding: 0, fontSize: 12 }} onClick={handleDwdClearAll}>
</Button>
</Space>
</div>
<Collapse
size="small"
items={Object.entries(dwdTableGroups).map(([domain, tables]) => {
const domainSelected = selectedDwdTables.filter((t) => tables.includes(t));
const allDomainChecked = domainSelected.length === tables.length;
const domainIndeterminate = domainSelected.length > 0 && !allDomainChecked;
return {
key: domain,
label: (
<span onClick={(e) => e.stopPropagation()}>
<Checkbox
indeterminate={domainIndeterminate}
checked={allDomainChecked}
onChange={(e: CheckboxChangeEvent) =>
handleDwdDomainTableChange(domain, e.target.checked ? tables : [])
}
style={{ marginRight: 8 }}
/>
{domain}
<Text type="secondary" style={{ marginLeft: 4, fontSize: 12 }}>
({domainSelected.length}/{tables.length})
</Text>
</span>
),
children: (
<Checkbox.Group
value={domainSelected}
onChange={(checked) => handleDwdDomainTableChange(domain, checked as string[])}
>
<Space direction="vertical">
{tables.map((table) => (
<Checkbox key={table} value={table}>
<Text style={{ fontSize: 12 }}>{table}</Text>
</Checkbox>
))}
</Space>
</Checkbox.Group>
),
};
})}
/>
{/* 同步差异 Modal */}
<Modal
title="任务注册表同步检查"
open={syncModalOpen}
onCancel={() => setSyncModalOpen(false)}
footer={[
<Button key="refresh" icon={<SyncOutlined />} onClick={() => { handleSyncCheck(); }}></Button>,
<Button key="close" type="primary" onClick={() => setSyncModalOpen(false)}></Button>,
]}
>
{syncResult?.error ? (
<Alert type="error" message="检查出错" description={syncResult.error} />
) : (
<div>
{syncResult?.backend_only && syncResult.backend_only.length > 0 && (
<div style={{ marginBottom: 12 }}>
<Text strong style={{ color: "#faad14" }}> ETL {syncResult.backend_only.length}</Text>
<div style={{ marginTop: 4 }}>
{syncResult.backend_only.map((code) => (
<Tag key={code} color="warning" style={{ marginBottom: 4 }}>{code}</Tag>
))}
</div>
</div>
)}
{syncResult?.etl_only && syncResult.etl_only.length > 0 && (
<div>
<Text strong style={{ color: "#ff4d4f" }}>ETL {syncResult.etl_only.length}</Text>
<div style={{ marginTop: 4 }}>
{syncResult.etl_only.map((code) => (
<Tag key={code} color="error" style={{ marginBottom: 4 }}>{code}</Tag>
))}
</div>
</div>
)}
{syncResult?.in_sync && (
<Alert type="success" message="后端与 ETL 任务列表完全一致" />
)}
</div>
</>
)}
)}
</Modal>
</div>
);
};

View File

@@ -24,6 +24,7 @@ import {
TreeSelect,
Tooltip,
Segmented,
Spin,
} from "antd";
import {
SendOutlined,
@@ -37,7 +38,8 @@ import {
} from "@ant-design/icons";
import { useNavigate } from "react-router-dom";
import TaskSelector from "../components/TaskSelector";
import { validateTaskConfig } from "../api/tasks";
import { validateTaskConfig, fetchFlows } from "../api/tasks";
import type { FlowDef, ProcessingModeDef } from "../api/tasks";
import { submitToQueue, executeDirectly } from "../api/execution";
import { useAuthStore } from "../store/authStore";
import type { RadioChangeEvent } from "antd";
@@ -48,32 +50,45 @@ const { Title, Text } = Typography;
const { TextArea } = Input;
/* ------------------------------------------------------------------ */
/* Flow 定义 */
/* Flow / 处理模式 — 本地 fallbackAPI 不可用时兜底) */
/* ------------------------------------------------------------------ */
const FLOW_DEFINITIONS: Record<string, { name: string; layers: string[]; desc: string }> = {
api_ods: { name: "API → ODS", layers: ["ODS"], desc: "仅抓取原始数据" },
api_ods_dwd: { name: "API → ODS → DWD", layers: ["ODS", "DWD"], desc: "抓取并清洗装载" },
api_full: { name: "API → ODS → DWD → DWS → INDEX", layers: ["ODS", "DWD", "DWS", "INDEX"], desc: "全链路执行" },
ods_dwd: { name: "ODS → DWD", layers: ["DWD"], desc: "仅清洗装载" },
dwd_dws: { name: "DWD → DWS汇总", layers: ["DWS"], desc: "仅汇总计算" },
dwd_dws_index: { name: "DWD → DWS → INDEX", layers: ["DWS", "INDEX"], desc: "汇总+指数" },
dwd_index: { name: "DWD → INDEX", layers: ["INDEX"], desc: "仅指数计算" },
/** Flow entry as used by the page: display name plus the layers the flow covers. */
interface FlowEntry { name: string; layers: string[] }
/** Built-in Flow table keyed by flow id; it is the initial `flowDefs` state and what `getFlowLayers` reads. */
const FALLBACK_FLOWS: Record<string, FlowEntry> = {
api_ods: { name: "API → ODS", layers: ["ODS"] },
api_ods_dwd: { name: "API → ODS → DWD", layers: ["ODS", "DWD"] },
api_full: { name: "API → ODS → DWD → DWS → INDEX", layers: ["ODS", "DWD", "DWS", "INDEX"] },
ods_dwd: { name: "ODS → DWD", layers: ["DWD"] },
dwd_dws: { name: "DWD → DWS汇总", layers: ["DWS"] },
dwd_dws_index: { name: "DWD → DWS → INDEX", layers: ["DWS", "INDEX"] },
dwd_index: { name: "DWD → INDEX", layers: ["INDEX"] },
};
export function getFlowLayers(flowId: string): string[] {
return FLOW_DEFINITIONS[flowId]?.layers ?? [];
}
/** Processing-mode option as used by the page: `value` is the mode id, `label` its title, `desc` its description. */
interface ProcModeEntry { value: string; label: string; desc: string }
/* ------------------------------------------------------------------ */
/* 处理模式 */
/* ------------------------------------------------------------------ */
const PROCESSING_MODES = [
const FALLBACK_PROCESSING_MODES: ProcModeEntry[] = [
{ value: "increment_only", label: "仅增量", desc: "按游标增量抓取和装载" },
{ value: "verify_only", label: "校验并修复", desc: "对比源和目标,修复差异" },
{ value: "increment_verify", label: "增量+校验", desc: "先增量再校验" },
] as const;
];
/**
 * Index the FlowDef[] returned by the API by flow id.
 *
 * Only `name` and `layers` are carried over; when an id occurs more than once
 * the last occurrence wins.
 */
function apiFlowsToRecord(flows: FlowDef[]): Record<string, FlowEntry> {
  return flows.reduce<Record<string, FlowEntry>>((byId, { id, name, layers }) => {
    byId[id] = { name, layers };
    return byId;
  }, {});
}
/**
 * Convert the ProcessingModeDef[] returned by the API into ProcModeEntry[]:
 * `id` becomes `value`, `name` becomes `label`, `description` becomes `desc`.
 */
function apiModesToEntries(modes: ProcessingModeDef[]): ProcModeEntry[] {
  const toEntry = ({ id, name, description }: ProcessingModeDef): ProcModeEntry => ({
    value: id,
    label: name,
    desc: description,
  });
  return modes.map(toEntry);
}
/**
 * Layer lookup for callers outside the component.
 *
 * Reads only the static FALLBACK_FLOWS table; the component itself uses the
 * dynamically loaded definitions instead.
 *
 * @param flowId Flow identifier to look up.
 * @returns The flow's layers, or an empty array when the id is unknown.
 */
export function getFlowLayers(flowId: string): string[] {
  const entry = FALLBACK_FLOWS[flowId];
  if (entry == null) return [];
  return entry.layers ?? [];
}
/* ------------------------------------------------------------------ */
/* 时间窗口 */
@@ -147,6 +162,24 @@ const TaskConfig: React.FC = () => {
const navigate = useNavigate();
const user = useAuthStore((s) => s.user);
/* ---------- Flow / 处理模式 动态加载 ---------- */
const [flowDefs, setFlowDefs] = useState<Record<string, FlowEntry>>(FALLBACK_FLOWS);
const [procModes, setProcModes] = useState<ProcModeEntry[]>(FALLBACK_PROCESSING_MODES);
const [flowsLoading, setFlowsLoading] = useState(true);
useEffect(() => {
let cancelled = false;
fetchFlows()
.then(({ flows, processing_modes }) => {
if (cancelled) return;
if (flows.length > 0) setFlowDefs(apiFlowsToRecord(flows));
if (processing_modes.length > 0) setProcModes(apiModesToEntries(processing_modes));
})
.catch(() => { /* API 不可用,使用 fallback */ })
.finally(() => { if (!cancelled) setFlowsLoading(false); });
return () => { cancelled = true; };
}, []);
/* ---------- 连接器 & Store 树形选择 ---------- */
const { treeData: connectorTreeData, allValues: allConnectorStoreValues } = useMemo(
() => buildConnectorStoreTree(CONNECTOR_DEFS, user?.site_id ?? null),
@@ -199,12 +232,17 @@ const TaskConfig: React.FC = () => {
const [submitting, setSubmitting] = useState(false);
/* ---------- 派生状态 ---------- */
const layers = getFlowLayers(flow);
const layers = flowDefs[flow]?.layers ?? [];
const showVerifyOption = processingMode === "verify_only";
/* ---------- 构建 TaskConfig 对象 ---------- */
const buildTaskConfig = (): TaskConfigType => ({
tasks: selectedTasks,
const buildTaskConfig = (): TaskConfigType => {
/* layers 包含 DWD 时自动注入 DWD_LOAD_FROM_ODSUI 上由 DWD 表过滤区块隐含) */
const tasks = layers.includes("DWD") && !selectedTasks.includes("DWD_LOAD_FROM_ODS")
? [...selectedTasks, "DWD_LOAD_FROM_ODS"]
: selectedTasks;
return {
tasks,
pipeline: flow,
processing_mode: processingMode,
pipeline_flow: "FULL",
@@ -223,7 +261,8 @@ const TaskConfig: React.FC = () => {
dwd_only_tables: selectedDwdTables.length > 0 ? selectedDwdTables : null,
force_full: forceFull,
extra_args: {},
});
};
};
/* ---------- 自动刷新 CLI 预览 ---------- */
const refreshCli = async () => {
@@ -326,12 +365,12 @@ const TaskConfig: React.FC = () => {
</Card>
</Col>
<Col span={16}>
<Card size="small" title="执行流程 (Flow)" style={cardStyle}>
<Card size="small" title={flowsLoading ? <Space size={4}> (Flow) <Spin size="small" /></Space> : "执行流程 (Flow)"} style={cardStyle}>
<Radio.Group value={flow} onChange={handleFlowChange} style={{ width: "100%" }}>
<Row gutter={[0, 4]}>
{Object.entries(FLOW_DEFINITIONS).map(([id, def]) => (
{Object.entries(flowDefs).map(([id, def]) => (
<Col span={12} key={id}>
<Tooltip title={def.desc}>
<Tooltip title={def.name}>
<Radio value={id}>
<Text strong style={{ fontSize: 12 }}>{id}</Text>
</Radio>
@@ -361,7 +400,7 @@ const TaskConfig: React.FC = () => {
}}
>
<Space direction="vertical" style={{ width: "100%" }}>
{PROCESSING_MODES.map((m) => (
{procModes.map((m) => (
<Radio key={m.value} value={m.value}>
<Text strong>{m.label}</Text>
<br />

View File

@@ -1,133 +1,144 @@
/**
* 前后端共享的 TypeScript 类型定义。
* 与设计文档中的 Pydantic 模型和数据库表结构对应。
*/
/** Execution configuration for an ETL task run. */
export interface TaskConfig {
  /** Codes of the tasks to run. */
  tasks: string[];
  /** Flow ID of the execution pipeline; corresponds to the CLI `--pipeline` option. */
  pipeline: string;
  /** Processing mode. */
  processing_mode: string;
  /** Legacy-mode compatibility field (deprecated). */
  pipeline_flow: string;
  dry_run: boolean;
  /** lookback / custom */
  window_mode: string;
  window_start: string | null;
  window_end: string | null;
  /** none / day */
  window_split: string | null;
  /** 1 / 10 / 30 */
  window_split_days: number | null;
  lookback_hours: number;
  overlap_seconds: number;
  fetch_before_verify: boolean;
  skip_ods_when_fetch_before_verify: boolean;
  ods_use_local_json: boolean;
  /** Store ID (injected by the backend from the JWT). */
  store_id: number | null;
  /** Table-level selection for the DWD layer. */
  dwd_only_tables: string[] | null;
  /** Force full processing (skips hash de-duplication and change comparison). */
  force_full: boolean;
  extra_args: Record<string, unknown>;
}
/** Definition of an execution flow (Flow). */
export interface PipelineDefinition {
  id: string;
  name: string;
  /** Layers included in the flow: ODS / DWD / DWS / INDEX. */
  layers: string[];
}
/** Definition of a processing mode. */
export interface ProcessingModeDefinition {
  id: string;
  name: string;
  description: string;
}
/** A task definition as listed in the task registry. */
export interface TaskDefinition {
  code: string;
  name: string;
  description: string;
  /** Business domain (members, settlement, assistants, ...). */
  domain: string;
  requires_window: boolean;
  is_ods: boolean;
  is_dimension: boolean;
  default_enabled: boolean;
  /** Common-task flag; false marks utility / manually run tasks. */
  is_common: boolean;
}
/** Scheduling configuration. */
export interface ScheduleConfig {
  /** Which kind of schedule applies. */
  schedule_type: "once" | "interval" | "daily" | "weekly" | "cron";
  // NOTE(review): the interval_*, daily_*, weekly_* and cron_* fields presumably
  // only matter for the matching schedule_type — confirm against the scheduler.
  interval_value: number;
  interval_unit: "minutes" | "hours" | "days";
  daily_time: string;
  weekly_days: number[];
  weekly_time: string;
  cron_expression: string;
  enabled: boolean;
  start_date: string | null;
  end_date: string | null;
}
/** A task waiting in (or taken from) the execution queue. */
export interface QueuedTask {
  id: string;
  site_id: number;
  /** Execution configuration the task was queued with. */
  config: TaskConfig;
  status: "pending" | "running" | "success" | "failed" | "cancelled";
  position: number;
  created_at: string;
  /** null until the value is available. */
  started_at: string | null;
  finished_at: string | null;
  exit_code: number | null;
  error_message: string | null;
}
/** One record of the execution history. */
export interface ExecutionLog {
  id: string;
  site_id: number;
  task_codes: string[];
  status: string;
  started_at: string;
  finished_at: string | null;
  exit_code: number | null;
  /** Duration in milliseconds (per the field name); null when not recorded. */
  duration_ms: number | null;
  command: string;
  summary: Record<string, unknown> | null;
}
/** A scheduled task: a task configuration paired with a schedule. */
export interface ScheduledTask {
  id: string;
  site_id: number;
  name: string;
  task_codes: string[];
  task_config: TaskConfig;
  schedule_config: ScheduleConfig;
  enabled: boolean;
  last_run_at: string | null;
  next_run_at: string | null;
  run_count: number;
  last_status: string | null;
  created_at: string;
  updated_at: string;
}
/** One environment configuration entry. */
export interface EnvConfigItem {
  key: string;
  value: string;
  /** Marks the entry as sensitive. */
  is_sensitive: boolean;
}
/**
* 前后端共享的 TypeScript 类型定义。
* 与设计文档中的 Pydantic 模型和数据库表结构对应。
*/
/** Execution configuration for an ETL task run. */
export interface TaskConfig {
  /** Codes of the tasks to run. */
  tasks: string[];
  /** Flow ID of the execution pipeline; corresponds to the CLI `--pipeline` option. */
  pipeline: string;
  /** Processing mode. */
  processing_mode: string;
  /** Legacy-mode compatibility field (deprecated). */
  pipeline_flow: string;
  dry_run: boolean;
  /** lookback / custom */
  window_mode: string;
  window_start: string | null;
  window_end: string | null;
  /** none / day */
  window_split: string | null;
  /** 1 / 10 / 30 */
  window_split_days: number | null;
  lookback_hours: number;
  overlap_seconds: number;
  fetch_before_verify: boolean;
  skip_ods_when_fetch_before_verify: boolean;
  ods_use_local_json: boolean;
  /** Store ID (injected by the backend from the JWT). */
  store_id: number | null;
  /** Table-level selection for the DWD layer. */
  dwd_only_tables: string[] | null;
  /** Force full processing (skips hash de-duplication and change comparison). */
  force_full: boolean;
  extra_args: Record<string, unknown>;
}
/** Definition of an execution flow (Flow). */
export interface PipelineDefinition {
  id: string;
  name: string;
  /** Layers included in the flow: ODS / DWD / DWS / INDEX. */
  layers: string[];
}
/** Definition of a processing mode. */
export interface ProcessingModeDefinition {
  id: string;
  name: string;
  description: string;
}
/** A task definition as listed in the task registry. */
export interface TaskDefinition {
  code: string;
  name: string;
  description: string;
  /** Business domain (members, settlement, assistants, ...). */
  domain: string;
  /** Layer the task belongs to: ODS / DWD / DWS / INDEX / UTILITY. */
  layer: string;
  requires_window: boolean;
  is_ods: boolean;
  is_dimension: boolean;
  default_enabled: boolean;
  /** Common-task flag; false marks utility / manually run tasks. */
  is_common: boolean;
}
/** DWD table definition (the complete structure returned by the backend). */
export interface DwdTableItem {
  table_name: string;
  display_name: string;
  domain: string;
  /** Name of the ODS source (per the field name). */
  ods_source: string;
  is_dimension: boolean;
}
/** Scheduling configuration. */
export interface ScheduleConfig {
  /** Which kind of schedule applies. */
  schedule_type: "once" | "interval" | "daily" | "weekly" | "cron";
  // NOTE(review): the interval_*, daily_*, weekly_* and cron_* fields presumably
  // only matter for the matching schedule_type — confirm against the scheduler.
  interval_value: number;
  interval_unit: "minutes" | "hours" | "days";
  daily_time: string;
  weekly_days: number[];
  weekly_time: string;
  cron_expression: string;
  enabled: boolean;
  start_date: string | null;
  end_date: string | null;
}
/** A task waiting in (or taken from) the execution queue. */
export interface QueuedTask {
  id: string;
  site_id: number;
  /** Execution configuration the task was queued with. */
  config: TaskConfig;
  status: "pending" | "running" | "success" | "failed" | "cancelled";
  position: number;
  created_at: string;
  /** null until the value is available. */
  started_at: string | null;
  finished_at: string | null;
  exit_code: number | null;
  error_message: string | null;
}
/** One record of the execution history. */
export interface ExecutionLog {
  id: string;
  site_id: number;
  task_codes: string[];
  status: string;
  started_at: string;
  finished_at: string | null;
  exit_code: number | null;
  /** Duration in milliseconds (per the field name); null when not recorded. */
  duration_ms: number | null;
  command: string;
  summary: Record<string, unknown> | null;
}
/** A scheduled task: a task configuration paired with a schedule. */
export interface ScheduledTask {
  id: string;
  site_id: number;
  name: string;
  task_codes: string[];
  task_config: TaskConfig;
  schedule_config: ScheduleConfig;
  enabled: boolean;
  last_run_at: string | null;
  next_run_at: string | null;
  run_count: number;
  last_status: string | null;
  created_at: string;
  updated_at: string;
}
/** One environment configuration entry. */
export interface EnvConfigItem {
  key: string;
  value: string;
  /** Marks the entry as sensitive. */
  is_sensitive: boolean;
}

View File

@@ -1,209 +1,264 @@
# -*- coding: utf-8 -*-
"""任务注册表 & 配置 API
提供 4 个端点:
- GET /api/tasks/registry — 按业务域分组的任务列表
- GET /api/tasks/dwd-tables — 按业务域分组的 DWD 表定义
- GET /api/tasks/flows — 7 种 Flow + 3 种处理模式
- POST /api/tasks/validate — 验证 TaskConfig 并返回 CLI 命令预览
所有端点需要 JWT 认证。validate 端点从 JWT 注入 store_id。
"""
from __future__ import annotations
from typing import Any
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from app.auth.dependencies import CurrentUser, get_current_user
from app.config import ETL_PROJECT_PATH
from app.schemas.tasks import (
FlowDefinition,
ProcessingModeDefinition,
TaskConfigSchema,
)
from app.services.cli_builder import cli_builder
from app.services.task_registry import (
DWD_TABLES,
FLOW_LAYER_MAP,
get_dwd_tables_grouped_by_domain,
get_tasks_grouped_by_domain,
)
router = APIRouter(prefix="/api/tasks", tags=["任务配置"])
# ── 响应模型 ──────────────────────────────────────────────────
class TaskItem(BaseModel):
    # One task entry of the task-registry response; every field is copied
    # from the corresponding attribute of a registry task in get_task_registry.
    code: str
    name: str
    description: str
    domain: str
    layer: str
    requires_window: bool
    is_ods: bool
    is_dimension: bool
    default_enabled: bool
    is_common: bool
class DwdTableItem(BaseModel):
    # One DWD table entry of the dwd-tables response; every field is copied
    # from the corresponding attribute of a registry table in get_dwd_tables.
    table_name: str
    display_name: str
    domain: str
    ods_source: str
    is_dimension: bool
class TaskRegistryResponse(BaseModel):
    """Task list grouped by business domain."""
    # Maps a domain name to the tasks of that domain.
    groups: dict[str, list[TaskItem]]
class DwdTablesResponse(BaseModel):
    """DWD table definitions grouped by business domain."""
    # Maps a domain name to the DWD tables of that domain.
    groups: dict[str, list[DwdTableItem]]
class FlowsResponse(BaseModel):
    """Flow definitions plus processing-mode definitions."""
    flows: list[FlowDefinition]
    processing_modes: list[ProcessingModeDefinition]
class ValidateRequest(BaseModel):
    """Validation request body — reuses TaskConfigSchema, but store_id is injected by the backend."""
    config: TaskConfigSchema
class ValidateResponse(BaseModel):
    """Validation result plus CLI command preview."""
    valid: bool
    # Command as a single string and as an argument list; both are empty
    # when validation failed (see validate_task_config).
    command: str
    command_args: list[str]
    # Validation messages; empty when valid is True.
    errors: list[str]
# ── Flow definitions (static) ────────────────────────────────
# Each entry names a flow and lists the layers it covers; the list is
# returned as-is by GET /api/tasks/flows.
FLOW_DEFINITIONS: list[FlowDefinition] = [
    FlowDefinition(id="api_ods", name="API → ODS", layers=["ODS"]),
    FlowDefinition(id="api_ods_dwd", name="API → ODS → DWD", layers=["ODS", "DWD"]),
    FlowDefinition(id="api_full", name="API → ODS → DWD → DWS汇总 → DWS指数", layers=["ODS", "DWD", "DWS", "INDEX"]),
    FlowDefinition(id="ods_dwd", name="ODS → DWD", layers=["DWD"]),
    FlowDefinition(id="dwd_dws", name="DWD → DWS汇总", layers=["DWS"]),
    FlowDefinition(id="dwd_dws_index", name="DWD → DWS汇总 → DWS指数", layers=["DWS", "INDEX"]),
    FlowDefinition(id="dwd_index", name="DWD → DWS指数", layers=["INDEX"]),
]
# Processing modes returned together with the flows.
PROCESSING_MODE_DEFINITIONS: list[ProcessingModeDefinition] = [
    ProcessingModeDefinition(id="increment_only", name="仅增量处理", description="只处理新增和变更的数据"),
    ProcessingModeDefinition(id="verify_only", name="仅校验修复", description="校验现有数据并修复不一致(可选'校验前从 API 获取'"),
    ProcessingModeDefinition(id="increment_verify", name="增量 + 校验修复", description="先增量处理,再校验并修复"),
]
# ── 端点 ──────────────────────────────────────────────────────
@router.get("/registry", response_model=TaskRegistryResponse)
async def get_task_registry(
    user: CurrentUser = Depends(get_current_user),
) -> TaskRegistryResponse:
    """Return the task list grouped by business domain.

    ``user`` is not read in the body; it is resolved through
    ``get_current_user`` as a route dependency.
    """
    groups: dict[str, list[TaskItem]] = {}
    for domain, tasks in get_tasks_grouped_by_domain().items():
        items: list[TaskItem] = []
        for task in tasks:
            items.append(
                TaskItem(
                    code=task.code,
                    name=task.name,
                    description=task.description,
                    domain=task.domain,
                    layer=task.layer,
                    requires_window=task.requires_window,
                    is_ods=task.is_ods,
                    is_dimension=task.is_dimension,
                    default_enabled=task.default_enabled,
                    is_common=task.is_common,
                )
            )
        groups[domain] = items
    return TaskRegistryResponse(groups=groups)
@router.get("/dwd-tables", response_model=DwdTablesResponse)
async def get_dwd_tables(
    user: CurrentUser = Depends(get_current_user),
) -> DwdTablesResponse:
    """Return the DWD table definitions grouped by business domain.

    ``user`` is not read in the body; it is resolved through
    ``get_current_user`` as a route dependency.
    """
    groups: dict[str, list[DwdTableItem]] = {}
    for domain, tables in get_dwd_tables_grouped_by_domain().items():
        items: list[DwdTableItem] = []
        for table in tables:
            items.append(
                DwdTableItem(
                    table_name=table.table_name,
                    display_name=table.display_name,
                    domain=table.domain,
                    ods_source=table.ods_source,
                    is_dimension=table.is_dimension,
                )
            )
        groups[domain] = items
    return DwdTablesResponse(groups=groups)
@router.get("/flows", response_model=FlowsResponse)
async def get_flows(
    user: CurrentUser = Depends(get_current_user),
) -> FlowsResponse:
    """Return the static flow definitions and processing-mode definitions."""
    flows = FLOW_DEFINITIONS
    modes = PROCESSING_MODE_DEFINITIONS
    return FlowsResponse(flows=flows, processing_modes=modes)
@router.post("/validate", response_model=ValidateResponse)
async def validate_task_config(
    body: ValidateRequest,
    user: CurrentUser = Depends(get_current_user),
) -> ValidateResponse:
    """Validate a TaskConfig and return a preview of the generated CLI command.

    ``store_id`` is overwritten with ``user.site_id`` of the authenticated
    user, so the frontend does not need to send it.
    """
    config = body.config.model_copy(update={"store_id": user.site_id})

    problems: list[str] = []
    # The flow ID must be a key of FLOW_LAYER_MAP.
    if config.pipeline not in FLOW_LAYER_MAP:
        problems.append(f"无效的执行流程: {config.pipeline}")
    # The task list must not be empty.
    if not config.tasks:
        problems.append("任务列表不能为空")

    if not problems:
        cmd_args = cli_builder.build_command(config, ETL_PROJECT_PATH)
        cmd_str = cli_builder.build_command_string(config, ETL_PROJECT_PATH)
        return ValidateResponse(
            valid=True,
            command=cmd_str,
            command_args=cmd_args,
            errors=[],
        )

    return ValidateResponse(
        valid=False,
        command="",
        command_args=[],
        errors=problems,
    )
# -*- coding: utf-8 -*-
"""任务注册表 & 配置 API
提供 5 个端点:
- GET /api/tasks/registry — 按业务域分组的任务列表
- GET /api/tasks/dwd-tables — 按业务域分组的 DWD 表定义
- GET /api/tasks/flows — 7 种 Flow + 3 种处理模式
- POST /api/tasks/validate — 验证 TaskConfig 并返回 CLI 命令预览
- GET /api/tasks/sync-check — 对比后端任务列表与 ETL 真实注册表
所有端点需要 JWT 认证。validate 端点从 JWT 注入 store_id。
"""
from __future__ import annotations
from typing import Any
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from app.auth.dependencies import CurrentUser, get_current_user
from app.config import ETL_PROJECT_PATH
from app.schemas.tasks import (
FlowDefinition,
ProcessingModeDefinition,
TaskConfigSchema,
)
from app.services.cli_builder import cli_builder
from app.services.task_registry import (
DWD_TABLES,
FLOW_LAYER_MAP,
get_dwd_tables_grouped_by_domain,
get_tasks_grouped_by_domain,
)
router = APIRouter(prefix="/api/tasks", tags=["任务配置"])
# ── 响应模型 ──────────────────────────────────────────────────
class TaskItem(BaseModel):
    # One task entry of the task-registry response; every field is copied
    # from the corresponding attribute of a registry task in get_task_registry.
    code: str
    name: str
    description: str
    domain: str
    layer: str
    requires_window: bool
    is_ods: bool
    is_dimension: bool
    default_enabled: bool
    is_common: bool
class DwdTableItem(BaseModel):
    # One DWD table entry of the dwd-tables response; every field is copied
    # from the corresponding attribute of a registry table in get_dwd_tables.
    table_name: str
    display_name: str
    domain: str
    ods_source: str
    is_dimension: bool
class TaskRegistryResponse(BaseModel):
    """Task list grouped by business domain."""
    # Maps a domain name to the tasks of that domain.
    groups: dict[str, list[TaskItem]]
class DwdTablesResponse(BaseModel):
    """DWD table definitions grouped by business domain."""
    # Maps a domain name to the DWD tables of that domain.
    groups: dict[str, list[DwdTableItem]]
class FlowsResponse(BaseModel):
    """Flow definitions plus processing-mode definitions."""
    flows: list[FlowDefinition]
    processing_modes: list[ProcessingModeDefinition]
class ValidateRequest(BaseModel):
    """Validation request body — reuses TaskConfigSchema, but store_id is injected by the backend."""
    config: TaskConfigSchema
class ValidateResponse(BaseModel):
    """Validation result plus CLI command preview."""
    valid: bool
    # Command as a single string and as an argument list; both are empty
    # when validation failed (see validate_task_config).
    command: str
    command_args: list[str]
    # Validation messages; empty when valid is True.
    errors: list[str]
# ── Flow definitions (static) ────────────────────────────────
# Each entry names a flow and lists the layers it covers; the list is
# returned as-is by GET /api/tasks/flows.
FLOW_DEFINITIONS: list[FlowDefinition] = [
    FlowDefinition(id="api_ods", name="API → ODS", layers=["ODS"]),
    FlowDefinition(id="api_ods_dwd", name="API → ODS → DWD", layers=["ODS", "DWD"]),
    FlowDefinition(id="api_full", name="API → ODS → DWD → DWS汇总 → DWS指数", layers=["ODS", "DWD", "DWS", "INDEX"]),
    FlowDefinition(id="ods_dwd", name="ODS → DWD", layers=["DWD"]),
    FlowDefinition(id="dwd_dws", name="DWD → DWS汇总", layers=["DWS"]),
    FlowDefinition(id="dwd_dws_index", name="DWD → DWS汇总 → DWS指数", layers=["DWS", "INDEX"]),
    FlowDefinition(id="dwd_index", name="DWD → DWS指数", layers=["INDEX"]),
]
# Processing modes returned together with the flows.
PROCESSING_MODE_DEFINITIONS: list[ProcessingModeDefinition] = [
    ProcessingModeDefinition(id="increment_only", name="仅增量处理", description="只处理新增和变更的数据"),
    ProcessingModeDefinition(id="verify_only", name="仅校验修复", description="校验现有数据并修复不一致"),
    ProcessingModeDefinition(id="increment_verify", name="增量 + 校验修复", description="先增量处理,再校验并修复"),
]
# ── 端点 ──────────────────────────────────────────────────────
@router.get("/registry", response_model=TaskRegistryResponse)
async def get_task_registry(
    user: CurrentUser = Depends(get_current_user),
) -> TaskRegistryResponse:
    """Return the task list grouped by business domain.

    ``user`` is not read in the body; it is resolved through
    ``get_current_user`` as a route dependency.
    """
    groups: dict[str, list[TaskItem]] = {}
    for domain, tasks in get_tasks_grouped_by_domain().items():
        items: list[TaskItem] = []
        for task in tasks:
            items.append(
                TaskItem(
                    code=task.code,
                    name=task.name,
                    description=task.description,
                    domain=task.domain,
                    layer=task.layer,
                    requires_window=task.requires_window,
                    is_ods=task.is_ods,
                    is_dimension=task.is_dimension,
                    default_enabled=task.default_enabled,
                    is_common=task.is_common,
                )
            )
        groups[domain] = items
    return TaskRegistryResponse(groups=groups)
@router.get("/dwd-tables", response_model=DwdTablesResponse)
async def get_dwd_tables(
    user: CurrentUser = Depends(get_current_user),
) -> DwdTablesResponse:
    """Return the DWD table definitions grouped by business domain.

    ``user`` is not read in the body; it is resolved through
    ``get_current_user`` as a route dependency.
    """
    groups: dict[str, list[DwdTableItem]] = {}
    for domain, tables in get_dwd_tables_grouped_by_domain().items():
        items: list[DwdTableItem] = []
        for table in tables:
            items.append(
                DwdTableItem(
                    table_name=table.table_name,
                    display_name=table.display_name,
                    domain=table.domain,
                    ods_source=table.ods_source,
                    is_dimension=table.is_dimension,
                )
            )
        groups[domain] = items
    return DwdTablesResponse(groups=groups)
@router.get("/flows", response_model=FlowsResponse)
async def get_flows(
    user: CurrentUser = Depends(get_current_user),
) -> FlowsResponse:
    """Return the static flow definitions and processing-mode definitions."""
    flows = FLOW_DEFINITIONS
    modes = PROCESSING_MODE_DEFINITIONS
    return FlowsResponse(flows=flows, processing_modes=modes)
@router.post("/validate", response_model=ValidateResponse)
async def validate_task_config(
    body: ValidateRequest,
    user: CurrentUser = Depends(get_current_user),
) -> ValidateResponse:
    """Validate a TaskConfig and return a preview of the generated CLI command.

    ``store_id`` is overwritten with ``user.site_id`` of the authenticated
    user, so the frontend does not need to send it.
    """
    config = body.config.model_copy(update={"store_id": user.site_id})

    problems: list[str] = []
    # The flow ID must be a key of FLOW_LAYER_MAP.
    if config.pipeline not in FLOW_LAYER_MAP:
        problems.append(f"无效的执行流程: {config.pipeline}")
    # The task list must not be empty.
    if not config.tasks:
        problems.append("任务列表不能为空")

    if not problems:
        cmd_args = cli_builder.build_command(config, ETL_PROJECT_PATH)
        cmd_str = cli_builder.build_command_string(config, ETL_PROJECT_PATH)
        return ValidateResponse(
            valid=True,
            command=cmd_str,
            command_args=cmd_args,
            errors=[],
        )

    return ValidateResponse(
        valid=False,
        command="",
        command_args=[],
        errors=problems,
    )
# ── GET /api/tasks/sync-check — 对比 ETL 真实注册表 ──────────
class SyncCheckResponse(BaseModel):
    """Result of the sync check."""
    # True only when both difference lists below are empty.
    in_sync: bool
    # Task codes known to the backend but missing from the ETL registry.
    backend_only: list[str]
    # Task codes present in the ETL registry but unknown to the backend.
    etl_only: list[str]
    # Set when the ETL registry could not be read; None otherwise.
    error: str | None = None
@router.get("/sync-check", response_model=SyncCheckResponse)
async def sync_check(
    user: CurrentUser = Depends(get_current_user),
) -> SyncCheckResponse:
    """Compare the backend's hard-coded task list with the real ETL registry.

    The ETL task codes are obtained by running a short Python snippet in a
    subprocess (cwd = ETL_PROJECT_PATH), so the ETL code is never imported
    into the backend process.

    Returns:
        SyncCheckResponse with the codes found on only one side. When the
        subprocess fails or cannot be started, ``in_sync`` is False, both
        lists are empty and ``error`` carries the reason.
    """
    import asyncio
    import subprocess
    import sys
    from app.services.task_registry import ALL_TASKS

    backend_codes = {t.code for t in ALL_TASKS}
    try:
        # subprocess.run blocks for up to the 15 s timeout. Running it in a
        # worker thread keeps the event loop free to serve other requests.
        result = await asyncio.to_thread(
            subprocess.run,
            [sys.executable, "-c",
             "from orchestration.task_registry import default_registry; "
             "print(','.join(sorted(default_registry.get_all_task_codes())))"],
            capture_output=True, text=True, timeout=15,
            cwd=ETL_PROJECT_PATH, encoding="utf-8", errors="replace",
        )
        if result.returncode != 0:
            return SyncCheckResponse(
                in_sync=False, backend_only=[], etl_only=[],
                # Keep the payload small: only the first 200 chars of stderr.
                error=f"ETL 子进程失败: {result.stderr.strip()[:200]}",
            )
        etl_codes = {c.strip() for c in result.stdout.strip().split(",") if c.strip()}
    except Exception as exc:
        # Deliberately broad: timeouts and spawn failures are reported in
        # the response instead of surfacing as a 500.
        return SyncCheckResponse(
            in_sync=False, backend_only=[], etl_only=[],
            error=f"无法连接 ETL: {exc}",
        )

    backend_only = sorted(backend_codes - etl_codes)
    etl_only = sorted(etl_codes - backend_codes)
    return SyncCheckResponse(
        in_sync=len(backend_only) == 0 and len(etl_only) == 0,
        backend_only=backend_only,
        etl_only=etl_only,
    )

4
apps/mcp-server/.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
.env
.env.local
__pycache__/
*.pyc

58
apps/mcp-server/README.md Normal file
View File

@@ -0,0 +1,58 @@
# MCP Server
为阿里云百炼 AI 应用提供 PostgreSQL 只读查询能力的 MCP 服务。
## 用途
小程序端调用百炼 AI 应用时,百炼通过 MCP 协议连接本服务,读取 `etl_feiqiu` 数据库中的运营数据(会员、订单、支付、助教业绩、财务汇总等)。
## 架构
```
微信小程序 → 百炼 AI 应用 → MCP (Streamable HTTP) → 本服务 → PostgreSQL (etl_feiqiu)
```
## 暴露的 Tools
| Tool | 说明 |
|------|------|
| `list_tables` | 列出指定 schema 下的表 |
| `describe_table` | 查看单表结构 |
| `describe_schemas` | 批量返回多个 schema 的表结构(含主键) |
| `query_sql` | 在指定 schema 内执行只读 SQL |
## 可访问的 Schema
`ods` / `dwd` / `dws` / `core` / `meta` / `app`(etl_feiqiu 六层架构)
## 安全策略
- 仅允许 SELECT/WITH/SHOW/EXPLAIN,禁止 DDL/DML
- 正则 + sqlparse 双重校验
- 跨 schema 引用白名单限制
- 可选 Bearer Token 鉴权(`MCP_TOKEN` 环境变量;也可通过 `?token=` 查询参数传递)
- 生产环境建议使用只读数据库账号
## 配置
环境变量优先级:`MCP_PG_*` > `DB_*` / `ETL_DB_NAME` / `PG_NAME` > 项目根 `.env`
| 变量 | 说明 | 默认值 |
|------|------|--------|
| `MCP_PG_HOST` | 数据库主机(优先) | 回退 `DB_HOST` |
| `MCP_PG_PORT` | 数据库端口(优先) | 回退 `DB_PORT`,默认 5432 |
| `MCP_PG_DATABASE` | 数据库名(优先) | 回退 `ETL_DB_NAME`,再回退 `PG_NAME` |
| `MCP_PG_USER` | 数据库用户(优先) | 回退 `DB_USER` |
| `MCP_PG_PASSWORD` | 数据库密码(优先) | 回退 `DB_PASSWORD` |
| `MCP_TOKEN` | 鉴权 token,空则不启用 | 空 |
| `MCP_MAX_ROWS` | query_sql 最大返回行数 | 500 |
| `PORT` | 服务监听端口 | 9000 |
## 启动
```bash
cd apps/mcp-server
python server.py
```
百炼端 MCP 服务地址配置为:`https://mcp.langlangzhuoqiu.cn/mcp`

View File

@@ -0,0 +1,22 @@
[project]
name = "zqyy-mcp-server"
version = "0.1.0"
description = "MCP Server — 为阿里云百炼 AI 应用提供 PostgreSQL 只读查询能力"
requires-python = ">=3.10"
dependencies = [
"mcp[cli]>=1.9",
"psycopg[pool]>=3.1",
"python-dotenv>=1.0",
"sqlparse>=0.5",
"starlette>=0.27",
"uvicorn[standard]>=0.34",
]
[dependency-groups]
dev = [
"pytest>=8.0",
]
[tool.pytest.ini_options]
testpaths = ["tests"]
pythonpath = ["."]

412
apps/mcp-server/server.py Normal file
View File

@@ -0,0 +1,412 @@
import os
import re
import contextlib
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import sqlparse
from dotenv import load_dotenv
from psycopg_pool import ConnectionPool
from mcp.server.fastmcp import FastMCP
from mcp.server.transport_security import TransportSecuritySettings
from starlette.applications import Starlette
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import JSONResponse
from starlette.routing import Mount
# Load configuration: .env.local > sibling .env > project-root .env
# NOTE(review): .env.local is loaded with override=True, so it also replaces
# variables already set in the process environment; the other two files only
# fill in names that are still missing.
_here = Path(__file__).resolve().parent
_root = _here.parent.parent  # apps/mcp-server -> apps -> NeoZQYY
load_dotenv(_here / ".env.local", override=True)
load_dotenv(_here / ".env", override=False)
load_dotenv(_root / ".env", override=False)
# ----------------------------
# 工具:环境变量解析(避免 int("") 报错)
# ----------------------------
def env_str(name: str, default: str = "", required: bool = False) -> str:
    """Read a string environment variable, stripped of surrounding whitespace.

    Args:
        name: Environment variable name.
        default: Value used when the variable is not set (also stripped).
        required: When True, an empty result raises instead of being returned.

    Raises:
        RuntimeError: ``required`` is True and the resulting value is empty.
    """
    value = os.environ.get(name)
    if value is None:
        value = default
    if isinstance(value, str):
        value = value.strip()
    if required and (value is None or value == ""):
        raise RuntimeError(f"Missing required env var: {name}")
    return value
def env_int(name: str, default: Optional[int] = None, required: bool = False) -> int:
    """Read an integer environment variable.

    An unset, empty or whitespace-only variable yields ``default``; when no
    default is given that case raises. ``required`` only changes the wording
    of the error message.

    Raises:
        RuntimeError: The variable is empty with no default, or its value is
            not a valid integer.
    """
    raw = os.getenv(name, "")
    text = "" if raw is None else raw.strip()

    if text != "":
        try:
            return int(text)
        except ValueError as exc:
            raise RuntimeError(f"Invalid int env var {name}={raw!r}") from exc

    if default is not None:
        return default
    if required:
        raise RuntimeError(f"Missing required env var: {name}")
    raise RuntimeError(f"Missing env var: {name}")
# ----------------------------
# Configuration (injected through environment variables)
# MCP_PG_* takes precedence (standalone deployment), falling back to the
# project-wide DB_* / PG_NAME. The required=True fallbacks raise RuntimeError
# at import time when neither variable is set.
# ----------------------------
PGHOST = env_str("MCP_PG_HOST", default="") or env_str("DB_HOST", required=True)
PGPORT = env_int("MCP_PG_PORT", default=0) or env_int("DB_PORT", default=5432)
PGDATABASE = env_str("MCP_PG_DATABASE", default="") or env_str("ETL_DB_NAME", default="") or env_str("PG_NAME", required=True)
PGUSER = env_str("MCP_PG_USER", default="") or env_str("DB_USER", required=True)
PGPASSWORD = env_str("MCP_PG_PASSWORD", default="") or env_str("DB_PASSWORD", required=True)
MCP_TOKEN = env_str("MCP_TOKEN", default="")  # auth token; empty disables authentication
MAX_ROWS = env_int("MCP_MAX_ROWS", default=500)  # default maximum row count for query_sql
PORT = env_int("PORT", default=9000)  # uvicorn port
# The six-layer schema architecture of the etl_feiqiu database
ALLOWED_SCHEMAS = ("ods", "dwd", "dws", "core", "meta", "app")
ALLOWED_SCHEMA_SET = set(ALLOWED_SCHEMAS)
def _conninfo_quote(value: Any) -> str:
    """Quote one value for a libpq keyword/value connection string.

    libpq requires values containing spaces (or empty values) to be wrapped
    in single quotes, with backslashes and single quotes escaped by a
    backslash. Quoting unconditionally is always valid.
    """
    text = str(value)
    return "'" + text.replace("\\", "\\\\").replace("'", "\\'") + "'"


# psycopg DSN in libpq keyword/value form. Every value is quoted, so a
# password (or any other setting) containing spaces, quotes or backslashes
# no longer breaks or silently alters the connection string.
DSN = " ".join(
    f"{key}={_conninfo_quote(val)}"
    for key, val in (
        ("host", PGHOST),
        ("port", PGPORT),
        ("dbname", PGDATABASE),
        ("user", PGUSER),
        ("password", PGPASSWORD),
    )
)
# Connection pool: do not pass open=True (avoids exceptions raised from
# __del__ cleanup at interpreter exit); the pool is opened explicitly later.
pool = ConnectionPool(conninfo=DSN, min_size=1, max_size=10, timeout=60, open=False)
# ----------------------------
# Read-only SQL gate (the final safeguard is still a read-only DB account)
# ----------------------------
# Whole-word, case-insensitive match on write/DDL keywords anywhere in the
# text, including inside string literals.
FORBIDDEN = re.compile(
    r"\b(insert|update|delete|drop|alter|truncate|create|grant|revoke|copy|call|execute|do)\b",
    re.IGNORECASE,
)
# Additionally forbid explicit cross-schema access (to prevent privilege escalation).
# Matches the schema.table pattern but excludes single-letter aliases (e.g. t.id, o.amount).
# NOTE(review): an alias of two or more characters (e.g. mem.id) also matches
# and is therefore treated as a schema qualifier by the caller.
SCHEMA_QUAL = re.compile(r"\b([a-zA-Z_][a-zA-Z0-9_]{1,})\s*\.\s*[a-zA-Z_]", re.IGNORECASE)
def _is_probably_readonly(sql: str) -> bool:
    """Heuristically decide whether ``sql`` is a single read-only statement.

    The text is accepted only when all of the following hold:
      * no keyword from FORBIDDEN appears anywhere in it;
      * it parses to exactly one statement (stacked statements such as
        ``select 1; set role x`` are rejected, because only the first
        statement's leading keyword is inspected);
      * it contains no INTO keyword (``SELECT ... INTO`` creates a table);
      * its first non-whitespace token is select / with / show / explain.

    This is a best-effort gate, not a guarantee.
    """
    if FORBIDDEN.search(sql):
        return False

    # Drop empty fragments (e.g. whitespace after a trailing ";").
    statements = [s for s in sqlparse.parse(sql) if str(s).strip().strip(";").strip()]
    if len(statements) != 1:
        return False
    stmt = statements[0]

    # Keyword tokens are normalized to upper case by sqlparse; string
    # literals and quoted identifiers are not keyword tokens.
    for tok in stmt.flatten():
        if tok.is_keyword and tok.normalized == "INTO":
            return False

    first = stmt.token_first(skip_ws=True, skip_cm=False)
    if first is None:
        return False
    return str(first).strip().lower() in ("select", "with", "show", "explain")
def _validate_schema(schema: str) -> Optional[Dict[str, Any]]:
    """Return None when ``schema`` is whitelisted, otherwise an error payload."""
    if schema in ALLOWED_SCHEMA_SET:
        return None
    return {"error": f"schema 不允许:{schema}。仅允许:{sorted(ALLOWED_SCHEMA_SET)}"}
# Qualifiers that SCHEMA_QUAL cannot see because a double quote sits next to
# the dot: group 1 is a quoted qualifier ("public".users / "public"."users"),
# group 2 an unquoted qualifier followed by a quoted name (public."users").
_QUOTED_SCHEMA_QUAL = re.compile(
    r'"((?:[^"]|"")+)"\s*\.\s*["A-Za-z_]|\b([A-Za-z_][A-Za-z0-9_]+)\s*\.\s*"'
)


def _reject_cross_schema(sql: str, allowed_schema: str) -> Optional[Dict[str, Any]]:
    """Reject SQL that explicitly qualifies a name with a non-whitelisted schema.

    Simple protection: every explicit ``xxx.`` prefix must be one of the
    whitelisted business schemas or a system schema (pg_catalog,
    information_schema). Quoted qualifiers are checked as well and are
    compared case-sensitively, since quoting disables case folding.

    This is not a parser-level policy, but it blocks the common ways of
    reaching into other schemas. ``allowed_schema`` is accepted for interface
    compatibility and is not consulted.

    Returns:
        None when nothing suspicious is found, otherwise an error payload.
    """
    text = sql or ""
    safe = ALLOWED_SCHEMA_SET | {"pg_catalog", "information_schema"}
    # Built once instead of once per matched qualifier.
    safe_lower = {name.lower() for name in safe}

    offenders = {
        m.group(1) for m in SCHEMA_QUAL.finditer(text) if m.group(1).lower() not in safe_lower
    }
    for m in _QUOTED_SCHEMA_QUAL.finditer(text):
        quoted, unquoted = m.group(1), m.group(2)
        if quoted is not None:
            ident = quoted.replace('""', '"')
            # Single-character qualifiers are treated as aliases, matching
            # what SCHEMA_QUAL does for unquoted names.
            if len(ident) > 1 and ident not in safe:
                offenders.add(ident)
        elif unquoted.lower() not in safe_lower:
            offenders.add(unquoted)

    bad = sorted(offenders)
    if bad:
        return {"error": f"SQL 被拒绝:检测到不允许的 schema 引用 {bad},仅允许 {sorted(ALLOWED_SCHEMA_SET)} / 系统 schema。"}
    return None
# ----------------------------
# FastMCP: Streamable HTTP + JSON responses
# ----------------------------
mcp = FastMCP(
    "postgres-mcp",
    stateless_http=True,
    json_response=True,
    transport_security=TransportSecuritySettings(
        enable_dns_rebinding_protection=True,
        allowed_hosts=[
            # Important: allow each host both without and with a port
            "mcp.langlangzhuoqiu.cn",
            "mcp.langlangzhuoqiu.cn:*",
            "localhost",
            "localhost:*",
            "127.0.0.1",
            "127.0.0.1:*",
            "100.64.0.4",
            "100.64.0.4:*",
            "100.64.0.1",
            "100.64.0.1:*",
            "106.52.16.235",
            "106.52.16.235:*",
        ],
        # NOTE(review): the IP-address hosts above have no matching origin
        # entries here — confirm that is intended.
        allowed_origins=[
            "https://mcp.langlangzhuoqiu.cn",
            "https://mcp.langlangzhuoqiu.cn:*",
            "http://localhost",
            "http://localhost:*",
            "http://127.0.0.1",
            "http://127.0.0.1:*",
        ],
    ),
)
# ----------------------------
# Tools面向 etl_feiqiu 六层 schema
# ----------------------------
@mcp.tool()
def list_tables(schema: str = "dwd", include_views: bool = False) -> Dict[str, Any]:
    """List the tables of one schema (ods/dwd/dws/core/meta/app), optionally including views."""
    problem = _validate_schema(schema)
    if problem:
        return problem

    wanted_types = ["BASE TABLE"]
    if include_views:
        wanted_types.append("VIEW")

    query = """
SELECT table_name, table_type
FROM information_schema.tables
WHERE table_schema = %s AND table_type = ANY(%s)
ORDER BY table_name;
"""
    with pool.connection() as conn, conn.cursor() as cur:
        cur.execute(query, (schema, wanted_types))
        found = cur.fetchall()

    tables = [{"name": row[0], "type": row[1]} for row in found]
    return {
        "schema": schema,
        "include_views": include_views,
        "tables": tables,
        "table_count": len(found),
    }
@mcp.tool()
def describe_table(table: str, schema: str = "dwd") -> Dict[str, Any]:
    """Show a table's structure (columns, types, nullability, defaults)."""
    problem = _validate_schema(schema)
    if problem:
        return problem

    query = """
SELECT column_name, data_type, is_nullable, column_default, ordinal_position
FROM information_schema.columns
WHERE table_schema=%s AND table_name=%s
ORDER BY ordinal_position;
"""
    with pool.connection() as conn, conn.cursor() as cur:
        cur.execute(query, (schema, table))
        found = cur.fetchall()

    columns = []
    for row in found:
        columns.append(
            {"name": row[0], "type": row[1], "nullable": row[2], "default": row[3], "position": row[4]}
        )
    return {
        "schema": schema,
        "table": table,
        "columns": columns,
        "column_count": len(found),
    }
@mcp.tool()
def describe_schemas(
    schemas: Optional[List[str]] = None,
    include_views: bool = False,
    max_tables_per_schema: int = 500,
) -> Dict[str, Any]:
    """
    Return the table structures (including primary keys) of the
    ods/dwd/dws/core/meta/app schemas.
    When ``schemas`` is omitted, all six schemas are returned.
    """
    schemas = schemas or list(ALLOWED_SCHEMAS)
    rejected = [name for name in schemas if name not in ALLOWED_SCHEMA_SET]
    if rejected:
        return {"error": f"存在不允许的 schema{rejected}。仅允许:{sorted(ALLOWED_SCHEMA_SET)}"}

    wanted_types = ["BASE TABLE", "VIEW"] if include_views else ["BASE TABLE"]

    with pool.connection() as conn, conn.cursor() as cur:
        # 1) Table listing
        cur.execute(
            """
SELECT table_schema, table_name, table_type
FROM information_schema.tables
WHERE table_schema = ANY(%s)
AND table_type = ANY(%s)
ORDER BY table_schema, table_name;
""",
            (schemas, wanted_types),
        )
        table_rows = cur.fetchall()

        # 2) All columns (fetched in one go; with very many tables consider splitting/paging)
        cur.execute(
            """
SELECT table_schema, table_name, column_name, data_type, is_nullable, column_default, ordinal_position
FROM information_schema.columns
WHERE table_schema = ANY(%s)
ORDER BY table_schema, table_name, ordinal_position;
""",
            (schemas,),
        )
        col_rows = cur.fetchall()

        # 3) Primary keys
        cur.execute(
            """
SELECT kcu.table_schema, kcu.table_name, kcu.column_name, kcu.ordinal_position
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.constraint_name = kcu.constraint_name
AND tc.table_schema = kcu.table_schema
AND tc.table_name = kcu.table_name
WHERE tc.constraint_type = 'PRIMARY KEY'
AND tc.table_schema = ANY(%s)
ORDER BY kcu.table_schema, kcu.table_name, kcu.ordinal_position;
""",
            (schemas,),
        )
        pk_rows = cur.fetchall()

    # Tables per schema, capped at max_tables_per_schema (extra tables are dropped).
    tables_by_schema: Dict[str, List[Tuple[str, str]]] = {}
    for schema_name, table_name, table_type in table_rows:
        bucket = tables_by_schema.setdefault(schema_name, [])
        if len(bucket) < max_tables_per_schema:
            bucket.append((table_name, table_type))

    cols_map: Dict[Tuple[str, str], List[Dict[str, Any]]] = {}
    for schema_name, table_name, col, dtype, nullable, default, position in col_rows:
        cols_map.setdefault((schema_name, table_name), []).append(
            {"name": col, "type": dtype, "nullable": nullable, "default": default, "position": position}
        )

    pk_map: Dict[Tuple[str, str], List[str]] = {}
    for schema_name, table_name, col, _position in pk_rows:
        pk_map.setdefault((schema_name, table_name), []).append(col)

    # 4) Assemble the response in the order the schemas were requested
    schema_payload: Dict[str, Any] = {}
    for schema_name in schemas:
        listed = tables_by_schema.get(schema_name, [])
        tables: Dict[str, Any] = {}
        for table_name, table_type in listed:
            key = (schema_name, table_name)
            columns = cols_map.get(key, [])
            tables[table_name] = {
                "type": table_type,
                "primary_key": pk_map.get(key, []),
                "columns": columns,
                "column_count": len(columns),
            }
        schema_payload[schema_name] = {"table_count": len(listed), "tables": tables}

    return {
        "schemas": schema_payload,
        "include_views": include_views,
        "limits": {"max_tables_per_schema": max_tables_per_schema},
    }
@mcp.tool()
def query_sql(schema: str, sql: str, max_rows: int = MAX_ROWS) -> Dict[str, Any]:
    """
    Run a read-only SQL statement inside the given schema.

    The statement runs in a READ ONLY transaction with ``search_path`` set to
    ``schema`` (SET LOCAL), and explicit references to non-whitelisted
    schemas are rejected. ``max_rows`` is clamped to the range
    1..MCP_MAX_ROWS; ``truncated`` tells whether more rows were available.
    Validation failures are returned as ``{"error": ...}``.
    """
    err = _validate_schema(schema)
    if err:
        return err

    # The configured MAX_ROWS is the hard cap: a caller-supplied value may
    # lower it but not exceed it, and non-positive values are raised to 1.
    max_rows = max(1, min(max_rows, MAX_ROWS))

    sql = (sql or "").strip().rstrip(";")
    if not _is_probably_readonly(sql):
        return {"error": "SQL 被拒绝仅允许只读select/with/show/explain并禁止危险关键字。"}
    cross = _reject_cross_schema(sql, allowed_schema=schema)
    if cross:
        return cross

    with pool.connection() as conn:
        with conn.cursor() as cur:
            # Defense in depth: the keyword filter is heuristic, so let
            # PostgreSQL itself refuse writes. Must come before any query.
            cur.execute("SET TRANSACTION READ ONLY")
            # schema has passed the whitelist check, so interpolating it is safe
            cur.execute(f"SET LOCAL search_path TO {schema}")
            cur.execute(sql)
            if cur.description is None:
                # No result set: nothing to fetch.
                cols: List[str] = []
                rows: List[Any] = []
            else:
                cols = [d.name for d in cur.description]
                # Fetch one extra row to detect truncation.
                rows = cur.fetchmany(max_rows + 1)

    truncated = len(rows) > max_rows
    rows = rows[:max_rows]

    # Values that are not plain JSON scalars are converted to their string form.
    safe_rows: List[List[Any]] = []
    for r in rows:
        safe_rows.append([v if isinstance(v, (int, float, str, bool)) or v is None else str(v) for v in r])

    return {
        "schema": schema,
        "columns": cols,
        "rows": safe_rows,
        "row_count": len(safe_rows),
        "truncated": truncated,
        "max_rows": max_rows,
    }
# ----------------------------
# 鉴权 Middleware支持 Bearer 或 query token
# ----------------------------
class AuthMiddleware(BaseHTTPMiddleware):
    """Optional token check for requests whose path starts with /mcp.

    Active only when MCP_TOKEN is non-empty. A request passes when either the
    Authorization header equals ``Bearer <MCP_TOKEN>`` or the ``token`` query
    parameter equals MCP_TOKEN; otherwise a 401 JSON response is returned.
    """

    async def dispatch(self, request: Request, call_next):
        import hmac  # local import: only needed for the constant-time comparison

        if MCP_TOKEN and request.url.path.startswith("/mcp"):
            auth = request.headers.get("authorization", "")
            token_q = request.query_params.get("token", "")
            # Constant-time comparison so response timing does not reveal how
            # much of a guessed token is correct. Bytes are compared because
            # compare_digest rejects non-ASCII str arguments.
            header_ok = hmac.compare_digest(
                auth.encode("utf-8"), f"Bearer {MCP_TOKEN}".encode("utf-8")
            )
            query_ok = hmac.compare_digest(
                token_q.encode("utf-8"), MCP_TOKEN.encode("utf-8")
            )
            if not (header_ok or query_ok):
                return JSONResponse({"error": "unauthorized"}, status_code=401)
        return await call_next(request)
# ----------------------------
# lifespan显式 open/close pool并运行 session_manager
# ----------------------------
@contextlib.asynccontextmanager
async def lifespan(app: Starlette):
    """Application lifespan: open the pool, run the MCP session manager, close the pool."""
    # The pool is created with open=False, so it has to be opened explicitly;
    # wait=True blocks (up to 30 s) until the pool reports it is ready.
    pool.open(wait=True, timeout=30)
    try:
        async with mcp.session_manager.run():
            yield
    finally:
        # Close explicitly to avoid exceptions from __del__ cleanup during interpreter shutdown
        pool.close(timeout=5)
# MCP endpoint: /mcp (the default streamable_http_path is "/mcp")
app = Starlette(
    routes=[Mount("/", app=mcp.streamable_http_app())],
    lifespan=lifespan,
)
# Token check for /mcp requests; a no-op when MCP_TOKEN is empty.
app.add_middleware(AuthMiddleware)
if __name__ == "__main__":
    import uvicorn
    # Listens on all interfaces at the configured PORT.
    # NOTE(review): forwarded_allow_ips="*" trusts forwarding headers from any
    # client — confirm the service is only reachable through the reverse proxy.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=PORT,
        proxy_headers=True,
        forwarded_allow_ips="*",
    )

File diff suppressed because it is too large Load Diff

View File

@@ -7,5 +7,6 @@ requires-python = ">=3.10"
members = [
"apps/etl/connectors/feiqiu",
"apps/backend",
"apps/mcp-server",
"packages/shared",
]

View File

View File

@@ -0,0 +1,122 @@
# ==============================================================================
# 服务器端 Git 排除规则
# ==============================================================================
# 用途:服务器上不需要的文件,避免占用磁盘空间和干扰运行环境。
# 使用方式:运行 scripts/server/setup-server-git.py 自动配置。
#
# 原则:服务器只跑后端 API(+ 可选 ETL),其余全部排除。
# 注意:此文件只影响未 track 的新文件。
# 对于已 track 但服务器不需要的文件,
# 由 setup-server-git.py 配合 skip-worktree 处理。
# ===== 环境配置(服务器有自己的 .env,不用 Git 里的) =====
.env
.env.local
# 模板保留,方便参考
# !.env.template
# ===== ETL 导出数据(仅开发机留存) =====
export/
# ===== 文档(开发参考用,服务器不需要) =====
docs/
# 如果需要部署文档可单独拉取,但运行时不依赖
# ===== H5 原型设计稿 =====
# 已在 docs/h5_ui/ 下,被 docs/ 规则覆盖
# ===== 小程序源码(服务器不编译小程序) =====
apps/miniprogram/
# ===== 管理后台源码(服务器只需要 dist/,不需要源码和 node_modules) =====
apps/admin-web/src/
apps/admin-web/node_modules/
apps/admin-web/pnpm-lock.yaml
# ===== 测试(服务器不跑测试) =====
tests/
.hypothesis/
.pytest_cache/
pytest-cache-files-*/
# ===== 示例数据 =====
samples/
# ===== 临时目录 =====
tmp/
# ===== 运维脚本中的一次性脚本(服务器不需要开发用的 ops 脚本) =====
scripts/ops/
scripts/audit/
scripts/migrate/
# ===== 根目录散文件(开发用,服务器不需要) =====
*.png
*.code-workspace
start-admin.bat
.kiroignore
# ===== Kiro 配置(服务器上不用 Kiro) =====
.kiro/
# ===== infra 配置文档(参考用,服务器不需要) =====
infra/
# ===== 日志文件(服务器自己产生的日志不入 Git) =====
logs/
*.log
*.jsonl
# ===== Python 虚拟环境(服务器自己 uv sync 生成) =====
.venv/
venv/
ENV/
env/
# ===== Python 缓存与构建产物 =====
__pycache__/
*.pyc
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
dist/
# ===== 测试覆盖率 =====
.coverage
htmlcov/
# ===== Node =====
node_modules/
# ===== infra 敏感文件 =====
infra/**/*.key
infra/**/*.pem
infra/**/*.secret
# ===== IDE =====
.idea/
.vscode/
*.swp
*.swo
*~
.specstory/
.cursorindexingignore
# ===== Windows 杂项 =====
*.lnk
.Deleted/

View File

@@ -0,0 +1,137 @@
"""
服务器 Git 环境配置脚本
在服务器上首次 git clone 后运行一次,完成两件事:
1. 将 server-exclude.txt 复制到 .git/info/exclude
2. 对已 track 但服务器不需要的文件/目录设置 skip-worktree
这样 git pull 不会覆盖本地删除,也不会在工作区还原这些文件。
用法:
cd D:\\NeoZQYY\\test\\repo (或 prod\\repo)
python scripts/server/setup-server-git.py
运行后可以安全删除 export/ 等目录释放磁盘空间。
"""
import shutil
import subprocess
import sys
from pathlib import Path
# Repository root: the third ancestor directory of this file (the script is
# run as scripts/server/setup-server-git.py).
REPO_ROOT = Path(__file__).resolve().parent.parent.parent

# Paths handed to `git ls-files`; every tracked file they match gets the
# skip-worktree flag (tracked in Git, but not needed on the server).
SKIP_PREFIXES = [
    "export/",
    ".env",
    "docs/",
    "tests/",
    "samples/",
    "infra/",
    ".kiro/",
    ".hypothesis/",
    "apps/miniprogram/",
    "apps/admin-web/src/",
    "apps/admin-web/pnpm-lock.yaml",
    "scripts/ops/",
    "scripts/audit/",
    "scripts/migrate/",
    # Loose files in the repository root (screenshots, workspace file, etc.)
    "coach-detail-full.png",
    "customer-detail-full.png",
    "perf-records-current.png",
    "white-screen-debug.png",
    "NeoZQYY.code-workspace",
    "start-admin.bat",
    ".kiroignore",
]

# Directories that are not needed in the server working tree at all; once
# skip-worktree is set they can be deleted to free disk space.
DELETABLE_DIRS = [
    "export",
    "docs",
    "tests",
    "samples",
    "infra",
    ".kiro",
    ".hypothesis",
    "apps/miniprogram",
    "scripts/ops",
    "scripts/audit",
    "scripts/migrate",
]
def copy_exclude():
    """Install scripts/server/server-exclude.txt as the repository's .git/info/exclude.

    Creates the ``.git/info`` directory if it is missing and overwrites any
    existing ``exclude`` file.
    """
    rules_file = REPO_ROOT.joinpath("scripts", "server", "server-exclude.txt")
    target = REPO_ROOT.joinpath(".git", "info", "exclude")
    info_dir = target.parent
    info_dir.mkdir(parents=True, exist_ok=True)
    shutil.copy2(rules_file, target)
    print(f" 已复制 {rules_file.name} -> {target}")
def get_tracked_files(prefix: str) -> list[str]:
    """Return the tracked files that match ``prefix``.

    Args:
        prefix: A path or directory prefix, relative to the repository root,
            passed to ``git ls-files`` as a pathspec.

    Returns:
        The repository-relative paths reported by ``git ls-files``; an empty
        list when nothing matches.

    Raises:
        subprocess.CalledProcessError: If ``git ls-files`` exits with a
            non-zero status (for example when not run inside a repository).
    """
    result = subprocess.run(
        # -z gives NUL-terminated, verbatim paths. Without it Git C-quotes
        # paths containing non-ASCII bytes (core.quotePath), and those quoted
        # names cannot be passed back to `git update-index`.
        ["git", "ls-files", "-z", "--", prefix],
        capture_output=True,
        # Decode as UTF-8 instead of the locale codec, which may differ (e.g.
        # GBK on Chinese Windows). Assumes paths are stored as UTF-8.
        encoding="utf-8",
        cwd=REPO_ROOT,
        check=True,
    )
    return [path for path in result.stdout.split("\0") if path]
def skip_worktree(files: list[str]):
    """Set the skip-worktree flag on every path in ``files``.

    Args:
        files: Repository-relative paths of tracked files. An empty list is a
            no-op.

    Raises:
        subprocess.CalledProcessError: If ``git update-index`` fails for a
            batch, so a failure is not silently reported as success.
    """
    if not files:
        return
    # Work in batches so each command line stays well below the OS limit on
    # argument length.
    batch_size = 50
    for start in range(0, len(files), batch_size):
        batch = files[start:start + batch_size]
        subprocess.run(
            # "--" ends option parsing, so a path starting with "-" is still
            # treated as a file name.
            ["git", "update-index", "--skip-worktree", "--", *batch],
            cwd=REPO_ROOT,
            check=True,
        )
def main():
    """Run the one-off server Git setup and print cleanup hints.

    Steps:
        1. Install the exclude rules via ``copy_exclude``.
        2. For each entry in ``SKIP_PREFIXES``, flag the matching tracked
           files as skip-worktree and report the counts.
        3. List each directory in ``DELETABLE_DIRS`` with its size on disk
           and print the commands for deleting them manually.

    Nothing is deleted by this function; it only prints the commands.
    """
    print("=== 服务器 Git 环境配置 ===\n")

    # Step 1: install the exclude rules.
    print("[1/3] 配置 .git/info/exclude ...")
    copy_exclude()

    # Step 2: flag tracked files the server does not need.
    print("\n[2/3] 设置 skip-worktree已 track 但服务器不需要的文件)...")
    total_skipped = 0
    for prefix in SKIP_PREFIXES:
        files = get_tracked_files(prefix)
        if files:
            skip_worktree(files)
            total_skipped += len(files)
            print(f" {prefix} -> {len(files)} 个文件已标记")
        else:
            print(f" {prefix} -> 无匹配文件")
    print(f" 共标记 {total_skipped} 个文件")

    # Step 3: show which directories can be removed and how much space they use.
    print("\n[3/3] 以下目录已标记 skip-worktree可安全删除以释放磁盘空间")
    for d in DELETABLE_DIRS:
        dir_path = REPO_ROOT / d
        if dir_path.exists():
            # Total size of all regular files below the directory.
            size = sum(f.stat().st_size for f in dir_path.rglob("*") if f.is_file())
            size_mb = size / (1024 * 1024)
            print(f" {d}/ ({size_mb:.1f} MB)")
        else:
            print(f" {d}/ (不存在,无需处理)")

    print("\n如需删除,手动执行:")
    for d in DELETABLE_DIRS:
        # cmd.exe parses "/" as a switch prefix, so a hint such as
        # "rmdir /s /q apps/miniprogram" is rejected; print backslash paths.
        win_path = d.replace("/", "\\")
        print(f" rmdir /s /q {win_path}")

    print("\n配置完成。后续 git pull 不会还原这些文件。")


if __name__ == "__main__":
    main()