feat: TaskSelector v2 全链路展示 + 同步检查 + MCP Server + 服务器 Git 排除

- admin-web: TaskSelector 重构为按域+层全链路展示,新增同步检查功能
- admin-web: TaskConfig 动态加载 Flow/处理模式定义,DWD 表过滤内嵌域面板
- admin-web: App hydrate 完成前显示 loading,避免误跳 /login
- backend: 新增 /tasks/sync-check 对比后端与 ETL 真实注册表
- backend: 新增 /tasks/flows 返回 Flow 和处理模式定义
- apps/mcp-server: 新增 MCP Server 模块(百炼 AI PostgreSQL 只读查询)
- scripts/server: 新增 setup-server-git.py + server-exclude.txt
- docs: 更新 LAUNCH-CHECKLIST 添加 Git 排除配置步骤
- pyproject.toml: workspace members 新增 mcp-server
This commit is contained in:
Neo
2026-02-19 10:31:16 +08:00
parent 4eac07da47
commit 254ccb1e77
16 changed files with 2375 additions and 1285 deletions

View File

@@ -1,17 +1,18 @@
# apps/ # apps/
## 作用说明 ## 作用说明
应用项目顶层目录,存放所有可独立部署/运行的子项目。当前包含 ETL Connector、FastAPI 后端、微信小程序前端,以及预留的管理后台。 应用项目顶层目录,存放所有可独立部署/运行的子项目。当前包含 ETL Connector、FastAPI 后端、微信小程序前端,以及预留的管理后台。
## 内部结构 ## 内部结构
- `etl/pipelines/feiqiu/` — 飞球 Connector(数据源连接器,抽取→清洗→汇总全流程) - `etl/pipelines/feiqiu/` — 飞球 Connector(数据源连接器,抽取→清洗→汇总全流程)
- `backend/` — FastAPI 后端(小程序 API、权限、审批) - `backend/` — FastAPI 后端(小程序 API、权限、审批)
- `miniprogram/` — 微信小程序前端(Donut + TDesign) - `miniprogram/` — 微信小程序前端(Donut + TDesign)
- `admin-web/` — 管理后台(预留,暂未实施) - `admin-web/` — 管理后台(预留,暂未实施)
- `mcp-server/` — MCP Server(为百炼 AI 应用提供 PostgreSQL 只读查询)
## Roadmap
## Roadmap
- 新增更多 Connector 时,在 `etl/pipelines/` 下按平台名创建子目录
- `admin-web/` 待产品需求确认后启动 - 新增更多 Connector 时,在 `etl/pipelines/` 下按平台名创建子目录
- `admin-web/` 待产品需求确认后启动

View File

@@ -175,8 +175,15 @@ const AppLayout: React.FC = () => {
const App: React.FC = () => { const App: React.FC = () => {
const hydrate = useAuthStore((s) => s.hydrate); const hydrate = useAuthStore((s) => s.hydrate);
const [hydrated, setHydrated] = useState(false);
useEffect(() => { hydrate(); }, [hydrate]); useEffect(() => {
hydrate();
setHydrated(true);
}, [hydrate]);
/* hydrate 完成前不渲染路由,避免 PrivateRoute 误判跳转到 /login */
if (!hydrated) return <Spin style={{ display: "flex", justifyContent: "center", marginTop: 120 }} />;
return ( return (
<Routes> <Routes>

View File

@@ -1,32 +1,77 @@
/** /**
* 任务相关 API 调用。 * 任务相关 API 调用。
* */
* - fetchTaskRegistry获取按业务域分组的任务注册表
*/ import { apiClient } from './client';
import type { TaskConfig, TaskDefinition } from '../types';
import { apiClient } from './client';
import type { TaskConfig, TaskDefinition } from '../types'; /** DWD 表项(后端返回的原始结构) */
export interface DwdTableItem {
/** 获取按业务域分组的任务注册表 */ table_name: string;
export async function fetchTaskRegistry(): Promise<Record<string, TaskDefinition[]>> { display_name: string;
// 后端返回 { groups: { 域名: [TaskItem] } },需要解包 domain: string;
const { data } = await apiClient.get<{ groups: Record<string, TaskDefinition[]> }>('/tasks/registry'); ods_source: string;
return data.groups; is_dimension: boolean;
} }
/** 获取按业务域分组的 DWD 表定义 */ /** Flow 定义 */
export async function fetchDwdTables(): Promise<Record<string, string[]>> { export interface FlowDef {
// 后端返回 { groups: { 域名: [DwdTableItem] } },需要解包并提取 table_name id: string;
const { data } = await apiClient.get<{ groups: Record<string, { table_name: string }[]> }>('/tasks/dwd-tables'); name: string;
const result: Record<string, string[]> = {}; layers: string[];
for (const [domain, items] of Object.entries(data.groups)) { }
result[domain] = items.map((item) => item.table_name);
} /** 处理模式定义 */
return result; export interface ProcessingModeDef {
} id: string;
name: string;
/** 验证任务配置并返回生成的 CLI 命令预览 */ description: string;
export async function validateTaskConfig(config: TaskConfig): Promise<{ command: string }> { }
const { data } = await apiClient.post<{ command: string }>('/tasks/validate', { config });
return data; /** 同步检查结果 */
} export interface SyncCheckResult {
in_sync: boolean;
backend_only: string[];
etl_only: string[];
error: string | null;
}
/**
 * Fetch the task registry grouped by business domain.
 *
 * The response body is typed as `{ groups: { <domain>: TaskDefinition[] } }`;
 * only the unwrapped `groups` map is returned.
 *
 * @returns A record mapping each domain name to its task definitions.
 */
export async function fetchTaskRegistry(): Promise<Record<string, TaskDefinition[]>> {
  type RegistryPayload = { groups: Record<string, TaskDefinition[]> };
  const response = await apiClient.get<RegistryPayload>('/tasks/registry');
  return response.data.groups;
}
/**
 * Fetch the DWD table definitions grouped by business domain, keeping the
 * full item structure.
 *
 * @returns A record mapping each domain name to its `DwdTableItem` list,
 *          taken from the `groups` field of the response body.
 */
export async function fetchDwdTablesRich(): Promise<Record<string, DwdTableItem[]>> {
  type DwdTablesPayload = { groups: Record<string, DwdTableItem[]> };
  const response = await apiClient.get<DwdTablesPayload>('/tasks/dwd-tables');
  return response.data.groups;
}
/**
 * Fetch the DWD table definitions grouped by business domain, reduced to
 * table names only (kept for compatibility with older callers).
 *
 * Delegates to `fetchDwdTablesRich` and projects every item to its
 * `table_name`.
 *
 * @returns A record mapping each domain name to the list of its table names.
 */
export async function fetchDwdTables(): Promise<Record<string, string[]>> {
  const richGroups = await fetchDwdTablesRich();
  const namesByDomain: Record<string, string[]> = {};
  Object.entries(richGroups).forEach(([domain, tableItems]) => {
    namesByDomain[domain] = tableItems.map(({ table_name }) => table_name);
  });
  return namesByDomain;
}
/**
 * Fetch the Flow definitions and the processing-mode definitions.
 *
 * @returns The response body as-is: `flows` and `processing_modes`.
 */
export async function fetchFlows(): Promise<{ flows: FlowDef[]; processing_modes: ProcessingModeDef[] }> {
  type FlowsPayload = { flows: FlowDef[]; processing_modes: ProcessingModeDef[] };
  const response = await apiClient.get<FlowsPayload>('/tasks/flows');
  return response.data;
}
/**
 * Validate a task configuration and return a preview of the generated CLI
 * command.
 *
 * @param config - Task configuration, sent in the request body as `{ config }`.
 * @returns The response body, containing the `command` string.
 */
export async function validateTaskConfig(config: TaskConfig): Promise<{ command: string }> {
  type ValidatePayload = { command: string };
  const requestBody = { config };
  const response = await apiClient.post<ValidatePayload>('/tasks/validate', requestBody);
  return response.data;
}
/**
 * Request the sync check that compares the backend task list with the ETL's
 * real registry.
 *
 * @returns The response body as a `SyncCheckResult`.
 */
export async function checkTaskSync(): Promise<SyncCheckResult> {
  const response = await apiClient.get<SyncCheckResult>('/tasks/sync-check');
  return response.data;
}

View File

@@ -1,307 +1,445 @@
/** /**
* 按业务域分组的任务选择器。 * 按业务域全链路展示的任务选择器v2
* *
* 从 /api/tasks/registry 获取任务注册表,按业务域折叠展示, * 每个业务域一个折叠面板,内部按层分组展示完整链路:
* 支持全选/反选和按 Flow 层级过滤。 * ODS 任务 → DWD 表(该域的) → DWS/INDEX 任务
* 当 Flow 包含 DWD 层时,在 DWD 任务下方内嵌表过滤子选项。 *
* 功能:
* - 同步检查:工具栏右侧 Badge 指示,点击展示差异 Modal
* - 全选常用 / 全选 / 反选 / 清空 按钮
* - DWD 表选中 = 过滤 DWD_LOAD_FROM_ODS 的装载范围
*/ */
import React, { useEffect, useState, useMemo, useCallback } from "react"; import React, { useEffect, useState, useMemo, useCallback } from "react";
import { import {
Collapse, Collapse, Checkbox, Spin, Alert, Button, Space, Typography,
Checkbox, Tag, Badge, Modal, Tooltip, Divider,
Spin,
Alert,
Button,
Space,
Typography,
Tag,
Divider,
} from "antd"; } from "antd";
import {
CheckCircleOutlined, WarningOutlined, SyncOutlined, TableOutlined,
} from "@ant-design/icons";
import type { CheckboxChangeEvent } from "antd/es/checkbox"; import type { CheckboxChangeEvent } from "antd/es/checkbox";
import { fetchTaskRegistry, fetchDwdTables } from "../api/tasks"; import { fetchTaskRegistry, fetchDwdTablesRich, checkTaskSync } from "../api/tasks";
import type { TaskDefinition } from "../types"; import type { DwdTableItem as ApiDwdTableItem, SyncCheckResult } from "../api/tasks";
import type { TaskDefinition, DwdTableItem } from "../types";
const { Text } = Typography; const { Text } = Typography;
/* ------------------------------------------------------------------ */ /* 层排序 / 标签 / 颜色 */
/* Props */ const LAYER_ORDER: Record<string, number> = { ODS: 0, DWD: 1, DWS: 2, INDEX: 3, UTILITY: 4 };
/* ------------------------------------------------------------------ */ const LAYER_LABELS: Record<string, string> = {
ODS: "ODS 抽取", DWD: "DWD 装载", DWS: "DWS 汇总", INDEX: "DWS 指数", UTILITY: "工具",
};
const LAYER_COLORS: Record<string, string> = {
ODS: "blue", DWD: "green", DWS: "orange", INDEX: "purple", UTILITY: "default",
};
/* 域排序 */
const DOMAIN_ORDER: Record<string, number> = {
助教: 0, 结算: 1, 台桌: 2, 会员: 3, 商品: 4, 团购: 5, 库存: 6, 财务: 7, 指数: 8, 通用: 9, 工具: 10,
};
export interface TaskSelectorProps { export interface TaskSelectorProps {
/** 当前 Flow 包含的层(如 ["ODS", "DWD"] */
layers: string[]; layers: string[];
/** 已选中的任务编码列表 */
selectedTasks: string[]; selectedTasks: string[];
/** 选中任务变化回调 */
onTasksChange: (tasks: string[]) => void; onTasksChange: (tasks: string[]) => void;
/** DWD 表过滤:已选中的表名列表 */
selectedDwdTables?: string[]; selectedDwdTables?: string[];
/** DWD 表过滤变化回调 */
onDwdTablesChange?: (tables: string[]) => void; onDwdTablesChange?: (tables: string[]) => void;
} }
/* ------------------------------------------------------------------ */ interface DomainGroup {
/* 过滤逻辑 */ domain: string;
/* ------------------------------------------------------------------ */ layerTasks: { layer: string; tasks: TaskDefinition[] }[];
dwdTables: DwdTableItem[];
export function filterTasksByLayers(
tasks: TaskDefinition[],
layers: string[],
): TaskDefinition[] {
if (layers.length === 0) return [];
return tasks;
} }
/* ------------------------------------------------------------------ */ /** 当 layers 包含 DWD 时DWD_LOAD_FROM_ODS 由 DWD 表过滤区块隐含,不单独显示 */
/* 组件 */ const HIDDEN_WHEN_DWD_VISIBLE = new Set(["DWD_LOAD_FROM_ODS"]);
/* ------------------------------------------------------------------ */
/**
 * Build the per-domain, per-layer grouping rendered by the selector.
 *
 * - Tasks whose `layer` is not in `layers` are dropped.
 * - When `layers` contains "DWD", tasks listed in `HIDDEN_WHEN_DWD_VISIBLE`
 *   are dropped as well, and every domain present in `dwdTableGroups` is
 *   included even if it has no visible task.
 * - Tasks are grouped by their own `domain` field (not by the registry key).
 * - Within a domain, layers are ordered by `LAYER_ORDER` (unknown layers
 *   last) and tasks are ordered with `is_common` ones first.
 * - Domains with neither visible tasks nor DWD tables are omitted; the
 *   remaining groups are ordered by `DOMAIN_ORDER` (unknown domains last).
 *
 * @param registry - Task definitions as returned by the registry endpoint.
 * @param dwdTableGroups - DWD table items keyed by domain.
 * @param layers - Layers of the currently selected Flow.
 * @returns The ordered list of domain groups.
 */
function buildDomainGroups(
  registry: Record<string, TaskDefinition[]>,
  dwdTableGroups: Record<string, DwdTableItem[]>,
  layers: string[],
): DomainGroup[] {
  const dwdVisible = layers.includes("DWD");
  const rankOfLayer = (layer: string): number => LAYER_ORDER[layer] ?? 99;
  const rankOfDomain = (domain: string): number => DOMAIN_ORDER[domain] ?? 99;

  // domain -> layer -> tasks, filled in encounter order
  const buckets = new Map<string, Map<string, TaskDefinition[]>>();
  // Set keeps first-seen order of the domains
  const domainsInOrder = new Set<string>();

  Object.values(registry).forEach((taskList) => {
    taskList.forEach((task) => {
      if (!layers.includes(task.layer)) return;
      if (dwdVisible && HIDDEN_WHEN_DWD_VISIBLE.has(task.code)) return;

      domainsInOrder.add(task.domain);

      let perLayer = buckets.get(task.domain);
      if (perLayer === undefined) {
        perLayer = new Map<string, TaskDefinition[]>();
        buckets.set(task.domain, perLayer);
      }

      const bucket = perLayer.get(task.layer);
      if (bucket === undefined) {
        perLayer.set(task.layer, [task]);
      } else {
        bucket.push(task);
      }
    });
  });

  // Domains that only own DWD tables still get a panel when DWD is shown
  if (dwdVisible) {
    Object.keys(dwdTableGroups).forEach((domain) => domainsInOrder.add(domain));
  }

  const result: DomainGroup[] = [];
  domainsInOrder.forEach((domain) => {
    const perLayer = buckets.get(domain) ?? new Map<string, TaskDefinition[]>();

    const layerTasks = [...perLayer.entries()]
      .sort(([layerA], [layerB]) => rankOfLayer(layerA) - rankOfLayer(layerB))
      .map(([layer, tasks]) => {
        // common tasks first; the sort is applied in place on the bucket array
        tasks.sort((x, y) => {
          if (x.is_common === y.is_common) return 0;
          return x.is_common ? -1 : 1;
        });
        return { layer, tasks };
      });

    const dwdTables = dwdVisible ? (dwdTableGroups[domain] ?? []) : [];

    const isEmpty = layerTasks.length === 0 && dwdTables.length === 0;
    if (!isEmpty) {
      result.push({ domain, layerTasks, dwdTables });
    }
  });

  result.sort((left, right) => rankOfDomain(left.domain) - rankOfDomain(right.domain));
  return result;
}
const TaskSelector: React.FC<TaskSelectorProps> = ({ const TaskSelector: React.FC<TaskSelectorProps> = ({
layers, layers, selectedTasks, onTasksChange,
selectedTasks, selectedDwdTables = [], onDwdTablesChange,
onTasksChange,
selectedDwdTables = [],
onDwdTablesChange,
}) => { }) => {
const [registry, setRegistry] = useState<Record<string, TaskDefinition[]>>({}); const [registry, setRegistry] = useState<Record<string, TaskDefinition[]>>({});
const [dwdTableGroups, setDwdTableGroups] = useState<Record<string, DwdTableItem[]>>({});
const [loading, setLoading] = useState(false); const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null); const [error, setError] = useState<string | null>(null);
const [syncResult, setSyncResult] = useState<SyncCheckResult | null>(null);
const [syncLoading, setSyncLoading] = useState(false);
const [syncModalOpen, setSyncModalOpen] = useState(false);
// DWD 表定义(按域分组) /* 加载数据 */
const [dwdTableGroups, setDwdTableGroups] = useState<Record<string, string[]>>({});
const showDwdFilter = layers.includes("DWD") && !!onDwdTablesChange;
/* ---------- 加载任务注册表 ---------- */
useEffect(() => { useEffect(() => {
let cancelled = false; let cancelled = false;
setLoading(true); setLoading(true);
setError(null); setError(null);
const promises: Promise<void>[] = [ const promises: Promise<void>[] = [
fetchTaskRegistry() fetchTaskRegistry()
.then((data) => { if (!cancelled) setRegistry(data); }) .then((data) => { if (!cancelled) setRegistry(data); })
.catch((err) => { if (!cancelled) setError(err?.message ?? "获取任务列表失败"); }), .catch((err) => { if (!cancelled) setError(err?.message ?? "获取任务列表失败"); }),
]; ];
// 如果包含 DWD 层,同时加载 DWD 表定义
if (layers.includes("DWD")) { if (layers.includes("DWD")) {
promises.push( promises.push(
fetchDwdTables() fetchDwdTablesRich()
.then((data) => { if (!cancelled) setDwdTableGroups(data); }) .then((data) => {
.catch(() => { /* DWD 表加载失败不阻塞任务列表 */ }), if (cancelled) return;
const converted: Record<string, DwdTableItem[]> = {};
for (const [domain, items] of Object.entries(data)) {
converted[domain] = items.map((item: ApiDwdTableItem) => ({
table_name: item.table_name, display_name: item.display_name,
domain: item.domain, ods_source: item.ods_source, is_dimension: item.is_dimension,
}));
}
setDwdTableGroups(converted);
})
.catch(() => {}),
); );
} }
Promise.all(promises).finally(() => { if (!cancelled) setLoading(false); }); Promise.all(promises).finally(() => { if (!cancelled) setLoading(false); });
return () => { cancelled = true; }; return () => { cancelled = true; };
}, [layers]); }, [layers]);
/* ---------- 按 layers 过滤后的分组 ---------- */ /* 首次加载后自动同步检查 */
const filteredGroups = useMemo(() => { useEffect(() => {
const result: Record<string, TaskDefinition[]> = {}; if (Object.keys(registry).length > 0) handleSyncCheck();
for (const [domain, tasks] of Object.entries(registry)) { // eslint-disable-next-line react-hooks/exhaustive-deps
const visible = filterTasksByLayers(tasks, layers); }, [registry]);
if (visible.length > 0) {
result[domain] = [...visible].sort((a, b) => { const domainGroups = useMemo(
if (a.is_common === b.is_common) return 0; () => buildDomainGroups(registry, dwdTableGroups, layers),
return a.is_common ? -1 : 1; [registry, dwdTableGroups, layers],
}); );
}
}
return result;
}, [registry, layers]);
const allVisibleCodes = useMemo( const allVisibleCodes = useMemo(
() => Object.values(filteredGroups).flatMap((t) => t.map((d) => d.code)), () => domainGroups.flatMap((g) => g.layerTasks.flatMap((lt) => lt.tasks.map((t) => t.code))),
[filteredGroups], [domainGroups],
);
const allCommonCodes = useMemo(
() => domainGroups.flatMap((g) =>
g.layerTasks.flatMap((lt) => lt.tasks.filter((t) => t.is_common).map((t) => t.code)),
),
[domainGroups],
); );
// DWD 表扁平列表 /* 同步检查 */
const allDwdTableNames = useMemo( const handleSyncCheck = useCallback(async () => {
() => Object.values(dwdTableGroups).flat(), setSyncLoading(true);
[dwdTableGroups], try { setSyncResult(await checkTaskSync()); }
); catch { setSyncResult({ in_sync: false, backend_only: [], etl_only: [], error: "检查失败" }); }
finally { setSyncLoading(false); }
}, []);
/* ---------- 事件处理 ---------- */ /* 任务选择 */
const handleSelectAll = useCallback(() => onTasksChange(allVisibleCodes), [allVisibleCodes, onTasksChange]);
const handleDomainChange = useCallback( const handleSelectCommon = useCallback(() => onTasksChange(allCommonCodes), [allCommonCodes, onTasksChange]);
(domain: string, checkedCodes: string[]) => { const handleInvert = useCallback(() => {
const otherDomainCodes = selectedTasks.filter( const set = new Set(selectedTasks);
(code) => !filteredGroups[domain]?.some((t) => t.code === code), onTasksChange(allVisibleCodes.filter((c) => !set.has(c)));
);
onTasksChange([...otherDomainCodes, ...checkedCodes]);
},
[selectedTasks, filteredGroups, onTasksChange],
);
const handleSelectAll = useCallback(() => {
onTasksChange(allVisibleCodes);
}, [allVisibleCodes, onTasksChange]);
const handleInvertSelection = useCallback(() => {
const currentSet = new Set(selectedTasks);
const inverted = allVisibleCodes.filter((code) => !currentSet.has(code));
onTasksChange(inverted);
}, [allVisibleCodes, selectedTasks, onTasksChange]); }, [allVisibleCodes, selectedTasks, onTasksChange]);
const handleClear = useCallback(() => onTasksChange([]), [onTasksChange]);
/* ---------- DWD 表过滤事件 ---------- */ const handleDomainToggle = useCallback(
(group: DomainGroup, checked: boolean) => {
const handleDwdDomainTableChange = useCallback( const codes = new Set(group.layerTasks.flatMap((lt) => lt.tasks.map((t) => t.code)));
(domain: string, checked: string[]) => { if (checked) {
if (!onDwdTablesChange) return; const merged = new Set(selectedTasks);
const domainTables = new Set(dwdTableGroups[domain] ?? []); codes.forEach((c) => merged.add(c));
const otherSelected = selectedDwdTables.filter((t) => !domainTables.has(t)); onTasksChange([...merged]);
onDwdTablesChange([...otherSelected, ...checked]); } else {
onTasksChange(selectedTasks.filter((c) => !codes.has(c)));
}
}, },
[selectedDwdTables, dwdTableGroups, onDwdTablesChange], [selectedTasks, onTasksChange],
); );
const handleDwdSelectAll = useCallback(() => { const handleTaskToggle = useCallback(
onDwdTablesChange?.(allDwdTableNames); (code: string, checked: boolean) => {
}, [allDwdTableNames, onDwdTablesChange]); onTasksChange(checked ? [...selectedTasks, code] : selectedTasks.filter((c) => c !== code));
},
[selectedTasks, onTasksChange],
);
const handleDwdClearAll = useCallback(() => { /* DWD 表选择 */
onDwdTablesChange?.([]); const handleDwdTableToggle = useCallback(
}, [onDwdTablesChange]); (tableName: string, checked: boolean) => {
if (!onDwdTablesChange) return;
onDwdTablesChange(checked
? [...selectedDwdTables, tableName]
: selectedDwdTables.filter((t) => t !== tableName));
},
[selectedDwdTables, onDwdTablesChange],
);
/* ---------- 渲染 ---------- */ const handleDwdDomainToggle = useCallback(
(tables: DwdTableItem[], checked: boolean) => {
if (!onDwdTablesChange) return;
const names = new Set(tables.map((t) => t.table_name));
if (checked) {
const merged = new Set(selectedDwdTables);
names.forEach((n) => merged.add(n));
onDwdTablesChange([...merged]);
} else {
onDwdTablesChange(selectedDwdTables.filter((t) => !names.has(t)));
}
},
[selectedDwdTables, onDwdTablesChange],
);
/* 渲染 */
if (loading) return <Spin tip="加载任务列表…" />; if (loading) return <Spin tip="加载任务列表…" />;
if (error) return <Alert type="error" message="加载失败" description={error} />; if (error) return <Alert type="error" message="加载失败" description={error} />;
if (domainGroups.length === 0) return <Text type="secondary"> Flow </Text>;
const domainEntries = Object.entries(filteredGroups);
if (domainEntries.length === 0) return <Text type="secondary"> Flow </Text>;
const selectedCount = selectedTasks.filter((c) => allVisibleCodes.includes(c)).length; const selectedCount = selectedTasks.filter((c) => allVisibleCodes.includes(c)).length;
// DWD 装载任务是否被选中 const showDwdFilter = layers.includes("DWD") && !!onDwdTablesChange;
const dwdLoadSelected = selectedTasks.includes("DWD_LOAD_FROM_ODS");
/** 渲染某个域下的 DWD 表过滤区块 */
const renderDwdTableFilter = (dwdTables: DwdTableItem[]) => {
if (!showDwdFilter || dwdTables.length === 0) return null;
const domainDwdSelected = selectedDwdTables.filter((t) => dwdTables.some((d) => d.table_name === t));
return (
<div style={{
marginTop: 6, marginLeft: 4, padding: "6px 8px",
background: "#f6ffed", borderRadius: 4, border: "1px solid #d9f7be",
}}>
<div style={{ display: "flex", justifyContent: "space-between", alignItems: "center", marginBottom: 4 }}>
<Space size={4}>
<TableOutlined style={{ color: "#52c41a", fontSize: 12 }} />
<Text style={{ fontSize: 12, fontWeight: 500 }}>DWD </Text>
<Text type="secondary" style={{ fontSize: 11 }}>
{domainDwdSelected.length === 0 ? "(未选 = 全部装载)" : `${domainDwdSelected.length}/${dwdTables.length}`}
</Text>
</Space>
<Space size={4}>
<Button size="small" type="link" style={{ padding: 0, fontSize: 11, height: "auto" }}
onClick={() => handleDwdDomainToggle(dwdTables, true)}></Button>
<Button size="small" type="link" style={{ padding: 0, fontSize: 11, height: "auto" }}
onClick={() => handleDwdDomainToggle(dwdTables, false)}></Button>
</Space>
</div>
{dwdTables.map((dt) => (
<div key={dt.table_name} style={{ padding: "1px 0" }}>
<Checkbox
checked={selectedDwdTables.includes(dt.table_name)}
onChange={(e) => handleDwdTableToggle(dt.table_name, e.target.checked)}
>
<Text style={{ fontSize: 12 }}>{dt.table_name}</Text>
<Text type="secondary" style={{ marginLeft: 6, fontSize: 11 }}>{dt.display_name}</Text>
{dt.is_dimension && <Tag color="cyan" style={{ marginLeft: 4, fontSize: 10, lineHeight: "16px" }}></Tag>}
</Checkbox>
</div>
))}
</div>
);
};
return ( return (
<div> <div>
<Space style={{ marginBottom: 8 }}> {/* 工具栏 */}
<Button size="small" onClick={handleSelectAll}></Button> <div style={{ display: "flex", justifyContent: "space-between", alignItems: "center", marginBottom: 8 }}>
<Button size="small" onClick={handleInvertSelection}></Button> <Space size={4} wrap>
<Text type="secondary"> {selectedCount} / {allVisibleCodes.length}</Text> <Button size="small" onClick={handleSelectCommon}></Button>
</Space> <Button size="small" onClick={handleSelectAll}></Button>
<Button size="small" onClick={handleInvert}></Button>
<Button size="small" onClick={handleClear}></Button>
<Text type="secondary" style={{ marginLeft: 4 }}> {selectedCount} / {allVisibleCodes.length}</Text>
</Space>
<Tooltip title="对比后端注册表与 ETL 真实任务列表">
{syncLoading ? (
<Button size="small" icon={<SyncOutlined spin />} disabled></Button>
) : syncResult === null ? (
<Button size="small" icon={<SyncOutlined />} onClick={handleSyncCheck}></Button>
) : syncResult.in_sync ? (
<Button size="small" icon={<CheckCircleOutlined />} style={{ color: "#52c41a", borderColor: "#b7eb8f" }} onClick={handleSyncCheck}></Button>
) : (
<Badge dot>
<Button size="small" danger icon={<WarningOutlined />} onClick={() => setSyncModalOpen(true)}></Button>
</Badge>
)}
</Tooltip>
</div>
{/* 域折叠面板 */}
<Collapse <Collapse
defaultActiveKey={domainEntries.map(([d]) => d)} defaultActiveKey={domainGroups.filter((g) => g.domain !== "工具" && g.domain !== "通用").map((g) => g.domain)}
items={domainEntries.map(([domain, tasks]) => { items={domainGroups.map((group) => {
const domainCodes = tasks.map((t) => t.code); const domainCodes = group.layerTasks.flatMap((lt) => lt.tasks.map((t) => t.code));
const domainSelected = selectedTasks.filter((c) => domainCodes.includes(c)); const domainSelected = selectedTasks.filter((c) => domainCodes.includes(c));
const allChecked = domainSelected.length === domainCodes.length; const allChecked = domainCodes.length > 0 && domainSelected.length === domainCodes.length;
const indeterminate = domainSelected.length > 0 && !allChecked; const indeterminate = domainSelected.length > 0 && !allChecked;
const handleDomainCheckAll = (e: CheckboxChangeEvent) => {
handleDomainChange(domain, e.target.checked ? domainCodes : []);
};
return { return {
key: domain, key: group.domain,
label: ( label: (
<span onClick={(e) => e.stopPropagation()}> <span onClick={(e) => e.stopPropagation()}>
<Checkbox <Checkbox
indeterminate={indeterminate} indeterminate={indeterminate} checked={allChecked}
checked={allChecked} onChange={(e: CheckboxChangeEvent) => handleDomainToggle(group, e.target.checked)}
onChange={handleDomainCheckAll}
style={{ marginRight: 8 }} style={{ marginRight: 8 }}
/> />
{domain} {group.domain}
<Text type="secondary" style={{ marginLeft: 4 }}> <Text type="secondary" style={{ marginLeft: 4 }}>({domainSelected.length}/{domainCodes.length})</Text>
({domainSelected.length}/{domainCodes.length})
</Text>
</span> </span>
), ),
children: ( children: (
<Checkbox.Group <div>
value={domainSelected} {(() => {
onChange={(checked) => handleDomainChange(domain, checked as string[])} /* 找到 DWD 表过滤应插入的位置ODS 之后、DWS/INDEX 之前 */
> const hasDwdLayer = group.layerTasks.some((lt) => lt.layer === "DWD");
<Space direction="vertical" style={{ width: "100%" }}> const shouldInsertDwd = !hasDwdLayer && group.dwdTables.length > 0 && showDwdFilter;
{tasks.map((t) => ( /* 插入点:第一个 DWS/INDEX/UTILITY 层之前,若全是 ODS 则在末尾 */
<Checkbox key={t.code} value={t.code}> const insertIdx = shouldInsertDwd
<Text strong style={t.is_common === false ? { color: "#999" } : undefined}>{t.code}</Text> ? group.layerTasks.findIndex((lt) => (LAYER_ORDER[lt.layer] ?? 99) >= (LAYER_ORDER["DWS"] ?? 2))
<Text type="secondary" style={{ marginLeft: 8 }}>{t.name}</Text> : -1;
{t.is_common === false && ( const effectiveInsertIdx = shouldInsertDwd && insertIdx === -1 ? group.layerTasks.length : insertIdx;
<Tag color="default" style={{ marginLeft: 6, fontSize: 11 }}></Tag>
)} const elements: React.ReactNode[] = [];
</Checkbox> group.layerTasks.forEach((lt, idx) => {
))} /* 在此位置插入 DWD 表过滤 */
</Space> if (shouldInsertDwd && idx === effectiveInsertIdx) {
</Checkbox.Group> elements.push(
<div key="__dwd_filter__">
{elements.length > 0 && <Divider style={{ margin: "6px 0" }} />}
<div style={{ marginBottom: 4 }}>
<Tag color="green" style={{ fontSize: 11 }}>DWD </Tag>
</div>
{renderDwdTableFilter(group.dwdTables)}
</div>,
);
}
elements.push(
<div key={lt.layer}>
{elements.length > 0 && <Divider style={{ margin: "6px 0" }} />}
<div style={{ marginBottom: 4 }}>
<Tag color={LAYER_COLORS[lt.layer] ?? "default"} style={{ fontSize: 11 }}>
{LAYER_LABELS[lt.layer] ?? lt.layer}
</Tag>
</div>
<div style={{ paddingLeft: 4 }}>
{lt.tasks.map((t) => (
<div key={t.code} style={{ padding: "2px 0" }}>
<Checkbox
checked={selectedTasks.includes(t.code)}
onChange={(e) => handleTaskToggle(t.code, e.target.checked)}
>
<Text strong style={!t.is_common ? { color: "#999" } : undefined}>{t.code}</Text>
<Text type="secondary" style={{ marginLeft: 8 }}>{t.name}</Text>
{!t.is_common && <Tag color="default" style={{ marginLeft: 6, fontSize: 11 }}></Tag>}
</Checkbox>
</div>
))}
</div>
{/* DWD 表过滤紧跟 DWD 层任务 */}
{lt.layer === "DWD" && renderDwdTableFilter(group.dwdTables)}
</div>,
);
});
/* 所有层遍历完后,若插入点在末尾 */
if (shouldInsertDwd && effectiveInsertIdx >= group.layerTasks.length) {
elements.push(
<div key="__dwd_filter__">
{elements.length > 0 && <Divider style={{ margin: "6px 0" }} />}
<div style={{ marginBottom: 4 }}>
<Tag color="green" style={{ fontSize: 11 }}>DWD </Tag>
</div>
{renderDwdTableFilter(group.dwdTables)}
</div>,
);
}
return elements;
})()}
</div>
), ),
}; };
})} })}
/> />
{/* DWD 表过滤:仅在 DWD 层且 DWD_LOAD_FROM_ODS 被选中时显示 */} {/* 同步差异 Modal */}
{showDwdFilter && dwdLoadSelected && allDwdTableNames.length > 0 && ( <Modal
<> title="任务注册表同步检查"
<Divider style={{ margin: "12px 0 8px" }} /> open={syncModalOpen}
<div style={{ padding: "0 4px" }}> onCancel={() => setSyncModalOpen(false)}
<Space style={{ marginBottom: 6 }}> footer={[
<Text strong style={{ fontSize: 13 }}>DWD </Text> <Button key="refresh" icon={<SyncOutlined />} onClick={() => { handleSyncCheck(); }}></Button>,
<Text type="secondary" style={{ fontSize: 12 }}> <Button key="close" type="primary" onClick={() => setSyncModalOpen(false)}></Button>,
{selectedDwdTables.length === 0 ]}
? "(未选择 = 全部装载)" >
: `已选 ${selectedDwdTables.length} / ${allDwdTableNames.length}`} {syncResult?.error ? (
</Text> <Alert type="error" message="检查出错" description={syncResult.error} />
</Space> ) : (
<div style={{ marginBottom: 6 }}> <div>
<Space size={4}> {syncResult?.backend_only && syncResult.backend_only.length > 0 && (
<Button size="small" type="link" style={{ padding: 0, fontSize: 12 }} onClick={handleDwdSelectAll}> <div style={{ marginBottom: 12 }}>
<Text strong style={{ color: "#faad14" }}> ETL {syncResult.backend_only.length}</Text>
</Button> <div style={{ marginTop: 4 }}>
<Button size="small" type="link" style={{ padding: 0, fontSize: 12 }} onClick={handleDwdClearAll}> {syncResult.backend_only.map((code) => (
<Tag key={code} color="warning" style={{ marginBottom: 4 }}>{code}</Tag>
</Button> ))}
</Space> </div>
</div> </div>
<Collapse )}
size="small" {syncResult?.etl_only && syncResult.etl_only.length > 0 && (
items={Object.entries(dwdTableGroups).map(([domain, tables]) => { <div>
const domainSelected = selectedDwdTables.filter((t) => tables.includes(t)); <Text strong style={{ color: "#ff4d4f" }}>ETL {syncResult.etl_only.length}</Text>
const allDomainChecked = domainSelected.length === tables.length; <div style={{ marginTop: 4 }}>
const domainIndeterminate = domainSelected.length > 0 && !allDomainChecked; {syncResult.etl_only.map((code) => (
<Tag key={code} color="error" style={{ marginBottom: 4 }}>{code}</Tag>
return { ))}
key: domain, </div>
label: ( </div>
<span onClick={(e) => e.stopPropagation()}> )}
<Checkbox {syncResult?.in_sync && (
indeterminate={domainIndeterminate} <Alert type="success" message="后端与 ETL 任务列表完全一致" />
checked={allDomainChecked} )}
onChange={(e: CheckboxChangeEvent) =>
handleDwdDomainTableChange(domain, e.target.checked ? tables : [])
}
style={{ marginRight: 8 }}
/>
{domain}
<Text type="secondary" style={{ marginLeft: 4, fontSize: 12 }}>
({domainSelected.length}/{tables.length})
</Text>
</span>
),
children: (
<Checkbox.Group
value={domainSelected}
onChange={(checked) => handleDwdDomainTableChange(domain, checked as string[])}
>
<Space direction="vertical">
{tables.map((table) => (
<Checkbox key={table} value={table}>
<Text style={{ fontSize: 12 }}>{table}</Text>
</Checkbox>
))}
</Space>
</Checkbox.Group>
),
};
})}
/>
</div> </div>
</> )}
)} </Modal>
</div> </div>
); );
}; };

View File

@@ -24,6 +24,7 @@ import {
TreeSelect, TreeSelect,
Tooltip, Tooltip,
Segmented, Segmented,
Spin,
} from "antd"; } from "antd";
import { import {
SendOutlined, SendOutlined,
@@ -37,7 +38,8 @@ import {
} from "@ant-design/icons"; } from "@ant-design/icons";
import { useNavigate } from "react-router-dom"; import { useNavigate } from "react-router-dom";
import TaskSelector from "../components/TaskSelector"; import TaskSelector from "../components/TaskSelector";
import { validateTaskConfig } from "../api/tasks"; import { validateTaskConfig, fetchFlows } from "../api/tasks";
import type { FlowDef, ProcessingModeDef } from "../api/tasks";
import { submitToQueue, executeDirectly } from "../api/execution"; import { submitToQueue, executeDirectly } from "../api/execution";
import { useAuthStore } from "../store/authStore"; import { useAuthStore } from "../store/authStore";
import type { RadioChangeEvent } from "antd"; import type { RadioChangeEvent } from "antd";
@@ -48,32 +50,45 @@ const { Title, Text } = Typography;
const { TextArea } = Input; const { TextArea } = Input;
/* ------------------------------------------------------------------ */ /* ------------------------------------------------------------------ */
/* Flow 定义 */ /* Flow / 处理模式 — 本地 fallbackAPI 不可用时兜底) */
/* ------------------------------------------------------------------ */ /* ------------------------------------------------------------------ */
const FLOW_DEFINITIONS: Record<string, { name: string; layers: string[]; desc: string }> = { interface FlowEntry { name: string; layers: string[] }
api_ods: { name: "API → ODS", layers: ["ODS"], desc: "仅抓取原始数据" },
api_ods_dwd: { name: "API → ODS → DWD", layers: ["ODS", "DWD"], desc: "抓取并清洗装载" }, const FALLBACK_FLOWS: Record<string, FlowEntry> = {
api_full: { name: "API → ODS → DWD → DWS → INDEX", layers: ["ODS", "DWD", "DWS", "INDEX"], desc: "全链路执行" }, api_ods: { name: "API → ODS", layers: ["ODS"] },
ods_dwd: { name: "ODS → DWD", layers: ["DWD"], desc: "仅清洗装载" }, api_ods_dwd: { name: "API → ODS → DWD", layers: ["ODS", "DWD"] },
dwd_dws: { name: "DWD → DWS汇总", layers: ["DWS"], desc: "仅汇总计算" }, api_full: { name: "API → ODS → DWD → DWS → INDEX", layers: ["ODS", "DWD", "DWS", "INDEX"] },
dwd_dws_index: { name: "DWD → DWS → INDEX", layers: ["DWS", "INDEX"], desc: "汇总+指数" }, ods_dwd: { name: "ODS → DWD", layers: ["DWD"] },
dwd_index: { name: "DWD → INDEX", layers: ["INDEX"], desc: "仅指数计算" }, dwd_dws: { name: "DWD → DWS汇总", layers: ["DWS"] },
dwd_dws_index: { name: "DWD → DWS → INDEX", layers: ["DWS", "INDEX"] },
dwd_index: { name: "DWD → INDEX", layers: ["INDEX"] },
}; };
export function getFlowLayers(flowId: string): string[] { interface ProcModeEntry { value: string; label: string; desc: string }
return FLOW_DEFINITIONS[flowId]?.layers ?? [];
}
/* ------------------------------------------------------------------ */ const FALLBACK_PROCESSING_MODES: ProcModeEntry[] = [
/* 处理模式 */
/* ------------------------------------------------------------------ */
const PROCESSING_MODES = [
{ value: "increment_only", label: "仅增量", desc: "按游标增量抓取和装载" }, { value: "increment_only", label: "仅增量", desc: "按游标增量抓取和装载" },
{ value: "verify_only", label: "校验并修复", desc: "对比源和目标,修复差异" }, { value: "verify_only", label: "校验并修复", desc: "对比源和目标,修复差异" },
{ value: "increment_verify", label: "增量+校验", desc: "先增量再校验" }, { value: "increment_verify", label: "增量+校验", desc: "先增量再校验" },
] as const; ];
/**
 * Index the FlowDef list returned by the API by flow id.
 *
 * Only `name` and `layers` are kept for each entry; when two flows share an
 * id, the later one wins.
 */
function apiFlowsToRecord(flows: FlowDef[]): Record<string, FlowEntry> {
  return flows.reduce<Record<string, FlowEntry>>((byId, { id, name, layers }) => {
    byId[id] = { name, layers };
    return byId;
  }, {});
}
/**
 * Convert the ProcessingModeDef list returned by the API into ProcModeEntry
 * items: `id` becomes `value`, `name` becomes `label`, `description` becomes `desc`.
 */
function apiModesToEntries(modes: ProcessingModeDef[]): ProcModeEntry[] {
  return modes.map(({ id, name, description }): ProcModeEntry => {
    return { value: id, label: name, desc: description };
  });
}
/**
 * Exported layer lookup for a flow id.
 *
 * Reads the local FALLBACK_FLOWS table only; an unknown flow id yields an
 * empty array.
 */
export function getFlowLayers(flowId: string): string[] {
  const layers = FALLBACK_FLOWS[flowId]?.layers;
  return layers ?? [];
}
/* ------------------------------------------------------------------ */ /* ------------------------------------------------------------------ */
/* 时间窗口 */ /* 时间窗口 */
@@ -147,6 +162,24 @@ const TaskConfig: React.FC = () => {
const navigate = useNavigate(); const navigate = useNavigate();
const user = useAuthStore((s) => s.user); const user = useAuthStore((s) => s.user);
/* ---------- Flow / 处理模式 动态加载 ---------- */
const [flowDefs, setFlowDefs] = useState<Record<string, FlowEntry>>(FALLBACK_FLOWS);
const [procModes, setProcModes] = useState<ProcModeEntry[]>(FALLBACK_PROCESSING_MODES);
const [flowsLoading, setFlowsLoading] = useState(true);
useEffect(() => {
let cancelled = false;
fetchFlows()
.then(({ flows, processing_modes }) => {
if (cancelled) return;
if (flows.length > 0) setFlowDefs(apiFlowsToRecord(flows));
if (processing_modes.length > 0) setProcModes(apiModesToEntries(processing_modes));
})
.catch(() => { /* API 不可用,使用 fallback */ })
.finally(() => { if (!cancelled) setFlowsLoading(false); });
return () => { cancelled = true; };
}, []);
/* ---------- 连接器 & Store 树形选择 ---------- */ /* ---------- 连接器 & Store 树形选择 ---------- */
const { treeData: connectorTreeData, allValues: allConnectorStoreValues } = useMemo( const { treeData: connectorTreeData, allValues: allConnectorStoreValues } = useMemo(
() => buildConnectorStoreTree(CONNECTOR_DEFS, user?.site_id ?? null), () => buildConnectorStoreTree(CONNECTOR_DEFS, user?.site_id ?? null),
@@ -199,12 +232,17 @@ const TaskConfig: React.FC = () => {
const [submitting, setSubmitting] = useState(false); const [submitting, setSubmitting] = useState(false);
/* ---------- 派生状态 ---------- */ /* ---------- 派生状态 ---------- */
const layers = getFlowLayers(flow); const layers = flowDefs[flow]?.layers ?? [];
const showVerifyOption = processingMode === "verify_only"; const showVerifyOption = processingMode === "verify_only";
/* ---------- 构建 TaskConfig 对象 ---------- */ /* ---------- 构建 TaskConfig 对象 ---------- */
const buildTaskConfig = (): TaskConfigType => ({ const buildTaskConfig = (): TaskConfigType => {
tasks: selectedTasks, /* layers 包含 DWD 时自动注入 DWD_LOAD_FROM_ODSUI 上由 DWD 表过滤区块隐含) */
const tasks = layers.includes("DWD") && !selectedTasks.includes("DWD_LOAD_FROM_ODS")
? [...selectedTasks, "DWD_LOAD_FROM_ODS"]
: selectedTasks;
return {
tasks,
pipeline: flow, pipeline: flow,
processing_mode: processingMode, processing_mode: processingMode,
pipeline_flow: "FULL", pipeline_flow: "FULL",
@@ -223,7 +261,8 @@ const TaskConfig: React.FC = () => {
dwd_only_tables: selectedDwdTables.length > 0 ? selectedDwdTables : null, dwd_only_tables: selectedDwdTables.length > 0 ? selectedDwdTables : null,
force_full: forceFull, force_full: forceFull,
extra_args: {}, extra_args: {},
}); };
};
/* ---------- 自动刷新 CLI 预览 ---------- */ /* ---------- 自动刷新 CLI 预览 ---------- */
const refreshCli = async () => { const refreshCli = async () => {
@@ -326,12 +365,12 @@ const TaskConfig: React.FC = () => {
</Card> </Card>
</Col> </Col>
<Col span={16}> <Col span={16}>
<Card size="small" title="执行流程 (Flow)" style={cardStyle}> <Card size="small" title={flowsLoading ? <Space size={4}> (Flow) <Spin size="small" /></Space> : "执行流程 (Flow)"} style={cardStyle}>
<Radio.Group value={flow} onChange={handleFlowChange} style={{ width: "100%" }}> <Radio.Group value={flow} onChange={handleFlowChange} style={{ width: "100%" }}>
<Row gutter={[0, 4]}> <Row gutter={[0, 4]}>
{Object.entries(FLOW_DEFINITIONS).map(([id, def]) => ( {Object.entries(flowDefs).map(([id, def]) => (
<Col span={12} key={id}> <Col span={12} key={id}>
<Tooltip title={def.desc}> <Tooltip title={def.name}>
<Radio value={id}> <Radio value={id}>
<Text strong style={{ fontSize: 12 }}>{id}</Text> <Text strong style={{ fontSize: 12 }}>{id}</Text>
</Radio> </Radio>
@@ -361,7 +400,7 @@ const TaskConfig: React.FC = () => {
}} }}
> >
<Space direction="vertical" style={{ width: "100%" }}> <Space direction="vertical" style={{ width: "100%" }}>
{PROCESSING_MODES.map((m) => ( {procModes.map((m) => (
<Radio key={m.value} value={m.value}> <Radio key={m.value} value={m.value}>
<Text strong>{m.label}</Text> <Text strong>{m.label}</Text>
<br /> <br />

View File

@@ -1,133 +1,144 @@
/** /**
* 前后端共享的 TypeScript 类型定义。 * 前后端共享的 TypeScript 类型定义。
* 与设计文档中的 Pydantic 模型和数据库表结构对应。 * 与设计文档中的 Pydantic 模型和数据库表结构对应。
*/ */
/** ETL 任务执行配置 */ /** ETL 任务执行配置 */
export interface TaskConfig { export interface TaskConfig {
tasks: string[]; tasks: string[];
/** 执行流程 Flow ID对应 CLI --pipeline */ /** 执行流程 Flow ID对应 CLI --pipeline */
pipeline: string; pipeline: string;
/** 处理模式 */ /** 处理模式 */
processing_mode: string; processing_mode: string;
/** 传统模式兼容(已弃用) */ /** 传统模式兼容(已弃用) */
pipeline_flow: string; pipeline_flow: string;
dry_run: boolean; dry_run: boolean;
/** lookback / custom */ /** lookback / custom */
window_mode: string; window_mode: string;
window_start: string | null; window_start: string | null;
window_end: string | null; window_end: string | null;
/** none / day */ /** none / day */
window_split: string | null; window_split: string | null;
/** 1 / 10 / 30 */ /** 1 / 10 / 30 */
window_split_days: number | null; window_split_days: number | null;
lookback_hours: number; lookback_hours: number;
overlap_seconds: number; overlap_seconds: number;
fetch_before_verify: boolean; fetch_before_verify: boolean;
skip_ods_when_fetch_before_verify: boolean; skip_ods_when_fetch_before_verify: boolean;
ods_use_local_json: boolean; ods_use_local_json: boolean;
/** 门店 ID由后端从 JWT 注入) */ /** 门店 ID由后端从 JWT 注入) */
store_id: number | null; store_id: number | null;
/** DWD 表级选择 */ /** DWD 表级选择 */
dwd_only_tables: string[] | null; dwd_only_tables: string[] | null;
/** 强制全量处理(跳过 hash 去重和变更对比) */ /** 强制全量处理(跳过 hash 去重和变更对比) */
force_full: boolean; force_full: boolean;
extra_args: Record<string, unknown>; extra_args: Record<string, unknown>;
} }
/** 执行流程Flow定义 */ /** 执行流程Flow定义 */
export interface PipelineDefinition { export interface PipelineDefinition {
id: string; id: string;
name: string; name: string;
/** 包含的层ODS / DWD / DWS / INDEX */ /** 包含的层ODS / DWD / DWS / INDEX */
layers: string[]; layers: string[];
} }
/** 处理模式定义 */ /** 处理模式定义 */
export interface ProcessingModeDefinition { export interface ProcessingModeDefinition {
id: string; id: string;
name: string; name: string;
description: string; description: string;
} }
/** 任务注册表中的任务定义 */ /** 任务注册表中的任务定义 */
export interface TaskDefinition { export interface TaskDefinition {
code: string; code: string;
name: string; name: string;
description: string; description: string;
/** 业务域(会员、结算、助教等) */ /** 业务域(会员、结算、助教等) */
domain: string; domain: string;
requires_window: boolean; /** 所属层ODS / DWD / DWS / INDEX / UTILITY */
is_ods: boolean; layer: string;
is_dimension: boolean; requires_window: boolean;
default_enabled: boolean; is_ods: boolean;
/** 常用任务标记false 表示工具类/手动类任务 */ is_dimension: boolean;
is_common: boolean; default_enabled: boolean;
} /** 常用任务标记false 表示工具类/手动类任务 */
is_common: boolean;
/** 调度配置 */ }
export interface ScheduleConfig {
schedule_type: "once" | "interval" | "daily" | "weekly" | "cron"; /** DWD 表定义(后端返回的完整结构) */
interval_value: number; export interface DwdTableItem {
interval_unit: "minutes" | "hours" | "days"; table_name: string;
daily_time: string; display_name: string;
weekly_days: number[]; domain: string;
weekly_time: string; ods_source: string;
cron_expression: string; is_dimension: boolean;
enabled: boolean; }
start_date: string | null;
end_date: string | null; /** 调度配置 */
} export interface ScheduleConfig {
schedule_type: "once" | "interval" | "daily" | "weekly" | "cron";
/** 队列中的任务 */ interval_value: number;
export interface QueuedTask { interval_unit: "minutes" | "hours" | "days";
id: string; daily_time: string;
site_id: number; weekly_days: number[];
config: TaskConfig; weekly_time: string;
status: "pending" | "running" | "success" | "failed" | "cancelled"; cron_expression: string;
position: number; enabled: boolean;
created_at: string; start_date: string | null;
started_at: string | null; end_date: string | null;
finished_at: string | null; }
exit_code: number | null;
error_message: string | null; /** 队列中的任务 */
} export interface QueuedTask {
id: string;
/** 执行历史记录 */ site_id: number;
export interface ExecutionLog { config: TaskConfig;
id: string; status: "pending" | "running" | "success" | "failed" | "cancelled";
site_id: number; position: number;
task_codes: string[]; created_at: string;
status: string; started_at: string | null;
started_at: string; finished_at: string | null;
finished_at: string | null; exit_code: number | null;
exit_code: number | null; error_message: string | null;
duration_ms: number | null; }
command: string;
summary: Record<string, unknown> | null; /** 执行历史记录 */
} export interface ExecutionLog {
id: string;
/** 调度任务 */ site_id: number;
export interface ScheduledTask { task_codes: string[];
id: string; status: string;
site_id: number; started_at: string;
name: string; finished_at: string | null;
task_codes: string[]; exit_code: number | null;
task_config: TaskConfig; duration_ms: number | null;
schedule_config: ScheduleConfig; command: string;
enabled: boolean; summary: Record<string, unknown> | null;
last_run_at: string | null; }
next_run_at: string | null;
run_count: number; /** 调度任务 */
last_status: string | null; export interface ScheduledTask {
created_at: string; id: string;
updated_at: string; site_id: number;
} name: string;
task_codes: string[];
/** 环境配置项 */ task_config: TaskConfig;
export interface EnvConfigItem { schedule_config: ScheduleConfig;
key: string; enabled: boolean;
value: string; last_run_at: string | null;
is_sensitive: boolean; next_run_at: string | null;
} run_count: number;
last_status: string | null;
created_at: string;
updated_at: string;
}
/** 环境配置项 */
export interface EnvConfigItem {
key: string;
value: string;
is_sensitive: boolean;
}

View File

@@ -1,209 +1,264 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""任务注册表 & 配置 API """任务注册表 & 配置 API
提供 4 个端点: 提供 4 个端点:
- GET /api/tasks/registry — 按业务域分组的任务列表 - GET /api/tasks/registry — 按业务域分组的任务列表
- GET /api/tasks/dwd-tables — 按业务域分组的 DWD 表定义 - GET /api/tasks/dwd-tables — 按业务域分组的 DWD 表定义
- GET /api/tasks/flows — 7 种 Flow + 3 种处理模式 - GET /api/tasks/flows — 7 种 Flow + 3 种处理模式
- POST /api/tasks/validate — 验证 TaskConfig 并返回 CLI 命令预览 - POST /api/tasks/validate — 验证 TaskConfig 并返回 CLI 命令预览
所有端点需要 JWT 认证。validate 端点从 JWT 注入 store_id。 所有端点需要 JWT 认证。validate 端点从 JWT 注入 store_id。
""" """
from __future__ import annotations from __future__ import annotations
from typing import Any from typing import Any
from fastapi import APIRouter, Depends from fastapi import APIRouter, Depends
from pydantic import BaseModel from pydantic import BaseModel
from app.auth.dependencies import CurrentUser, get_current_user from app.auth.dependencies import CurrentUser, get_current_user
from app.config import ETL_PROJECT_PATH from app.config import ETL_PROJECT_PATH
from app.schemas.tasks import ( from app.schemas.tasks import (
FlowDefinition, FlowDefinition,
ProcessingModeDefinition, ProcessingModeDefinition,
TaskConfigSchema, TaskConfigSchema,
) )
from app.services.cli_builder import cli_builder from app.services.cli_builder import cli_builder
from app.services.task_registry import ( from app.services.task_registry import (
DWD_TABLES, DWD_TABLES,
FLOW_LAYER_MAP, FLOW_LAYER_MAP,
get_dwd_tables_grouped_by_domain, get_dwd_tables_grouped_by_domain,
get_tasks_grouped_by_domain, get_tasks_grouped_by_domain,
) )
router = APIRouter(prefix="/api/tasks", tags=["任务配置"]) router = APIRouter(prefix="/api/tasks", tags=["任务配置"])
# ── 响应模型 ────────────────────────────────────────────────── # ── 响应模型 ──────────────────────────────────────────────────
class TaskItem(BaseModel): class TaskItem(BaseModel):
code: str code: str
name: str name: str
description: str description: str
domain: str domain: str
layer: str layer: str
requires_window: bool requires_window: bool
is_ods: bool is_ods: bool
is_dimension: bool is_dimension: bool
default_enabled: bool default_enabled: bool
is_common: bool is_common: bool
class DwdTableItem(BaseModel): class DwdTableItem(BaseModel):
table_name: str table_name: str
display_name: str display_name: str
domain: str domain: str
ods_source: str ods_source: str
is_dimension: bool is_dimension: bool
class TaskRegistryResponse(BaseModel): class TaskRegistryResponse(BaseModel):
"""按业务域分组的任务列表""" """按业务域分组的任务列表"""
groups: dict[str, list[TaskItem]] groups: dict[str, list[TaskItem]]
class DwdTablesResponse(BaseModel): class DwdTablesResponse(BaseModel):
"""按业务域分组的 DWD 表定义""" """按业务域分组的 DWD 表定义"""
groups: dict[str, list[DwdTableItem]] groups: dict[str, list[DwdTableItem]]
class FlowsResponse(BaseModel): class FlowsResponse(BaseModel):
"""Flow 定义 + 处理模式定义""" """Flow 定义 + 处理模式定义"""
flows: list[FlowDefinition] flows: list[FlowDefinition]
processing_modes: list[ProcessingModeDefinition] processing_modes: list[ProcessingModeDefinition]
class ValidateRequest(BaseModel): class ValidateRequest(BaseModel):
"""验证请求体 — 复用 TaskConfigSchema但 store_id 由后端注入""" """验证请求体 — 复用 TaskConfigSchema但 store_id 由后端注入"""
config: TaskConfigSchema config: TaskConfigSchema
class ValidateResponse(BaseModel): class ValidateResponse(BaseModel):
"""验证结果 + CLI 命令预览""" """验证结果 + CLI 命令预览"""
valid: bool valid: bool
command: str command: str
command_args: list[str] command_args: list[str]
errors: list[str] errors: list[str]
# ── Flow 定义(静态) ──────────────────────────────────────── # ── Flow 定义(静态) ────────────────────────────────────────
FLOW_DEFINITIONS: list[FlowDefinition] = [ FLOW_DEFINITIONS: list[FlowDefinition] = [
FlowDefinition(id="api_ods", name="API → ODS", layers=["ODS"]), FlowDefinition(id="api_ods", name="API → ODS", layers=["ODS"]),
FlowDefinition(id="api_ods_dwd", name="API → ODS → DWD", layers=["ODS", "DWD"]), FlowDefinition(id="api_ods_dwd", name="API → ODS → DWD", layers=["ODS", "DWD"]),
FlowDefinition(id="api_full", name="API → ODS → DWD → DWS汇总 → DWS指数", layers=["ODS", "DWD", "DWS", "INDEX"]), FlowDefinition(id="api_full", name="API → ODS → DWD → DWS汇总 → DWS指数", layers=["ODS", "DWD", "DWS", "INDEX"]),
FlowDefinition(id="ods_dwd", name="ODS → DWD", layers=["DWD"]), FlowDefinition(id="ods_dwd", name="ODS → DWD", layers=["DWD"]),
FlowDefinition(id="dwd_dws", name="DWD → DWS汇总", layers=["DWS"]), FlowDefinition(id="dwd_dws", name="DWD → DWS汇总", layers=["DWS"]),
FlowDefinition(id="dwd_dws_index", name="DWD → DWS汇总 → DWS指数", layers=["DWS", "INDEX"]), FlowDefinition(id="dwd_dws_index", name="DWD → DWS汇总 → DWS指数", layers=["DWS", "INDEX"]),
FlowDefinition(id="dwd_index", name="DWD → DWS指数", layers=["INDEX"]), FlowDefinition(id="dwd_index", name="DWD → DWS指数", layers=["INDEX"]),
] ]
PROCESSING_MODE_DEFINITIONS: list[ProcessingModeDefinition] = [ PROCESSING_MODE_DEFINITIONS: list[ProcessingModeDefinition] = [
ProcessingModeDefinition(id="increment_only", name="仅增量处理", description="只处理新增和变更的数据"), ProcessingModeDefinition(id="increment_only", name="仅增量处理", description="只处理新增和变更的数据"),
ProcessingModeDefinition(id="verify_only", name="仅校验修复", description="校验现有数据并修复不一致(可选'校验前从 API 获取'"), ProcessingModeDefinition(id="verify_only", name="仅校验修复", description="校验现有数据并修复不一致"),
ProcessingModeDefinition(id="increment_verify", name="增量 + 校验修复", description="先增量处理,再校验并修复"), ProcessingModeDefinition(id="increment_verify", name="增量 + 校验修复", description="先增量处理,再校验并修复"),
] ]
# ── 端点 ────────────────────────────────────────────────────── # ── 端点 ──────────────────────────────────────────────────────
@router.get("/registry", response_model=TaskRegistryResponse) @router.get("/registry", response_model=TaskRegistryResponse)
async def get_task_registry( async def get_task_registry(
user: CurrentUser = Depends(get_current_user), user: CurrentUser = Depends(get_current_user),
) -> TaskRegistryResponse: ) -> TaskRegistryResponse:
"""返回按业务域分组的任务列表""" """返回按业务域分组的任务列表"""
grouped = get_tasks_grouped_by_domain() grouped = get_tasks_grouped_by_domain()
return TaskRegistryResponse( return TaskRegistryResponse(
groups={ groups={
domain: [ domain: [
TaskItem( TaskItem(
code=t.code, code=t.code,
name=t.name, name=t.name,
description=t.description, description=t.description,
domain=t.domain, domain=t.domain,
layer=t.layer, layer=t.layer,
requires_window=t.requires_window, requires_window=t.requires_window,
is_ods=t.is_ods, is_ods=t.is_ods,
is_dimension=t.is_dimension, is_dimension=t.is_dimension,
default_enabled=t.default_enabled, default_enabled=t.default_enabled,
is_common=t.is_common, is_common=t.is_common,
) )
for t in tasks for t in tasks
] ]
for domain, tasks in grouped.items() for domain, tasks in grouped.items()
} }
) )
@router.get("/dwd-tables", response_model=DwdTablesResponse) @router.get("/dwd-tables", response_model=DwdTablesResponse)
async def get_dwd_tables( async def get_dwd_tables(
user: CurrentUser = Depends(get_current_user), user: CurrentUser = Depends(get_current_user),
) -> DwdTablesResponse: ) -> DwdTablesResponse:
"""返回按业务域分组的 DWD 表定义""" """返回按业务域分组的 DWD 表定义"""
grouped = get_dwd_tables_grouped_by_domain() grouped = get_dwd_tables_grouped_by_domain()
return DwdTablesResponse( return DwdTablesResponse(
groups={ groups={
domain: [ domain: [
DwdTableItem( DwdTableItem(
table_name=t.table_name, table_name=t.table_name,
display_name=t.display_name, display_name=t.display_name,
domain=t.domain, domain=t.domain,
ods_source=t.ods_source, ods_source=t.ods_source,
is_dimension=t.is_dimension, is_dimension=t.is_dimension,
) )
for t in tables for t in tables
] ]
for domain, tables in grouped.items() for domain, tables in grouped.items()
} }
) )
@router.get("/flows", response_model=FlowsResponse) @router.get("/flows", response_model=FlowsResponse)
async def get_flows( async def get_flows(
user: CurrentUser = Depends(get_current_user), user: CurrentUser = Depends(get_current_user),
) -> FlowsResponse: ) -> FlowsResponse:
"""返回 7 种 Flow 定义和 3 种处理模式定义""" """返回 7 种 Flow 定义和 3 种处理模式定义"""
return FlowsResponse( return FlowsResponse(
flows=FLOW_DEFINITIONS, flows=FLOW_DEFINITIONS,
processing_modes=PROCESSING_MODE_DEFINITIONS, processing_modes=PROCESSING_MODE_DEFINITIONS,
) )
@router.post("/validate", response_model=ValidateResponse) @router.post("/validate", response_model=ValidateResponse)
async def validate_task_config( async def validate_task_config(
body: ValidateRequest, body: ValidateRequest,
user: CurrentUser = Depends(get_current_user), user: CurrentUser = Depends(get_current_user),
) -> ValidateResponse: ) -> ValidateResponse:
"""验证 TaskConfig 并返回生成的 CLI 命令预览 """验证 TaskConfig 并返回生成的 CLI 命令预览
从 JWT 注入 store_id前端无需传递。 从 JWT 注入 store_id前端无需传递。
""" """
config = body.config.model_copy(update={"store_id": user.site_id}) config = body.config.model_copy(update={"store_id": user.site_id})
errors: list[str] = [] errors: list[str] = []
# 验证 Flow ID # 验证 Flow ID
if config.pipeline not in FLOW_LAYER_MAP: if config.pipeline not in FLOW_LAYER_MAP:
errors.append(f"无效的执行流程: {config.pipeline}") errors.append(f"无效的执行流程: {config.pipeline}")
# 验证任务列表非空 # 验证任务列表非空
if not config.tasks: if not config.tasks:
errors.append("任务列表不能为空") errors.append("任务列表不能为空")
if errors: if errors:
return ValidateResponse( return ValidateResponse(
valid=False, valid=False,
command="", command="",
command_args=[], command_args=[],
errors=errors, errors=errors,
) )
cmd_args = cli_builder.build_command(config, ETL_PROJECT_PATH) cmd_args = cli_builder.build_command(config, ETL_PROJECT_PATH)
cmd_str = cli_builder.build_command_string(config, ETL_PROJECT_PATH) cmd_str = cli_builder.build_command_string(config, ETL_PROJECT_PATH)
return ValidateResponse( return ValidateResponse(
valid=True, valid=True,
command=cmd_str, command=cmd_str,
command_args=cmd_args, command_args=cmd_args,
errors=[], errors=[],
) )
# ── GET /api/tasks/sync-check — 对比 ETL 真实注册表 ──────────
class SyncCheckResponse(BaseModel):
    """同步检查结果"""
    # Response model of GET /api/tasks/sync-check.
    # NOTE(review): the docstring above is left untranslated because Pydantic/FastAPI
    # presumably publish model docstrings in the generated API schema — confirm.
    in_sync: bool  # whether the backend task codes and the ETL task codes match
    backend_only: list[str]  # task codes present in the backend list only
    etl_only: list[str]  # task codes present in the ETL registry only
    error: str | None = None  # optional failure message; defaults to None
@router.get("/sync-check", response_model=SyncCheckResponse)
async def sync_check(
    user: CurrentUser = Depends(get_current_user),
) -> SyncCheckResponse:
    """对比后端硬编码任务列表与 ETL 真实注册表,返回差异。
    通过子进程调用 ETL CLI 获取真实任务列表,避免直接导入 ETL 代码。
    """
    # The docstring above is kept verbatim: FastAPI presumably exposes endpoint
    # docstrings as the operation description — confirm before translating.
    #
    # Compares the backend's ALL_TASKS codes with the codes printed by a child
    # Python process that imports the ETL registry (run with cwd=ETL_PROJECT_PATH).
    # Failures are never raised: they are reported through the `error` field with
    # in_sync=False and empty difference lists.
    import asyncio
    import subprocess
    import sys

    from app.services.task_registry import ALL_TASKS

    backend_codes = {t.code for t in ALL_TASKS}

    try:
        # subprocess.run blocks until the child exits (up to the 15 s timeout).
        # Calling it directly inside this coroutine would stall the event loop,
        # so it is executed in a worker thread instead.
        result = await asyncio.to_thread(
            subprocess.run,
            [sys.executable, "-c",
             "from orchestration.task_registry import default_registry; "
             "print(','.join(sorted(default_registry.get_all_task_codes())))"],
            capture_output=True, text=True, timeout=15,
            cwd=ETL_PROJECT_PATH, encoding="utf-8", errors="replace",
        )
        if result.returncode != 0:
            # Only the first 200 characters of stderr are returned to the client.
            return SyncCheckResponse(
                in_sync=False, backend_only=[], etl_only=[],
                error=f"ETL 子进程失败: {result.stderr.strip()[:200]}",
            )
        # The child prints a single comma-separated line of task codes.
        etl_codes = {c.strip() for c in result.stdout.strip().split(",") if c.strip()}
    except Exception as exc:
        # Deliberate best-effort: timeouts, a missing interpreter, etc. become an
        # error payload rather than an HTTP 500.
        return SyncCheckResponse(
            in_sync=False, backend_only=[], etl_only=[],
            error=f"无法连接 ETL: {exc}",
        )

    backend_only = sorted(backend_codes - etl_codes)
    etl_only = sorted(etl_codes - backend_codes)

    return SyncCheckResponse(
        in_sync=len(backend_only) == 0 and len(etl_only) == 0,
        backend_only=backend_only,
        etl_only=etl_only,
    )

4
apps/mcp-server/.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
.env
.env.local
__pycache__/
*.pyc

58
apps/mcp-server/README.md Normal file
View File

@@ -0,0 +1,58 @@
# MCP Server
为阿里云百炼 AI 应用提供 PostgreSQL 只读查询能力的 MCP 服务。
## 用途
小程序端调用百炼 AI 应用时,百炼通过 MCP 协议连接本服务,读取 `etl_feiqiu` 数据库中的运营数据(会员、订单、支付、助教业绩、财务汇总等)。
## 架构
```
微信小程序 → 百炼 AI 应用 → MCP (Streamable HTTP) → 本服务 → PostgreSQL (etl_feiqiu)
```
## 暴露的 Tools
| Tool | 说明 |
|------|------|
| `list_tables` | 列出指定 schema 下的表 |
| `describe_table` | 查看单表结构 |
| `describe_schemas` | 批量返回多个 schema 的表结构(含主键) |
| `query_sql` | 在指定 schema 内执行只读 SQL |
## 可访问的 Schema
`ods` / `dwd` / `dws` / `core` / `meta` / `app`(etl_feiqiu 六层架构)
## 安全策略
- 仅允许 SELECT/WITH/SHOW/EXPLAIN,禁止 DDL/DML
- 正则 + sqlparse 双重校验
- 跨 schema 引用白名单限制
- 可选 Bearer Token 鉴权(`MCP_TOKEN` 环境变量)
- 生产环境建议使用只读数据库账号
## 配置
环境变量优先级:`MCP_PG_*` > `DB_*` / `ETL_DB_NAME` / `PG_NAME` > 项目根 `.env`
| 变量 | 说明 | 默认值 |
|------|------|--------|
| `MCP_PG_HOST` | 数据库主机(优先) | 回退 `DB_HOST` |
| `MCP_PG_PORT` | 数据库端口(优先) | 回退 `DB_PORT`,默认 5432 |
| `MCP_PG_DATABASE` | 数据库名(优先) | 回退 `ETL_DB_NAME`,再回退 `PG_NAME` |
| `MCP_PG_USER` | 数据库用户(优先) | 回退 `DB_USER` |
| `MCP_PG_PASSWORD` | 数据库密码(优先) | 回退 `DB_PASSWORD` |
| `MCP_TOKEN` | 鉴权 token(空则不启用) | 空 |
| `MCP_MAX_ROWS` | query_sql 最大返回行数 | 500 |
| `PORT` | 服务监听端口 | 9000 |
## 启动
```bash
cd apps/mcp-server
python server.py
```
百炼端 MCP 服务地址配置为:`https://mcp.langlangzhuoqiu.cn/mcp`

View File

@@ -0,0 +1,22 @@
[project]
name = "zqyy-mcp-server"
version = "0.1.0"
description = "MCP Server — 为阿里云百炼 AI 应用提供 PostgreSQL 只读查询能力"
requires-python = ">=3.10"
dependencies = [
"mcp[cli]>=1.9",
"psycopg[pool]>=3.1",
"python-dotenv>=1.0",
"sqlparse>=0.5",
"starlette>=0.27",
"uvicorn[standard]>=0.34",
]
[dependency-groups]
dev = [
"pytest>=8.0",
]
[tool.pytest.ini_options]
testpaths = ["tests"]
pythonpath = ["."]

412
apps/mcp-server/server.py Normal file
View File

@@ -0,0 +1,412 @@
import os
import re
import contextlib
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import sqlparse
from dotenv import load_dotenv
from psycopg_pool import ConnectionPool
from mcp.server.fastmcp import FastMCP
from mcp.server.transport_security import TransportSecuritySettings
from starlette.applications import Starlette
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import JSONResponse
from starlette.routing import Mount
# Load configuration. Precedence: .env.local > sibling .env > project-root .env.
# .env.local is loaded with override=True, so its values replace variables that
# are already set; the other two files are loaded with override=False and only
# fill in variables that are still missing.
_here = Path(__file__).resolve().parent
_root = _here.parent.parent  # apps/mcp-server -> apps -> NeoZQYY
load_dotenv(_here / ".env.local", override=True)
load_dotenv(_here / ".env", override=False)
load_dotenv(_root / ".env", override=False)
# ----------------------------
# 工具:环境变量解析(避免 int("") 报错)
# ----------------------------
def env_str(name: str, default: str = "", required: bool = False) -> str:
    """Read an environment variable as a whitespace-stripped string.

    Args:
        name: Environment variable name.
        default: Value used when the variable is not set (stripped too when it
            is a string).
        required: When True, an empty or missing result is an error.

    Returns:
        The stripped value, or the default.

    Raises:
        RuntimeError: If ``required`` is True and the result is ``None`` or "".
    """
    value = os.environ.get(name)
    if value is None:
        value = default
    if isinstance(value, str):
        value = value.strip()
    is_empty = value is None or value == ""
    if required and is_empty:
        raise RuntimeError(f"Missing required env var: {name}")
    return value
def env_int(name: str, default: Optional[int] = None, required: bool = False) -> int:
    """Read an environment variable as an integer.

    A missing or blank variable falls back to ``default``; parsing an empty
    string with ``int()`` is never attempted.

    Args:
        name: Environment variable name.
        default: Value returned when the variable is missing or blank.
        required: Only selects which error message is used when there is no
            default to fall back to.

    Returns:
        The parsed integer, or ``default``.

    Raises:
        RuntimeError: If the variable is missing/blank and ``default`` is None,
            or if the value is not a valid integer.
    """
    raw = os.getenv(name, "")
    text = "" if raw is None else raw.strip()

    if text != "":
        try:
            return int(text)
        except ValueError as e:
            raise RuntimeError(f"Invalid int env var {name}={raw!r}") from e

    if default is not None:
        return default
    if required:
        raise RuntimeError(f"Missing required env var: {name}")
    raise RuntimeError(f"Missing env var: {name}")
# ----------------------------
# 配置(用环境变量注入)
# MCP_PG_* 优先(独立部署),回退到项目公共 DB_* / PG_NAME
# ----------------------------
# Connection settings: each MCP_PG_* variable wins when it is set and non-empty;
# otherwise the shared DB_* / ETL_DB_NAME / PG_NAME variable is used.
PGHOST = env_str("MCP_PG_HOST", default="") or env_str("DB_HOST", required=True)
PGPORT = env_int("MCP_PG_PORT", default=0) or env_int("DB_PORT", default=5432)
PGDATABASE = env_str("MCP_PG_DATABASE", default="") or env_str("ETL_DB_NAME", default="") or env_str("PG_NAME", required=True)
PGUSER = env_str("MCP_PG_USER", default="") or env_str("DB_USER", required=True)
PGPASSWORD = env_str("MCP_PG_PASSWORD", default="") or env_str("DB_PASSWORD", required=True)
MCP_TOKEN = env_str("MCP_TOKEN", default="")  # auth token; empty disables authentication
MAX_ROWS = env_int("MCP_MAX_ROWS", default=500)  # default row limit for query_sql
PORT = env_int("PORT", default=9000)  # uvicorn listen port
# The six-layer schema layout of the etl_feiqiu database.
ALLOWED_SCHEMAS = ("ods", "dwd", "dws", "core", "meta", "app")
ALLOWED_SCHEMA_SET = set(ALLOWED_SCHEMAS)


def _conninfo_value(value: Any) -> str:
    """Quote one value for a libpq keyword/value connection string.

    libpq requires values containing spaces (or empty values) to be wrapped in
    single quotes, with backslashes and single quotes escaped by a backslash.
    Quoting every value unconditionally is valid and keeps the DSN well-formed
    for any password.
    """
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


# psycopg DSN in keyword/value form, with every value quoted and escaped so that
# special characters in the password cannot break or alter the string.
DSN = (
    f"host={_conninfo_value(PGHOST)} port={_conninfo_value(PGPORT)} "
    f"dbname={_conninfo_value(PGDATABASE)} "
    f"user={_conninfo_value(PGUSER)} password={_conninfo_value(PGPASSWORD)}"
)
# Connection pool: created closed (open=False) and opened explicitly at startup,
# to avoid exceptions from __del__ cleanup at interpreter exit.
pool = ConnectionPool(conninfo=DSN, min_size=1, max_size=10, timeout=60, open=False)
# ----------------------------
# SQL 只读门禁(最终底线仍是 DB 只读账号)
# ----------------------------
# Keyword denylist: matches any of the listed write/DDL/privilege keywords as a
# whole word, case-insensitively.
FORBIDDEN = re.compile(
    r"\b(insert|update|delete|drop|alter|truncate|create|grant|revoke|copy|call|execute|do)\b",
    re.IGNORECASE,
)
# Detects explicit "qualifier.name" references so cross-schema access can be
# rejected. The qualifier (group 1) must be at least two characters long, which
# leaves single-letter aliases such as t.id or o.amount unmatched.
# NOTE(review): aliases of two or more characters (e.g. tc.col) are captured as
# well — confirm that rejecting such queries is acceptable.
SCHEMA_QUAL = re.compile(r"\b([a-zA-Z_][a-zA-Z0-9_]{1,})\s*\.\s*[a-zA-Z_]", re.IGNORECASE)
def _is_probably_readonly(sql: str) -> bool:
    """Heuristically decide whether ``sql`` is a single read-only statement.

    The text is accepted only when all of the following hold:
      * it contains none of the FORBIDDEN keywords;
      * it parses into exactly one statement (fragments consisting solely of
        whitespace or semicolons are ignored);
      * the first non-whitespace token of that statement is
        select / with / show / explain.

    Requiring exactly one statement matters because only the first statement's
    leading token is inspected; anything after a ``;`` would otherwise run
    unchecked apart from the keyword denylist.

    Args:
        sql: SQL text to inspect.

    Returns:
        True when the text passes every check, False otherwise.
    """
    if FORBIDDEN.search(sql):
        return False

    # Drop empty fragments such as the remainder after a trailing ";".
    statements = [s for s in sqlparse.parse(sql) if str(s).strip(" \t\r\n;")]
    if len(statements) != 1:
        return False

    for tok in statements[0].tokens:
        if tok.is_whitespace:
            continue
        # Only the first non-whitespace token decides.
        return str(tok).strip().lower() in ("select", "with", "show", "explain")
    return False
def _validate_schema(schema: str) -> Optional[Dict[str, Any]]:
    """Check ``schema`` against the allow-list.

    Returns:
        None when the schema is allowed, otherwise a dict with a single
        "error" message.
    """
    if schema in ALLOWED_SCHEMA_SET:
        return None
    return {"error": f"schema 不允许:{schema}。仅允许:{sorted(ALLOWED_SCHEMA_SET)}"}
def _reject_cross_schema(sql: str, allowed_schema: str) -> Optional[Dict[str, Any]]:
    """Reject SQL that uses an explicit qualifier outside the permitted schemas.

    Every "qualifier." prefix found by SCHEMA_QUAL must be one of the business
    schemas in ALLOWED_SCHEMA_SET or a system schema (pg_catalog,
    information_schema); the comparison is case-insensitive. This is a textual
    check rather than a parser-level policy.

    Double quotes are removed before scanning so that quoted identifiers such
    as "public"."t" are inspected like public.t instead of slipping past the
    pattern.

    Args:
        sql: SQL text to inspect; None or "" yields no matches.
        allowed_schema: Schema the query runs in. Currently unused; kept for
            interface compatibility.

    Returns:
        None when nothing objectionable is found, otherwise a dict with a
        single "error" message listing the offending qualifiers.
    """
    text = (sql or "").replace('"', "")
    # Built once, instead of once per candidate.
    safe = {name.lower() for name in ALLOWED_SCHEMA_SET | {"pg_catalog", "information_schema"}}
    qualifiers = {m.group(1) for m in SCHEMA_QUAL.finditer(text)}
    bad = sorted(q for q in qualifiers if q.lower() not in safe)
    if bad:
        return {"error": f"SQL 被拒绝:检测到不允许的 schema 引用 {bad},仅允许 {sorted(ALLOWED_SCHEMA_SET)} / 系统 schema。"}
    return None
# ----------------------------
# FastMCPStreamable HTTP + JSON 响应
# ----------------------------
# FastMCP server instance named "postgres-mcp", configured with
# stateless_http=True and json_response=True. DNS-rebinding protection is
# enabled, with explicit allow-lists for Host and Origin values.
mcp = FastMCP(
    "postgres-mcp",
    stateless_http=True,
    json_response=True,
    transport_security=TransportSecuritySettings(
        enable_dns_rebinding_protection=True,
        allowed_hosts=[
            # Important: each host is listed both without a port and with a port wildcard.
            "mcp.langlangzhuoqiu.cn",
            "mcp.langlangzhuoqiu.cn:*",
            "localhost",
            "localhost:*",
            "127.0.0.1",
            "127.0.0.1:*",
            "100.64.0.4",
            "100.64.0.4:*",
            "100.64.0.1",
            "100.64.0.1:*",
            "106.52.16.235",
            "106.52.16.235:*",
        ],
        allowed_origins=[
            "https://mcp.langlangzhuoqiu.cn",
            "https://mcp.langlangzhuoqiu.cn:*",
            "http://localhost",
            "http://localhost:*",
            "http://127.0.0.1",
            "http://127.0.0.1:*",
        ],
    ),
)
# ----------------------------
# Tools面向 etl_feiqiu 六层 schema
# ----------------------------
@mcp.tool()
def list_tables(schema: str = "dwd", include_views: bool = False) -> Dict[str, Any]:
    """列出指定 schemaods/dwd/dws/core/meta/app下的表可选包含视图"""
    # Docstring kept verbatim: FastMCP presumably publishes it as the tool
    # description — confirm before editing.
    #
    # Returns {"schema", "include_views", "tables": [{"name", "type"}, ...],
    # "table_count"}, or the {"error": ...} payload from _validate_schema.
    rejection = _validate_schema(schema)
    if rejection:
        return rejection

    wanted_types = ["BASE TABLE", "VIEW"] if include_views else ["BASE TABLE"]
    query = """
        SELECT table_name, table_type
        FROM information_schema.tables
        WHERE table_schema = %s AND table_type = ANY(%s)
        ORDER BY table_name;
    """
    with pool.connection() as conn, conn.cursor() as cur:
        cur.execute(query, (schema, wanted_types))
        found = cur.fetchall()

    tables = [{"name": row[0], "type": row[1]} for row in found]
    return {
        "schema": schema,
        "include_views": include_views,
        "tables": tables,
        "table_count": len(found),
    }
@mcp.tool()
def describe_table(table: str, schema: str = "dwd") -> Dict[str, Any]:
    """查看表结构(字段、类型、是否可空、默认值)"""
    # Docstring kept verbatim: FastMCP presumably publishes it as the tool
    # description — confirm before editing.
    #
    # Returns {"schema", "table", "columns": [...], "column_count"}, or the
    # {"error": ...} payload from _validate_schema. The table name is passed
    # as a bound parameter.
    rejection = _validate_schema(schema)
    if rejection:
        return rejection

    query = """
        SELECT column_name, data_type, is_nullable, column_default, ordinal_position
        FROM information_schema.columns
        WHERE table_schema=%s AND table_name=%s
        ORDER BY ordinal_position;
    """
    with pool.connection() as conn, conn.cursor() as cur:
        cur.execute(query, (schema, table))
        found = cur.fetchall()

    field_names = ("name", "type", "nullable", "default", "position")
    columns = [
        {field: row[idx] for idx, field in enumerate(field_names)}
        for row in found
    ]
    return {
        "schema": schema,
        "table": table,
        "columns": columns,
        "column_count": len(found),
    }
@mcp.tool()
def describe_schemas(
    schemas: Optional[List[str]] = None,
    include_views: bool = False,
    max_tables_per_schema: int = 500,
) -> Dict[str, Any]:
    """
    返回 ods/dwd/dws/core/meta/app schema 下的表结构(含主键)。
    不传 schemas 则返回全部六个 schema。
    """
    # Docstring kept verbatim: FastMCP presumably publishes it as the tool
    # description — confirm before editing.
    schemas = schemas or list(ALLOWED_SCHEMAS)
    rejected = [name for name in schemas if name not in ALLOWED_SCHEMA_SET]
    if rejected:
        return {"error": f"存在不允许的 schema{rejected}。仅允许:{sorted(ALLOWED_SCHEMA_SET)}"}

    wanted_types = ["BASE TABLE", "VIEW"] if include_views else ["BASE TABLE"]

    # Fetch the three result sets on one connection, then assemble in memory.
    with pool.connection() as conn, conn.cursor() as cur:
        # 1) Table list
        cur.execute(
            """
            SELECT table_schema, table_name, table_type
            FROM information_schema.tables
            WHERE table_schema = ANY(%s)
              AND table_type = ANY(%s)
            ORDER BY table_schema, table_name;
            """,
            (schemas, wanted_types),
        )
        table_rows = cur.fetchall()

        # 2) All columns, fetched in one go (could be split/paged if there are very many tables)
        cur.execute(
            """
            SELECT table_schema, table_name, column_name, data_type, is_nullable, column_default, ordinal_position
            FROM information_schema.columns
            WHERE table_schema = ANY(%s)
            ORDER BY table_schema, table_name, ordinal_position;
            """,
            (schemas,),
        )
        col_rows = cur.fetchall()

        # 3) Primary keys
        cur.execute(
            """
            SELECT kcu.table_schema, kcu.table_name, kcu.column_name, kcu.ordinal_position
            FROM information_schema.table_constraints tc
            JOIN information_schema.key_column_usage kcu
              ON tc.constraint_name = kcu.constraint_name
             AND tc.table_schema = kcu.table_schema
             AND tc.table_name = kcu.table_name
            WHERE tc.constraint_type = 'PRIMARY KEY'
              AND tc.table_schema = ANY(%s)
            ORDER BY kcu.table_schema, kcu.table_name, kcu.ordinal_position;
            """,
            (schemas,),
        )
        pk_rows = cur.fetchall()

    # Tables per schema, capped at max_tables_per_schema (extra rows are dropped).
    tables_by_schema: Dict[str, List[Tuple[str, str]]] = defaultdict(list)
    for schema_name, table_name, table_type in table_rows:
        bucket = tables_by_schema[schema_name]
        if len(bucket) < max_tables_per_schema:
            bucket.append((table_name, table_type))

    # Column descriptions keyed by (schema, table), in ordinal order.
    cols_map: Dict[Tuple[str, str], List[Dict[str, Any]]] = defaultdict(list)
    for schema_name, table_name, col_name, data_type, nullable, default, position in col_rows:
        cols_map[(schema_name, table_name)].append(
            {"name": col_name, "type": data_type, "nullable": nullable, "default": default, "position": position}
        )

    # Primary-key column names keyed by (schema, table), in key order.
    pk_map: Dict[Tuple[str, str], List[str]] = defaultdict(list)
    for schema_name, table_name, col_name, _position in pk_rows:
        pk_map[(schema_name, table_name)].append(col_name)

    # 4) Assemble the response in the order the schemas were requested.
    described: Dict[str, Any] = {}
    for schema_name in schemas:
        entries = tables_by_schema.get(schema_name, [])
        tables: Dict[str, Any] = {}
        for table_name, table_type in entries:
            key = (schema_name, table_name)
            columns = cols_map.get(key, [])
            tables[table_name] = {
                "type": table_type,
                "primary_key": pk_map.get(key, []),
                "columns": columns,
                "column_count": len(columns),
            }
        described[schema_name] = {"table_count": len(entries), "tables": tables}

    return {
        "schemas": described,
        "include_views": include_views,
        "limits": {"max_tables_per_schema": max_tables_per_schema},
    }
@mcp.tool()
def query_sql(schema: str, sql: str, max_rows: int = MAX_ROWS) -> Dict[str, Any]:
    """
    Run one read-only SQL statement with ``search_path`` pinned to ``schema``.

    The request is rejected (an error dict is returned, nothing is executed) when
    ``_validate_schema`` reports a problem, when ``_is_probably_readonly`` does not
    accept the statement, or when ``_reject_cross_schema`` reports a problem.

    Args:
        schema: Schema to query; must pass ``_validate_schema``.
        sql: Statement text. Surrounding whitespace and trailing semicolons are
            stripped before the checks and before execution.
        max_rows: Requested row cap. Clamped to the range ``0..MAX_ROWS`` so a
            negative value cannot corrupt the result slice and an oversized value
            cannot bypass the module-level limit.

    Returns:
        Either the error dict produced by one of the checks, or a dict with the
        keys ``schema``, ``columns``, ``rows``, ``row_count``, ``truncated`` and
        ``max_rows`` (the effective, clamped cap). Cell values that are not
        int/float/str/bool/None are converted with ``str()``.
    """
    err = _validate_schema(schema)
    if err:
        return err

    sql = (sql or "").strip().rstrip(";")
    if not _is_probably_readonly(sql):
        return {"error": "SQL 被拒绝仅允许只读select/with/show/explain并禁止危险关键字。"}

    cross = _reject_cross_schema(sql, allowed_schema=schema)
    if cross:
        return cross

    # A negative cap would make ``rows[:max_rows]`` drop rows from the END of the
    # fetched batch, and a huge cap would defeat MAX_ROWS; clamp both ends.
    max_rows = max(0, min(max_rows, MAX_ROWS))

    with pool.connection() as conn:
        with conn.cursor() as cur:
            # The schema name was checked by _validate_schema above, so it is
            # interpolated directly (SET does not take bind parameters).
            cur.execute(f"SET LOCAL search_path TO {schema}")
            cur.execute(sql)
            cols = [d.name for d in (cur.description or [])]
            # Fetch one extra row so we can tell whether the result was cut off.
            rows = cur.fetchmany(max_rows + 1)

    truncated = len(rows) > max_rows
    rows = rows[:max_rows]

    # Keep plain scalar values as-is; stringify everything else.
    safe_rows: List[List[Any]] = [
        [v if isinstance(v, (int, float, str, bool)) or v is None else str(v) for v in r]
        for r in rows
    ]
    return {
        "schema": schema,
        "columns": cols,
        "rows": safe_rows,
        "row_count": len(safe_rows),
        "truncated": truncated,
        "max_rows": max_rows,
    }
# ----------------------------
# Auth middleware: accepts a Bearer header or a "token" query parameter
# ----------------------------
class AuthMiddleware(BaseHTTPMiddleware):
    """
    Token gate for requests whose path starts with ``/mcp``.

    When ``MCP_TOKEN`` is set, such a request must carry either the header
    ``Authorization: Bearer <MCP_TOKEN>`` or the query parameter
    ``token=<MCP_TOKEN>``; otherwise a 401 JSON response is returned.
    When ``MCP_TOKEN`` is empty/unset, no check is performed.
    """

    async def dispatch(self, request: Request, call_next):
        """Reject unauthenticated ``/mcp`` requests, pass everything else on."""
        # Function-scope import so this block does not depend on the file's
        # top-level import list.
        import hmac

        if MCP_TOKEN and request.url.path.startswith("/mcp"):
            auth = request.headers.get("authorization", "")
            token_q = request.query_params.get("token", "")
            expected = str(MCP_TOKEN)

            # compare_digest runs in time independent of where the inputs differ,
            # so response timing does not leak how much of a guess was correct.
            # Inputs are encoded to bytes because compare_digest only accepts
            # ASCII-only str values.
            header_ok = hmac.compare_digest(
                auth.encode("utf-8"), f"Bearer {expected}".encode("utf-8")
            )
            query_ok = hmac.compare_digest(
                token_q.encode("utf-8"), expected.encode("utf-8")
            )
            if not (header_ok or query_ok):
                return JSONResponse({"error": "unauthorized"}, status_code=401)
        return await call_next(request)
# ----------------------------
# lifespan: explicitly open/close the pool and run the session_manager
# ----------------------------
@contextlib.asynccontextmanager
async def lifespan(app: Starlette):
    """
    Application lifespan: open the connection pool, run the MCP session
    manager for as long as the app is serving, then close the pool.
    """
    # Open the pool up front, waiting up to 30 seconds.
    pool.open(wait=True, timeout=30)
    try:
        async with mcp.session_manager.run():
            yield
    finally:
        # Close explicitly so cleanup is not left to __del__ during interpreter
        # shutdown, which can raise.
        pool.close(timeout=5)
# MCP endpoint: /mcp (the default streamable_http_path is "/mcp")
app = Starlette(
    # Mount the MCP streamable-HTTP app at the root of this Starlette app.
    routes=[Mount("/", app=mcp.streamable_http_app())],
    lifespan=lifespan,
)
# Token check for /mcp requests (see AuthMiddleware.dispatch).
app.add_middleware(AuthMiddleware)
if __name__ == "__main__":
    # Serve the app with uvicorn when this file is executed as a script.
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=PORT,
        proxy_headers=True,
        # NOTE(review): "*" presumably makes uvicorn trust X-Forwarded-* headers
        # from any peer; confirm this port is only reachable through the
        # reverse proxy, otherwise clients can spoof their address/scheme.
        forwarded_allow_ips="*",
    )

File diff suppressed because it is too large Load Diff

View File

@@ -7,5 +7,6 @@ requires-python = ">=3.10"
members = [ members = [
"apps/etl/connectors/feiqiu", "apps/etl/connectors/feiqiu",
"apps/backend", "apps/backend",
"apps/mcp-server",
"packages/shared", "packages/shared",
] ]

View File

View File

@@ -0,0 +1,122 @@
# ==============================================================================
# 服务器端 Git 排除规则
# ==============================================================================
# 用途:服务器上不需要的文件,避免占用磁盘空间和干扰运行环境。
# 使用方式:运行 scripts/server/setup-server-git.py 自动配置。
#
# 原则:服务器只跑后端 API(+ 可选 ETL),其余全部排除。
# 注意:此文件影响未 track 的新文件。
# 对于已 track 但服务器不需要的文件,
# 由 setup-server-git.py 配合 skip-worktree 处理。
# ===== 环境配置(服务器有自己的 .env,不用 Git 里的) =====
.env
.env.local
# 模板保留,方便参考
# !.env.template
# ===== ETL 导出数据(仅开发机留存) =====
export/
# ===== 文档(开发参考用,服务器不需要) =====
docs/
# 如果需要部署文档可单独拉取,但运行时不依赖
# ===== H5 原型设计稿 =====
# 已在 docs/h5_ui/ 下,被 docs/ 规则覆盖
# ===== 小程序源码(服务器不编译小程序) =====
apps/miniprogram/
# ===== 管理后台源码(服务器只需要 dist/,不需要源码和 node_modules) =====
apps/admin-web/src/
apps/admin-web/node_modules/
apps/admin-web/pnpm-lock.yaml
# ===== 测试(服务器不跑测试) =====
tests/
.hypothesis/
.pytest_cache/
pytest-cache-files-*/
# ===== 示例数据 =====
samples/
# ===== 临时目录 =====
tmp/
# ===== 运维脚本中的一次性脚本(服务器不需要开发用的 ops 脚本) =====
scripts/ops/
scripts/audit/
scripts/migrate/
# ===== 根目录散文件(开发用,服务器不需要) =====
*.png
*.code-workspace
start-admin.bat
.kiroignore
# ===== Kiro 配置(服务器上不用 Kiro) =====
.kiro/
# ===== infra 配置文档(参考用,服务器不需要) =====
infra/
# ===== 日志文件(服务器自己产生的日志不入 Git) =====
logs/
*.log
*.jsonl
# ===== Python 虚拟环境(服务器自己 uv sync 生成) =====
.venv/
venv/
ENV/
env/
# ===== Python 缓存与构建产物 =====
__pycache__/
*.pyc
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
dist/
# ===== 测试覆盖率 =====
.coverage
htmlcov/
# ===== Node =====
node_modules/
# ===== infra 敏感文件(已被上方的 infra/ 规则覆盖,此处保留作为双重保险) =====
infra/**/*.key
infra/**/*.pem
infra/**/*.secret
# ===== IDE =====
.idea/
.vscode/
*.swp
*.swo
*~
.specstory/
.cursorindexingignore
# ===== Windows 杂项 =====
*.lnk
.Deleted/

View File

@@ -0,0 +1,137 @@
"""
服务器 Git 环境配置脚本
在服务器上首次 git clone 后运行一次,完成两件事:
1. 将 server-exclude.txt 复制到 .git/info/exclude
2. 对已 track 但服务器不需要的文件/目录设置 skip-worktree
这样 git pull 不会覆盖本地删除,也不会在工作区还原这些文件。
用法:
cd D:\\NeoZQYY\\test\\repo (或 prod\\repo)
python scripts/server/setup-server-git.py
运行后可以安全删除 export/ 等目录释放磁盘空间。
"""
import shutil
import subprocess
import sys
from pathlib import Path
# Repository root: three directory levels above this file's resolved path.
REPO_ROOT = Path(__file__).resolve().parent.parent.parent
# Paths to mark skip-worktree (tracked in Git but not needed on the server).
# NOTE(review): each entry is handed to `git ls-files -- <entry>` as a pathspec,
# not matched as a raw string prefix, so ".env" presumably selects only the file
# named ".env" (not ".env.local" etc.) — confirm this is the intent.
SKIP_PREFIXES = [
    "export/",
    ".env",
    "docs/",
    "tests/",
    "samples/",
    "infra/",
    ".kiro/",
    ".hypothesis/",
    "apps/miniprogram/",
    "apps/admin-web/src/",
    "apps/admin-web/pnpm-lock.yaml",
    "scripts/ops/",
    "scripts/audit/",
    "scripts/migrate/",
    # Loose files in the repository root (screenshots, workspace file, etc.)
    "coach-detail-full.png",
    "customer-detail-full.png",
    "perf-records-current.png",
    "white-screen-debug.png",
    "NeoZQYY.code-workspace",
    "start-admin.bat",
    ".kiroignore",
]
# Directories the server working tree does not need at all; once they are marked
# skip-worktree they may be deleted to free disk space. main() only reports
# these directories and prints the delete commands; it does not delete anything.
DELETABLE_DIRS = [
    "export",
    "docs",
    "tests",
    "samples",
    "infra",
    ".kiro",
    ".hypothesis",
    "apps/miniprogram",
    "scripts/ops",
    "scripts/audit",
    "scripts/migrate",
]
def copy_exclude():
    """Install scripts/server/server-exclude.txt as this clone's .git/info/exclude.

    Creates the ``.git/info`` directory if it is missing, overwrites any existing
    ``exclude`` file, and prints the source file name and destination path.
    """
    rules_file = REPO_ROOT.joinpath("scripts", "server", "server-exclude.txt")
    target = REPO_ROOT.joinpath(".git", "info", "exclude")

    # Make sure the destination directory exists before copying.
    target.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(rules_file, target)

    print(f" 已复制 {rules_file.name} -> {target}")
def get_tracked_files(prefix: str) -> list[str]:
    """Return the tracked files selected by ``prefix``.

    Runs ``git ls-files -z -- <prefix>`` in the repository root.

    Args:
        prefix: Path (file or directory) passed to git as a pathspec.

    Returns:
        The matching tracked paths exactly as git stores them; an empty list
        when nothing matches.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status
            (e.g. the directory is not a Git repository), instead of silently
            reporting "no files".
    """
    result = subprocess.run(
        # -z: NUL-separated output with no C-style quoting, so paths containing
        # non-ASCII characters or spaces come back verbatim and can be passed
        # straight to other git commands.
        ["git", "ls-files", "-z", "--", prefix],
        capture_output=True,
        # Decode as UTF-8 explicitly rather than relying on the locale code page.
        encoding="utf-8",
        check=True,
        cwd=REPO_ROOT,
    )
    return [path for path in result.stdout.split("\0") if path]
def skip_worktree(files: list[str]):
    """Set the skip-worktree flag on every path in ``files``.

    Args:
        files: Tracked paths, relative to the repository root. An empty list is
            a no-op.

    Raises:
        subprocess.CalledProcessError: if any ``git update-index`` call fails,
            so the caller never reports files as marked when they were not.
    """
    if not files:
        return
    # Process the paths in batches to keep each command's argument list short.
    batch_size = 50
    for start in range(0, len(files), batch_size):
        batch = files[start:start + batch_size]
        subprocess.run(
            # "--" ends option parsing so a path beginning with "-" is not
            # mistaken for a git option.
            ["git", "update-index", "--skip-worktree", "--", *batch],
            cwd=REPO_ROOT,
            check=True,
        )
def main():
    """Configure a server clone in three steps and print progress.

    1. Install the exclude rules via ``copy_exclude()``.
    2. For every entry of ``SKIP_PREFIXES``, look up the tracked files with
       ``get_tracked_files()`` and flag them with ``skip_worktree()``.
    3. Report the size of each directory in ``DELETABLE_DIRS`` that exists and
       print the commands to delete them. Nothing is deleted by this function.
    """
    print("=== 服务器 Git 环境配置 ===\n")

    # Step 1: install the exclude rules.
    print("[1/3] 配置 .git/info/exclude ...")
    copy_exclude()

    # Step 2: flag tracked-but-unneeded files as skip-worktree.
    print("\n[2/3] 设置 skip-worktree已 track 但服务器不需要的文件)...")
    total_skipped = 0
    for prefix in SKIP_PREFIXES:
        files = get_tracked_files(prefix)
        if files:
            skip_worktree(files)
            total_skipped += len(files)
            print(f" {prefix} -> {len(files)} 个文件已标记")
        else:
            print(f" {prefix} -> 无匹配文件")
    print(f" 共标记 {total_skipped} 个文件")

    # Step 3: report which deletable directories are present and how big they are.
    print("\n[3/3] 以下目录已标记 skip-worktree可安全删除以释放磁盘空间")
    existing_dirs = []
    for d in DELETABLE_DIRS:
        dir_path = REPO_ROOT / d
        if dir_path.exists():
            # Total size of all regular files below the directory.
            size = sum(f.stat().st_size for f in dir_path.rglob("*") if f.is_file())
            size_mb = size / (1024 * 1024)
            print(f" {d}/ ({size_mb:.1f} MB)")
            existing_dirs.append(d)
        else:
            print(f" {d}/ (不存在,无需处理)")

    # Only suggest commands for directories that are actually present.
    if existing_dirs:
        print("\n如需删除,手动执行:")
        for d in existing_dirs:
            # rmdir is a cmd.exe command, where "/" introduces a switch; the
            # path must use backslashes or e.g. "apps/miniprogram" is misparsed.
            win_path = d.replace("/", "\\")
            print(f" rmdir /s /q {win_path}")

    print("\n配置完成。后续 git pull 不会还原这些文件。")
# Run the configuration steps only when executed as a script, not on import.
if __name__ == "__main__":
    main()