# -*- coding: utf-8 -*-
"""Flow runner: Flow definitions, layer-to-task mapping, and verification orchestration.

Extracted from the original ETLScheduler; Flow orchestration lives here while
concrete task execution is delegated to TaskExecutor. All dependencies are
injected through the constructor — this module creates no resources of its own.

Terminology: the "Flow" concept is used throughout; the legacy ``pipeline``
parameter has been removed.
"""
from __future__ import annotations

import logging
import os
import uuid
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional
from zoneinfo import ZoneInfo

from tasks.verification import filter_verify_tables
from orchestration.topological_sort import topological_sort
from utils.timer import EtlTimer


class FlowRunner:
    """Flow orchestrator: runs multi-layer ETL tasks per a Flow definition,
    with optional post-run verification."""

    # Flow definitions: the layers each named Flow covers
    # (migrated here from module-level constants in scheduler.py).
    FLOW_LAYERS: dict[str, list[str]] = {
        "api_ods": ["ODS"],
        "api_ods_dwd": ["ODS", "DWD"],
        "api_full": ["ODS", "DWD", "DWS", "INDEX"],
        "ods_dwd": ["DWD"],
        "dwd_dws": ["DWS"],
        "dwd_dws_index": ["DWS", "INDEX"],
        "dwd_index": ["INDEX"],
    }

    def __init__(
        self,
        config,
        task_executor,
        task_registry,
        db_conn,
        api_client,
        logger: logging.Logger,
    ):
        # All collaborators are injected; none are constructed here.
        self.config = config
        self.task_executor = task_executor
        self.task_registry = task_registry
        self.db_conn = db_conn
        self.api_client = api_client
        self.logger = logger
        # Timezone for window defaults and timer timestamps; falls back to
        # Asia/Shanghai when app.timezone is absent from config.
        self.tz = ZoneInfo(config.get("app.timezone", "Asia/Shanghai"))

    # CHANGE [2026-02-20] intent: removed the ``pipeline`` parameter; ``flow``
    # is now the single entry point (eliminates the historical alias).
    def run(
        self,
        flow: str | None = None,
        layers: list[str] | None = None,
        processing_mode: str = "increment_only",
        data_source: str = "hybrid",
        window_start: datetime | None = None,
        window_end: datetime | None = None,
        window_split: str | None = None,
        task_codes: list[str] | None = None,
        fetch_before_verify: bool = False,
        verify_tables: list[str] | None = None,
    ) -> dict[str, Any]:
        """Execute a Flow and return an aggregated result.

        Args:
            flow: Flow name (api_ods, api_ods_dwd, api_full, ...); mutually
                exclusive with ``layers`` — exactly one must be given.
            layers: Explicit layer list (e.g. ``["ODS", "DWD"]``); mutually
                exclusive with ``flow``.
            processing_mode: One of increment_only / verify_only /
                increment_verify / full_window. NOTE(review): any unrecognized
                mode falls through to the increment-ETL branch below — confirm
                that is intended.
            data_source: Data-source mode (online / offline / hybrid).
            window_start: Window start; defaults to 24h before ``window_end``.
            window_end: Window end; defaults to "now" in ``self.tz``.
            window_split: Window split unit (none / day / week / month).
            task_codes: Task codes to run (acts as a task filter within the Flow).
            fetch_before_verify: Whether to fetch API data before verifying
                (only effective in verify_only mode).
            verify_tables: Table names to verify (enables single-table checks).

        Returns:
            Result dict with status / flow / layers / results /
            verification_summary (plus consistency_report_path).

        Raises:
            ValueError: If neither/both of ``flow`` and ``layers`` resolve, or
                the Flow name is unknown.
        """
        # Imported lazily; presumably avoids a module-level import cycle —
        # TODO confirm.
        from utils.task_logger import TaskLogger

        # Resolve the layer list: Flow-name lookup, or the explicit ``layers``.
        if flow is not None:
            if flow not in self.FLOW_LAYERS:
                raise ValueError(f"无效的 Flow 名称: {flow}")
            resolved_layers = self.FLOW_LAYERS[flow]
            run_label = flow
        elif layers is not None:
            resolved_layers = layers
            run_label = f"layers({','.join(layers)})"
        else:
            raise ValueError("必须指定 flow 名称或 layers 参数之一")

        # NOTE(review): run_uuid is generated but never used in this method —
        # dead local, or intended for correlation logging? Confirm.
        run_uuid = uuid.uuid4().hex
        flow_logger = TaskLogger(f"FLOW_{run_label.upper()}", self.logger)
        flow_logger.start(f"开始执行 Flow: {run_label}")

        # CHANGE [2026-02-20] intent: integrate EtlTimer into the Flow run to
        # record per-step durations (reqs 15.1, 15.2, 15.3)
        # assumptions: EtlTimer is unit-tested; report path is controlled by
        # the ETL_REPORT_ROOT environment variable
        # prompt: "integrate EtlTimer into FlowRunner.run()"
        timer = EtlTimer(tz=self.tz)
        timer.start()

        # Rebind the ``layers`` parameter to the resolved list; all code below
        # uses the resolved value regardless of which input form was given.
        layers = resolved_layers
        results: list[dict[str, Any]] = []
        verification_summary: dict[str, Any] | None = None
        # task_code -> dump directory, harvested from ODS fetch results; later
        # consumed by local-JSON ODS verification.
        ods_dump_dirs: dict[str, str] = {}
        use_local_json = bool(self.config.get("verification.ods_use_local_json", False))

        # Default time window: last 24 hours ending now (tz-aware).
        if window_end is None:
            window_end = datetime.now(self.tz)
        if window_start is None:
            window_start = window_end - timedelta(hours=24)

        try:
            if processing_mode == "verify_only":
                # Verification-only mode.
                if fetch_before_verify:
                    self.logger.info("Flow %s: 校验模式(先获取 API 数据)", run_label)
                    if task_codes:
                        # Only ODS_* codes are fetched from the API here; other
                        # codes in the filter are ignored in this mode.
                        ods_tasks = [t for t in task_codes if t.startswith("ODS_")]
                        if ods_tasks:
                            self.logger.info("从 API 获取数据: %s", ods_tasks)
                            timer.start_step("FETCH_BEFORE_VERIFY")
                            results = self.task_executor.run_tasks(ods_tasks, data_source=data_source)
                            timer.stop_step("FETCH_BEFORE_VERIFY")
                    else:
                        # No explicit codes: resolve all registered ODS tasks.
                        auto_tasks = self._resolve_tasks(["ODS"])
                        if auto_tasks:
                            self.logger.info("从 API 获取数据: %s", auto_tasks)
                            timer.start_step("FETCH_BEFORE_VERIFY")
                            results = self.task_executor.run_tasks(auto_tasks, data_source=data_source)
                            timer.stop_step("FETCH_BEFORE_VERIFY")
                    # Collect dump dirs from fetch results for local-JSON
                    # verification. Assumes each result dict may carry
                    # task_code / dump_dir keys — TODO confirm against
                    # TaskExecutor's result schema.
                    ods_dump_dirs = {
                        r.get("task_code"): r.get("dump_dir")
                        for r in results
                        if r.get("task_code") and r.get("dump_dir")
                    }
                    self.logger.info("API 数据获取完成,开始校验并修复")
                else:
                    self.logger.info("Flow %s: 仅校验模式,跳过增量 ETL,直接执行校验并修复", run_label)

                verification_summary = self._run_verification(
                    layers=layers,
                    window_start=window_start,
                    window_end=window_end,
                    window_split=window_split,
                    fetch_from_api=fetch_before_verify,
                    ods_dump_dirs=ods_dump_dirs,
                    use_local_json=use_local_json,
                    verify_tables=verify_tables,
                )
                flow_logger.set_verification_result(verification_summary)
            else:
                # Incremental ETL (increment_only or increment_verify).
                self.logger.info("Flow %s: 执行增量 ETL,层=%s", run_label, layers)
                timer.start_step("INCREMENT_ETL")
                if task_codes:
                    # CHANGE [2026-02-24] intent: topologically sort
                    # caller-supplied task_codes too, so DWS never starts
                    # before DWD finishes (cross-layer dependency-order bug)
                    # prompt: "fix admin UI select-all not running tasks in
                    # layer order"
                    sorted_codes = topological_sort(task_codes, self.task_registry)
                    results = self.task_executor.run_tasks(sorted_codes, data_source=data_source)
                else:
                    auto_tasks = self._resolve_tasks(layers)
                    results = self.task_executor.run_tasks(auto_tasks, data_source=data_source)
                timer.stop_step("INCREMENT_ETL")

                # increment_verify mode: verify after the incremental run.
                if processing_mode == "increment_verify":
                    self.logger.info("Flow %s: 开始校验并修复", run_label)
                    timer.start_step("VERIFICATION")
                    verification_summary = self._run_verification(
                        layers=layers,
                        window_start=window_start,
                        window_end=window_end,
                        window_split=window_split,
                        ods_dump_dirs=ods_dump_dirs,
                        use_local_json=use_local_json,
                        verify_tables=verify_tables,
                    )
                    timer.stop_step("VERIFICATION")
                    flow_logger.set_verification_result(verification_summary)

            # CHANGE [2026-02-20] intent: integrate ConsistencyChecker into the
            # Flow run — consistency check runs automatically after ETL
            # completes (reqs 16.1, 16.4)
            # assumptions: ConsistencyChecker is unit-tested; report goes to
            # ETL_REPORT_ROOT
            # prompt: "integrate ConsistencyChecker into FlowRunner.run()"
            consistency_report_path = self._run_post_consistency_check(timer)

            # Emit the timing report. Presumably EtlTimer.finish raises
            # KeyError when ETL_REPORT_ROOT is missing — TODO confirm; on that
            # error we retry without writing the file.
            try:
                # NOTE(review): timing_report is assigned but never used
                # afterwards — confirm whether it should be returned/logged.
                timing_report = timer.finish(write_report=True)
                self.logger.info("计时报告已生成")
            except KeyError as ke:
                self.logger.warning("计时报告输出跳过(环境变量缺失): %s", ke)
                timing_report = timer.finish(write_report=False)

            # Aggregate counts — CHANGE 2026-02-21: BUG 11 fix — ``errors`` may
            # be a list, so counting must be type-safe.
            def _safe_int(val) -> int:
                """Normalize int/list/None into an int count."""
                if isinstance(val, int):
                    return val
                if isinstance(val, list):
                    return len(val)
                return 0

            flow_logger.set_counts(
                fetched=sum(_safe_int(r.get("counts", {}).get("fetched", 0)) for r in results),
                inserted=sum(_safe_int(r.get("counts", {}).get("inserted", 0)) for r in results),
                updated=sum(_safe_int(r.get("counts", {}).get("updated", 0)) for r in results),
                errors=sum(_safe_int(r.get("counts", {}).get("errors", 0)) for r in results),
            )
            summary_text = flow_logger.end(status="成功")
            self.logger.info("\n%s", summary_text)
            return {
                "status": "SUCCESS",
                "flow": run_label,
                "layers": layers,
                "results": results,
                "verification_summary": verification_summary,
                "consistency_report_path": consistency_report_path,
            }
        except Exception as exc:
            # On failure, still try to emit the timing report (helps diagnose
            # where time was spent); best-effort, never masks the original error.
            try:
                timer.finish(write_report=True)
            except Exception:
                pass
            summary_text = flow_logger.end(status="失败", error_message=str(exc))
            self.logger.error("\n%s", summary_text)
            raise

    def _run_post_consistency_check(self, timer: EtlTimer) -> str | None:
        """Run the data-consistency check after ETL and write its report.

        Returns the report file path, or None on any failure (the main flow is
        never blocked by this step).
        """
        try:
            from quality.consistency_checker import (
                run_consistency_check,
                write_consistency_report,
            )
        except ImportError:
            # Optional module: skip silently-but-logged when absent.
            self.logger.warning("一致性检查模块未安装,跳过")
            return None

        timer.start_step("CONSISTENCY_CHECK")
        try:
            # API-vs-ODS comparison is only enabled when a sample cache dir is
            # configured via the environment.
            api_sample_dir_str = os.environ.get("API_SAMPLE_CACHE_ROOT")
            api_sample_dir = Path(api_sample_dir_str) if api_sample_dir_str else None
            report = run_consistency_check(
                self.db_conn,
                api_sample_dir=api_sample_dir,
                include_api_vs_ods=bool(api_sample_dir),
                include_ods_vs_dwd=True,
                tz=self.tz,
            )
            report_path = write_consistency_report(report)
            self.logger.info("一致性检查报告已生成: %s", report_path)
            return report_path
        except KeyError as ke:
            # Presumably raised when a required env var (e.g. ETL_REPORT_ROOT)
            # is missing — TODO confirm against write_consistency_report.
            self.logger.warning("一致性检查报告输出跳过(环境变量缺失): %s", ke)
            return None
        except Exception as exc:
            self.logger.warning("一致性检查失败(不阻断主流程): %s", exc, exc_info=True)
            return None
        finally:
            # stop_step may raise KeyError if the step was never started /
            # already stopped — swallow so cleanup never raises. TODO confirm
            # EtlTimer's contract here.
            try:
                timer.stop_step("CONSISTENCY_CHECK")
            except KeyError:
                pass

    def _resolve_tasks(self, layers: list[str]) -> list[str]:
        """Resolve task codes for the given layers.

        Priority per layer: config value > TaskRegistry.get_tasks_by_layer()
        > empty (with a warning). The combined result is topologically sorted.
        """
        # CHANGE [2026-07-17] intent: removed all hard-coded fallback lists;
        # everything goes through the Registry (reqs 7.1, 7.2, 7.3)
        # assumptions: TaskRegistry has every layer's tasks registered; the DWD
        # layer gained a run.dwd_tasks config key
        # prompt: "unify _resolve_tasks(), drop hard-coded fallbacks"

        # Mapping from layer name to its config override key.
        _LAYER_CONFIG_KEY = {
            "ODS": "run.ods_tasks",
            "DWD": "run.dwd_tasks",
            "DWS": "run.dws_tasks",
            "INDEX": "run.index_tasks",
        }

        tasks: list[str] = []
        for layer in layers:
            layer_upper = layer.upper()

            # 1. Config override wins when present and non-empty.
            config_key = _LAYER_CONFIG_KEY.get(layer_upper)
            if config_key:
                config_tasks = self.config.get(config_key, [])
                if config_tasks:
                    tasks.extend(config_tasks)
                    continue

            # 2. Fall back to the Registry.
            registry_tasks = self.task_registry.get_tasks_by_layer(layer_upper)
            if registry_tasks:
                tasks.extend(registry_tasks)
            else:
                # 3. Registry empty too: warn and skip the layer.
                self.logger.warning(
                    "层 %s 在 Registry 中无已注册任务且无配置覆盖,跳过", layer_upper
                )

        # CHANGE [2026-07-18] intent: topologically sort the collected tasks so
        # dependents always run after their dependencies (reqs 8.3, 8.4, 8.5)
        if tasks:
            tasks = topological_sort(tasks, self.task_registry)
        return tasks

    def _run_verification(
        self,
        layers: list[str],
        window_start: datetime,
        window_end: datetime,
        window_split: str | None = None,
        fetch_from_api: bool = False,
        ods_dump_dirs: dict[str, str] | None = None,
        use_local_json: bool = False,
        verify_tables: list[str] | None = None,
    ) -> dict[str, Any]:
        """Run post-ETL verification for the given layers
        (migrated from the original _run_layer_verification).

        Per-layer failures are caught and recorded; this method always returns
        a summary dict and never raises for a single layer's error.
        """
        try:
            from tasks.verification import get_verifier_for_layer, build_window_segments
        except ImportError:
            self.logger.warning("校验框架未安装,跳过后置校验")
            return {"status": "SKIPPED", "message": "校验框架未安装"}

        total_tables = 0
        consistent_tables = 0
        total_backfilled = 0
        total_error_tables = 0
        layer_results: dict[str, Any] = {}
        skip_ods_on_fetch = bool(self.config.get("verification.skip_ods_when_fetch_before_verify", True))
        ods_dump_dirs = ods_dump_dirs or {}
        # NOTE(review): ``segments`` is computed but never used below —
        # verify_and_backfill re-splits via split_unit instead. Dead code, or a
        # planned refactor? Confirm before removing.
        segments = build_window_segments(window_start, window_end, window_split)

        for layer in layers:
            try:
                # ODS data was just ingested by fetch_before_verify; skip the
                # redundant second verification when configured to do so.
                if layer.upper() == "ODS" and fetch_from_api and skip_ods_on_fetch:
                    self.logger.info("ODS 层在 fetch_before_verify 下已完成入库,跳过二次校验")
                    layer_results[layer] = {
                        "status": "SKIPPED",
                        "reason": "fetch_before_verify",
                    }
                    continue

                # CHANGE [2025-07-18] intent: DWS/INDEX layers skip integrity
                # verification and only log (req 6.5)
                # assumptions: DWS/INDEX have no lightweight verifier; skipping
                # is the safest option
                # prompt: "implement lightweight DWS/INDEX verification"
                if layer.upper() in ("DWS", "INDEX"):
                    self.logger.info(
                        "DWS/INDEX 层使用轻量级校验,跳过完整性检查: %s", layer
                    )
                    layer_results[layer] = {
                        "status": "SKIPPED",
                        "reason": "lightweight_dws_index",
                    }
                    continue

                if layer.upper() == "ODS" and fetch_from_api:
                    if use_local_json:
                        # Verify ODS against the freshly dumped local JSON
                        # files instead of re-requesting the API.
                        if not ods_dump_dirs:
                            self.logger.warning("ODS 校验配置为使用本地 JSON,但未找到 dump 目录,跳过 ODS 校验")
                            layer_results[layer] = {
                                "status": "SKIPPED",
                                "reason": "local_json_missing",
                            }
                            continue
                        verifier = get_verifier_for_layer(
                            layer,
                            self.db_conn,
                            self.logger,
                            api_client=self.api_client,
                            fetch_from_api=True,
                            local_dump_dirs=ods_dump_dirs,
                            use_local_json=True,
                        )
                        self.logger.info("ODS 层使用本地 JSON 校验(不请求 API)")
                    else:
                        verifier = get_verifier_for_layer(
                            layer,
                            self.db_conn,
                            self.logger,
                            api_client=self.api_client,
                            fetch_from_api=True,
                        )
                        self.logger.info("ODS 层启用 API 数据校验")
                else:
                    # Non-ODS (effectively DWD here, since DWS/INDEX were
                    # skipped above) or ODS without API fetch: build
                    # layer-specific verifier kwargs.
                    verifier_kwargs: dict[str, Any] = {}
                    if layer.upper() == "INDEX":
                        # Defensive parse: config may hold a non-numeric value.
                        try:
                            lookback_days = int(self.config.get("run.index_lookback_days", 60))
                        except (TypeError, ValueError):
                            lookback_days = 60
                        verifier_kwargs = {
                            "lookback_days": lookback_days,
                            "config": self.config,
                        }
                        self.logger.info("INDEX 层校验使用回溯天数: %s", lookback_days)
                    if layer.upper() == "DWD":
                        verifier_kwargs["config"] = self.config
                    verifier = get_verifier_for_layer(
                        layer,
                        self.db_conn,
                        self.logger,
                        **verifier_kwargs,
                    )

                # Use filter_verify_tables in place of the old inline static
                # method; narrows the layer's table set when a filter is given.
                layer_tables = filter_verify_tables(layer, verify_tables)
                if verify_tables and not layer_tables:
                    self.logger.info("层 %s 无匹配表,跳过校验", layer)
                    layer_results[layer] = {
                        "status": "SKIPPED",
                        "reason": "table_filter",
                    }
                    continue

                self.logger.info("开始校验层: %s,时间窗口: %s ~ %s", layer, window_start, window_end)
                layer_summary = verifier.verify_and_backfill(
                    window_start=window_start,
                    window_end=window_end,
                    auto_backfill=True,
                    split_unit=window_split or "month",
                    tables=layer_tables,
                )
                # Summary object shape is duck-typed: use to_dict()/attributes
                # only when present so heterogeneous verifiers still work.
                layer_results[layer] = layer_summary.to_dict() if hasattr(layer_summary, 'to_dict') else {}
                if hasattr(layer_summary, 'total_tables'):
                    total_tables += layer_summary.total_tables
                    consistent_tables += layer_summary.consistent_tables
                    total_backfilled += layer_summary.total_backfilled
                    total_error_tables += getattr(layer_summary, 'error_tables', 0)
                self.logger.info(
                    "层 %s 校验完成: 表数=%d, 一致=%d, 错误=%d, 补齐=%d",
                    layer,
                    getattr(layer_summary, 'total_tables', 0),
                    getattr(layer_summary, 'consistent_tables', 0),
                    getattr(layer_summary, 'error_tables', 0),
                    getattr(layer_summary, 'total_backfilled', 0),
                )
            except Exception as exc:
                # One layer's failure must not abort verification of the rest.
                self.logger.error("层 %s 校验失败: %s", layer, exc, exc_info=True)
                layer_results[layer] = {"status": "ERROR", "error": str(exc)}

        return {
            "status": "COMPLETED",
            "total_tables": total_tables,
            "consistent_tables": consistent_tables,
            "total_backfilled": total_backfilled,
            "error_tables": total_error_tables,
            "layers": layer_results,
        }