# -*- coding: utf-8 -*-
"""Property-based tests for PipelineRunner.

Uses hypothesis to verify general correctness properties of the
pipeline orchestrator.
"""
import string
from datetime import datetime, timedelta
from unittest.mock import MagicMock, patch

import pytest
from hypothesis import given, settings
from hypothesis import strategies as st

from orchestration.pipeline_runner import PipelineRunner

# run() imports TaskLogger lazily, so the mock target is the source module path.
_TASK_LOGGER_PATH = "utils.task_logger.TaskLogger"

FILE_VERSION = "v1_shell"

# ── Strategy definitions ─────────────────────────────────────────
pipeline_name_st = st.sampled_from(list(PipelineRunner.PIPELINE_LAYERS.keys()))
processing_mode_st = st.sampled_from(
    ["increment_only", "verify_only", "increment_verify"]
)
data_source_st = st.sampled_from(["online", "offline", "hybrid"])

_TASK_PREFIXES = ["ODS_", "DWD_", "DWS_", "INDEX_"]
_TASK_SUFFIX_ALPHABET = string.ascii_uppercase + string.digits + "_"

# Task codes look like "DWD_<UPPERCASE/DIGIT/_ suffix>".
task_code_st = st.builds(
    lambda prefix, suffix: prefix + suffix,
    prefix=st.sampled_from(_TASK_PREFIXES),
    suffix=st.text(alphabet=_TASK_SUFFIX_ALPHABET, min_size=1, max_size=12),
)

# Generator for a single task-result record.
_row_count_st = st.integers(min_value=0, max_value=10000)
task_result_st = st.fixed_dictionaries({
    "task_code": task_code_st,
    "status": st.sampled_from(["SUCCESS", "FAIL", "SKIP"]),
    "counts": st.fixed_dictionaries({
        "fetched": _row_count_st,
        "inserted": _row_count_st,
        "updated": _row_count_st,
        "skipped": _row_count_st,
        "errors": st.integers(min_value=0, max_value=100),
    }),
    "dump_dir": st.none(),
})
task_results_st = st.lists(task_result_st, min_size=0, max_size=10)

# ── Helpers ──────────────────────────────────────────────────────
def _make_config():
    """Build a mock config object whose ``get`` resolves a fixed set of keys."""
    known_settings = {
        "app.timezone": "Asia/Shanghai",
        "verification.ods_use_local_json": False,
        "verification.skip_ods_when_fetch_before_verify": True,
        "run.ods_tasks": [],
        "run.dws_tasks": [],
        "run.index_tasks": [],
    }
    config = MagicMock()
    config.get = MagicMock(
        side_effect=lambda key, default=None: known_settings.get(key, default)
    )
    return config
def _make_runner(task_executor=None, task_registry=None):
    """Create a PipelineRunner instance wired with mock dependencies.

    Collaborators that are not supplied are replaced by MagicMock defaults:
    the executor reports no task results, and the registry resolves every
    layer to a single fake task.
    """
    if task_executor is None:
        task_executor = MagicMock()
        task_executor.run_tasks.return_value = []
    if task_registry is None:
        task_registry = MagicMock()
        task_registry.get_tasks_by_layer.return_value = ["FAKE_TASK"]
    return PipelineRunner(
        config=_make_config(),
        task_executor=task_executor,
        task_registry=task_registry,
        db_conn=MagicMock(),
        api_client=MagicMock(),
        logger=MagicMock(),
    )


# ── Property 5: pipeline name → layer-list mapping ───────────────
# Feature: scheduler-refactor, Property 5: pipeline name → layer-list mapping
# **Validates: Requirements 2.1**
class TestProperty5PipelineNameToLayers:
    """For any valid pipeline name, the layer list resolved by PipelineRunner
    must match the definition in the PIPELINE_LAYERS dictionary exactly."""

    @given(pipeline=pipeline_name_st)
    @settings(max_examples=100)
    def test_layers_match_pipeline_definition(self, pipeline):
        """The ``layers`` field returned by run() equals PIPELINE_LAYERS[pipeline]."""
        mock_executor = MagicMock()
        mock_executor.run_tasks.return_value = []
        runner = _make_runner(task_executor=mock_executor)
        with patch(_TASK_LOGGER_PATH):
            summary = runner.run(
                pipeline=pipeline,
                processing_mode="increment_only",
                data_source="offline",
            )
        assert summary["layers"] == PipelineRunner.PIPELINE_LAYERS[pipeline]

    @given(pipeline=pipeline_name_st)
    @settings(max_examples=100)
    def test_resolve_tasks_called_with_correct_layers(self, pipeline):
        """_resolve_tasks receives exactly the layer list defined in PIPELINE_LAYERS."""
        mock_executor = MagicMock()
        mock_executor.run_tasks.return_value = []
        runner = _make_runner(task_executor=mock_executor)
        with (
            patch(_TASK_LOGGER_PATH),
            patch.object(runner, "_resolve_tasks", wraps=runner._resolve_tasks) as spy,
        ):
            runner.run(
                pipeline=pipeline,
                processing_mode="increment_only",
                data_source="offline",
            )
        spy.assert_called_once_with(PipelineRunner.PIPELINE_LAYERS[pipeline])


# ── Property 6: processing_mode controls the execution flow ──────
# Feature: scheduler-refactor, Property 6: processing_mode controls the execution flow
# **Validates: Requirements 2.3, 2.4**
class TestProperty6ProcessingModeControlsFlow:
    """For any processing_mode, incremental ETL executes iff the mode contains
    'increment', and the verification flow executes iff it contains 'verify'."""

    @given(
        pipeline=pipeline_name_st,
        mode=processing_mode_st,
        data_source=data_source_st,
    )
    @settings(max_examples=100)
    def test_increment_executes_iff_mode_contains_increment(self, pipeline, mode, data_source):
        """Incremental ETL (task_executor.run_tasks) runs iff mode contains 'increment'."""
        mock_executor = MagicMock()
        mock_executor.run_tasks.return_value = []
        runner = _make_runner(task_executor=mock_executor)
        with (
            patch(_TASK_LOGGER_PATH),
            patch.object(runner, "_run_verification", return_value={"status": "COMPLETED"}),
        ):
            runner.run(
                pipeline=pipeline,
                processing_mode=mode,
                data_source=data_source,
            )
        if "increment" in mode:
            assert mock_executor.run_tasks.called, (
                f"mode={mode} 包含 'increment',但 run_tasks 未被调用"
            )
        else:
            # verify_only with fetch_before_verify=False (default): run_tasks must not run.
            assert not mock_executor.run_tasks.called, (
                f"mode={mode} 不包含 'increment',但 run_tasks 被调用了"
            )

    @given(
        pipeline=pipeline_name_st,
        mode=processing_mode_st,
        data_source=data_source_st,
    )
    @settings(max_examples=100)
    def test_verification_executes_iff_mode_contains_verify(self, pipeline, mode, data_source):
        """The verification flow (_run_verification) runs iff mode contains 'verify'."""
        mock_executor = MagicMock()
        mock_executor.run_tasks.return_value = []
        runner = _make_runner(task_executor=mock_executor)
        with (
            patch(_TASK_LOGGER_PATH),
            patch.object(
                runner, "_run_verification", return_value={"status": "COMPLETED"}
            ) as mock_verify,
        ):
            runner.run(
                pipeline=pipeline,
                processing_mode=mode,
                data_source=data_source,
            )
        if "verify" in mode:
            assert mock_verify.called, (
                f"mode={mode} 包含 'verify',但 _run_verification 未被调用"
            )
        else:
            assert not mock_verify.called, (
                f"mode={mode} 不包含 'verify',但 _run_verification 被调用了"
            )


# ── Property 7: completeness of the pipeline result summary ──────
# Feature: scheduler-refactor, Property 7: completeness of the pipeline result summary
# **Validates: Requirements 2.6**
class TestProperty7PipelineSummaryCompleteness:
    """For any set of task execution results, the summary dict returned by
    PipelineRunner must contain the status/pipeline/layers/results fields,
    and the length of results must equal the number of tasks actually run."""

    @staticmethod
    def _run_increment_only(pipeline, task_results):
        # Shared driver: run an increment_only pipeline whose executor reports
        # the given task results, and return the summary dict.
        mock_executor = MagicMock()
        mock_executor.run_tasks.return_value = task_results
        runner = _make_runner(task_executor=mock_executor)
        with patch(_TASK_LOGGER_PATH):
            return runner.run(
                pipeline=pipeline,
                processing_mode="increment_only",
                data_source="offline",
            )

    @given(pipeline=pipeline_name_st, task_results=task_results_st)
    @settings(max_examples=100)
    def test_summary_has_required_fields(self, pipeline, task_results):
        """The result must contain status, pipeline, layers, results, verification_summary."""
        result = self._run_increment_only(pipeline, task_results)
        required_keys = {"status", "pipeline", "layers", "results", "verification_summary"}
        assert required_keys.issubset(result.keys()), (
            f"缺少必要字段: {required_keys - result.keys()}"
        )

    @given(pipeline=pipeline_name_st, task_results=task_results_st)
    @settings(max_examples=100)
    def test_results_length_equals_executed_tasks(self, pipeline, task_results):
        """len(results) equals the number of results task_executor.run_tasks returned."""
        result = self._run_increment_only(pipeline, task_results)
        assert len(result["results"]) == len(task_results), (
            f"results 长度 {len(result['results'])} != 实际任务数 {len(task_results)}"
        )

    @given(pipeline=pipeline_name_st, task_results=task_results_st)
    @settings(max_examples=100)
    def test_pipeline_and_layers_match_input(self, pipeline, task_results):
        """The returned pipeline and layers fields echo the requested pipeline."""
        result = self._run_increment_only(pipeline, task_results)
        assert result["pipeline"] == pipeline
        assert result["layers"] == PipelineRunner.PIPELINE_LAYERS[pipeline]

    @given(pipeline=pipeline_name_st, task_results=task_results_st)
    @settings(max_examples=100)
    def test_increment_only_has_no_verification(self, pipeline, task_results):
        """In increment_only mode, verification_summary must be None."""
        result = self._run_increment_only(pipeline, task_results)
        assert result["verification_summary"] is None