# Source: ZQYY.FQ-ETL/tests/unit/test_pipeline_runner_properties.py
# -*- coding: utf-8 -*-
"""PipelineRunner 属性测试 - hypothesis 验证管道编排器的通用正确性属性。"""
import string
from datetime import datetime, timedelta
from unittest.mock import MagicMock, patch
import pytest
from hypothesis import given, settings
from hypothesis import strategies as st
from orchestration.pipeline_runner import PipelineRunner
# run() 内部延迟导入 TaskLogger需要 mock 源模块路径
_TASK_LOGGER_PATH = "utils.task_logger.TaskLogger"
FILE_VERSION = "v1_shell"
# ── Strategy definitions ──────────────────────────────────────────
pipeline_name_st = st.sampled_from(list(PipelineRunner.PIPELINE_LAYERS.keys()))
processing_mode_st = st.sampled_from(["increment_only", "verify_only", "increment_verify"])
data_source_st = st.sampled_from(["online", "offline", "hybrid"])
_TASK_PREFIXES = ["ODS_", "DWD_", "DWS_", "INDEX_"]
# Task codes: a layer prefix followed by 1-12 chars drawn from [A-Z0-9_].
task_code_st = st.tuples(
    st.sampled_from(_TASK_PREFIXES),
    st.text(
        alphabet=string.ascii_uppercase + string.digits + "_",
        min_size=1,
        max_size=12,
    ),
).map("".join)
# All per-task counters share the same non-negative integer range.
_count_st = st.integers(min_value=0, max_value=10000)
# Generator for a single task-result dict as produced by the task executor.
task_result_st = st.fixed_dictionaries({
    "task_code": task_code_st,
    "status": st.sampled_from(["SUCCESS", "FAIL", "SKIP"]),
    "counts": st.fixed_dictionaries({
        "fetched": _count_st,
        "inserted": _count_st,
        "updated": _count_st,
        "skipped": _count_st,
        "errors": st.integers(min_value=0, max_value=100),
    }),
    "dump_dir": st.none(),
})
task_results_st = st.lists(task_result_st, min_size=0, max_size=10)
# ── 辅助函数 ──────────────────────────────────────────────────────
def _make_config():
"""创建 mock 配置对象。"""
config = MagicMock()
config.get = MagicMock(side_effect=lambda key, default=None: {
"app.timezone": "Asia/Shanghai",
"verification.ods_use_local_json": False,
"verification.skip_ods_when_fetch_before_verify": True,
"run.ods_tasks": [],
"run.dws_tasks": [],
"run.index_tasks": [],
}.get(key, default))
return config
def _make_runner(task_executor=None, task_registry=None):
    """Create a PipelineRunner wired with mock collaborators.

    Callers may inject their own executor/registry mocks; any dependency
    not supplied is replaced with a MagicMock carrying benign defaults.
    """
    executor = task_executor
    if executor is None:
        executor = MagicMock()
        executor.run_tasks.return_value = []
    registry = task_registry
    if registry is None:
        registry = MagicMock()
        registry.get_tasks_by_layer.return_value = ["FAKE_TASK"]
    return PipelineRunner(
        config=_make_config(),
        task_executor=executor,
        task_registry=registry,
        db_conn=MagicMock(),
        api_client=MagicMock(),
        logger=MagicMock(),
    )
# ── Property 5: 管道名称→层列表映射 ──────────────────────────────
# Feature: scheduler-refactor, Property 5: 管道名称→层列表映射
# **Validates: Requirements 2.1**
class TestProperty5PipelineNameToLayers:
    """For any valid pipeline name, the layer list resolved by
    PipelineRunner must exactly match the PIPELINE_LAYERS definition."""

    @given(pipeline=pipeline_name_st)
    @settings(max_examples=100)
    def test_layers_match_pipeline_definition(self, pipeline):
        """The 'layers' field returned by run() equals PIPELINE_LAYERS[pipeline]."""
        expected = PipelineRunner.PIPELINE_LAYERS[pipeline]
        mock_exec = MagicMock()
        mock_exec.run_tasks.return_value = []
        runner = _make_runner(task_executor=mock_exec)
        with patch(_TASK_LOGGER_PATH):
            outcome = runner.run(
                pipeline=pipeline,
                processing_mode="increment_only",
                data_source="offline",
            )
        assert outcome["layers"] == expected

    @given(pipeline=pipeline_name_st)
    @settings(max_examples=100)
    def test_resolve_tasks_called_with_correct_layers(self, pipeline):
        """_resolve_tasks receives exactly the layer list defined for the pipeline."""
        mock_exec = MagicMock()
        mock_exec.run_tasks.return_value = []
        runner = _make_runner(task_executor=mock_exec)
        # Spy wraps the real _resolve_tasks so behavior is unchanged while
        # call arguments are recorded.
        with patch(_TASK_LOGGER_PATH), patch.object(
            runner, "_resolve_tasks", wraps=runner._resolve_tasks
        ) as resolve_spy:
            runner.run(
                pipeline=pipeline,
                processing_mode="increment_only",
                data_source="offline",
            )
        resolve_spy.assert_called_once_with(PipelineRunner.PIPELINE_LAYERS[pipeline])
# ── Property 6: processing_mode 控制执行流程 ─────────────────────
# Feature: scheduler-refactor, Property 6: processing_mode 控制执行流程
# **Validates: Requirements 2.3, 2.4**
class TestProperty6ProcessingModeControlsFlow:
    """For any processing_mode: incremental ETL runs if and only if the mode
    contains 'increment'; verification runs if and only if it contains 'verify'."""

    @given(
        pipeline=pipeline_name_st,
        mode=processing_mode_st,
        data_source=data_source_st,
    )
    @settings(max_examples=100)
    def test_increment_executes_iff_mode_contains_increment(self, pipeline, mode, data_source):
        """task_executor.run_tasks is invoked iff mode contains 'increment'."""
        etl_executor = MagicMock()
        etl_executor.run_tasks.return_value = []
        runner = _make_runner(task_executor=etl_executor)
        # Verification is stubbed out so only the increment path is observed.
        with patch(_TASK_LOGGER_PATH), patch.object(
            runner, "_run_verification", return_value={"status": "COMPLETED"}
        ):
            runner.run(
                pipeline=pipeline,
                processing_mode=mode,
                data_source=data_source,
            )
        if "increment" in mode:
            assert etl_executor.run_tasks.called, (
                f"mode={mode} 包含 'increment',但 run_tasks 未被调用"
            )
        else:
            # verify_only with fetch_before_verify left at its default (False):
            # run_tasks must not be invoked.
            assert not etl_executor.run_tasks.called, (
                f"mode={mode} 不包含 'increment',但 run_tasks 被调用了"
            )

    @given(
        pipeline=pipeline_name_st,
        mode=processing_mode_st,
        data_source=data_source_st,
    )
    @settings(max_examples=100)
    def test_verification_executes_iff_mode_contains_verify(self, pipeline, mode, data_source):
        """_run_verification is invoked iff mode contains 'verify'."""
        etl_executor = MagicMock()
        etl_executor.run_tasks.return_value = []
        runner = _make_runner(task_executor=etl_executor)
        with patch(_TASK_LOGGER_PATH), patch.object(
            runner, "_run_verification", return_value={"status": "COMPLETED"}
        ) as verify_spy:
            runner.run(
                pipeline=pipeline,
                processing_mode=mode,
                data_source=data_source,
            )
        if "verify" in mode:
            assert verify_spy.called, (
                f"mode={mode} 包含 'verify',但 _run_verification 未被调用"
            )
        else:
            assert not verify_spy.called, (
                f"mode={mode} 不包含 'verify',但 _run_verification 被调用了"
            )
# ── Property 7: 管道结果汇总完整性 ──────────────────────────────
# Feature: scheduler-refactor, Property 7: 管道结果汇总完整性
# **Validates: Requirements 2.6**
class TestProperty7PipelineSummaryCompleteness:
"""对于任意一组任务执行结果PipelineRunner 返回的汇总字典应包含
status/pipeline/layers/results 字段,且 results 长度等于实际执行的任务数。"""
@given(
pipeline=pipeline_name_st,
task_results=task_results_st,
)
@settings(max_examples=100)
def test_summary_has_required_fields(self, pipeline, task_results):
"""返回字典必须包含 status、pipeline、layers、results、verification_summary。"""
executor = MagicMock()
executor.run_tasks.return_value = task_results
runner = _make_runner(task_executor=executor)
with patch(_TASK_LOGGER_PATH):
result = runner.run(
pipeline=pipeline,
processing_mode="increment_only",
data_source="offline",
)
required_keys = {"status", "pipeline", "layers", "results", "verification_summary"}
assert required_keys.issubset(result.keys()), (
f"缺少必要字段: {required_keys - result.keys()}"
)
@given(
pipeline=pipeline_name_st,
task_results=task_results_st,
)
@settings(max_examples=100)
def test_results_length_equals_executed_tasks(self, pipeline, task_results):
"""results 列表长度等于 task_executor.run_tasks 返回的任务数。"""
executor = MagicMock()
executor.run_tasks.return_value = task_results
runner = _make_runner(task_executor=executor)
with patch(_TASK_LOGGER_PATH):
result = runner.run(
pipeline=pipeline,
processing_mode="increment_only",
data_source="offline",
)
assert len(result["results"]) == len(task_results), (
f"results 长度 {len(result['results'])} != 实际任务数 {len(task_results)}"
)
@given(
pipeline=pipeline_name_st,
task_results=task_results_st,
)
@settings(max_examples=100)
def test_pipeline_and_layers_match_input(self, pipeline, task_results):
"""返回的 pipeline 和 layers 字段与输入一致。"""
executor = MagicMock()
executor.run_tasks.return_value = task_results
runner = _make_runner(task_executor=executor)
with patch(_TASK_LOGGER_PATH):
result = runner.run(
pipeline=pipeline,
processing_mode="increment_only",
data_source="offline",
)
assert result["pipeline"] == pipeline
assert result["layers"] == PipelineRunner.PIPELINE_LAYERS[pipeline]
    @given(
        pipeline=pipeline_name_st,
        task_results=task_results_st,
    )
    @settings(max_examples=100)
    def test_increment_only_has_no_verification(self, pipeline, task_results):
        """verification_summary must be None in increment_only mode."""
        # Executor hands back the generated task results verbatim.
        executor = MagicMock()
        executor.run_tasks.return_value = task_results
        runner = _make_runner(task_executor=executor)
        # TaskLogger is imported lazily inside run(), so patch its source path.
        with patch(_TASK_LOGGER_PATH):
            result = runner.run(
                pipeline=pipeline,
                processing_mode="increment_only",
                data_source="offline",
            )
        # No verify step requested -> the summary field must remain None.
        assert result["verification_summary"] is None