# 初始提交:飞球 ETL 系统全量代码
# (commit metadata: tests/unit/test_pipeline_runner_properties.py — new file, 304 lines, @@ -0,0 +1,304 @@)
# -*- coding: utf-8 -*-
"""Property-based tests for PipelineRunner.

Uses hypothesis to verify generic correctness properties of the
pipeline orchestrator (layer resolution, processing-mode control flow,
and result-summary completeness).
"""

import string
# NOTE(review): datetime/timedelta and pytest appear unused in this file;
# kept in case external tooling or fixtures rely on them.
from datetime import datetime, timedelta
from unittest.mock import MagicMock, patch

import pytest
from hypothesis import given, settings
from hypothesis import strategies as st

from orchestration.pipeline_runner import PipelineRunner

# run() imports TaskLogger lazily, so patches must target the source module path.
_TASK_LOGGER_PATH = "utils.task_logger.TaskLogger"

FILE_VERSION = "v1_shell"
# ── Strategy definitions ──────────────────────────────────────────

# Any valid pipeline name known to the runner.
pipeline_name_st = st.sampled_from(list(PipelineRunner.PIPELINE_LAYERS.keys()))

processing_mode_st = st.sampled_from(["increment_only", "verify_only", "increment_verify"])

data_source_st = st.sampled_from(["online", "offline", "hybrid"])

# Task codes look like "<LAYER>_<suffix>" where the suffix is 1-12 chars of
# uppercase letters, digits, or underscores.
_TASK_PREFIXES = ["ODS_", "DWD_", "DWS_", "INDEX_"]
task_code_st = st.builds(
    lambda prefix, suffix: prefix + suffix,
    prefix=st.sampled_from(_TASK_PREFIXES),
    suffix=st.text(
        alphabet=string.ascii_uppercase + string.digits + "_",
        min_size=1, max_size=12,
    ),
)

# Generator for a single task-result dict as produced by the task executor.
task_result_st = st.fixed_dictionaries({
    "task_code": task_code_st,
    "status": st.sampled_from(["SUCCESS", "FAIL", "SKIP"]),
    "counts": st.fixed_dictionaries({
        "fetched": st.integers(min_value=0, max_value=10000),
        "inserted": st.integers(min_value=0, max_value=10000),
        "updated": st.integers(min_value=0, max_value=10000),
        "skipped": st.integers(min_value=0, max_value=10000),
        "errors": st.integers(min_value=0, max_value=100),
    }),
    "dump_dir": st.none(),
})

task_results_st = st.lists(task_result_st, min_size=0, max_size=10)
# ── 辅助函数 ──────────────────────────────────────────────────────
|
||||
|
||||
def _make_config():
|
||||
"""创建 mock 配置对象。"""
|
||||
config = MagicMock()
|
||||
config.get = MagicMock(side_effect=lambda key, default=None: {
|
||||
"app.timezone": "Asia/Shanghai",
|
||||
"verification.ods_use_local_json": False,
|
||||
"verification.skip_ods_when_fetch_before_verify": True,
|
||||
"run.ods_tasks": [],
|
||||
"run.dws_tasks": [],
|
||||
"run.index_tasks": [],
|
||||
}.get(key, default))
|
||||
return config
|
||||
|
||||
|
||||
def _make_runner(task_executor=None, task_registry=None):
    """Create a PipelineRunner instance with mock dependencies injected.

    Args:
        task_executor: optional mock; defaults to one whose ``run_tasks``
            returns an empty result list.
        task_registry: optional mock; defaults to one that reports a single
            fake task per layer so layer resolution has something to return.
    """
    if task_executor is None:
        task_executor = MagicMock()
        task_executor.run_tasks.return_value = []
    if task_registry is None:
        task_registry = MagicMock()
        task_registry.get_tasks_by_layer.return_value = ["FAKE_TASK"]
    return PipelineRunner(
        config=_make_config(),
        task_executor=task_executor,
        task_registry=task_registry,
        db_conn=MagicMock(),
        api_client=MagicMock(),
        logger=MagicMock(),
    )
# ── Property 5: pipeline name → layer list mapping ───────────────
# Feature: scheduler-refactor, Property 5: pipeline name → layer list mapping
# **Validates: Requirements 2.1**


class TestProperty5PipelineNameToLayers:
    """For any valid pipeline name, the layer list resolved by PipelineRunner
    must match the definition in the PIPELINE_LAYERS dict exactly."""

    @given(pipeline=pipeline_name_st)
    @settings(max_examples=100)
    def test_layers_match_pipeline_definition(self, pipeline):
        """The ``layers`` field returned by run() equals PIPELINE_LAYERS[pipeline]."""
        executor = MagicMock()
        executor.run_tasks.return_value = []
        runner = _make_runner(task_executor=executor)

        with patch(_TASK_LOGGER_PATH):
            result = runner.run(
                pipeline=pipeline,
                processing_mode="increment_only",
                data_source="offline",
            )

        expected_layers = PipelineRunner.PIPELINE_LAYERS[pipeline]
        assert result["layers"] == expected_layers

    @given(pipeline=pipeline_name_st)
    @settings(max_examples=100)
    def test_resolve_tasks_called_with_correct_layers(self, pipeline):
        """_resolve_tasks receives exactly the layer list from PIPELINE_LAYERS."""
        executor = MagicMock()
        executor.run_tasks.return_value = []
        runner = _make_runner(task_executor=executor)

        with (
            patch(_TASK_LOGGER_PATH),
            # Spy wraps the real method so behavior is unchanged while calls are recorded.
            patch.object(runner, "_resolve_tasks", wraps=runner._resolve_tasks) as spy,
        ):
            runner.run(
                pipeline=pipeline,
                processing_mode="increment_only",
                data_source="offline",
            )

        expected_layers = PipelineRunner.PIPELINE_LAYERS[pipeline]
        spy.assert_called_once_with(expected_layers)
# ── Property 6: processing_mode controls the execution flow ──────
# Feature: scheduler-refactor, Property 6: processing_mode controls the execution flow
# **Validates: Requirements 2.3, 2.4**


class TestProperty6ProcessingModeControlsFlow:
    """For any processing_mode: incremental ETL runs iff the mode contains
    'increment', and the verification flow runs iff the mode contains 'verify'."""

    @given(
        pipeline=pipeline_name_st,
        mode=processing_mode_st,
        data_source=data_source_st,
    )
    @settings(max_examples=100)
    def test_increment_executes_iff_mode_contains_increment(self, pipeline, mode, data_source):
        """Incremental ETL (task_executor.run_tasks) runs iff mode contains 'increment'."""
        executor = MagicMock()
        executor.run_tasks.return_value = []
        runner = _make_runner(task_executor=executor)

        with (
            patch(_TASK_LOGGER_PATH),
            # Stub out verification so only the increment path is observed.
            patch.object(runner, "_run_verification", return_value={"status": "COMPLETED"}),
        ):
            runner.run(
                pipeline=pipeline,
                processing_mode=mode,
                data_source=data_source,
            )

        should_increment = "increment" in mode
        if should_increment:
            assert executor.run_tasks.called, (
                f"mode={mode} 包含 'increment',但 run_tasks 未被调用"
            )
        else:
            # With verify_only and fetch_before_verify=False (the default),
            # run_tasks must not be called.
            assert not executor.run_tasks.called, (
                f"mode={mode} 不包含 'increment',但 run_tasks 被调用了"
            )

    @given(
        pipeline=pipeline_name_st,
        mode=processing_mode_st,
        data_source=data_source_st,
    )
    @settings(max_examples=100)
    def test_verification_executes_iff_mode_contains_verify(self, pipeline, mode, data_source):
        """The verification flow (_run_verification) runs iff mode contains 'verify'."""
        executor = MagicMock()
        executor.run_tasks.return_value = []
        runner = _make_runner(task_executor=executor)

        with (
            patch(_TASK_LOGGER_PATH),
            patch.object(runner, "_run_verification", return_value={"status": "COMPLETED"}) as mock_verify,
        ):
            runner.run(
                pipeline=pipeline,
                processing_mode=mode,
                data_source=data_source,
            )

        should_verify = "verify" in mode
        if should_verify:
            assert mock_verify.called, (
                f"mode={mode} 包含 'verify',但 _run_verification 未被调用"
            )
        else:
            assert not mock_verify.called, (
                f"mode={mode} 不包含 'verify',但 _run_verification 被调用了"
            )
# ── Property 7: pipeline result summary completeness ─────────────
# Feature: scheduler-refactor, Property 7: pipeline result summary completeness
# **Validates: Requirements 2.6**


class TestProperty7PipelineSummaryCompleteness:
    """For any set of task execution results, the summary dict returned by
    PipelineRunner must contain the status/pipeline/layers/results fields,
    and the length of ``results`` must equal the number of tasks executed."""

    @given(
        pipeline=pipeline_name_st,
        task_results=task_results_st,
    )
    @settings(max_examples=100)
    def test_summary_has_required_fields(self, pipeline, task_results):
        """The returned dict must contain status, pipeline, layers, results,
        and verification_summary."""
        executor = MagicMock()
        executor.run_tasks.return_value = task_results
        runner = _make_runner(task_executor=executor)

        with patch(_TASK_LOGGER_PATH):
            result = runner.run(
                pipeline=pipeline,
                processing_mode="increment_only",
                data_source="offline",
            )

        required_keys = {"status", "pipeline", "layers", "results", "verification_summary"}
        assert required_keys.issubset(result.keys()), (
            f"缺少必要字段: {required_keys - result.keys()}"
        )

    @given(
        pipeline=pipeline_name_st,
        task_results=task_results_st,
    )
    @settings(max_examples=100)
    def test_results_length_equals_executed_tasks(self, pipeline, task_results):
        """len(results) equals the number of entries task_executor.run_tasks returned."""
        executor = MagicMock()
        executor.run_tasks.return_value = task_results
        runner = _make_runner(task_executor=executor)

        with patch(_TASK_LOGGER_PATH):
            result = runner.run(
                pipeline=pipeline,
                processing_mode="increment_only",
                data_source="offline",
            )

        assert len(result["results"]) == len(task_results), (
            f"results 长度 {len(result['results'])} != 实际任务数 {len(task_results)}"
        )

    @given(
        pipeline=pipeline_name_st,
        task_results=task_results_st,
    )
    @settings(max_examples=100)
    def test_pipeline_and_layers_match_input(self, pipeline, task_results):
        """The returned pipeline and layers fields match the input."""
        executor = MagicMock()
        executor.run_tasks.return_value = task_results
        runner = _make_runner(task_executor=executor)

        with patch(_TASK_LOGGER_PATH):
            result = runner.run(
                pipeline=pipeline,
                processing_mode="increment_only",
                data_source="offline",
            )

        assert result["pipeline"] == pipeline
        assert result["layers"] == PipelineRunner.PIPELINE_LAYERS[pipeline]

    @given(
        pipeline=pipeline_name_st,
        task_results=task_results_st,
    )
    @settings(max_examples=100)
    def test_increment_only_has_no_verification(self, pipeline, task_results):
        """In increment_only mode, verification_summary must be None."""
        executor = MagicMock()
        executor.run_tasks.return_value = task_results
        runner = _make_runner(task_executor=executor)

        with patch(_TASK_LOGGER_PATH):
            result = runner.run(
                pipeline=pipeline,
                processing_mode="increment_only",
                data_source="offline",
            )

        assert result["verification_summary"] is None
# (diff-viewer footer: "Reference in New Issue" / "Block a user" — not part of the source file)