初始提交:飞球 ETL 系统全量代码

This commit is contained in:
Neo
2026-02-13 08:05:34 +08:00
commit 3c51f5485d
441 changed files with 117631 additions and 0 deletions

View File

@@ -0,0 +1,207 @@
# -*- coding: utf-8 -*-
"""TaskExecutor 属性测试 - hypothesis 验证执行器的通用正确性属性。"""
import re
import string
from datetime import datetime, timedelta
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from hypothesis import given, settings
from hypothesis import strategies as st
from orchestration.task_executor import TaskExecutor, DataSource
from orchestration.task_registry import TaskRegistry
FILE_VERSION = "v4_shell"

# Every data-source mode the executor recognizes.
data_source_st = st.sampled_from(["online", "offline", "hybrid"])

# Prefixes that keep generated codes out of the ODS task namespace.
_NON_ODS_PREFIXES = ["DWD_", "DWS_", "TASK_", "ETL_", "TEST_"]

# Characters allowed after the prefix in a generated task code.
_CODE_ALPHABET = string.ascii_uppercase + string.digits + "_"

# Task codes look like "<PREFIX><SUFFIX>", e.g. "DWD_X1".
task_code_st = st.builds(
    lambda prefix, suffix: prefix + suffix,
    prefix=st.sampled_from(_NON_ODS_PREFIXES),
    suffix=st.text(alphabet=_CODE_ALPHABET, min_size=1, max_size=15),
)

# Arbitrary window start anywhere in a decade-wide range.
window_start_st = st.datetimes(
    min_value=datetime(2020, 1, 1),
    max_value=datetime(2030, 12, 31),
)
def _make_fake_class(name="FakeTask"):
return type(name, (), {"__init__": lambda self, *a, **kw: None})
def _make_config():
config = MagicMock()
config.get = MagicMock(side_effect=lambda key, default=None: {
"app.timezone": "Asia/Shanghai",
"io.fetch_root": "/tmp/fetch",
"io.ingest_source_dir": "/tmp/ingest",
"io.write_pretty_json": False,
"pipeline.fetch_root": None,
"pipeline.ingest_source_dir": None,
"integrity.auto_check": False,
"run.overlap_seconds": 600,
}.get(key, default))
config.__getitem__ = MagicMock(side_effect=lambda k: {
"io": {"export_root": "/tmp/export", "log_root": "/tmp/log"},
}[k])
return config
def _make_executor(registry):
    """Wire a TaskExecutor around *registry*, mocking every other collaborator."""
    mocked = dict(
        db_ops=MagicMock(),
        api_client=MagicMock(),
        cursor_mgr=MagicMock(),
        run_tracker=MagicMock(),
        logger=MagicMock(),
    )
    return TaskExecutor(config=_make_config(), task_registry=registry, **mocked)
# Feature: scheduler-refactor, Property 1: data_source 参数决定执行路径
# **Validates: Requirements 1.2**
class TestProperty1DataSourceDeterminesPath:
    """The data_source value alone decides which pipeline stages are included."""

    @given(ds=data_source_st)
    @settings(max_examples=100)
    def test_flow_includes_fetch(self, ds):
        # Fetch runs for any source that has an online component.
        expected = ds in {"online", "hybrid"}
        assert TaskExecutor._flow_includes_fetch(ds) == expected

    @given(ds=data_source_st)
    @settings(max_examples=100)
    def test_flow_includes_ingest(self, ds):
        # Ingest runs for any source that has an offline component.
        expected = ds in {"offline", "hybrid"}
        assert TaskExecutor._flow_includes_ingest(ds) == expected

    @given(ds=data_source_st)
    @settings(max_examples=100)
    def test_fetch_and_ingest_consistency(self, ds):
        # The (fetch, ingest) pair must match the mode exactly.
        flags = (
            TaskExecutor._flow_includes_fetch(ds),
            TaskExecutor._flow_includes_ingest(ds),
        )
        expected_by_source = {
            "hybrid": (True, True),
            "online": (True, False),
            "offline": (False, True),
        }
        assert flags == expected_by_source[ds]
# Feature: scheduler-refactor, Property 2: 成功任务推进游标
# **Validates: Requirements 1.3**
class TestProperty2SuccessAdvancesCursor:
    """A successful run whose result carries a window advances the cursor to that window."""

    @given(
        task_code=task_code_st,
        window_start=window_start_st,
        window_minutes=st.integers(min_value=1, max_value=1440),
    )
    @settings(max_examples=100)
    def test_success_with_window_advances_cursor(self, task_code, window_start, window_minutes):
        window_end = window_start + timedelta(minutes=window_minutes)
        registry = TaskRegistry()
        registry.register(task_code, _make_fake_class(), requires_db_config=True, layer="DWD")
        executor = _make_executor(registry)
        executor.cursor_mgr.get_or_create.return_value = {"cursor_id": 1, "last_end": None}
        executor.run_tracker.create_run.return_value = 100
        # The ingest step reports success together with the processed window.
        success_result = {
            "status": "SUCCESS",
            "counts": {"fetched": 10, "inserted": 5},
            "window": {"start": window_start, "end": window_end, "minutes": window_minutes},
        }
        task_config = {"task_id": 42, "task_code": task_code, "store_id": 1, "enabled": True}
        with (
            patch.object(TaskExecutor, "_load_task_config", return_value=task_config),
            patch.object(TaskExecutor, "_resolve_ingest_source", return_value=Path("/tmp/src")),
            patch.object(TaskExecutor, "_execute_ingest", return_value=success_result),
            patch.object(TaskExecutor, "_maybe_run_integrity_check"),
        ):
            executor.run_single_task(task_code, "test-uuid", 1, "offline")
        # Exactly one advance, bounded by the reported window.
        executor.cursor_mgr.advance.assert_called_once()
        advance_kwargs = executor.cursor_mgr.advance.call_args.kwargs
        assert advance_kwargs["window_start"] == window_start
        assert advance_kwargs["window_end"] == window_end
# Feature: scheduler-refactor, Property 3: 失败任务标记 FAIL 并重新抛出
# **Validates: Requirements 1.4**
class TestProperty3FailureMarksFailAndReraises:
    """When the ingest step raises, the run is recorded as FAIL and the error propagates."""

    @given(
        task_code=task_code_st,
        error_msg=st.text(
            alphabet=string.ascii_letters + string.digits + " _-",
            min_size=1, max_size=80,
        ),
    )
    @settings(max_examples=100)
    def test_exception_marks_fail_and_reraises(self, task_code, error_msg):
        registry = TaskRegistry()
        registry.register(task_code, _make_fake_class(), requires_db_config=True, layer="DWD")
        executor = _make_executor(registry)
        executor.cursor_mgr.get_or_create.return_value = {"cursor_id": 1, "last_end": None}
        executor.run_tracker.create_run.return_value = 200
        task_config = {"task_id": 99, "task_code": task_code, "store_id": 1, "enabled": True}
        ingest_error = RuntimeError(error_msg)
        with (
            patch.object(TaskExecutor, "_load_task_config", return_value=task_config),
            patch.object(TaskExecutor, "_resolve_ingest_source", return_value=Path("/tmp/src")),
            patch.object(TaskExecutor, "_execute_ingest", side_effect=ingest_error),
        ):
            # The original exception must escape run_single_task unchanged.
            with pytest.raises(RuntimeError, match=re.escape(error_msg)):
                executor.run_single_task(task_code, "fail-uuid", 1, "offline")
        executor.run_tracker.update_run.assert_called()
        final_kwargs = executor.run_tracker.update_run.call_args.kwargs
        assert final_kwargs["status"] == "FAIL"
# Feature: scheduler-refactor, Property 4: 工具类任务由元数据决定
# **Validates: Requirements 1.6, 4.2**
class TestProperty4UtilityTaskDeterminedByMetadata:
@given(task_code=task_code_st)
@settings(max_examples=100)
def test_utility_task_skips_cursor_and_run_tracker(self, task_code):
registry = TaskRegistry()
registry.register(task_code, _make_fake_class(), requires_db_config=False, task_type="utility")
executor = _make_executor(registry)
mock_task = MagicMock()
mock_task.execute.return_value = {"status": "SUCCESS", "counts": {}}
registry.create_task = MagicMock(return_value=mock_task)
result = executor.run_single_task(task_code, "util-uuid", 1, "hybrid")
executor.cursor_mgr.get_or_create.assert_not_called()
executor.cursor_mgr.advance.assert_not_called()
executor.run_tracker.create_run.assert_not_called()
assert result.get("status") == "SUCCESS"
    @given(task_code=task_code_st)
    @settings(max_examples=100)
    def test_non_utility_task_uses_cursor_and_run_tracker(self, task_code):
        """A regular (non-utility) task goes through cursor management and run tracking."""
        registry = TaskRegistry()
        # Default task_type (not "utility") — the executor should treat this as a windowed task.
        registry.register(task_code, _make_fake_class(), requires_db_config=True, layer="DWS")
        task_result = {"status": "SUCCESS", "counts": {"fetched": 1}}
        executor = _make_executor(registry)
        executor.cursor_mgr.get_or_create.return_value = {"cursor_id": 1, "last_end": None}
        executor.run_tracker.create_run.return_value = 300
        # Stub out config loading, source resolution, and the ingest step so only
        # the orchestration path of run_single_task is exercised.
        with (
            patch.object(TaskExecutor, "_load_task_config", return_value={
                "task_id": 77, "task_code": task_code, "store_id": 1, "enabled": True}),
            patch.object(TaskExecutor, "_resolve_ingest_source", return_value=Path("/tmp/src")),
            patch.object(TaskExecutor, "_execute_ingest", return_value=task_result),
        ):
            executor.run_single_task(task_code, "non-util-uuid", 1, "offline")
        # Unlike utility tasks, both the cursor and the run tracker must be engaged.
        executor.cursor_mgr.get_or_create.assert_called_once()
        executor.run_tracker.create_run.assert_called_once()