初始提交:飞球 ETL 系统全量代码

This commit is contained in:
Neo
2026-02-13 08:05:34 +08:00
commit 3c51f5485d
441 changed files with 117631 additions and 0 deletions

View File

@@ -0,0 +1,165 @@
# -*- coding: utf-8 -*-
"""TaskRegistry 属性测试 — 使用 hypothesis 验证注册表的通用正确性属性。"""
import string
import pytest
from hypothesis import given, settings
from hypothesis import strategies as st
from orchestration.task_registry import TaskRegistry, TaskMeta
# ── 辅助:动态生成假任务类 ────────────────────────────────────
def _make_fake_class(name: str = "FakeTask") -> type:
"""创建一个最小化的假任务类,用于注册测试。"""
return type(name, (), {"__init__": lambda self, *a, **kw: None})
# ── 生成策略 ──────────────────────────────────────────────────
# Valid task codes: upper-case ASCII letters, digits and "_", 1 to 30 characters long.
task_code_st = st.text(
    min_size=1,
    max_size=30,
    alphabet="".join((string.ascii_uppercase, string.digits, "_")),
)

# Boolean flag passed to register() as ``requires_db_config``.
requires_db_config_st = st.booleans()

# Layer metadata values; None is one of the values that can be drawn.
layer_st = st.sampled_from([None, "ODS", "DWD", "DWS", "INDEX"])

# Task type metadata values.
task_type_st = st.sampled_from(["etl", "utility", "verification"])
# ── Property 8: TaskRegistry 元数据 round-trip ────────────────
# Feature: scheduler-refactor, Property 8: TaskRegistry 元数据 round-trip
# **Validates: Requirements 4.1**
#
# 对于任意任务代码、任务类和元数据组合(requires_db_config、layer、task_type),
# 注册后通过 get_metadata 查询应返回相同的元数据值。
class TestProperty8MetadataRoundTrip:
    """Property 8: metadata passed to register() is read back unchanged."""

    @given(
        task_code=task_code_st,
        requires_db=requires_db_config_st,
        layer=layer_st,
        task_type=task_type_st,
    )
    @settings(max_examples=100)
    def test_metadata_round_trip(
        self, task_code: str, requires_db: bool, layer: str | None, task_type: str
    ):
        """Register one metadata combination and compare it with get_metadata().

        Args:
            task_code: Generated task code used as the registration key.
            requires_db: Value passed as ``requires_db_config``.
            layer: Value passed as ``layer`` (may be None).
            task_type: Value passed as ``task_type``.
        """
        # Arrange: a registry built inside the test body, so every generated
        # example starts from an empty one and no state leaks between examples.
        reg = TaskRegistry()
        task_cls = _make_fake_class()

        # Act: register with explicit metadata, then look the entry up again.
        reg.register(
            task_code,
            task_cls,
            requires_db_config=requires_db,
            layer=layer,
            task_type=task_type,
        )
        stored = reg.get_metadata(task_code)

        # Assert: each stored field matches what was registered.
        assert stored is not None, f"注册后 get_metadata('{task_code}') 不应返回 None"
        assert stored.task_class is task_cls, "task_class 应与注册时一致"
        assert stored.requires_db_config is requires_db, (
            f"requires_db_config 应为 {requires_db},实际为 {stored.requires_db_config}"
        )
        assert stored.layer == layer, f"layer 应为 {layer!r},实际为 {stored.layer!r}"
        assert stored.task_type == task_type, (
            f"task_type 应为 {task_type!r},实际为 {stored.task_type!r}"
        )
# ── Property 9: TaskRegistry 向后兼容默认值 ───────────────────
# Feature: scheduler-refactor, Property 9: TaskRegistry 向后兼容默认值
# **Validates: Requirements 4.4**
#
# 对于任意使用旧接口(仅 task_code 和 task_class)注册的任务,
# 查询元数据应返回 requires_db_config=True、layer=None、task_type="etl"。
class TestProperty9BackwardCompatibleDefaults:
    """Property 9: registering with only task_code + task_class yields default metadata."""

    @given(task_code=task_code_st)
    @settings(max_examples=100)
    def test_legacy_register_uses_defaults(self, task_code: str):
        """Register through the legacy two-argument call and check the defaults.

        The expected defaults asserted below are ``requires_db_config is True``,
        ``layer is None`` and ``task_type == "etl"``.

        Args:
            task_code: Generated task code used as the registration key.
        """
        # Arrange: a fresh registry and a throwaway task class for each example.
        registry = TaskRegistry()
        fake_cls = _make_fake_class()

        # Act: pass only task_code and task_class, with no metadata keyword arguments.
        registry.register(task_code, fake_cls)
        meta = registry.get_metadata(task_code)

        # Assert: the default-value contract.
        assert meta is not None, f"注册后 get_metadata('{task_code}') 不应返回 None"
        assert meta.task_class is fake_cls, "task_class 应与注册时一致"
        # The "," separator between the expected and actual parts was missing in
        # the next two messages; it now matches the other assertion messages.
        assert meta.requires_db_config is True, (
            f"默认 requires_db_config 应为 True,实际为 {meta.requires_db_config}"
        )
        assert meta.layer is None, (
            f"默认 layer 应为 None,实际为 {meta.layer!r}"
        )
        assert meta.task_type == "etl", (
            f"默认 task_type 应为 'etl',实际为 {meta.task_type!r}"
        )
# ── Property 10: 按层查询任务 ────────────────────────────────
# Feature: scheduler-refactor, Property 10: 按层查询任务
# **Validates: Requirements 4.3**
#
# 对于任意注册了 layer 元数据的任务集合,get_tasks_by_layer(layer)
# 返回的任务代码集合应等于所有 layer 匹配的已注册任务代码集合。
# Strategy over the non-None layer values, intended for layer-query checks.
# NOTE(review): not referenced anywhere else in the visible part of this file;
# confirm whether it is still needed.
non_none_layer_st = st.sampled_from(["ODS", "DWD", "DWS", "INDEX"])
class TestProperty10GetTasksByLayer:
    """Property 10: get_tasks_by_layer() agrees with a hand-written filter."""

    @given(
        entries=st.lists(st.tuples(task_code_st, layer_st), min_size=1, max_size=20),
    )
    @settings(max_examples=100)
    def test_get_tasks_by_layer_matches_manual_filter(
        self, entries: list[tuple[str, str | None]],
    ):
        """Register a batch of tasks and compare each layer query with a manual filter.

        Args:
            entries: Generated ``(task_code, layer)`` pairs; codes may repeat
                and ``layer`` may be None.
        """
        # Arrange
        registry = TaskRegistry()

        # Reference model: task code -> layer of its last registration.
        # Per the original author's notes, a repeated register() overwrites the
        # earlier entry and register() upper-cases the code internally; the
        # model mirrors both. NOTE(review): confirm against TaskRegistry.
        layer_by_code: dict[str, str | None] = {}
        for task_code, task_layer in entries:
            registry.register(
                task_code,
                _make_fake_class(f"Fake_{task_code}"),
                layer=task_layer,
            )
            layer_by_code[task_code.upper()] = task_layer

        # Act & Assert: check every non-None layer value.
        for query_layer in ("ODS", "DWD", "DWS", "INDEX"):
            wanted = query_layer.upper()
            actual = set(registry.get_tasks_by_layer(query_layer))
            expected = {
                code
                for code, layer in layer_by_code.items()
                if layer is not None and layer.upper() == wanted
            }
            assert actual == expected, (
                f"查询 layer={query_layer!r} 时,"
                f"期望 {expected},实际 {actual}"
            )