455 lines
18 KiB
Python
455 lines
18 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""
|
||
Feature: dataflow-field-completion, Property 5: ETL 参数解析与 CLI 命令构建正确性
|
||
|
||
**Validates: Requirements 14.1, 14.2**
|
||
|
||
对于任意合法的 ETL 执行参数组合(门店列表、数据源模式、校验模式、时间范围、
|
||
窗口切分、force-full 标志、任务选择),Backend 构建的 CLI 命令字符串应包含
|
||
所有指定参数,且参数值与输入一致。
|
||
|
||
测试策略:
|
||
- 使用 hypothesis 生成随机 TaskConfigSchema 实例
|
||
- 随机 flow(从 VALID_FLOWS 中选择)
|
||
- 随机 processing_mode(从 VALID_PROCESSING_MODES 中选择)
|
||
- 随机任务代码列表(从 task_registry 中选择)
|
||
- 随机时间窗口模式(lookback / custom)
|
||
- 随机 window_split 和 window_split_days
|
||
- 随机 force_full / dry_run / fetch_before_verify 布尔值
|
||
- 随机 store_id
|
||
- 随机 ods_use_local_json
|
||
|
||
验证:
|
||
1. 构建的 CLI 命令包含 --flow 且值与 flow 一致
|
||
2. 任务代码通过 --tasks 正确传递
|
||
3. 时间范围参数格式正确且值一致
|
||
4. 布尔标志(--force-full / --dry-run / --fetch-before-verify)正确出现或缺失
|
||
5. --store-id 值与输入一致
|
||
6. --window-split / --window-split-days 正确传递
|
||
7. --data-source offline 在 ods_use_local_json=True 时出现
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import datetime
import sys
from pathlib import Path

from hypothesis import given, settings, HealthCheck, assume
import hypothesis.strategies as st
|
||
|
||
# ── Make the backend package importable: prepend apps/backend to sys.path ──
# The directory is located relative to this file: <two levels up>/apps/backend.
_BACKEND_ROOT = Path(__file__).resolve().parent.parent / "apps" / "backend"
# Only prepend once, so repeated imports of this module do not grow sys.path.
if not sys.path.count(str(_BACKEND_ROOT)):
    sys.path.insert(0, str(_BACKEND_ROOT))
|
||
|
||
from app.services.cli_builder import CLIBuilder, VALID_FLOWS, VALID_PROCESSING_MODES
|
||
from app.schemas.tasks import TaskConfigSchema
|
||
from app.services.task_registry import ALL_TASKS
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════════
|
||
# 常量与策略
|
||
# ══════════════════════════════════════════════════════════════════
|
||
|
||
# Codes of every task exposed by the task registry.
_ALL_TASK_CODES: list[str] = [task.code for task in ALL_TASKS]

# Accepted flow identifiers, sorted into a list so they can be sampled from.
_VALID_FLOWS_LIST = sorted(VALID_FLOWS)

# Accepted processing_mode values, sorted into a list so they can be sampled from.
_VALID_MODES_LIST = sorted(VALID_PROCESSING_MODES)

# window_split values exercised by these tests (split modes supported by the CLI).
_VALID_WINDOW_SPLITS = ["none", "day", "week", "month"]
|
||
|
||
# Strategy producing ISO-8601 date strings (YYYY-MM-DD) between 2024-01-01
# and 2026-12-31 inclusive.  The bounds are plain ``datetime.date`` objects;
# the previous dead ``st.just(...).__wrapped__ if False else ...`` branch and
# the ``__import__("datetime")`` calls were removed without changing the
# generated values.
_date_str = st.dates(
    min_value=datetime.date(2024, 1, 1),
    max_value=datetime.date(2026, 12, 31),
).map(datetime.date.isoformat)
|
||
|
||
|
||
@st.composite
def _valid_config(draw) -> TaskConfigSchema:
    """Hypothesis composite strategy that builds a ``TaskConfigSchema``.

    Values are drawn in this order: 1-5 unique task codes, flow,
    processing mode, window mode, the window bounds (custom) or the
    lookback/overlap values (lookback), window split, split days, the four
    boolean flags, and finally the store id.
    """
    task_codes = draw(
        st.lists(
            st.sampled_from(_ALL_TASK_CODES),
            min_size=1,
            max_size=5,
            unique=True,
        )
    )
    chosen_flow = draw(st.sampled_from(_VALID_FLOWS_LIST))
    chosen_mode = draw(st.sampled_from(_VALID_MODES_LIST))

    # Time-window mode decides which of the window fields get random values.
    window_mode = draw(st.sampled_from(["lookback", "custom"]))
    if window_mode == "custom":
        # ISO date strings order chronologically, so sorting the two drawn
        # values yields a pair with start <= end.
        window_start, window_end = sorted((draw(_date_str), draw(_date_str)))
        # Lookback fields keep fixed values in custom mode.
        lookback_hours, overlap_seconds = 24, 600
    else:
        window_start = window_end = None
        lookback_hours = draw(st.integers(min_value=1, max_value=720))
        overlap_seconds = draw(st.integers(min_value=0, max_value=7200))

    # Window splitting: a day count is only drawn when splitting is enabled.
    window_split = draw(st.sampled_from(_VALID_WINDOW_SPLITS))
    window_split_days = (
        draw(st.integers(min_value=1, max_value=30))
        if window_split != "none"
        else None
    )

    # Boolean switches, drawn in a fixed order.
    bool_flags = {
        flag_name: draw(st.booleans())
        for flag_name in (
            "force_full",
            "dry_run",
            "fetch_before_verify",
            "ods_use_local_json",
        )
    }

    # store_id is either None or a positive integer.
    store_id = draw(
        st.one_of(st.none(), st.integers(min_value=1, max_value=999999))
    )

    return TaskConfigSchema(
        tasks=task_codes,
        flow=chosen_flow,
        processing_mode=chosen_mode,
        window_mode=window_mode,
        window_start=window_start,
        window_end=window_end,
        lookback_hours=lookback_hours,
        overlap_seconds=overlap_seconds,
        window_split=window_split,
        window_split_days=window_split_days,
        store_id=store_id,
        **bool_flags,
    )
|
||
|
||
|
||
# Single CLIBuilder instance shared by every test in this module.
_builder = CLIBuilder()
# ETL path passed as the second argument of every build call below.
_ETL_PATH = "apps/etl/connectors/feiqiu"


def _build(config: TaskConfigSchema) -> list[str]:
    """Build the command list for ``config`` with the shared builder and ETL path."""
    command = _builder.build_command(config, _ETL_PATH)
    return command
|
||
|
||
|
||
def _get_arg_value(cmd: list[str], flag: str) -> str | None:
    """Return the token that directly follows the first ``flag`` in ``cmd``.

    Returns ``None`` when ``flag`` is not present, or when it is the last
    token and therefore has no value after it.
    """
    if flag not in cmd:
        return None
    value_pos = cmd.index(flag) + 1
    return cmd[value_pos] if value_pos < len(cmd) else None
|
||
|
||
|
||
def _has_flag(cmd: list[str], flag: str) -> bool:
    """Tell whether ``flag`` appears as a whole token anywhere in ``cmd``."""
    return cmd.count(flag) > 0
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════════
|
||
# Property 5a: --flow 参数与 flow 一致
|
||
# ══════════════════════════════════════════════════════════════════
|
||
|
||
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_flow_param_matches_flow(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**

    The built CLI command must contain a ``--flow`` argument whose value
    equals ``config.flow``.
    """
    actual_flow = _get_arg_value(_build(config), "--flow")

    assert actual_flow is not None, "CLI 命令缺少 --flow 参数"
    assert actual_flow == config.flow, (
        f"--flow 值 {actual_flow!r} != config.flow {config.flow!r}"
    )
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════════
|
||
# Property 5b: --tasks 参数包含所有任务代码
|
||
# ══════════════════════════════════════════════════════════════════
|
||
|
||
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_tasks_param_contains_all_codes(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**

    The ``--tasks`` argument of the built command must list exactly the
    configured task codes, comma separated (order is not checked).
    """
    tasks_arg = _get_arg_value(_build(config), "--tasks")

    if not config.tasks:
        # Empty task list: the builder is expected to omit --tasks, and
        # nothing is asserted for that case.
        return

    assert tasks_arg is not None, "CLI 命令缺少 --tasks 参数"
    actual_codes = set(tasks_arg.split(","))
    wanted_codes = set(config.tasks)
    assert actual_codes == wanted_codes, (
        f"--tasks 解析结果 {actual_codes} != 期望 {wanted_codes}"
    )
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════════
|
||
# Property 5c: 时间窗口参数正确传递
|
||
# ══════════════════════════════════════════════════════════════════
|
||
|
||
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_time_window_params_correct(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**

    - lookback mode: the command carries ``--lookback-hours`` and
      ``--overlap-seconds`` with the configured values
    - custom mode: the command carries ``--window-start`` and ``--window-end``
    - the two groups of arguments are mutually exclusive
    """
    cmd = _build(config)

    if config.window_mode != "lookback":
        # Custom mode: each bound is only checked when it is set.
        if config.window_start:
            start_arg = _get_arg_value(cmd, "--window-start")
            assert start_arg == config.window_start, (
                f"--window-start {start_arg!r} != {config.window_start!r}"
            )
        if config.window_end:
            end_arg = _get_arg_value(cmd, "--window-end")
            assert end_arg == config.window_end, (
                f"--window-end {end_arg!r} != {config.window_end!r}"
            )
        # Lookback argument must be absent in custom mode.
        assert not _has_flag(cmd, "--lookback-hours"), (
            "custom 模式不应包含 --lookback-hours"
        )
        return

    # Lookback mode: both lookback arguments are required and must match.
    hours_arg = _get_arg_value(cmd, "--lookback-hours")
    assert hours_arg is not None, "lookback 模式缺少 --lookback-hours"
    assert hours_arg == str(config.lookback_hours), (
        f"--lookback-hours {hours_arg!r} != {config.lookback_hours}"
    )

    overlap_arg = _get_arg_value(cmd, "--overlap-seconds")
    assert overlap_arg is not None, "lookback 模式缺少 --overlap-seconds"
    assert overlap_arg == str(config.overlap_seconds), (
        f"--overlap-seconds {overlap_arg!r} != {config.overlap_seconds}"
    )

    # Custom-window arguments must be absent in lookback mode.
    assert not _has_flag(cmd, "--window-start"), (
        "lookback 模式不应包含 --window-start"
    )
    assert not _has_flag(cmd, "--window-end"), (
        "lookback 模式不应包含 --window-end"
    )
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════════
|
||
# Property 5d: 布尔标志正确出现或缺失
|
||
# ══════════════════════════════════════════════════════════════════
|
||
|
||
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_boolean_flags_correct(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**

    - force_full=True -> the command contains ``--force-full``
    - dry_run=True -> the command contains ``--dry-run``
    - fetch_before_verify=True and processing_mode="verify_only" -> the
      command contains ``--fetch-before-verify``
    - ods_use_local_json=True -> the command contains ``--data-source offline``

    In every other case the corresponding flag must be absent.
    """
    cmd = _build(config)

    # --force-full mirrors config.force_full.
    has_force_full = _has_flag(cmd, "--force-full")
    if config.force_full:
        assert has_force_full, "force_full=True 但命令缺少 --force-full"
    else:
        assert not has_force_full, "force_full=False 但命令包含 --force-full"

    # --dry-run mirrors config.dry_run.
    has_dry_run = _has_flag(cmd, "--dry-run")
    if config.dry_run:
        assert has_dry_run, "dry_run=True 但命令缺少 --dry-run"
    else:
        assert not has_dry_run, "dry_run=False 但命令包含 --dry-run"

    # --fetch-before-verify is only expected in verify_only mode.
    expects_fetch = (
        config.fetch_before_verify and config.processing_mode == "verify_only"
    )
    has_fetch = _has_flag(cmd, "--fetch-before-verify")
    if expects_fetch:
        assert has_fetch, (
            "fetch_before_verify=True + verify_only 但命令缺少 --fetch-before-verify"
        )
    else:
        assert not has_fetch, (
            "非 verify_only 模式或 fetch_before_verify=False 但命令包含 --fetch-before-verify"
        )

    # ods_use_local_json maps to "--data-source offline".
    if config.ods_use_local_json:
        source_arg = _get_arg_value(cmd, "--data-source")
        assert source_arg == "offline", (
            f"ods_use_local_json=True 但 --data-source={source_arg!r}(期望 'offline')"
        )
    elif "data_source" not in config.extra_args:
        # Without the flag, --data-source may only appear when extra_args
        # supplies a data_source entry.
        source_arg = _get_arg_value(cmd, "--data-source")
        assert source_arg is None, (
            f"ods_use_local_json=False 但命令包含 --data-source {source_arg!r}"
        )
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════════
|
||
# Property 5e: --store-id 正确传递
|
||
# ══════════════════════════════════════════════════════════════════
|
||
|
||
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_store_id_param_correct(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**

    When ``store_id`` is set, the command must carry ``--store-id`` with the
    same value; when it is ``None`` the command must not carry ``--store-id``.
    """
    store_arg = _get_arg_value(_build(config), "--store-id")

    if config.store_id is None:
        assert store_arg is None, f"store_id=None 但命令包含 --store-id {store_arg!r}"
        return

    assert store_arg is not None, "store_id 不为 None 但命令缺少 --store-id"
    assert store_arg == str(config.store_id), (
        f"--store-id {store_arg!r} != {config.store_id}"
    )
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════════
|
||
# Property 5f: --window-split / --window-split-days 正确传递
|
||
# ══════════════════════════════════════════════════════════════════
|
||
|
||
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_window_split_params_correct(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**

    When ``window_split`` is set and not "none", the command must carry
    ``--window-split`` (and ``--window-split-days`` when a day count is
    configured) with matching values; otherwise ``--window-split`` must be
    absent.
    """
    cmd = _build(config)

    splitting_disabled = not config.window_split or config.window_split == "none"
    if splitting_disabled:
        assert not _has_flag(cmd, "--window-split"), (
            "window_split='none' 但命令包含 --window-split"
        )
        return

    split_arg = _get_arg_value(cmd, "--window-split")
    assert split_arg == config.window_split, (
        f"--window-split {split_arg!r} != {config.window_split!r}"
    )

    # The day count is only compared when the config actually provides one.
    if config.window_split_days is not None:
        days_arg = _get_arg_value(cmd, "--window-split-days")
        assert days_arg == str(config.window_split_days), (
            f"--window-split-days {days_arg!r} != {config.window_split_days}"
        )
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════════
|
||
# Property 5g: --processing-mode 正确传递
|
||
# ══════════════════════════════════════════════════════════════════
|
||
|
||
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_processing_mode_param_correct(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**

    When ``processing_mode`` is non-empty, the command must carry
    ``--processing-mode`` with the same value.
    """
    cmd = _build(config)

    if not config.processing_mode:
        # Nothing is asserted for an empty processing mode.
        return

    mode_arg = _get_arg_value(cmd, "--processing-mode")
    assert mode_arg is not None, "processing_mode 不为空但命令缺少 --processing-mode"
    assert mode_arg == config.processing_mode, (
        f"--processing-mode {mode_arg!r} != {config.processing_mode!r}"
    )
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════════
|
||
# Property 5h: 命令字符串与命令列表一致
|
||
# ══════════════════════════════════════════════════════════════════
|
||
|
||
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_command_string_consistent_with_list(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**

    The output of ``build_command_string()`` must agree with the list from
    ``build_command()``: every argument appears in the string, and arguments
    containing a space or a double quote appear wrapped in double quotes.
    """
    cmd_list = _build(config)
    cmd_str = _builder.build_command_string(config, _ETL_PATH)

    # Check each argument individually against the rendered string.
    for arg in cmd_list:
        needs_quoting = " " in arg or '"' in arg
        if not needs_quoting:
            assert arg in cmd_str, (
                f"参数 {arg!r} 未出现在命令字符串中"
            )
            continue
        # Arguments with spaces/quotes are expected in double-quoted form.
        assert f'"{arg}"' in cmd_str, (
            f"含空格参数 {arg!r} 未在命令字符串中被正确引用"
        )
|