初始提交:飞球 ETL 系统全量代码

This commit is contained in:
Neo
2026-02-13 08:05:34 +08:00
commit 3c51f5485d
441 changed files with 117631 additions and 0 deletions

137
tests/unit/test_cli_args.py Normal file
View File

@@ -0,0 +1,137 @@
# -*- coding: utf-8 -*-
"""CLI 参数解析单元测试
验证 --data-source 新参数、--pipeline-flow 弃用映射、
--pipeline + --tasks 同时使用、以及 build_cli_overrides 集成行为。
需求: 3.1, 3.3, 3.5
"""
import warnings
from argparse import Namespace
from unittest.mock import patch
import pytest
from cli.main import parse_args, resolve_data_source, build_cli_overrides
# ---------------------------------------------------------------------------
# 1. --data-source 新参数解析
# ---------------------------------------------------------------------------
class TestDataSourceArg:
    """Parsing of the ``--data-source`` command-line option."""

    @staticmethod
    def _parse(*cli_tokens):
        """Return ``parse_args()`` evaluated with ``sys.argv`` patched.

        ``sys.argv`` is temporarily replaced by ``["cli", *cli_tokens]`` so the
        parser sees exactly the tokens supplied by the test.
        """
        with patch("sys.argv", ["cli", *cli_tokens]):
            return parse_args()

    @pytest.mark.parametrize("value", ["online", "offline", "hybrid"])
    def test_data_source_valid_values(self, value):
        """Each parametrized value ends up unchanged on ``args.data_source``."""
        parsed = self._parse("--data-source", value)
        assert parsed.data_source == value

    def test_data_source_default_is_none(self):
        """Without the option on the command line, ``args.data_source`` is ``None``."""
        parsed = self._parse()
        assert parsed.data_source is None
# ---------------------------------------------------------------------------
# 2. resolve_data_source() 弃用映射
# ---------------------------------------------------------------------------
class TestResolveDataSource:
    """Checks on ``resolve_data_source()``, including the legacy flow mapping."""

    # Pairs of (legacy ``pipeline_flow`` value, ``data_source`` it must resolve to).
    _LEGACY_FLOW_CASES = [
        ("FULL", "hybrid"),
        ("FETCH_ONLY", "online"),
        ("INGEST_ONLY", "offline"),
    ]

    def test_explicit_data_source_returns_directly(self):
        """An explicit ``data_source`` is returned as-is when no flow is set."""
        parsed = Namespace(data_source="online", pipeline_flow=None)
        resolved = resolve_data_source(parsed)
        assert resolved == "online"

    def test_data_source_takes_priority_over_pipeline_flow(self):
        """``data_source`` wins when ``pipeline_flow`` is supplied as well."""
        parsed = Namespace(data_source="online", pipeline_flow="FULL")
        resolved = resolve_data_source(parsed)
        assert resolved == "online"

    @pytest.mark.parametrize("flow, expected", _LEGACY_FLOW_CASES)
    def test_pipeline_flow_maps_with_deprecation_warning(self, flow, expected):
        """A legacy flow resolves to its mapped value and warns about deprecation."""
        legacy_args = Namespace(data_source=None, pipeline_flow=flow)
        # The pattern is matched against the warning text emitted by the code
        # under test; it reads "--pipeline-flow is deprecated".
        with pytest.warns(DeprecationWarning, match="--pipeline-flow 已弃用"):
            resolved = resolve_data_source(legacy_args)
        assert resolved == expected

    def test_neither_arg_defaults_to_hybrid(self):
        """With both attributes ``None`` the result is ``"hybrid"``."""
        parsed = Namespace(data_source=None, pipeline_flow=None)
        resolved = resolve_data_source(parsed)
        assert resolved == "hybrid"
# ---------------------------------------------------------------------------
# 3. build_cli_overrides() 集成
# ---------------------------------------------------------------------------
class TestBuildCliOverrides:
    """Integration-style checks on the mapping returned by ``build_cli_overrides()``."""

    def _make_args(self, **kwargs):
        """Build a ``Namespace`` with a baseline value for every listed attribute.

        Most attributes start as ``None`` or ``False``; ``processing_mode``,
        ``window_split``, ``lookback_hours`` and ``overlap_seconds`` carry the
        non-empty baselines shown below. Keyword arguments override a baseline
        entry, or add a new attribute when the name is not listed.
        """
        baseline = {
            "store_id": None,
            "tasks": None,
            "dry_run": False,
            "pipeline": None,
            "processing_mode": "increment_only",
            "fetch_before_verify": False,
            "verify_tables": None,
            "window_split": "none",
            "lookback_hours": 24,
            "overlap_seconds": 3600,
            "pg_dsn": None,
            "pg_host": None,
            "pg_port": None,
            "pg_name": None,
            "pg_user": None,
            "pg_password": None,
            "api_base": None,
            "api_token": None,
            "api_timeout": None,
            "api_page_size": None,
            "api_retry_max": None,
            "window_start": None,
            "window_end": None,
            "force_window_override": False,
            "window_split_unit": None,
            "window_split_days": None,
            "window_compensation_hours": None,
            "export_root": None,
            "log_root": None,
            "data_source": None,
            "pipeline_flow": None,
            "fetch_root": None,
            "ingest_source": None,
            "write_pretty_json": False,
            "idle_start": None,
            "idle_end": None,
            "allow_empty_advance": False,
        }
        return Namespace(**{**baseline, **kwargs})

    def test_data_source_online_sets_run_key(self):
        """An explicit ``data_source`` is written to ``overrides["run"]["data_source"]``."""
        result = build_cli_overrides(self._make_args(data_source="online"))
        assert result["run"]["data_source"] == "online"

    def test_pipeline_flow_sets_both_keys(self):
        """A legacy flow fills both ``pipeline.flow`` and ``run.data_source``."""
        legacy_args = self._make_args(pipeline_flow="FULL")
        with warnings.catch_warnings():
            # Any DeprecationWarning raised while building the overrides is
            # ignored here; this test only inspects the returned mapping.
            warnings.simplefilter("ignore", DeprecationWarning)
            result = build_cli_overrides(legacy_args)
        assert result["pipeline"]["flow"] == "FULL"
        assert result["run"]["data_source"] == "hybrid"

    def test_default_data_source_is_hybrid(self):
        """With neither attribute set, ``run.data_source`` comes out as ``"hybrid"``."""
        result = build_cli_overrides(self._make_args())
        assert result["run"]["data_source"] == "hybrid"
# ---------------------------------------------------------------------------
# 4. --pipeline + --tasks 同时使用
# ---------------------------------------------------------------------------
class TestPipelineAndTasks:
    """Parsing when ``--pipeline`` and ``--tasks`` appear on the same command line."""

    def test_pipeline_and_tasks_both_parsed(self):
        """Both option values are available on the parsed namespace, verbatim."""
        argv = ["cli", "--pipeline", "api_full", "--tasks", "ODS_MEMBER,ODS_ORDER"]
        with patch("sys.argv", argv):
            parsed = parse_args()
        assert parsed.pipeline == "api_full"
        # The task list is compared as the raw comma-separated string.
        assert parsed.tasks == "ODS_MEMBER,ODS_ORDER"