在前后端开发联调前 的提交20260223
This commit is contained in:
@@ -10,6 +10,8 @@ import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# scripts/ops 不是 Python 包,通过 sys.path 导入
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts" / "ops"))
|
||||
|
||||
@@ -718,11 +720,11 @@ class TestResolveOutputDir:
|
||||
assert result == target
|
||||
assert target.is_dir()
|
||||
|
||||
def test_fallback_to_docs_reports(self, monkeypatch):
|
||||
"""SYSTEM_ANALYZE_ROOT 未设置时回退到 docs/reports/。"""
|
||||
def test_fallback_raises_when_env_missing(self, monkeypatch):
|
||||
"""SYSTEM_ANALYZE_ROOT 未设置时抛出 KeyError。"""
|
||||
monkeypatch.delenv("SYSTEM_ANALYZE_ROOT", raising=False)
|
||||
result = resolve_output_dir()
|
||||
assert result == Path("docs/reports")
|
||||
with pytest.raises(KeyError):
|
||||
resolve_output_dir()
|
||||
|
||||
def test_creates_directory(self, tmp_path, monkeypatch):
|
||||
"""目录不存在时自动创建。"""
|
||||
@@ -1783,91 +1785,26 @@ class TestFieldDiffSubTables:
|
||||
assert "SCD2/派生列 2 个" in report
|
||||
|
||||
|
||||
class TestGuessFieldPurpose:
|
||||
"""测试 _guess_field_purpose 字段用途推测。"""
|
||||
|
||||
def test_scd2_field(self):
|
||||
from gen_dataflow_report import _guess_field_purpose
|
||||
purpose, conf = _guess_field_purpose("scd2_start_time", "test", "DWD")
|
||||
assert "SCD2" in purpose
|
||||
assert conf == "高"
|
||||
|
||||
def test_id_field(self):
|
||||
from gen_dataflow_report import _guess_field_purpose
|
||||
purpose, conf = _guess_field_purpose("id", "test", "ODS")
|
||||
assert "主键" in purpose
|
||||
assert conf == "高"
|
||||
|
||||
def test_foreign_key(self):
|
||||
from gen_dataflow_report import _guess_field_purpose
|
||||
purpose, conf = _guess_field_purpose("tenant_id", "test", "ODS")
|
||||
assert "租户" in purpose
|
||||
assert conf == "高"
|
||||
|
||||
def test_nested_site_profile(self):
|
||||
from gen_dataflow_report import _guess_field_purpose
|
||||
purpose, conf = _guess_field_purpose("siteProfile.shop_name", "test", "API")
|
||||
assert "门店" in purpose
|
||||
assert conf == "高"
|
||||
|
||||
def test_unknown_field(self):
|
||||
from gen_dataflow_report import _guess_field_purpose
|
||||
purpose, conf = _guess_field_purpose("xyzzy_foo_bar", "test", "ODS")
|
||||
assert "待分析" in purpose
|
||||
assert conf == "低"
|
||||
|
||||
def test_price_field(self):
|
||||
from gen_dataflow_report import _guess_field_purpose
|
||||
purpose, conf = _guess_field_purpose("cx_unit_price", "test", "ODS")
|
||||
assert "金额" in purpose or "价格" in purpose
|
||||
|
||||
def test_derived_field(self):
|
||||
from gen_dataflow_report import _guess_field_purpose
|
||||
purpose, conf = _guess_field_purpose("derived_flag", "test", "DWD")
|
||||
assert "派生" in purpose
|
||||
assert conf == "高"
|
||||
|
||||
def test_is_delete_field(self):
|
||||
from gen_dataflow_report import _guess_field_purpose
|
||||
purpose, conf = _guess_field_purpose("is_delete", "test", "ODS")
|
||||
assert "删除" in purpose
|
||||
assert conf == "高"
|
||||
|
||||
|
||||
class TestDiffSubTablePurposeColumn:
|
||||
"""测试差异分表中推测用途列的输出。"""
|
||||
class TestDiffSubTableColumns:
|
||||
"""测试差异分表中列的输出格式(推测用途/置信度已移除,改为人工处理)。"""
|
||||
|
||||
def test_purpose_column_in_flat_unmapped(self, tmp_path):
|
||||
"""平层未映射分表应包含推测用途、置信度、示例值、说明列。"""
|
||||
# 复用 TestFieldDiffSubTables 的数据构造
|
||||
def test_flat_unmapped_header(self, tmp_path):
|
||||
"""平层未映射分表应包含示例值、说明列(无推测用途/置信度)。"""
|
||||
from test_dataflow_analyzer import TestFieldDiffSubTables
|
||||
inst = TestFieldDiffSubTables()
|
||||
data_dir = inst._setup_diff_data_dir(tmp_path)
|
||||
report = generate_report(data_dir)
|
||||
# 表头应有推测用途 + 示例值 + 说明列
|
||||
assert "| # | JSON 字段 | 推测用途 | 置信度 | 示例值 | 说明 | 状态 |" in report
|
||||
assert "| # | JSON 字段 | 示例值 | 说明 | 状态 |" in report
|
||||
|
||||
def test_purpose_column_in_dwd_no_ods(self, tmp_path):
|
||||
"""DWD 无 ODS 源分表应包含推测用途列。"""
|
||||
def test_dwd_no_ods_header(self, tmp_path):
|
||||
"""DWD 无 ODS 源子表应包含说明列(无推测用途/置信度)。"""
|
||||
from test_dataflow_analyzer import TestFieldDiffSubTables
|
||||
inst = TestFieldDiffSubTables()
|
||||
data_dir = inst._setup_diff_data_dir(tmp_path)
|
||||
report = generate_report(data_dir)
|
||||
# scd2_ver 应被推测为 SCD2 元数据
|
||||
assert "SCD2" in report
|
||||
# derived_flag 应被推测为派生列
|
||||
assert "派生" in report
|
||||
|
||||
def test_purpose_column_in_nested(self, tmp_path):
|
||||
"""嵌套对象分表也应包含推测用途列。"""
|
||||
from test_dataflow_analyzer import TestFieldDiffSubTables
|
||||
inst = TestFieldDiffSubTables()
|
||||
data_dir = inst._setup_diff_data_dir(tmp_path)
|
||||
report = generate_report(data_dir)
|
||||
# 嵌套对象表头
|
||||
lines = report.split("\n")
|
||||
nested_headers = [l for l in lines if "推测用途" in l and "置信度" in l]
|
||||
assert len(nested_headers) >= 1
|
||||
assert "| # | DWD 表 | DWD 列 | 说明 | 状态 |" in report
|
||||
|
||||
def test_section_numbering_incremental(self, tmp_path):
|
||||
"""多个差异分表应有递增编号 1.1.1, 1.1.2, ...。"""
|
||||
@@ -1875,7 +1812,6 @@ class TestDiffSubTablePurposeColumn:
|
||||
from test_dataflow_analyzer import TestFieldDiffSubTables
|
||||
inst = TestFieldDiffSubTables()
|
||||
data_dir = inst._setup_diff_data_dir(tmp_path)
|
||||
# 添加第二个有差异的表
|
||||
manifest = _json.loads((data_dir / "collection_manifest.json").read_text(encoding="utf-8"))
|
||||
manifest["tables"].append({
|
||||
"table": "beta_table", "task_code": "ODS_BETA", "description": "第二表",
|
||||
@@ -1936,7 +1872,6 @@ class TestDiffSubTablePurposeColumn:
|
||||
inst = TestFieldDiffSubTables()
|
||||
data_dir = inst._setup_diff_data_dir(tmp_path)
|
||||
report = generate_report(data_dir)
|
||||
# extra_flat 在 json_trees 中有 samples=["x"],应出现在差异子表的 extra_flat 行
|
||||
lines = report.split("\n")
|
||||
flat_rows = [l for l in lines if "extra_flat" in l and "未映射" in l]
|
||||
assert len(flat_rows) >= 1
|
||||
@@ -1948,7 +1883,6 @@ class TestDiffSubTablePurposeColumn:
|
||||
from test_dataflow_analyzer import TestFieldDiffSubTables
|
||||
inst = TestFieldDiffSubTables()
|
||||
data_dir = inst._setup_diff_data_dir(tmp_path)
|
||||
# 注入 bd_descriptions 中 ods_only_col 的说明
|
||||
bd = {"ods_table": "alpha_table",
|
||||
"ods_fields": {"ods_only_col": "仅ODS存在的测试列"},
|
||||
"dwd_fields": {}}
|
||||
@@ -1956,16 +1890,8 @@ class TestDiffSubTablePurposeColumn:
|
||||
_json.dumps(bd, ensure_ascii=False), encoding="utf-8"
|
||||
)
|
||||
report = generate_report(data_dir)
|
||||
# 说明应出现在 ods_only_col 所在行
|
||||
lines = report.split("\n")
|
||||
ods_only_rows = [l for l in lines if "ods_only_col" in l and "无 JSON 源" in l]
|
||||
assert len(ods_only_rows) >= 1
|
||||
assert "仅ODS存在的测试列" in ods_only_rows[0]
|
||||
|
||||
def test_dwd_no_ods_has_desc_column(self, tmp_path):
|
||||
"""DWD 无 ODS 源子表应包含说明列。"""
|
||||
from test_dataflow_analyzer import TestFieldDiffSubTables
|
||||
inst = TestFieldDiffSubTables()
|
||||
data_dir = inst._setup_diff_data_dir(tmp_path)
|
||||
report = generate_report(data_dir)
|
||||
assert "| # | DWD 表 | DWD 列 | 推测用途 | 置信度 | 说明 | 状态 |" in report
|
||||
|
||||
343
tests/test_property_1_fact_mappings.py
Normal file
343
tests/test_property_1_fact_mappings.py
Normal file
@@ -0,0 +1,343 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Feature: dataflow-field-completion, Property 1: FACT_MAPPINGS 字段映射正确性
|
||||
|
||||
**Validates: Requirements 1.1, 1.2, 2.1, 4.1, 5.1, 9.1**
|
||||
|
||||
对于任意 ODS 表行和任意已配置的 FACT_MAPPINGS 条目 (dwd_col, ods_expr, cast_type),
|
||||
当 DWD 加载任务执行后,DWD 目标行中 dwd_col 列的值应等于从 ODS 行中按 ods_expr
|
||||
提取并按 cast_type 转换后的值。
|
||||
|
||||
本测试聚焦 A 类表(新增 DWD 列 + FACT_MAPPINGS):
|
||||
- dim_assistant_ex
|
||||
- dwd_assistant_service_log_ex
|
||||
- dwd_store_goods_sale
|
||||
- dwd_member_balance_change_ex
|
||||
- dim_table_ex
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from hypothesis import given, settings, assume, HealthCheck
|
||||
import hypothesis.strategies as st
|
||||
|
||||
# ── 将 ETL 模块加入 sys.path ──
|
||||
_ETL_ROOT = Path(__file__).resolve().parent.parent / "apps" / "etl" / "connectors" / "feiqiu"
|
||||
if str(_ETL_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_ETL_ROOT))
|
||||
|
||||
from tasks.dwd.dwd_load_task import DwdLoadTask
|
||||
|
||||
# ── A 类表列表 ──
|
||||
A_CLASS_TABLES = [
|
||||
"dwd.dim_assistant_ex",
|
||||
"dwd.dwd_assistant_service_log_ex",
|
||||
"dwd.dwd_store_goods_sale",
|
||||
"dwd.dwd_member_balance_change_ex",
|
||||
"dwd.dim_table_ex",
|
||||
]
|
||||
|
||||
# ── 辅助:构造最小可用的 DwdLoadTask 实例 ──
|
||||
|
||||
def _make_task() -> DwdLoadTask:
    """Build a DwdLoadTask whose config, db, api and logger are all mocks.

    Only ``config.get`` has real behaviour: it answers three fixed keys and
    falls back to the caller-supplied default for anything else.
    """
    canned_values = {
        "app.store_id": 1,
        "app.timezone": "Asia/Shanghai",
        "dwd.fact_upsert": True,
    }

    def _lookup(key, default=None):
        return canned_values.get(key, default)

    fake_config = MagicMock()
    fake_config.get = _lookup
    # Positional order: config, db, api, logger.
    return DwdLoadTask(fake_config, MagicMock(), MagicMock(), MagicMock())
|
||||
|
||||
|
||||
# ── 收集 A 类表的所有 FACT_MAPPINGS 条目 ──
|
||||
|
||||
def _collect_a_class_mappings() -> list[tuple[str, str, str, str | None]]:
    """Return ``(dwd_table, dwd_col, ods_expr, cast_type)`` tuples for every A-class table.

    Tables missing from ``FACT_MAPPINGS`` simply contribute no rows.
    """
    return [
        (dwd_table, target_col, source_expr, cast)
        for dwd_table in A_CLASS_TABLES
        for target_col, source_expr, cast in DwdLoadTask.FACT_MAPPINGS.get(dwd_table, [])
    ]
|
||||
|
||||
|
||||
_A_CLASS_MAPPING_ENTRIES = _collect_a_class_mappings()
|
||||
|
||||
|
||||
# ── 已知的合法 cast_type 值 ──
|
||||
_VALID_CAST_TYPES = {
|
||||
None, "bigint", "integer", "numeric", "decimal",
|
||||
"timestamptz", "boolean", "date", "text",
|
||||
}
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 1.1: A 类表 FACT_MAPPINGS 条目结构完整性
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_a_class_tables_have_fact_mappings():
    """Every A-class table has at least one entry in FACT_MAPPINGS."""
    fact_mappings = DwdLoadTask.FACT_MAPPINGS
    for table in A_CLASS_TABLES:
        entry_count = len(fact_mappings.get(table, []))
        assert entry_count > 0, f"{table} 在 FACT_MAPPINGS 中无条目"
|
||||
|
||||
|
||||
def test_a_class_mappings_are_valid_tuples():
    """Each collected A-class entry has a non-empty string target/source and a str-or-None cast."""
    for table, dwd_col, ods_expr, cast_type in _A_CLASS_MAPPING_ENTRIES:
        target_ok = isinstance(dwd_col, str) and bool(dwd_col)
        assert target_ok, f"{table}: dwd_col 不能为空"

        source_ok = isinstance(ods_expr, str) and bool(ods_expr)
        assert source_ok, f"{table}: ods_expr 不能为空"

        cast_ok = isinstance(cast_type, (str, type(None)))
        assert cast_ok, f"{table}.{dwd_col}: cast_type 必须为 None 或字符串"
|
||||
|
||||
|
||||
def test_a_class_cast_types_are_valid():
    """Every A-class cast_type belongs to the known set of legal values."""
    for table, dwd_col, _ods_expr, cast_type in _A_CLASS_MAPPING_ENTRIES:
        is_known = cast_type in _VALID_CAST_TYPES
        assert is_known, f"{table}.{dwd_col}: 未知 cast_type={cast_type!r}"
|
||||
|
||||
|
||||
def test_a_class_no_duplicate_dwd_cols():
    """No A-class DWD table may map the same target column twice.

    Names are compared case-insensitively. Other checks in this module key
    the mapping by ``dwd_col.lower()``, so two entries that differ only in
    letter case would silently overwrite each other there and must be
    reported as duplicates here.
    """
    for table in A_CLASS_TABLES:
        entries = DwdLoadTask.FACT_MAPPINGS.get(table, [])
        seen: set[str] = set()
        for entry in entries:
            col = entry[0]
            key = col.lower()
            assert key not in seen, \
                f"{table}: dwd_col={col!r} 重复出现"
            seen.add(key)
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 1.2: _cast_expr 对 A 类表映射条目的转换正确性
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
# 生成策略:从 A 类表映射条目中随机选取
|
||||
_mapping_entry_strategy = st.sampled_from(_A_CLASS_MAPPING_ENTRIES)
|
||||
|
||||
# 生成策略:模拟 ODS 列值(用于验证 _cast_expr 的 SQL 表达式结构)
|
||||
_ods_value_strategy = st.one_of(
|
||||
st.none(),
|
||||
st.integers(min_value=-999999, max_value=999999),
|
||||
st.text(min_size=0, max_size=50, alphabet=st.characters(
|
||||
whitelist_categories=("L", "N", "P", "Z"),
|
||||
blacklist_characters=("\x00",),
|
||||
)),
|
||||
st.floats(min_value=-1e6, max_value=1e6, allow_nan=False, allow_infinity=False),
|
||||
)
|
||||
|
||||
|
||||
@given(entry=_mapping_entry_strategy)
@settings(max_examples=200, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_cast_expr_produces_valid_sql_for_a_class(entry):
    """
    **Validates: Requirements 1.1, 1.2, 2.1, 4.1, 5.1, 9.1**

    For any A-class FACT_MAPPINGS entry, ``_cast_expr`` must return a
    non-empty SQL expression string that still mentions the ODS source and,
    when a cast type is configured, shows the matching cast syntax.
    """
    table, dwd_col, ods_expr, cast_type = entry
    result = _make_task()._cast_expr(ods_expr, cast_type)

    # Baseline: a non-empty string came back.
    assert result and isinstance(result, str), \
        f"{table}.{dwd_col}: _cast_expr 返回空结果"

    # Unless the source is the literal NULL, the source expression (with or
    # without its surrounding double quotes) has to appear in the output.
    is_null_literal = ods_expr.upper() == "NULL"
    if not is_null_literal:
        candidates = (ods_expr.strip('"'), ods_expr)
        assert any(candidate in result for candidate in candidates), \
            f"{table}.{dwd_col}: _cast_expr 结果 {result!r} 中未包含 ODS 表达式 {ods_expr!r}"

    # No cast configured: nothing further to verify.
    if not cast_type:
        return

    wanted = cast_type.lower()
    if wanted in ("bigint", "integer", "numeric", "decimal"):
        has_cast_syntax = "CAST" in result.upper() or "::" in result
        assert has_cast_syntax, \
            f"{table}.{dwd_col}: 数值类型转换缺少 CAST/:: 语法"
    elif wanted == "timestamptz":
        assert "timestamptz" in result.lower(), \
            f"{table}.{dwd_col}: 时间类型转换缺少 timestamptz"
    elif wanted == "boolean":
        assert "boolean" in result.lower(), \
            f"{table}.{dwd_col}: 布尔类型转换缺少 boolean"
|
||||
|
||||
|
||||
@given(entry=_mapping_entry_strategy)
@settings(max_examples=200, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_cast_expr_is_deterministic(entry):
    """
    **Validates: Requirements 1.1, 1.2, 2.1, 4.1, 5.1, 9.1**

    Calling ``_cast_expr`` twice on the same task with the same
    FACT_MAPPINGS entry must give equal results.
    """
    _table, _dwd_col, ods_expr, cast_type = entry
    task = _make_task()
    first, second = (task._cast_expr(ods_expr, cast_type) for _ in range(2))
    assert first == second, "同一输入的 _cast_expr 结果不一致"
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 1.3: _build_column_mapping 对 A 类表的映射注册正确性
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(table_name=st.sampled_from(A_CLASS_TABLES))
@settings(max_examples=50, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_build_column_mapping_registers_all_explicit_entries(table_name):
    """
    **Validates: Requirements 1.1, 1.2, 2.1, 4.1, 5.1, 9.1**

    For any A-class table, ``_build_column_mapping`` must register every
    FACT_MAPPINGS entry in the mapping dict it returns, keyed by the
    lower-cased DWD column and carrying the original source and cast type.
    """
    entries = DwdLoadTask.FACT_MAPPINGS.get(table_name, [])
    ods_table = DwdLoadTask.TABLE_MAP.get(table_name, "")

    # Simulated ODS column list: four fixed columns plus every mapping source
    # that is a plain identifier once its double quotes are stripped.
    # Compound expressions (e.g. containing -> or CASE) are left out.
    unquoted_sources = (expr.strip('"') for _, expr, _ in entries)
    ods_cols = ["fetched_at", "id", "site_id", "tenant_id"] + [
        name for name in unquoted_sources if name.isidentifier()
    ]

    # Simplification: assume the primary key is just "id".
    # A mock cursor is handed over; how the method uses it is not visible here.
    mapping = _make_task()._build_column_mapping(
        MagicMock(), table_name, ods_table, ["id"], ods_cols
    )

    # NOTE(review): a "processed" key presumably marks an error/summary dict
    # (the original comment mentions a missing fetched_at) - nothing to check.
    if "processed" in mapping:
        return

    # Every explicit entry has to be present with its source and cast intact.
    for dwd_col, ods_expr, cast_type in entries:
        lookup_key = dwd_col.lower()
        assert lookup_key in mapping, \
            f"{table_name}: FACT_MAPPINGS 条目 {dwd_col!r} 未被注册到映射中"
        src, ct = mapping[lookup_key]
        assert src == ods_expr, \
            f"{table_name}.{dwd_col}: 映射源应为 {ods_expr!r},实际为 {src!r}"
        assert ct == cast_type, \
            f"{table_name}.{dwd_col}: cast_type 应为 {cast_type!r},实际为 {ct!r}"
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 1.4: A 类表特定字段映射验证(需求级别)
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
# 需求 1: assistant_accounts_master → dim_assistant_ex
|
||||
_REQ1_EXPECTED = {
|
||||
"system_role_id": ("system_role_id", None),
|
||||
"job_num": ("job_num", None),
|
||||
"cx_unit_price": ("cx_unit_price", None),
|
||||
"pd_unit_price": ("pd_unit_price", None),
|
||||
}
|
||||
|
||||
# 需求 2: assistant_service_records → dwd_assistant_service_log_ex
|
||||
_REQ2_EXPECTED = {
|
||||
"operator_id": ("operator_id", None),
|
||||
"operator_name": ("operator_name", None),
|
||||
}
|
||||
|
||||
# 需求 4: store_goods_sales_records → dwd_store_goods_sale
|
||||
_REQ4_EXPECTED = {
|
||||
"discount_money": ("discount_money", None),
|
||||
"discount_price": ("discount_price", None),
|
||||
}
|
||||
|
||||
# 需求 5: member_balance_changes → dwd_member_balance_change_ex
|
||||
_REQ5_EXPECTED = {
|
||||
"relate_id": ("relate_id", None),
|
||||
}
|
||||
|
||||
# 需求 9: site_tables_master → dim_table_ex
|
||||
_REQ9_EXPECTED = {
|
||||
"create_time": ("create_time", None),
|
||||
"light_status": ("light_status", None),
|
||||
"tablestatusname": ("tablestatusname", None),
|
||||
"sitename": ("sitename", None),
|
||||
"applet_qr_code_url": ('"appletQrCodeUrl"', None),
|
||||
"audit_status": ("audit_status", None),
|
||||
"charge_free": ("charge_free", None),
|
||||
"delay_lights_time": ("delay_lights_time", None),
|
||||
"is_rest_area": ("is_rest_area", None),
|
||||
"only_allow_groupon": ("only_allow_groupon", None),
|
||||
"order_delay_time": ("order_delay_time", None),
|
||||
"self_table": ("self_table", None),
|
||||
"temporary_light_second": ("temporary_light_second", None),
|
||||
"virtual_table": ("virtual_table", None),
|
||||
}
|
||||
|
||||
# 汇总:(DWD 表, 期望映射字典, 需求编号)
|
||||
_REQUIREMENT_CHECKS = [
|
||||
("dwd.dim_assistant_ex", _REQ1_EXPECTED, "1.1, 1.2"),
|
||||
("dwd.dwd_assistant_service_log_ex", _REQ2_EXPECTED, "2.1"),
|
||||
("dwd.dwd_store_goods_sale", _REQ4_EXPECTED, "4.1"),
|
||||
("dwd.dwd_member_balance_change_ex", _REQ5_EXPECTED, "5.1"),
|
||||
("dwd.dim_table_ex", _REQ9_EXPECTED, "9.1"),
|
||||
]
|
||||
|
||||
|
||||
@given(check=st.sampled_from(_REQUIREMENT_CHECKS))
@settings(max_examples=50, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_requirement_specific_mappings_exist(check):
    """
    **Validates: Requirements 1.1, 1.2, 2.1, 4.1, 5.1, 9.1**

    For each requirement's expected field mappings, FACT_MAPPINGS must hold
    an entry with the same ODS source and cast type.
    """
    dwd_table, expected_mappings, req_ids = check

    # Index the configured entries: lower-cased dwd_col -> (ods_expr, cast_type).
    actual = {}
    for item in DwdLoadTask.FACT_MAPPINGS.get(dwd_table, []):
        actual[item[0].lower()] = (item[1], item[2])

    for dwd_col, expectation in expected_mappings.items():
        expected_src, expected_cast = expectation
        assert dwd_col in actual, \
            f"[Req {req_ids}] {dwd_table}: 缺少 dwd_col={dwd_col!r} 的映射条目"
        actual_src, actual_cast = actual[dwd_col]
        assert actual_src == expected_src, \
            f"[Req {req_ids}] {dwd_table}.{dwd_col}: ODS 源应为 {expected_src!r},实际为 {actual_src!r}"
        assert actual_cast == expected_cast, \
            f"[Req {req_ids}] {dwd_table}.{dwd_col}: cast_type 应为 {expected_cast!r},实际为 {actual_cast!r}"
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 1.5: A 类表 FACT_MAPPINGS 与 TABLE_MAP 一致性
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(table_name=st.sampled_from(A_CLASS_TABLES))
@settings(max_examples=50, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_a_class_tables_registered_in_table_map(table_name):
    """
    **Validates: Requirements 1.1, 1.2, 2.1, 4.1, 5.1, 9.1**

    Each A-class table must be registered in both TABLE_MAP and
    FACT_MAPPINGS, and its TABLE_MAP value must be a non-empty string.
    """
    table_map = DwdLoadTask.TABLE_MAP
    fact_mappings = DwdLoadTask.FACT_MAPPINGS

    assert table_name in table_map, \
        f"{table_name} 未在 TABLE_MAP 中注册"
    assert table_name in fact_mappings, \
        f"{table_name} 未在 FACT_MAPPINGS 中注册"

    # The ODS source table name must be a non-empty string.
    ods_table = table_map[table_name]
    source_ok = bool(ods_table) and isinstance(ods_table, str)
    assert source_ok, \
        f"{table_name}: TABLE_MAP 中的 ODS 表名无效"
|
||||
222
tests/test_property_2_fact_mappings_integrity.py
Normal file
222
tests/test_property_2_fact_mappings_integrity.py
Normal file
@@ -0,0 +1,222 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Feature: dataflow-field-completion, Property 2: FACT_MAPPINGS 引用完整性
|
||||
|
||||
**Validates: Requirements 6.3**
|
||||
|
||||
对于任意 FACT_MAPPINGS 中的映射条目,其 DWD 目标列名必须存在于对应 DWD 表的列定义中,
|
||||
其 ODS 源表达式引用的列名必须存在于对应 ODS 表的列定义中(或为合法的 SQL 表达式)。
|
||||
|
||||
本测试覆盖所有 FACT_MAPPINGS 条目(不仅限于 A 类表),聚焦以下可静态验证的属性:
|
||||
1. 所有 FACT_MAPPINGS 的 key 都在 TABLE_MAP 中注册
|
||||
2. 所有条目格式为 (str, str, str|None) 三元组
|
||||
3. 同一 DWD 表内无重复 dwd_col
|
||||
4. 所有 ods_expr 非空
|
||||
5. 所有 cast_type 值在已知合法集合内
|
||||
6. B 类表(recharge_settlements → dwd_recharge_order)的 5 个新映射条目存在且正确
|
||||
7. FACT_MAPPINGS 中引用的 DWD 表必须在 TABLE_MAP 中有对应的 ODS 源表
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from hypothesis import given, settings, HealthCheck
|
||||
import hypothesis.strategies as st
|
||||
|
||||
# ── 将 ETL 模块加入 sys.path ──
|
||||
_ETL_ROOT = Path(__file__).resolve().parent.parent / "apps" / "etl" / "connectors" / "feiqiu"
|
||||
if str(_ETL_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_ETL_ROOT))
|
||||
|
||||
from tasks.dwd.dwd_load_task import DwdLoadTask
|
||||
|
||||
# ── 已知的合法 cast_type 值 ──
|
||||
_VALID_CAST_TYPES = {
|
||||
None, "bigint", "integer", "numeric", "decimal",
|
||||
"timestamptz", "boolean", "date", "text", "TEXT[]",
|
||||
}
|
||||
|
||||
# ── 收集所有 FACT_MAPPINGS 条目为 (dwd_table, dwd_col, ods_expr, cast_type) 四元组 ──
|
||||
def _collect_all_mappings() -> list[tuple[str, str, str, str | None]]:
    """Flatten every FACT_MAPPINGS table into ``(dwd_table, dwd_col, ods_expr, cast_type)`` tuples."""
    return [
        (dwd_table, target_col, source_expr, cast)
        for dwd_table, table_entries in DwdLoadTask.FACT_MAPPINGS.items()
        for target_col, source_expr, cast in table_entries
    ]
|
||||
|
||||
|
||||
_ALL_MAPPING_ENTRIES = _collect_all_mappings()
|
||||
_ALL_FACT_TABLES = list(DwdLoadTask.FACT_MAPPINGS.keys())
|
||||
|
||||
# ── B 类表期望映射(recharge_settlements → dwd_recharge_order)──
|
||||
_REQ6_EXPECTED = {
|
||||
"pl_coupon_sale_amount": ("plcouponsaleamount", None),
|
||||
"mervou_sales_amount": ("mervousalesamount", None),
|
||||
"electricity_money": ("electricitymoney", None),
|
||||
"real_electricity_money": ("realelectricitymoney", None),
|
||||
"electricity_adjust_money": ("electricityadjustmoney", None),
|
||||
}
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 2.1: FACT_MAPPINGS 结构完整性 — 所有 key 都在 TABLE_MAP 中
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_all_fact_mapping_tables_in_table_map():
    """Every FACT_MAPPINGS key must also be registered in TABLE_MAP."""
    registered = DwdLoadTask.TABLE_MAP
    for table in DwdLoadTask.FACT_MAPPINGS:
        is_registered = table in registered
        assert is_registered, f"FACT_MAPPINGS 中的 {table} 未在 TABLE_MAP 中注册"
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 2.2: 所有条目格式为 (str, str, str|None) 三元组
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_all_fact_mapping_entries_are_valid_tuples():
    """Each FACT_MAPPINGS entry is a ``(dwd_col, ods_expr, cast_type)`` triple with the right types."""
    for table, entries in DwdLoadTask.FACT_MAPPINGS.items():
        for i, entry in enumerate(entries):
            # Shape first: a 3-element tuple or list.
            is_triple = isinstance(entry, (tuple, list)) and len(entry) == 3
            assert is_triple, \
                f"{table}[{i}]: 条目应为三元组,实际为 {type(entry).__name__}(len={len(entry) if hasattr(entry, '__len__') else '?'})"

            dwd_col, ods_expr, cast_type = entry

            # Target and source must be strings that are not blank.
            target_ok = isinstance(dwd_col, str) and bool(dwd_col.strip())
            assert target_ok, \
                f"{table}[{i}]: dwd_col 必须为非空字符串,实际为 {dwd_col!r}"
            source_ok = isinstance(ods_expr, str) and bool(ods_expr.strip())
            assert source_ok, \
                f"{table}[{i}]: ods_expr 必须为非空字符串,实际为 {ods_expr!r}"

            # The cast is optional, but must be a string when given.
            cast_ok = isinstance(cast_type, (str, type(None)))
            assert cast_ok, \
                f"{table}[{i}].{dwd_col}: cast_type 必须为 None 或字符串,实际为 {type(cast_type).__name__}"
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 2.3: 同一 DWD 表内无重复 dwd_col
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_no_duplicate_dwd_cols_across_all_tables():
    """Within one DWD table no dwd_col may repeat (compared case-insensitively)."""
    for table, entries in DwdLoadTask.FACT_MAPPINGS.items():
        already_seen = set()
        for dwd_col, _ods_expr, _cast_type in entries:
            normalized = dwd_col.lower()
            is_first_use = normalized not in already_seen
            assert is_first_use, f"{table}: dwd_col={dwd_col!r} 重复出现"
            already_seen.add(normalized)
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 2.4: 所有 cast_type 值在已知合法集合内
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_all_cast_types_are_valid():
    """The cast_type of every FACT_MAPPINGS entry belongs to the known legal set."""
    for table, dwd_col, _ods_expr, cast_type in _ALL_MAPPING_ENTRIES:
        is_known = cast_type in _VALID_CAST_TYPES
        assert is_known, \
            f"{table}.{dwd_col}: 未知 cast_type={cast_type!r},合法值为 {_VALID_CAST_TYPES}"
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 2.5: 所有 ods_expr 非空
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_ods_expr_not_empty():
    """No FACT_MAPPINGS entry may have an ods_expr that is empty or only whitespace."""
    for table, dwd_col, ods_expr, _cast_type in _ALL_MAPPING_ENTRIES:
        trimmed = ods_expr.strip()
        assert trimmed, f"{table}.{dwd_col}: ods_expr 为空字符串"
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 2.6: B 类表特定映射验证(需求 6.3)
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_recharge_settlements_mappings_exist():
    """
    **Validates: Requirements 6.3**

    The five new B-class mappings for recharge_settlements ->
    dwd_recharge_order must exist with the expected ODS source expression
    and cast_type.
    """
    dwd_table = "dwd.dwd_recharge_order"
    entries = DwdLoadTask.FACT_MAPPINGS.get(dwd_table, [])
    assert entries, f"{dwd_table} 在 FACT_MAPPINGS 中无条目"

    # Index the configured entries: lower-cased dwd_col -> (ods_expr, cast_type).
    actual = {}
    for item in entries:
        actual[item[0].lower()] = (item[1], item[2])

    for dwd_col, expectation in _REQ6_EXPECTED.items():
        expected_src, expected_cast = expectation
        assert dwd_col in actual, \
            f"[Req 6.3] {dwd_table}: 缺少 dwd_col={dwd_col!r} 的映射条目"
        actual_src, actual_cast = actual[dwd_col]
        assert actual_src == expected_src, \
            f"[Req 6.3] {dwd_table}.{dwd_col}: ODS 源应为 {expected_src!r},实际为 {actual_src!r}"
        assert actual_cast == expected_cast, \
            f"[Req 6.3] {dwd_table}.{dwd_col}: cast_type 应为 {expected_cast!r},实际为 {actual_cast!r}"
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 2.7: 交叉引用 — FACT_MAPPINGS 的 DWD 表在 TABLE_MAP 中有 ODS 源表
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_fact_mapping_tables_have_ods_source():
    """Every DWD table named in FACT_MAPPINGS has a non-blank ODS source table in TABLE_MAP."""
    source_of = DwdLoadTask.TABLE_MAP
    for table in DwdLoadTask.FACT_MAPPINGS:
        ods_table = source_of.get(table)
        # A missing key yields None, which fails the same check as a blank name.
        has_source = isinstance(ods_table, str) and bool(ods_table.strip())
        assert has_source, f"{table}: TABLE_MAP 中的 ODS 源表为空或不存在"
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Hypothesis 属性测试:随机选取 FACT_MAPPINGS 条目验证结构
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
_mapping_entry_strategy = st.sampled_from(_ALL_MAPPING_ENTRIES)
|
||||
|
||||
|
||||
@given(entry=_mapping_entry_strategy)
@settings(max_examples=300, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_random_fact_mapping_entry_structure(entry):
    """
    **Validates: Requirements 6.3**

    For any sampled FACT_MAPPINGS entry, verify that:
    - dwd_col is a non-blank string
    - ods_expr is a non-blank string
    - cast_type is in the legal set
    - the owning DWD table is registered in TABLE_MAP
    """
    table, dwd_col, ods_expr, cast_type = entry

    # Structural checks on the entry itself.
    target_ok = isinstance(dwd_col, str) and bool(dwd_col.strip())
    assert target_ok, f"{table}: dwd_col 为空"
    source_ok = isinstance(ods_expr, str) and bool(ods_expr.strip())
    assert source_ok, f"{table}.{dwd_col}: ods_expr 为空"
    cast_known = cast_type in _VALID_CAST_TYPES
    assert cast_known, f"{table}.{dwd_col}: 未知 cast_type={cast_type!r}"

    # Cross-reference against TABLE_MAP.
    table_map = DwdLoadTask.TABLE_MAP
    assert table in table_map, f"{table} 未在 TABLE_MAP 中注册"
    ods_table = table_map[table]
    ods_ok = bool(ods_table) and isinstance(ods_table, str)
    assert ods_ok, f"{table}: TABLE_MAP 中的 ODS 源表无效"
|
||||
|
||||
|
||||
@given(table_name=st.sampled_from(_ALL_FACT_TABLES))
@settings(max_examples=100, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_random_table_no_duplicate_dwd_cols(table_name):
    """
    **Validates: Requirements 6.3**

    For any sampled FACT_MAPPINGS table, no dwd_col may appear twice
    (compared after lower-casing).
    """
    already_seen = set()
    for dwd_col, _ods_expr, _cast_type in DwdLoadTask.FACT_MAPPINGS[table_name]:
        normalized = dwd_col.lower()
        is_first_use = normalized not in already_seen
        assert is_first_use, f"{table_name}: dwd_col={dwd_col!r} 重复出现"
        already_seen.add(normalized)
|
||||
214
tests/test_property_3_table_map_coverage.py
Normal file
214
tests/test_property_3_table_map_coverage.py
Normal file
@@ -0,0 +1,214 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Feature: dataflow-field-completion, Property 3: TABLE_MAP 覆盖完整性
|
||||
|
||||
**Validates: Requirements 7.2, 8.2**
|
||||
|
||||
对于任意在 TABLE_MAP 中注册的 DWD 表,该表的所有非 SCD2 列要么在 FACT_MAPPINGS
|
||||
中有显式映射,要么在对应 ODS 表中存在同名列(自动映射)。
|
||||
|
||||
本测试聚焦以下可静态验证的属性:
|
||||
1. TABLE_MAP 所有条目的 ODS 源表非空
|
||||
2. C 类表在 TABLE_MAP 中注册
|
||||
3. C 类表在 FACT_MAPPINGS 中有条目
|
||||
4. C 类表映射字段数量与期望一致
|
||||
5. TABLE_MAP 与 FACT_MAPPINGS 交叉一致性
|
||||
6. hypothesis 属性测试:随机 TABLE_MAP 条目结构验证
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from hypothesis import given, settings, HealthCheck
|
||||
import hypothesis.strategies as st
|
||||
|
||||
# ── 将 ETL 模块加入 sys.path ──
|
||||
_ETL_ROOT = Path(__file__).resolve().parent.parent / "apps" / "etl" / "connectors" / "feiqiu"
|
||||
if str(_ETL_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_ETL_ROOT))
|
||||
|
||||
from tasks.dwd.dwd_load_task import DwdLoadTask
|
||||
|
||||
# ── C 类表定义 ──
|
||||
C_CLASS_TABLES = [
|
||||
"dwd.dwd_goods_stock_summary",
|
||||
"dwd.dwd_goods_stock_movement",
|
||||
]
|
||||
|
||||
# ── C 类表期望映射字段 ──
|
||||
|
||||
# goods_stock_summary → dwd_goods_stock_summary(14 个字段)
|
||||
_GOODS_STOCK_SUMMARY_EXPECTED_COLS = {
|
||||
"site_goods_id", "goods_name", "goods_unit", "goods_category_id",
|
||||
"goods_category_second_id", "category_name", "range_start_stock",
|
||||
"range_end_stock", "range_in", "range_out", "range_sale",
|
||||
"range_sale_money", "range_inventory", "current_stock",
|
||||
}
|
||||
|
||||
# goods_stock_movements → dwd_goods_stock_movement(19 个字段)
|
||||
_GOODS_STOCK_MOVEMENT_EXPECTED_COLS = {
|
||||
"site_goods_stock_id", "tenant_id", "site_id", "site_goods_id",
|
||||
"goods_name", "goods_category_id", "goods_second_category_id",
|
||||
"unit", "price", "stock_type", "change_num", "start_num", "end_num",
|
||||
"change_num_a", "start_num_a", "end_num_a", "remark", "operator_name",
|
||||
"create_time",
|
||||
}
|
||||
|
||||
# ── 收集所有 TABLE_MAP 条目 ──
|
||||
_ALL_TABLE_MAP_ENTRIES = list(DwdLoadTask.TABLE_MAP.items())
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 3.1: TABLE_MAP 所有条目的 ODS 源表非空
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_all_table_map_entries_have_ods_source():
    """Every TABLE_MAP value must be a non-blank string naming the ODS source."""
    for dwd_table, ods_table in DwdLoadTask.TABLE_MAP.items():
        # Falsy values, non-strings and whitespace-only strings are all rejected.
        source_is_usable = (
            ods_table and isinstance(ods_table, str) and ods_table.strip()
        )
        assert source_is_usable, (
            f"{dwd_table}: TABLE_MAP 中的 ODS 源表为空或无效"
        )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 3.2: C 类表在 TABLE_MAP 中注册
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_c_class_tables_registered_in_table_map():
    """
    **Validates: Requirements 7.2, 8.2**

    Each table listed in C_CLASS_TABLES must be a key of
    DwdLoadTask.TABLE_MAP, and its mapped value must be a non-empty string.
    """
    for table in C_CLASS_TABLES:
        is_registered = table in DwdLoadTask.TABLE_MAP
        assert is_registered, (
            f"C 类表 {table} 未在 TABLE_MAP 中注册"
        )

        ods_table = DwdLoadTask.TABLE_MAP[table]
        has_valid_source = ods_table and isinstance(ods_table, str)
        assert has_valid_source, (
            f"C 类表 {table}: TABLE_MAP 中的 ODS 源表无效"
        )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 3.3: C 类表在 FACT_MAPPINGS 中有条目
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_c_class_tables_have_fact_mappings():
    """
    **Validates: Requirements 7.2, 8.2**

    Each table listed in C_CLASS_TABLES must have at least one entry in
    DwdLoadTask.FACT_MAPPINGS; a missing key counts as zero entries.
    """
    for table in C_CLASS_TABLES:
        mapping_count = len(DwdLoadTask.FACT_MAPPINGS.get(table, []))
        assert mapping_count > 0, (
            f"C 类表 {table} 在 FACT_MAPPINGS 中无条目"
        )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 3.4: goods_stock_summary 14 个字段全覆盖
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_goods_stock_summary_mapping_coverage():
    """
    **Validates: Requirements 7.2**

    FACT_MAPPINGS for dwd.dwd_goods_stock_summary must contain exactly 14
    entries whose first elements (lower-cased) equal the expected column set.
    """
    table = "dwd.dwd_goods_stock_summary"
    entries = DwdLoadTask.FACT_MAPPINGS.get(table, [])

    # The first element of each mapping entry is the DWD column name.
    actual_cols = set()
    for entry in entries:
        actual_cols.add(entry[0].lower())

    # Entry count.
    assert len(entries) == 14, (
        f"{table}: 期望 14 个映射条目,实际 {len(entries)} 个"
    )

    # Expected columns that have no mapping.
    missing = _GOODS_STOCK_SUMMARY_EXPECTED_COLS.difference(actual_cols)
    assert not missing, (
        f"{table}: 缺少映射字段 {missing}"
    )

    # Mapped columns that are not expected.
    extra = actual_cols.difference(_GOODS_STOCK_SUMMARY_EXPECTED_COLS)
    assert not extra, (
        f"{table}: 存在多余映射字段 {extra}"
    )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 3.5: goods_stock_movement 19 个字段全覆盖
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_goods_stock_movement_mapping_coverage():
    """
    **Validates: Requirements 8.2**

    FACT_MAPPINGS for dwd.dwd_goods_stock_movement must contain exactly 19
    entries whose first elements (lower-cased) equal the expected column set.
    """
    table = "dwd.dwd_goods_stock_movement"
    entries = DwdLoadTask.FACT_MAPPINGS.get(table, [])

    # The first element of each mapping entry is the DWD column name.
    actual_cols = set()
    for entry in entries:
        actual_cols.add(entry[0].lower())

    # Entry count.
    assert len(entries) == 19, (
        f"{table}: 期望 19 个映射条目,实际 {len(entries)} 个"
    )

    # Expected columns that have no mapping.
    missing = _GOODS_STOCK_MOVEMENT_EXPECTED_COLS.difference(actual_cols)
    assert not missing, (
        f"{table}: 缺少映射字段 {missing}"
    )

    # Mapped columns that are not expected.
    extra = actual_cols.difference(_GOODS_STOCK_MOVEMENT_EXPECTED_COLS)
    assert not extra, (
        f"{table}: 存在多余映射字段 {extra}"
    )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 3.6: FACT_MAPPINGS 是 TABLE_MAP 的子集
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_fact_mappings_subset_of_table_map():
    """Every key of FACT_MAPPINGS must also be a key of TABLE_MAP."""
    registered_tables = DwdLoadTask.TABLE_MAP
    for table in DwdLoadTask.FACT_MAPPINGS:
        assert table in registered_tables, (
            f"FACT_MAPPINGS 中的 {table} 未在 TABLE_MAP 中注册"
        )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Hypothesis 属性测试:随机 TABLE_MAP 条目结构验证
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
# Strategy that picks one (dwd_table, ods_table) pair from the snapshot list.
_table_map_entry_strategy = st.sampled_from(_ALL_TABLE_MAP_ENTRIES)


@given(entry=_table_map_entry_strategy)
@settings(max_examples=200, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_random_table_map_entry_valid(entry):
    """
    **Validates: Requirements 7.2, 8.2**

    For a sampled TABLE_MAP entry, check that:
    - the key is a string starting with "dwd."
    - the value is a string starting with "ods."
    - every FACT_MAPPINGS entry for that key (if any) is a 3-item
      tuple/list of (non-blank str, non-blank str, str-or-None)
    """
    dwd_table, ods_table = entry

    # DWD table name format.
    dwd_name_ok = isinstance(dwd_table, str) and dwd_table.startswith("dwd.")
    assert dwd_name_ok, (
        f"TABLE_MAP key {dwd_table!r} 不以 'dwd.' 开头"
    )

    # ODS source table name format.
    ods_name_ok = isinstance(ods_table, str) and ods_table.startswith("ods.")
    assert ods_name_ok, (
        f"TABLE_MAP[{dwd_table}] = {ods_table!r} 不以 'ods.' 开头"
    )

    # Structural check of the explicit mappings; tables without any are skipped.
    for i, e in enumerate(DwdLoadTask.FACT_MAPPINGS.get(dwd_table, [])):
        is_triple = isinstance(e, (tuple, list)) and len(e) == 3
        assert is_triple, (
            f"{dwd_table}[{i}]: FACT_MAPPINGS 条目应为三元组"
        )

        dwd_col, ods_expr, cast_type = e
        assert isinstance(dwd_col, str) and dwd_col.strip(), (
            f"{dwd_table}[{i}]: dwd_col 为空"
        )
        assert isinstance(ods_expr, str) and ods_expr.strip(), (
            f"{dwd_table}[{i}]: ods_expr 为空"
        )
        assert cast_type is None or isinstance(cast_type, str), (
            f"{dwd_table}[{i}].{dwd_col}: cast_type 类型无效"
        )
|
||||
454
tests/test_property_5_etl_param_parsing.py
Normal file
454
tests/test_property_5_etl_param_parsing.py
Normal file
@@ -0,0 +1,454 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Feature: dataflow-field-completion, Property 5: ETL 参数解析与 CLI 命令构建正确性
|
||||
|
||||
**Validates: Requirements 14.1, 14.2**
|
||||
|
||||
对于任意合法的 ETL 执行参数组合(门店列表、数据源模式、校验模式、时间范围、
|
||||
窗口切分、force-full 标志、任务选择),Backend 构建的 CLI 命令字符串应包含
|
||||
所有指定参数,且参数值与输入一致。
|
||||
|
||||
测试策略:
|
||||
- 使用 hypothesis 生成随机 TaskConfigSchema 实例
|
||||
- 随机 flow(从 VALID_FLOWS 中选择)
|
||||
- 随机 processing_mode(从 VALID_PROCESSING_MODES 中选择)
|
||||
- 随机任务代码列表(从 task_registry 中选择)
|
||||
- 随机时间窗口模式(lookback / custom)
|
||||
- 随机 window_split 和 window_split_days
|
||||
- 随机 force_full / dry_run / fetch_before_verify 布尔值
|
||||
- 随机 store_id
|
||||
- 随机 ods_use_local_json
|
||||
|
||||
验证:
|
||||
1. 构建的 CLI 命令包含 --flow 且值与 flow 一致
|
||||
2. 任务代码通过 --tasks 正确传递
|
||||
3. 时间范围参数格式正确且值一致
|
||||
4. 布尔标志(--force-full / --dry-run / --fetch-before-verify)正确出现或缺失
|
||||
5. --store-id 值与输入一致
|
||||
6. --window-split / --window-split-days 正确传递
|
||||
7. --data-source offline 在 ods_use_local_json=True 时出现
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from hypothesis import given, settings, HealthCheck, assume
|
||||
import hypothesis.strategies as st
|
||||
|
||||
# ── 将后端模块加入 sys.path ──
|
||||
_BACKEND_ROOT = Path(__file__).resolve().parent.parent / "apps" / "backend"
|
||||
if str(_BACKEND_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_BACKEND_ROOT))
|
||||
|
||||
from app.services.cli_builder import CLIBuilder, VALID_FLOWS, VALID_PROCESSING_MODES
|
||||
from app.schemas.tasks import TaskConfigSchema
|
||||
from app.services.task_registry import ALL_TASKS
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# 常量与策略
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
# 所有合法任务代码
|
||||
_ALL_TASK_CODES: list[str] = [t.code for t in ALL_TASKS]

# Flow identifiers as a sorted list (sampled_from needs a sequence).
_VALID_FLOWS_LIST = sorted(VALID_FLOWS)

# Processing modes as a sorted list.
_VALID_MODES_LIST = sorted(VALID_PROCESSING_MODES)

# window_split values exercised by these tests.
# NOTE(review): presumably the full set accepted by the CLI — confirm against CLIBuilder.
_VALID_WINDOW_SPLITS = ["none", "day", "week", "month"]

# Imported here so this constants section stands on its own; the module
# already places imports after its sys.path setup.
from datetime import date as _date

# Strategy producing YYYY-MM-DD strings between 2024-01-01 and 2026-12-31.
# The former `st.just(...).__wrapped__ if False else ...` expression always
# took its else-branch; the dead branch has been removed.
_date_str = st.dates(
    min_value=_date(2024, 1, 1),
    max_value=_date(2026, 12, 31),
).map(lambda d: d.isoformat())
|
||||
|
||||
|
||||
@st.composite
def _valid_config(draw) -> TaskConfigSchema:
    """Draw a TaskConfigSchema built from randomly chosen field values.

    The order of ``draw`` calls is significant for hypothesis and is:
    tasks, flow, processing_mode, window_mode, the window-specific values,
    window_split (+ days), the four booleans, then store_id.
    """
    # 1-5 distinct task codes.
    task_codes = st.lists(
        st.sampled_from(_ALL_TASK_CODES),
        min_size=1,
        max_size=5,
        unique=True,
    )
    fields = {
        "tasks": draw(task_codes),
        "flow": draw(st.sampled_from(_VALID_FLOWS_LIST)),
        "processing_mode": draw(st.sampled_from(_VALID_MODES_LIST)),
    }

    # Time-window mode; the fields of the mode not chosen keep these defaults.
    window_mode = draw(st.sampled_from(["lookback", "custom"]))
    window_start = window_end = None
    lookback_hours, overlap_seconds = 24, 600

    if window_mode != "custom":
        lookback_hours = draw(st.integers(min_value=1, max_value=720))
        overlap_seconds = draw(st.integers(min_value=0, max_value=7200))
    else:
        # Two ISO dates, ordered so that start <= end (ISO strings sort
        # chronologically).
        first = draw(_date_str)
        second = draw(_date_str)
        if first > second:
            first, second = second, first
        window_start, window_end = first, second

    # Window splitting: a day count is only drawn when splitting is enabled.
    window_split = draw(st.sampled_from(_VALID_WINDOW_SPLITS))
    if window_split == "none":
        window_split_days = None
    else:
        window_split_days = draw(st.integers(min_value=1, max_value=30))

    fields.update(
        window_mode=window_mode,
        window_start=window_start,
        window_end=window_end,
        lookback_hours=lookback_hours,
        overlap_seconds=overlap_seconds,
        window_split=window_split,
        window_split_days=window_split_days,
    )

    # Boolean switches, drawn in a fixed order.
    for flag_name in ("force_full", "dry_run", "fetch_before_verify", "ods_use_local_json"):
        fields[flag_name] = draw(st.booleans())

    # store_id is either None or a positive integer.
    fields["store_id"] = draw(
        st.one_of(st.none(), st.integers(min_value=1, max_value=999999))
    )

    return TaskConfigSchema(**fields)
|
||||
|
||||
|
||||
# 全局 CLIBuilder 实例
|
||||
# One builder instance and one ETL path shared by every test in this module.
_builder = CLIBuilder()
_ETL_PATH = "apps/etl/connectors/feiqiu"


def _build(config: TaskConfigSchema) -> list[str]:
    """Return the command list the shared builder produces for *config*."""
    command = _builder.build_command(config, _ETL_PATH)
    return command
|
||||
|
||||
|
||||
def _get_arg_value(cmd: list[str], flag: str) -> str | None:
|
||||
"""从命令列表中提取指定 flag 后面的值"""
|
||||
try:
|
||||
idx = cmd.index(flag)
|
||||
if idx + 1 < len(cmd):
|
||||
return cmd[idx + 1]
|
||||
except ValueError:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def _has_flag(cmd: list[str], flag: str) -> bool:
|
||||
"""检查命令列表中是否包含指定 flag"""
|
||||
return flag in cmd
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 5a: --flow 参数与 flow 一致
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_flow_param_matches_flow(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**

    The built command must contain ``--flow`` followed by ``config.flow``.
    """
    flow_value = _get_arg_value(_build(config), "--flow")

    assert flow_value is not None, "CLI 命令缺少 --flow 参数"
    assert flow_value == config.flow, (
        f"--flow 值 {flow_value!r} != config.flow {config.flow!r}"
    )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 5b: --tasks 参数包含所有任务代码
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_tasks_param_contains_all_codes(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**

    The comma-separated value after ``--tasks`` must contain exactly the
    task codes in ``config.tasks`` (compared as sets).
    """
    cmd = _build(config)
    tasks_value = _get_arg_value(cmd, "--tasks")

    if not config.tasks:
        # Nothing is asserted for an empty task list.
        # NOTE(review): presumably CLIBuilder omits --tasks in that case — confirm.
        return

    assert tasks_value is not None, "CLI 命令缺少 --tasks 参数"

    parsed_tasks = set(tasks_value.split(","))
    expected_tasks = set(config.tasks)
    assert parsed_tasks == expected_tasks, (
        f"--tasks 解析结果 {parsed_tasks} != 期望 {expected_tasks}"
    )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 5c: 时间窗口参数正确传递
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_time_window_params_correct(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**

    - lookback mode: ``--lookback-hours`` and ``--overlap-seconds`` are
      present with matching values; ``--window-start`` / ``--window-end``
      are absent.
    - any other mode (custom): ``--window-start`` / ``--window-end`` match
      the config when set; ``--lookback-hours`` is absent.
    """
    cmd = _build(config)

    if config.window_mode == "lookback":
        # Both lookback options must be present and equal the stringified config.
        lookback_expectations = (
            ("--lookback-hours", config.lookback_hours),
            ("--overlap-seconds", config.overlap_seconds),
        )
        for flag, expected in lookback_expectations:
            actual = _get_arg_value(cmd, flag)
            assert actual is not None, f"lookback 模式缺少 {flag}"
            assert actual == str(expected), (
                f"{flag} {actual!r} != {expected}"
            )

        # Custom-window options must not leak into a lookback command.
        for flag in ("--window-start", "--window-end"):
            assert not _has_flag(cmd, flag), (
                f"lookback 模式不应包含 {flag}"
            )
        return

    # Custom mode: only bounds that are actually set are compared.
    custom_expectations = (
        ("--window-start", config.window_start),
        ("--window-end", config.window_end),
    )
    for flag, expected in custom_expectations:
        if expected:
            actual = _get_arg_value(cmd, flag)
            assert actual == expected, (
                f"{flag} {actual!r} != {expected!r}"
            )

    # Lookback option must not appear in a custom-window command.
    assert not _has_flag(cmd, "--lookback-hours"), (
        "custom 模式不应包含 --lookback-hours"
    )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 5d: 布尔标志正确出现或缺失
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_boolean_flags_correct(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**

    - force_full  → ``--force-full`` present iff True
    - dry_run     → ``--dry-run`` present iff True
    - fetch_before_verify → ``--fetch-before-verify`` present iff True and
      processing_mode == "verify_only"
    - ods_use_local_json=True → ``--data-source offline``; when False (and
      extra_args has no "data_source" key) ``--data-source`` has no value
    """
    cmd = _build(config)

    # Plain on/off switches: (enabled, flag, message if missing, message if unexpected).
    simple_switches = (
        (
            config.force_full,
            "--force-full",
            "force_full=True 但命令缺少 --force-full",
            "force_full=False 但命令包含 --force-full",
        ),
        (
            config.dry_run,
            "--dry-run",
            "dry_run=True 但命令缺少 --dry-run",
            "dry_run=False 但命令包含 --dry-run",
        ),
    )
    for enabled, flag, missing_msg, unexpected_msg in simple_switches:
        if enabled:
            assert _has_flag(cmd, flag), missing_msg
        else:
            assert not _has_flag(cmd, flag), unexpected_msg

    # fetch_before_verify is only expected in verify_only mode.
    expects_fetch = config.fetch_before_verify and config.processing_mode == "verify_only"
    if expects_fetch:
        assert _has_flag(cmd, "--fetch-before-verify"), (
            "fetch_before_verify=True + verify_only 但命令缺少 --fetch-before-verify"
        )
    else:
        assert not _has_flag(cmd, "--fetch-before-verify"), (
            "非 verify_only 模式或 fetch_before_verify=False 但命令包含 --fetch-before-verify"
        )

    # ods_use_local_json maps to --data-source offline.
    if config.ods_use_local_json:
        ds = _get_arg_value(cmd, "--data-source")
        assert ds == "offline", (
            f"ods_use_local_json=True 但 --data-source={ds!r}(期望 'offline')"
        )
    elif "data_source" not in config.extra_args:
        # The absence check is skipped when extra_args carries a data_source key.
        ds = _get_arg_value(cmd, "--data-source")
        assert ds is None, (
            f"ods_use_local_json=False 但命令包含 --data-source {ds!r}"
        )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 5e: --store-id 正确传递
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_store_id_param_correct(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**

    When ``store_id`` is set, ``--store-id`` must carry its string form;
    when it is None, no ``--store-id`` value may be found.
    """
    sid = _get_arg_value(_build(config), "--store-id")

    if config.store_id is None:
        assert sid is None, f"store_id=None 但命令包含 --store-id {sid!r}"
        return

    assert sid is not None, "store_id 不为 None 但命令缺少 --store-id"
    assert sid == str(config.store_id), (
        f"--store-id {sid!r} != {config.store_id}"
    )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 5f: --window-split / --window-split-days 正确传递
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_window_split_params_correct(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**

    With a window_split other than "none", ``--window-split`` must match it
    and ``--window-split-days`` must match window_split_days when that is
    set. Otherwise ``--window-split`` must be absent.
    """
    cmd = _build(config)

    splitting_disabled = not config.window_split or config.window_split == "none"
    if splitting_disabled:
        assert not _has_flag(cmd, "--window-split"), (
            "window_split='none' 但命令包含 --window-split"
        )
        return

    ws = _get_arg_value(cmd, "--window-split")
    assert ws == config.window_split, (
        f"--window-split {ws!r} != {config.window_split!r}"
    )

    if config.window_split_days is not None:
        wsd = _get_arg_value(cmd, "--window-split-days")
        assert wsd == str(config.window_split_days), (
            f"--window-split-days {wsd!r} != {config.window_split_days}"
        )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 5g: --processing-mode 正确传递
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_processing_mode_param_correct(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**

    When ``processing_mode`` is truthy, ``--processing-mode`` must be present
    with the same value. Nothing is asserted otherwise.
    """
    cmd = _build(config)

    if not config.processing_mode:
        return

    pm = _get_arg_value(cmd, "--processing-mode")
    assert pm is not None, "processing_mode 不为空但命令缺少 --processing-mode"
    assert pm == config.processing_mode, (
        f"--processing-mode {pm!r} != {config.processing_mode!r}"
    )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 5h: 命令字符串与命令列表一致
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_command_string_consistent_with_list(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**

    Every token of ``build_command()`` must occur in the output of
    ``build_command_string()``; tokens containing a space or a double quote
    are expected wrapped in double quotes.
    """
    cmd_list = _builder.build_command(config, _ETL_PATH)
    cmd_str = _builder.build_command_string(config, _ETL_PATH)

    # Substring check per token; token order is not verified here.
    for arg in cmd_list:
        needs_quotes = " " in arg or '"' in arg
        if not needs_quotes:
            assert arg in cmd_str, (
                f"参数 {arg!r} 未出现在命令字符串中"
            )
            continue

        quoted = f'"{arg}"'
        assert quoted in cmd_str, (
            f"含空格参数 {arg!r} 未在命令字符串中被正确引用"
        )
|
||||
328
tests/test_property_6_consistency_check.py
Normal file
328
tests/test_property_6_consistency_check.py
Normal file
@@ -0,0 +1,328 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Feature: dataflow-field-completion, Property 6: 数据一致性检查正确性
|
||||
|
||||
**Validates: Requirements 16.2, 16.3**
|
||||
|
||||
对于任意 ODS 行和对应的 DWD 行,黑盒测试检查器应能正确识别:
|
||||
(a) ODS 中存在但 DWD 中缺失的字段
|
||||
(b) ODS 与 DWD 之间值不一致的字段
|
||||
|
||||
测试策略:
|
||||
- 使用 hypothesis 生成随机 API 字段集合和 ODS 列集合
|
||||
- 使用 hypothesis 生成随机 DWD 列集合、ODS 列集合和 FACT_MAPPINGS 列表
|
||||
- 验证属性:
|
||||
1. check_api_vs_ods_fields:当 API 字段是 ODS 列的子集时,结果应为 passed
|
||||
2. check_api_vs_ods_fields:当 API 字段不在 ODS 列中时,missing_fields > 0
|
||||
3. check_ods_vs_dwd_mappings:当所有 DWD 列都有映射时,结果应为 passed
|
||||
4. check_ods_vs_dwd_mappings:当 DWD 列无映射源时,missing_fields > 0
|
||||
5. total_fields = passed_fields + missing_fields + mismatch_fields
|
||||
6. field_results 列表长度 = total_fields
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from hypothesis import given, settings, HealthCheck, assume
|
||||
import hypothesis.strategies as st
|
||||
|
||||
# ── 将 ETL 模块加入 sys.path ──
|
||||
_ETL_ROOT = Path(__file__).resolve().parent.parent / "apps" / "etl" / "connectors" / "feiqiu"
|
||||
if str(_ETL_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_ETL_ROOT))
|
||||
|
||||
from quality.consistency_checker import (
|
||||
check_api_vs_ods_fields,
|
||||
check_ods_vs_dwd_mappings,
|
||||
ODS_META_COLUMNS,
|
||||
)
|
||||
from tasks.dwd.dwd_load_task import DwdLoadTask
|
||||
|
||||
# SCD2 列集合(小写),check_ods_vs_dwd_mappings 内部会排除这些列
|
||||
_SCD_COLS_LOWER = {name.lower() for name in DwdLoadTask.SCD_COLS}


def _is_plain_column(name):
    """Return True when *name* is neither an SCD column nor an ODS meta column."""
    return not (name in _SCD_COLS_LOWER or name in ODS_META_COLUMNS)


# Column names: a lowercase letter followed by 1-19 characters drawn from
# lowercase letters, digits and underscore (total length 2-20), excluding the
# SCD and ODS metadata column names.
_col_name = st.from_regex(r"[a-z][a-z0-9_]{1,19}", fullmatch=True).filter(
    _is_plain_column
)

# Set of 1-15 column names (never empty).
_col_set = st.frozensets(_col_name, min_size=1, max_size=15).map(set)

# Set of 0-15 column names (may be empty).
_col_set_maybe_empty = st.frozensets(_col_name, min_size=0, max_size=15).map(set)
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 6a: API 字段是 ODS 列子集时,结果应为 passed
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(common=_col_set, extra_ods=_col_set_maybe_empty)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_api_subset_of_ods_passes(common: set[str], extra_ods: set[str]):
    """
    **Validates: Requirements 16.2, 16.3**

    When the API field set is a subset of the ODS column set,
    check_api_vs_ods_fields must report passed=True and no missing fields.
    """
    api_fields = common
    # ODS = the shared columns plus any extras, so every API field is covered.
    ods_columns = common.union(extra_ods)

    result = check_api_vs_ods_fields(api_fields, ods_columns)

    assert result.passed is True, (
        f"API 字段 {api_fields} 是 ODS 列 {ods_columns} 的子集,但 passed={result.passed}"
    )
    assert result.missing_fields == 0
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 6b: API 字段不在 ODS 列中时,missing_fields > 0
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(common=_col_set_maybe_empty, api_only=_col_set, ods_only=_col_set_maybe_empty)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_api_fields_not_in_ods_detected_as_missing(
    common: set[str], api_only: set[str], ods_only: set[str]
):
    """
    **Validates: Requirements 16.2, 16.3**

    When some API fields are not ODS columns, the result must have
    passed=False and missing_fields at least as large as their count.
    """
    ods_columns = common | ods_only

    # Keep only the API-only names that really are absent from ODS.
    api_only_clean = api_only - ods_columns
    assume(len(api_only_clean) > 0)

    api_fields = common | api_only_clean
    orphan_count = len(api_only_clean)

    result = check_api_vs_ods_fields(api_fields, ods_columns)

    assert result.passed is False, (
        f"API 有 {orphan_count} 个字段不在 ODS 中,但 passed=True"
    )
    assert result.missing_fields >= orphan_count, (
        f"期望 missing_fields >= {orphan_count},实际 {result.missing_fields}"
    )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 6c: 所有 DWD 列都有映射时,结果应为 passed
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@st.composite
def _fully_mapped_scenario(draw):
    """Draw (dwd_columns, ods_columns, fact_mappings) where every DWD column has a source.

    A DWD column is covered either by a same-named ODS column or by a
    ``(dwd_col, ods_col, None)`` entry in ``fact_mappings``.
    """
    small_col_set = st.frozensets(_col_name, min_size=0, max_size=8).map(set)

    # Columns present under the same name on both sides.
    auto_cols = draw(small_col_set)

    # DWD columns that need an explicit mapping; must not collide with auto_cols.
    explicit_dwd = draw(small_col_set) - auto_cols
    needed = len(explicit_dwd)

    # Candidate ODS source names, one per explicit DWD column (plus slack).
    candidates = draw(
        st.frozensets(_col_name, min_size=needed, max_size=needed + 5).map(set)
    )
    # Sources must differ from every DWD column name, otherwise they would
    # count as same-name matches.
    explicit_ods_names = candidates - auto_cols - explicit_dwd

    # Discard the example when the filtering left too few source names.
    assume(len(explicit_ods_names) >= needed)

    ods_list = sorted(explicit_ods_names)[:needed]
    dwd_list = sorted(explicit_dwd)

    fact_mappings = []
    for dwd_col, ods_col in zip(dwd_list, ods_list):
        fact_mappings.append((dwd_col, ods_col, None))

    dwd_columns = auto_cols | explicit_dwd
    ods_columns = auto_cols | set(ods_list)

    # An empty DWD column set is not an interesting scenario.
    assume(len(dwd_columns) > 0)

    return dwd_columns, ods_columns, fact_mappings
|
||||
|
||||
|
||||
@given(scenario=_fully_mapped_scenario())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_all_dwd_cols_mapped_passes(scenario):
    """
    **Validates: Requirements 16.2, 16.3**

    When every DWD column has a mapping source (explicit or same-name), the
    result must have passed=True with zero missing and zero mismatched fields.
    """
    dwd_columns, ods_columns, fact_mappings = scenario

    # An empty mapping list is passed as None.
    result = check_ods_vs_dwd_mappings(
        "dwd.test_table",
        "ods.test_table",
        dwd_columns,
        ods_columns,
        fact_mappings or None,
    )

    assert result.passed is True, (
        f"所有 DWD 列都有映射但 passed=False。 missing={result.missing_fields}, mismatch={result.mismatch_fields}"
    )
    assert result.missing_fields == 0
    assert result.mismatch_fields == 0
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 6d: DWD 列无映射源时,missing_fields > 0
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(
    mapped_cols=_col_set_maybe_empty,
    orphan_cols=_col_set,
    ods_cols=_col_set_maybe_empty,
)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_unmapped_dwd_cols_detected_as_missing(
    mapped_cols: set[str], orphan_cols: set[str], ods_cols: set[str]
):
    """
    **Validates: Requirements 16.2, 16.3**

    When some DWD columns have no mapping source, the result must have
    passed=False and missing_fields at least as large as their count.
    """
    # mapped_cols exist on the ODS side under the same name.
    ods_columns = mapped_cols | ods_cols

    # Orphans must not share a name with any ODS column, or they would be
    # matched by name.
    orphan_clean = orphan_cols - ods_columns
    assume(len(orphan_clean) > 0)

    dwd_columns = mapped_cols | orphan_clean
    orphan_count = len(orphan_clean)

    result = check_ods_vs_dwd_mappings(
        "dwd.test_table",
        "ods.test_table",
        dwd_columns,
        ods_columns,
        None,
    )

    assert result.passed is False, (
        f"DWD 有 {orphan_count} 个无映射列,但 passed=True"
    )
    assert result.missing_fields >= orphan_count, (
        f"期望 missing_fields >= {orphan_count},实际 {result.missing_fields}"
    )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 6e: total_fields = passed_fields + missing_fields + mismatch_fields
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(api_fields=_col_set, ods_columns=_col_set)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_api_vs_ods_field_counts_consistent(api_fields: set[str], ods_columns: set[str]):
    """
    **Validates: Requirements 16.2, 16.3**

    For check_api_vs_ods_fields:
    total_fields == passed_fields + missing_fields + mismatch_fields
    """
    result = check_api_vs_ods_fields(api_fields, ods_columns)

    passed_n = result.passed_fields
    missing_n = result.missing_fields
    mismatch_n = result.mismatch_fields
    actual_sum = passed_n + missing_n + mismatch_n

    assert result.total_fields == actual_sum, (
        f"total_fields={result.total_fields} != "
        f"passed({passed_n}) + missing({missing_n}) + "
        f"mismatch({mismatch_n}) = {actual_sum}"
    )
|
||||
|
||||
|
||||
@given(dwd_columns=_col_set, ods_columns=_col_set)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_ods_vs_dwd_field_counts_consistent(dwd_columns: set[str], ods_columns: set[str]):
    """
    **Validates: Requirements 16.2, 16.3**

    For check_ods_vs_dwd_mappings (called without explicit mappings):
    total_fields == passed_fields + missing_fields + mismatch_fields
    """
    result = check_ods_vs_dwd_mappings(
        "dwd.test_table",
        "ods.test_table",
        dwd_columns,
        ods_columns,
        None,
    )

    passed_n = result.passed_fields
    missing_n = result.missing_fields
    mismatch_n = result.mismatch_fields
    actual_sum = passed_n + missing_n + mismatch_n

    assert result.total_fields == actual_sum, (
        f"total_fields={result.total_fields} != "
        f"passed({passed_n}) + missing({missing_n}) + "
        f"mismatch({mismatch_n}) = {actual_sum}"
    )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 6f: field_results 列表长度 = total_fields
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(api_fields=_col_set, ods_columns=_col_set)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_api_vs_ods_field_results_length(api_fields: set[str], ods_columns: set[str]):
    """
    **Validates: Requirements 16.2, 16.3**

    The field_results list returned by check_api_vs_ods_fields must have
    exactly total_fields entries.
    """
    result = check_api_vs_ods_fields(api_fields, ods_columns)
    entry_count = len(result.field_results)

    assert entry_count == result.total_fields, (
        f"field_results 长度 {entry_count} != total_fields {result.total_fields}"
    )
|
||||
|
||||
|
||||
@given(dwd_columns=_col_set, ods_columns=_col_set)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_ods_vs_dwd_field_results_length(dwd_columns: set[str], ods_columns: set[str]):
    """
    **Validates: Requirements 16.2, 16.3**

    The field_results list returned by check_ods_vs_dwd_mappings must have
    exactly total_fields entries.
    """
    dwd_table, ods_table = "dwd.test_table", "ods.test_table"
    result = check_ods_vs_dwd_mappings(dwd_table, ods_table, dwd_columns, ods_columns, None)
    entry_count = len(result.field_results)

    assert entry_count == result.total_fields, (
        f"field_results 长度 {entry_count} != total_fields {result.total_fields}"
    )
|
||||
263
tests/test_property_7_timer_completeness.py
Normal file
263
tests/test_property_7_timer_completeness.py
Normal file
@@ -0,0 +1,263 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Feature: dataflow-field-completion, Property 7: 计时器记录完整性
|
||||
|
||||
**Validates: Requirements 15.2**
|
||||
|
||||
对于任意 ETL 步骤序列,计时器输出应包含每个步骤的名称、开始时间、结束时间和耗时,
|
||||
且耗时等于结束时间减去开始时间。
|
||||
|
||||
测试策略:
|
||||
- 使用 hypothesis 生成随机步骤名称列表(1-10 个步骤)
|
||||
- 每个步骤可选包含 0-5 个子步骤
|
||||
- 验证属性:
|
||||
1. 每个步骤的 to_dict() 输出包含 name、start_time、end_time、elapsed_ms
|
||||
2. elapsed_ms ≈ (end_time - start_time) 的毫秒数(允许 ±50ms 误差)
|
||||
3. 所有步骤名称都出现在 timer.steps 中
|
||||
4. timer.to_dict() 的 steps 数量等于实际添加的步骤数
|
||||
5. 子步骤的 elapsed_ms 也满足上述一致性
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from hypothesis import given, settings, HealthCheck
|
||||
import hypothesis.strategies as st
|
||||
|
||||
# ── 将 ETL 模块加入 sys.path ──
|
||||
_ETL_ROOT = (
|
||||
Path(__file__).resolve().parent.parent
|
||||
/ "apps" / "etl" / "connectors" / "feiqiu"
|
||||
)
|
||||
if str(_ETL_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_ETL_ROOT))
|
||||
|
||||
from utils.timer import EtlTimer
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Hypothesis 策略
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
# Step names: uppercase ASCII letters, digits and underscore, mimicking real ETL task names.
_step_name = st.text(
    st.sampled_from("ABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789"),
    min_size=3,
    max_size=20,
)

# Sub-step names: lowercase ASCII letters, digits and underscore.
_sub_step_name = st.text(
    st.sampled_from("abcdefghijklmnopqrstuvwxyz_0123456789"),
    min_size=2,
    max_size=15,
)
|
||||
|
||||
|
||||
@st.composite
def _step_spec(draw):
    """Generate one step spec: (step name, [list of sub-step names])."""
    step = draw(_step_name)
    # Between 0 and 5 sub-steps, with unique names inside this step.
    children = draw(st.lists(_sub_step_name, min_size=0, max_size=5, unique=True))
    return (step, children)
|
||||
|
||||
|
||||
# Generate 1-10 steps with unique step names.
# `unique_by` lets Hypothesis enforce uniqueness while building the list,
# instead of generating whole lists and rejecting them with `.filter()`.
_steps_strategy = st.lists(
    _step_spec(),
    min_size=1,
    max_size=10,
    unique_by=lambda spec: spec[0],
)

# Allowed timing error in milliseconds: there is a small discrepancy between
# perf_counter and datetime.now.
_TOLERANCE_MS = 50.0
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# 辅助函数
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def _run_timer(step_specs: list[tuple[str, list[str]]]) -> EtlTimer:
    """Drive an EtlTimer through the given step specs and return the finished timer."""
    pause_s = 0.001  # tiny pause so that start/end timestamps differ

    timer = EtlTimer()
    timer.start()

    for parent, children in step_specs:
        timer.start_step(parent)
        for child in children:
            timer.start_sub_step(parent, child)
            time.sleep(pause_s)
            timer.stop_sub_step(parent, child)
        time.sleep(pause_s)
        timer.stop_step(parent)

    # No report is written; only the in-memory timings are inspected by the tests.
    timer.finish(write_report=False)
    return timer
|
||||
|
||||
|
||||
def _parse_iso(iso_str: str) -> datetime:
|
||||
"""解析 ISO 格式时间字符串"""
|
||||
return datetime.fromisoformat(iso_str)
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 7a: to_dict() 输出包含必要字段
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(step_specs=_steps_strategy)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_step_dict_contains_required_fields(step_specs):
    """
    **Validates: Requirements 15.2**

    Every step's to_dict() output must contain the name, start_time,
    end_time and elapsed_ms fields.
    """
    required_keys = ("name", "start_time", "end_time", "elapsed_ms")

    for step in _run_timer(step_specs).steps:
        d = step.to_dict()
        for key in required_keys:
            assert key in d, f"步骤 {step.name} 的 to_dict() 缺少字段: {key}"
        # A finished step must carry both timestamps.
        assert d["start_time"] is not None, f"步骤 {step.name} 的 start_time 为 None"
        assert d["end_time"] is not None, f"步骤 {step.name} 的 end_time 为 None"
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 7b: elapsed_ms ≈ (end_time - start_time)
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(step_specs=_steps_strategy)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_step_elapsed_ms_consistent_with_timestamps(step_specs):
    """
    **Validates: Requirements 15.2**

    Each step's elapsed_ms must approximately equal (end_time - start_time)
    in milliseconds. A tolerance of _TOLERANCE_MS is allowed for the small
    discrepancy between perf_counter and datetime.now.
    """
    for step in _run_timer(step_specs).steps:
        payload = step.to_dict()
        started = _parse_iso(payload["start_time"])
        ended = _parse_iso(payload["end_time"])

        wall_ms = (ended - started).total_seconds() * 1000
        elapsed_ms = payload["elapsed_ms"]
        diff = abs(elapsed_ms - wall_ms)

        assert diff <= _TOLERANCE_MS, (
            f"步骤 {step.name}: elapsed_ms={elapsed_ms:.3f} 与 "
            f"wall_clock_ms={wall_ms:.3f} 差异 {diff:.3f}ms 超过容差 {_TOLERANCE_MS}ms"
        )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 7c: 所有步骤名称都出现在 timer.steps 中
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(step_specs=_steps_strategy)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_all_step_names_present(step_specs):
    """
    **Validates: Requirements 15.2**

    Every step name that was added must appear in the timer.steps list.
    """
    timer = _run_timer(step_specs)

    recorded_names = set()
    for recorded in timer.steps:
        recorded_names.add(recorded.name)

    for spec in step_specs:
        step_name = spec[0]
        assert step_name in recorded_names, (
            f"步骤 {step_name!r} 未出现在 timer.steps 中"
        )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 7d: timer.to_dict() 的 steps 数量等于实际步骤数
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(step_specs=_steps_strategy)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_timer_dict_step_count_matches(step_specs):
    """
    **Validates: Requirements 15.2**

    The number of steps in timer.to_dict() must equal the number of steps
    that were actually added.
    """
    timer_dict = _run_timer(step_specs).to_dict()
    expected = len(step_specs)

    assert len(timer_dict["steps"]) == expected, (
        f"to_dict() steps 数量 {len(timer_dict['steps'])} "
        f"!= 实际步骤数 {expected}"
    )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 7e: 子步骤的 elapsed_ms 也满足一致性
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(step_specs=_steps_strategy)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_sub_step_elapsed_ms_consistent(step_specs):
    """
    **Validates: Requirements 15.2**

    Sub-step elapsed_ms must also approximately equal (end_time - start_time)
    in milliseconds, and each sub-step's dict must contain the required fields.
    """
    required_keys = ("name", "start_time", "end_time", "elapsed_ms")

    for step in _run_timer(step_specs).steps:
        for child_dict in step.to_dict()["children"]:
            # Required-field check.
            for key in required_keys:
                assert key in child_dict, (
                    f"子步骤 {child_dict.get('name', '?')} 的 to_dict() 缺少字段: {key}"
                )

            # Children without an end_time are skipped for the timing comparison.
            if child_dict["end_time"] is None:
                continue

            started = _parse_iso(child_dict["start_time"])
            ended = _parse_iso(child_dict["end_time"])
            wall_ms = (ended - started).total_seconds() * 1000
            elapsed_ms = child_dict["elapsed_ms"]
            diff = abs(elapsed_ms - wall_ms)

            assert diff <= _TOLERANCE_MS, (
                f"子步骤 {child_dict['name']}: elapsed_ms={elapsed_ms:.3f} 与 "
                f"wall_clock_ms={wall_ms:.3f} 差异 {diff:.3f}ms 超过容差 {_TOLERANCE_MS}ms"
            )
|
||||
324
tests/test_property_8_dws_stock_aggregation.py
Normal file
324
tests/test_property_8_dws_stock_aggregation.py
Normal file
@@ -0,0 +1,324 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Feature: dataflow-field-completion, Property 8: DWS 库存汇总粒度聚合正确性
|
||||
|
||||
**Validates: Requirements 12.2, 12.3, 12.4, 12.5, 12.6**
|
||||
|
||||
对于任意 DWD 库存汇总数据集和任意汇总粒度(日/周/月),DWS 汇总任务的 transform
|
||||
输出应满足:
|
||||
(a) 每条记录的 stat_period 与任务粒度一致
|
||||
(b) 同一 (site_id, stat_date, site_goods_id) 组合不重复
|
||||
(c) 日度汇总的记录数不少于周度和月度汇总的记录数
|
||||
|
||||
测试策略:
|
||||
- 使用 hypothesis 生成随机 DWD 库存行(随机 fetched_at 日期、site_goods_id、数值)
|
||||
- 构造最小可用的任务实例,调用 transform 方法
|
||||
- 验证三条属性
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from hypothesis import given, settings, assume, HealthCheck
|
||||
import hypothesis.strategies as st
|
||||
|
||||
# ── 将 ETL 模块加入 sys.path ──
|
||||
_ETL_ROOT = (
|
||||
Path(__file__).resolve().parent.parent
|
||||
/ "apps" / "etl" / "connectors" / "feiqiu"
|
||||
)
|
||||
if str(_ETL_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_ETL_ROOT))
|
||||
|
||||
from tasks.dws.goods_stock_daily_task import GoodsStockDailyTask
|
||||
from tasks.dws.goods_stock_weekly_task import GoodsStockWeeklyTask
|
||||
from tasks.dws.goods_stock_monthly_task import GoodsStockMonthlyTask
|
||||
from tasks.base_task import TaskContext
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# 辅助:构造最小可用的任务实例
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def _make_config() -> MagicMock:
|
||||
"""构造 mock config"""
|
||||
config = MagicMock()
|
||||
config.get = lambda key, default=None: {
|
||||
"app.store_id": 1,
|
||||
"app.timezone": "Asia/Shanghai",
|
||||
}.get(key, default)
|
||||
return config
|
||||
|
||||
|
||||
def _make_task(task_cls):
    """Instantiate a DWS task for testing with a mock config and mocked db/api/logger."""
    db, api, logger = MagicMock(), MagicMock(), MagicMock()
    return task_cls(_make_config(), db, api, logger)
|
||||
|
||||
|
||||
def _make_context(site_id: int = 1) -> TaskContext:
    """Build a minimal TaskContext covering the 90 days before a fixed reference time."""
    window_days = 90
    window_end = datetime(2026, 1, 15, 12, 0, 0)
    window_start = window_end - timedelta(days=window_days)
    return TaskContext(
        store_id=site_id,
        window_start=window_start,
        window_end=window_end,
        window_minutes=window_days * 24 * 60,
    )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Hypothesis 策略:生成随机 DWD 库存行
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
# Date range: 2025-11-01 .. 2026-02-20 (covers week and month boundaries).
_MIN_DATE = date(2025, 11, 1)
_MAX_DATE = date(2026, 2, 20)
_DATE_RANGE_DAYS = (_MAX_DATE - _MIN_DATE).days

# Draw a day offset and turn it into a date inside the range.
_date_strategy = st.integers(min_value=0, max_value=_DATE_RANGE_DAYS).map(
    lambda offset: _MIN_DATE + timedelta(days=offset)
)

# site_goods_id: 1..5 distinct goods (kept small so aggregation is meaningful).
_goods_id_strategy = st.integers(min_value=1, max_value=5)

# Numeric strategy: plausible stock quantities with two decimal places.
_numeric_strategy = st.decimals(
    min_value=Decimal("0"),
    max_value=Decimal("9999.99"),
    places=2,
    allow_nan=False,
    allow_infinity=False,
)
|
||||
|
||||
|
||||
@st.composite
def _dwd_row(draw):
    """Generate one random DWD stock-summary row as a dict."""
    day = draw(_date_strategy)
    # fetched_at is a datetime with hour and minute; seconds are fixed at 0.
    hh = draw(st.integers(min_value=0, max_value=23))
    mm = draw(st.integers(min_value=0, max_value=59))
    fetched_at = datetime(day.year, day.month, day.day, hh, mm, 0)

    # Values are drawn in the order the keys are listed.
    row = {
        "site_goods_id": draw(_goods_id_strategy),
        "goods_name": f"商品_{draw(st.integers(min_value=1, max_value=5))}",
        "goods_unit": draw(st.sampled_from(["个", "箱", "瓶", "包"])),
        "goods_category_id": draw(st.integers(min_value=1, max_value=3)),
        "goods_category_second_id": draw(st.integers(min_value=1, max_value=5)),
        "category_name": draw(st.sampled_from(["饮料", "零食", "台球用品"])),
        "range_start_stock": draw(_numeric_strategy),
        "range_end_stock": draw(_numeric_strategy),
        "range_in": draw(_numeric_strategy),
        "range_out": draw(_numeric_strategy),
        "range_sale": draw(_numeric_strategy),
        "range_sale_money": draw(_numeric_strategy),
        "range_inventory": draw(_numeric_strategy),
        "current_stock": draw(_numeric_strategy),
        "site_id": 1,
        "tenant_id": 100,
        "fetched_at": fetched_at,
    }
    return row
|
||||
|
||||
|
||||
# Generate 1-30 DWD rows, sorted by fetched_at (mimics SQL ORDER BY fetched_at).
_dwd_rows_strategy = st.lists(_dwd_row(), min_size=1, max_size=30).map(
    lambda batch: sorted(batch, key=lambda row: row["fetched_at"])
)
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 8a: stat_period 与任务粒度一致
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_daily_stat_period_is_daily(rows):
    """
    **Validates: Requirements 12.2**

    Every record produced by the daily task's transform must have
    stat_period == 'daily'.
    """
    task = _make_task(GoodsStockDailyTask)
    records = task.transform(
        {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE},
        _make_context(),
    )

    for rec in records:
        assert rec["stat_period"] == "daily", \
            f"日度汇总记录的 stat_period 应为 'daily',实际为 {rec['stat_period']!r}"
|
||||
|
||||
|
||||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_weekly_stat_period_is_weekly(rows):
    """
    **Validates: Requirements 12.3**

    Every record produced by the weekly task's transform must have
    stat_period == 'weekly'.
    """
    task = _make_task(GoodsStockWeeklyTask)
    records = task.transform(
        {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE},
        _make_context(),
    )

    for rec in records:
        assert rec["stat_period"] == "weekly", \
            f"周度汇总记录的 stat_period 应为 'weekly',实际为 {rec['stat_period']!r}"
|
||||
|
||||
|
||||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_monthly_stat_period_is_monthly(rows):
    """
    **Validates: Requirements 12.4**

    Every record produced by the monthly task's transform must have
    stat_period == 'monthly'.
    """
    task = _make_task(GoodsStockMonthlyTask)
    records = task.transform(
        {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE},
        _make_context(),
    )

    for rec in records:
        assert rec["stat_period"] == "monthly", \
            f"月度汇总记录的 stat_period 应为 'monthly',实际为 {rec['stat_period']!r}"
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 8b: 同一 (site_id, stat_date, site_goods_id) 组合不重复
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_daily_no_duplicate_keys(rows):
    """
    **Validates: Requirements 12.5, 12.6**

    In the daily output, no (site_id, stat_date, site_goods_id) combination
    may appear more than once.
    """
    task = _make_task(GoodsStockDailyTask)
    records = task.transform(
        {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE},
        _make_context(),
    )

    seen = set()
    for rec in records:
        key = (rec["site_id"], rec["stat_date"], rec["site_goods_id"])
        assert key not in seen, \
            f"日度汇总存在重复主键: {key}"
        seen.add(key)
|
||||
|
||||
|
||||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_weekly_no_duplicate_keys(rows):
    """
    **Validates: Requirements 12.5, 12.6**

    In the weekly output, no (site_id, stat_date, site_goods_id) combination
    may appear more than once.
    """
    task = _make_task(GoodsStockWeeklyTask)
    records = task.transform(
        {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE},
        _make_context(),
    )

    seen = set()
    for rec in records:
        key = (rec["site_id"], rec["stat_date"], rec["site_goods_id"])
        assert key not in seen, \
            f"周度汇总存在重复主键: {key}"
        seen.add(key)
|
||||
|
||||
|
||||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_monthly_no_duplicate_keys(rows):
    """
    **Validates: Requirements 12.5, 12.6**

    In the monthly output, no (site_id, stat_date, site_goods_id) combination
    may appear more than once.
    """
    task = _make_task(GoodsStockMonthlyTask)
    records = task.transform(
        {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE},
        _make_context(),
    )

    seen = set()
    for rec in records:
        key = (rec["site_id"], rec["stat_date"], rec["site_goods_id"])
        assert key not in seen, \
            f"月度汇总存在重复主键: {key}"
        seen.add(key)
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 8c: 日度记录数 >= 周度 >= 月度
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_daily_count_gte_weekly_and_monthly(rows):
    """
    **Validates: Requirements 12.2, 12.3, 12.4**

    For the same DWD input, the daily summary must produce at least as many
    records as the weekly summary and as the monthly summary: a day is a
    finer grouping than either a week or a month.

    No ordering between the weekly and monthly counts is asserted.
    NOTE(review): assuming calendar-week and calendar-month bucketing, weeks
    do not nest inside months. Two rows for the same goods on 2025-12-30 and
    2026-01-02 share one week but fall in two months, so "weekly >= monthly"
    is not a valid invariant. Confirm against the task implementations.
    """
    context = _make_context()
    extracted = {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE}

    daily_count = len(_make_task(GoodsStockDailyTask).transform(extracted, context))
    weekly_count = len(_make_task(GoodsStockWeeklyTask).transform(extracted, context))
    monthly_count = len(_make_task(GoodsStockMonthlyTask).transform(extracted, context))

    assert daily_count >= weekly_count, (
        f"日度记录数({daily_count}) 应 >= 周度记录数({weekly_count})"
    )
    assert daily_count >= monthly_count, (
        f"日度记录数({daily_count}) 应 >= 月度记录数({monthly_count})"
    )
|
||||
135
tests/test_property_staff_ods_column_consistency.py
Normal file
135
tests/test_property_staff_ods_column_consistency.py
Normal file
@@ -0,0 +1,135 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Feature: etl-staff-dimension, Property 3: ODS 列名提取一致性
|
||||
|
||||
**Validates: Requirements 1.3**
|
||||
|
||||
对于任意 API 返回的员工记录(含驼峰和蛇形混合字段名),经 BaseOdsTask 处理后:
|
||||
- 所有字段名转为小写蛇形(_get_value_case_insensitive 大小写不敏感匹配)
|
||||
- id 字段不为空且为正整数
|
||||
- payload 字段包含完整原始 JSON
|
||||
|
||||
验证方式:hypothesis 属性测试,生成随机员工记录验证转换一致性。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from hypothesis import given, settings, HealthCheck
|
||||
import hypothesis.strategies as st
|
||||
|
||||
_ETL_ROOT = Path(__file__).resolve().parent.parent / "apps" / "etl" / "connectors" / "feiqiu"
|
||||
if str(_ETL_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_ETL_ROOT))
|
||||
|
||||
os.environ.setdefault("ETL_SKIP_DOTENV", "1")
|
||||
|
||||
from tasks.ods.ods_tasks import ODS_TASK_CLASSES, BaseOdsTask
|
||||
|
||||
# 将 tests/unit 加入 path 以使用 FakeDB/FakeAPI
|
||||
_UNIT_TEST_ROOT = _ETL_ROOT / "tests" / "unit"
|
||||
if str(_UNIT_TEST_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_UNIT_TEST_ROOT))
|
||||
|
||||
from task_test_utils import create_test_config, get_db_operations, FakeAPIClient
|
||||
|
||||
|
||||
# -- Strategy: generate random staff records --
# Field names returned by the API mix camelCase and snake_case.
_STAFF_RECORD = st.fixed_dictionaries(
    {
        "id": st.integers(min_value=1, max_value=2**53),
        "staff_name": st.text(min_size=1, max_size=10),
        "mobile": st.from_regex(r"1[3-9]\d{9}", fullmatch=True),
        "job": st.sampled_from(["店长", "主管", "教练", "收银员", "助教管理员"]),
        "staff_identity": st.integers(min_value=0, max_value=5),
        "status": st.integers(min_value=0, max_value=2),
        "leave_status": st.integers(min_value=0, max_value=2),
        "site_id": st.integers(min_value=1, max_value=2**53),
        "tenant_id": st.integers(min_value=1, max_value=2**53),
        # camelCase fields (the format the API actually returns)
        "cashierPointId": st.integers(min_value=0, max_value=2**53),
        "cashierPointName": st.text(min_size=0, max_size=20),
        "groupName": st.text(min_size=0, max_size=20),
        "groupId": st.integers(min_value=0, max_value=2**53),
        "rankName": st.text(min_size=0, max_size=10),
        "userRoles": st.just([]),
        "gender": st.integers(min_value=0, max_value=3),
        "is_delete": st.just(0),
    }
)
|
||||
|
||||
|
||||
@given(record=_STAFF_RECORD)
@settings(max_examples=50, suppress_health_check=[HealthCheck.too_slow])
def test_staff_record_field_case_insensitive_lookup(record):
    """P3(a): _get_value_case_insensitive matches both camelCase and snake_case fields."""
    lookup = BaseOdsTask._get_value_case_insensitive

    # camelCase fields must be reachable through their lowercase column names.
    assert lookup(record, "cashierpointid") == record["cashierPointId"]
    assert lookup(record, "groupname") == record["groupName"]
    assert lookup(record, "groupid") == record["groupId"]
    assert lookup(record, "rankname") == record["rankName"]
    assert lookup(record, "userroles") == record["userRoles"]

    # snake_case fields match directly.
    assert lookup(record, "id") == record["id"]
    assert lookup(record, "staff_name") == record["staff_name"]
    assert lookup(record, "mobile") == record["mobile"]
|
||||
|
||||
|
||||
@given(record=_STAFF_RECORD)
@settings(max_examples=50, suppress_health_check=[HealthCheck.too_slow])
def test_staff_record_id_positive_integer(record):
    """P3(b): the id field is always a positive integer."""
    staff_id = BaseOdsTask._get_value_case_insensitive(record, "id")

    assert staff_id is not None
    assert isinstance(staff_id, int)
    assert staff_id > 0
|
||||
|
||||
|
||||
@given(record=_STAFF_RECORD)
@settings(max_examples=50, suppress_health_check=[HealthCheck.too_slow])
def test_staff_record_payload_preserves_original(record):
    """P3(c): the serialized payload preserves every key (and value) of the original record.

    The payload is decoded again and compared structurally. A plain substring
    check on the JSON text would pass vacuously for keys contained in other
    keys (e.g. "id" inside "site_id" / "tenant_id" / "staff_identity").
    """
    payload_str = json.dumps(record, ensure_ascii=False)
    decoded = json.loads(payload_str)

    # Every original key must exist as a real top-level key of the payload.
    assert set(decoded) == set(record)
    # The record holds only str / int / list values, so the JSON round trip is lossless.
    assert decoded == record
|
||||
|
||||
|
||||
def test_staff_ingest_payload_roundtrip(tmp_path):
    """P3(d): end-to-end check that the ODS row's payload holds the original JSON."""
    staff_row = {
        "id": 9999999999999,
        "staff_name": "测试员工",
        "mobile": "13900000001",
        "cashierPointId": 12345,
        "cashierPointName": "默认收银台",
        "groupName": "A组",
        "groupId": 100,
        "rankName": "初级",
        "userRoles": [{"roleId": 1}],
        "gender": 1,
        "is_delete": 0,
        "status": 1,
        "staff_identity": 2,
        "site_id": 1001,
        "tenant_id": 2001,
    }
    config = create_test_config("ONLINE", tmp_path / "archive", tmp_path / "temp")
    api = FakeAPIClient({"/PersonnelManagement/SearchSystemStaffInfo": [staff_row]})
    task_cls = ODS_TASK_CLASSES["ODS_STAFF_INFO"]

    with get_db_operations() as db_ops:
        outcome = task_cls(config, db_ops, api, logging.getLogger("test_p3")).execute()

    assert outcome["status"] == "SUCCESS"

    first_row = db_ops.upserts[0]["rows"][0]
    payload = json.loads(first_row["payload"])
    # The payload keeps the original key names, including camelCase ones.
    assert payload["cashierPointId"] == 12345
    assert payload["groupName"] == "A组"
    assert payload["id"] == 9999999999999
|
||||
232
tests/test_spi_properties.py
Normal file
232
tests/test_spi_properties.py
Normal file
@@ -0,0 +1,232 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Feature: spi-spending-power-index — SPI 消费力指数属性测试
|
||||
|
||||
使用 hypothesis 验证 SPI 算法的正确性属性:
|
||||
- Property 1: SPI 总分非负性
|
||||
- Property 2: Level 子分关于消费金额单调非递减
|
||||
- Property 3: Speed 子分关于 spend_30 单调非递减
|
||||
- Property 4: Stability 子分取值范围 [0, 1]
|
||||
- Property 5: Display Score 取值范围 [0, 10]
|
||||
|
||||
测试策略:
|
||||
- 子分计算为 @staticmethod 纯函数,不依赖数据库,直接调用
|
||||
- batch_normalize_to_display 为实例方法,通过 MagicMock 构造最小实例
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from hypothesis import given, settings, assume
|
||||
import hypothesis.strategies as st
|
||||
|
||||
# ── 将 ETL 模块加入 sys.path ──
|
||||
_ETL_ROOT = (
|
||||
Path(__file__).resolve().parent.parent
|
||||
/ "apps" / "etl" / "connectors" / "feiqiu"
|
||||
)
|
||||
if str(_ETL_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_ETL_ROOT))
|
||||
|
||||
from tasks.dws.index.spending_power_index_task import (
|
||||
SpendingPowerIndexTask,
|
||||
SPIMemberFeatures,
|
||||
)
|
||||
from tasks.dws.index.base_index_task import BaseIndexTask
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# 辅助:构造最小可用的 SpendingPowerIndexTask 实例(仅用于 Property 5)
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def _make_spi_task() -> SpendingPowerIndexTask:
    """Build an SPI task without a real DB/API, only for calling batch_normalize_to_display."""
    settings_table = {"app.timezone": "Asia/Shanghai"}

    config = MagicMock()
    # BaseTask.__init__ calls config.get("app.timezone", "Asia/Shanghai").
    # A default MagicMock.get() returns a Mock object, which makes ZoneInfo fail,
    # so get() is replaced with one that returns a real string.
    config.get = lambda key, default=None: settings_table.get(key, default)

    db, api, logger = MagicMock(), MagicMock(), MagicMock()
    return SpendingPowerIndexTask(config, db, api, logger)
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 1: SPI 总分非负性
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(
    level=st.floats(min_value=0, max_value=100),
    speed=st.floats(min_value=0, max_value=100),
    stability=st.floats(min_value=0, max_value=1),
)
@settings(max_examples=200)
def test_spi_raw_non_negative(level, speed, stability):
    """Property 1: the SPI raw score is non-negative.

    For any non-negative Level, Speed and Stability sub-scores,
    compute_spi_raw must return a non-negative value.

    **Validates: Requirements 6.1, 10.1**
    """
    result = SpendingPowerIndexTask.compute_spi_raw(
        level, speed, stability, SpendingPowerIndexTask.DEFAULT_PARAMS
    )
    assert result >= 0, f"SPI_raw={result} < 0 (L={level}, S={speed}, P={stability})"
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 2: Level 子分关于消费金额单调非递减
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(
    spend_30=st.floats(min_value=0, max_value=50000),
    spend_90=st.floats(min_value=0, max_value=150000),
    recharge_90=st.floats(min_value=0, max_value=100000),
    avg_ticket_90=st.floats(min_value=0, max_value=5000),
    delta=st.floats(min_value=0.01, max_value=10000),
)
@settings(max_examples=200)
def test_level_monotonic_on_spend(spend_30, spend_90, recharge_90, avg_ticket_90, delta):
    """Property 2: the Level sub-score is non-decreasing in spend.

    With every other input held fixed, raising ``spend_30`` or ``spend_90``
    must not lower the Level sub-score.

    **Validates: Requirements 3.1, 10.2**
    """
    params = SpendingPowerIndexTask.DEFAULT_PARAMS

    def level_for(s30, s90):
        # Only the two spend figures vary; recharge and average ticket stay
        # pinned to the generated values for this example.
        features = SPIMemberFeatures(
            member_id=1,
            site_id=1,
            spend_30=s30,
            spend_90=s90,
            recharge_90=recharge_90,
            avg_ticket_90=avg_ticket_90,
        )
        return SpendingPowerIndexTask.compute_level(features, params)

    level_before = level_for(spend_30, spend_90)

    # Bump spend_30 only.
    level_after_30 = level_for(spend_30 + delta, spend_90)
    assert level_after_30 >= level_before, (
        f"Level 下降: spend_30 增加 {delta} 后 {level_after_30} < {level_before}"
    )

    # Bump spend_90 only.
    level_after_90 = level_for(spend_30, spend_90 + delta)
    assert level_after_90 >= level_before, (
        f"Level 下降: spend_90 增加 {delta} 后 {level_after_90} < {level_before}"
    )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 3: Speed 子分关于 spend_30 单调非递减
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(
    spend_30=st.floats(min_value=0, max_value=50000),
    spend_90=st.floats(min_value=0, max_value=150000),
    visit_days_30=st.integers(min_value=0, max_value=30),
    daily_spend_ewma_90=st.floats(min_value=0, max_value=10000),
    delta=st.floats(min_value=0.01, max_value=10000),
)
@settings(max_examples=200)
def test_speed_monotonic_on_spend_30(spend_30, spend_90, visit_days_30, daily_spend_ewma_90, delta):
    """Property 3: the Speed sub-score is non-decreasing in ``spend_30``.

    With every other input held fixed, raising ``spend_30`` must not lower
    the Speed sub-score.

    **Validates: Requirements 4.1, 4.4, 10.3**
    """
    params = SpendingPowerIndexTask.DEFAULT_PARAMS

    def speed_for(s30):
        # Everything except spend_30 is pinned to this example's values.
        features = SPIMemberFeatures(
            member_id=1,
            site_id=1,
            spend_30=s30,
            spend_90=spend_90,
            visit_days_30=visit_days_30,
            daily_spend_ewma_90=daily_spend_ewma_90,
        )
        return SpendingPowerIndexTask.compute_speed(features, params)

    speed_before = speed_for(spend_30)
    speed_after = speed_for(spend_30 + delta)

    assert speed_after >= speed_before, (
        f"Speed 下降: spend_30 增加 {delta} 后 {speed_after} < {speed_before}"
    )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 4: Stability 子分取值范围 [0, 1]
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(active_weeks=st.integers(min_value=0, max_value=13))
@settings(max_examples=200)
def test_stability_in_range(active_weeks):
    """Property 4: the Stability sub-score lies in [0, 1].

    For any ``active_weeks_90`` in [0, 13], ``compute_stability`` must
    return a value within [0, 1].

    **Validates: Requirements 5.2, 5.4, 10.4**
    """
    member = SPIMemberFeatures(member_id=1, site_id=1, active_weeks_90=active_weeks)
    score = SpendingPowerIndexTask.compute_stability(
        member,
        SpendingPowerIndexTask.DEFAULT_PARAMS,
    )
    assert 0 <= score <= 1, (
        f"Stability={score} 超出 [0, 1] (active_weeks_90={active_weeks})"
    )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 5: Display Score 取值范围 [0, 10]
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(
    raw_scores=st.lists(
        st.floats(min_value=0, max_value=1000),
        min_size=1,
        max_size=50,
    ),
)
@settings(max_examples=200)
def test_display_score_in_range(raw_scores):
    """Property 5: the display score lies in [0, 10].

    For any non-empty list of non-negative raw scores, every display score
    produced by ``batch_normalize_to_display`` must fall within
    [0.00, 10.00].

    **Validates: Requirements 6.6, 10.5**
    """
    task = _make_spi_task()

    # The method takes (entity_id, raw_score) pairs; the list index serves
    # as a stand-in entity id.
    results = task.batch_normalize_to_display(
        raw_scores=list(enumerate(raw_scores)),
        compression=None,  # no compression: plain MinMax, per the original note
        use_smoothing=False,  # skip EWMA smoothing so no DB call is made
    )

    for _entity_id, raw, shown in results:
        assert 0.0 <= shown <= 10.0, (
            f"display_score={shown} 超出 [0, 10] (raw={raw})"
        )
|
||||
Reference in New Issue
Block a user