在前后端开发联调前的提交 2026-02-23

This commit is contained in:
Neo
2026-02-23 23:02:20 +08:00
parent 254ccb1e77
commit fafc95e64c
1142 changed files with 10366960 additions and 36957 deletions

View File

@@ -10,6 +10,8 @@ import json
import sys
from pathlib import Path
import pytest
# scripts/ops 不是 Python 包,通过 sys.path 导入
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts" / "ops"))
@@ -718,11 +720,11 @@ class TestResolveOutputDir:
assert result == target
assert target.is_dir()
def test_fallback_to_docs_reports(self, monkeypatch):
"""SYSTEM_ANALYZE_ROOT 未设置时回退到 docs/reports/"""
def test_fallback_raises_when_env_missing(self, monkeypatch):
"""SYSTEM_ANALYZE_ROOT 未设置时抛出 KeyError"""
monkeypatch.delenv("SYSTEM_ANALYZE_ROOT", raising=False)
result = resolve_output_dir()
assert result == Path("docs/reports")
with pytest.raises(KeyError):
resolve_output_dir()
def test_creates_directory(self, tmp_path, monkeypatch):
"""目录不存在时自动创建。"""
@@ -1783,91 +1785,26 @@ class TestFieldDiffSubTables:
assert "SCD2/派生列 2 个" in report
class TestGuessFieldPurpose:
"""测试 _guess_field_purpose 字段用途推测。"""
def test_scd2_field(self):
from gen_dataflow_report import _guess_field_purpose
purpose, conf = _guess_field_purpose("scd2_start_time", "test", "DWD")
assert "SCD2" in purpose
assert conf == ""
def test_id_field(self):
from gen_dataflow_report import _guess_field_purpose
purpose, conf = _guess_field_purpose("id", "test", "ODS")
assert "主键" in purpose
assert conf == ""
def test_foreign_key(self):
from gen_dataflow_report import _guess_field_purpose
purpose, conf = _guess_field_purpose("tenant_id", "test", "ODS")
assert "租户" in purpose
assert conf == ""
def test_nested_site_profile(self):
from gen_dataflow_report import _guess_field_purpose
purpose, conf = _guess_field_purpose("siteProfile.shop_name", "test", "API")
assert "门店" in purpose
assert conf == ""
def test_unknown_field(self):
from gen_dataflow_report import _guess_field_purpose
purpose, conf = _guess_field_purpose("xyzzy_foo_bar", "test", "ODS")
assert "待分析" in purpose
assert conf == ""
def test_price_field(self):
from gen_dataflow_report import _guess_field_purpose
purpose, conf = _guess_field_purpose("cx_unit_price", "test", "ODS")
assert "金额" in purpose or "价格" in purpose
def test_derived_field(self):
from gen_dataflow_report import _guess_field_purpose
purpose, conf = _guess_field_purpose("derived_flag", "test", "DWD")
assert "派生" in purpose
assert conf == ""
def test_is_delete_field(self):
from gen_dataflow_report import _guess_field_purpose
purpose, conf = _guess_field_purpose("is_delete", "test", "ODS")
assert "删除" in purpose
assert conf == ""
class TestDiffSubTablePurposeColumn:
"""测试差异分表中推测用途列的输出。"""
class TestDiffSubTableColumns:
"""测试差异分表中列的输出格式(推测用途/置信度已移除,改为人工处理)"""
def test_purpose_column_in_flat_unmapped(self, tmp_path):
"""平层未映射分表应包含推测用途、置信度、示例值、说明列。"""
# 复用 TestFieldDiffSubTables 的数据构造
def test_flat_unmapped_header(self, tmp_path):
"""平层未映射分表应包含示例值、说明列(无推测用途/置信度)"""
from test_dataflow_analyzer import TestFieldDiffSubTables
inst = TestFieldDiffSubTables()
data_dir = inst._setup_diff_data_dir(tmp_path)
report = generate_report(data_dir)
# 表头应有推测用途 + 示例值 + 说明
assert "| # | JSON 字段 | 推测用途 | 置信度 | 示例值 | 说明 | 状态 |" in report
assert "| # | JSON 字段 | 示例值 | 说明 | 状态 |" in report
def test_purpose_column_in_dwd_no_ods(self, tmp_path):
"""DWD 无 ODS 源表应包含推测用途列"""
def test_dwd_no_ods_header(self, tmp_path):
"""DWD 无 ODS 源表应包含说明列(无推测用途/置信度)"""
from test_dataflow_analyzer import TestFieldDiffSubTables
inst = TestFieldDiffSubTables()
data_dir = inst._setup_diff_data_dir(tmp_path)
report = generate_report(data_dir)
# scd2_ver 应被推测为 SCD2 元数据
assert "SCD2" in report
# derived_flag 应被推测为派生列
assert "派生" in report
def test_purpose_column_in_nested(self, tmp_path):
"""嵌套对象分表也应包含推测用途列。"""
from test_dataflow_analyzer import TestFieldDiffSubTables
inst = TestFieldDiffSubTables()
data_dir = inst._setup_diff_data_dir(tmp_path)
report = generate_report(data_dir)
# 嵌套对象表头
lines = report.split("\n")
nested_headers = [l for l in lines if "推测用途" in l and "置信度" in l]
assert len(nested_headers) >= 1
assert "| # | DWD 表 | DWD 列 | 说明 | 状态 |" in report
def test_section_numbering_incremental(self, tmp_path):
"""多个差异分表应有递增编号 1.1.1, 1.1.2, ...。"""
@@ -1875,7 +1812,6 @@ class TestDiffSubTablePurposeColumn:
from test_dataflow_analyzer import TestFieldDiffSubTables
inst = TestFieldDiffSubTables()
data_dir = inst._setup_diff_data_dir(tmp_path)
# 添加第二个有差异的表
manifest = _json.loads((data_dir / "collection_manifest.json").read_text(encoding="utf-8"))
manifest["tables"].append({
"table": "beta_table", "task_code": "ODS_BETA", "description": "第二表",
@@ -1936,7 +1872,6 @@ class TestDiffSubTablePurposeColumn:
inst = TestFieldDiffSubTables()
data_dir = inst._setup_diff_data_dir(tmp_path)
report = generate_report(data_dir)
# extra_flat 在 json_trees 中有 samples=["x"],应出现在差异子表的 extra_flat 行
lines = report.split("\n")
flat_rows = [l for l in lines if "extra_flat" in l and "未映射" in l]
assert len(flat_rows) >= 1
@@ -1948,7 +1883,6 @@ class TestDiffSubTablePurposeColumn:
from test_dataflow_analyzer import TestFieldDiffSubTables
inst = TestFieldDiffSubTables()
data_dir = inst._setup_diff_data_dir(tmp_path)
# 注入 bd_descriptions 中 ods_only_col 的说明
bd = {"ods_table": "alpha_table",
"ods_fields": {"ods_only_col": "仅ODS存在的测试列"},
"dwd_fields": {}}
@@ -1956,16 +1890,8 @@ class TestDiffSubTablePurposeColumn:
_json.dumps(bd, ensure_ascii=False), encoding="utf-8"
)
report = generate_report(data_dir)
# 说明应出现在 ods_only_col 所在行
lines = report.split("\n")
ods_only_rows = [l for l in lines if "ods_only_col" in l and "无 JSON 源" in l]
assert len(ods_only_rows) >= 1
assert "仅ODS存在的测试列" in ods_only_rows[0]
def test_dwd_no_ods_has_desc_column(self, tmp_path):
"""DWD 无 ODS 源子表应包含说明列。"""
from test_dataflow_analyzer import TestFieldDiffSubTables
inst = TestFieldDiffSubTables()
data_dir = inst._setup_diff_data_dir(tmp_path)
report = generate_report(data_dir)
assert "| # | DWD 表 | DWD 列 | 推测用途 | 置信度 | 说明 | 状态 |" in report

View File

@@ -0,0 +1,343 @@
# -*- coding: utf-8 -*-
"""
Feature: dataflow-field-completion, Property 1: FACT_MAPPINGS 字段映射正确性
**Validates: Requirements 1.1, 1.2, 2.1, 4.1, 5.1, 9.1**
对于任意 ODS 表行和任意已配置的 FACT_MAPPINGS 条目 (dwd_col, ods_expr, cast_type)
当 DWD 加载任务执行后DWD 目标行中 dwd_col 列的值应等于从 ODS 行中按 ods_expr
提取并按 cast_type 转换后的值。
本测试聚焦 A 类表(新增 DWD 列 + FACT_MAPPINGS
- dim_assistant_ex
- dwd_assistant_service_log_ex
- dwd_store_goods_sale
- dwd_member_balance_change_ex
- dim_table_ex
"""
from __future__ import annotations
import re
import sys
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock
from hypothesis import given, settings, assume, HealthCheck
import hypothesis.strategies as st
# ── 将 ETL 模块加入 sys.path ──
_ETL_ROOT = Path(__file__).resolve().parent.parent / "apps" / "etl" / "connectors" / "feiqiu"
if str(_ETL_ROOT) not in sys.path:
sys.path.insert(0, str(_ETL_ROOT))
from tasks.dwd.dwd_load_task import DwdLoadTask
# ── A-class tables: DWD tables that gained new columns backed by explicit
#    FACT_MAPPINGS entries (see module docstring, Requirements 1/2/4/5/9) ──
A_CLASS_TABLES = [
    "dwd.dim_assistant_ex",
    "dwd.dwd_assistant_service_log_ex",
    "dwd.dwd_store_goods_sale",
    "dwd.dwd_member_balance_change_ex",
    "dwd.dim_table_ex",
]
# ── 辅助:构造最小可用的 DwdLoadTask 实例 ──
def _make_task() -> DwdLoadTask:
    """Build a DwdLoadTask wired to mocks (config/db/api/logger) for unit tests."""
    settings_map = {
        "app.store_id": 1,
        "app.timezone": "Asia/Shanghai",
        "dwd.fact_upsert": True,
    }
    config = MagicMock()
    # Emulate Config.get(key, default): consult the fixed map, else the default.
    config.get = lambda key, default=None: settings_map.get(key, default)
    return DwdLoadTask(config, MagicMock(), MagicMock(), MagicMock())
# ── 收集 A 类表的所有 FACT_MAPPINGS 条目 ──
def _collect_a_class_mappings() -> list[tuple[str, str, str, str | None]]:
    """Flatten the A-class FACT_MAPPINGS into (table, dwd_col, ods_expr, cast_type) tuples."""
    return [
        (table, dwd_col, ods_expr, cast_type)
        for table in A_CLASS_TABLES
        for dwd_col, ods_expr, cast_type in DwdLoadTask.FACT_MAPPINGS.get(table, [])
    ]
# Materialized once at import time; reused by the tests and the sampling strategy.
_A_CLASS_MAPPING_ENTRIES = _collect_a_class_mappings()
# ── Known-legal cast_type values (None = no explicit cast) ──
_VALID_CAST_TYPES = {
    None, "bigint", "integer", "numeric", "decimal",
    "timestamptz", "boolean", "date", "text",
}
# ══════════════════════════════════════════════════════════════════
# Property 1.1: A 类表 FACT_MAPPINGS 条目结构完整性
# ══════════════════════════════════════════════════════════════════
def test_a_class_tables_have_fact_mappings():
    """Every A-class table must contribute at least one FACT_MAPPINGS entry."""
    for table in A_CLASS_TABLES:
        mapped = DwdLoadTask.FACT_MAPPINGS.get(table, [])
        assert mapped, f"{table} 在 FACT_MAPPINGS 中无条目"
def test_a_class_mappings_are_valid_tuples():
    """Each FACT_MAPPINGS entry must be a well-typed (dwd_col, ods_expr, cast_type) triple."""
    for table, dwd_col, ods_expr, cast_type in _A_CLASS_MAPPING_ENTRIES:
        # Both the DWD column and the ODS expression are mandatory non-empty strings.
        assert dwd_col and isinstance(dwd_col, str), \
            f"{table}: dwd_col 不能为空"
        assert ods_expr and isinstance(ods_expr, str), \
            f"{table}: ods_expr 不能为空"
        # cast_type is optional; when present it must be a string.
        assert isinstance(cast_type, str) or cast_type is None, \
            f"{table}.{dwd_col}: cast_type 必须为 None 或字符串"
def test_a_class_cast_types_are_valid():
    """Every cast_type must come from the known-legal set."""
    for table, dwd_col, _, cast_type in _A_CLASS_MAPPING_ENTRIES:
        is_known = cast_type in _VALID_CAST_TYPES
        assert is_known, f"{table}.{dwd_col}: 未知 cast_type={cast_type!r}"
def test_a_class_no_duplicate_dwd_cols():
    """A single DWD table must not list the same dwd_col twice."""
    for table in A_CLASS_TABLES:
        observed: set = set()
        for entry in DwdLoadTask.FACT_MAPPINGS.get(table, []):
            col = entry[0]
            assert col not in observed, \
                f"{table}: dwd_col={col!r} 重复出现"
            observed.add(col)
# ══════════════════════════════════════════════════════════════════
# Property 1.2: _cast_expr converts A-class mapping entries correctly
# ══════════════════════════════════════════════════════════════════
# Strategy: sample a random entry from the A-class mapping list.
_mapping_entry_strategy = st.sampled_from(_A_CLASS_MAPPING_ENTRIES)
# Strategy: simulated ODS column values (used to probe the structure of the
# SQL expression that _cast_expr emits).
_ods_value_strategy = st.one_of(
    st.none(),
    st.integers(min_value=-999999, max_value=999999),
    st.text(min_size=0, max_size=50, alphabet=st.characters(
        whitelist_categories=("L", "N", "P", "Z"),
        blacklist_characters=("\x00",),
    )),
    st.floats(min_value=-1e6, max_value=1e6, allow_nan=False, allow_infinity=False),
)
@given(entry=_mapping_entry_strategy)
@settings(max_examples=200, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_cast_expr_produces_valid_sql_for_a_class(entry):
    """
    **Validates: Requirements 1.1, 1.2, 2.1, 4.1, 5.1, 9.1**
    For any A-class FACT_MAPPINGS entry, _cast_expr must produce a non-empty
    SQL expression that embeds the ODS source and the requested cast.
    """
    table, dwd_col, ods_expr, cast_type = entry
    task = _make_task()
    result = task._cast_expr(ods_expr, cast_type)
    # Basic check: a non-empty string came back.
    assert result and isinstance(result, str), \
        f"{table}.{dwd_col}: _cast_expr 返回空结果"
    # The result should contain the ODS source expression (possibly quoted or
    # CAST-wrapped); for a plain column name it must appear in the output.
    if ods_expr.upper() != "NULL":
        # The expression with surrounding quotes stripped must be findable.
        bare_expr = ods_expr.strip('"')
        assert bare_expr in result or ods_expr in result, \
            f"{table}.{dwd_col}: _cast_expr 结果 {result!r} 中未包含 ODS 表达式 {ods_expr!r}"
    # When a cast_type is present the result must carry the matching cast syntax.
    if cast_type:
        cast_lower = cast_type.lower()
        if cast_lower in {"bigint", "integer", "numeric", "decimal"}:
            assert "CAST" in result.upper() or "::" in result, \
                f"{table}.{dwd_col}: 数值类型转换缺少 CAST/:: 语法"
        elif cast_lower == "timestamptz":
            assert "timestamptz" in result.lower(), \
                f"{table}.{dwd_col}: 时间类型转换缺少 timestamptz"
        elif cast_lower == "boolean":
            assert "boolean" in result.lower(), \
                f"{table}.{dwd_col}: 布尔类型转换缺少 boolean"
@given(entry=_mapping_entry_strategy)
@settings(max_examples=200, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_cast_expr_is_deterministic(entry):
    """
    **Validates: Requirements 1.1, 1.2, 2.1, 4.1, 5.1, 9.1**
    _cast_expr must be a pure function of its inputs: repeated calls agree.
    """
    _, _, ods_expr, cast_type = entry
    task = _make_task()
    outputs = [task._cast_expr(ods_expr, cast_type) for _ in range(2)]
    assert outputs[0] == outputs[1], "同一输入的 _cast_expr 结果不一致"
# ══════════════════════════════════════════════════════════════════
# Property 1.3: _build_column_mapping 对 A 类表的映射注册正确性
# ══════════════════════════════════════════════════════════════════
@given(table_name=st.sampled_from(A_CLASS_TABLES))
@settings(max_examples=50, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_build_column_mapping_registers_all_explicit_entries(table_name):
    """
    **Validates: Requirements 1.1, 1.2, 2.1, 4.1, 5.1, 9.1**
    For any A-class table, _build_column_mapping must register every
    FACT_MAPPINGS entry in the mapping dict it returns.
    """
    task = _make_task()
    entries = DwdLoadTask.FACT_MAPPINGS.get(table_name, [])
    ods_table = DwdLoadTask.TABLE_MAP.get(table_name, "")
    # Mock cursor: _get_columns normally queries information_schema; here we
    # bypass that and feed _build_column_mapping its inputs directly.
    mock_cur = MagicMock()
    # Collect ODS column names (simple identifiers pulled out of the
    # FACT_MAPPINGS ods_expr values).
    ods_cols = ["fetched_at", "id", "site_id", "tenant_id"]
    for _, ods_expr, _ in entries:
        # Plain column names are added; complex expressions (with -> or CASE) skipped.
        bare = ods_expr.strip('"')
        if bare.isidentifier():
            ods_cols.append(bare)
    pk_cols = ["id"]  # simplification: assume the primary key is `id`
    mapping = task._build_column_mapping(mock_cur, table_name, ods_table, pk_cols, ods_cols)
    # If an error dict came back (fetched_at missing), skip this example.
    if "processed" in mapping:
        return
    # Every explicit mapping entry must have been registered.
    for dwd_col, ods_expr, cast_type in entries:
        dwd_col_lower = dwd_col.lower()
        assert dwd_col_lower in mapping, \
            f"{table_name}: FACT_MAPPINGS 条目 {dwd_col!r} 未被注册到映射中"
        src, ct = mapping[dwd_col_lower]
        assert src == ods_expr, \
            f"{table_name}.{dwd_col}: 映射源应为 {ods_expr!r},实际为 {src!r}"
        assert ct == cast_type, \
            f"{table_name}.{dwd_col}: cast_type 应为 {cast_type!r},实际为 {ct!r}"
# ══════════════════════════════════════════════════════════════════
# Property 1.4: requirement-level checks for specific A-class field mappings
# ══════════════════════════════════════════════════════════════════
# Requirement 1: assistant_accounts_master → dim_assistant_ex
_REQ1_EXPECTED = {
    "system_role_id": ("system_role_id", None),
    "job_num": ("job_num", None),
    "cx_unit_price": ("cx_unit_price", None),
    "pd_unit_price": ("pd_unit_price", None),
}
# Requirement 2: assistant_service_records → dwd_assistant_service_log_ex
_REQ2_EXPECTED = {
    "operator_id": ("operator_id", None),
    "operator_name": ("operator_name", None),
}
# Requirement 4: store_goods_sales_records → dwd_store_goods_sale
_REQ4_EXPECTED = {
    "discount_money": ("discount_money", None),
    "discount_price": ("discount_price", None),
}
# Requirement 5: member_balance_changes → dwd_member_balance_change_ex
_REQ5_EXPECTED = {
    "relate_id": ("relate_id", None),
}
# Requirement 9: site_tables_master → dim_table_ex
_REQ9_EXPECTED = {
    "create_time": ("create_time", None),
    "light_status": ("light_status", None),
    "tablestatusname": ("tablestatusname", None),
    "sitename": ("sitename", None),
    "applet_qr_code_url": ('"appletQrCodeUrl"', None),
    "audit_status": ("audit_status", None),
    "charge_free": ("charge_free", None),
    "delay_lights_time": ("delay_lights_time", None),
    "is_rest_area": ("is_rest_area", None),
    "only_allow_groupon": ("only_allow_groupon", None),
    "order_delay_time": ("order_delay_time", None),
    "self_table": ("self_table", None),
    "temporary_light_second": ("temporary_light_second", None),
    "virtual_table": ("virtual_table", None),
}
# Roll-up: (DWD table, expected mapping dict, requirement ids)
_REQUIREMENT_CHECKS = [
    ("dwd.dim_assistant_ex", _REQ1_EXPECTED, "1.1, 1.2"),
    ("dwd.dwd_assistant_service_log_ex", _REQ2_EXPECTED, "2.1"),
    ("dwd.dwd_store_goods_sale", _REQ4_EXPECTED, "4.1"),
    ("dwd.dwd_member_balance_change_ex", _REQ5_EXPECTED, "5.1"),
    ("dwd.dim_table_ex", _REQ9_EXPECTED, "9.1"),
]
@given(check=st.sampled_from(_REQUIREMENT_CHECKS))
@settings(max_examples=50, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_requirement_specific_mappings_exist(check):
    """
    **Validates: Requirements 1.1, 1.2, 2.1, 4.1, 5.1, 9.1**
    Verify each requirement-mandated field mapping is present in FACT_MAPPINGS.
    """
    dwd_table, expected_mappings, req_ids = check
    # Actual mapping as a dict: dwd_col (lowercased) -> (ods_expr, cast_type).
    actual = {
        col.lower(): (src, ct)
        for col, src, ct in DwdLoadTask.FACT_MAPPINGS.get(dwd_table, [])
    }
    for dwd_col, (expected_src, expected_cast) in expected_mappings.items():
        assert dwd_col in actual, \
            f"[Req {req_ids}] {dwd_table}: 缺少 dwd_col={dwd_col!r} 的映射条目"
        actual_src, actual_cast = actual[dwd_col]
        assert actual_src == expected_src, \
            f"[Req {req_ids}] {dwd_table}.{dwd_col}: ODS 源应为 {expected_src!r},实际为 {actual_src!r}"
        assert actual_cast == expected_cast, \
            f"[Req {req_ids}] {dwd_table}.{dwd_col}: cast_type 应为 {expected_cast!r},实际为 {actual_cast!r}"
# ══════════════════════════════════════════════════════════════════
# Property 1.5: A 类表 FACT_MAPPINGS 与 TABLE_MAP 一致性
# ══════════════════════════════════════════════════════════════════
@given(table_name=st.sampled_from(A_CLASS_TABLES))
@settings(max_examples=50, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_a_class_tables_registered_in_table_map(table_name):
    """
    **Validates: Requirements 1.1, 1.2, 2.1, 4.1, 5.1, 9.1**
    An A-class table must be registered in both TABLE_MAP and FACT_MAPPINGS,
    and its TABLE_MAP entry must name a real ODS source table.
    """
    registered_in_table_map = table_name in DwdLoadTask.TABLE_MAP
    assert registered_in_table_map, \
        f"{table_name} 未在 TABLE_MAP 中注册"
    registered_in_fact_mappings = table_name in DwdLoadTask.FACT_MAPPINGS
    assert registered_in_fact_mappings, \
        f"{table_name} 未在 FACT_MAPPINGS 中注册"
    # The ODS source recorded in TABLE_MAP must be a non-empty string.
    ods_table = DwdLoadTask.TABLE_MAP[table_name]
    assert ods_table and isinstance(ods_table, str), \
        f"{table_name}: TABLE_MAP 中的 ODS 表名无效"

View File

@@ -0,0 +1,222 @@
# -*- coding: utf-8 -*-
"""
Feature: dataflow-field-completion, Property 2: FACT_MAPPINGS 引用完整性
**Validates: Requirements 6.3**
对于任意 FACT_MAPPINGS 中的映射条目,其 DWD 目标列名必须存在于对应 DWD 表的列定义中,
其 ODS 源表达式引用的列名必须存在于对应 ODS 表的列定义中(或为合法的 SQL 表达式)。
本测试覆盖所有 FACT_MAPPINGS 条目(不仅限于 A 类表),聚焦以下可静态验证的属性:
1. 所有 FACT_MAPPINGS 的 key 都在 TABLE_MAP 中注册
2. 所有条目格式为 (str, str, str|None) 三元组
3. 同一 DWD 表内无重复 dwd_col
4. 所有 ods_expr 非空
5. 所有 cast_type 值在已知合法集合内
6. B 类表recharge_settlements → dwd_recharge_order的 5 个新映射条目存在且正确
7. FACT_MAPPINGS 中引用的 DWD 表必须在 TABLE_MAP 中有对应的 ODS 源表
"""
from __future__ import annotations
import sys
from pathlib import Path
from hypothesis import given, settings, HealthCheck
import hypothesis.strategies as st
# ── 将 ETL 模块加入 sys.path ──
_ETL_ROOT = Path(__file__).resolve().parent.parent / "apps" / "etl" / "connectors" / "feiqiu"
if str(_ETL_ROOT) not in sys.path:
sys.path.insert(0, str(_ETL_ROOT))
from tasks.dwd.dwd_load_task import DwdLoadTask
# ── Known-legal cast_type values (None = no explicit cast) ──
_VALID_CAST_TYPES = {
    None, "bigint", "integer", "numeric", "decimal",
    "timestamptz", "boolean", "date", "text", "TEXT[]",
}
# ── Flatten every FACT_MAPPINGS entry into (dwd_table, dwd_col, ods_expr, cast_type) ──
def _collect_all_mappings() -> list[tuple[str, str, str, str | None]]:
    """Walk all FACT_MAPPINGS tables and return the flattened 4-tuple list."""
    result = []
    for table, entries in DwdLoadTask.FACT_MAPPINGS.items():
        for entry in entries:
            dwd_col, ods_expr, cast_type = entry
            result.append((table, dwd_col, ods_expr, cast_type))
    return result
# Materialized once at import time; reused by the tests and sampling strategies.
_ALL_MAPPING_ENTRIES = _collect_all_mappings()
_ALL_FACT_TABLES = list(DwdLoadTask.FACT_MAPPINGS.keys())
# ── Expected B-class mappings (recharge_settlements → dwd_recharge_order) ──
_REQ6_EXPECTED = {
    "pl_coupon_sale_amount": ("plcouponsaleamount", None),
    "mervou_sales_amount": ("mervousalesamount", None),
    "electricity_money": ("electricitymoney", None),
    "real_electricity_money": ("realelectricitymoney", None),
    "electricity_adjust_money": ("electricityadjustmoney", None),
}
# ══════════════════════════════════════════════════════════════════
# Property 2.1: FACT_MAPPINGS 结构完整性 — 所有 key 都在 TABLE_MAP 中
# ══════════════════════════════════════════════════════════════════
def test_all_fact_mapping_tables_in_table_map():
    """Every key of FACT_MAPPINGS must also be registered in TABLE_MAP."""
    for table in DwdLoadTask.FACT_MAPPINGS.keys():
        registered = table in DwdLoadTask.TABLE_MAP
        assert registered, f"FACT_MAPPINGS 中的 {table} 未在 TABLE_MAP 中注册"
# ══════════════════════════════════════════════════════════════════
# Property 2.2: 所有条目格式为 (str, str, str|None) 三元组
# ══════════════════════════════════════════════════════════════════
def test_all_fact_mapping_entries_are_valid_tuples():
    """Every FACT_MAPPINGS entry is a well-typed (dwd_col, ods_expr, cast_type) triple."""
    for table, entries in DwdLoadTask.FACT_MAPPINGS.items():
        for i, entry in enumerate(entries):
            # Shape check first, so the unpack below cannot blow up silently.
            assert isinstance(entry, (tuple, list)) and len(entry) == 3, \
                f"{table}[{i}]: 条目应为三元组,实际为 {type(entry).__name__}(len={len(entry) if hasattr(entry, '__len__') else '?'})"
            dwd_col, ods_expr, cast_type = entry
            assert isinstance(dwd_col, str) and dwd_col.strip(), \
                f"{table}[{i}]: dwd_col 必须为非空字符串,实际为 {dwd_col!r}"
            assert isinstance(ods_expr, str) and ods_expr.strip(), \
                f"{table}[{i}]: ods_expr 必须为非空字符串,实际为 {ods_expr!r}"
            assert cast_type is None or isinstance(cast_type, str), \
                f"{table}[{i}].{dwd_col}: cast_type 必须为 None 或字符串,实际为 {type(cast_type).__name__}"
# ══════════════════════════════════════════════════════════════════
# Property 2.3: 同一 DWD 表内无重复 dwd_col
# ══════════════════════════════════════════════════════════════════
def test_no_duplicate_dwd_cols_across_all_tables():
    """No DWD table may define the same dwd_col twice (case-insensitively)."""
    for table, entries in DwdLoadTask.FACT_MAPPINGS.items():
        observed: set[str] = set()
        for entry in entries:
            dwd_col = entry[0]
            lowered = dwd_col.lower()
            assert lowered not in observed, \
                f"{table}: dwd_col={dwd_col!r} 重复出现"
            observed.add(lowered)
# ══════════════════════════════════════════════════════════════════
# Property 2.4: 所有 cast_type 值在已知合法集合内
# ══════════════════════════════════════════════════════════════════
def test_all_cast_types_are_valid():
    """Every cast_type across all FACT_MAPPINGS entries is in the legal set."""
    for table, dwd_col, _, cast_type in _ALL_MAPPING_ENTRIES:
        is_known = cast_type in _VALID_CAST_TYPES
        assert is_known, \
            f"{table}.{dwd_col}: 未知 cast_type={cast_type!r},合法值为 {_VALID_CAST_TYPES}"
# ══════════════════════════════════════════════════════════════════
# Property 2.5: 所有 ods_expr 非空
# ══════════════════════════════════════════════════════════════════
def test_ods_expr_not_empty():
    """No FACT_MAPPINGS entry may carry a blank ods_expr."""
    for table, dwd_col, ods_expr, _ in _ALL_MAPPING_ENTRIES:
        non_blank = ods_expr.strip()
        assert non_blank, f"{table}.{dwd_col}: ods_expr 为空字符串"
# ══════════════════════════════════════════════════════════════════
# Property 2.6: B 类表特定映射验证(需求 6.3
# ══════════════════════════════════════════════════════════════════
def test_recharge_settlements_mappings_exist():
    """
    **Validates: Requirements 6.3**
    The five new B-class mappings (recharge_settlements -> dwd_recharge_order)
    must exist with the expected ODS source expression and cast_type.
    """
    dwd_table = "dwd.dwd_recharge_order"
    entries = DwdLoadTask.FACT_MAPPINGS.get(dwd_table, [])
    assert entries, f"{dwd_table} 在 FACT_MAPPINGS 中无条目"
    # Actual mapping as a dict: dwd_col (lowercased) -> (ods_expr, cast_type).
    actual = {col.lower(): (src, ct) for col, src, ct in entries}
    for dwd_col, (expected_src, expected_cast) in _REQ6_EXPECTED.items():
        assert dwd_col in actual, \
            f"[Req 6.3] {dwd_table}: 缺少 dwd_col={dwd_col!r} 的映射条目"
        actual_src, actual_cast = actual[dwd_col]
        assert actual_src == expected_src, \
            f"[Req 6.3] {dwd_table}.{dwd_col}: ODS 源应为 {expected_src!r},实际为 {actual_src!r}"
        assert actual_cast == expected_cast, \
            f"[Req 6.3] {dwd_table}.{dwd_col}: cast_type 应为 {expected_cast!r},实际为 {actual_cast!r}"
# ══════════════════════════════════════════════════════════════════
# Property 2.7: 交叉引用 — FACT_MAPPINGS 的 DWD 表在 TABLE_MAP 中有 ODS 源表
# ══════════════════════════════════════════════════════════════════
def test_fact_mapping_tables_have_ods_source():
    """Each DWD table referenced by FACT_MAPPINGS has a non-blank ODS source in TABLE_MAP."""
    for table in DwdLoadTask.FACT_MAPPINGS:
        ods_table = DwdLoadTask.TABLE_MAP.get(table)
        valid = bool(ods_table) and isinstance(ods_table, str) and bool(ods_table.strip())
        assert valid, f"{table}: TABLE_MAP 中的 ODS 源表为空或不存在"
# ══════════════════════════════════════════════════════════════════
# Hypothesis 属性测试:随机选取 FACT_MAPPINGS 条目验证结构
# ══════════════════════════════════════════════════════════════════
# Strategy: sample a random entry from the flattened FACT_MAPPINGS list.
_mapping_entry_strategy = st.sampled_from(_ALL_MAPPING_ENTRIES)
@given(entry=_mapping_entry_strategy)
@settings(max_examples=300, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_random_fact_mapping_entry_structure(entry):
    """
    **Validates: Requirements 6.3**
    For any randomly sampled FACT_MAPPINGS entry, verify:
    - dwd_col is a non-empty string
    - ods_expr is a non-empty string
    - cast_type is in the legal set
    - the owning DWD table is registered in TABLE_MAP
    """
    table, dwd_col, ods_expr, cast_type = entry
    # Structural checks.
    assert isinstance(dwd_col, str) and dwd_col.strip(), \
        f"{table}: dwd_col 为空"
    assert isinstance(ods_expr, str) and ods_expr.strip(), \
        f"{table}.{dwd_col}: ods_expr 为空"
    assert cast_type in _VALID_CAST_TYPES, \
        f"{table}.{dwd_col}: 未知 cast_type={cast_type!r}"
    # Cross-reference checks.
    assert table in DwdLoadTask.TABLE_MAP, \
        f"{table} 未在 TABLE_MAP 中注册"
    ods_table = DwdLoadTask.TABLE_MAP[table]
    assert ods_table and isinstance(ods_table, str), \
        f"{table}: TABLE_MAP 中的 ODS 源表无效"
@given(table_name=st.sampled_from(_ALL_FACT_TABLES))
@settings(max_examples=100, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_random_table_no_duplicate_dwd_cols(table_name):
    """
    **Validates: Requirements 6.3**
    For any randomly chosen FACT_MAPPINGS table, its dwd_col names are unique.
    """
    observed: set[str] = set()
    for entry in DwdLoadTask.FACT_MAPPINGS[table_name]:
        dwd_col = entry[0]
        lowered = dwd_col.lower()
        assert lowered not in observed, \
            f"{table_name}: dwd_col={dwd_col!r} 重复出现"
        observed.add(lowered)

View File

@@ -0,0 +1,214 @@
# -*- coding: utf-8 -*-
"""
Feature: dataflow-field-completion, Property 3: TABLE_MAP 覆盖完整性
**Validates: Requirements 7.2, 8.2**
对于任意在 TABLE_MAP 中注册的 DWD 表,该表的所有非 SCD2 列要么在 FACT_MAPPINGS
中有显式映射,要么在对应 ODS 表中存在同名列(自动映射)。
本测试聚焦以下可静态验证的属性:
1. TABLE_MAP 所有条目的 ODS 源表非空
2. C 类表在 TABLE_MAP 中注册
3. C 类表在 FACT_MAPPINGS 中有条目
4. C 类表映射字段数量与期望一致
5. TABLE_MAP 与 FACT_MAPPINGS 交叉一致性
6. hypothesis 属性测试:随机 TABLE_MAP 条目结构验证
"""
from __future__ import annotations
import sys
from pathlib import Path
from hypothesis import given, settings, HealthCheck
import hypothesis.strategies as st
# ── 将 ETL 模块加入 sys.path ──
_ETL_ROOT = Path(__file__).resolve().parent.parent / "apps" / "etl" / "connectors" / "feiqiu"
if str(_ETL_ROOT) not in sys.path:
sys.path.insert(0, str(_ETL_ROOT))
from tasks.dwd.dwd_load_task import DwdLoadTask
# ── C-class table definitions (fully re-mapped stock tables) ──
C_CLASS_TABLES = [
    "dwd.dwd_goods_stock_summary",
    "dwd.dwd_goods_stock_movement",
]
# ── Expected mapped fields for the C-class tables ──
# goods_stock_summary → dwd_goods_stock_summary (14 fields)
_GOODS_STOCK_SUMMARY_EXPECTED_COLS = {
    "site_goods_id", "goods_name", "goods_unit", "goods_category_id",
    "goods_category_second_id", "category_name", "range_start_stock",
    "range_end_stock", "range_in", "range_out", "range_sale",
    "range_sale_money", "range_inventory", "current_stock",
}
# goods_stock_movements → dwd_goods_stock_movement (19 fields)
_GOODS_STOCK_MOVEMENT_EXPECTED_COLS = {
    "site_goods_stock_id", "tenant_id", "site_id", "site_goods_id",
    "goods_name", "goods_category_id", "goods_second_category_id",
    "unit", "price", "stock_type", "change_num", "start_num", "end_num",
    "change_num_a", "start_num_a", "end_num_a", "remark", "operator_name",
    "create_time",
}
# ── All TABLE_MAP entries, materialized once for sampling ──
_ALL_TABLE_MAP_ENTRIES = list(DwdLoadTask.TABLE_MAP.items())
# ══════════════════════════════════════════════════════════════════
# Property 3.1: TABLE_MAP 所有条目的 ODS 源表非空
# ══════════════════════════════════════════════════════════════════
def test_all_table_map_entries_have_ods_source():
    """Each DWD table in TABLE_MAP maps to a non-blank ODS source-table name."""
    for dwd_table, ods_table in DwdLoadTask.TABLE_MAP.items():
        ok = bool(ods_table) and isinstance(ods_table, str) and bool(ods_table.strip())
        assert ok, f"{dwd_table}: TABLE_MAP 中的 ODS 源表为空或无效"
# ══════════════════════════════════════════════════════════════════
# Property 3.2: C 类表在 TABLE_MAP 中注册
# ══════════════════════════════════════════════════════════════════
def test_c_class_tables_registered_in_table_map():
    """
    **Validates: Requirements 7.2, 8.2**
    Both C-class tables (dwd_goods_stock_summary, dwd_goods_stock_movement)
    must be registered in TABLE_MAP with a valid ODS source.
    """
    for table in C_CLASS_TABLES:
        registered = table in DwdLoadTask.TABLE_MAP
        assert registered, f"C 类表 {table} 未在 TABLE_MAP 中注册"
        ods_table = DwdLoadTask.TABLE_MAP[table]
        assert isinstance(ods_table, str) and ods_table, \
            f"C 类表 {table}: TABLE_MAP 中的 ODS 源表无效"
# ══════════════════════════════════════════════════════════════════
# Property 3.3: C 类表在 FACT_MAPPINGS 中有条目
# ══════════════════════════════════════════════════════════════════
def test_c_class_tables_have_fact_mappings():
    """
    **Validates: Requirements 7.2, 8.2**
    Each C-class table must own at least one FACT_MAPPINGS entry.
    """
    for table in C_CLASS_TABLES:
        entry_count = len(DwdLoadTask.FACT_MAPPINGS.get(table, []))
        assert entry_count > 0, \
            f"C 类表 {table} 在 FACT_MAPPINGS 中无条目"
# ══════════════════════════════════════════════════════════════════
# Property 3.4: goods_stock_summary 14 个字段全覆盖
# ══════════════════════════════════════════════════════════════════
def test_goods_stock_summary_mapping_coverage():
    """
    **Validates: Requirements 7.2**
    dwd.dwd_goods_stock_summary must map exactly the 14 expected fields.
    """
    table = "dwd.dwd_goods_stock_summary"
    entries = DwdLoadTask.FACT_MAPPINGS.get(table, [])
    # Entry count, then exact set equality checked in both directions.
    assert len(entries) == 14, \
        f"{table}: 期望 14 个映射条目,实际 {len(entries)}"
    actual_cols = {entry[0].lower() for entry in entries}
    missing = _GOODS_STOCK_SUMMARY_EXPECTED_COLS - actual_cols
    assert not missing, \
        f"{table}: 缺少映射字段 {missing}"
    extra = actual_cols - _GOODS_STOCK_SUMMARY_EXPECTED_COLS
    assert not extra, \
        f"{table}: 存在多余映射字段 {extra}"
# ══════════════════════════════════════════════════════════════════
# Property 3.5: goods_stock_movement 19 个字段全覆盖
# ══════════════════════════════════════════════════════════════════
def test_goods_stock_movement_mapping_coverage():
    """
    **Validates: Requirements 8.2**
    dwd.dwd_goods_stock_movement must map exactly the 19 expected fields.
    """
    table = "dwd.dwd_goods_stock_movement"
    entries = DwdLoadTask.FACT_MAPPINGS.get(table, [])
    # Entry count, then exact set equality checked in both directions.
    assert len(entries) == 19, \
        f"{table}: 期望 19 个映射条目,实际 {len(entries)}"
    actual_cols = {entry[0].lower() for entry in entries}
    missing = _GOODS_STOCK_MOVEMENT_EXPECTED_COLS - actual_cols
    assert not missing, \
        f"{table}: 缺少映射字段 {missing}"
    extra = actual_cols - _GOODS_STOCK_MOVEMENT_EXPECTED_COLS
    assert not extra, \
        f"{table}: 存在多余映射字段 {extra}"
# ══════════════════════════════════════════════════════════════════
# Property 3.6: FACT_MAPPINGS 是 TABLE_MAP 的子集
# ══════════════════════════════════════════════════════════════════
def test_fact_mappings_subset_of_table_map():
    """FACT_MAPPINGS keys must form a subset of TABLE_MAP keys."""
    for table in DwdLoadTask.FACT_MAPPINGS.keys():
        present = table in DwdLoadTask.TABLE_MAP
        assert present, f"FACT_MAPPINGS 中的 {table} 未在 TABLE_MAP 中注册"
# ══════════════════════════════════════════════════════════════════
# Hypothesis 属性测试:随机 TABLE_MAP 条目结构验证
# ══════════════════════════════════════════════════════════════════
# Strategy: sample a random (dwd_table, ods_table) pair from TABLE_MAP.
_table_map_entry_strategy = st.sampled_from(_ALL_TABLE_MAP_ENTRIES)
@given(entry=_table_map_entry_strategy)
@settings(max_examples=200, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_random_table_map_entry_valid(entry):
    """
    **Validates: Requirements 7.2, 8.2**
    For any randomly sampled TABLE_MAP entry, verify:
    - the DWD table name is a non-empty string prefixed with "dwd."
    - the ODS source-table name is a non-empty string prefixed with "ods."
    - if the table has FACT_MAPPINGS entries, each is a legal triple
    """
    dwd_table, ods_table = entry
    # DWD table-name format.
    assert isinstance(dwd_table, str) and dwd_table.startswith("dwd."), \
        f"TABLE_MAP key {dwd_table!r} 不以 'dwd.' 开头"
    # ODS source-table-name format.
    assert isinstance(ods_table, str) and ods_table.startswith("ods."), \
        f"TABLE_MAP[{dwd_table}] = {ods_table!r} 不以 'ods.' 开头"
    # If FACT_MAPPINGS entries exist, validate their structure too.
    entries = DwdLoadTask.FACT_MAPPINGS.get(dwd_table, [])
    for i, e in enumerate(entries):
        assert isinstance(e, (tuple, list)) and len(e) == 3, \
            f"{dwd_table}[{i}]: FACT_MAPPINGS 条目应为三元组"
        dwd_col, ods_expr, cast_type = e
        assert isinstance(dwd_col, str) and dwd_col.strip(), \
            f"{dwd_table}[{i}]: dwd_col 为空"
        assert isinstance(ods_expr, str) and ods_expr.strip(), \
            f"{dwd_table}[{i}]: ods_expr 为空"
        assert cast_type is None or isinstance(cast_type, str), \
            f"{dwd_table}[{i}].{dwd_col}: cast_type 类型无效"

View File

@@ -0,0 +1,454 @@
# -*- coding: utf-8 -*-
"""
Feature: dataflow-field-completion, Property 5: ETL 参数解析与 CLI 命令构建正确性
**Validates: Requirements 14.1, 14.2**
对于任意合法的 ETL 执行参数组合(门店列表、数据源模式、校验模式、时间范围、
窗口切分、force-full 标志、任务选择Backend 构建的 CLI 命令字符串应包含
所有指定参数,且参数值与输入一致。
测试策略:
- 使用 hypothesis 生成随机 TaskConfigSchema 实例
- 随机 flow从 VALID_FLOWS 中选择)
- 随机 processing_mode从 VALID_PROCESSING_MODES 中选择)
- 随机任务代码列表(从 task_registry 中选择)
- 随机时间窗口模式lookback / custom
- 随机 window_split 和 window_split_days
- 随机 force_full / dry_run / fetch_before_verify 布尔值
- 随机 store_id
- 随机 ods_use_local_json
验证:
1. 构建的 CLI 命令包含 --flow 且值与 flow 一致
2. 任务代码通过 --tasks 正确传递
3. 时间范围参数格式正确且值一致
4. 布尔标志(--force-full / --dry-run / --fetch-before-verify正确出现或缺失
5. --store-id 值与输入一致
6. --window-split / --window-split-days 正确传递
7. --data-source offline 在 ods_use_local_json=True 时出现
"""
from __future__ import annotations

import datetime
import sys
from pathlib import Path

from hypothesis import given, settings, HealthCheck, assume
import hypothesis.strategies as st
# ── 将后端模块加入 sys.path ──
_BACKEND_ROOT = Path(__file__).resolve().parent.parent / "apps" / "backend"
if str(_BACKEND_ROOT) not in sys.path:
sys.path.insert(0, str(_BACKEND_ROOT))
from app.services.cli_builder import CLIBuilder, VALID_FLOWS, VALID_PROCESSING_MODES
from app.schemas.tasks import TaskConfigSchema
from app.services.task_registry import ALL_TASKS
# ══════════════════════════════════════════════════════════════════
# Constants and strategies
# ══════════════════════════════════════════════════════════════════
# Every legal task code known to the backend task registry.
_ALL_TASK_CODES: list[str] = [t.code for t in ALL_TASKS]
# Legal --flow values.
_VALID_FLOWS_LIST = sorted(VALID_FLOWS)
# Legal --processing-mode values.
_VALID_MODES_LIST = sorted(VALID_PROCESSING_MODES)
# Window-split modes supported by the CLI.
_VALID_WINDOW_SPLITS = ["none", "day", "week", "month"]
# Date-string strategy (YYYY-MM-DD).
# Fix: min_value was written as a dead conditional
# (`st.just(2024, 1, 1).__wrapped__ if False else __import__("datetime").date(...)`);
# only the else-branch could ever run, so the live expression is kept directly.
_date_str = st.dates(
    min_value=datetime.date(2024, 1, 1),
    max_value=datetime.date(2026, 12, 31),
).map(lambda d: d.isoformat())
@st.composite
def _valid_config(draw) -> TaskConfigSchema:
    """Generate one valid TaskConfigSchema instance.

    Draw order matters for hypothesis shrinking — do not reorder the draws.
    """
    # Pick 1-5 distinct task codes from the registry.
    tasks = draw(st.lists(
        st.sampled_from(_ALL_TASK_CODES),
        min_size=1,
        max_size=5,
        unique=True,
    ))
    flow_id = draw(st.sampled_from(_VALID_FLOWS_LIST))
    processing_mode = draw(st.sampled_from(_VALID_MODES_LIST))
    # Time-window mode: relative lookback vs explicit custom range.
    window_mode = draw(st.sampled_from(["lookback", "custom"]))
    window_start = None
    window_end = None
    lookback_hours = 24
    overlap_seconds = 600
    if window_mode == "custom":
        # Build a valid start <= end date pair (swap when drawn reversed).
        start = draw(_date_str)
        end = draw(_date_str)
        if start > end:
            start, end = end, start
        window_start = start
        window_end = end
    else:
        lookback_hours = draw(st.integers(min_value=1, max_value=720))
        overlap_seconds = draw(st.integers(min_value=0, max_value=7200))
    # Window splitting; split-days is only drawn when a split mode is active.
    window_split = draw(st.sampled_from(_VALID_WINDOW_SPLITS))
    window_split_days = None
    if window_split != "none":
        window_split_days = draw(st.integers(min_value=1, max_value=30))
    # Boolean flags.
    force_full = draw(st.booleans())
    dry_run = draw(st.booleans())
    fetch_before_verify = draw(st.booleans())
    ods_use_local_json = draw(st.booleans())
    # store_id: either absent (None) or a positive integer.
    store_id = draw(st.one_of(st.none(), st.integers(min_value=1, max_value=999999)))
    return TaskConfigSchema(
        tasks=tasks,
        flow=flow_id,
        processing_mode=processing_mode,
        window_mode=window_mode,
        window_start=window_start,
        window_end=window_end,
        lookback_hours=lookback_hours,
        overlap_seconds=overlap_seconds,
        window_split=window_split,
        window_split_days=window_split_days,
        force_full=force_full,
        dry_run=dry_run,
        fetch_before_verify=fetch_before_verify,
        ods_use_local_json=ods_use_local_json,
        store_id=store_id,
    )
# Module-wide CLIBuilder instance shared by all property tests.
_builder = CLIBuilder()
# Relative ETL connector path handed to the builder.
_ETL_PATH = "apps/etl/connectors/feiqiu"
def _build(config: TaskConfigSchema) -> list[str]:
    """Convenience wrapper: build the CLI command list for *config*."""
    cmd = _builder.build_command(config, _ETL_PATH)
    return cmd
def _get_arg_value(cmd: list[str], flag: str) -> str | None:
"""从命令列表中提取指定 flag 后面的值"""
try:
idx = cmd.index(flag)
if idx + 1 < len(cmd):
return cmd[idx + 1]
except ValueError:
pass
return None
def _has_flag(cmd: list[str], flag: str) -> bool:
"""检查命令列表中是否包含指定 flag"""
return flag in cmd
# ══════════════════════════════════════════════════════════════════
# Property 5a: --flow matches config.flow
# ══════════════════════════════════════════════════════════════════
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_flow_param_matches_flow(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**
    The built CLI command must carry --flow with a value equal to config.flow.
    """
    cmd = _build(config)
    actual = _get_arg_value(cmd, "--flow")
    assert actual is not None, "CLI 命令缺少 --flow 参数"
    assert actual == config.flow, (
        f"--flow 值 {actual!r} != config.flow {config.flow!r}"
    )
# ══════════════════════════════════════════════════════════════════
# Property 5b: --tasks carries every requested task code
# ══════════════════════════════════════════════════════════════════
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_tasks_param_contains_all_codes(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**
    --tasks must carry every requested task code (comma separated).
    An empty task list is expected to add no --tasks argument at all.
    """
    cmd = _build(config)
    tasks_value = _get_arg_value(cmd, "--tasks")
    if not config.tasks:
        # Empty task list: CLIBuilder intentionally omits --tasks.
        return
    assert tasks_value is not None, "CLI 命令缺少 --tasks 参数"
    parsed_tasks = set(tasks_value.split(","))
    expected_tasks = set(config.tasks)
    assert parsed_tasks == expected_tasks, (
        f"--tasks 解析结果 {parsed_tasks} != 期望 {expected_tasks}"
    )
# ══════════════════════════════════════════════════════════════════
# Property 5c: time-window parameters are forwarded correctly
# ══════════════════════════════════════════════════════════════════
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_time_window_params_correct(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**
    - lookback mode: command carries --lookback-hours and --overlap-seconds
    - custom mode: command carries --window-start and --window-end
    - the two modes are mutually exclusive
    """
    cmd = _build(config)
    if config.window_mode == "lookback":
        # Lookback mode: --lookback-hours must be present and match.
        lh = _get_arg_value(cmd, "--lookback-hours")
        assert lh is not None, "lookback 模式缺少 --lookback-hours"
        assert lh == str(config.lookback_hours), (
            f"--lookback-hours {lh!r} != {config.lookback_hours}"
        )
        os_val = _get_arg_value(cmd, "--overlap-seconds")
        assert os_val is not None, "lookback 模式缺少 --overlap-seconds"
        assert os_val == str(config.overlap_seconds), (
            f"--overlap-seconds {os_val!r} != {config.overlap_seconds}"
        )
        # Custom-mode parameters must not leak in.
        assert not _has_flag(cmd, "--window-start"), (
            "lookback 模式不应包含 --window-start"
        )
        assert not _has_flag(cmd, "--window-end"), (
            "lookback 模式不应包含 --window-end"
        )
    else:
        # Custom mode: explicit window bounds are forwarded verbatim.
        if config.window_start:
            ws = _get_arg_value(cmd, "--window-start")
            assert ws == config.window_start, (
                f"--window-start {ws!r} != {config.window_start!r}"
            )
        if config.window_end:
            we = _get_arg_value(cmd, "--window-end")
            assert we == config.window_end, (
                f"--window-end {we!r} != {config.window_end!r}"
            )
        # Lookback parameters must not leak in.
        assert not _has_flag(cmd, "--lookback-hours"), (
            "custom 模式不应包含 --lookback-hours"
        )
# ══════════════════════════════════════════════════════════════════
# Property 5d: boolean flags appear exactly when enabled
# ══════════════════════════════════════════════════════════════════
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_boolean_flags_correct(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**
    - force_full=True -> command contains --force-full
    - dry_run=True -> command contains --dry-run
    - fetch_before_verify=True and processing_mode="verify_only" -> --fetch-before-verify
    - ods_use_local_json=True -> command contains --data-source offline
    """
    cmd = _build(config)
    # force_full
    if config.force_full:
        assert _has_flag(cmd, "--force-full"), "force_full=True 但命令缺少 --force-full"
    else:
        assert not _has_flag(cmd, "--force-full"), "force_full=False 但命令包含 --force-full"
    # dry_run
    if config.dry_run:
        assert _has_flag(cmd, "--dry-run"), "dry_run=True 但命令缺少 --dry-run"
    else:
        assert not _has_flag(cmd, "--dry-run"), "dry_run=False 但命令包含 --dry-run"
    # fetch_before_verify (only effective in verify_only mode)
    if config.fetch_before_verify and config.processing_mode == "verify_only":
        assert _has_flag(cmd, "--fetch-before-verify"), (
            "fetch_before_verify=True + verify_only 但命令缺少 --fetch-before-verify"
        )
    else:
        assert not _has_flag(cmd, "--fetch-before-verify"), (
            "非 verify_only 模式或 fetch_before_verify=False 但命令包含 --fetch-before-verify"
        )
    # ods_use_local_json
    if config.ods_use_local_json:
        ds = _get_arg_value(cmd, "--data-source")
        # Fix: the fullwidth parenthesis in this message was left unbalanced
        # ("(期望 'offline'" with no closing "").
        assert ds == "offline", (
            f"ods_use_local_json=True 但 --data-source={ds!r}(期望 'offline'"
        )
    else:
        # No --data-source expected unless extra_args explicitly sets one.
        if "data_source" not in config.extra_args:
            ds = _get_arg_value(cmd, "--data-source")
            assert ds is None, (
                f"ods_use_local_json=False 但命令包含 --data-source {ds!r}"
            )
# ══════════════════════════════════════════════════════════════════
# Property 5e: --store-id is forwarded correctly
# ══════════════════════════════════════════════════════════════════
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_store_id_param_correct(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**
    With a non-None store_id the command must carry a matching --store-id;
    with store_id=None the command must not carry --store-id at all.
    """
    cmd = _build(config)
    sid = _get_arg_value(cmd, "--store-id")
    if config.store_id is None:
        assert sid is None, f"store_id=None 但命令包含 --store-id {sid!r}"
        return
    assert sid is not None, "store_id 不为 None 但命令缺少 --store-id"
    assert sid == str(config.store_id), (
        f"--store-id {sid!r} != {config.store_id}"
    )
# ══════════════════════════════════════════════════════════════════
# Property 5f: --window-split / --window-split-days are forwarded correctly
# ══════════════════════════════════════════════════════════════════
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_window_split_params_correct(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**
    A window_split other than "none" must produce --window-split (and
    --window-split-days when configured); "none" must produce neither.
    """
    cmd = _build(config)
    if not config.window_split or config.window_split == "none":
        assert not _has_flag(cmd, "--window-split"), (
            "window_split='none' 但命令包含 --window-split"
        )
        return
    ws = _get_arg_value(cmd, "--window-split")
    assert ws == config.window_split, (
        f"--window-split {ws!r} != {config.window_split!r}"
    )
    if config.window_split_days is not None:
        wsd = _get_arg_value(cmd, "--window-split-days")
        assert wsd == str(config.window_split_days), (
            f"--window-split-days {wsd!r} != {config.window_split_days}"
        )
# ══════════════════════════════════════════════════════════════════
# Property 5g: --processing-mode is forwarded correctly
# ══════════════════════════════════════════════════════════════════
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_processing_mode_param_correct(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**
    A non-empty processing_mode must produce a matching --processing-mode.
    """
    cmd = _build(config)
    if not config.processing_mode:
        return
    pm = _get_arg_value(cmd, "--processing-mode")
    assert pm is not None, "processing_mode 不为空但命令缺少 --processing-mode"
    assert pm == config.processing_mode, (
        f"--processing-mode {pm!r} != {config.processing_mode!r}"
    )
# ══════════════════════════════════════════════════════════════════
# Property 5h: command string is consistent with the command list
# ══════════════════════════════════════════════════════════════════
@given(config=_valid_config())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_command_string_consistent_with_list(config: TaskConfigSchema):
    """
    **Validates: Requirements 14.1, 14.2**
    build_command_string() must agree with build_command(): every list
    argument appears in the string, quoted when it contains spaces/quotes.
    """
    cmd_list = _builder.build_command(config, _ETL_PATH)
    cmd_str = _builder.build_command_string(config, _ETL_PATH)
    # Verify each list argument appears in the string form.
    for arg in cmd_list:
        needs_quoting = (" " in arg) or ('"' in arg)
        if needs_quoting:
            # Arguments with spaces must be wrapped in double quotes.
            assert f'"{arg}"' in cmd_str, (
                f"含空格参数 {arg!r} 未在命令字符串中被正确引用"
            )
        else:
            assert arg in cmd_str, (
                f"参数 {arg!r} 未出现在命令字符串中"
            )

View File

@@ -0,0 +1,328 @@
# -*- coding: utf-8 -*-
"""
Feature: dataflow-field-completion, Property 6: 数据一致性检查正确性
**Validates: Requirements 16.2, 16.3**
对于任意 ODS 行和对应的 DWD 行,黑盒测试检查器应能正确识别:
(a) ODS 中存在但 DWD 中缺失的字段
(b) ODS 与 DWD 之间值不一致的字段
测试策略:
- 使用 hypothesis 生成随机 API 字段集合和 ODS 列集合
- 使用 hypothesis 生成随机 DWD 列集合、ODS 列集合和 FACT_MAPPINGS 列表
- 验证属性:
1. check_api_vs_ods_fields当 API 字段是 ODS 列的子集时,结果应为 passed
2. check_api_vs_ods_fields当 API 字段不在 ODS 列中时missing_fields > 0
3. check_ods_vs_dwd_mappings当所有 DWD 列都有映射时,结果应为 passed
4. check_ods_vs_dwd_mappings当 DWD 列无映射源时missing_fields > 0
5. total_fields = passed_fields + missing_fields + mismatch_fields
6. field_results 列表长度 = total_fields
"""
from __future__ import annotations
import sys
from pathlib import Path
from hypothesis import given, settings, HealthCheck, assume
import hypothesis.strategies as st
# ── 将 ETL 模块加入 sys.path ──
_ETL_ROOT = Path(__file__).resolve().parent.parent / "apps" / "etl" / "connectors" / "feiqiu"
if str(_ETL_ROOT) not in sys.path:
sys.path.insert(0, str(_ETL_ROOT))
from quality.consistency_checker import (
check_api_vs_ods_fields,
check_ods_vs_dwd_mappings,
ODS_META_COLUMNS,
)
from tasks.dwd.dwd_load_task import DwdLoadTask
# Lower-cased SCD2 column set; check_ods_vs_dwd_mappings excludes these internally.
_SCD_COLS_LOWER = {c.lower() for c in DwdLoadTask.SCD_COLS}
# ══════════════════════════════════════════════════════════════════
# Strategies: generate valid column-name sets
# ══════════════════════════════════════════════════════════════════
# Column names: lowercase letter start, letters/digits/underscores, length 2-20;
# filtered so they never clash with SCD2 or ODS metadata columns.
_col_name = st.from_regex(r"[a-z][a-z0-9_]{1,19}", fullmatch=True).filter(
    lambda c: c not in _SCD_COLS_LOWER and c not in ODS_META_COLUMNS
)
# Non-empty column-name set.
_col_set = st.frozensets(_col_name, min_size=1, max_size=15).map(set)
# Possibly-empty column-name set.
_col_set_maybe_empty = st.frozensets(_col_name, min_size=0, max_size=15).map(set)
# ══════════════════════════════════════════════════════════════════
# Property 6a: API fields that form a subset of ODS columns pass
# ══════════════════════════════════════════════════════════════════
@given(common=_col_set, extra_ods=_col_set_maybe_empty)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_api_subset_of_ods_passes(common: set[str], extra_ods: set[str]):
    """
    **Validates: Requirements 16.2, 16.3**
    When the API field set is a subset of the ODS column set,
    check_api_vs_ods_fields must return passed=True with no misses.
    """
    api_fields = common
    # ODS columns = shared columns plus extras, so every API field is covered.
    ods_columns = common.union(extra_ods)
    result = check_api_vs_ods_fields(api_fields, ods_columns)
    assert result.passed is True, (
        f"API 字段 {api_fields} 是 ODS 列 {ods_columns} 的子集,但 passed={result.passed}"
    )
    assert result.missing_fields == 0
# ══════════════════════════════════════════════════════════════════
# Property 6b: API fields absent from ODS are reported as missing
# ══════════════════════════════════════════════════════════════════
@given(common=_col_set_maybe_empty, api_only=_col_set, ods_only=_col_set_maybe_empty)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_api_fields_not_in_ods_detected_as_missing(
    common: set[str], api_only: set[str], ods_only: set[str]
):
    """
    **Validates: Requirements 16.2, 16.3**
    When some API fields are absent from the ODS columns, the check must
    report missing_fields > 0 and passed=False.
    """
    # Keep only API-side names overlapping neither common nor ODS-only columns.
    api_only_clean = api_only.difference(common).difference(ods_only)
    assume(api_only_clean)
    api_fields = common | api_only_clean
    ods_columns = common | ods_only
    result = check_api_vs_ods_fields(api_fields, ods_columns)
    assert result.passed is False, (
        f"API 有 {len(api_only_clean)} 个字段不在 ODS 中,但 passed=True"
    )
    assert result.missing_fields >= len(api_only_clean), (
        f"期望 missing_fields >= {len(api_only_clean)},实际 {result.missing_fields}"
    )
# ══════════════════════════════════════════════════════════════════
# Property 6c: fully mapped DWD columns -> passed
# ══════════════════════════════════════════════════════════════════
@st.composite
def _fully_mapped_scenario(draw):
    """Generate a scenario where every DWD column has a mapping source
    (a mix of explicit mappings and same-name auto mappings)."""
    # Auto-mapped columns: identical names on the ODS and DWD sides.
    auto_cols = draw(st.frozensets(_col_name, min_size=0, max_size=8).map(set))
    # Explicitly mapped columns: DWD names differ from their ODS source names.
    explicit_dwd = draw(st.frozensets(_col_name, min_size=0, max_size=8).map(set))
    explicit_dwd = explicit_dwd - auto_cols  # keep disjoint from auto-mapped columns
    # Candidate ODS source names for the explicit mappings.
    explicit_ods_names = draw(
        st.frozensets(_col_name, min_size=len(explicit_dwd), max_size=len(explicit_dwd) + 5).map(set)
    )
    # Source names must not collide with DWD names (they would be treated as auto mappings).
    explicit_ods_names = explicit_ods_names - auto_cols - explicit_dwd
    # Discard the draw when too few distinct source names remain.
    assume(len(explicit_ods_names) >= len(explicit_dwd))
    ods_list = sorted(explicit_ods_names)[:len(explicit_dwd)]
    dwd_list = sorted(explicit_dwd)
    fact_mappings = [(d, o, None) for d, o in zip(dwd_list, ods_list)]
    dwd_columns = auto_cols | explicit_dwd
    ods_columns = auto_cols | set(ods_list)
    assume(len(dwd_columns) > 0)
    return dwd_columns, ods_columns, fact_mappings
@given(scenario=_fully_mapped_scenario())
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_all_dwd_cols_mapped_passes(scenario):
    """
    **Validates: Requirements 16.2, 16.3**
    When every non-SCD2 DWD column has a mapping source (explicit or
    automatic), the check must report passed=True with zero misses.
    """
    dwd_columns, ods_columns, fact_mappings = scenario
    # An empty mapping list is passed as None, matching production call sites.
    mappings_arg = fact_mappings if fact_mappings else None
    result = check_ods_vs_dwd_mappings(
        "dwd.test_table", "ods.test_table",
        dwd_columns, ods_columns,
        mappings_arg,
    )
    assert result.passed is True, (
        f"所有 DWD 列都有映射但 passed=False。"
        f" missing={result.missing_fields}, mismatch={result.mismatch_fields}"
    )
    assert result.missing_fields == 0
    assert result.mismatch_fields == 0
# ══════════════════════════════════════════════════════════════════
# Property 6d: unmapped DWD columns are reported as missing
# ══════════════════════════════════════════════════════════════════
@given(
    mapped_cols=_col_set_maybe_empty,
    orphan_cols=_col_set,
    ods_cols=_col_set_maybe_empty,
)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_unmapped_dwd_cols_detected_as_missing(
    mapped_cols: set[str], orphan_cols: set[str], ods_cols: set[str]
):
    """
    **Validates: Requirements 16.2, 16.3**
    When some DWD columns have no mapping source at all, the check must
    report missing_fields > 0 and passed=False.
    """
    # Orphans must not collide with auto-mapped or ODS columns.
    orphan_clean = orphan_cols.difference(mapped_cols).difference(ods_cols)
    assume(orphan_clean)
    # mapped_cols are auto-mapped: the same name exists on the ODS side.
    dwd_columns = mapped_cols | orphan_clean
    ods_columns = mapped_cols | ods_cols
    result = check_ods_vs_dwd_mappings(
        "dwd.test_table", "ods.test_table",
        dwd_columns, ods_columns, None,
    )
    assert result.passed is False, (
        f"DWD 有 {len(orphan_clean)} 个无映射列,但 passed=True"
    )
    assert result.missing_fields >= len(orphan_clean), (
        f"期望 missing_fields >= {len(orphan_clean)},实际 {result.missing_fields}"
    )
# ══════════════════════════════════════════════════════════════════
# Property 6e: total_fields = passed_fields + missing_fields + mismatch_fields
# ══════════════════════════════════════════════════════════════════
@given(api_fields=_col_set, ods_columns=_col_set)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_api_vs_ods_field_counts_consistent(api_fields: set[str], ods_columns: set[str]):
    """
    **Validates: Requirements 16.2, 16.3**
    In check_api_vs_ods_fields results:
    total_fields = passed_fields + missing_fields + mismatch_fields
    """
    result = check_api_vs_ods_fields(api_fields, ods_columns)
    actual_sum = sum((result.passed_fields, result.missing_fields, result.mismatch_fields))
    assert result.total_fields == actual_sum, (
        f"total_fields={result.total_fields} != "
        f"passed({result.passed_fields}) + missing({result.missing_fields}) + "
        f"mismatch({result.mismatch_fields}) = {actual_sum}"
    )
@given(dwd_columns=_col_set, ods_columns=_col_set)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_ods_vs_dwd_field_counts_consistent(dwd_columns: set[str], ods_columns: set[str]):
    """
    **Validates: Requirements 16.2, 16.3**
    In check_ods_vs_dwd_mappings results:
    total_fields = passed_fields + missing_fields + mismatch_fields
    """
    result = check_ods_vs_dwd_mappings(
        "dwd.test_table", "ods.test_table",
        dwd_columns, ods_columns, None,
    )
    actual_sum = sum((result.passed_fields, result.missing_fields, result.mismatch_fields))
    assert result.total_fields == actual_sum, (
        f"total_fields={result.total_fields} != "
        f"passed({result.passed_fields}) + missing({result.missing_fields}) + "
        f"mismatch({result.mismatch_fields}) = {actual_sum}"
    )
# ══════════════════════════════════════════════════════════════════
# Property 6f: len(field_results) == total_fields
# ══════════════════════════════════════════════════════════════════
@given(api_fields=_col_set, ods_columns=_col_set)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_api_vs_ods_field_results_length(api_fields: set[str], ods_columns: set[str]):
    """
    **Validates: Requirements 16.2, 16.3**
    The field_results list of check_api_vs_ods_fields must have exactly
    total_fields entries.
    """
    result = check_api_vs_ods_fields(api_fields, ods_columns)
    n_results = len(result.field_results)
    assert n_results == result.total_fields, (
        f"field_results 长度 {n_results} != total_fields {result.total_fields}"
    )
@given(dwd_columns=_col_set, ods_columns=_col_set)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_ods_vs_dwd_field_results_length(dwd_columns: set[str], ods_columns: set[str]):
    """
    **Validates: Requirements 16.2, 16.3**
    The field_results list of check_ods_vs_dwd_mappings must have exactly
    total_fields entries.
    """
    result = check_ods_vs_dwd_mappings(
        "dwd.test_table", "ods.test_table",
        dwd_columns, ods_columns, None,
    )
    n_results = len(result.field_results)
    assert n_results == result.total_fields, (
        f"field_results 长度 {n_results} != total_fields {result.total_fields}"
    )

View File

@@ -0,0 +1,263 @@
# -*- coding: utf-8 -*-
"""
Feature: dataflow-field-completion, Property 7: 计时器记录完整性
**Validates: Requirements 15.2**
对于任意 ETL 步骤序列,计时器输出应包含每个步骤的名称、开始时间、结束时间和耗时,
且耗时等于结束时间减去开始时间。
测试策略:
- 使用 hypothesis 生成随机步骤名称列表1-10 个步骤)
- 每个步骤可选包含 0-5 个子步骤
- 验证属性:
1. 每个步骤的 to_dict() 输出包含 name、start_time、end_time、elapsed_ms
2. elapsed_ms ≈ (end_time - start_time) 的毫秒数(允许 ±50ms 误差)
3. 所有步骤名称都出现在 timer.steps 中
4. timer.to_dict() 的 steps 数量等于实际添加的步骤数
5. 子步骤的 elapsed_ms 也满足上述一致性
"""
from __future__ import annotations
import sys
import time
from datetime import datetime
from pathlib import Path
from hypothesis import given, settings, HealthCheck
import hypothesis.strategies as st
# ── 将 ETL 模块加入 sys.path ──
_ETL_ROOT = (
Path(__file__).resolve().parent.parent
/ "apps" / "etl" / "connectors" / "feiqiu"
)
if str(_ETL_ROOT) not in sys.path:
sys.path.insert(0, str(_ETL_ROOT))
from utils.timer import EtlTimer
# ══════════════════════════════════════════════════════════════════
# Hypothesis strategies
# ══════════════════════════════════════════════════════════════════
# Step names: ASCII upper-case letters + digits + underscore, mimicking real ETL task names.
_step_name = st.text(
    alphabet=st.sampled_from("ABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789"),
    min_size=3,
    max_size=20,
)
# Sub-step names (lower-case variant, so they are visually distinct from step names).
_sub_step_name = st.text(
    alphabet=st.sampled_from("abcdefghijklmnopqrstuvwxyz_0123456789"),
    min_size=2,
    max_size=15,
)
@st.composite
def _step_spec(draw):
    """Generate one step spec: (step name, [sub-step names])."""
    name = draw(_step_name)
    # 0-5 sub-steps with unique names.
    sub_names = draw(
        st.lists(_sub_step_name, min_size=0, max_size=5, unique=True)
    )
    return (name, sub_names)
# 1-10 steps with unique step names.
_steps_strategy = st.lists(
    _step_spec(),
    min_size=1,
    max_size=10,
).filter(
    # Keep only spec lists whose step names are all distinct.
    lambda specs: len(set(s[0] for s in specs)) == len(specs)
)
# Allowed timing slack in milliseconds — perf_counter and datetime.now drift slightly.
_TOLERANCE_MS = 50.0
# ══════════════════════════════════════════════════════════════════
# Helpers
# ══════════════════════════════════════════════════════════════════
def _run_timer(step_specs: list[tuple[str, list[str]]]) -> EtlTimer:
    """Drive an EtlTimer through *step_specs* and return it once finished."""
    timer = EtlTimer()
    timer.start()
    for name, children in step_specs:
        timer.start_step(name)
        for child in children:
            timer.start_sub_step(name, child)
            # Tiny pause so start and end timestamps differ measurably.
            time.sleep(0.001)
            timer.stop_sub_step(name, child)
        time.sleep(0.001)
        timer.stop_step(name)
    timer.finish(write_report=False)
    return timer
def _parse_iso(iso_str: str) -> datetime:
"""解析 ISO 格式时间字符串"""
return datetime.fromisoformat(iso_str)
# ══════════════════════════════════════════════════════════════════
# Property 7a: to_dict() carries the required fields
# ══════════════════════════════════════════════════════════════════
@given(step_specs=_steps_strategy)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_step_dict_contains_required_fields(step_specs):
    """
    **Validates: Requirements 15.2**
    Every step's to_dict() output must carry the name, start_time,
    end_time and elapsed_ms fields, and both timestamps must be set.
    """
    timer = _run_timer(step_specs)
    required = ("name", "start_time", "end_time", "elapsed_ms")
    for step in timer.steps:
        d = step.to_dict()
        for key in required:
            assert key in d, f"步骤 {step.name} 的 to_dict() 缺少字段: {key}"
        # Both timestamps must have been recorded.
        assert d["start_time"] is not None, f"步骤 {step.name} 的 start_time 为 None"
        assert d["end_time"] is not None, f"步骤 {step.name} 的 end_time 为 None"
# ══════════════════════════════════════════════════════════════════
# Property 7b: elapsed_ms ≈ (end_time - start_time)
# ══════════════════════════════════════════════════════════════════
@given(step_specs=_steps_strategy)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_step_elapsed_ms_consistent_with_timestamps(step_specs):
    """
    **Validates: Requirements 15.2**
    Each step's elapsed_ms must approximately equal the wall-clock
    difference (end_time - start_time) in milliseconds, within ±50ms
    (perf_counter vs datetime.now drift).
    """
    timer = _run_timer(step_specs)
    for step in timer.steps:
        d = step.to_dict()
        start_dt = _parse_iso(d["start_time"])
        end_dt = _parse_iso(d["end_time"])
        wall_ms = (end_dt - start_dt).total_seconds() * 1000
        elapsed_ms = d["elapsed_ms"]
        diff = abs(elapsed_ms - wall_ms)
        # Fix: the two f-string halves concatenated without any separator
        # ("elapsed_ms=1.234wall_clock_ms=..."); a space is inserted.
        assert diff <= _TOLERANCE_MS, (
            f"步骤 {step.name}: elapsed_ms={elapsed_ms:.3f} "
            f"wall_clock_ms={wall_ms:.3f} 差异 {diff:.3f}ms 超过容差 {_TOLERANCE_MS}ms"
        )
# ══════════════════════════════════════════════════════════════════
# Property 7c: every step name shows up in timer.steps
# ══════════════════════════════════════════════════════════════════
@given(step_specs=_steps_strategy)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_all_step_names_present(step_specs):
    """
    **Validates: Requirements 15.2**
    Every step name that was started must appear in timer.steps.
    """
    timer = _run_timer(step_specs)
    recorded_names = set()
    for recorded_step in timer.steps:
        recorded_names.add(recorded_step.name)
    for step_name, _ in step_specs:
        assert step_name in recorded_names, (
            f"步骤 {step_name!r} 未出现在 timer.steps 中"
        )
# ══════════════════════════════════════════════════════════════════
# Property 7d: to_dict() step count equals the number of steps run
# ══════════════════════════════════════════════════════════════════
@given(step_specs=_steps_strategy)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_timer_dict_step_count_matches(step_specs):
    """
    **Validates: Requirements 15.2**
    The steps list in timer.to_dict() must have one entry per step added.
    """
    timer = _run_timer(step_specs)
    recorded_steps = timer.to_dict()["steps"]
    assert len(recorded_steps) == len(step_specs), (
        f"to_dict() steps 数量 {len(recorded_steps)} "
        f"!= 实际步骤数 {len(step_specs)}"
    )
# ══════════════════════════════════════════════════════════════════
# Property 7e: sub-step elapsed_ms is consistent too
# ══════════════════════════════════════════════════════════════════
@given(step_specs=_steps_strategy)
@settings(
    max_examples=100,
    deadline=None,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_sub_step_elapsed_ms_consistent(step_specs):
    """
    **Validates: Requirements 15.2**
    Sub-step elapsed_ms must also approximately equal the wall-clock
    (end_time - start_time) in milliseconds, and each sub-step's
    to_dict() output must carry the required fields.
    """
    timer = _run_timer(step_specs)
    for step in timer.steps:
        step_dict = step.to_dict()
        for child_dict in step_dict["children"]:
            # Required-field check.
            for key in ("name", "start_time", "end_time", "elapsed_ms"):
                assert key in child_dict, (
                    f"子步骤 {child_dict.get('name', '?')} 的 to_dict() 缺少字段: {key}"
                )
            if child_dict["end_time"] is None:
                continue
            start_dt = _parse_iso(child_dict["start_time"])
            end_dt = _parse_iso(child_dict["end_time"])
            wall_ms = (end_dt - start_dt).total_seconds() * 1000
            elapsed_ms = child_dict["elapsed_ms"]
            diff = abs(elapsed_ms - wall_ms)
            # Fix: separator added between the two f-string halves (the
            # message previously read "elapsed_ms=1.234wall_clock_ms=...").
            assert diff <= _TOLERANCE_MS, (
                f"子步骤 {child_dict['name']}: elapsed_ms={elapsed_ms:.3f} "
                f"wall_clock_ms={wall_ms:.3f} 差异 {diff:.3f}ms 超过容差 {_TOLERANCE_MS}ms"
            )

View File

@@ -0,0 +1,324 @@
# -*- coding: utf-8 -*-
"""
Feature: dataflow-field-completion, Property 8: DWS 库存汇总粒度聚合正确性
**Validates: Requirements 12.2, 12.3, 12.4, 12.5, 12.6**
对于任意 DWD 库存汇总数据集和任意汇总粒度(日/周/月DWS 汇总任务的 transform
输出应满足:
(a) 每条记录的 stat_period 与任务粒度一致
(b) 同一 (site_id, stat_date, site_goods_id) 组合不重复
(c) 日度汇总的记录数不少于周度和月度汇总的记录数
测试策略:
- 使用 hypothesis 生成随机 DWD 库存行(随机 fetched_at 日期、site_goods_id、数值
- 构造最小可用的任务实例,调用 transform 方法
- 验证三条属性
"""
from __future__ import annotations
import sys
from dataclasses import dataclass
from datetime import date, datetime, timedelta
from decimal import Decimal
from pathlib import Path
from typing import Any, Dict, List
from unittest.mock import MagicMock
from hypothesis import given, settings, assume, HealthCheck
import hypothesis.strategies as st
# ── 将 ETL 模块加入 sys.path ──
_ETL_ROOT = (
Path(__file__).resolve().parent.parent
/ "apps" / "etl" / "connectors" / "feiqiu"
)
if str(_ETL_ROOT) not in sys.path:
sys.path.insert(0, str(_ETL_ROOT))
from tasks.dws.goods_stock_daily_task import GoodsStockDailyTask
from tasks.dws.goods_stock_weekly_task import GoodsStockWeeklyTask
from tasks.dws.goods_stock_monthly_task import GoodsStockMonthlyTask
from tasks.base_task import TaskContext
# ══════════════════════════════════════════════════════════════════
# 辅助:构造最小可用的任务实例
# ══════════════════════════════════════════════════════════════════
def _make_config() -> MagicMock:
"""构造 mock config"""
config = MagicMock()
config.get = lambda key, default=None: {
"app.store_id": 1,
"app.timezone": "Asia/Shanghai",
}.get(key, default)
return config
def _make_task(task_cls):
    """Instantiate *task_cls* with mock config/db/api/logger collaborators."""
    collaborators = (_make_config(), MagicMock(), MagicMock(), MagicMock())
    return task_cls(*collaborators)
def _make_context(site_id: int = 1) -> TaskContext:
    """Build a minimal TaskContext covering a fixed 90-day window."""
    window_end = datetime(2026, 1, 15, 12, 0, 0)
    window_days = 90
    return TaskContext(
        store_id=site_id,
        window_start=window_end - timedelta(days=window_days),
        window_end=window_end,
        window_minutes=window_days * 24 * 60,
    )
# ══════════════════════════════════════════════════════════════════
# Hypothesis strategies: random DWD stock rows
# ══════════════════════════════════════════════════════════════════
# Date range 2025-11-01 ~ 2026-02-20 (covers week- and month-crossing edges).
_MIN_DATE = date(2025, 11, 1)
_MAX_DATE = date(2026, 2, 20)
_DATE_RANGE_DAYS = (_MAX_DATE - _MIN_DATE).days
_date_strategy = st.integers(
    min_value=0, max_value=_DATE_RANGE_DAYS
).map(lambda d: _MIN_DATE + timedelta(days=d))
# site_goods_id: 1-5 distinct goods (a small range yields meaningful grouping).
_goods_id_strategy = st.integers(min_value=1, max_value=5)
# Numeric strategy: plausible stock quantities (non-negative, 2 decimal places).
_numeric_strategy = st.decimals(
    min_value=Decimal("0"),
    max_value=Decimal("9999.99"),
    places=2,
    allow_nan=False,
    allow_infinity=False,
)
@st.composite
def _dwd_row(draw):
    """Generate one random DWD stock-summary row."""
    day = draw(_date_strategy)
    # fetched_at is a datetime carrying an hour/minute component
    fetched_at = datetime(
        day.year, day.month, day.day,
        draw(st.integers(min_value=0, max_value=23)),
        draw(st.integers(min_value=0, max_value=59)),
        0,
    )
    return {
        "site_goods_id": draw(_goods_id_strategy),
        "goods_name": f"商品_{draw(st.integers(min_value=1, max_value=5))}",
        "goods_unit": draw(st.sampled_from(["", "", "", ""])),
        "goods_category_id": draw(st.integers(min_value=1, max_value=3)),
        "goods_category_second_id": draw(st.integers(min_value=1, max_value=5)),
        "category_name": draw(st.sampled_from(["饮料", "零食", "台球用品"])),
        "range_start_stock": draw(_numeric_strategy),
        "range_end_stock": draw(_numeric_strategy),
        "range_in": draw(_numeric_strategy),
        "range_out": draw(_numeric_strategy),
        "range_sale": draw(_numeric_strategy),
        "range_sale_money": draw(_numeric_strategy),
        "range_inventory": draw(_numeric_strategy),
        "current_stock": draw(_numeric_strategy),
        "site_id": 1,
        "tenant_id": 100,
        "fetched_at": fetched_at,
    }
# 1~30 DWD rows, sorted by fetched_at (mimics SQL ORDER BY fetched_at)
_dwd_rows_strategy = st.lists(_dwd_row(), min_size=1, max_size=30).map(
    lambda batch: sorted(batch, key=lambda row: row["fetched_at"])
)
# ══════════════════════════════════════════════════════════════════
# Property 8a: stat_period 与任务粒度一致
# ══════════════════════════════════════════════════════════════════
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_daily_stat_period_is_daily(rows):
    """
    **Validates: Requirements 12.2**
    Every record emitted by the daily aggregation transform must carry
    stat_period == 'daily'.
    """
    task = _make_task(GoodsStockDailyTask)
    context = _make_context()
    extracted = {
        "rows": rows,
        "site_id": 1,
        "start_date": _MIN_DATE,
        "end_date": _MAX_DATE,
    }
    for record in task.transform(extracted, context):
        assert record["stat_period"] == "daily", \
            f"日度汇总记录的 stat_period 应为 'daily',实际为 {record['stat_period']!r}"
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_weekly_stat_period_is_weekly(rows):
    """
    **Validates: Requirements 12.3**
    Every record emitted by the weekly aggregation transform must carry
    stat_period == 'weekly'.
    """
    task = _make_task(GoodsStockWeeklyTask)
    context = _make_context()
    extracted = {
        "rows": rows,
        "site_id": 1,
        "start_date": _MIN_DATE,
        "end_date": _MAX_DATE,
    }
    for record in task.transform(extracted, context):
        assert record["stat_period"] == "weekly", \
            f"周度汇总记录的 stat_period 应为 'weekly',实际为 {record['stat_period']!r}"
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_monthly_stat_period_is_monthly(rows):
    """
    **Validates: Requirements 12.4**
    Every record emitted by the monthly aggregation transform must carry
    stat_period == 'monthly'.
    """
    task = _make_task(GoodsStockMonthlyTask)
    context = _make_context()
    extracted = {
        "rows": rows,
        "site_id": 1,
        "start_date": _MIN_DATE,
        "end_date": _MAX_DATE,
    }
    for record in task.transform(extracted, context):
        assert record["stat_period"] == "monthly", \
            f"月度汇总记录的 stat_period 应为 'monthly',实际为 {record['stat_period']!r}"
# ══════════════════════════════════════════════════════════════════
# Property 8b: 同一 (site_id, stat_date, site_goods_id) 组合不重复
# ══════════════════════════════════════════════════════════════════
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_daily_no_duplicate_keys(rows):
    """
    **Validates: Requirements 12.5, 12.6**
    The daily aggregation must not emit the same
    (site_id, stat_date, site_goods_id) combination twice.
    """
    task = _make_task(GoodsStockDailyTask)
    context = _make_context()
    extracted = {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE}
    seen = set()
    for record in task.transform(extracted, context):
        combo = (record["site_id"], record["stat_date"], record["site_goods_id"])
        assert combo not in seen, \
            f"日度汇总存在重复主键: {combo}"
        seen.add(combo)
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_weekly_no_duplicate_keys(rows):
    """
    **Validates: Requirements 12.5, 12.6**
    The weekly aggregation must not emit the same
    (site_id, stat_date, site_goods_id) combination twice.
    """
    task = _make_task(GoodsStockWeeklyTask)
    context = _make_context()
    extracted = {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE}
    seen = set()
    for record in task.transform(extracted, context):
        combo = (record["site_id"], record["stat_date"], record["site_goods_id"])
        assert combo not in seen, \
            f"周度汇总存在重复主键: {combo}"
        seen.add(combo)
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_monthly_no_duplicate_keys(rows):
    """
    **Validates: Requirements 12.5, 12.6**
    The monthly aggregation must not emit the same
    (site_id, stat_date, site_goods_id) combination twice.
    """
    task = _make_task(GoodsStockMonthlyTask)
    context = _make_context()
    extracted = {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE}
    seen = set()
    for record in task.transform(extracted, context):
        combo = (record["site_id"], record["stat_date"], record["site_goods_id"])
        assert combo not in seen, \
            f"月度汇总存在重复主键: {combo}"
        seen.add(combo)
# ══════════════════════════════════════════════════════════════════
# Property 8c: 日度记录数 >= 周度 >= 月度
# ══════════════════════════════════════════════════════════════════
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_daily_count_gte_weekly_and_monthly(rows):
    """
    **Validates: Requirements 12.2, 12.3, 12.4**
    For the same DWD input, the daily aggregation yields at least as many
    records as the weekly and monthly ones (finer grain => more grouping
    keys), and weekly yields at least as many as monthly.
    """
    context = _make_context()
    extracted = {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE}
    counts = {
        label: len(_make_task(task_cls).transform(extracted, context))
        for label, task_cls in (
            ("daily", GoodsStockDailyTask),
            ("weekly", GoodsStockWeeklyTask),
            ("monthly", GoodsStockMonthlyTask),
        )
    }
    daily_count = counts["daily"]
    weekly_count = counts["weekly"]
    monthly_count = counts["monthly"]
    assert daily_count >= weekly_count, (
        f"日度记录数({daily_count}) 应 >= 周度记录数({weekly_count})"
    )
    assert daily_count >= monthly_count, (
        f"日度记录数({daily_count}) 应 >= 月度记录数({monthly_count})"
    )
    # Extra check: weekly count >= monthly count
    assert weekly_count >= monthly_count, (
        f"周度记录数({weekly_count}) 应 >= 月度记录数({monthly_count})"
    )

View File

@@ -0,0 +1,135 @@
# -*- coding: utf-8 -*-
"""
Feature: etl-staff-dimension, Property 3: ODS 列名提取一致性
**Validates: Requirements 1.3**
对于任意 API 返回的员工记录(含驼峰和蛇形混合字段名),经 BaseOdsTask 处理后:
- 所有字段名转为小写蛇形_get_value_case_insensitive 大小写不敏感匹配)
- id 字段不为空且为正整数
- payload 字段包含完整原始 JSON
验证方式hypothesis 属性测试,生成随机员工记录验证转换一致性。
"""
from __future__ import annotations
import json
import logging
import os
import sys
from pathlib import Path
from hypothesis import given, settings, HealthCheck
import hypothesis.strategies as st
_ETL_ROOT = Path(__file__).resolve().parent.parent / "apps" / "etl" / "connectors" / "feiqiu"
if str(_ETL_ROOT) not in sys.path:
sys.path.insert(0, str(_ETL_ROOT))
os.environ.setdefault("ETL_SKIP_DOTENV", "1")
from tasks.ods.ods_tasks import ODS_TASK_CLASSES, BaseOdsTask
# 将 tests/unit 加入 path 以使用 FakeDB/FakeAPI
_UNIT_TEST_ROOT = _ETL_ROOT / "tests" / "unit"
if str(_UNIT_TEST_ROOT) not in sys.path:
sys.path.insert(0, str(_UNIT_TEST_ROOT))
from task_test_utils import create_test_config, get_db_operations, FakeAPIClient
# -- Strategy: generate a random staff record --
# The upstream API returns a mix of camelCase and snake_case field names,
# which is exactly what the case-insensitive extraction must handle.
_STAFF_RECORD = st.fixed_dictionaries({
    "id": st.integers(min_value=1, max_value=2**53),
    "staff_name": st.text(min_size=1, max_size=10),
    "mobile": st.from_regex(r"1[3-9]\d{9}", fullmatch=True),
    "job": st.sampled_from(["店长", "主管", "教练", "收银员", "助教管理员"]),
    "staff_identity": st.integers(min_value=0, max_value=5),
    "status": st.integers(min_value=0, max_value=2),
    "leave_status": st.integers(min_value=0, max_value=2),
    "site_id": st.integers(min_value=1, max_value=2**53),
    "tenant_id": st.integers(min_value=1, max_value=2**53),
    # camelCase fields (the format the API actually returns)
    "cashierPointId": st.integers(min_value=0, max_value=2**53),
    "cashierPointName": st.text(min_size=0, max_size=20),
    "groupName": st.text(min_size=0, max_size=20),
    "groupId": st.integers(min_value=0, max_value=2**53),
    "rankName": st.text(min_size=0, max_size=10),
    "userRoles": st.just([]),
    "gender": st.integers(min_value=0, max_value=3),
    "is_delete": st.just(0),
})
@given(record=_STAFF_RECORD)
@settings(max_examples=50, suppress_health_check=[HealthCheck.too_slow])
def test_staff_record_field_case_insensitive_lookup(record):
    """P3(a): _get_value_case_insensitive matches camelCase and snake_case keys."""
    lookup = BaseOdsTask._get_value_case_insensitive
    # camelCase keys must be reachable through their lowercase column names
    for lowered, original in (
        ("cashierpointid", "cashierPointId"),
        ("groupname", "groupName"),
        ("groupid", "groupId"),
        ("rankname", "rankName"),
        ("userroles", "userRoles"),
    ):
        assert lookup(record, lowered) == record[original]
    # snake_case keys match directly
    for key in ("id", "staff_name", "mobile"):
        assert lookup(record, key) == record[key]
@given(record=_STAFF_RECORD)
@settings(max_examples=50, suppress_health_check=[HealthCheck.too_slow])
def test_staff_record_id_positive_integer(record):
    """P3(b): the id field is always present and a positive integer."""
    staff_id = BaseOdsTask._get_value_case_insensitive(record, "id")
    assert staff_id is not None
    assert isinstance(staff_id, int)
    assert staff_id > 0
@given(record=_STAFF_RECORD)
@settings(max_examples=50, suppress_health_check=[HealthCheck.too_slow])
def test_staff_record_payload_preserves_original(record):
    """P3(c): the serialized payload contains every key of the source record."""
    serialized = json.dumps(record, ensure_ascii=False)
    missing = [key for key in record if key not in serialized]
    assert not missing
def test_staff_ingest_payload_roundtrip(tmp_path):
    """P3(d): end-to-end check that the ODS payload column keeps the full raw JSON."""
    config = create_test_config("ONLINE", tmp_path / "archive", tmp_path / "temp")
    sample = [
        {
            "id": 9999999999999,
            "staff_name": "测试员工",
            "mobile": "13900000001",
            "cashierPointId": 12345,
            "cashierPointName": "默认收银台",
            "groupName": "A组",
            "groupId": 100,
            "rankName": "初级",
            "userRoles": [{"roleId": 1}],
            "gender": 1,
            "is_delete": 0,
            "status": 1,
            "staff_identity": 2,
            "site_id": 1001,
            "tenant_id": 2001,
        }
    ]
    api = FakeAPIClient({"/PersonnelManagement/SearchSystemStaffInfo": sample})
    staff_task_cls = ODS_TASK_CLASSES["ODS_STAFF_INFO"]
    with get_db_operations() as db_ops:
        task = staff_task_cls(config, db_ops, api, logging.getLogger("test_p3"))
        outcome = task.execute()
        assert outcome["status"] == "SUCCESS"
        stored_row = db_ops.upserts[0]["rows"][0]
        payload = json.loads(stored_row["payload"])
        # The payload keeps the original key names, including camelCase ones.
        assert payload["cashierPointId"] == 12345
        assert payload["groupName"] == "A组"
        assert payload["id"] == 9999999999999

View File

@@ -0,0 +1,232 @@
# -*- coding: utf-8 -*-
"""
Feature: spi-spending-power-index — SPI 消费力指数属性测试
使用 hypothesis 验证 SPI 算法的正确性属性:
- Property 1: SPI 总分非负性
- Property 2: Level 子分关于消费金额单调非递减
- Property 3: Speed 子分关于 spend_30 单调非递减
- Property 4: Stability 子分取值范围 [0, 1]
- Property 5: Display Score 取值范围 [0, 10]
测试策略:
- 子分计算为 @staticmethod 纯函数,不依赖数据库,直接调用
- batch_normalize_to_display 为实例方法,通过 MagicMock 构造最小实例
"""
from __future__ import annotations
import sys
from pathlib import Path
from unittest.mock import MagicMock
from hypothesis import given, settings, assume
import hypothesis.strategies as st
# ── 将 ETL 模块加入 sys.path ──
_ETL_ROOT = (
Path(__file__).resolve().parent.parent
/ "apps" / "etl" / "connectors" / "feiqiu"
)
if str(_ETL_ROOT) not in sys.path:
sys.path.insert(0, str(_ETL_ROOT))
from tasks.dws.index.spending_power_index_task import (
SpendingPowerIndexTask,
SPIMemberFeatures,
)
from tasks.dws.index.base_index_task import BaseIndexTask
# ══════════════════════════════════════════════════════════════════
# 辅助:构造最小可用的 SpendingPowerIndexTask 实例(仅用于 Property 5
# ══════════════════════════════════════════════════════════════════
def _make_spi_task() -> SpendingPowerIndexTask:
    """Build an SPI task wired to mocks; only batch_normalize_to_display is exercised."""
    known_settings = {"app.timezone": "Asia/Shanghai"}
    config = MagicMock()
    # BaseTask.__init__ calls config.get("app.timezone", "Asia/Shanghai").
    # A raw MagicMock.get() would return a Mock object and break ZoneInfo,
    # so make it return real strings from a fixed map.
    config.get = lambda key, default=None: known_settings.get(key, default)
    return SpendingPowerIndexTask(config, MagicMock(), MagicMock(), MagicMock())
# ══════════════════════════════════════════════════════════════════
# Property 1: SPI 总分非负性
# ══════════════════════════════════════════════════════════════════
@given(
    level=st.floats(min_value=0, max_value=100),
    speed=st.floats(min_value=0, max_value=100),
    stability=st.floats(min_value=0, max_value=1),
)
@settings(max_examples=200)
def test_spi_raw_non_negative(level, speed, stability):
    """Property 1: the raw SPI total is non-negative.
    For any non-negative Level/Speed/Stability sub-scores,
    compute_spi_raw must return a non-negative value.
    **Validates: Requirements 6.1, 10.1**
    """
    raw = SpendingPowerIndexTask.compute_spi_raw(
        level, speed, stability, SpendingPowerIndexTask.DEFAULT_PARAMS
    )
    assert raw >= 0, f"SPI_raw={raw} < 0 (L={level}, S={speed}, P={stability})"
# ══════════════════════════════════════════════════════════════════
# Property 2: Level 子分关于消费金额单调非递减
# ══════════════════════════════════════════════════════════════════
@given(
    spend_30=st.floats(min_value=0, max_value=50000),
    spend_90=st.floats(min_value=0, max_value=150000),
    recharge_90=st.floats(min_value=0, max_value=100000),
    avg_ticket_90=st.floats(min_value=0, max_value=5000),
    delta=st.floats(min_value=0.01, max_value=10000),
)
@settings(max_examples=200)
def test_level_monotonic_on_spend(spend_30, spend_90, recharge_90, avg_ticket_90, delta):
    """Property 2: Level is monotonically non-decreasing in spend.
    Holding everything else fixed, increasing spend_30 or spend_90 must not
    lower the Level sub-score.
    **Validates: Requirements 3.1, 10.2**
    """
    params = SpendingPowerIndexTask.DEFAULT_PARAMS

    def _level(s30, s90):
        # Helper: Level sub-score for the given 30/90-day spend amounts.
        features = SPIMemberFeatures(
            member_id=1, site_id=1,
            spend_30=s30, spend_90=s90,
            recharge_90=recharge_90, avg_ticket_90=avg_ticket_90,
        )
        return SpendingPowerIndexTask.compute_level(features, params)

    level_before = _level(spend_30, spend_90)
    # Bump spend_30
    level_after_30 = _level(spend_30 + delta, spend_90)
    assert level_after_30 >= level_before, (
        f"Level 下降: spend_30 增加 {delta}{level_after_30} < {level_before}"
    )
    # Bump spend_90
    level_after_90 = _level(spend_30, spend_90 + delta)
    assert level_after_90 >= level_before, (
        f"Level 下降: spend_90 增加 {delta}{level_after_90} < {level_before}"
    )
# ══════════════════════════════════════════════════════════════════
# Property 3: Speed 子分关于 spend_30 单调非递减
# ══════════════════════════════════════════════════════════════════
@given(
    spend_30=st.floats(min_value=0, max_value=50000),
    spend_90=st.floats(min_value=0, max_value=150000),
    visit_days_30=st.integers(min_value=0, max_value=30),
    daily_spend_ewma_90=st.floats(min_value=0, max_value=10000),
    delta=st.floats(min_value=0.01, max_value=10000),
)
@settings(max_examples=200)
def test_speed_monotonic_on_spend_30(spend_30, spend_90, visit_days_30, daily_spend_ewma_90, delta):
    """Property 3: Speed is monotonically non-decreasing in spend_30.
    Holding everything else fixed, increasing spend_30 must not lower the
    Speed sub-score.
    **Validates: Requirements 4.1, 4.4, 10.3**
    """
    params = SpendingPowerIndexTask.DEFAULT_PARAMS

    def _speed(s30):
        # Helper: Speed sub-score for the given 30-day spend amount.
        features = SPIMemberFeatures(
            member_id=1, site_id=1,
            spend_30=s30, spend_90=spend_90,
            visit_days_30=visit_days_30,
            daily_spend_ewma_90=daily_spend_ewma_90,
        )
        return SpendingPowerIndexTask.compute_speed(features, params)

    speed_before = _speed(spend_30)
    speed_after = _speed(spend_30 + delta)
    assert speed_after >= speed_before, (
        f"Speed 下降: spend_30 增加 {delta}{speed_after} < {speed_before}"
    )
# ══════════════════════════════════════════════════════════════════
# Property 4: Stability 子分取值范围 [0, 1]
# ══════════════════════════════════════════════════════════════════
@given(active_weeks=st.integers(min_value=0, max_value=13))
@settings(max_examples=200)
def test_stability_in_range(active_weeks):
    """Property 4: Stability stays within [0, 1].
    For any active_weeks_90 in [0, 13], compute_stability must return a
    value inside the closed interval [0, 1].
    **Validates: Requirements 5.2, 5.4, 10.4**
    """
    score = SpendingPowerIndexTask.compute_stability(
        SPIMemberFeatures(member_id=1, site_id=1, active_weeks_90=active_weeks),
        SpendingPowerIndexTask.DEFAULT_PARAMS,
    )
    assert 0 <= score <= 1, (
        f"Stability={score} 超出 [0, 1] (active_weeks_90={active_weeks})"
    )
# ══════════════════════════════════════════════════════════════════
# Property 5: Display Score 取值范围 [0, 10]
# ══════════════════════════════════════════════════════════════════
@given(
    raw_scores=st.lists(
        st.floats(min_value=0, max_value=1000),
        min_size=1,
        max_size=50,
    ),
)
@settings(max_examples=200)
def test_display_score_in_range(raw_scores):
    """Property 5: display scores land in [0.00, 10.00].
    For any non-empty list of non-negative raw scores,
    batch_normalize_to_display must map each into [0, 10].
    **Validates: Requirements 6.6, 10.5**
    """
    task = _make_spi_task()
    # Build the (entity_id, raw_score) input pairs.
    pairs = list(enumerate(raw_scores))
    normalized = task.batch_normalize_to_display(
        raw_scores=pairs,
        compression=None,  # no compression: plain MinMax mapping
        use_smoothing=False,  # skip EWMA smoothing (would require the DB)
    )
    for entity_id, raw_score, display_score in normalized:
        assert 0.0 <= display_score <= 10.0, (
            f"display_score={display_score} 超出 [0, 10] (raw={raw_score})"
        )