在前后端开发联调前 的提交20260223
This commit is contained in:
324
tests/test_property_8_dws_stock_aggregation.py
Normal file
324
tests/test_property_8_dws_stock_aggregation.py
Normal file
@@ -0,0 +1,324 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Feature: dataflow-field-completion, Property 8: DWS 库存汇总粒度聚合正确性
|
||||
|
||||
**Validates: Requirements 12.2, 12.3, 12.4, 12.5, 12.6**
|
||||
|
||||
对于任意 DWD 库存汇总数据集和任意汇总粒度(日/周/月),DWS 汇总任务的 transform
|
||||
输出应满足:
|
||||
(a) 每条记录的 stat_period 与任务粒度一致
|
||||
(b) 同一 (site_id, stat_date, site_goods_id) 组合不重复
|
||||
(c) 日度汇总的记录数不少于周度和月度汇总的记录数
|
||||
|
||||
测试策略:
|
||||
- 使用 hypothesis 生成随机 DWD 库存行(随机 fetched_at 日期、site_goods_id、数值)
|
||||
- 构造最小可用的任务实例,调用 transform 方法
|
||||
- 验证三条属性
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from hypothesis import given, settings, assume, HealthCheck
|
||||
import hypothesis.strategies as st
|
||||
|
||||
# ── Put the ETL connector directory on sys.path so `tasks.*` can be imported ──
# The directory is located relative to this file: two levels up, then
# apps/etl/connectors/feiqiu.
_ETL_ROOT = Path(__file__).resolve().parent.parent.joinpath(
    "apps", "etl", "connectors", "feiqiu"
)
# Insert at the front only once, so repeated imports do not grow sys.path.
if str(_ETL_ROOT) not in sys.path:
    sys.path.insert(0, str(_ETL_ROOT))
|
||||
|
||||
from tasks.dws.goods_stock_daily_task import GoodsStockDailyTask
|
||||
from tasks.dws.goods_stock_weekly_task import GoodsStockWeeklyTask
|
||||
from tasks.dws.goods_stock_monthly_task import GoodsStockMonthlyTask
|
||||
from tasks.base_task import TaskContext
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# 辅助:构造最小可用的任务实例
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
def _make_config() -> MagicMock:
    """Build a mock config whose ``get`` answers from a fixed settings table.

    Returns:
        A ``MagicMock`` whose ``get(key, default=None)`` returns the value for
        ``"app.store_id"`` (1) or ``"app.timezone"`` ("Asia/Shanghai"), and
        ``default`` for any other key. All other attributes keep the usual
        MagicMock behaviour.
    """
    values = {
        "app.store_id": 1,
        "app.timezone": "Asia/Shanghai",
    }

    def _lookup(key, default=None):
        # Same contract as dict.get, but also accepts ``default=`` as a keyword.
        return values.get(key, default)

    cfg = MagicMock()
    cfg.get = _lookup
    return cfg
|
||||
|
||||
|
||||
def _make_task(task_cls):
    """Instantiate a DWS task class with mocked collaborators for testing.

    Args:
        task_cls: The task class to instantiate. It is called positionally
            with four arguments: the mock config from ``_make_config`` and
            three fresh ``MagicMock`` objects passed in the db, api and
            logger positions.

    Returns:
        Whatever ``task_cls(...)`` returns.
    """
    config = _make_config()
    db, api, logger = (MagicMock() for _ in range(3))
    return task_cls(config, db, api, logger)
|
||||
|
||||
|
||||
def _make_context(site_id: int = 1) -> TaskContext:
    """Build a minimal TaskContext with a fixed 90-day window.

    The window ends at the naive datetime 2026-01-15 12:00:00 and starts
    90 days earlier; ``window_minutes`` is the same span expressed in minutes.

    Args:
        site_id: Value passed as the context's ``store_id``.

    Returns:
        A ``TaskContext`` populated with the fixed window described above.
    """
    span = timedelta(days=90)
    window_end = datetime(2026, 1, 15, 12, 0, 0)
    return TaskContext(
        store_id=site_id,
        window_start=window_end - span,
        window_end=window_end,
        # timedelta // timedelta yields an int: 90 * 24 * 60 minutes.
        window_minutes=span // timedelta(minutes=1),
    )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Hypothesis 策略:生成随机 DWD 库存行
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
# Date range: 2025-11-01 .. 2026-02-20 (spans week and month boundaries)
_MIN_DATE = date(2025, 11, 1)
_MAX_DATE = date(2026, 2, 20)
_DATE_RANGE_DAYS = (_MAX_DATE - _MIN_DATE).days

# Draw a day offset in [0, _DATE_RANGE_DAYS] and shift _MIN_DATE by it, so
# both endpoints of the range are reachable.
_date_strategy = st.integers(min_value=0, max_value=_DATE_RANGE_DAYS).map(
    lambda offset: _MIN_DATE + timedelta(days=offset)
)

# site_goods_id: only 5 distinct products, kept small so that generated rows
# collide on the same product and the aggregation is meaningful.
_goods_id_strategy = st.integers(min_value=1, max_value=5)

# Numeric values: plausible stock quantities, two decimal places, finite only.
_numeric_strategy = st.decimals(
    min_value=Decimal("0"),
    max_value=Decimal("9999.99"),
    places=2,
    allow_nan=False,
    allow_infinity=False,
)
|
||||
|
||||
|
||||
@st.composite
def _dwd_row(draw):
    """Draw one random DWD stock-summary row as a plain dict.

    The row gets a ``fetched_at`` naive datetime on a day taken from
    ``_date_strategy`` with a random hour and minute (seconds are always 0),
    a ``site_goods_id`` from ``_goods_id_strategy``, descriptive goods and
    category fields, and eight numeric fields from ``_numeric_strategy``.
    ``site_id`` is always 1 and ``tenant_id`` is always 100. Note that
    ``goods_name`` is drawn independently of ``site_goods_id``.
    """
    day = draw(_date_strategy)
    hour = draw(st.integers(min_value=0, max_value=23))
    minute = draw(st.integers(min_value=0, max_value=59))
    # Midnight of the drawn day plus the drawn time of day.
    fetched_at = datetime(day.year, day.month, day.day) + timedelta(
        hours=hour, minutes=minute
    )

    # Values are drawn in key order, top to bottom.
    row = {
        "site_goods_id": draw(_goods_id_strategy),
        "goods_name": f"商品_{draw(st.integers(min_value=1, max_value=5))}",
        "goods_unit": draw(st.sampled_from(["个", "箱", "瓶", "包"])),
        "goods_category_id": draw(st.integers(min_value=1, max_value=3)),
        "goods_category_second_id": draw(st.integers(min_value=1, max_value=5)),
        "category_name": draw(st.sampled_from(["饮料", "零食", "台球用品"])),
    }
    numeric_columns = (
        "range_start_stock",
        "range_end_stock",
        "range_in",
        "range_out",
        "range_sale",
        "range_sale_money",
        "range_inventory",
        "current_stock",
    )
    for column in numeric_columns:
        row[column] = draw(_numeric_strategy)

    row["site_id"] = 1
    row["tenant_id"] = 100
    row["fetched_at"] = fetched_at
    return row
|
||||
|
||||
|
||||
# Generate 1 to 30 DWD rows, ordered by fetched_at ascending
# (mimics a SQL `ORDER BY fetched_at`). sorted() is stable, so rows with equal
# fetched_at keep their generated relative order.
_dwd_rows_strategy = st.lists(_dwd_row(), min_size=1, max_size=30).map(
    lambda drawn: sorted(drawn, key=lambda row: row["fetched_at"])
)
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 8a: stat_period 与任务粒度一致
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_daily_stat_period_is_daily(rows):
    """
    **Validates: Requirements 12.2**

    Every record returned by the daily task's transform must have
    stat_period equal to 'daily'.
    """
    extracted = dict(rows=rows, site_id=1, start_date=_MIN_DATE, end_date=_MAX_DATE)
    result = _make_task(GoodsStockDailyTask).transform(extracted, _make_context())

    for rec in result:
        assert rec["stat_period"] == "daily", (
            f"日度汇总记录的 stat_period 应为 'daily',实际为 {rec['stat_period']!r}"
        )
|
||||
|
||||
|
||||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_weekly_stat_period_is_weekly(rows):
    """
    **Validates: Requirements 12.3**

    Every record returned by the weekly task's transform must have
    stat_period equal to 'weekly'.
    """
    extracted = dict(rows=rows, site_id=1, start_date=_MIN_DATE, end_date=_MAX_DATE)
    result = _make_task(GoodsStockWeeklyTask).transform(extracted, _make_context())

    for rec in result:
        assert rec["stat_period"] == "weekly", (
            f"周度汇总记录的 stat_period 应为 'weekly',实际为 {rec['stat_period']!r}"
        )
|
||||
|
||||
|
||||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_monthly_stat_period_is_monthly(rows):
    """
    **Validates: Requirements 12.4**

    Every record returned by the monthly task's transform must have
    stat_period equal to 'monthly'.
    """
    extracted = dict(rows=rows, site_id=1, start_date=_MIN_DATE, end_date=_MAX_DATE)
    result = _make_task(GoodsStockMonthlyTask).transform(extracted, _make_context())

    for rec in result:
        assert rec["stat_period"] == "monthly", (
            f"月度汇总记录的 stat_period 应为 'monthly',实际为 {rec['stat_period']!r}"
        )
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
# Property 8b: 同一 (site_id, stat_date, site_goods_id) 组合不重复
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_daily_no_duplicate_keys(rows):
    """
    **Validates: Requirements 12.5, 12.6**

    In the daily output, no (site_id, stat_date, site_goods_id) combination
    may appear more than once.
    """
    extracted = dict(rows=rows, site_id=1, start_date=_MIN_DATE, end_date=_MAX_DATE)
    result = _make_task(GoodsStockDailyTask).transform(extracted, _make_context())

    key_columns = ("site_id", "stat_date", "site_goods_id")
    seen = set()
    for rec in result:
        key = tuple(rec[column] for column in key_columns)
        # Fail on the first repeated key so the message names the offender.
        assert key not in seen, f"日度汇总存在重复主键: {key}"
        seen.add(key)
|
||||
|
||||
|
||||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_weekly_no_duplicate_keys(rows):
    """
    **Validates: Requirements 12.5, 12.6**

    In the weekly output, no (site_id, stat_date, site_goods_id) combination
    may appear more than once.
    """
    extracted = dict(rows=rows, site_id=1, start_date=_MIN_DATE, end_date=_MAX_DATE)
    result = _make_task(GoodsStockWeeklyTask).transform(extracted, _make_context())

    key_columns = ("site_id", "stat_date", "site_goods_id")
    seen = set()
    for rec in result:
        key = tuple(rec[column] for column in key_columns)
        # Fail on the first repeated key so the message names the offender.
        assert key not in seen, f"周度汇总存在重复主键: {key}"
        seen.add(key)
|
||||
|
||||
|
||||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_monthly_no_duplicate_keys(rows):
    """
    **Validates: Requirements 12.5, 12.6**

    In the monthly output, no (site_id, stat_date, site_goods_id) combination
    may appear more than once.
    """
    extracted = dict(rows=rows, site_id=1, start_date=_MIN_DATE, end_date=_MAX_DATE)
    result = _make_task(GoodsStockMonthlyTask).transform(extracted, _make_context())

    key_columns = ("site_id", "stat_date", "site_goods_id")
    seen = set()
    for rec in result:
        key = tuple(rec[column] for column in key_columns)
        # Fail on the first repeated key so the message names the offender.
        assert key not in seen, f"月度汇总存在重复主键: {key}"
        seen.add(key)
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════
# Property 8c: daily record count >= weekly count, and >= monthly count
# ══════════════════════════════════════════════════════════════════

@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_daily_count_gte_weekly_and_monthly(rows):
    """
    **Validates: Requirements 12.2, 12.3, 12.4**

    For the same DWD input, the daily summary must produce at least as many
    records as the weekly summary and at least as many as the monthly summary.

    Rationale: every day lies in exactly one week and exactly one month, so
    the daily grouping refines both coarser groupings and can never yield
    fewer groups than either of them.

    No ordering between the weekly and monthly counts is asserted.
    NOTE(review): assuming the tasks bucket by calendar week and calendar
    month (their implementations are not visible from this file), weeks do not
    nest inside months. Two rows for one product on 2025-12-31 and 2026-01-01
    share a single week but fall in two different months, giving 1 weekly
    record versus 2 monthly records. Asserting weekly >= monthly would
    therefore make this test fail intermittently on valid task behaviour.
    """
    context = _make_context()
    extracted = {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE}

    # Run all three granularities over the very same extracted payload.
    daily_count = len(_make_task(GoodsStockDailyTask).transform(extracted, context))
    weekly_count = len(_make_task(GoodsStockWeeklyTask).transform(extracted, context))
    monthly_count = len(_make_task(GoodsStockMonthlyTask).transform(extracted, context))

    assert daily_count >= weekly_count, (
        f"日度记录数({daily_count}) 应 >= 周度记录数({weekly_count})"
    )
    assert daily_count >= monthly_count, (
        f"日度记录数({daily_count}) 应 >= 月度记录数({monthly_count})"
    )
|
||||
Reference in New Issue
Block a user