325 lines
12 KiB
Python
325 lines
12 KiB
Python
# -*- coding: utf-8 -*-
|
||
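"""
Feature: dataflow-field-completion, Property 8: DWS 库存汇总粒度聚合正确性

**Validates: Requirements 12.2, 12.3, 12.4, 12.5, 12.6**

For any DWD stock-summary dataset and any aggregation granularity
(daily / weekly / monthly), the output of the DWS summary task's
``transform`` must satisfy:
(a) every record's stat_period matches the task's granularity
(b) no (site_id, stat_date, site_goods_id) combination occurs more than once
(c) the daily summary has at least as many records as the weekly summary
    and as the monthly summary

Test strategy:
- use hypothesis to generate random DWD stock rows (random fetched_at date,
  site_goods_id and numeric values)
- build a minimal usable task instance and call its transform method
- check the three properties
"""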
|
||
from __future__ import annotations
|
||
|
||
import sys
|
||
from dataclasses import dataclass
|
||
from datetime import date, datetime, timedelta
|
||
from decimal import Decimal
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List
|
||
from unittest.mock import MagicMock
|
||
|
||
from hypothesis import given, settings, assume, HealthCheck
|
||
import hypothesis.strategies as st
|
||
|
||
# ── 将 ETL 模块加入 sys.path ──
|
||
_ETL_ROOT = (
|
||
Path(__file__).resolve().parent.parent
|
||
/ "apps" / "etl" / "connectors" / "feiqiu"
|
||
)
|
||
if str(_ETL_ROOT) not in sys.path:
|
||
sys.path.insert(0, str(_ETL_ROOT))
|
||
|
||
from tasks.dws.goods_stock_daily_task import GoodsStockDailyTask
|
||
from tasks.dws.goods_stock_weekly_task import GoodsStockWeeklyTask
|
||
from tasks.dws.goods_stock_monthly_task import GoodsStockMonthlyTask
|
||
from tasks.base_task import TaskContext
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════════
|
||
# 辅助:构造最小可用的任务实例
|
||
# ══════════════════════════════════════════════════════════════════
|
||
|
||
def _make_config() -> MagicMock:
    """Build a mock config object whose ``get`` answers from a fixed mapping.

    Returns:
        A ``MagicMock`` whose ``get(key, default=None)`` returns the canned
        value for ``"app.store_id"`` / ``"app.timezone"`` and ``default`` for
        every other key. All other attributes stay plain ``MagicMock``
        auto-attributes.
    """
    # Built once per config object instead of once per ``get`` call.
    settings_map = {
        "app.store_id": 1,
        "app.timezone": "Asia/Shanghai",
    }

    def _lookup(key, default=None):
        # A wrapper (rather than ``settings_map.get`` itself) keeps the
        # ``default=`` keyword form working; ``dict.get`` is positional-only.
        return settings_map.get(key, default)

    config = MagicMock()
    config.get = _lookup
    return config
|
||
|
||
|
||
def _make_task(task_cls):
    """Instantiate ``task_cls`` for testing.

    The class is called with four positional arguments: the mock config from
    ``_make_config()`` followed by three fresh ``MagicMock`` objects standing
    in for the database handle, the API client and the logger.
    """
    collaborators = (
        _make_config(),  # config
        MagicMock(),     # db
        MagicMock(),     # api
        MagicMock(),     # logger
    )
    return task_cls(*collaborators)
|
||
|
||
|
||
def _make_context(site_id: int = 1) -> TaskContext:
    """Build a minimal ``TaskContext`` with a fixed 90-day window.

    The window ends at 2026-01-15 12:00:00 (naive datetime) and starts 90 days
    earlier; ``window_minutes`` is that same span expressed in minutes.

    Args:
        site_id: Value passed through as the context's ``store_id``.
    """
    lookback_days = 90
    window_end = datetime(2026, 1, 15, 12, 0, 0)
    window_start = window_end - timedelta(days=lookback_days)
    return TaskContext(
        store_id=site_id,
        window_start=window_start,
        window_end=window_end,
        window_minutes=lookback_days * 24 * 60,
    )
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════════
|
||
# Hypothesis 策略:生成随机 DWD 库存行
|
||
# ══════════════════════════════════════════════════════════════════
|
||
|
||
# Date range: 2025-11-01 .. 2026-02-20 (spans week and month boundaries).
_MIN_DATE = date(2025, 11, 1)
_MAX_DATE = date(2026, 2, 20)
_DATE_RANGE_DAYS = (_MAX_DATE - _MIN_DATE).days

# Draw a day offset inside the range, then turn it into a calendar date.
_date_strategy = st.integers(min_value=0, max_value=_DATE_RANGE_DAYS).map(
    lambda offset: _MIN_DATE + timedelta(days=offset)
)
|
||
|
||
# site_goods_id: one of 5 goods (kept small so rows actually collide and
# the aggregation has something to merge).
_goods_id_strategy = st.integers(min_value=1, max_value=5)

# Numeric values: plausible stock quantities, two decimal places,
# within [0, 9999.99], never NaN or infinite.
_numeric_strategy = st.decimals(
    places=2,
    min_value=Decimal("0"),
    max_value=Decimal("9999.99"),
    allow_nan=False,
    allow_infinity=False,
)
|
||
|
||
|
||
@st.composite
def _dwd_row(draw):
    """Generate one random DWD stock-summary row as a dict.

    ``fetched_at`` is a naive datetime built from a drawn date plus a drawn
    hour and minute (seconds are always 0). ``site_id`` and ``tenant_id`` are
    fixed at 1 and 100. Values are drawn in the same order as the keys appear
    in the returned dict, after date, hour and minute.
    """
    day = draw(_date_strategy)
    # fetched_at is a datetime carrying a time-of-day component.
    hh = draw(st.integers(min_value=0, max_value=23))
    mm = draw(st.integers(min_value=0, max_value=59))
    fetched_at = datetime(day.year, day.month, day.day, hour=hh, minute=mm)

    # Descriptive fields first.
    row = {
        "site_goods_id": draw(_goods_id_strategy),
        "goods_name": f"商品_{draw(st.integers(min_value=1, max_value=5))}",
        "goods_unit": draw(st.sampled_from(["个", "箱", "瓶", "包"])),
        "goods_category_id": draw(st.integers(min_value=1, max_value=3)),
        "goods_category_second_id": draw(st.integers(min_value=1, max_value=5)),
        "category_name": draw(st.sampled_from(["饮料", "零食", "台球用品"])),
    }

    # Then every quantity/amount column, each drawn independently.
    numeric_fields = (
        "range_start_stock",
        "range_end_stock",
        "range_in",
        "range_out",
        "range_sale",
        "range_sale_money",
        "range_inventory",
        "current_stock",
    )
    for field_name in numeric_fields:
        row[field_name] = draw(_numeric_strategy)

    # Fixed identifiers and the timestamp go last, keeping the key order.
    row["site_id"] = 1
    row["tenant_id"] = 100
    row["fetched_at"] = fetched_at
    return row
|
||
|
||
|
||
# Generate 1..30 DWD rows ordered by fetched_at (mimics SQL ORDER BY fetched_at).
_dwd_rows_strategy = st.lists(_dwd_row(), min_size=1, max_size=30).map(
    lambda batch: sorted(batch, key=lambda row: row["fetched_at"])
)
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════════
|
||
# Property 8a: stat_period 与任务粒度一致
|
||
# ══════════════════════════════════════════════════════════════════
|
||
|
||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_daily_stat_period_is_daily(rows):
    """
    **Validates: Requirements 12.2**

    Every record produced by the daily summary task's transform must have
    stat_period equal to 'daily'.
    """
    daily_task = _make_task(GoodsStockDailyTask)
    ctx = _make_context()
    payload = dict(rows=rows, site_id=1, start_date=_MIN_DATE, end_date=_MAX_DATE)

    for rec in daily_task.transform(payload, ctx):
        assert rec["stat_period"] == "daily", (
            f"日度汇总记录的 stat_period 应为 'daily',实际为 {rec['stat_period']!r}"
        )
|
||
|
||
|
||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_weekly_stat_period_is_weekly(rows):
    """
    **Validates: Requirements 12.3**

    Every record produced by the weekly summary task's transform must have
    stat_period equal to 'weekly'.
    """
    weekly_task = _make_task(GoodsStockWeeklyTask)
    ctx = _make_context()
    payload = dict(rows=rows, site_id=1, start_date=_MIN_DATE, end_date=_MAX_DATE)

    for rec in weekly_task.transform(payload, ctx):
        assert rec["stat_period"] == "weekly", (
            f"周度汇总记录的 stat_period 应为 'weekly',实际为 {rec['stat_period']!r}"
        )
|
||
|
||
|
||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_monthly_stat_period_is_monthly(rows):
    """
    **Validates: Requirements 12.4**

    Every record produced by the monthly summary task's transform must have
    stat_period equal to 'monthly'.
    """
    monthly_task = _make_task(GoodsStockMonthlyTask)
    ctx = _make_context()
    payload = dict(rows=rows, site_id=1, start_date=_MIN_DATE, end_date=_MAX_DATE)

    for rec in monthly_task.transform(payload, ctx):
        assert rec["stat_period"] == "monthly", (
            f"月度汇总记录的 stat_period 应为 'monthly',实际为 {rec['stat_period']!r}"
        )
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════════
|
||
# Property 8b: 同一 (site_id, stat_date, site_goods_id) 组合不重复
|
||
# ══════════════════════════════════════════════════════════════════
|
||
|
||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_daily_no_duplicate_keys(rows):
    """
    **Validates: Requirements 12.5, 12.6**

    In the daily summary output no (site_id, stat_date, site_goods_id)
    combination may appear more than once.
    """
    daily_task = _make_task(GoodsStockDailyTask)
    ctx = _make_context()
    payload = dict(rows=rows, site_id=1, start_date=_MIN_DATE, end_date=_MAX_DATE)
    records = daily_task.transform(payload, ctx)

    seen = set()
    for rec in records:
        key = (rec["site_id"], rec["stat_date"], rec["site_goods_id"])
        assert key not in seen, (
            f"日度汇总存在重复主键: {key}"
        )
        seen.add(key)
|
||
|
||
|
||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_weekly_no_duplicate_keys(rows):
    """
    **Validates: Requirements 12.5, 12.6**

    In the weekly summary output no (site_id, stat_date, site_goods_id)
    combination may appear more than once.
    """
    weekly_task = _make_task(GoodsStockWeeklyTask)
    ctx = _make_context()
    payload = dict(rows=rows, site_id=1, start_date=_MIN_DATE, end_date=_MAX_DATE)
    records = weekly_task.transform(payload, ctx)

    seen = set()
    for rec in records:
        key = (rec["site_id"], rec["stat_date"], rec["site_goods_id"])
        assert key not in seen, (
            f"周度汇总存在重复主键: {key}"
        )
        seen.add(key)
|
||
|
||
|
||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_monthly_no_duplicate_keys(rows):
    """
    **Validates: Requirements 12.5, 12.6**

    In the monthly summary output no (site_id, stat_date, site_goods_id)
    combination may appear more than once.
    """
    monthly_task = _make_task(GoodsStockMonthlyTask)
    ctx = _make_context()
    payload = dict(rows=rows, site_id=1, start_date=_MIN_DATE, end_date=_MAX_DATE)
    records = monthly_task.transform(payload, ctx)

    seen = set()
    for rec in records:
        key = (rec["site_id"], rec["stat_date"], rec["site_goods_id"])
        assert key not in seen, (
            f"月度汇总存在重复主键: {key}"
        )
        seen.add(key)
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════════
|
||
# Property 8c: daily record count >= weekly count and >= monthly count
|
||
# ══════════════════════════════════════════════════════════════════
|
||
|
||
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_daily_count_gte_weekly_and_monthly(rows):
    """
    **Validates: Requirements 12.2, 12.3, 12.4**

    For one and the same DWD input, the daily summary must contain at least
    as many records as the weekly summary and as the monthly summary: the
    daily grouping key is the finest, so it can only produce the same number
    of groups or more.

    Weekly and monthly counts are deliberately NOT compared with each other.
    Weeks are not nested inside months, so no ordering between those two
    counts follows from the property under test.
    """
    context = _make_context()
    extracted = {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE}

    # All three tasks see the exact same extracted payload and context.
    daily_result = _make_task(GoodsStockDailyTask).transform(extracted, context)
    weekly_result = _make_task(GoodsStockWeeklyTask).transform(extracted, context)
    monthly_result = _make_task(GoodsStockMonthlyTask).transform(extracted, context)

    daily_count = len(daily_result)
    weekly_count = len(weekly_result)
    monthly_count = len(monthly_result)

    assert daily_count >= weekly_count, (
        f"日度记录数({daily_count}) 应 >= 周度记录数({weekly_count})"
    )
    assert daily_count >= monthly_count, (
        f"日度记录数({daily_count}) 应 >= 月度记录数({monthly_count})"
    )
    # NOTE(review): the former `weekly_count >= monthly_count` check was
    # removed. Assuming weekly buckets are calendar weeks (TODO confirm against
    # GoodsStockWeeklyTask), a week can straddle a month boundary: rows for one
    # product on 2025-12-31 and 2026-01-01 give 1 weekly record but 2 monthly
    # records. The generated date range contains such weeks, so that check
    # could fail on valid output.
|