在前后端开发联调前 的提交20260223

This commit is contained in:
Neo
2026-02-23 23:02:20 +08:00
parent 254ccb1e77
commit fafc95e64c
1142 changed files with 10366960 additions and 36957 deletions

View File

@@ -0,0 +1,324 @@
# -*- coding: utf-8 -*-
"""
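Feature: dataflow-field-completion, Property 8: DWS stock-summary granularity aggregation correctness
**Validates: Requirements 12.2, 12.3, 12.4, 12.5, 12.6**
For any DWD stock-summary dataset and any summary granularity (daily / weekly /
monthly), the output of the DWS summary task's transform must satisfy:
(a) every record's stat_period matches the task's granularity
(b) no (site_id, stat_date, site_goods_id) combination appears more than once
(c) the daily summary has at least as many records as the weekly summary and as the monthly summary
Test strategy:
- use hypothesis to generate random DWD stock rows (random fetched_at date, site_goods_id and numeric values)
- build minimal working task instances and call their transform method
- verify the three properties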
"""
from __future__ import annotations
import sys
from dataclasses import dataclass
from datetime import date, datetime, timedelta
from decimal import Decimal
from pathlib import Path
from typing import Any, Dict, List
from unittest.mock import MagicMock
from hypothesis import given, settings, assume, HealthCheck
import hypothesis.strategies as st
# ── Add the ETL connector root to sys.path ──
# The root is located two directories above this file, under
# apps/etl/connectors/feiqiu.
_ETL_ROOT = Path(__file__).resolve().parent.parent.joinpath(
    "apps", "etl", "connectors", "feiqiu"
)
if sys.path.count(str(_ETL_ROOT)) == 0:
    # Inserted at index 0, so this directory is searched first.
    sys.path.insert(0, str(_ETL_ROOT))
from tasks.dws.goods_stock_daily_task import GoodsStockDailyTask
from tasks.dws.goods_stock_weekly_task import GoodsStockWeeklyTask
from tasks.dws.goods_stock_monthly_task import GoodsStockMonthlyTask
from tasks.base_task import TaskContext
# ══════════════════════════════════════════════════════════════════
# Helpers: build minimal working task instances
# ══════════════════════════════════════════════════════════════════
def _make_config() -> MagicMock:
    """Return a mock config whose ``get`` serves a fixed pair of settings.

    ``get(key, default=None)`` yields ``1`` for ``"app.store_id"``,
    ``"Asia/Shanghai"`` for ``"app.timezone"`` and ``default`` for every
    other key. Everything else on the object is plain ``MagicMock``.
    """
    fixed_values = {
        "app.store_id": 1,
        "app.timezone": "Asia/Shanghai",
    }

    def _lookup(key, default=None):
        return fixed_values.get(key, default)

    mock_config = MagicMock()
    mock_config.get = _lookup
    return mock_config
def _make_task(task_cls):
    """Instantiate ``task_cls`` for testing with mocked collaborators.

    The class is called positionally as ``task_cls(config, db, api, logger)``:
    ``config`` comes from ``_make_config()`` and the remaining three
    arguments are fresh ``MagicMock`` objects.
    """
    mock_config = _make_config()
    mock_db, mock_api, mock_logger = MagicMock(), MagicMock(), MagicMock()
    return task_cls(mock_config, mock_db, mock_api, mock_logger)
def _make_context(site_id: int = 1) -> TaskContext:
    """Build a minimal ``TaskContext`` with a fixed 90-day window.

    The window ends at the naive datetime 2026-01-15 12:00:00 and starts
    90 days earlier; ``window_minutes`` is that same span expressed in
    minutes. ``site_id`` is forwarded as the context's ``store_id``.
    """
    window_days = 90
    window_end = datetime(2026, 1, 15, 12, 0, 0)
    window_start = window_end - timedelta(days=window_days)
    return TaskContext(
        store_id=site_id,
        window_start=window_start,
        window_end=window_end,
        window_minutes=window_days * 24 * 60,
    )
# ══════════════════════════════════════════════════════════════════
# Hypothesis strategies: generate random DWD stock rows
# ══════════════════════════════════════════════════════════════════
# Date range: 2025-11-01 ~ 2026-02-20 (spans week and month boundaries)
_MIN_DATE = date(2025, 11, 1)
_MAX_DATE = date(2026, 2, 20)
_DATE_RANGE_DAYS = _MAX_DATE.toordinal() - _MIN_DATE.toordinal()
# Draw a day offset from _MIN_DATE (both ends inclusive) and convert it
# into a calendar date.
_date_strategy = st.integers(min_value=0, max_value=_DATE_RANGE_DAYS).map(
    lambda offset: _MIN_DATE + timedelta(days=offset)
)
# site_goods_id: 5 distinct goods (1..5); the range is kept small so that
# generated rows collide into meaningful aggregates.
_goods_id_strategy = st.integers(1, 5)
# Numeric strategy: plausible stock quantities — finite, two decimal places,
# between 0 and 9999.99 inclusive.
_numeric_strategy = st.decimals(
    Decimal("0"),
    Decimal("9999.99"),
    allow_nan=False,
    allow_infinity=False,
    places=2,
)
@st.composite
def _dwd_row(draw):
    """Generate one random DWD stock-summary row as a dict.

    ``fetched_at`` is a naive datetime built from a drawn date plus a drawn
    hour and minute (seconds are always 0). ``site_id`` and ``tenant_id``
    are fixed at 1 and 100; every other field is drawn independently.
    """
    day = draw(_date_strategy)
    # fetched_at is a datetime carrying a time-of-day component.
    hour = draw(st.integers(min_value=0, max_value=23))
    minute = draw(st.integers(min_value=0, max_value=59))
    fetched_at = datetime(day.year, day.month, day.day, hour=hour, minute=minute)

    # The draws below keep the same order as the dict keys they feed.
    goods_id = draw(_goods_id_strategy)
    name_no = draw(st.integers(min_value=1, max_value=5))
    # NOTE(review): all four unit options are empty strings here — possibly
    # unit labels lost in transcoding; confirm against the repository.
    unit = draw(st.sampled_from(["", "", "", ""]))
    category_id = draw(st.integers(min_value=1, max_value=3))
    second_category_id = draw(st.integers(min_value=1, max_value=5))
    category_name = draw(st.sampled_from(["饮料", "零食", "台球用品"]))

    row = {
        "site_goods_id": goods_id,
        "goods_name": f"商品_{name_no}",
        "goods_unit": unit,
        "goods_category_id": category_id,
        "goods_category_second_id": second_category_id,
        "category_name": category_name,
    }
    for field in (
        "range_start_stock",
        "range_end_stock",
        "range_in",
        "range_out",
        "range_sale",
        "range_sale_money",
        "range_inventory",
        "current_stock",
    ):
        row[field] = draw(_numeric_strategy)
    row.update(site_id=1, tenant_id=100, fetched_at=fetched_at)
    return row
# Generate 1 to 30 DWD rows, sorted by fetched_at (mimics SQL ORDER BY fetched_at).
_dwd_rows_strategy = st.lists(_dwd_row(), min_size=1, max_size=30).map(
    lambda batch: sorted(batch, key=lambda row: row["fetched_at"])
)
# ══════════════════════════════════════════════════════════════════
# Property 8a: stat_period matches the task's granularity
# ══════════════════════════════════════════════════════════════════
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_daily_stat_period_is_daily(rows):
    """
    **Validates: Requirements 12.2**

    Every record produced by the daily summary task's transform must carry
    stat_period == 'daily'.
    """
    records = _make_task(GoodsStockDailyTask).transform(
        {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE},
        _make_context(),
    )
    for record in records:
        period = record["stat_period"]
        assert period == "daily", (
            f"日度汇总记录的 stat_period 应为 'daily',实际为 {period!r}"
        )
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_weekly_stat_period_is_weekly(rows):
    """
    **Validates: Requirements 12.3**

    Every record produced by the weekly summary task's transform must carry
    stat_period == 'weekly'.
    """
    records = _make_task(GoodsStockWeeklyTask).transform(
        {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE},
        _make_context(),
    )
    for record in records:
        period = record["stat_period"]
        assert period == "weekly", (
            f"周度汇总记录的 stat_period 应为 'weekly',实际为 {period!r}"
        )
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_monthly_stat_period_is_monthly(rows):
    """
    **Validates: Requirements 12.4**

    Every record produced by the monthly summary task's transform must carry
    stat_period == 'monthly'.
    """
    records = _make_task(GoodsStockMonthlyTask).transform(
        {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE},
        _make_context(),
    )
    for record in records:
        period = record["stat_period"]
        assert period == "monthly", (
            f"月度汇总记录的 stat_period 应为 'monthly',实际为 {period!r}"
        )
# ══════════════════════════════════════════════════════════════════
# Property 8b: no repeated (site_id, stat_date, site_goods_id) combination
# ══════════════════════════════════════════════════════════════════
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_daily_no_duplicate_keys(rows):
    """
    **Validates: Requirements 12.5, 12.6**

    In the daily summary output, no (site_id, stat_date, site_goods_id)
    combination may appear more than once.
    """
    records = _make_task(GoodsStockDailyTask).transform(
        {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE},
        _make_context(),
    )
    seen = set()
    for record in records:
        pk = tuple(record[col] for col in ("site_id", "stat_date", "site_goods_id"))
        # Fail on the first repeated key so the message names the offender.
        assert pk not in seen, f"日度汇总存在重复主键: {pk}"
        seen.add(pk)
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_weekly_no_duplicate_keys(rows):
    """
    **Validates: Requirements 12.5, 12.6**

    In the weekly summary output, no (site_id, stat_date, site_goods_id)
    combination may appear more than once.
    """
    records = _make_task(GoodsStockWeeklyTask).transform(
        {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE},
        _make_context(),
    )
    seen = set()
    for record in records:
        pk = tuple(record[col] for col in ("site_id", "stat_date", "site_goods_id"))
        # Fail on the first repeated key so the message names the offender.
        assert pk not in seen, f"周度汇总存在重复主键: {pk}"
        seen.add(pk)
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_monthly_no_duplicate_keys(rows):
    """
    **Validates: Requirements 12.5, 12.6**

    In the monthly summary output, no (site_id, stat_date, site_goods_id)
    combination may appear more than once.
    """
    records = _make_task(GoodsStockMonthlyTask).transform(
        {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE},
        _make_context(),
    )
    seen = set()
    for record in records:
        pk = tuple(record[col] for col in ("site_id", "stat_date", "site_goods_id"))
        # Fail on the first repeated key so the message names the offender.
        assert pk not in seen, f"月度汇总存在重复主键: {pk}"
        seen.add(pk)
# ══════════════════════════════════════════════════════════════════
# Property 8c: daily record count >= weekly count, and >= monthly count
# ══════════════════════════════════════════════════════════════════
@given(rows=_dwd_rows_strategy)
@settings(
    max_examples=100,
    suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.too_slow],
)
def test_daily_count_gte_weekly_and_monthly(rows):
    """
    **Validates: Requirements 12.2, 12.3, 12.4**

    For one and the same DWD input, the daily summary must produce at least
    as many records as the weekly summary and as the monthly summary. Every
    day falls into exactly one week and exactly one month, so the daily
    grouping can only be finer than (or equal to) either coarser grouping.

    Weekly and monthly counts are deliberately NOT compared with each other.
    A calendar week can straddle a month boundary: 2025-12-31 and 2026-01-01
    lie in the same week but in different months, and both are inside the
    generated date range. One goods item can therefore yield one weekly
    record but two monthly records, so ``weekly >= monthly`` is not an
    invariant and asserting it makes the test flaky.
    NOTE(review): this assumes the weekly task buckets rows by calendar
    week — confirm against GoodsStockWeeklyTask.transform.
    """
    context = _make_context()
    extracted = {"rows": rows, "site_id": 1, "start_date": _MIN_DATE, "end_date": _MAX_DATE}

    # Run all three granularities over the very same extracted payload.
    daily_count = len(_make_task(GoodsStockDailyTask).transform(extracted, context))
    weekly_count = len(_make_task(GoodsStockWeeklyTask).transform(extracted, context))
    monthly_count = len(_make_task(GoodsStockMonthlyTask).transform(extracted, context))

    assert daily_count >= weekly_count, (
        f"日度记录数({daily_count}) 应 >= 周度记录数({weekly_count})"
    )
    assert daily_count >= monthly_count, (
        f"日度记录数({daily_count}) 应 >= 月度记录数({monthly_count})"
    )