Files
Neo-ZQYY/tests/test_property_2_fact_mappings_integrity.py

223 lines
12 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
Feature: dataflow-field-completion, Property 2: FACT_MAPPINGS 引用完整性
**Validates: Requirements 6.3**
对于任意 FACT_MAPPINGS 中的映射条目,其 DWD 目标列名必须存在于对应 DWD 表的列定义中,
其 ODS 源表达式引用的列名必须存在于对应 ODS 表的列定义中(或为合法的 SQL 表达式)。
本测试覆盖所有 FACT_MAPPINGS 条目(不仅限于 A 类表),聚焦以下可静态验证的属性:
1. 所有 FACT_MAPPINGS 的 key 都在 TABLE_MAP 中注册
2. 所有条目格式为 (str, str, str|None) 三元组
3. 同一 DWD 表内无重复 dwd_col
4. 所有 ods_expr 非空
5. 所有 cast_type 值在已知合法集合内
6. B 类表(recharge_settlements → dwd_recharge_order)的 5 个新映射条目存在且正确
7. FACT_MAPPINGS 中引用的 DWD 表必须在 TABLE_MAP 中有对应的 ODS 源表
"""
from __future__ import annotations
import sys
from pathlib import Path
from hypothesis import given, settings, HealthCheck
import hypothesis.strategies as st
# ── 将 ETL 模块加入 sys.path ──
# Build the feiqiu connector directory path starting from the directory one
# level above the folder that contains this test file.
_ETL_ROOT = Path(__file__).resolve().parent.parent.joinpath(
    "apps", "etl", "connectors", "feiqiu"
)
# Prepend it only when absent so re-importing this module never adds a
# duplicate sys.path entry.
if str(_ETL_ROOT) not in sys.path:
    sys.path.insert(0, str(_ETL_ROOT))
from tasks.dwd.dwd_load_task import DwdLoadTask
# ── 已知的合法 cast_type 值 ──
# Every cast_type value a mapping entry is allowed to carry. ``None`` is a
# member, so entries whose cast_type is None pass the membership checks.
_VALID_CAST_TYPES = {
    None,
    "bigint",
    "integer",
    "numeric",
    "decimal",
    "timestamptz",
    "boolean",
    "date",
    "text",
    "TEXT[]",
}
# ── 收集所有 FACT_MAPPINGS 条目为 (dwd_table, dwd_col, ods_expr, cast_type) 四元组 ──
def _collect_all_mappings() -> list[tuple[str, str, str, str | None]]:
    """Flatten ``DwdLoadTask.FACT_MAPPINGS`` into a list of 4-tuples.

    Returns:
        One ``(dwd_table, dwd_col, ods_expr, cast_type)`` tuple per mapping
        entry, in the iteration order of ``FACT_MAPPINGS`` and of each
        table's entry sequence.

    Raises:
        ValueError: If an entry does not unpack into exactly three items.
    """
    return [
        (dwd_table, dwd_col, ods_expr, cast_type)
        for dwd_table, table_entries in DwdLoadTask.FACT_MAPPINGS.items()
        # Unpacking into three names keeps the strict "exactly 3 items" check.
        for dwd_col, ods_expr, cast_type in table_entries
    ]
# Import-time snapshots: the flattened 4-tuple entries and the list of
# FACT_MAPPINGS keys (iterating the mapping yields its keys).
_ALL_MAPPING_ENTRIES = _collect_all_mappings()
_ALL_FACT_TABLES = list(DwdLoadTask.FACT_MAPPINGS)
# ── B 类表期望映射(recharge_settlements → dwd_recharge_order)──
# Expected Requirement 6.3 mappings, keyed by DWD column name. Each value is
# the (ods_expr, cast_type) pair the mapping entry must carry.
_REQ6_EXPECTED = {
    "pl_coupon_sale_amount": (
        "plcouponsaleamount",
        None,
    ),
    "mervou_sales_amount": (
        "mervousalesamount",
        None,
    ),
    "electricity_money": (
        "electricitymoney",
        None,
    ),
    "real_electricity_money": (
        "realelectricitymoney",
        None,
    ),
    "electricity_adjust_money": (
        "electricityadjustmoney",
        None,
    ),
}
# ══════════════════════════════════════════════════════════════════
# Property 2.1: FACT_MAPPINGS 结构完整性 — 所有 key 都在 TABLE_MAP 中
# ══════════════════════════════════════════════════════════════════
def test_all_fact_mapping_tables_in_table_map():
    """Every key of ``FACT_MAPPINGS`` must also be a key of ``TABLE_MAP``."""
    table_map = DwdLoadTask.TABLE_MAP
    for dwd_table in DwdLoadTask.FACT_MAPPINGS:
        is_registered = dwd_table in table_map
        assert is_registered, (
            f"FACT_MAPPINGS 中的 {dwd_table} 未在 TABLE_MAP 中注册"
        )
# ══════════════════════════════════════════════════════════════════
# Property 2.2: 所有条目格式为 (str, str, str|None) 三元组
# ══════════════════════════════════════════════════════════════════
def test_all_fact_mapping_entries_are_valid_tuples():
    """Check the shape and element types of every ``FACT_MAPPINGS`` entry.

    Each entry must be a tuple or list of exactly three items
    ``(dwd_col, ods_expr, cast_type)``, where ``dwd_col`` and ``ods_expr`` are
    non-blank strings and ``cast_type`` is either ``None`` or a string.
    """
    for table, entries in DwdLoadTask.FACT_MAPPINGS.items():
        for position, entry in enumerate(entries):
            # Shared "<table>[<index>]" prefix for all failure messages.
            where = f"{table}[{position}]"

            is_triple = isinstance(entry, (tuple, list)) and len(entry) == 3
            # len() is only evaluated inside the message when the object
            # actually supports it, so reporting never raises on odd entries.
            assert is_triple, (
                f"{where}: 条目应为三元组,实际为 {type(entry).__name__}"
                f"(len={len(entry) if hasattr(entry, '__len__') else '?'})"
            )

            dwd_col, ods_expr, cast_type = entry

            dwd_col_ok = isinstance(dwd_col, str) and bool(dwd_col.strip())
            assert dwd_col_ok, (
                f"{where}: dwd_col 必须为非空字符串,实际为 {dwd_col!r}"
            )

            ods_expr_ok = isinstance(ods_expr, str) and bool(ods_expr.strip())
            assert ods_expr_ok, (
                f"{where}: ods_expr 必须为非空字符串,实际为 {ods_expr!r}"
            )

            cast_type_ok = cast_type is None or isinstance(cast_type, str)
            assert cast_type_ok, (
                f"{where}.{dwd_col}: cast_type 必须为 None 或字符串,"
                f"实际为 {type(cast_type).__name__}"
            )
# ══════════════════════════════════════════════════════════════════
# Property 2.3: 同一 DWD 表内无重复 dwd_col
# ══════════════════════════════════════════════════════════════════
def test_no_duplicate_dwd_cols_across_all_tables():
    """No DWD table may list the same ``dwd_col`` twice (case-insensitive)."""
    for table, entries in DwdLoadTask.FACT_MAPPINGS.items():
        # Lower-cased column names already encountered for this table.
        lowered_seen: set[str] = set()
        for dwd_col, _ods_expr, _cast_type in entries:
            lowered = dwd_col.lower()
            is_first_occurrence = lowered not in lowered_seen
            assert is_first_occurrence, (
                f"{table}: dwd_col={dwd_col!r} 重复出现"
            )
            lowered_seen.add(lowered)
# ══════════════════════════════════════════════════════════════════
# Property 2.4: 所有 cast_type 值在已知合法集合内
# ══════════════════════════════════════════════════════════════════
def test_all_cast_types_are_valid():
    """Every entry's ``cast_type`` must be a member of ``_VALID_CAST_TYPES``."""
    for table, dwd_col, _ods_expr, cast_type in _ALL_MAPPING_ENTRIES:
        is_known = cast_type in _VALID_CAST_TYPES
        assert is_known, (
            f"{table}.{dwd_col}: 未知 cast_type={cast_type!r},"
            f"合法值为 {_VALID_CAST_TYPES}"
        )
# ══════════════════════════════════════════════════════════════════
# Property 2.5: 所有 ods_expr 非空
# ══════════════════════════════════════════════════════════════════
def test_ods_expr_not_empty():
    """Every entry's ``ods_expr`` must contain non-whitespace text."""
    for table, dwd_col, ods_expr, _cast_type in _ALL_MAPPING_ENTRIES:
        trimmed = ods_expr.strip()
        assert trimmed, (
            f"{table}.{dwd_col}: ods_expr 为空字符串"
        )
# ══════════════════════════════════════════════════════════════════
# Property 2.6: B 类表特定映射验证(需求 6.3)
# ══════════════════════════════════════════════════════════════════
def test_recharge_settlements_mappings_exist():
    """Verify the five Requirement 6.3 mappings on ``dwd.dwd_recharge_order``.

    **Validates: Requirements 6.3**

    For each column listed in ``_REQ6_EXPECTED`` the table must contain a
    mapping entry whose ODS source expression and cast_type equal the
    expected pair.
    """
    dwd_table = "dwd.dwd_recharge_order"
    entries = DwdLoadTask.FACT_MAPPINGS.get(dwd_table, [])
    assert entries, f"{dwd_table} 在 FACT_MAPPINGS 中无条目"

    # Index the actual entries by lower-cased column name:
    # dwd_col -> (ods_expr, cast_type). A later duplicate overwrites an
    # earlier one.
    actual = {}
    for row in entries:
        actual[row[0].lower()] = (row[1], row[2])

    for dwd_col, expected_pair in _REQ6_EXPECTED.items():
        expected_src, expected_cast = expected_pair

        assert dwd_col in actual, (
            f"[Req 6.3] {dwd_table}: 缺少 dwd_col={dwd_col!r} 的映射条目"
        )

        actual_src, actual_cast = actual[dwd_col]
        assert actual_src == expected_src, (
            f"[Req 6.3] {dwd_table}.{dwd_col}: "
            f"ODS 源应为 {expected_src!r},实际为 {actual_src!r}"
        )
        assert actual_cast == expected_cast, (
            f"[Req 6.3] {dwd_table}.{dwd_col}: "
            f"cast_type 应为 {expected_cast!r},实际为 {actual_cast!r}"
        )
# ══════════════════════════════════════════════════════════════════
# Property 2.7: 交叉引用 — FACT_MAPPINGS 的 DWD 表在 TABLE_MAP 中有 ODS 源表
# ══════════════════════════════════════════════════════════════════
def test_fact_mapping_tables_have_ods_source():
    """Each ``FACT_MAPPINGS`` table must map to a non-blank string in ``TABLE_MAP``."""
    for table in DwdLoadTask.FACT_MAPPINGS:
        ods_source = DwdLoadTask.TABLE_MAP.get(table)
        # A missing key yields None; None, non-strings, "" and
        # whitespace-only strings are all rejected here.
        has_source = isinstance(ods_source, str) and bool(ods_source.strip())
        assert has_source, (
            f"{table}: TABLE_MAP 中的 ODS 源表为空或不存在"
        )
# ══════════════════════════════════════════════════════════════════
# Hypothesis 属性测试:随机选取 FACT_MAPPINGS 条目验证结构
# ══════════════════════════════════════════════════════════════════
# Strategy that draws one (dwd_table, dwd_col, ods_expr, cast_type) tuple
# from the flattened list of mapping entries.
_mapping_entry_strategy = st.sampled_from(_ALL_MAPPING_ENTRIES)


@given(entry=_mapping_entry_strategy)
@settings(max_examples=300, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_random_fact_mapping_entry_structure(entry):
    """Property check on a sampled ``FACT_MAPPINGS`` entry.

    **Validates: Requirements 6.3**

    For any sampled entry, verify that:
    - ``dwd_col`` is a non-blank string
    - ``ods_expr`` is a non-blank string
    - ``cast_type`` is a member of ``_VALID_CAST_TYPES``
    - the owning DWD table is registered in ``TABLE_MAP`` with a non-blank
      string as its ODS source table

    Args:
        entry: A ``(dwd_table, dwd_col, ods_expr, cast_type)`` tuple drawn
            from ``_ALL_MAPPING_ENTRIES``.
    """
    table, dwd_col, ods_expr, cast_type = entry

    # Structural checks on the entry itself.
    assert isinstance(dwd_col, str) and dwd_col.strip(), \
        f"{table}: dwd_col 为空"
    assert isinstance(ods_expr, str) and ods_expr.strip(), \
        f"{table}.{dwd_col}: ods_expr 为空"
    assert cast_type in _VALID_CAST_TYPES, \
        f"{table}.{dwd_col}: 未知 cast_type={cast_type!r}"

    # Cross-reference checks against TABLE_MAP.
    assert table in DwdLoadTask.TABLE_MAP, \
        f"{table} 未在 TABLE_MAP 中注册"
    ods_table = DwdLoadTask.TABLE_MAP[table]
    # Whitespace-only names are rejected too, matching the rule enforced by
    # test_fact_mapping_tables_have_ods_source.
    assert isinstance(ods_table, str) and ods_table.strip(), \
        f"{table}: TABLE_MAP 中的 ODS 源表无效"
@given(table_name=st.sampled_from(_ALL_FACT_TABLES))
@settings(max_examples=100, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_random_table_no_duplicate_dwd_cols(table_name):
    """Property check: a sampled table has no repeated ``dwd_col``.

    **Validates: Requirements 6.3**

    Column names are compared case-insensitively.

    Args:
        table_name: A ``FACT_MAPPINGS`` key drawn from ``_ALL_FACT_TABLES``.
    """
    lowered_seen: set[str] = set()
    for dwd_col, _ods_expr, _cast_type in DwdLoadTask.FACT_MAPPINGS[table_name]:
        lowered = dwd_col.lower()
        already_present = lowered in lowered_seen
        assert not already_present, (
            f"{table_name}: dwd_col={dwd_col!r} 重复出现"
        )
        lowered_seen.add(lowered)