# -*- coding: utf-8 -*-
"""
Feature: dataflow-field-completion, Property 2: FACT_MAPPINGS referential integrity

**Validates: Requirements 6.3**

For every mapping entry in FACT_MAPPINGS, the DWD target column name must exist
in the column definitions of the corresponding DWD table, and the column names
referenced by its ODS source expression must exist in the column definitions of
the corresponding ODS table (or the expression must be a legal SQL expression).

This test covers all FACT_MAPPINGS entries (not only class-A tables) and
focuses on the following statically verifiable properties:
1. Every FACT_MAPPINGS key is registered in TABLE_MAP
2. Every entry is a (str, str, str|None) triple
3. No duplicate dwd_col within a single DWD table
4. Every ods_expr is non-empty
5. Every cast_type value is within the known valid set
6. The 5 new mapping entries of the class-B table
   (recharge_settlements → dwd_recharge_order) exist and are correct
7. Every DWD table referenced by FACT_MAPPINGS has a corresponding ODS source
   table in TABLE_MAP
"""
from __future__ import annotations

import sys
from pathlib import Path

from hypothesis import given, settings, HealthCheck
import hypothesis.strategies as st

# ── Add the ETL module root to sys.path ──
_ETL_ROOT = Path(__file__).resolve().parent.parent / "apps" / "etl" / "connectors" / "feiqiu"
if str(_ETL_ROOT) not in sys.path:
    sys.path.insert(0, str(_ETL_ROOT))

from tasks.dwd.dwd_load_task import DwdLoadTask

# ── Known valid cast_type values ──
_VALID_CAST_TYPES = {
    None,
    "bigint",
    "integer",
    "numeric",
    "decimal",
    "timestamptz",
    "boolean",
    "date",
    "text",
    "TEXT[]",
}


# ── Collect every FACT_MAPPINGS entry as a (dwd_table, dwd_col, ods_expr, cast_type) 4-tuple ──
def _collect_all_mappings() -> list[tuple[str, str, str, str | None]]:
    """Flatten FACT_MAPPINGS into (dwd_table, dwd_col, ods_expr, cast_type) tuples.

    This runs at import time, so it must not raise on malformed data: an
    entry that is not a 3-item tuple/list is skipped here instead of being
    unpacked. Otherwise a single bad entry would abort test collection with a
    ValueError before test_all_fact_mapping_entries_are_valid_tuples gets the
    chance to report it with a readable message.

    Returns:
        One 4-tuple per well-formed mapping entry, in FACT_MAPPINGS iteration
        order.
    """
    result = []
    for table, entries in DwdLoadTask.FACT_MAPPINGS.items():
        for entry in entries:
            # Shape problems are reported by the dedicated structure test.
            if not isinstance(entry, (tuple, list)) or len(entry) != 3:
                continue
            dwd_col, ods_expr, cast_type = entry
            result.append((table, dwd_col, ods_expr, cast_type))
    return result


_ALL_MAPPING_ENTRIES = _collect_all_mappings()
_ALL_FACT_TABLES = list(DwdLoadTask.FACT_MAPPINGS)

# ── Expected class-B mappings (recharge_settlements → dwd_recharge_order) ──
# dwd_col -> (expected ods_expr, expected cast_type)
_REQ6_EXPECTED = {
    "pl_coupon_sale_amount": ("plcouponsaleamount", None),
    "mervou_sales_amount": ("mervousalesamount", None),
    "electricity_money": ("electricitymoney", None),
    "real_electricity_money": ("realelectricitymoney", None),
    "electricity_adjust_money": ("electricityadjustmoney", None),
}

# ══════════════════════════════════════════════════════════════════
# Property 2.1: FACT_MAPPINGS structural integrity — every key is in TABLE_MAP
# ══════════════════════════════════════════════════════════════════

def test_all_fact_mapping_tables_in_table_map():
    """Check that each FACT_MAPPINGS key is also a key of TABLE_MAP."""
    for fact_table in DwdLoadTask.FACT_MAPPINGS:
        registered = fact_table in DwdLoadTask.TABLE_MAP
        assert registered, f"FACT_MAPPINGS 中的 {fact_table} 未在 TABLE_MAP 中注册"


# ══════════════════════════════════════════════════════════════════
# Property 2.2: every entry is a (str, str, str|None) triple
# ══════════════════════════════════════════════════════════════════

def test_all_fact_mapping_entries_are_valid_tuples():
    """Check the shape and element types of every FACT_MAPPINGS entry.

    Each entry must be a 3-item tuple/list (dwd_col, ods_expr, cast_type)
    where dwd_col and ods_expr are non-blank strings and cast_type is either
    None or a string.
    """
    for fact_table, mappings in DwdLoadTask.FACT_MAPPINGS.items():
        for pos, mapping in enumerate(mappings):
            # The shape is verified first so the unpacking below cannot fail.
            is_triple = isinstance(mapping, (tuple, list)) and len(mapping) == 3
            assert is_triple, (
                f"{fact_table}[{pos}]: 条目应为三元组,实际为 {type(mapping).__name__}(len={len(mapping) if hasattr(mapping, '__len__') else '?'})"
            )

            target_col, source_expr, cast = mapping
            assert isinstance(target_col, str) and target_col.strip(), (
                f"{fact_table}[{pos}]: dwd_col 必须为非空字符串,实际为 {target_col!r}"
            )
            assert isinstance(source_expr, str) and source_expr.strip(), (
                f"{fact_table}[{pos}]: ods_expr 必须为非空字符串,实际为 {source_expr!r}"
            )
            assert cast is None or isinstance(cast, str), (
                f"{fact_table}[{pos}].{target_col}: cast_type 必须为 None 或字符串,实际为 {type(cast).__name__}"
            )


# ══════════════════════════════════════════════════════════════════
# Property 2.3: no duplicate dwd_col within a single DWD table
# ══════════════════════════════════════════════════════════════════

def test_no_duplicate_dwd_cols_across_all_tables():
    """Check that no DWD table maps the same dwd_col twice (case-insensitive)."""
    for fact_table, mappings in DwdLoadTask.FACT_MAPPINGS.items():
        lowered_seen: set[str] = set()
        for target_col, _source_expr, _cast in mappings:
            # Column names are compared after lower-casing.
            folded = target_col.lower()
            assert folded not in lowered_seen, (
                f"{fact_table}: dwd_col={target_col!r} 重复出现"
            )
            lowered_seen.add(folded)


# ══════════════════════════════════════════════════════════════════
# Property 2.4: every cast_type value is within the known valid set
# ══════════════════════════════════════════════════════════════════

def test_all_cast_types_are_valid():
    """Every cast_type in FACT_MAPPINGS must be one of the known valid values."""
    for table, dwd_col, _, cast_type in _ALL_MAPPING_ENTRIES:
        assert cast_type in _VALID_CAST_TYPES, \
            f"{table}.{dwd_col}: 未知 cast_type={cast_type!r},合法值为 {_VALID_CAST_TYPES}"


# ══════════════════════════════════════════════════════════════════
# Property 2.5: every ods_expr is non-empty
# ══════════════════════════════════════════════════════════════════

def test_ods_expr_not_empty():
    """Every ods_expr in FACT_MAPPINGS must be a non-blank string."""
    for table, dwd_col, ods_expr, _ in _ALL_MAPPING_ENTRIES:
        # The isinstance guard turns a non-string ods_expr into an assertion
        # failure instead of an AttributeError raised by .strip().
        assert isinstance(ods_expr, str) and ods_expr.strip(), \
            f"{table}.{dwd_col}: ods_expr 为空字符串"


# ══════════════════════════════════════════════════════════════════
# Property 2.6: class-B table specific mapping checks (Requirement 6.3)
# ══════════════════════════════════════════════════════════════════

def test_recharge_settlements_mappings_exist():
    """
    **Validates: Requirements 6.3**

    The 5 new mapping entries of the class-B table
    recharge_settlements → dwd_recharge_order must exist, and their ODS source
    expression and cast_type must match _REQ6_EXPECTED.
    """
    dwd_table = "dwd.dwd_recharge_order"
    entries = DwdLoadTask.FACT_MAPPINGS.get(dwd_table, [])
    assert entries, f"{dwd_table} 在 FACT_MAPPINGS 中无条目"

    # Actual mappings keyed by lower-cased dwd_col: dwd_col -> (ods_expr, cast_type)
    actual = {e[0].lower(): (e[1], e[2]) for e in entries}

    for dwd_col, (expected_src, expected_cast) in _REQ6_EXPECTED.items():
        assert dwd_col in actual, \
            f"[Req 6.3] {dwd_table}: 缺少 dwd_col={dwd_col!r} 的映射条目"
        actual_src, actual_cast = actual[dwd_col]
        assert actual_src == expected_src, \
            f"[Req 6.3] {dwd_table}.{dwd_col}: ODS 源应为 {expected_src!r},实际为 {actual_src!r}"
        assert actual_cast == expected_cast, \
            f"[Req 6.3] {dwd_table}.{dwd_col}: cast_type 应为 {expected_cast!r},实际为 {actual_cast!r}"


# ══════════════════════════════════════════════════════════════════
# Property 2.7: cross-reference — each FACT_MAPPINGS DWD table has an ODS source in TABLE_MAP
# ══════════════════════════════════════════════════════════════════

def test_fact_mapping_tables_have_ods_source():
    """Every DWD table in FACT_MAPPINGS must map to a non-blank ODS source table in TABLE_MAP."""
    for table in DwdLoadTask.FACT_MAPPINGS:
        ods_table = DwdLoadTask.TABLE_MAP.get(table)
        assert ods_table and isinstance(ods_table, str) and ods_table.strip(), \
            f"{table}: TABLE_MAP 中的 ODS 源表为空或不存在"


# ══════════════════════════════════════════════════════════════════
# Hypothesis property tests: sample FACT_MAPPINGS entries and verify their structure
# ══════════════════════════════════════════════════════════════════

_mapping_entry_strategy = st.sampled_from(_ALL_MAPPING_ENTRIES)


@given(entry=_mapping_entry_strategy)
@settings(max_examples=300, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_random_fact_mapping_entry_structure(entry):
    """
    **Validates: Requirements 6.3**

    For any sampled FACT_MAPPINGS entry, verify that:
    - dwd_col is a non-blank string
    - ods_expr is a non-blank string
    - cast_type is within the valid set
    - the owning DWD table is registered in TABLE_MAP with a non-blank
      ODS source table
    """
    table, dwd_col, ods_expr, cast_type = entry

    # Structural checks
    assert isinstance(dwd_col, str) and dwd_col.strip(), \
        f"{table}: dwd_col 为空"
    assert isinstance(ods_expr, str) and ods_expr.strip(), \
        f"{table}.{dwd_col}: ods_expr 为空"
    assert cast_type in _VALID_CAST_TYPES, \
        f"{table}.{dwd_col}: 未知 cast_type={cast_type!r}"

    # Cross-reference checks
    assert table in DwdLoadTask.TABLE_MAP, \
        f"{table} 未在 TABLE_MAP 中注册"
    ods_table = DwdLoadTask.TABLE_MAP[table]
    # Same criterion as test_fact_mapping_tables_have_ods_source: a
    # whitespace-only ODS table name is rejected as well.
    assert ods_table and isinstance(ods_table, str) and ods_table.strip(), \
        f"{table}: TABLE_MAP 中的 ODS 源表无效"


@given(table_name=st.sampled_from(_ALL_FACT_TABLES))
@settings(max_examples=100, suppress_health_check=[HealthCheck.function_scoped_fixture])
def test_random_table_no_duplicate_dwd_cols(table_name):
    """
    **Validates: Requirements 6.3**

    For any sampled FACT_MAPPINGS table, verify that it contains no duplicate
    dwd_col (compared case-insensitively).
    """
    entries = DwdLoadTask.FACT_MAPPINGS[table_name]
    seen: set[str] = set()
    for dwd_col, _, _ in entries:
        key = dwd_col.lower()
        assert key not in seen, \
            f"{table_name}: dwd_col={dwd_col!r} 重复出现"
        seen.add(key)