Files
Neo-ZQYY/tests/test_property_core_minimal_fields.py

111 lines
4.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
Property 7: Core schema 最小字段集
Validates: Requirements 7.5
对于任意 core schema 中的表,其字段数量应严格少于对应 dwd schema 中同名(或对应)表的字段数量。
使用 hypothesis 从 core 表列表中随机选取,验证 core 表字段数 < 对应 dwd 表字段数。
"""
import re
import os
from pathlib import Path
import pytest
from hypothesis import given, settings, assume
from hypothesis.strategies import sampled_from
# ---------------------------------------------------------------------------
# SQL 解析工具
# ---------------------------------------------------------------------------
def _parse_tables(sql_text: str) -> dict[str, int]:
"""从 SQL 文本中提取每个 CREATE TABLE 的表名和字段数量。
只统计显式声明的列(不含 CONSTRAINT / PRIMARY KEY / CHECK 等行)。
"""
tables: dict[str, int] = {}
# 匹配 CREATE TABLE ... ( ... ); 允许 IF NOT EXISTS
pattern = re.compile(
r"CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?(\w+)\s*\((.*?)\);",
re.DOTALL | re.IGNORECASE,
)
for match in pattern.finditer(sql_text):
table_name = match.group(1).lower()
body = match.group(2)
# 按逗号拆分,过滤掉约束行
col_count = 0
for line in body.split(","):
line = line.strip()
if not line:
continue
# 跳过约束 / 索引 / 空行
upper = line.upper().lstrip()
if upper.startswith(("PRIMARY KEY", "UNIQUE", "CHECK", "CONSTRAINT",
"EXCLUDE", "FOREIGN KEY", "INDEX")):
continue
# 剩余视为列定义
col_count += 1
tables[table_name] = col_count
return tables
# ---------------------------------------------------------------------------
# 加载 SQL 文件并建立映射
# ---------------------------------------------------------------------------
_CORE_SQL = Path(r"C:\NeoZQYY\db\etl_feiqiu\schemas\core.sql")
_DWD_SQL = Path(r"C:\NeoZQYY\db\etl_feiqiu\schemas\dwd.sql")
_core_tables = _parse_tables(_CORE_SQL.read_text(encoding="utf-8"))
_dwd_tables = _parse_tables(_DWD_SQL.read_text(encoding="utf-8"))
# core → dwd 映射(手动定义,因为命名规则不完全一致)
# 维度表core 与 dwd 同名
# 事实表core.fact_settlement → dwd.dwd_settlement_head
# core.fact_payment → dwd.dwd_payment
_CORE_TO_DWD_MAP: dict[str, str] = {
"dim_site": "dim_site",
"dim_member": "dim_member",
"dim_assistant": "dim_assistant",
"dim_table": "dim_table",
"dim_goods_category": "dim_goods_category",
"fact_settlement": "dwd_settlement_head",
"fact_payment": "dwd_payment",
}
# 预检:确保映射中的表在两侧 SQL 中都存在
_valid_pairs: list[tuple[str, str, int, int]] = []
for core_name, dwd_name in _CORE_TO_DWD_MAP.items():
if core_name in _core_tables and dwd_name in _dwd_tables:
_valid_pairs.append(
(core_name, dwd_name, _core_tables[core_name], _dwd_tables[dwd_name])
)
# 确保至少有可测试的映射对
assert len(_valid_pairs) > 0, (
f"未找到有效的 core→dwd 映射对。"
f" core 表: {list(_core_tables.keys())},"
f" dwd 表: {list(_dwd_tables.keys())}"
)
# ---------------------------------------------------------------------------
# 属性测试
# ---------------------------------------------------------------------------
@settings(max_examples=100)
@given(pair=sampled_from(_valid_pairs))
def test_core_table_has_fewer_fields_than_dwd(pair):
"""**Validates: Requirements 7.5**
对于任意 core schema 中的表,其字段数量应严格少于
对应 dwd schema 中同名(或对应)表的字段数量。
"""
core_name, dwd_name, core_count, dwd_count = pair
assert core_count < dwd_count, (
f"core.{core_name}{core_count} 个字段,"
f"但 dwd.{dwd_name} 只有 {dwd_count} 个字段。"
f" 期望 core 字段数严格少于 dwd。"
)