Files
Neo-ZQYY/tests/test_property_core_minimal_fields.py
Neo 2a7a5d68aa feat: 2026-04-15~04-20 累积变更基线 — 多主线合流
主线 1: rns1-customer-coach-api + 04-miniapp-core-business 后端实施
  - 新增 GET /xcx/coaches/{id}/banner 轻量接口
  - performance/records 加 coach_id 参数 + view_board_coach 权限分流
  - coach/customer/performance/board/task 服务层重构
  - fdw_queries 结算单粒度聚合 + consumption_summary 视图统一
  - task_generator 回访宽限 72h + UPSERT 替代策略 + Step 5 保底清理
  - recall_detector settle_type=3 双重限制 + 门店级 resolved

主线 2: 小程序权限分流 + 新增 coach-service-records 管理者视角业绩明细页
  - perf-progress 共享模块去重 task-list/coach-detail 动画逻辑
  - isScattered 散客标记端到端
  - foodDetail/phoneFull/creator* 字段透传

主线 3: P19 指数回测框架 Phase 1+2
  - 3 个指数表 stat_date 日快照模式
  - 新增 DWS_INDEX_BACKFILL / DWS_TASK_SIMULATION 工具任务
  - task_engine 升级 HTTP 实时 + 推演回测双模式

主线 4: Core 维度层启用
  - 新增 CORE_DIM_SYNC 任务(DWD → core 4 维度表)
  - 修复 app 视图空查询问题

主线 5: member_project_tag 改为 LAST_30_VISITS 消费次数窗口

主线 6: 2 个迁移 SQL 已执行(stat_date + member_project_tag 新窗口)
  - schema 基线与 DDL 快照同步

主线 7: 开发机路径迁移 C:\NeoZQYY → C:\Project\NeoZQYY(约 95% 改动量)

附带: 新建运维脚本(churned_customer_report / simulate_historical_tasks /
      backfill_index_snapshots)+ tools/task-analysis/ 任务分析工具

合计 157 文件。未包含中间产物(tmp/ .playwright-mcp/ inspect-* excel/sheet 分析 txt)。
审计记录见下一个 commit。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 06:32:07 +08:00

120 lines
4.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
Property 7: Core schema 最小字段集
Validates: Requirements 7.5
对于任意 core schema 中的表,其字段数量应严格少于对应 dwd schema 中同名(或对应)表的字段数量。
使用 hypothesis 从 core 表列表中随机选取,验证 core 表字段数 < 对应 dwd 表字段数。
"""
import re
import os
from pathlib import Path
import pytest
from hypothesis import given, settings, assume
from hypothesis.strategies import sampled_from
# DDL 基线文件在 ETL schema 重构后已删除,跳过整个模块
_CORE_SQL = Path(r"C:\Project\NeoZQYY\db\etl_feiqiu\schemas\core.sql")
_DWD_SQL = Path(r"C:\Project\NeoZQYY\db\etl_feiqiu\schemas\dwd.sql")
if not _CORE_SQL.exists() or not _DWD_SQL.exists():
pytest.skip(
"DDL 基线文件 core.sql / dwd.sql 不存在ETL schema 重构后已删除)",
allow_module_level=True,
)
# ---------------------------------------------------------------------------
# SQL 解析工具
# ---------------------------------------------------------------------------
def _parse_tables(sql_text: str) -> dict[str, int]:
"""从 SQL 文本中提取每个 CREATE TABLE 的表名和字段数量。
只统计显式声明的列(不含 CONSTRAINT / PRIMARY KEY / CHECK 等行)。
"""
tables: dict[str, int] = {}
# 匹配 CREATE TABLE ... ( ... ); 允许 IF NOT EXISTS
pattern = re.compile(
r"CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?(\w+)\s*\((.*?)\);",
re.DOTALL | re.IGNORECASE,
)
for match in pattern.finditer(sql_text):
table_name = match.group(1).lower()
body = match.group(2)
# 按逗号拆分,过滤掉约束行
col_count = 0
for line in body.split(","):
line = line.strip()
if not line:
continue
# 跳过约束 / 索引 / 空行
upper = line.upper().lstrip()
if upper.startswith(("PRIMARY KEY", "UNIQUE", "CHECK", "CONSTRAINT",
"EXCLUDE", "FOREIGN KEY", "INDEX")):
continue
# 剩余视为列定义
col_count += 1
tables[table_name] = col_count
return tables
# ---------------------------------------------------------------------------
# 加载 SQL 文件并建立映射
# ---------------------------------------------------------------------------
_CORE_SQL = Path(r"C:\Project\NeoZQYY\db\etl_feiqiu\schemas\core.sql")
_DWD_SQL = Path(r"C:\Project\NeoZQYY\db\etl_feiqiu\schemas\dwd.sql")
_core_tables = _parse_tables(_CORE_SQL.read_text(encoding="utf-8"))
_dwd_tables = _parse_tables(_DWD_SQL.read_text(encoding="utf-8"))
# core → dwd 映射(手动定义,因为命名规则不完全一致)
# 维度表core 与 dwd 同名
# 事实表core.fact_settlement → dwd.dwd_settlement_head
# core.fact_payment → dwd.dwd_payment
_CORE_TO_DWD_MAP: dict[str, str] = {
"dim_site": "dim_site",
"dim_member": "dim_member",
"dim_assistant": "dim_assistant",
"dim_table": "dim_table",
"dim_goods_category": "dim_goods_category",
"fact_settlement": "dwd_settlement_head",
"fact_payment": "dwd_payment",
}
# 预检:确保映射中的表在两侧 SQL 中都存在
_valid_pairs: list[tuple[str, str, int, int]] = []
for core_name, dwd_name in _CORE_TO_DWD_MAP.items():
if core_name in _core_tables and dwd_name in _dwd_tables:
_valid_pairs.append(
(core_name, dwd_name, _core_tables[core_name], _dwd_tables[dwd_name])
)
# 确保至少有可测试的映射对
assert len(_valid_pairs) > 0, (
f"未找到有效的 core→dwd 映射对。"
f" core 表: {list(_core_tables.keys())},"
f" dwd 表: {list(_dwd_tables.keys())}"
)
# ---------------------------------------------------------------------------
# 属性测试
# ---------------------------------------------------------------------------
@settings(max_examples=100)
@given(pair=sampled_from(_valid_pairs))
def test_core_table_has_fewer_fields_than_dwd(pair):
"""**Validates: Requirements 7.5**
对于任意 core schema 中的表,其字段数量应严格少于
对应 dwd schema 中同名(或对应)表的字段数量。
"""
core_name, dwd_name, core_count, dwd_count = pair
assert core_count < dwd_count, (
f"core.{core_name}{core_count} 个字段,"
f"但 dwd.{dwd_name} 只有 {dwd_count} 个字段。"
f" 期望 core 字段数严格少于 dwd。"
)