初始提交:飞球 ETL 系统全量代码
This commit is contained in:
485
tests/unit/test_audit_report_properties.py
Normal file
485
tests/unit/test_audit_report_properties.py
Normal file
@@ -0,0 +1,485 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
属性测试 — 报告输出属性
|
||||
|
||||
Feature: repo-audit
|
||||
- Property 13: 统计摘要一致性
|
||||
- Property 14: 报告头部元信息
|
||||
- Property 15: 写操作仅限 docs/audit/
|
||||
|
||||
Validates: Requirements 4.2, 4.5, 4.6, 4.7, 5.2
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import string
|
||||
from pathlib import Path
|
||||
|
||||
from hypothesis import given, settings, assume
|
||||
from hypothesis import strategies as st
|
||||
|
||||
from scripts.audit import (
|
||||
AlignmentIssue,
|
||||
Category,
|
||||
Disposition,
|
||||
DocMapping,
|
||||
FlowNode,
|
||||
InventoryItem,
|
||||
)
|
||||
from scripts.audit.inventory_analyzer import render_inventory_report
|
||||
from scripts.audit.flow_analyzer import render_flow_report
|
||||
from scripts.audit.doc_alignment_analyzer import render_alignment_report
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 共享生成器策略
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Characters allowed inside a single generated path segment.
_PATH_CHARS = string.ascii_letters + string.digits + "_-."

# One path component: 1-12 characters drawn from _PATH_CHARS.
_path_segment = st.text(alphabet=_PATH_CHARS, min_size=1, max_size=12)

# Relative path made of 1-3 segments joined with "/".
_rel_path = st.lists(_path_segment, min_size=1, max_size=3).map("/".join)

# Free text of 1-30 characters (letters, numbers, punctuation, symbols,
# separators). "|", "\n" and "\r" are excluded -- presumably so a value can
# sit inside a single Markdown table cell; confirm against the renderers.
_safe_text = st.text(
    alphabet=st.characters(
        whitelist_categories=("L", "N", "P", "S", "Z"),
        blacklist_characters="|\n\r",
    ),
    min_size=1,
    max_size=30,
)

# Repository root string: 3-40 raw characters, normalised so that the result
# always starts with exactly one leading "/".
_repo_root_str = st.text(
    alphabet=string.ascii_letters + string.digits + "/_-.",
    min_size=3,
    max_size=40,
).map(lambda raw: "/" + raw.lstrip("/"))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# InventoryItem 生成器
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _inventory_item_st() -> st.SearchStrategy[InventoryItem]:
    """Return a strategy that builds ``InventoryItem`` objects.

    Every field is drawn independently: a random relative path, any
    ``Category`` member, any ``Disposition`` member and a table-safe
    description string.
    """
    field_strategies = {
        "rel_path": _rel_path,
        "category": st.sampled_from(list(Category)),
        "disposition": st.sampled_from(list(Disposition)),
        "description": _safe_text,
    }
    return st.builds(InventoryItem, **field_strategies)


# Lists of 0-20 generated inventory items.
_inventory_list = st.lists(_inventory_item_st(), min_size=0, max_size=20)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# FlowNode 生成器(限制深度和宽度)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _flow_node_st(max_depth: int = 2) -> st.SearchStrategy[FlowNode]:
    """Return a strategy producing random ``FlowNode`` trees.

    Args:
        max_depth: Number of child levels allowed below the generated node.
            At ``max_depth <= 0`` the node is a leaf with no children;
            otherwise it gets 0-3 children generated with ``max_depth - 1``.
            Bounding depth and width keeps the tree size from exploding.

    Returns:
        A Hypothesis strategy yielding ``FlowNode`` instances.
    """
    if max_depth <= 0:
        # st.builds(list) creates a fresh empty list per draw. st.just([])
        # would hand the very same list object to every leaf, so a mutation
        # of one leaf's children would leak into all others.
        children = st.builds(list)
    else:
        children = st.lists(
            _flow_node_st(max_depth - 1),
            min_size=0,
            max_size=3,
        )

    return st.builds(
        FlowNode,
        name=_path_segment,
        source_file=_rel_path,
        node_type=st.sampled_from(["entry", "module", "class", "function"]),
        children=children,
    )


# 0-5 root trees per example.
_flow_tree_list = st.lists(_flow_node_st(), min_size=0, max_size=5)
# 0-10 orphan module paths per example.
_orphan_list = st.lists(_rel_path, min_size=0, max_size=10)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DocMapping / AlignmentIssue 生成器
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# The three issue kinds counted by the alignment report summary.
_issue_type_st = st.sampled_from(["stale", "conflict", "missing"])


def _alignment_issue_st() -> st.SearchStrategy[AlignmentIssue]:
    """Return a strategy that builds ``AlignmentIssue`` objects.

    The document path and related code path are random relative paths, the
    issue type is one of ``stale`` / ``conflict`` / ``missing`` and the
    description is table-safe text.
    """
    issue_fields = {
        "doc_path": _rel_path,
        "issue_type": _issue_type_st,
        "description": _safe_text,
        "related_code": _rel_path,
    }
    return st.builds(AlignmentIssue, **issue_fields)


def _doc_mapping_st() -> st.SearchStrategy[DocMapping]:
    """Return a strategy that builds ``DocMapping`` objects.

    Each mapping gets a random document path, a table-safe topic, 0-5 related
    code paths and one of the four status strings.
    """
    related = st.lists(_rel_path, min_size=0, max_size=5)
    status = st.sampled_from(["aligned", "stale", "conflict", "orphan"])
    return st.builds(
        DocMapping,
        doc_path=_rel_path,
        doc_topic=_safe_text,
        related_code=related,
        status=status,
    )


# 0-15 document mappings per example.
_mapping_list = st.lists(_doc_mapping_st(), min_size=0, max_size=15)
# 0-15 alignment issues per example.
_issue_list = st.lists(_alignment_issue_st(), min_size=0, max_size=15)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Property 13: 统计摘要一致性
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class TestProperty13SummaryConsistency:
    """Property 13: summary statistics are consistent.

    Feature: repo-audit, Property 13
    Validates: Requirements 4.5, 4.6, 4.7

    For any report, the counts shown per category / label in the summary
    must add up to the length of the corresponding input list.
    """

    # --- 13a: inventory report, category counts add up to the list length ---

    @given(items=_inventory_list)
    @settings(max_examples=100)
    def test_inventory_category_counts_sum(
        self, items: list[InventoryItem]
    ) -> None:
        """Feature: repo-audit, Property 13 (Validates: Requirements 4.5).

        The per-category counts in the inventory summary sum to the number
        of items passed to ``render_inventory_report``.
        """
        rendered = render_inventory_report(items, "/tmp/repo")

        # Sum the numeric column of the table under the category heading.
        cat_sum = _extract_summary_total(rendered, "按用途分类")
        item_count = len(items)
        assert cat_sum == item_count, (
            f"分类计数之和 {cat_sum} != 条目总数 {item_count}"
        )

    # --- 13b: inventory report, disposition counts add up to the list length ---

    @given(items=_inventory_list)
    @settings(max_examples=100)
    def test_inventory_disposition_counts_sum(
        self, items: list[InventoryItem]
    ) -> None:
        """Feature: repo-audit, Property 13 (Validates: Requirements 4.5).

        The per-disposition counts in the inventory summary sum to the
        number of items passed to ``render_inventory_report``.
        """
        rendered = render_inventory_report(items, "/tmp/repo")

        disp_sum = _extract_summary_total(rendered, "按处置标签")
        item_count = len(items)
        assert disp_sum == item_count, (
            f"处置标签计数之和 {disp_sum} != 条目总数 {item_count}"
        )

    # --- 13c: flow report, orphan count equals the orphan list length ---

    @given(trees=_flow_tree_list, orphans=_orphan_list)
    @settings(max_examples=100)
    def test_flow_orphan_count_matches(
        self, trees: list[FlowNode], orphans: list[str]
    ) -> None:
        """Feature: repo-audit, Property 13 (Validates: Requirements 4.6).

        The orphan-module figure in the flow report summary equals the
        length of the ``orphans`` list.
        """
        rendered = render_flow_report(trees, orphans, "/tmp/repo")

        # Read the number from the orphan-module row of the summary table.
        orphan_count = _extract_flow_stat(rendered, "孤立模块")
        expected = len(orphans)
        assert orphan_count == expected, (
            f"报告中孤立模块数 {orphan_count} != orphans 列表长度 {expected}"
        )

    # --- 13d: alignment report, per-type issue counts match the input ---

    @given(mappings=_mapping_list, issues=_issue_list)
    @settings(max_examples=100)
    def test_alignment_issue_counts_match(
        self, mappings: list[DocMapping], issues: list[AlignmentIssue]
    ) -> None:
        """Feature: repo-audit, Property 13 (Validates: Requirements 4.7).

        The stale / conflict / missing counts in the alignment summary equal
        the number of issues of each type in the ``issues`` list.
        """
        rendered = render_alignment_report(mappings, issues, "/tmp/repo")

        # (issue_type value, summary label in the report, name used in the
        # failure message)
        checks = (
            ("stale", "过期点数量", "过期点"),
            ("conflict", "冲突点数量", "冲突点"),
            ("missing", "缺失点数量", "缺失点"),
        )
        for issue_type, stat_label, short_name in checks:
            expected = len([i for i in issues if i.issue_type == issue_type])
            actual = _extract_alignment_stat(rendered, stat_label)
            assert actual == expected, (
                f"{short_name}: 报告 {actual} != 实际 {expected}"
            )
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Property 14: 报告头部元信息
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class TestProperty14ReportHeader:
    """Property 14: report header metadata.

    Feature: repo-audit, Property 14
    Validates: Requirements 4.2

    For any rendered report, the header must contain an ISO-format timestamp
    string and the repository root path string.
    """

    # Timestamp shape required in the header, e.g. 2024-01-31T12:00:00Z.
    _ISO_TS_RE = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z")

    def _assert_header(self, report: str, repo_root: str, report_kind: str) -> None:
        """Check timestamp and repository path in the head of ``report``.

        Only the first 500 characters are treated as the header.
        ``report_kind`` is the report name placed in the failure messages.
        """
        head = report[:500]
        assert self._ISO_TS_RE.search(head), (
            f"{report_kind} 报告头部缺少 ISO 格式时间戳"
        )
        assert repo_root in head, (
            f"{report_kind} 报告头部缺少仓库路径 '{repo_root}'"
        )

    @given(items=_inventory_list, repo_root=_repo_root_str)
    @settings(max_examples=100)
    def test_inventory_report_header(
        self, items: list[InventoryItem], repo_root: str
    ) -> None:
        """Feature: repo-audit, Property 14 (Validates: Requirements 4.2).

        The inventory report header contains an ISO timestamp and the
        repository path.
        """
        self._assert_header(
            render_inventory_report(items, repo_root), repo_root, "inventory"
        )

    @given(trees=_flow_tree_list, orphans=_orphan_list, repo_root=_repo_root_str)
    @settings(max_examples=100)
    def test_flow_report_header(
        self, trees: list[FlowNode], orphans: list[str], repo_root: str
    ) -> None:
        """Feature: repo-audit, Property 14 (Validates: Requirements 4.2).

        The flow report header contains an ISO timestamp and the repository
        path.
        """
        self._assert_header(
            render_flow_report(trees, orphans, repo_root), repo_root, "flow"
        )

    @given(mappings=_mapping_list, issues=_issue_list, repo_root=_repo_root_str)
    @settings(max_examples=100)
    def test_alignment_report_header(
        self, mappings: list[DocMapping], issues: list[AlignmentIssue], repo_root: str
    ) -> None:
        """Feature: repo-audit, Property 14 (Validates: Requirements 4.2).

        The alignment report header contains an ISO timestamp and the
        repository path.
        """
        self._assert_header(
            render_alignment_report(mappings, issues, repo_root),
            repo_root,
            "alignment",
        )
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Property 15: 写操作仅限 docs/audit/
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class TestProperty15WritesOnlyDocsAudit:
    """Property 15: writes are confined to docs/audit/.

    Feature: repo-audit, Property 15
    Validates: Requirements 5.2

    For any audit run, every file write must target a path under
    ``docs/audit/``. The test needs a real file system, so it runs with a
    small number of examples.
    """

    @staticmethod
    def _make_minimal_repo(base: Path, variant: int) -> Path:
        """Create a minimal repository under ``base`` and return its root.

        The repository always contains a ``cli`` package with ``main.py``, a
        ``config`` package and an empty ``docs`` directory. ``variant % 3``
        selects one extra artefact (README, ``scripts`` package or a docs
        note) to add some diversity between examples.
        """
        repo = base / f"repo_{variant}"
        repo.mkdir()

        # Required CLI entry point.
        cli_dir = repo / "cli"
        cli_dir.mkdir()
        (cli_dir / "__init__.py").write_text("", encoding="utf-8")
        (cli_dir / "main.py").write_text(
            "# -*- coding: utf-8 -*-\ndef main(): pass\n",
            encoding="utf-8",
        )

        # config package.
        config_dir = repo / "config"
        config_dir.mkdir()
        (config_dir / "__init__.py").write_text("", encoding="utf-8")

        # docs directory.
        docs_dir = repo / "docs"
        docs_dir.mkdir()

        # Exactly one variant-dependent extra file.
        flavour = variant % 3
        if flavour == 0:
            (repo / "README.md").write_text("# 项目\n", encoding="utf-8")
        elif flavour == 1:
            scripts_dir = repo / "scripts"
            scripts_dir.mkdir()
            (scripts_dir / "__init__.py").write_text("", encoding="utf-8")
        else:
            (docs_dir / "notes.md").write_text("# 笔记\n", encoding="utf-8")

        return repo

    @staticmethod
    def _snapshot_files(repo: Path) -> dict[str, float]:
        """Map every file outside ``docs/audit/`` to its mtime.

        Keys are POSIX-style paths relative to ``repo``. The exclusion prefix
        carries a trailing slash so that siblings such as ``docs/audit.md``
        or ``docs/auditx/`` are still tracked.
        """
        snap: dict[str, float] = {}
        for path in repo.rglob("*"):
            if not path.is_file():
                continue
            rel = path.relative_to(repo).as_posix()
            if not rel.startswith("docs/audit/"):
                snap[rel] = path.stat().st_mtime
        return snap

    @given(variant=st.integers(min_value=0, max_value=9))
    @settings(max_examples=10)
    def test_writes_only_under_docs_audit(self, variant: int) -> None:
        """Feature: repo-audit, Property 15 (Validates: Requirements 5.2).

        After ``run_audit`` the ``docs/audit/`` directory exists and is not
        empty, and outside of it no file has been created, deleted or
        modified (judged by mtime).
        """
        import tempfile

        from scripts.audit.run_audit import run_audit

        with tempfile.TemporaryDirectory() as tmp_dir:
            repo = self._make_minimal_repo(Path(tmp_dir), variant)
            before_snap = self._snapshot_files(repo)

            run_audit(repo)

            # Reports must have been produced under docs/audit/.
            audit_dir = repo / "docs" / "audit"
            assert audit_dir.is_dir(), "docs/audit/ 目录未创建"
            assert any(audit_dir.iterdir()), "docs/audit/ 下无报告文件"

            after_snap = self._snapshot_files(repo)

            # No new files outside docs/audit/.
            for rel in after_snap:
                assert rel in before_snap, (
                    f"docs/audit/ 外出现了新文件: {rel}"
                )

            # Pre-existing files outside docs/audit/ must be untouched; the
            # mtime comparison catches in-place rewrites that a pure
            # "new file" check would miss.
            for rel, mtime in before_snap.items():
                assert rel in after_snap, (
                    f"docs/audit/ 外的文件被删除: {rel}"
                )
                assert after_snap[rel] == mtime, (
                    f"docs/audit/ 外的文件被修改: {rel}"
                )
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 辅助函数 — 从报告文本中提取统计数字
|
||||
# ===========================================================================
|
||||
|
||||
def _extract_summary_total(report: str, section_name: str) -> int:
|
||||
"""从 inventory 报告的统计摘要中提取指定分区的数字之和。
|
||||
|
||||
查找 "### {section_name}" 下的 Markdown 表格,
|
||||
累加每行最后一列的数字(排除合计行)。
|
||||
"""
|
||||
lines = report.split("\n")
|
||||
in_section = False
|
||||
total = 0
|
||||
|
||||
for line in lines:
|
||||
stripped = line.strip()
|
||||
if stripped == f"### {section_name}":
|
||||
in_section = True
|
||||
continue
|
||||
if in_section and stripped.startswith("###"):
|
||||
# 进入下一个子节
|
||||
break
|
||||
if in_section and stripped.startswith("|") and "**合计**" not in stripped:
|
||||
# 跳过表头和分隔行
|
||||
if stripped.startswith("| 用途分类") or stripped.startswith("| 处置标签"):
|
||||
continue
|
||||
if stripped.startswith("|---"):
|
||||
continue
|
||||
# 提取最后一列的数字
|
||||
cells = [c.strip() for c in stripped.split("|") if c.strip()]
|
||||
if cells:
|
||||
try:
|
||||
total += int(cells[-1])
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
return total
|
||||
|
||||
|
||||
def _extract_flow_stat(report: str, label: str) -> int:
|
||||
"""从 flow 报告统计摘要表格中提取指定指标的数字。"""
|
||||
# 匹配 "| 孤立模块 | 5 |" 格式
|
||||
pattern = re.compile(rf"\|\s*{re.escape(label)}\s*\|\s*(\d+)\s*\|")
|
||||
m = pattern.search(report)
|
||||
return int(m.group(1)) if m else -1
|
||||
|
||||
|
||||
def _extract_alignment_stat(report: str, label: str) -> int:
|
||||
"""从 alignment 报告统计摘要中提取指定指标的数字。
|
||||
|
||||
匹配 "- 过期点数量:3" 格式。
|
||||
"""
|
||||
# 兼容全角/半角冒号
|
||||
pattern = re.compile(rf"{re.escape(label)}[::]\s*(\d+)")
|
||||
m = pattern.search(report)
|
||||
return int(m.group(1)) if m else -1
|
||||
Reference in New Issue
Block a user