Files
ZQYY.FQ-ETL/tests/unit/test_audit_report_properties.py

486 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
属性测试 — 报告输出属性
Feature: repo-audit
- Property 13: 统计摘要一致性
- Property 14: 报告头部元信息
- Property 15: 写操作仅限 docs/audit/
Validates: Requirements 4.2, 4.5, 4.6, 4.7, 5.2
"""
from __future__ import annotations
import os
import re
import string
from pathlib import Path
from hypothesis import given, settings, assume
from hypothesis import strategies as st
from scripts.audit import (
AlignmentIssue,
Category,
Disposition,
DocMapping,
FlowNode,
InventoryItem,
)
from scripts.audit.inventory_analyzer import render_inventory_report
from scripts.audit.flow_analyzer import render_flow_report
from scripts.audit.doc_alignment_analyzer import render_alignment_report
# ---------------------------------------------------------------------------
# 共享生成器策略
# ---------------------------------------------------------------------------
# Characters allowed inside a single generated path segment.
_PATH_CHARS = string.ascii_letters + string.digits + "_-."

# One path component: 1-12 characters drawn from _PATH_CHARS.
_path_segment = st.text(alphabet=_PATH_CHARS, min_size=1, max_size=12)

# A relative path made of 1-3 segments joined with "/".
_rel_path = st.lists(_path_segment, min_size=1, max_size=3).map("/".join)

# Free text for description/topic fields: letters, numbers, punctuation,
# symbols and separators, minus "|" and line breaks (presumably so the text
# cannot break a Markdown table row -- confirm against the renderers).
_safe_text = st.text(
    alphabet=st.characters(
        whitelist_categories=("L", "N", "P", "S", "Z"),
        blacklist_characters="|\n\r",
    ),
    min_size=1,
    max_size=30,
)

# Repository root string: 3-40 raw characters, normalised so the result
# starts with exactly one leading "/".
_repo_root_str = st.text(
    alphabet=string.ascii_letters + string.digits + "/_-.",
    min_size=3,
    max_size=40,
).map(lambda raw: f"/{raw.lstrip('/')}")
# ---------------------------------------------------------------------------
# InventoryItem 生成器
# ---------------------------------------------------------------------------
def _inventory_item_st() -> st.SearchStrategy[InventoryItem]:
    """Return a strategy that builds InventoryItem instances.

    Paths come from ``_rel_path``, descriptions from ``_safe_text``, and
    category/disposition are sampled from every member of their enums.
    """
    field_strategies = {
        "rel_path": _rel_path,
        "category": st.sampled_from(list(Category)),
        "disposition": st.sampled_from(list(Disposition)),
        "description": _safe_text,
    }
    return st.builds(InventoryItem, **field_strategies)


# Lists of 0-20 inventory items; the empty list is deliberately included.
_inventory_list = st.lists(_inventory_item_st(), min_size=0, max_size=20)
# ---------------------------------------------------------------------------
# FlowNode 生成器(限制深度和宽度)
# ---------------------------------------------------------------------------
def _flow_node_st(max_depth: int = 2) -> st.SearchStrategy[FlowNode]:
    """Return a strategy that builds a random FlowNode tree.

    Args:
        max_depth: Remaining number of levels that may carry children. At 0
            (or below) only leaf nodes are produced. Each non-leaf level has
            at most 3 children, which keeps the tree size bounded.

    Returns:
        A strategy yielding FlowNode objects whose ``children`` is a list of
        FlowNode objects generated one level shallower.
    """
    if max_depth <= 0:
        # builds(list) creates a fresh empty list for every example, whereas
        # st.just([]) would hand the very same list object to every leaf.
        children = st.builds(list)
    else:
        children = st.lists(
            _flow_node_st(max_depth - 1),
            min_size=0,
            max_size=3,
        )
    return st.builds(
        FlowNode,
        name=_path_segment,
        source_file=_rel_path,
        node_type=st.sampled_from(["entry", "module", "class", "function"]),
        children=children,
    )


# 0-5 root trees and 0-10 orphan module paths per example.
_flow_tree_list = st.lists(_flow_node_st(), min_size=0, max_size=5)
_orphan_list = st.lists(_rel_path, min_size=0, max_size=10)
# ---------------------------------------------------------------------------
# DocMapping / AlignmentIssue 生成器
# ---------------------------------------------------------------------------
# The three issue kinds used by the generated AlignmentIssue values.
_issue_type_st = st.sampled_from(["stale", "conflict", "missing"])


def _alignment_issue_st() -> st.SearchStrategy[AlignmentIssue]:
    """Return a strategy that builds AlignmentIssue instances.

    Both ``doc_path`` and ``related_code`` are single relative paths; the
    issue type is drawn from ``_issue_type_st``.
    """
    field_strategies = {
        "doc_path": _rel_path,
        "issue_type": _issue_type_st,
        "description": _safe_text,
        "related_code": _rel_path,
    }
    return st.builds(AlignmentIssue, **field_strategies)
def _doc_mapping_st() -> st.SearchStrategy[DocMapping]:
    """Return a strategy that builds DocMapping instances.

    ``related_code`` is a list of 0-5 relative paths and ``status`` is one of
    "aligned", "stale", "conflict" or "orphan".
    """
    related_code_st = st.lists(_rel_path, min_size=0, max_size=5)
    status_st = st.sampled_from(["aligned", "stale", "conflict", "orphan"])
    return st.builds(
        DocMapping,
        doc_path=_rel_path,
        doc_topic=_safe_text,
        related_code=related_code_st,
        status=status_st,
    )


# Up to 15 mappings and up to 15 issues per example (both may be empty).
_mapping_list = st.lists(_doc_mapping_st(), min_size=0, max_size=15)
_issue_list = st.lists(_alignment_issue_st(), min_size=0, max_size=15)
# ===========================================================================
# Property 13: 统计摘要一致性
# ===========================================================================
class TestProperty13SummaryConsistency:
    """Property 13: summary statistics are self-consistent.

    Feature: repo-audit, Property 13
    Validates: Requirements 4.5, 4.6, 4.7

    For any rendered report, the per-category / per-label counts in its
    summary must add up to the length of the corresponding input list.
    """

    # --- 13a: category counts in the inventory report sum to len(items) ---
    @given(items=_inventory_list)
    @settings(max_examples=100)
    def test_inventory_category_counts_sum(
        self, items: list[InventoryItem]
    ) -> None:
        """Category counts in the inventory summary sum to the item count.

        Validates: Requirements 4.5
        """
        rendered = render_inventory_report(items, "/tmp/repo")
        expected = len(items)
        # Sum the numeric column of the "by category" summary table.
        cat_sum = _extract_summary_total(rendered, "按用途分类")
        assert cat_sum == expected, (
            f"分类计数之和 {cat_sum} != 条目总数 {expected}"
        )

    # --- 13b: disposition counts in the inventory report sum to len(items) ---
    @given(items=_inventory_list)
    @settings(max_examples=100)
    def test_inventory_disposition_counts_sum(
        self, items: list[InventoryItem]
    ) -> None:
        """Disposition counts in the inventory summary sum to the item count.

        Validates: Requirements 4.5
        """
        rendered = render_inventory_report(items, "/tmp/repo")
        expected = len(items)
        disp_sum = _extract_summary_total(rendered, "按处置标签")
        assert disp_sum == expected, (
            f"处置标签计数之和 {disp_sum} != 条目总数 {expected}"
        )

    # --- 13c: orphan count in the flow report equals len(orphans) ---
    @given(trees=_flow_tree_list, orphans=_orphan_list)
    @settings(max_examples=100)
    def test_flow_orphan_count_matches(
        self, trees: list[FlowNode], orphans: list[str]
    ) -> None:
        """The orphan-module figure in the flow summary equals len(orphans).

        Validates: Requirements 4.6
        """
        rendered = render_flow_report(trees, orphans, "/tmp/repo")
        expected = len(orphans)
        # Read the number from the "orphan modules" row of the summary table.
        orphan_count = _extract_flow_stat(rendered, "孤立模块")
        assert orphan_count == expected, (
            f"报告中孤立模块数 {orphan_count} != orphans 列表长度 {expected}"
        )

    # --- 13d: per-type issue counts in the alignment report match the input ---
    @given(mappings=_mapping_list, issues=_issue_list)
    @settings(max_examples=100)
    def test_alignment_issue_counts_match(
        self, mappings: list[DocMapping], issues: list[AlignmentIssue]
    ) -> None:
        """Stale/conflict/missing counts in the summary match the issue list.

        Validates: Requirements 4.7
        """
        rendered = render_alignment_report(mappings, issues, "/tmp/repo")

        # Expected figures, counted directly from the input.
        expected_stale = len([i for i in issues if i.issue_type == "stale"])
        expected_conflict = len([i for i in issues if i.issue_type == "conflict"])
        expected_missing = len([i for i in issues if i.issue_type == "missing"])

        # Figures as printed in the report summary.
        actual_stale = _extract_alignment_stat(rendered, "过期点数量")
        actual_conflict = _extract_alignment_stat(rendered, "冲突点数量")
        actual_missing = _extract_alignment_stat(rendered, "缺失点数量")

        assert actual_stale == expected_stale, (
            f"过期点: 报告 {actual_stale} != 实际 {expected_stale}"
        )
        assert actual_conflict == expected_conflict, (
            f"冲突点: 报告 {actual_conflict} != 实际 {expected_conflict}"
        )
        assert actual_missing == expected_missing, (
            f"缺失点: 报告 {actual_missing} != 实际 {expected_missing}"
        )
# ===========================================================================
# Property 14: 报告头部元信息
# ===========================================================================
class TestProperty14ReportHeader:
    """Property 14: report header metadata.

    Feature: repo-audit, Property 14
    Validates: Requirements 4.2

    For any rendered report, the header must contain an ISO-formatted
    timestamp string and the repository root path string. "Header" here
    means the first 500 characters of the report.
    """

    # Timestamp shape the tests look for, e.g. 2024-01-31T12:34:56Z.
    _ISO_TS_RE = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z")

    @given(items=_inventory_list, repo_root=_repo_root_str)
    @settings(max_examples=100)
    def test_inventory_report_header(
        self, items: list[InventoryItem], repo_root: str
    ) -> None:
        """The inventory report header carries a timestamp and the repo path.

        Validates: Requirements 4.2
        """
        head = render_inventory_report(items, repo_root)[:500]
        timestamp = self._ISO_TS_RE.search(head)
        assert timestamp is not None, (
            "inventory 报告头部缺少 ISO 格式时间戳"
        )
        assert repo_root in head, (
            f"inventory 报告头部缺少仓库路径 '{repo_root}'"
        )

    @given(trees=_flow_tree_list, orphans=_orphan_list, repo_root=_repo_root_str)
    @settings(max_examples=100)
    def test_flow_report_header(
        self, trees: list[FlowNode], orphans: list[str], repo_root: str
    ) -> None:
        """The flow report header carries a timestamp and the repo path.

        Validates: Requirements 4.2
        """
        head = render_flow_report(trees, orphans, repo_root)[:500]
        timestamp = self._ISO_TS_RE.search(head)
        assert timestamp is not None, (
            "flow 报告头部缺少 ISO 格式时间戳"
        )
        assert repo_root in head, (
            f"flow 报告头部缺少仓库路径 '{repo_root}'"
        )

    @given(mappings=_mapping_list, issues=_issue_list, repo_root=_repo_root_str)
    @settings(max_examples=100)
    def test_alignment_report_header(
        self, mappings: list[DocMapping], issues: list[AlignmentIssue], repo_root: str
    ) -> None:
        """The alignment report header carries a timestamp and the repo path.

        Validates: Requirements 4.2
        """
        head = render_alignment_report(mappings, issues, repo_root)[:500]
        timestamp = self._ISO_TS_RE.search(head)
        assert timestamp is not None, (
            "alignment 报告头部缺少 ISO 格式时间戳"
        )
        assert repo_root in head, (
            f"alignment 报告头部缺少仓库路径 '{repo_root}'"
        )
# ===========================================================================
# Property 15: 写操作仅限 docs/audit/
# ===========================================================================
class TestProperty15WritesOnlyDocsAudit:
    """Property 15: writes are confined to docs/audit/.

    Feature: repo-audit, Property 15
    Validates: Requirements 5.2

    For any audit run, every file it creates must live under ``docs/audit/``.
    The test touches a real filesystem, so it uses only a few examples.
    """

    # The trailing slash matters: a bare "docs/audit" prefix would also accept
    # siblings such as "docs/audit_old/x.md" or "docs/auditlog.txt", letting
    # stray writes slip through unnoticed.
    _AUDIT_PREFIX = "docs/audit/"

    @staticmethod
    def _make_minimal_repo(base: Path, variant: int) -> Path:
        """Create a minimal repository layout under *base* and return its root.

        Every variant gets ``cli/`` (with ``__init__.py`` and ``main.py``),
        ``config/__init__.py`` and an empty ``docs/`` directory. ``variant % 3``
        selects one extra file to add some diversity:
        0 -> ``README.md``, 1 -> ``scripts/__init__.py``, 2 -> ``docs/notes.md``.
        """
        repo = base / f"repo_{variant}"
        repo.mkdir()

        # Mandatory CLI entry point.
        cli_dir = repo / "cli"
        cli_dir.mkdir()
        (cli_dir / "__init__.py").write_text("", encoding="utf-8")
        (cli_dir / "main.py").write_text(
            "# -*- coding: utf-8 -*-\ndef main(): pass\n",
            encoding="utf-8",
        )

        # config package.
        config_dir = repo / "config"
        config_dir.mkdir()
        (config_dir / "__init__.py").write_text("", encoding="utf-8")

        # docs directory (docs/audit/ is expected to be created by the audit).
        docs_dir = repo / "docs"
        docs_dir.mkdir()

        # Variant-specific extra file.
        flavour = variant % 3
        if flavour == 0:
            (repo / "README.md").write_text("# 项目\n", encoding="utf-8")
        elif flavour == 1:
            scripts_dir = repo / "scripts"
            scripts_dir.mkdir()
            (scripts_dir / "__init__.py").write_text("", encoding="utf-8")
        else:
            (docs_dir / "notes.md").write_text("# 笔记\n", encoding="utf-8")
        return repo

    @staticmethod
    def _snapshot_files(repo: Path) -> dict[str, float]:
        """Map each file outside ``docs/audit/`` to its mtime.

        Keys are POSIX-style paths relative to *repo*. Directories are skipped.
        """
        prefix = TestProperty15WritesOnlyDocsAudit._AUDIT_PREFIX
        snap: dict[str, float] = {}
        for p in repo.rglob("*"):
            if not p.is_file():
                continue
            rel = p.relative_to(repo).as_posix()
            if not rel.startswith(prefix):
                snap[rel] = p.stat().st_mtime
        return snap

    @given(variant=st.integers(min_value=0, max_value=9))
    @settings(max_examples=10)
    def test_writes_only_under_docs_audit(self, variant: int) -> None:
        """run_audit creates report files under docs/audit/ and nowhere else.

        Validates: Requirements 5.2

        After run_audit, ``docs/audit/`` must exist and be non-empty, and no
        file outside it may exist that was absent before the run.
        """
        import tempfile
        from scripts.audit.run_audit import run_audit

        with tempfile.TemporaryDirectory() as tmp_dir:
            repo = self._make_minimal_repo(Path(tmp_dir), variant)
            before_snap = self._snapshot_files(repo)

            run_audit(repo)

            # Reports must have been produced under docs/audit/.
            audit_dir = repo / "docs" / "audit"
            assert audit_dir.is_dir(), "docs/audit/ 目录未创建"
            audit_files = list(audit_dir.iterdir())
            assert len(audit_files) > 0, "docs/audit/ 下无报告文件"

            # No new file may appear anywhere outside docs/audit/.
            for p in repo.rglob("*"):
                if not p.is_file():
                    continue
                rel = p.relative_to(repo).as_posix()
                if rel.startswith(self._AUDIT_PREFIX):
                    continue
                assert rel in before_snap, (
                    f"docs/audit/ 外出现了新文件: {rel}"
                )
# ===========================================================================
# 辅助函数 — 从报告文本中提取统计数字
# ===========================================================================
def _extract_summary_total(report: str, section_name: str) -> int:
"""从 inventory 报告的统计摘要中提取指定分区的数字之和。
查找 "### {section_name}" 下的 Markdown 表格,
累加每行最后一列的数字(排除合计行)。
"""
lines = report.split("\n")
in_section = False
total = 0
for line in lines:
stripped = line.strip()
if stripped == f"### {section_name}":
in_section = True
continue
if in_section and stripped.startswith("###"):
# 进入下一个子节
break
if in_section and stripped.startswith("|") and "**合计**" not in stripped:
# 跳过表头和分隔行
if stripped.startswith("| 用途分类") or stripped.startswith("| 处置标签"):
continue
if stripped.startswith("|---"):
continue
# 提取最后一列的数字
cells = [c.strip() for c in stripped.split("|") if c.strip()]
if cells:
try:
total += int(cells[-1])
except ValueError:
pass
return total
def _extract_flow_stat(report: str, label: str) -> int:
"""从 flow 报告统计摘要表格中提取指定指标的数字。"""
# 匹配 "| 孤立模块 | 5 |" 格式
pattern = re.compile(rf"\|\s*{re.escape(label)}\s*\|\s*(\d+)\s*\|")
m = pattern.search(report)
return int(m.group(1)) if m else -1
def _extract_alignment_stat(report: str, label: str) -> int:
"""从 alignment 报告统计摘要中提取指定指标的数字。
匹配 "- 过期点数量3" 格式。
"""
# 兼容全角/半角冒号
pattern = re.compile(rf"{re.escape(label)}[:]\s*(\d+)")
m = pattern.search(report)
return int(m.group(1)) if m else -1