# ZQYY.FQ-ETL/tests/unit/test_audit_report_properties.py

# -*- coding: utf-8 -*-
"""
属性测试 — 报告输出属性

Feature: repo-audit
- Property 13: 统计摘要一致性
- Property 14: 报告头部元信息
- Property 15: 写操作仅限 docs/audit/

Validates: Requirements 4.2, 4.5, 4.6, 4.7, 5.2
"""

from __future__ import annotations

import os
import re
import string
from pathlib import Path

from hypothesis import given, settings, assume
from hypothesis import strategies as st

from scripts.audit import (
    AlignmentIssue,
    Category,
    Disposition,
    DocMapping,
    FlowNode,
    InventoryItem,
)
from scripts.audit.inventory_analyzer import render_inventory_report
from scripts.audit.flow_analyzer import render_flow_report
from scripts.audit.doc_alignment_analyzer import render_alignment_report

# ---------------------------------------------------------------------------
# 共享生成器策略
# ---------------------------------------------------------------------------

# Characters allowed inside a single generated path segment.
_PATH_CHARS = "".join((string.ascii_letters, string.digits, "_-."))

# One path component: 1-12 characters drawn from _PATH_CHARS.
_path_segment = st.text(alphabet=_PATH_CHARS, min_size=1, max_size=12)

# Relative path: 1-3 segments joined with "/".
_rel_path = st.lists(_path_segment, min_size=1, max_size=3).map("/".join)

# Free text of 1-30 characters taken from the Unicode letter, number,
# punctuation, symbol and separator categories, minus "|", "\n" and "\r"
# (presumably so the value can sit inside a single Markdown table cell).
_safe_text = st.text(
    min_size=1,
    max_size=30,
    alphabet=st.characters(
        whitelist_categories=("L", "N", "P", "S", "Z"),
        blacklist_characters="|\n\r",
    ),
)

# Repository-root-like string; the mapping guarantees exactly one leading "/".
_repo_root_str = st.text(
    min_size=3,
    max_size=40,
    alphabet=f"{string.ascii_letters}{string.digits}/_-.",
).map(lambda raw: "/" + raw.lstrip("/"))


# ---------------------------------------------------------------------------
# InventoryItem 生成器
# ---------------------------------------------------------------------------

def _inventory_item_st() -> st.SearchStrategy[InventoryItem]:
    """Return a strategy that builds random ``InventoryItem`` instances.

    Every constructor keyword gets its own strategy: a generated relative
    path, any ``Category`` member, any ``Disposition`` member, and a short
    free-text description.
    """
    field_strategies = {
        "rel_path": _rel_path,
        "category": st.sampled_from(list(Category)),
        "disposition": st.sampled_from(list(Disposition)),
        "description": _safe_text,
    }
    return st.builds(InventoryItem, **field_strategies)


# Lists of 0-20 generated inventory items.
_inventory_list = st.lists(_inventory_item_st(), min_size=0, max_size=20)


# ---------------------------------------------------------------------------
# FlowNode 生成器（限制深度和宽度）
# ---------------------------------------------------------------------------

def _flow_node_st(max_depth: int = 2) -> st.SearchStrategy[FlowNode]:
    """Return a strategy for random ``FlowNode`` trees of bounded size.

    ``max_depth`` is the number of levels allowed below the generated node.
    With ``max_depth <= 0`` the node is always a leaf; otherwise it gets
    between zero and three children, each built with ``max_depth - 1``.
    """
    if max_depth > 0:
        child_nodes = st.lists(
            _flow_node_st(max_depth - 1),
            min_size=0,
            max_size=3,
        )
    else:
        # Depth budget exhausted: the node must not have children.
        child_nodes = st.just([])

    return st.builds(
        FlowNode,
        name=_path_segment,
        source_file=_rel_path,
        node_type=st.sampled_from(["entry", "module", "class", "function"]),
        children=child_nodes,
    )


# Up to five root trees (default depth) and up to ten orphan module paths.
_flow_tree_list = st.lists(_flow_node_st(), min_size=0, max_size=5)
_orphan_list = st.lists(_rel_path, min_size=0, max_size=10)


# ---------------------------------------------------------------------------
# DocMapping / AlignmentIssue 生成器
# ---------------------------------------------------------------------------

# Issue kinds used when generating AlignmentIssue objects.
_issue_type_st = st.sampled_from(["stale", "conflict", "missing"])


def _alignment_issue_st() -> st.SearchStrategy[AlignmentIssue]:
    """Return a strategy that builds random ``AlignmentIssue`` instances."""
    issue_fields = {
        "doc_path": _rel_path,
        "issue_type": _issue_type_st,
        "description": _safe_text,
        "related_code": _rel_path,
    }
    return st.builds(AlignmentIssue, **issue_fields)


def _doc_mapping_st() -> st.SearchStrategy[DocMapping]:
    """Return a strategy that builds random ``DocMapping`` instances.

    ``related_code`` is a list of 0-5 generated relative paths and ``status``
    is one of ``aligned``, ``stale``, ``conflict`` or ``orphan``.
    """
    mapping_fields = {
        "doc_path": _rel_path,
        "doc_topic": _safe_text,
        "related_code": st.lists(_rel_path, min_size=0, max_size=5),
        "status": st.sampled_from(["aligned", "stale", "conflict", "orphan"]),
    }
    return st.builds(DocMapping, **mapping_fields)


# Lists of 0-15 generated mappings / issues.
_mapping_list = st.lists(_doc_mapping_st(), min_size=0, max_size=15)
_issue_list = st.lists(_alignment_issue_st(), min_size=0, max_size=15)


# ===========================================================================
# Property 13: 统计摘要一致性
# ===========================================================================


class TestProperty13SummaryConsistency:
    """Property 13: summary statistics are consistent with the input lists.

    Feature: repo-audit, Property 13
    Validates: Requirements 4.5, 4.6, 4.7

    For every rendered report, the numbers printed in its statistics summary
    must agree with the lists the report was rendered from.
    """

    # --- 13a: inventory category counts add up to the number of items ---

    @given(items=_inventory_list)
    @settings(max_examples=100)
    def test_inventory_category_counts_sum(
        self, items: list[InventoryItem]
    ) -> None:
        """Feature: repo-audit, Property 13 (Validates: Requirements 4.5).

        The per-category counts in the "按用途分类" summary table of
        ``render_inventory_report`` must sum to ``len(items)``.
        """
        # Render, then add up the numeric column of the category table.
        cat_sum = _extract_summary_total(
            render_inventory_report(items, "/tmp/repo"),
            "按用途分类",
        )
        assert cat_sum == len(items), (
            f"分类计数之和 {cat_sum} != 条目总数 {len(items)}"
        )

    # --- 13b: inventory disposition counts add up to the number of items ---

    @given(items=_inventory_list)
    @settings(max_examples=100)
    def test_inventory_disposition_counts_sum(
        self, items: list[InventoryItem]
    ) -> None:
        """Feature: repo-audit, Property 13 (Validates: Requirements 4.5).

        The per-disposition counts in the "按处置标签" summary table of
        ``render_inventory_report`` must sum to ``len(items)``.
        """
        disp_sum = _extract_summary_total(
            render_inventory_report(items, "/tmp/repo"),
            "按处置标签",
        )
        assert disp_sum == len(items), (
            f"处置标签计数之和 {disp_sum} != 条目总数 {len(items)}"
        )

    # --- 13c: flow report orphan count equals len(orphans) ---

    @given(trees=_flow_tree_list, orphans=_orphan_list)
    @settings(max_examples=100)
    def test_flow_orphan_count_matches(
        self, trees: list[FlowNode], orphans: list[str]
    ) -> None:
        """Feature: repo-audit, Property 13 (Validates: Requirements 4.6).

        The "孤立模块" figure in the ``render_flow_report`` summary table
        must equal ``len(orphans)``.
        """
        rendered = render_flow_report(trees, orphans, "/tmp/repo")

        # Pull the number out of the "孤立模块" row of the summary table.
        orphan_count = _extract_flow_stat(rendered, "孤立模块")
        assert orphan_count == len(orphans), (
            f"报告中孤立模块数 {orphan_count} != orphans 列表长度 {len(orphans)}"
        )

    # --- 13d: alignment report issue counts match the issues list ---

    @given(mappings=_mapping_list, issues=_issue_list)
    @settings(max_examples=100)
    def test_alignment_issue_counts_match(
        self, mappings: list[DocMapping], issues: list[AlignmentIssue]
    ) -> None:
        """Feature: repo-audit, Property 13 (Validates: Requirements 4.7).

        The stale / conflict / missing figures in the
        ``render_alignment_report`` summary must equal the number of issues
        of the corresponding ``issue_type`` in ``issues``.
        """
        rendered = render_alignment_report(mappings, issues, "/tmp/repo")

        # Ground truth: tally the generated issues by type.
        kinds = [issue.issue_type for issue in issues]
        expected_stale = kinds.count("stale")
        expected_conflict = kinds.count("conflict")
        expected_missing = kinds.count("missing")

        # Figures as printed in the report summary.
        actual_stale = _extract_alignment_stat(rendered, "过期点数量")
        actual_conflict = _extract_alignment_stat(rendered, "冲突点数量")
        actual_missing = _extract_alignment_stat(rendered, "缺失点数量")

        assert actual_stale == expected_stale, (
            f"过期点: 报告 {actual_stale} != 实际 {expected_stale}"
        )
        assert actual_conflict == expected_conflict, (
            f"冲突点: 报告 {actual_conflict} != 实际 {expected_conflict}"
        )
        assert actual_missing == expected_missing, (
            f"缺失点: 报告 {actual_missing} != 实际 {expected_missing}"
        )


# ===========================================================================
# Property 14: 报告头部元信息
# ===========================================================================


class TestProperty14ReportHeader:
    """Property 14: report header metadata.

    Feature: repo-audit, Property 14
    Validates: Requirements 4.2

    The beginning of every rendered report (its first 500 characters) must
    contain an ISO-formatted timestamp and the repository root path.
    """

    # Timestamp shape accepted by these tests, e.g. 2024-01-31T12:00:00Z.
    _ISO_TS_RE = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z")

    @given(items=_inventory_list, repo_root=_repo_root_str)
    @settings(max_examples=100)
    def test_inventory_report_header(
        self, items: list[InventoryItem], repo_root: str
    ) -> None:
        """Feature: repo-audit, Property 14 (Validates: Requirements 4.2).

        The head of ``render_inventory_report`` output carries a timestamp
        and ``repo_root``.
        """
        # Only the first 500 characters count as the header.
        head = render_inventory_report(items, repo_root)[:500]

        assert self._ISO_TS_RE.search(head), (
            "inventory 报告头部缺少 ISO 格式时间戳"
        )
        assert repo_root in head, (
            f"inventory 报告头部缺少仓库路径 '{repo_root}'"
        )

    @given(trees=_flow_tree_list, orphans=_orphan_list, repo_root=_repo_root_str)
    @settings(max_examples=100)
    def test_flow_report_header(
        self, trees: list[FlowNode], orphans: list[str], repo_root: str
    ) -> None:
        """Feature: repo-audit, Property 14 (Validates: Requirements 4.2).

        The head of ``render_flow_report`` output carries a timestamp and
        ``repo_root``.
        """
        head = render_flow_report(trees, orphans, repo_root)[:500]

        assert self._ISO_TS_RE.search(head), (
            "flow 报告头部缺少 ISO 格式时间戳"
        )
        assert repo_root in head, (
            f"flow 报告头部缺少仓库路径 '{repo_root}'"
        )

    @given(mappings=_mapping_list, issues=_issue_list, repo_root=_repo_root_str)
    @settings(max_examples=100)
    def test_alignment_report_header(
        self, mappings: list[DocMapping], issues: list[AlignmentIssue], repo_root: str
    ) -> None:
        """Feature: repo-audit, Property 14 (Validates: Requirements 4.2).

        The head of ``render_alignment_report`` output carries a timestamp
        and ``repo_root``.
        """
        head = render_alignment_report(mappings, issues, repo_root)[:500]

        assert self._ISO_TS_RE.search(head), (
            "alignment 报告头部缺少 ISO 格式时间戳"
        )
        assert repo_root in head, (
            f"alignment 报告头部缺少仓库路径 '{repo_root}'"
        )


# ===========================================================================
# Property 15: 写操作仅限 docs/audit/
# ===========================================================================


class TestProperty15WritesOnlyDocsAudit:
    """Property 15: writes are confined to docs/audit/.

    Feature: repo-audit, Property 15
    Validates: Requirements 5.2

    After an audit run, nothing outside ``docs/audit/`` may have been created
    or modified.  The test touches the real file system, so it runs with a
    small number of examples.
    """

    # The trailing slash matters: a bare "docs/audit" prefix would also exempt
    # siblings such as "docs/audit_old/x.md" or "docs/auditor.md".
    _AUDIT_PREFIX = "docs/audit/"

    @staticmethod
    def _make_minimal_repo(base: Path, variant: int) -> Path:
        """Create a minimal repository layout under ``base`` and return it.

        The repository is ``base / f"repo_{variant}"`` and always contains
        ``cli/__init__.py``, ``cli/main.py``, ``config/__init__.py`` and an
        empty ``docs`` directory.  ``variant % 3`` selects one extra file
        (``README.md``, ``scripts/__init__.py`` or ``docs/notes.md``) to add
        some variety between examples.
        """
        repo = base / f"repo_{variant}"
        repo.mkdir()

        # CLI entry point package.
        cli_dir = repo / "cli"
        cli_dir.mkdir()
        (cli_dir / "__init__.py").write_text("", encoding="utf-8")
        (cli_dir / "main.py").write_text(
            "# -*- coding: utf-8 -*-\ndef main(): pass\n",
            encoding="utf-8",
        )

        # config package.
        config_dir = repo / "config"
        config_dir.mkdir()
        (config_dir / "__init__.py").write_text("", encoding="utf-8")

        # docs directory (docs/audit/ itself is left for the audit to create).
        docs_dir = repo / "docs"
        docs_dir.mkdir()

        # One variant-dependent extra file.
        if variant % 3 == 0:
            (repo / "README.md").write_text("# 项目\n", encoding="utf-8")
        if variant % 3 == 1:
            scripts_dir = repo / "scripts"
            scripts_dir.mkdir()
            (scripts_dir / "__init__.py").write_text("", encoding="utf-8")
        if variant % 3 == 2:
            (docs_dir / "notes.md").write_text("# 笔记\n", encoding="utf-8")

        return repo

    @staticmethod
    def _snapshot_files(repo: Path) -> dict[str, float]:
        """Map every file outside ``docs/audit/`` to its modification time.

        Keys are POSIX-style paths relative to ``repo``; values are
        ``st_mtime``.  Only files strictly inside the ``docs/audit/``
        directory are left out.
        """
        audit_prefix = TestProperty15WritesOnlyDocsAudit._AUDIT_PREFIX
        snap: dict[str, float] = {}
        for p in repo.rglob("*"):
            if not p.is_file():
                continue
            rel = p.relative_to(repo).as_posix()
            if not rel.startswith(audit_prefix):
                snap[rel] = p.stat().st_mtime
        return snap

    @given(variant=st.integers(min_value=0, max_value=9))
    @settings(max_examples=10)
    def test_writes_only_under_docs_audit(self, variant: int) -> None:
        """Feature: repo-audit, Property 15 (Validates: Requirements 5.2).

        After ``run_audit`` finishes, ``docs/audit/`` must exist and contain
        at least one entry, and every file outside ``docs/audit/`` must have
        existed before the run with an unchanged modification time.
        """
        import tempfile
        from scripts.audit.run_audit import run_audit

        with tempfile.TemporaryDirectory() as tmp_dir:
            repo = self._make_minimal_repo(Path(tmp_dir), variant)
            before_snap = self._snapshot_files(repo)

            run_audit(repo)

            # Reports must have been produced under docs/audit/.
            audit_dir = repo / "docs" / "audit"
            assert audit_dir.is_dir(), "docs/audit/ 目录未创建"
            audit_files = list(audit_dir.iterdir())
            assert len(audit_files) > 0, "docs/audit/ 下无报告文件"

            # Everything else must be exactly what was there before the run:
            # no new files and no rewritten files.
            after_snap = self._snapshot_files(repo)
            for rel, mtime in after_snap.items():
                assert rel in before_snap, (
                    f"docs/audit/ 外出现了新文件: {rel}"
                )
                assert mtime == before_snap[rel], (
                    f"docs/audit/ 外的文件被修改: {rel}"
                )


# ===========================================================================
# 辅助函数 — 从报告文本中提取统计数字
# ===========================================================================

def _extract_summary_total(report: str, section_name: str) -> int:
    """从 inventory 报告的统计摘要中提取指定分区的数字之和。

    查找 "### {section_name}" 下的 Markdown 表格，
    累加每行最后一列的数字（排除合计行）。
    """
    lines = report.split("\n")
    in_section = False
    total = 0

    for line in lines:
        stripped = line.strip()
        if stripped == f"### {section_name}":
            in_section = True
            continue
        if in_section and stripped.startswith("###"):
            # 进入下一个子节
            break
        if in_section and stripped.startswith("|") and "**合计**" not in stripped:
            # 跳过表头和分隔行
            if stripped.startswith("| 用途分类") or stripped.startswith("| 处置标签"):
                continue
            if stripped.startswith("|---"):
                continue
            # 提取最后一列的数字
            cells = [c.strip() for c in stripped.split("|") if c.strip()]
            if cells:
                try:
                    total += int(cells[-1])
                except ValueError:
                    pass

    return total


def _extract_flow_stat(report: str, label: str) -> int:
    """从 flow 报告统计摘要表格中提取指定指标的数字。"""
    # 匹配 "| 孤立模块 | 5 |" 格式
    pattern = re.compile(rf"\|\s*{re.escape(label)}\s*\|\s*(\d+)\s*\|")
    m = pattern.search(report)
    return int(m.group(1)) if m else -1


def _extract_alignment_stat(report: str, label: str) -> int:
    """从 alignment 报告统计摘要中提取指定指标的数字。

    匹配 "- 过期点数量：3" 格式。
    """
    # 兼容全角/半角冒号
    pattern = re.compile(rf"{re.escape(label)}[：:]\s*(\d+)")
    m = pattern.search(report)
    return int(m.group(1)) if m else -1