# -*- coding: utf-8 -*- """ 单元测试 — 仓库扫描器 (scanner.py) 覆盖: - 排除模式匹配逻辑 - 递归遍历与 FileEntry 构建 - 空目录检测 - 权限错误容错 """ from __future__ import annotations import os from pathlib import Path import pytest from scripts.audit import FileEntry from scripts.audit.scanner import EXCLUDED_PATTERNS, _is_excluded, scan_repo # --------------------------------------------------------------------------- # _is_excluded 单元测试 # --------------------------------------------------------------------------- class TestIsExcluded: """排除模式匹配逻辑测试。""" def test_exact_match_git(self) -> None: assert _is_excluded(".git", EXCLUDED_PATTERNS) is True def test_exact_match_pycache(self) -> None: assert _is_excluded("__pycache__", EXCLUDED_PATTERNS) is True def test_exact_match_pytest_cache(self) -> None: assert _is_excluded(".pytest_cache", EXCLUDED_PATTERNS) is True def test_exact_match_kiro(self) -> None: assert _is_excluded(".kiro", EXCLUDED_PATTERNS) is True def test_wildcard_pyc(self) -> None: assert _is_excluded("module.pyc", EXCLUDED_PATTERNS) is True def test_normal_py_not_excluded(self) -> None: assert _is_excluded("main.py", EXCLUDED_PATTERNS) is False def test_normal_dir_not_excluded(self) -> None: assert _is_excluded("src", EXCLUDED_PATTERNS) is False def test_empty_patterns(self) -> None: assert _is_excluded(".git", []) is False def test_custom_pattern(self) -> None: assert _is_excluded("data.csv", ["*.csv"]) is True # --------------------------------------------------------------------------- # scan_repo 单元测试 # --------------------------------------------------------------------------- class TestScanRepo: """scan_repo 递归遍历测试。""" def test_basic_structure(self, tmp_path: Path) -> None: """基本文件和目录应被正确扫描。""" (tmp_path / "a.py").write_text("# code", encoding="utf-8") sub = tmp_path / "sub" sub.mkdir() (sub / "b.txt").write_text("hello", encoding="utf-8") entries = scan_repo(tmp_path) paths = {e.rel_path for e in entries} assert "a.py" in paths assert "sub" in paths assert "sub/b.txt" in paths def test_file_entry_fields(self, tmp_path: Path) -> None: """FileEntry 各字段应正确填充。""" (tmp_path / "hello.md").write_text("# hi", encoding="utf-8") entries = scan_repo(tmp_path) md = next(e for e in entries if e.rel_path == "hello.md") assert md.is_dir is False assert md.size_bytes > 0 assert md.extension == ".md" assert md.is_empty_dir is False def test_directory_entry_fields(self, tmp_path: Path) -> None: """目录条目的字段应正确设置。""" sub = tmp_path / "mydir" sub.mkdir() (sub / "file.py").write_text("pass", encoding="utf-8") entries = scan_repo(tmp_path) d = next(e for e in entries if e.rel_path == "mydir") assert d.is_dir is True assert d.size_bytes == 0 assert d.extension == "" assert d.is_empty_dir is False def test_excluded_git_dir(self, tmp_path: Path) -> None: """.git 目录及其内容应被排除。""" git_dir = tmp_path / ".git" git_dir.mkdir() (git_dir / "config").write_text("", encoding="utf-8") entries = scan_repo(tmp_path) paths = {e.rel_path for e in entries} assert ".git" not in paths assert ".git/config" not in paths def test_excluded_pycache(self, tmp_path: Path) -> None: """__pycache__ 目录应被排除。""" cache = tmp_path / "pkg" / "__pycache__" cache.mkdir(parents=True) (cache / "mod.cpython-310.pyc").write_bytes(b"\x00") entries = scan_repo(tmp_path) paths = {e.rel_path for e in entries} assert not any("__pycache__" in p for p in paths) def test_excluded_pyc_files(self, tmp_path: Path) -> None: """*.pyc 文件应被排除。""" (tmp_path / "mod.pyc").write_bytes(b"\x00") (tmp_path / "mod.py").write_text("pass", encoding="utf-8") entries = scan_repo(tmp_path) paths = {e.rel_path for e in entries} assert "mod.pyc" not in paths assert "mod.py" in paths def test_empty_directory_detection(self, tmp_path: Path) -> None: """空目录应被标记为 is_empty_dir=True。""" (tmp_path / "empty").mkdir() entries = scan_repo(tmp_path) d = next(e for e in entries if e.rel_path == "empty") assert d.is_dir is True assert d.is_empty_dir is True def test_dir_with_only_excluded_children(self, tmp_path: Path) -> None: """仅含被排除子项的目录应视为空目录。""" sub = tmp_path / "pkg" sub.mkdir() cache = sub / "__pycache__" cache.mkdir() (cache / "x.pyc").write_bytes(b"\x00") entries = scan_repo(tmp_path) d = next(e for e in entries if e.rel_path == "pkg") assert d.is_empty_dir is True def test_custom_exclude_patterns(self, tmp_path: Path) -> None: """自定义排除模式应生效。""" (tmp_path / "keep.py").write_text("pass", encoding="utf-8") (tmp_path / "skip.log").write_text("log", encoding="utf-8") entries = scan_repo(tmp_path, exclude=["*.log"]) paths = {e.rel_path for e in entries} assert "keep.py" in paths assert "skip.log" not in paths def test_empty_repo(self, tmp_path: Path) -> None: """空仓库应返回空列表。""" entries = scan_repo(tmp_path) assert entries == [] def test_results_sorted(self, tmp_path: Path) -> None: """返回结果应按 rel_path 排序。""" (tmp_path / "z.py").write_text("", encoding="utf-8") (tmp_path / "a.py").write_text("", encoding="utf-8") sub = tmp_path / "m" sub.mkdir() (sub / "b.py").write_text("", encoding="utf-8") entries = scan_repo(tmp_path) paths = [e.rel_path for e in entries] assert paths == sorted(paths) @pytest.mark.skipif( os.name == "nt", reason="Windows 上 chmod 行为不同,跳过权限测试", ) def test_permission_error_skipped(self, tmp_path: Path) -> None: """权限不足的目录应被跳过,不中断扫描。""" ok_file = tmp_path / "ok.py" ok_file.write_text("pass", encoding="utf-8") no_access = tmp_path / "secret" no_access.mkdir() (no_access / "data.txt").write_text("x", encoding="utf-8") no_access.chmod(0o000) try: entries = scan_repo(tmp_path) paths = {e.rel_path for e in entries} # ok.py 应正常扫描到 assert "ok.py" in paths # secret 目录本身会被记录(在 _walk 中先记录目录再尝试 iterdir) # 但其子文件不应出现 assert "secret/data.txt" not in paths finally: no_access.chmod(0o755) def test_nested_directories(self, tmp_path: Path) -> None: """多层嵌套目录应被正确遍历。""" deep = tmp_path / "a" / "b" / "c" deep.mkdir(parents=True) (deep / "leaf.py").write_text("pass", encoding="utf-8") entries = scan_repo(tmp_path) paths = {e.rel_path for e in entries} assert "a" in paths assert "a/b" in paths assert "a/b/c" in paths assert "a/b/c/leaf.py" in paths def test_extension_lowercase(self, tmp_path: Path) -> None: """扩展名应统一为小写。""" (tmp_path / "README.MD").write_text("", encoding="utf-8") entries = scan_repo(tmp_path) md = next(e for e in entries if "README" in e.rel_path) assert md.extension == ".md" def test_no_extension(self, tmp_path: Path) -> None: """无扩展名的文件 extension 应为空字符串。""" (tmp_path / "Makefile").write_text("", encoding="utf-8") entries = scan_repo(tmp_path) f = next(e for e in entries if e.rel_path == "Makefile") assert f.extension == "" def test_root_not_in_entries(self, tmp_path: Path) -> None: """根目录自身不应出现在结果中。""" (tmp_path / "a.py").write_text("", encoding="utf-8") entries = scan_repo(tmp_path) paths = {e.rel_path for e in entries} assert "." not in paths assert "" not in paths # --------------------------------------------------------------------------- # 属性测试 — Property 7: 扫描器排除规则 # Feature: repo-audit, Property 7: 扫描器排除规则 # Validates: Requirements 1.1 # --------------------------------------------------------------------------- import fnmatch import string import tempfile from hypothesis import given, settings from hypothesis import strategies as st # --- 生成器策略 --- # 合法的文件/目录名字符(排除路径分隔符和特殊字符) _SAFE_CHARS = string.ascii_lowercase + string.digits + "_-" # 安全的文件名策略(不与排除模式冲突的普通名称) _safe_name = st.text(_SAFE_CHARS, min_size=1, max_size=8) # 排除模式中的目录名 _EXCLUDED_DIR_NAMES = [".git", "__pycache__", ".pytest_cache", ".kiro"] # 排除模式中的文件扩展名 _EXCLUDED_FILE_EXT = ".pyc" # 随机选择一个被排除的目录名 _excluded_dir_name = st.sampled_from(_EXCLUDED_DIR_NAMES) def _build_tree(tmp: Path, normal_names: list[str], excluded_dirs: list[str], include_pyc: bool) -> None: """在临时目录中构建包含正常文件和被排除条目的文件树。""" # 创建正常文件 for name in normal_names: safe = name or "f" filepath = tmp / f"{safe}.txt" if not filepath.exists(): filepath.write_text("ok", encoding="utf-8") # 创建被排除的目录(含子文件) for dirname in excluded_dirs: d = tmp / dirname d.mkdir(exist_ok=True) (d / "inner.txt").write_text("hidden", encoding="utf-8") # 可选:创建 .pyc 文件 if include_pyc: (tmp / "module.pyc").write_bytes(b"\x00") class TestProperty7ScannerExclusionRules: """ Property 7: 扫描器排除规则 对于任意文件树,scan_repo 返回的 FileEntry 列表中不应包含 rel_path 匹配排除模式(.git、__pycache__、.pytest_cache 等)的条目。 Feature: repo-audit, Property 7: 扫描器排除规则 Validates: Requirements 1.1 """ @given( normal_names=st.lists(_safe_name, min_size=0, max_size=5), excluded_dirs=st.lists(_excluded_dir_name, min_size=1, max_size=3), include_pyc=st.booleans(), ) @settings(max_examples=100) def test_excluded_entries_never_in_results( self, normal_names: list[str], excluded_dirs: list[str], include_pyc: bool, ) -> None: """扫描结果中不应包含任何匹配排除模式的条目。""" with tempfile.TemporaryDirectory() as tmpdir: tmp = Path(tmpdir) _build_tree(tmp, normal_names, excluded_dirs, include_pyc) entries = scan_repo(tmp) for entry in entries: # 检查 rel_path 的每一段是否匹配排除模式 parts = entry.rel_path.split("/") for part in parts: for pat in EXCLUDED_PATTERNS: assert not fnmatch.fnmatch(part, pat), ( f"排除模式 '{pat}' 不应出现在结果中," f"但发现 rel_path='{entry.rel_path}' 包含 '{part}'" ) @given( excluded_dir=_excluded_dir_name, depth=st.integers(min_value=1, max_value=3), ) @settings(max_examples=100) def test_excluded_dirs_at_any_depth( self, excluded_dir: str, depth: int, ) -> None: """被排除目录无论在哪一层嵌套深度,都不应出现在结果中。""" with tempfile.TemporaryDirectory() as tmpdir: tmp = Path(tmpdir) # 构建嵌套路径:normal/normal/.../excluded_dir/file.txt current = tmp for i in range(depth): current = current / f"level{i}" current.mkdir(exist_ok=True) # 放一个正常文件保证父目录非空 (current / "keep.txt").write_text("ok", encoding="utf-8") # 在最深层放置被排除目录 excluded = current / excluded_dir excluded.mkdir(exist_ok=True) (excluded / "secret.txt").write_text("hidden", encoding="utf-8") entries = scan_repo(tmp) for entry in entries: parts = entry.rel_path.split("/") assert excluded_dir not in parts, ( f"被排除目录 '{excluded_dir}' 不应出现在结果中," f"但发现 rel_path='{entry.rel_path}'" ) @given( custom_patterns=st.lists( st.sampled_from(["*.log", "*.tmp", "*.bak", "node_modules", ".venv"]), min_size=1, max_size=3, ), ) @settings(max_examples=100) def test_custom_exclude_patterns_respected( self, custom_patterns: list[str], ) -> None: """自定义排除模式同样应被 scan_repo 正确排除。""" with tempfile.TemporaryDirectory() as tmpdir: tmp = Path(tmpdir) # 创建一个正常文件 (tmp / "main.py").write_text("pass", encoding="utf-8") # 为每个自定义模式创建匹配的文件或目录 for pat in custom_patterns: if pat.startswith("*."): # 通配符模式 → 创建匹配的文件 ext = pat[1:] # e.g. ".log" (tmp / f"data{ext}").write_text("x", encoding="utf-8") else: # 精确匹配 → 创建目录 d = tmp / pat d.mkdir(exist_ok=True) (d / "inner.txt").write_text("x", encoding="utf-8") entries = scan_repo(tmp, exclude=custom_patterns) for entry in entries: parts = entry.rel_path.split("/") for part in parts: for pat in custom_patterns: assert not fnmatch.fnmatch(part, pat), ( f"自定义排除模式 '{pat}' 不应出现在结果中," f"但发现 rel_path='{entry.rel_path}' 包含 '{part}'" )