init: 项目初始提交 - NeoZQYY Monorepo 完整代码

This commit is contained in:
Neo
2026-02-15 14:58:14 +08:00
commit ded6dfb9d8
769 changed files with 182616 additions and 0 deletions

View File

@@ -0,0 +1,428 @@
# -*- coding: utf-8 -*-
"""
单元测试 — 仓库扫描器 (scanner.py)
覆盖:
- 排除模式匹配逻辑
- 递归遍历与 FileEntry 构建
- 空目录检测
- 权限错误容错
"""
from __future__ import annotations
import os
from pathlib import Path
import pytest
from scripts.audit import FileEntry
from scripts.audit.scanner import EXCLUDED_PATTERNS, _is_excluded, scan_repo
# ---------------------------------------------------------------------------
# _is_excluded 单元测试
# ---------------------------------------------------------------------------
class TestIsExcluded:
    """Tests for the exclusion-pattern matching helper ``_is_excluded``."""

    def test_exact_match_git(self) -> None:
        """``.git`` must be reported as excluded under the default patterns."""
        outcome = _is_excluded(".git", EXCLUDED_PATTERNS)
        assert outcome is True

    def test_exact_match_pycache(self) -> None:
        """``__pycache__`` must be reported as excluded under the default patterns."""
        outcome = _is_excluded("__pycache__", EXCLUDED_PATTERNS)
        assert outcome is True

    def test_exact_match_pytest_cache(self) -> None:
        """``.pytest_cache`` must be reported as excluded under the default patterns."""
        outcome = _is_excluded(".pytest_cache", EXCLUDED_PATTERNS)
        assert outcome is True

    def test_exact_match_kiro(self) -> None:
        """``.kiro`` must be reported as excluded under the default patterns."""
        outcome = _is_excluded(".kiro", EXCLUDED_PATTERNS)
        assert outcome is True

    def test_wildcard_pyc(self) -> None:
        """A ``.pyc`` file name must be reported as excluded under the default patterns."""
        outcome = _is_excluded("module.pyc", EXCLUDED_PATTERNS)
        assert outcome is True

    def test_normal_py_not_excluded(self) -> None:
        """An ordinary ``.py`` file name must not be excluded."""
        outcome = _is_excluded("main.py", EXCLUDED_PATTERNS)
        assert outcome is False

    def test_normal_dir_not_excluded(self) -> None:
        """An ordinary directory name must not be excluded."""
        outcome = _is_excluded("src", EXCLUDED_PATTERNS)
        assert outcome is False

    def test_empty_patterns(self) -> None:
        """With an empty pattern list nothing is excluded, not even ``.git``."""
        outcome = _is_excluded(".git", [])
        assert outcome is False

    def test_custom_pattern(self) -> None:
        """A caller-supplied wildcard pattern is honoured."""
        outcome = _is_excluded("data.csv", ["*.csv"])
        assert outcome is True
# ---------------------------------------------------------------------------
# scan_repo 单元测试
# ---------------------------------------------------------------------------
class TestScanRepo:
    """Tests for the recursive traversal performed by ``scan_repo``."""

    def test_basic_structure(self, tmp_path: Path) -> None:
        """Plain files and directories are all reported with POSIX-style rel_paths."""
        (tmp_path / "a.py").write_text("# code", encoding="utf-8")
        sub = tmp_path / "sub"
        sub.mkdir()
        (sub / "b.txt").write_text("hello", encoding="utf-8")
        entries = scan_repo(tmp_path)
        paths = {e.rel_path for e in entries}
        assert "a.py" in paths
        assert "sub" in paths
        assert "sub/b.txt" in paths

    def test_file_entry_fields(self, tmp_path: Path) -> None:
        """Every field of a file's FileEntry is populated as expected."""
        (tmp_path / "hello.md").write_text("# hi", encoding="utf-8")
        entries = scan_repo(tmp_path)
        md = next(e for e in entries if e.rel_path == "hello.md")
        assert md.is_dir is False
        assert md.size_bytes > 0
        assert md.extension == ".md"
        assert md.is_empty_dir is False

    def test_directory_entry_fields(self, tmp_path: Path) -> None:
        """Every field of a non-empty directory's FileEntry is populated as expected."""
        sub = tmp_path / "mydir"
        sub.mkdir()
        (sub / "file.py").write_text("pass", encoding="utf-8")
        entries = scan_repo(tmp_path)
        d = next(e for e in entries if e.rel_path == "mydir")
        assert d.is_dir is True
        assert d.size_bytes == 0
        assert d.extension == ""
        assert d.is_empty_dir is False

    def test_excluded_git_dir(self, tmp_path: Path) -> None:
        """The ``.git`` directory and its contents are excluded."""
        git_dir = tmp_path / ".git"
        git_dir.mkdir()
        (git_dir / "config").write_text("", encoding="utf-8")
        entries = scan_repo(tmp_path)
        paths = {e.rel_path for e in entries}
        assert ".git" not in paths
        assert ".git/config" not in paths

    def test_excluded_pycache(self, tmp_path: Path) -> None:
        """``__pycache__`` directories are excluded, including nested ones."""
        cache = tmp_path / "pkg" / "__pycache__"
        cache.mkdir(parents=True)
        (cache / "mod.cpython-310.pyc").write_bytes(b"\x00")
        entries = scan_repo(tmp_path)
        paths = {e.rel_path for e in entries}
        assert not any("__pycache__" in p for p in paths)

    def test_excluded_pyc_files(self, tmp_path: Path) -> None:
        """``*.pyc`` files are excluded while the sibling ``.py`` file is kept."""
        (tmp_path / "mod.pyc").write_bytes(b"\x00")
        (tmp_path / "mod.py").write_text("pass", encoding="utf-8")
        entries = scan_repo(tmp_path)
        paths = {e.rel_path for e in entries}
        assert "mod.pyc" not in paths
        assert "mod.py" in paths

    def test_empty_directory_detection(self, tmp_path: Path) -> None:
        """An empty directory is flagged with ``is_empty_dir=True``."""
        (tmp_path / "empty").mkdir()
        entries = scan_repo(tmp_path)
        d = next(e for e in entries if e.rel_path == "empty")
        assert d.is_dir is True
        assert d.is_empty_dir is True

    def test_dir_with_only_excluded_children(self, tmp_path: Path) -> None:
        """A directory containing only excluded children counts as empty."""
        sub = tmp_path / "pkg"
        sub.mkdir()
        cache = sub / "__pycache__"
        cache.mkdir()
        (cache / "x.pyc").write_bytes(b"\x00")
        entries = scan_repo(tmp_path)
        d = next(e for e in entries if e.rel_path == "pkg")
        assert d.is_empty_dir is True

    def test_custom_exclude_patterns(self, tmp_path: Path) -> None:
        """Caller-supplied exclusion patterns take effect."""
        (tmp_path / "keep.py").write_text("pass", encoding="utf-8")
        (tmp_path / "skip.log").write_text("log", encoding="utf-8")
        entries = scan_repo(tmp_path, exclude=["*.log"])
        paths = {e.rel_path for e in entries}
        assert "keep.py" in paths
        assert "skip.log" not in paths

    def test_empty_repo(self, tmp_path: Path) -> None:
        """An empty repository yields an empty list."""
        entries = scan_repo(tmp_path)
        assert entries == []

    def test_results_sorted(self, tmp_path: Path) -> None:
        """Results are returned sorted by ``rel_path``."""
        (tmp_path / "z.py").write_text("", encoding="utf-8")
        (tmp_path / "a.py").write_text("", encoding="utf-8")
        sub = tmp_path / "m"
        sub.mkdir()
        (sub / "b.py").write_text("", encoding="utf-8")
        entries = scan_repo(tmp_path)
        paths = [e.rel_path for e in entries]
        assert paths == sorted(paths)

    @pytest.mark.skipif(
        os.name == "nt",
        reason="Windows 上 chmod 行为不同,跳过权限测试",
    )
    @pytest.mark.skipif(
        # A process with effective UID 0 bypasses permission bits, so
        # chmod(0o000) would not block the scan and the assertion below would
        # fail spuriously (typical in containerised CI running as root).
        hasattr(os, "geteuid") and os.geteuid() == 0,
        reason="root ignores permission bits; chmod(0o000) cannot deny access",
    )
    def test_permission_error_skipped(self, tmp_path: Path) -> None:
        """An unreadable directory is skipped without aborting the scan."""
        ok_file = tmp_path / "ok.py"
        ok_file.write_text("pass", encoding="utf-8")
        no_access = tmp_path / "secret"
        no_access.mkdir()
        (no_access / "data.txt").write_text("x", encoding="utf-8")
        no_access.chmod(0o000)
        try:
            entries = scan_repo(tmp_path)
            paths = {e.rel_path for e in entries}
            # ok.py must still be scanned normally.
            assert "ok.py" in paths
            # The `secret` directory entry itself may still be recorded
            # (per the original note, the scanner's _walk records a directory
            # before attempting iterdir — not visible from this file), but
            # its children must not appear.
            assert "secret/data.txt" not in paths
        finally:
            # Restore permissions so pytest can clean up tmp_path.
            no_access.chmod(0o755)

    def test_nested_directories(self, tmp_path: Path) -> None:
        """Multi-level nested directories are traversed at every level."""
        deep = tmp_path / "a" / "b" / "c"
        deep.mkdir(parents=True)
        (deep / "leaf.py").write_text("pass", encoding="utf-8")
        entries = scan_repo(tmp_path)
        paths = {e.rel_path for e in entries}
        assert "a" in paths
        assert "a/b" in paths
        assert "a/b/c" in paths
        assert "a/b/c/leaf.py" in paths

    def test_extension_lowercase(self, tmp_path: Path) -> None:
        """Extensions are normalised to lowercase."""
        (tmp_path / "README.MD").write_text("", encoding="utf-8")
        entries = scan_repo(tmp_path)
        md = next(e for e in entries if "README" in e.rel_path)
        assert md.extension == ".md"

    def test_no_extension(self, tmp_path: Path) -> None:
        """A file without an extension reports an empty-string extension."""
        (tmp_path / "Makefile").write_text("", encoding="utf-8")
        entries = scan_repo(tmp_path)
        f = next(e for e in entries if e.rel_path == "Makefile")
        assert f.extension == ""

    def test_root_not_in_entries(self, tmp_path: Path) -> None:
        """The repository root itself never appears in the results."""
        (tmp_path / "a.py").write_text("", encoding="utf-8")
        entries = scan_repo(tmp_path)
        paths = {e.rel_path for e in entries}
        assert "." not in paths
        assert "" not in paths
# ---------------------------------------------------------------------------
# 属性测试 — Property 7: 扫描器排除规则
# Feature: repo-audit, Property 7: 扫描器排除规则
# Validates: Requirements 1.1
# ---------------------------------------------------------------------------
import fnmatch
import string
import tempfile
from hypothesis import given, settings
from hypothesis import strategies as st
# --- Generator strategies ---

# Characters allowed in generated file/directory names: lowercase ASCII
# letters, digits, "_" and "-" (no path separators or other special characters).
_SAFE_CHARS = "".join((string.ascii_lowercase, string.digits, "_-"))

# Strategy for short (1-8 character) names drawn from _SAFE_CHARS; intended to
# produce ordinary names that do not collide with the exclusion patterns.
_safe_name = st.text(alphabet=_SAFE_CHARS, min_size=1, max_size=8)

# Directory names that the exclusion patterns are expected to cover.
_EXCLUDED_DIR_NAMES = [".git", "__pycache__", ".pytest_cache", ".kiro"]

# File extension that the exclusion patterns are expected to cover.
_EXCLUDED_FILE_EXT = ".pyc"

# Strategy that picks one of the excluded directory names at random.
_excluded_dir_name = st.sampled_from(_EXCLUDED_DIR_NAMES)
def _build_tree(tmp: Path, normal_names: list[str], excluded_dirs: list[str],
                include_pyc: bool) -> None:
    """Populate *tmp* with ordinary files plus entries meant to be excluded.

    Args:
        tmp: Existing directory in which the tree is created.
        normal_names: Base names of ordinary files; each becomes
            ``<name>.txt`` (an empty name falls back to ``f``).
        excluded_dirs: Directory names to create directly under *tmp*, each
            holding a single ``inner.txt`` file.
        include_pyc: When true, also create ``module.pyc`` under *tmp*.
    """
    # Ordinary files: an already-existing file (e.g. a duplicate name) is left
    # untouched rather than rewritten.
    for raw_name in normal_names:
        target = tmp / f"{raw_name or 'f'}.txt"
        if target.exists():
            continue
        target.write_text("ok", encoding="utf-8")

    # Directories meant to be excluded, each with one child file inside.
    for excluded in excluded_dirs:
        folder = tmp.joinpath(excluded)
        folder.mkdir(exist_ok=True)
        folder.joinpath("inner.txt").write_text("hidden", encoding="utf-8")

    # Optional compiled-bytecode file at the top level.
    if include_pyc:
        tmp.joinpath("module.pyc").write_bytes(b"\x00")
class TestProperty7ScannerExclusionRules:
    """Property 7: scanner exclusion rules.

    For any file tree, the FileEntry list returned by scan_repo must not
    contain an entry whose rel_path matches an exclusion pattern
    (.git, __pycache__, .pytest_cache, etc.).

    Feature: repo-audit, Property 7: scanner exclusion rules
    Validates: Requirements 1.1
    """

    @given(
        normal_names=st.lists(_safe_name, min_size=0, max_size=5),
        excluded_dirs=st.lists(_excluded_dir_name, min_size=1, max_size=3),
        include_pyc=st.booleans(),
    )
    @settings(max_examples=100)
    def test_excluded_entries_never_in_results(
        self,
        normal_names: list[str],
        excluded_dirs: list[str],
        include_pyc: bool,
    ) -> None:
        """No scanned entry may contain a path segment matching an exclusion pattern."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            _build_tree(root, normal_names, excluded_dirs, include_pyc)
            for entry in scan_repo(root):
                # Check every segment of rel_path against every pattern.
                for part in entry.rel_path.split("/"):
                    for pat in EXCLUDED_PATTERNS:
                        matched = fnmatch.fnmatch(part, pat)
                        assert not matched, (
                            f"排除模式 '{pat}' 不应出现在结果中,"
                            f"但发现 rel_path='{entry.rel_path}' 包含 '{part}'"
                        )

    @given(
        excluded_dir=_excluded_dir_name,
        depth=st.integers(min_value=1, max_value=3),
    )
    @settings(max_examples=100)
    def test_excluded_dirs_at_any_depth(
        self,
        excluded_dir: str,
        depth: int,
    ) -> None:
        """An excluded directory must not show up regardless of nesting depth."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            # Build level0/level1/.../<excluded_dir>/secret.txt.
            level_dir = root
            for i in range(depth):
                level_dir = level_dir / f"level{i}"
                level_dir.mkdir(exist_ok=True)
                # An ordinary file keeps each parent directory non-empty.
                (level_dir / "keep.txt").write_text("ok", encoding="utf-8")
            # Place the excluded directory at the deepest level.
            hidden_dir = level_dir / excluded_dir
            hidden_dir.mkdir(exist_ok=True)
            (hidden_dir / "secret.txt").write_text("hidden", encoding="utf-8")
            for entry in scan_repo(root):
                segments = entry.rel_path.split("/")
                assert excluded_dir not in segments, (
                    f"被排除目录 '{excluded_dir}' 不应出现在结果中,"
                    f"但发现 rel_path='{entry.rel_path}'"
                )

    @given(
        custom_patterns=st.lists(
            st.sampled_from(["*.log", "*.tmp", "*.bak", "node_modules", ".venv"]),
            min_size=1,
            max_size=3,
        ),
    )
    @settings(max_examples=100)
    def test_custom_exclude_patterns_respected(
        self,
        custom_patterns: list[str],
    ) -> None:
        """Caller-supplied exclusion patterns must be honoured by scan_repo too."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            # One ordinary file that no pattern targets.
            (root / "main.py").write_text("pass", encoding="utf-8")
            # Create a matching file or directory for every custom pattern.
            for pat in custom_patterns:
                if not pat.startswith("*."):
                    # Exact-name pattern: create a directory with one child.
                    folder = root / pat
                    folder.mkdir(exist_ok=True)
                    (folder / "inner.txt").write_text("x", encoding="utf-8")
                    continue
                # Wildcard pattern: create a file carrying that extension.
                ext = pat[1:]  # e.g. ".log"
                (root / f"data{ext}").write_text("x", encoding="utf-8")
            for entry in scan_repo(root, exclude=custom_patterns):
                for part in entry.rel_path.split("/"):
                    for pat in custom_patterns:
                        matched = fnmatch.fnmatch(part, pat)
                        assert not matched, (
                            f"自定义排除模式 '{pat}' 不应出现在结果中,"
                            f"但发现 rel_path='{entry.rel_path}' 包含 '{part}'"
                        )