init: 项目初始提交 - NeoZQYY Monorepo 完整代码
This commit is contained in:
428
apps/etl/pipelines/feiqiu/tests/unit/test_audit_scanner.py
Normal file
428
apps/etl/pipelines/feiqiu/tests/unit/test_audit_scanner.py
Normal file
@@ -0,0 +1,428 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
单元测试 — 仓库扫描器 (scanner.py)
|
||||
|
||||
覆盖:
|
||||
- 排除模式匹配逻辑
|
||||
- 递归遍历与 FileEntry 构建
|
||||
- 空目录检测
|
||||
- 权限错误容错
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from scripts.audit import FileEntry
|
||||
from scripts.audit.scanner import EXCLUDED_PATTERNS, _is_excluded, scan_repo
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _is_excluded 单元测试
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestIsExcluded:
    """Unit tests for the exclusion-pattern matcher ``_is_excluded``."""

    def test_exact_match_git(self) -> None:
        """``.git`` is expected to match the default exclusion patterns."""
        matched = _is_excluded(".git", EXCLUDED_PATTERNS)
        assert matched is True

    def test_exact_match_pycache(self) -> None:
        """``__pycache__`` is expected to match the default patterns."""
        matched = _is_excluded("__pycache__", EXCLUDED_PATTERNS)
        assert matched is True

    def test_exact_match_pytest_cache(self) -> None:
        """``.pytest_cache`` is expected to match the default patterns."""
        matched = _is_excluded(".pytest_cache", EXCLUDED_PATTERNS)
        assert matched is True

    def test_exact_match_kiro(self) -> None:
        """``.kiro`` is expected to match the default patterns."""
        matched = _is_excluded(".kiro", EXCLUDED_PATTERNS)
        assert matched is True

    def test_wildcard_pyc(self) -> None:
        """A ``.pyc`` file name is expected to match the default patterns."""
        matched = _is_excluded("module.pyc", EXCLUDED_PATTERNS)
        assert matched is True

    def test_normal_py_not_excluded(self) -> None:
        """An ordinary ``.py`` file name must not be reported as excluded."""
        matched = _is_excluded("main.py", EXCLUDED_PATTERNS)
        assert matched is False

    def test_normal_dir_not_excluded(self) -> None:
        """An ordinary directory name must not be reported as excluded."""
        matched = _is_excluded("src", EXCLUDED_PATTERNS)
        assert matched is False

    def test_empty_patterns(self) -> None:
        """With an empty pattern list nothing is excluded, not even ``.git``."""
        matched = _is_excluded(".git", [])
        assert matched is False

    def test_custom_pattern(self) -> None:
        """A caller-supplied wildcard pattern is honoured."""
        matched = _is_excluded("data.csv", ["*.csv"])
        assert matched is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# scan_repo 单元测试
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestScanRepo:
    """Unit tests for the recursive traversal performed by ``scan_repo``."""

    def test_basic_structure(self, tmp_path: Path) -> None:
        """Plain files and directories are all reported."""
        (tmp_path / "a.py").write_text("# code", encoding="utf-8")
        sub = tmp_path / "sub"
        sub.mkdir()
        (sub / "b.txt").write_text("hello", encoding="utf-8")

        entries = scan_repo(tmp_path)
        paths = {e.rel_path for e in entries}

        assert "a.py" in paths
        assert "sub" in paths
        assert "sub/b.txt" in paths

    def test_file_entry_fields(self, tmp_path: Path) -> None:
        """Every FileEntry field is populated correctly for a regular file."""
        (tmp_path / "hello.md").write_text("# hi", encoding="utf-8")

        entries = scan_repo(tmp_path)
        md = next(e for e in entries if e.rel_path == "hello.md")

        assert md.is_dir is False
        assert md.size_bytes > 0
        assert md.extension == ".md"
        assert md.is_empty_dir is False

    def test_directory_entry_fields(self, tmp_path: Path) -> None:
        """Every FileEntry field is populated correctly for a directory."""
        sub = tmp_path / "mydir"
        sub.mkdir()
        (sub / "file.py").write_text("pass", encoding="utf-8")

        entries = scan_repo(tmp_path)
        d = next(e for e in entries if e.rel_path == "mydir")

        assert d.is_dir is True
        assert d.size_bytes == 0
        assert d.extension == ""
        assert d.is_empty_dir is False

    def test_excluded_git_dir(self, tmp_path: Path) -> None:
        """The ``.git`` directory and its contents are left out."""
        git_dir = tmp_path / ".git"
        git_dir.mkdir()
        (git_dir / "config").write_text("", encoding="utf-8")

        entries = scan_repo(tmp_path)
        paths = {e.rel_path for e in entries}

        assert ".git" not in paths
        assert ".git/config" not in paths

    def test_excluded_pycache(self, tmp_path: Path) -> None:
        """``__pycache__`` directories are left out."""
        cache = tmp_path / "pkg" / "__pycache__"
        cache.mkdir(parents=True)
        (cache / "mod.cpython-310.pyc").write_bytes(b"\x00")

        entries = scan_repo(tmp_path)
        paths = {e.rel_path for e in entries}

        assert not any("__pycache__" in p for p in paths)

    def test_excluded_pyc_files(self, tmp_path: Path) -> None:
        """``*.pyc`` files are left out while sibling ``.py`` files remain."""
        (tmp_path / "mod.pyc").write_bytes(b"\x00")
        (tmp_path / "mod.py").write_text("pass", encoding="utf-8")

        entries = scan_repo(tmp_path)
        paths = {e.rel_path for e in entries}

        assert "mod.pyc" not in paths
        assert "mod.py" in paths

    def test_empty_directory_detection(self, tmp_path: Path) -> None:
        """An empty directory is flagged with ``is_empty_dir=True``."""
        (tmp_path / "empty").mkdir()

        entries = scan_repo(tmp_path)
        d = next(e for e in entries if e.rel_path == "empty")

        assert d.is_dir is True
        assert d.is_empty_dir is True

    def test_dir_with_only_excluded_children(self, tmp_path: Path) -> None:
        """A directory holding only excluded children counts as empty."""
        sub = tmp_path / "pkg"
        sub.mkdir()
        cache = sub / "__pycache__"
        cache.mkdir()
        (cache / "x.pyc").write_bytes(b"\x00")

        entries = scan_repo(tmp_path)
        d = next(e for e in entries if e.rel_path == "pkg")

        assert d.is_empty_dir is True

    def test_custom_exclude_patterns(self, tmp_path: Path) -> None:
        """Caller-supplied exclusion patterns take effect."""
        (tmp_path / "keep.py").write_text("pass", encoding="utf-8")
        (tmp_path / "skip.log").write_text("log", encoding="utf-8")

        entries = scan_repo(tmp_path, exclude=["*.log"])
        paths = {e.rel_path for e in entries}

        assert "keep.py" in paths
        assert "skip.log" not in paths

    def test_empty_repo(self, tmp_path: Path) -> None:
        """An empty repository yields an empty list."""
        entries = scan_repo(tmp_path)
        assert entries == []

    def test_results_sorted(self, tmp_path: Path) -> None:
        """Results come back ordered by ``rel_path``."""
        (tmp_path / "z.py").write_text("", encoding="utf-8")
        (tmp_path / "a.py").write_text("", encoding="utf-8")
        sub = tmp_path / "m"
        sub.mkdir()
        (sub / "b.py").write_text("", encoding="utf-8")

        entries = scan_repo(tmp_path)
        paths = [e.rel_path for e in entries]

        assert paths == sorted(paths)

    @pytest.mark.skipif(
        os.name == "nt",
        reason="Windows 上 chmod 行为不同,跳过权限测试",
    )
    @pytest.mark.skipif(
        # A process with effective UID 0 ignores permission bits, so the
        # chmod(0o000) below would not block access and the assertion on
        # "secret/data.txt" would fail spuriously (typical in CI containers).
        # hasattr guards platforms where os.geteuid does not exist.
        hasattr(os, "geteuid") and os.geteuid() == 0,
        reason="running as root: chmod(0o000) does not restrict access",
    )
    def test_permission_error_skipped(self, tmp_path: Path) -> None:
        """An unreadable directory is skipped without aborting the scan."""
        ok_file = tmp_path / "ok.py"
        ok_file.write_text("pass", encoding="utf-8")

        no_access = tmp_path / "secret"
        no_access.mkdir()
        (no_access / "data.txt").write_text("x", encoding="utf-8")
        no_access.chmod(0o000)

        try:
            entries = scan_repo(tmp_path)
            paths = {e.rel_path for e in entries}
            # ok.py must still be picked up.
            assert "ok.py" in paths
            # The "secret" directory itself gets recorded (_walk records the
            # directory before attempting iterdir), but none of its children
            # may show up.
            assert "secret/data.txt" not in paths
        finally:
            # Restore permissions so pytest can clean up tmp_path.
            no_access.chmod(0o755)

    def test_nested_directories(self, tmp_path: Path) -> None:
        """Deeply nested directories are traversed at every level."""
        deep = tmp_path / "a" / "b" / "c"
        deep.mkdir(parents=True)
        (deep / "leaf.py").write_text("pass", encoding="utf-8")

        entries = scan_repo(tmp_path)
        paths = {e.rel_path for e in entries}

        assert "a" in paths
        assert "a/b" in paths
        assert "a/b/c" in paths
        assert "a/b/c/leaf.py" in paths

    def test_extension_lowercase(self, tmp_path: Path) -> None:
        """Extensions are normalised to lower case."""
        (tmp_path / "README.MD").write_text("", encoding="utf-8")

        entries = scan_repo(tmp_path)
        md = next(e for e in entries if "README" in e.rel_path)

        assert md.extension == ".md"

    def test_no_extension(self, tmp_path: Path) -> None:
        """A file without an extension reports an empty ``extension``."""
        (tmp_path / "Makefile").write_text("", encoding="utf-8")

        entries = scan_repo(tmp_path)
        f = next(e for e in entries if e.rel_path == "Makefile")

        assert f.extension == ""

    def test_root_not_in_entries(self, tmp_path: Path) -> None:
        """The repository root itself never appears in the results."""
        (tmp_path / "a.py").write_text("", encoding="utf-8")

        entries = scan_repo(tmp_path)
        paths = {e.rel_path for e in entries}

        assert "." not in paths
        assert "" not in paths
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 属性测试 — Property 7: 扫描器排除规则
|
||||
# Feature: repo-audit, Property 7: 扫描器排除规则
|
||||
# Validates: Requirements 1.1
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
import fnmatch
|
||||
import string
|
||||
import tempfile
|
||||
|
||||
from hypothesis import given, settings
|
||||
from hypothesis import strategies as st
|
||||
|
||||
|
||||
# --- Generator strategies ---

# Characters allowed in generated file/directory names (no path separators
# or other special characters).
_SAFE_CHARS = "".join((string.ascii_lowercase, string.digits, "_-"))

# Strategy producing ordinary names that do not clash with exclusion patterns.
_safe_name = st.text(alphabet=_SAFE_CHARS, min_size=1, max_size=8)

# Directory names that appear in the exclusion patterns.
_EXCLUDED_DIR_NAMES = [
    ".git",
    "__pycache__",
    ".pytest_cache",
    ".kiro",
]

# File extension that appears in the exclusion patterns.
_EXCLUDED_FILE_EXT = ".pyc"

# Strategy that picks one of the excluded directory names at random.
_excluded_dir_name = st.sampled_from(_EXCLUDED_DIR_NAMES)
|
||||
|
||||
|
||||
def _build_tree(
    tmp: Path,
    normal_names: list[str],
    excluded_dirs: list[str],
    include_pyc: bool,
) -> None:
    """Populate ``tmp`` with ordinary files plus entries meant to be excluded.

    Args:
        tmp: Existing directory in which the tree is created.
        normal_names: Stems for ordinary files; each becomes ``<stem>.txt``
            (an empty stem falls back to ``f``). Files that already exist
            are left untouched.
        excluded_dirs: Directory names to create directly under ``tmp``,
            each receiving an ``inner.txt`` child. Duplicates are tolerated.
        include_pyc: When true, also write a ``module.pyc`` file in ``tmp``.
    """
    # Ordinary files.
    for name in normal_names:
        safe = name if name else "f"
        target = tmp / f"{safe}.txt"
        if target.exists():
            continue
        target.write_text("ok", encoding="utf-8")

    # Directories expected to be excluded, each with one child file.
    for dirname in excluded_dirs:
        excluded_root = tmp / dirname
        excluded_root.mkdir(exist_ok=True)
        excluded_root.joinpath("inner.txt").write_text("hidden", encoding="utf-8")

    # Optional compiled-bytecode file.
    if not include_pyc:
        return
    tmp.joinpath("module.pyc").write_bytes(b"\x00")
|
||||
|
||||
|
||||
class TestProperty7ScannerExclusionRules:
    """
    Property 7: scanner exclusion rules.

    For any file tree, the FileEntry list returned by scan_repo must not
    contain an entry whose rel_path matches an exclusion pattern
    (.git, __pycache__, .pytest_cache, and so on).

    Feature: repo-audit, Property 7: scanner exclusion rules
    Validates: Requirements 1.1
    """

    @given(
        normal_names=st.lists(_safe_name, min_size=0, max_size=5),
        excluded_dirs=st.lists(_excluded_dir_name, min_size=1, max_size=3),
        include_pyc=st.booleans(),
    )
    @settings(max_examples=100)
    def test_excluded_entries_never_in_results(
        self,
        normal_names: list[str],
        excluded_dirs: list[str],
        include_pyc: bool,
    ) -> None:
        """No scanned entry may match any of the default exclusion patterns."""
        # A fresh temporary directory is created per generated example.
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            _build_tree(root, normal_names, excluded_dirs, include_pyc)

            scanned = scan_repo(root)

            for entry in scanned:
                # Check every segment of rel_path against every pattern.
                segments = entry.rel_path.split("/")
                for part in segments:
                    for pat in EXCLUDED_PATTERNS:
                        hit = fnmatch.fnmatch(part, pat)
                        assert not hit, (
                            f"排除模式 '{pat}' 不应出现在结果中,"
                            f"但发现 rel_path='{entry.rel_path}' 包含 '{part}'"
                        )

    @given(
        excluded_dir=_excluded_dir_name,
        depth=st.integers(min_value=1, max_value=3),
    )
    @settings(max_examples=100)
    def test_excluded_dirs_at_any_depth(
        self,
        excluded_dir: str,
        depth: int,
    ) -> None:
        """An excluded directory is absent from results at any nesting depth."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)

            # Build level0/level1/.../<excluded_dir>/secret.txt
            current = root
            for i in range(depth):
                current = current / f"level{i}"
                current.mkdir(exist_ok=True)
                # Drop in an ordinary file so the parent directory is not empty.
                current.joinpath("keep.txt").write_text("ok", encoding="utf-8")

            # Place the excluded directory at the deepest level.
            excluded = current / excluded_dir
            excluded.mkdir(exist_ok=True)
            excluded.joinpath("secret.txt").write_text("hidden", encoding="utf-8")

            scanned = scan_repo(root)

            for entry in scanned:
                segments = entry.rel_path.split("/")
                assert excluded_dir not in segments, (
                    f"被排除目录 '{excluded_dir}' 不应出现在结果中,"
                    f"但发现 rel_path='{entry.rel_path}'"
                )

    @given(
        custom_patterns=st.lists(
            st.sampled_from(["*.log", "*.tmp", "*.bak", "node_modules", ".venv"]),
            min_size=1,
            max_size=3,
        ),
    )
    @settings(max_examples=100)
    def test_custom_exclude_patterns_respected(
        self,
        custom_patterns: list[str],
    ) -> None:
        """Caller-supplied exclusion patterns are applied by scan_repo as well."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)

            # One ordinary file.
            root.joinpath("main.py").write_text("pass", encoding="utf-8")

            # Create a matching file or directory for every custom pattern.
            for pat in custom_patterns:
                if not pat.startswith("*."):
                    # Exact-name pattern: create a directory with one child.
                    matching_dir = root / pat
                    matching_dir.mkdir(exist_ok=True)
                    matching_dir.joinpath("inner.txt").write_text(
                        "x", encoding="utf-8"
                    )
                    continue
                # Wildcard pattern: create a file carrying that extension.
                ext = pat[1:]  # e.g. ".log"
                (root / f"data{ext}").write_text("x", encoding="utf-8")

            scanned = scan_repo(root, exclude=custom_patterns)

            for entry in scanned:
                segments = entry.rel_path.split("/")
                for part in segments:
                    for pat in custom_patterns:
                        hit = fnmatch.fnmatch(part, pat)
                        assert not hit, (
                            f"自定义排除模式 '{pat}' 不应出现在结果中,"
                            f"但发现 rel_path='{entry.rel_path}' 包含 '{part}'"
                        )
|
||||
Reference in New Issue
Block a user