初始提交:飞球 ETL 系统全量代码
This commit is contained in:
309
tests/unit/test_audit_inventory.py
Normal file
309
tests/unit/test_audit_inventory.py
Normal file
@@ -0,0 +1,309 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
属性测试 — classify 完整性
|
||||
|
||||
Feature: repo-audit, Property 1: classify 完整性
|
||||
Validates: Requirements 1.2, 1.3
|
||||
|
||||
对于任意 FileEntry,classify 函数返回的 InventoryItem 的 category 字段
|
||||
应属于 Category 枚举,disposition 字段应属于 Disposition 枚举,
|
||||
且 description 字段为非空字符串。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import string
|
||||
|
||||
from hypothesis import given, settings
|
||||
from hypothesis import strategies as st
|
||||
|
||||
from scripts.audit import Category, Disposition, FileEntry, InventoryItem
|
||||
from scripts.audit.inventory_analyzer import classify
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 生成器策略
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Common file extensions; the empty string stands for "no extension".
_EXTENSIONS = st.sampled_from((
    "", ".py", ".sql", ".md", ".txt", ".json", ".csv", ".xlsx",
    ".bat", ".sh", ".ps1", ".lnk", ".rar", ".log", ".ini", ".cfg",
    ".toml", ".yaml", ".yml", ".html", ".css", ".js",
))

# Characters allowed in a path segment: ASCII letters, digits, "_", "-", ".".
_PATH_CHARS = "".join((string.ascii_letters, string.digits, "_-."))

# A single path segment of 1 to 20 characters drawn from _PATH_CHARS.
_path_segment = st.text(alphabet=_PATH_CHARS, min_size=1, max_size=20)

# Relative paths made of 1 to 4 segments joined by "/".
_rel_path = st.lists(_path_segment, min_size=1, max_size=4).map("/".join)
|
||||
|
||||
|
||||
def _file_entry_strategy() -> st.SearchStrategy[FileEntry]:
    """Return a hypothesis strategy that builds arbitrary ``FileEntry`` values.

    Each field is drawn independently: a 1-4 segment relative path, a
    directory flag, a size between 0 and 10,000,000 bytes, an extension
    from ``_EXTENSIONS``, and an empty-directory flag.
    """
    field_strategies = {
        "rel_path": _rel_path,
        "is_dir": st.booleans(),
        "size_bytes": st.integers(min_value=0, max_value=10_000_000),
        "extension": _EXTENSIONS,
        "is_empty_dir": st.booleans(),
    }
    return st.builds(FileEntry, **field_strategies)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Property 1: classify 完整性
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@given(entry=_file_entry_strategy())
@settings(max_examples=100)
def test_classify_completeness(entry: FileEntry) -> None:
    """Property 1: classify completeness.

    Feature: repo-audit, Property 1: classify completeness
    Validates: Requirements 1.2, 1.3

    For any generated ``FileEntry``, ``classify`` must return an
    ``InventoryItem`` whose ``category`` is a ``Category`` member, whose
    ``disposition`` is a ``Disposition`` member, and whose ``description``
    is a non-empty string.
    """
    result = classify(entry)

    # The return type is checked first, before any field is inspected.
    assert isinstance(result, InventoryItem), (
        f"classify 应返回 InventoryItem,实际返回 {type(result)}"
    )

    # Both classification fields must be members of their enums.
    category = result.category
    assert isinstance(category, Category), (
        f"category 应为 Category 枚举成员,实际为 {result.category!r}"
    )

    disposition = result.disposition
    assert isinstance(disposition, Disposition), (
        f"disposition 应为 Disposition 枚举成员,实际为 {result.disposition!r}"
    )

    # The description must be a string and must not be empty.
    description = result.description
    assert isinstance(description, str) and description != "", (
        f"description 应为非空字符串,实际为 {result.description!r}"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 辅助:高优先级目录前缀(用于在低优先级属性测试中排除)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Directory prefixes treated as high priority by the surrounding tests.
# NOTE(review): this constant is not referenced in the visible part of the
# file; confirm whether it is still needed.
_HIGH_PRIORITY_PREFIXES = ("tmp/", "logs/", "export/")

# Top-level directory names chosen so that they do not trigger the
# high-priority rules.
_SAFE_TOP_DIRS = st.sampled_from((
    "src", "lib", "data", "misc", "vendor", "tools", "archive",
    "assets", "resources", "contrib", "extras",
))

# Extensions other than .lnk and .rar.
_SAFE_EXTENSIONS = st.sampled_from((
    "", ".py", ".sql", ".md", ".txt", ".json", ".csv", ".xlsx",
    ".bat", ".sh", ".ps1", ".log", ".ini", ".cfg",
    ".toml", ".yaml", ".yml", ".html", ".css", ".js",
))
|
||||
|
||||
|
||||
def _safe_rel_path() -> st.SearchStrategy[str]:
    """Return a strategy for relative paths rooted at a safe top-level directory.

    The first component comes from ``_SAFE_TOP_DIRS``; it is followed by 0 to
    3 further path segments.
    """

    def _join(top, rest):
        # With no trailing segments the path is just the top-level directory.
        if not rest:
            return top
        return f"{top}/{rest}"

    # Joining an empty list yields "", which _join treats as "no tail".
    tail = st.lists(_path_segment, min_size=0, max_size=3).map("/".join)
    return st.builds(_join, top=_SAFE_TOP_DIRS, rest=tail)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Property 3: 空目录标记为候选删除
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@given(data=st.data())
@settings(max_examples=100)
def test_empty_dir_candidate_delete(data: st.DataObject) -> None:
    """Property 3: empty directories are marked as delete candidates.

    Feature: repo-audit, Property 3: empty directories are delete candidates
    Validates: Requirements 1.5

    For any ``FileEntry`` with ``is_empty_dir=True`` whose path is drawn from
    ``_safe_rel_path()`` and whose extension is drawn from
    ``_SAFE_EXTENSIONS`` (so never .lnk or .rar), ``classify`` must return
    ``Disposition.CANDIDATE_DELETE``.
    """
    # The path is drawn before the extension.
    path = data.draw(_safe_rel_path())
    ext = data.draw(_SAFE_EXTENSIONS)

    # An empty directory: directory flag set, zero size, empty flag set.
    entry = FileEntry(
        rel_path=path,
        is_dir=True,
        size_bytes=0,
        extension=ext,
        is_empty_dir=True,
    )
    result = classify(entry)

    expected = Disposition.CANDIDATE_DELETE
    assert result.disposition == expected, (
        f"空目录 '{entry.rel_path}' 应标记为候选删除,"
        f"实际为 {result.disposition.value}"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Property 4: .lnk/.rar 文件标记为候选删除
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@given(data=st.data())
@settings(max_examples=100)
def test_lnk_rar_candidate_delete(data: st.DataObject) -> None:
    """Property 4: .lnk / .rar files are marked as delete candidates.

    Feature: repo-audit, Property 4: .lnk/.rar files are delete candidates
    Validates: Requirements 1.6

    For any non-directory ``FileEntry`` with extension .lnk or .rar whose
    path is drawn from ``_safe_rel_path()`` and whose ``is_empty_dir`` is
    False, ``classify`` must return ``Disposition.CANDIDATE_DELETE``.
    """
    # Draws happen in the order: path, extension, size.
    path = data.draw(_safe_rel_path())
    ext = data.draw(st.sampled_from([".lnk", ".rar"]))
    size = data.draw(st.integers(min_value=0, max_value=10_000_000))

    entry = FileEntry(
        rel_path=path,
        is_dir=False,
        size_bytes=size,
        extension=ext,
        is_empty_dir=False,
    )
    result = classify(entry)

    expected = Disposition.CANDIDATE_DELETE
    assert result.disposition == expected, (
        f"文件 '{entry.rel_path}' (ext={ext}) 应标记为候选删除,"
        f"实际为 {result.disposition.value}"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Property 5: tmp/ 下文件处置范围
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Extensions used for entries under tmp/: the common set plus .tmp and .bak.
_TMP_EXTENSIONS = st.sampled_from((
    "", ".py", ".sql", ".md", ".txt", ".json", ".csv", ".xlsx",
    ".bat", ".sh", ".ps1", ".lnk", ".rar", ".log", ".ini", ".cfg",
    ".toml", ".yaml", ".yml", ".html", ".css", ".js", ".tmp", ".bak",
))
|
||||
|
||||
|
||||
def _tmp_rel_path() -> st.SearchStrategy[str]:
    """Return a strategy for relative paths that start with ``tmp/``.

    The prefix is followed by 1 to 3 path segments joined by "/".
    """

    def _under_tmp(rest):
        return f"tmp/{rest}"

    tail = st.lists(_path_segment, min_size=1, max_size=3).map("/".join)
    return st.builds(_under_tmp, rest=tail)
|
||||
|
||||
|
||||
@given(data=st.data())
@settings(max_examples=100)
def test_tmp_disposition_range(data: st.DataObject) -> None:
    """Property 5: allowed dispositions for entries under tmp/.

    Feature: repo-audit, Property 5: disposition range under tmp/
    Validates: Requirements 1.7

    For any ``FileEntry`` whose ``rel_path`` starts with ``tmp/``,
    ``classify`` must return either ``CANDIDATE_DELETE`` or
    ``CANDIDATE_ARCHIVE``.
    """
    # Draws happen in the order: path, extension, is_dir, size, is_empty_dir.
    path = data.draw(_tmp_rel_path())
    ext = data.draw(_TMP_EXTENSIONS)
    is_dir = data.draw(st.booleans())
    size = data.draw(st.integers(min_value=0, max_value=10_000_000))
    is_empty_dir = data.draw(st.booleans())

    entry = FileEntry(
        rel_path=path,
        is_dir=is_dir,
        size_bytes=size,
        extension=ext,
        is_empty_dir=is_empty_dir,
    )
    result = classify(entry)

    allowed = frozenset(
        (Disposition.CANDIDATE_DELETE, Disposition.CANDIDATE_ARCHIVE)
    )
    assert result.disposition in allowed, (
        f"tmp/ 下文件 '{entry.rel_path}' 的处置应为候选删除或候选归档,"
        f"实际为 {result.disposition.value}"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Property 6: 运行时产出目录标记为候选归档
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Top-level directories that hold runtime output.
_RUNTIME_DIRS = st.sampled_from(("logs", "export"))

# Basenames of 1 to 20 characters from _PATH_CHARS, never exactly
# "__init__.py". _path_segment is the same text strategy, so it is reused.
_NON_INIT_BASENAME = _path_segment.filter(lambda name: name != "__init__.py")
|
||||
|
||||
|
||||
def _runtime_output_rel_path() -> st.SearchStrategy[str]:
    """Return a strategy for relative paths under a runtime-output directory.

    The top-level directory is drawn from ``_RUNTIME_DIRS`` (``logs`` or
    ``export``; ``reports`` is not included). It is followed by 0 to 2
    intermediate segments and a basename drawn from ``_NON_INIT_BASENAME``,
    so the basename is never ``__init__.py``.
    """

    def _assemble(top, mid, name):
        # A single join covers both the flat case (no intermediate
        # directories) and the nested case.
        return "/".join([top, *mid, name])

    return st.builds(
        _assemble,
        top=_RUNTIME_DIRS,
        mid=st.lists(_path_segment, min_size=0, max_size=2),
        name=_NON_INIT_BASENAME,
    )
|
||||
|
||||
|
||||
@given(data=st.data())
@settings(max_examples=100)
def test_runtime_output_candidate_archive(data: st.DataObject) -> None:
    """Property 6: runtime-output directories are marked as archive candidates.

    Feature: repo-audit, Property 6: runtime output is an archive candidate
    Validates: Requirements 1.8

    For any ``FileEntry`` whose ``rel_path`` starts with ``logs/`` or
    ``export/`` and whose basename is not ``__init__.py``, ``classify`` must
    return ``CANDIDATE_ARCHIVE``. Requirement 1.8 covers only ``logs/`` and
    ``export/``, not ``reports/``.
    """
    # Draws happen in the order: path, extension, is_dir, size, is_empty_dir.
    path = data.draw(_runtime_output_rel_path())
    ext = data.draw(_EXTENSIONS)
    is_dir = data.draw(st.booleans())
    size = data.draw(st.integers(min_value=0, max_value=10_000_000))
    is_empty_dir = data.draw(st.booleans())

    entry = FileEntry(
        rel_path=path,
        is_dir=is_dir,
        size_bytes=size,
        extension=ext,
        is_empty_dir=is_empty_dir,
    )
    result = classify(entry)

    expected = Disposition.CANDIDATE_ARCHIVE
    assert result.disposition == expected, (
        f"运行时产出 '{entry.rel_path}' 应标记为候选归档,"
        f"实际为 {result.disposition.value}"
    )
|
||||
Reference in New Issue
Block a user