Files
ZQYY.FQ-ETL/tests/unit/test_audit_inventory.py

310 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
属性测试 — classify 完整性
Feature: repo-audit, Property 1: classify 完整性
Validates: Requirements 1.2, 1.3
对于任意 FileEntryclassify 函数返回的 InventoryItem 的 category 字段
应属于 Category 枚举disposition 字段应属于 Disposition 枚举,
且 description 字段为非空字符串。
"""
from __future__ import annotations
import string
from hypothesis import given, settings
from hypothesis import strategies as st
from scripts.audit import Category, Disposition, FileEntry, InventoryItem
from scripts.audit.inventory_analyzer import classify
# ---------------------------------------------------------------------------
# Generator strategies
# ---------------------------------------------------------------------------
# Common file extensions; the empty string stands for "no extension".
_EXTENSIONS = st.sampled_from(
    [
        "", ".py", ".sql", ".md", ".txt", ".json", ".csv", ".xlsx",
        ".bat", ".sh", ".ps1", ".lnk", ".rar", ".log", ".ini", ".cfg",
        ".toml", ".yaml", ".yml", ".html", ".css", ".js",
    ]
)

# Alphabet for one path segment: ASCII letters, digits and "_", "-", ".".
_PATH_CHARS = "".join((string.ascii_letters, string.digits, "_-."))

# A single path segment of 1 to 20 characters taken from _PATH_CHARS.
_path_segment = st.text(alphabet=_PATH_CHARS, min_size=1, max_size=20)

# Relative paths made of 1 to 4 segments joined with "/".
_rel_path = st.lists(_path_segment, min_size=1, max_size=4).map("/".join)
def _file_entry_strategy() -> st.SearchStrategy[FileEntry]:
    """Return a hypothesis strategy that builds random ``FileEntry`` objects.

    Every field is drawn from its own strategy, so the extension is not
    tied to ``rel_path`` and the two boolean flags can appear in any
    combination.
    """
    field_strategies = {
        "rel_path": _rel_path,
        "is_dir": st.booleans(),
        "size_bytes": st.integers(min_value=0, max_value=10_000_000),
        "extension": _EXTENSIONS,
        "is_empty_dir": st.booleans(),
    }
    return st.builds(FileEntry, **field_strategies)
# ---------------------------------------------------------------------------
# Property 1: classify completeness
# ---------------------------------------------------------------------------
@settings(max_examples=100)
@given(entry=_file_entry_strategy())
def test_classify_completeness(entry: FileEntry) -> None:
    """Property 1: classify completeness.

    Feature: repo-audit, Property 1: classify completeness
    Validates: Requirements 1.2, 1.3

    For any FileEntry, the InventoryItem returned by classify must satisfy:
    - category is a member of the Category enum
    - disposition is a member of the Disposition enum
    - description is a non-empty string
    """
    item = classify(entry)

    # The return value must be an InventoryItem before its fields are checked.
    assert isinstance(item, InventoryItem), (
        f"classify 应返回 InventoryItem实际返回 {type(item)}"
    )

    # category must be a Category enum member.
    assert isinstance(item.category, Category), (
        f"category 应为 Category 枚举成员,实际为 {item.category!r}"
    )

    # disposition must be a Disposition enum member.
    assert isinstance(item.disposition, Disposition), (
        f"disposition 应为 Disposition 枚举成员,实际为 {item.disposition!r}"
    )

    # description must be a string with at least one character.
    description = item.description
    is_nonempty_str = isinstance(description, str) and len(description) > 0
    assert is_nonempty_str, (
        f"description 应为非空字符串,实际为 {description!r}"
    )
# ---------------------------------------------------------------------------
# Helpers: high-priority directory prefixes (meant to be excluded from the
# lower-priority property tests)
# ---------------------------------------------------------------------------
_HIGH_PRIORITY_PREFIXES = (
    "tmp/",
    "logs/",
    "export/",
)

# Top-level directory names that do not trigger a high-priority rule.
_SAFE_TOP_DIRS = st.sampled_from(
    [
        "src", "lib", "data", "misc", "vendor", "tools", "archive",
        "assets", "resources", "contrib", "extras",
    ]
)

# Extensions other than .lnk / .rar (the empty string means no extension).
_SAFE_EXTENSIONS = st.sampled_from(
    [
        "", ".py", ".sql", ".md", ".txt", ".json", ".csv", ".xlsx",
        ".bat", ".sh", ".ps1", ".log", ".ini", ".cfg",
        ".toml", ".yaml", ".yml", ".html", ".css", ".js",
    ]
)
def _safe_rel_path() -> st.SearchStrategy[str]:
    """Return a strategy for relative paths under a ``_SAFE_TOP_DIRS`` entry.

    The path is the chosen top-level directory, optionally followed by up
    to three further segments, so it never starts with a high-priority
    directory prefix.
    """

    def _combine(top: str, rest: str) -> str:
        # An empty remainder means the path is just the top-level directory.
        if rest:
            return f"{top}/{rest}"
        return top

    # Joining an empty list yields "", which _combine treats as "no remainder".
    remainder = st.lists(_path_segment, min_size=0, max_size=3).map("/".join)
    return st.builds(_combine, top=_SAFE_TOP_DIRS, rest=remainder)
# ---------------------------------------------------------------------------
# Property 3: empty directories are marked as delete candidates
# ---------------------------------------------------------------------------
@settings(max_examples=100)
@given(data=st.data())
def test_empty_dir_candidate_delete(data: st.DataObject) -> None:
    """Property 3: empty directories are marked as delete candidates.

    Feature: repo-audit, Property 3: empty directories are marked as
    delete candidates
    Validates: Requirements 1.5

    For any FileEntry with is_empty_dir=True (paths starting with tmp/,
    logs/, reports/ or export/ and the .lnk/.rar extensions are excluded),
    classify must return Disposition.CANDIDATE_DELETE.
    """
    path = data.draw(_safe_rel_path())
    extension = data.draw(_SAFE_EXTENSIONS)
    empty_dir = FileEntry(
        rel_path=path,
        is_dir=True,
        size_bytes=0,
        extension=extension,
        is_empty_dir=True,
    )

    outcome = classify(empty_dir)

    assert outcome.disposition == Disposition.CANDIDATE_DELETE, (
        f"空目录 '{empty_dir.rel_path}' 应标记为候选删除,"
        f"实际为 {outcome.disposition.value}"
    )
# ---------------------------------------------------------------------------
# Property 4: .lnk/.rar files are marked as delete candidates
# ---------------------------------------------------------------------------
@settings(max_examples=100)
@given(data=st.data())
def test_lnk_rar_candidate_delete(data: st.DataObject) -> None:
    """Property 4: .lnk/.rar files are marked as delete candidates.

    Feature: repo-audit, Property 4: .lnk/.rar files are marked as
    delete candidates
    Validates: Requirements 1.6

    For any FileEntry whose extension is .lnk or .rar (paths starting with
    tmp/, logs/, reports/ or export/ are excluded, and is_empty_dir=False),
    classify must return Disposition.CANDIDATE_DELETE.
    """
    # Draw order is path, extension, size.
    path = data.draw(_safe_rel_path())
    extension = data.draw(st.sampled_from([".lnk", ".rar"]))
    size = data.draw(st.integers(min_value=0, max_value=10_000_000))
    shortcut_or_archive = FileEntry(
        rel_path=path,
        is_dir=False,
        size_bytes=size,
        extension=extension,
        is_empty_dir=False,
    )

    outcome = classify(shortcut_or_archive)

    assert outcome.disposition == Disposition.CANDIDATE_DELETE, (
        f"文件 '{shortcut_or_archive.rel_path}' (ext={extension}) 应标记为候选删除,"
        f"实际为 {outcome.disposition.value}"
    )
# ---------------------------------------------------------------------------
# Property 5: disposition range for entries under tmp/
# ---------------------------------------------------------------------------
# Extensions drawn for tmp/ entries, including .tmp and .bak.
_TMP_EXTENSIONS = st.sampled_from(
    [
        "", ".py", ".sql", ".md", ".txt", ".json", ".csv", ".xlsx",
        ".bat", ".sh", ".ps1", ".lnk", ".rar", ".log", ".ini", ".cfg",
        ".toml", ".yaml", ".yml", ".html", ".css", ".js", ".tmp", ".bak",
    ]
)
def _tmp_rel_path() -> st.SearchStrategy[str]:
    """Return a strategy for relative paths that start with ``tmp/``.

    One to three path segments are joined with "/" and placed under tmp/.
    """

    def _under_tmp(rest: str) -> str:
        return f"tmp/{rest}"

    tail = st.lists(_path_segment, min_size=1, max_size=3).map("/".join)
    return tail.map(_under_tmp)
@settings(max_examples=100)
@given(data=st.data())
def test_tmp_disposition_range(data: st.DataObject) -> None:
    """Property 5: disposition range for entries under tmp/.

    Feature: repo-audit, Property 5: disposition range for entries under tmp/
    Validates: Requirements 1.7

    For any FileEntry whose rel_path starts with tmp/, classify must return
    either CANDIDATE_DELETE or CANDIDATE_ARCHIVE as the disposition.
    """
    # Draw order is path, extension, is_dir, size, is_empty_dir.
    path = data.draw(_tmp_rel_path())
    extension = data.draw(_TMP_EXTENSIONS)
    is_directory = data.draw(st.booleans())
    size = data.draw(st.integers(min_value=0, max_value=10_000_000))
    is_empty = data.draw(st.booleans())
    tmp_entry = FileEntry(
        rel_path=path,
        is_dir=is_directory,
        size_bytes=size,
        extension=extension,
        is_empty_dir=is_empty,
    )

    outcome = classify(tmp_entry)

    acceptable = {Disposition.CANDIDATE_DELETE, Disposition.CANDIDATE_ARCHIVE}
    assert outcome.disposition in acceptable, (
        f"tmp/ 下文件 '{tmp_entry.rel_path}' 的处置应为候选删除或候选归档,"
        f"实际为 {outcome.disposition.value}"
    )
# ---------------------------------------------------------------------------
# Property 6: runtime output directories are marked as archive candidates
# ---------------------------------------------------------------------------
_RUNTIME_DIRS = st.sampled_from(["logs", "export"])


def _is_not_init_module(basename: str) -> bool:
    """Return True unless *basename* is exactly ``__init__.py``."""
    return basename != "__init__.py"


# Base names of 1 to 20 characters from _PATH_CHARS, excluding __init__.py.
_NON_INIT_BASENAME = st.text(
    alphabet=_PATH_CHARS, min_size=1, max_size=20
).filter(_is_not_init_module)
def _runtime_output_rel_path() -> st.SearchStrategy[str]:
    """Return a strategy for paths under a ``_RUNTIME_DIRS`` directory.

    The path starts with logs/ or export/, may contain up to two
    intermediate segments, and ends with a base name that is never
    ``__init__.py``.
    """

    def _assemble(top: str, mid: list[str], name: str) -> str:
        # With an empty ``mid`` this collapses to "top/name".
        return "/".join([top, *mid, name])

    return st.builds(
        _assemble,
        top=_RUNTIME_DIRS,
        mid=st.lists(_path_segment, min_size=0, max_size=2),
        name=_NON_INIT_BASENAME,
    )
@settings(max_examples=100)
@given(data=st.data())
def test_runtime_output_candidate_archive(data: st.DataObject) -> None:
    """Property 6: runtime output directories are marked as archive candidates.

    Feature: repo-audit, Property 6: runtime output directories are marked
    as archive candidates
    Validates: Requirements 1.8

    For any FileEntry whose rel_path starts with logs/ or export/ and whose
    base name is not __init__.py, classify must return CANDIDATE_ARCHIVE.
    Requirement 1.8 covers only the logs/ and export/ directories
    (reports/ is not included).
    """
    # Draw order is path, extension, is_dir, size, is_empty_dir.
    path = data.draw(_runtime_output_rel_path())
    extension = data.draw(_EXTENSIONS)
    is_directory = data.draw(st.booleans())
    size = data.draw(st.integers(min_value=0, max_value=10_000_000))
    is_empty = data.draw(st.booleans())
    runtime_entry = FileEntry(
        rel_path=path,
        is_dir=is_directory,
        size_bytes=size,
        extension=extension,
        is_empty_dir=is_empty,
    )

    outcome = classify(runtime_entry)

    assert outcome.disposition == Disposition.CANDIDATE_ARCHIVE, (
        f"运行时产出 '{runtime_entry.rel_path}' 应标记为候选归档,"
        f"实际为 {outcome.disposition.value}"
    )