122 lines
4.8 KiB
Python
122 lines
4.8 KiB
Python
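# -*- coding: utf-8 -*-
"""
Property 5: File migration completeness

For any source-to-target directory mapping (ETL business code, database files,
tests directory), every file in the source directory should exist at the
corresponding location in the target directory with identical content.

**Validates: Requirements 5.1, 5.2, 5.3**
"""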
|
||
import hashlib
|
||
import os
|
||
from typing import List, Tuple
|
||
|
||
from hypothesis import given, settings
|
||
from hypothesis.strategies import sampled_from
|
||
|
||
# Source -> target directory mappings, as (source_dir, target_dir) pairs
# (requirement 5.1: ETL business code, 5.2: database, 5.3: tests).
# NOTE(review): requirement 5.2 (database) is cited here, but no database
# mapping appears in this list — confirm where that requirement is covered.
MIGRATION_MAPPINGS: List[Tuple[str, str]] = [
    # ETL business code directories (requirement 5.1)
    (r"C:\ZQYY\FQ-ETL\api", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\api"),
    (r"C:\ZQYY\FQ-ETL\cli", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\cli"),
    (r"C:\ZQYY\FQ-ETL\config", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\config"),
    (r"C:\ZQYY\FQ-ETL\loaders", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\loaders"),
    (r"C:\ZQYY\FQ-ETL\models", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\models"),
    (r"C:\ZQYY\FQ-ETL\orchestration", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\orchestration"),
    (r"C:\ZQYY\FQ-ETL\scd", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\scd"),
    (r"C:\ZQYY\FQ-ETL\tasks", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\tasks"),
    (r"C:\ZQYY\FQ-ETL\utils", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\utils"),
    (r"C:\ZQYY\FQ-ETL\quality", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\quality"),
    # tests subdirectories (requirement 5.3) — only the ETL's own
    # unit/integration tests are mapped; monorepo-level property tests
    # (test_property_*.py) live in C:\NeoZQYY\tests\ by design.
    (r"C:\ZQYY\FQ-ETL\tests\unit", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\tests\unit"),
    (r"C:\ZQYY\FQ-ETL\tests\integration", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\tests\integration"),
]
|
||
|
||
# Exclusion set: directory names such as __pycache__ that do not take part
# in the source/target comparison.
EXCLUDE_DIRS = {"__pycache__", ".pytest_cache", ".hypothesis"}
|
||
|
||
|
||
def _file_hash(filepath: str) -> str:
    """Return the hex-encoded SHA-256 digest of the file at ``filepath``.

    The file is opened in binary mode and fed to the hasher in 8192-byte
    reads, so the whole content is never held in memory at once.
    """
    digest = hashlib.sha256()
    with open(filepath, "rb") as stream:
        while True:
            block = stream.read(8192)
            if not block:
                # An empty read marks end of file.
                break
            digest.update(block)
    return digest.hexdigest()
|
||
|
||
|
||
def _collect_py_files(root_dir: str) -> List[str]:
    """Recursively gather the ``.py`` files under ``root_dir``.

    Returns a sorted list of paths relative to ``root_dir``. Directories
    whose names appear in ``EXCLUDE_DIRS`` are not descended into.
    """
    collected: List[str] = []
    for current_dir, subdirs, files in os.walk(root_dir):
        # Prune in place so os.walk skips the excluded directories entirely.
        subdirs[:] = [name for name in subdirs if name not in EXCLUDE_DIRS]
        collected.extend(
            os.path.relpath(os.path.join(current_dir, name), root_dir)
            for name in files
            if name.endswith(".py")
        )
    collected.sort()
    return collected
|
||
|
||
|
||
@settings(max_examples=100)
@given(mapping=sampled_from(MIGRATION_MAPPINGS))
def test_all_source_files_exist_in_target(mapping: Tuple[str, str]) -> None:
    """
    Property 5 (existence): every .py file in the source directory should
    exist at the corresponding location in the target directory.

    **Validates: Requirements 5.1, 5.2, 5.3**
    """
    source_root, target_root = mapping

    # Both ends of the mapping must be real directories before comparing.
    assert os.path.isdir(source_root), f"源目录不存在: {source_root}"
    assert os.path.isdir(target_root), f"目标目录不存在: {target_root}"

    relative_paths = _collect_py_files(source_root)
    assert len(relative_paths) > 0, f"源目录无 .py 文件: {source_root}"

    # Relative paths of source files with no counterpart under the target root.
    absent = [
        rel
        for rel in relative_paths
        if not os.path.isfile(os.path.join(target_root, rel))
    ]

    # The failure message lists at most the first ten offenders.
    assert not absent, (
        f"目标目录 {target_root} 缺少 {len(absent)} 个文件:\n"
        + "\n".join(f" - {name}" for name in absent[:10])
        + (f"\n ... 及其他 {len(absent) - 10} 个" if len(absent) > 10 else "")
    )
|
||
|
||
|
||
@settings(max_examples=100)
@given(mapping=sampled_from(MIGRATION_MAPPINGS))
def test_source_and_target_file_content_identical(mapping: Tuple[str, str]) -> None:
    """
    Property 5 (content equality): corresponding files in the source and
    target directories should have exactly the same content.

    **Validates: Requirements 5.1, 5.2, 5.3**
    """
    source_root, target_root = mapping

    # Both ends of the mapping must be real directories before comparing.
    assert os.path.isdir(source_root), f"源目录不存在: {source_root}"
    assert os.path.isdir(target_root), f"目标目录不存在: {target_root}"

    differing = []
    for rel in _collect_py_files(source_root):
        source_file = os.path.join(source_root, rel)
        target_file = os.path.join(target_root, rel)
        # Files absent from the target are skipped here; only files present
        # on both sides are hashed (source first, then target) and compared.
        if os.path.isfile(target_file) and _file_hash(source_file) != _file_hash(target_file):
            differing.append(rel)

    # The failure message lists at most the first ten offenders.
    assert not differing, (
        f"源目录 {source_root} 与目标目录 {target_root} 中 {len(differing)} 个文件内容不一致:\n"
        + "\n".join(f" - {name}" for name in differing[:10])
        + (f"\n ... 及其他 {len(differing) - 10} 个" if len(differing) > 10 else "")
    )