# -*- coding: utf-8 -*-
"""
Property 5: file migration integrity.

For every source -> target directory mapping (ETL business code, database
files, tests directory), each file in the source directory must exist at the
corresponding location in the target directory with identical content.

Only ``.py`` files are compared, and cache directories listed in
``EXCLUDE_DIRS`` are skipped.

**Validates: Requirements 5.1, 5.2, 5.3**
"""
import hashlib
import os
from typing import List, Tuple

from hypothesis import given, settings
from hypothesis.strategies import sampled_from

# Source -> target directory mappings
# (requirement 5.1: ETL business code, 5.2: database, 5.3: tests).
# NOTE(review): no database (5.2) mapping is listed below — confirm whether
# it is intentionally covered elsewhere.
MIGRATION_MAPPINGS: List[Tuple[str, str]] = [
    # ETL business code directories (requirement 5.1)
    (r"C:\ZQYY\FQ-ETL\api", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\api"),
    (r"C:\ZQYY\FQ-ETL\cli", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\cli"),
    (r"C:\ZQYY\FQ-ETL\config", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\config"),
    (r"C:\ZQYY\FQ-ETL\loaders", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\loaders"),
    (r"C:\ZQYY\FQ-ETL\models", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\models"),
    (r"C:\ZQYY\FQ-ETL\orchestration", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\orchestration"),
    (r"C:\ZQYY\FQ-ETL\scd", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\scd"),
    (r"C:\ZQYY\FQ-ETL\tasks", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\tasks"),
    (r"C:\ZQYY\FQ-ETL\utils", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\utils"),
    (r"C:\ZQYY\FQ-ETL\quality", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\quality"),
    # tests subdirectories (requirement 5.3) — only the ETL project's own
    # unit/integration suites are mapped; monorepo-level property tests
    # (test_property_*.py) live under C:\NeoZQYY\tests by design.
    (r"C:\ZQYY\FQ-ETL\tests\unit", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\tests\unit"),
    (r"C:\ZQYY\FQ-ETL\tests\integration", r"C:\NeoZQYY\apps\etl\pipelines\feiqiu\tests\integration"),
]

# Directory names that never take part in the comparison (caches etc.).
EXCLUDE_DIRS = {"__pycache__", ".pytest_cache", ".hypothesis"}


def _file_hash(filepath: str) -> str:
    """Return the hex SHA-256 digest of the file at *filepath*.

    The file is read in 8 KiB chunks so large files are never loaded
    into memory in one piece.
    """
    digest = hashlib.sha256()
    with open(filepath, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _collect_py_files(root_dir: str) -> List[str]:
    """Return the sorted relative paths of all ``.py`` files under *root_dir*.

    Directories named in ``EXCLUDE_DIRS`` are pruned from the walk.
    Paths are relative to *root_dir*.
    """
    result: List[str] = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        # Prune in place so os.walk does not descend into excluded dirs.
        dirnames[:] = [d for d in dirnames if d not in EXCLUDE_DIRS]
        result.extend(
            os.path.relpath(os.path.join(dirpath, fname), root_dir)
            for fname in filenames
            if fname.endswith(".py")
        )
    return sorted(result)


def _summarize_paths(paths: List[str], limit: int = 10) -> str:
    """Format *paths* as a bullet list, showing at most *limit* entries.

    When more than *limit* paths are given, a trailing line reports how
    many were left out.
    """
    listing = "\n".join(f" - {p}" for p in paths[:limit])
    hidden = len(paths) - limit
    if hidden > 0:
        listing += f"\n ... 及其他 {hidden} 个"
    return listing


# deadline=None: each example walks a real directory tree, so its run time
# depends on disk/cache state; the per-example deadline would otherwise be
# able to fail the test for timing reasons unrelated to the property.
@settings(max_examples=100, deadline=None)
@given(mapping=sampled_from(MIGRATION_MAPPINGS))
def test_all_source_files_exist_in_target(mapping: Tuple[str, str]) -> None:
    """
    Property 5 (existence): every ``.py`` file in the source directory must
    exist at the corresponding location in the target directory.

    **Validates: Requirements 5.1, 5.2, 5.3**
    """
    src_dir, dst_dir = mapping
    assert os.path.isdir(src_dir), f"源目录不存在: {src_dir}"
    assert os.path.isdir(dst_dir), f"目标目录不存在: {dst_dir}"

    src_files = _collect_py_files(src_dir)
    # An empty source would make the property pass vacuously.
    assert src_files, f"源目录无 .py 文件: {src_dir}"

    missing = [
        rel_path
        for rel_path in src_files
        if not os.path.isfile(os.path.join(dst_dir, rel_path))
    ]
    assert not missing, (
        f"目标目录 {dst_dir} 缺少 {len(missing)} 个文件:\n"
        + _summarize_paths(missing)
    )


# deadline=None: each example hashes every file of a real directory tree, so
# its run time depends on disk/cache state rather than on the code under test.
@settings(max_examples=100, deadline=None)
@given(mapping=sampled_from(MIGRATION_MAPPINGS))
def test_source_and_target_file_content_identical(mapping: Tuple[str, str]) -> None:
    """
    Property 5 (content equality): corresponding files in the source and
    target directories must have identical content (compared by SHA-256).

    Files missing from the target are skipped here; the existence property
    reports them.

    **Validates: Requirements 5.1, 5.2, 5.3**
    """
    src_dir, dst_dir = mapping
    assert os.path.isdir(src_dir), f"源目录不存在: {src_dir}"
    assert os.path.isdir(dst_dir), f"目标目录不存在: {dst_dir}"

    mismatched = []
    for rel_path in _collect_py_files(src_dir):
        src_path = os.path.join(src_dir, rel_path)
        dst_path = os.path.join(dst_dir, rel_path)
        if not os.path.isfile(dst_path):
            # Missing targets are the existence test's responsibility.
            continue
        if _file_hash(src_path) != _file_hash(dst_path):
            mismatched.append(rel_path)

    assert not mismatched, (
        f"源目录 {src_dir} 与目标目录 {dst_dir} 中 {len(mismatched)} 个文件内容不一致:\n"
        + _summarize_paths(mismatched)
    )