126 lines
5.0 KiB
Python
126 lines
5.0 KiB
Python
# -*- coding: utf-8 -*-
"""
Property 5: 文件迁移完整性(已归档)

原始用途:验证从 C:\\ZQYY\\FQ-ETL\\ 迁移到 NeoZQYY monorepo 时,
源目录中的每个文件在目标目录的对应位置都应存在且内容一致。

归档原因:迁移已于 2025 年完成,后续多轮重构(dwd-phase1-refactor、
etl-dws-flow-refactor、ods-dedup-standardize 等)对目标代码做了大量
结构性修改,源-目标 1:1 对比前提不再成立。扫描显示 50+ 个文件已合理分化。

如需重新启用,可移除模块级 skip 标记。

**Validates: Requirements 5.1, 5.2, 5.3**
"""
|
||
import pytest
|
||
|
||
# The migration is finished and the target code has legitimately diverged
# after several refactoring rounds, so the whole module is skipped.
# Remove this module-level marker to re-enable the tests.
pytestmark = pytest.mark.skip(
    reason="文件迁移已完成,后续重构导致源-目标合理分化(50+ 文件),测试使命结束"
)
|
||
|
||
import hashlib
|
||
import os
|
||
from typing import List, Tuple
|
||
|
||
from hypothesis import given, settings
|
||
from hypothesis.strategies import sampled_from
|
||
|
||
# Source -> target directory mappings
# (requirement 5.1: ETL business code, 5.2: database, 5.3: tests).
# Each entry is (source directory, target directory).
MIGRATION_MAPPINGS: List[Tuple[str, str]] = [
    (r"C:\ZQYY\FQ-ETL\api", r"C:\NeoZQYY\apps\etl\connectors\feiqiu\api"),
    (r"C:\ZQYY\FQ-ETL\cli", r"C:\NeoZQYY\apps\etl\connectors\feiqiu\cli"),
    (r"C:\ZQYY\FQ-ETL\config", r"C:\NeoZQYY\apps\etl\connectors\feiqiu\config"),
    (r"C:\ZQYY\FQ-ETL\loaders", r"C:\NeoZQYY\apps\etl\connectors\feiqiu\loaders"),
    (r"C:\ZQYY\FQ-ETL\models", r"C:\NeoZQYY\apps\etl\connectors\feiqiu\models"),
    (r"C:\ZQYY\FQ-ETL\orchestration", r"C:\NeoZQYY\apps\etl\connectors\feiqiu\orchestration"),
    (r"C:\ZQYY\FQ-ETL\scd", r"C:\NeoZQYY\apps\etl\connectors\feiqiu\scd"),
    (r"C:\ZQYY\FQ-ETL\tasks", r"C:\NeoZQYY\apps\etl\connectors\feiqiu\tasks"),
    (r"C:\ZQYY\FQ-ETL\utils", r"C:\NeoZQYY\apps\etl\connectors\feiqiu\utils"),
    (r"C:\ZQYY\FQ-ETL\quality", r"C:\NeoZQYY\apps\etl\connectors\feiqiu\quality"),
    (r"C:\ZQYY\FQ-ETL\tests\unit", r"C:\NeoZQYY\apps\etl\connectors\feiqiu\tests\unit"),
    (r"C:\ZQYY\FQ-ETL\tests\integration", r"C:\NeoZQYY\apps\etl\connectors\feiqiu\tests\integration"),
]
|
||
|
||
# Directory names that _collect_py_files prunes from its walk.
EXCLUDE_DIRS = {"__pycache__", ".pytest_cache", ".hypothesis"}
|
||
|
||
|
||
def _file_hash(filepath: str) -> str:
    """Return the SHA-256 hex digest of the file at ``filepath``.

    The file is opened in binary mode and read in 8 KiB chunks, so it is
    hashed without loading the whole content into memory at once.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with open(filepath, "rb") as f:
        # iter() with a sentinel keeps calling f.read until it returns b"" (EOF).
        for chunk in iter(lambda: f.read(8192), b""):
            digest.update(chunk)
    return digest.hexdigest()
|
||
|
||
|
||
def _collect_py_files(root_dir: str) -> List[str]:
    """Recursively collect the ``.py`` files under ``root_dir``.

    Directories whose name is in ``EXCLUDE_DIRS`` are pruned from the walk.

    Returns:
        Sorted list of file paths relative to ``root_dir``.
    """
    result: List[str] = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        # Slice-assign in place so os.walk does not descend into excluded dirs.
        dirnames[:] = [d for d in dirnames if d not in EXCLUDE_DIRS]
        result.extend(
            os.path.relpath(os.path.join(dirpath, fname), root_dir)
            for fname in filenames
            if fname.endswith(".py")
        )
    return sorted(result)
|
||
|
||
|
||
@settings(max_examples=100)
@given(mapping=sampled_from(MIGRATION_MAPPINGS))
def test_all_source_files_exist_in_target(mapping: Tuple[str, str]) -> None:
    """
    Property 5 (existence): every .py file in a source directory must exist
    at the corresponding relative location in the target directory.

    ``mapping`` is one (source directory, target directory) pair drawn from
    MIGRATION_MAPPINGS.

    **Validates: Requirements 5.1, 5.2, 5.3**
    """
    src_dir, dst_dir = mapping
    assert os.path.isdir(src_dir), f"源目录不存在: {src_dir}"
    assert os.path.isdir(dst_dir), f"目标目录不存在: {dst_dir}"

    src_files = _collect_py_files(src_dir)
    # An empty source listing would make the check below pass vacuously.
    assert src_files, f"源目录无 .py 文件: {src_dir}"

    missing = [
        rel_path
        for rel_path in src_files
        if not os.path.isfile(os.path.join(dst_dir, rel_path))
    ]

    # Only the first 10 missing files are listed to keep the message readable.
    assert not missing, (
        f"目标目录 {dst_dir} 缺少 {len(missing)} 个文件:\n"
        + "\n".join(f" - {f}" for f in missing[:10])
        + (f"\n ... 及其他 {len(missing) - 10} 个" if len(missing) > 10 else "")
    )
|
||
|
||
|
||
@settings(max_examples=100)
@given(mapping=sampled_from(MIGRATION_MAPPINGS))
def test_source_and_target_file_content_identical(mapping: Tuple[str, str]) -> None:
    """
    Property 5 (content equality): files present in both the source and the
    target directory must have identical content (compared by SHA-256).

    ``mapping`` is one (source directory, target directory) pair drawn from
    MIGRATION_MAPPINGS.

    **Validates: Requirements 5.1, 5.2, 5.3**
    """
    src_dir, dst_dir = mapping
    assert os.path.isdir(src_dir), f"源目录不存在: {src_dir}"
    assert os.path.isdir(dst_dir), f"目标目录不存在: {dst_dir}"

    src_files = _collect_py_files(src_dir)
    mismatched = []

    for rel_path in src_files:
        src_path = os.path.join(src_dir, rel_path)
        dst_path = os.path.join(dst_dir, rel_path)
        # Files absent from the target are skipped here, not reported.
        if not os.path.isfile(dst_path):
            continue
        if _file_hash(src_path) != _file_hash(dst_path):
            mismatched.append(rel_path)

    # Only the first 10 mismatches are listed to keep the message readable.
    assert not mismatched, (
        f"源目录 {src_dir} 与目标目录 {dst_dir} 中 {len(mismatched)} 个文件内容不一致:\n"
        + "\n".join(f" - {f}" for f in mismatched[:10])
        + (f"\n ... 及其他 {len(mismatched) - 10} 个" if len(mismatched) > 10 else "")
    )
|