87 lines
2.5 KiB
Python
87 lines
2.5 KiB
Python
# -*- coding: utf-8 -*-
|
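"""Batch post-load verification framework.

Provides batch verification and backfill for the data of each layer:
- ODS layer: primary key + content_hash comparison, batch UPSERT
- DWD layer: dimension SCD2 / fact primary-key comparison, batch UPSERT
- DWS layer: aggregate comparison, batch recompute and UPSERT
- INDEX layer: entity-coverage comparison, batch recompute and UPSERT
"""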
|
||
|
||
from .models import (
|
||
VerificationResult,
|
||
VerificationSummary,
|
||
VerificationStatus,
|
||
WindowSegment,
|
||
build_window_segments,
|
||
filter_verify_tables,
|
||
)
|
||
from .base_verifier import BaseVerifier
|
||
from .ods_verifier import OdsVerifier
|
||
from .dwd_verifier import DwdVerifier
|
||
from .dws_verifier import DwsVerifier
|
||
from .index_verifier import IndexVerifier
|
||
|
||
# Public API of the package: every name re-exported by the imports above.
__all__ = [
    # Models and window/table helpers (from .models)
    "VerificationResult", "VerificationSummary", "VerificationStatus",
    "WindowSegment", "build_window_segments", "filter_verify_tables",
    # Verifiers (base class plus one per data layer)
    "BaseVerifier",
    "OdsVerifier", "DwdVerifier", "DwsVerifier", "IndexVerifier",
]
|
||
|
||
|
||
def get_verifier_for_layer(layer: str, db_connection, logger=None, **kwargs):
    """
    Build the verifier instance that matches a data-layer name.

    Args:
        layer: Layer name ("ODS", "DWD", "DWS", "INDEX"); it is upper-cased
            before lookup, so matching is case-insensitive.
        db_connection: Database connection, passed to the verifier as its
            first positional argument.
        logger: Logger forwarded to the verifier as ``logger=``.
        **kwargs: Extra keyword arguments forwarded to the verifier.
            For the ODS layer these are extracted and always passed explicitly:
            - api_client: API client (defaults to None)
            - fetch_from_api: whether to fetch source data from the API
              (defaults to False)
            - local_dump_dirs: mapping of local JSON dump directories
              (defaults to None)
            - use_local_json: whether to prefer local JSON (defaults to False)

    Returns:
        An instance of the verifier class registered for the layer.

    Raises:
        ValueError: If the layer name is not one of the known layers.
    """
    registry = {
        "ODS": OdsVerifier,
        "DWD": DwdVerifier,
        "DWS": DwsVerifier,
        "INDEX": IndexVerifier,
    }

    layer_key = layer.upper()
    if layer_key not in registry:
        raise ValueError(f"未知的数据层: {layer}")
    verifier_class = registry[layer_key]

    # Every layer except ODS takes only the connection, logger and pass-through kwargs.
    if layer_key != "ODS":
        return verifier_class(db_connection, logger=logger, **kwargs)

    # ODS accepts extra options: pull them out of kwargs (applying defaults)
    # so they are always supplied, then forward whatever is left unchanged.
    ods_options = {
        "api_client": kwargs.pop("api_client", None),
        "logger": logger,
        "fetch_from_api": kwargs.pop("fetch_from_api", False),
        "local_dump_dirs": kwargs.pop("local_dump_dirs", None),
        "use_local_json": kwargs.pop("use_local_json", False),
    }
    return verifier_class(db_connection, **ods_options, **kwargs)
|