1199 lines
45 KiB
Python
1199 lines
45 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""黑盒数据完整性校验脚本。
|
||
|
||
以独立视角从 API 源数据出发,逐层对比各 Schema 各表的数据完整性:
|
||
1. API → ODS:逐端点拉取数据,与 ODS 表记录数对比
|
||
2. ODS → DWD:按 TABLE_MAP 逐对比较记录数和金额列汇总
|
||
3. DWD → DWS:验证汇总表聚合结果与明细数据一致性
|
||
4. 可疑值检测:扫描各表中的边缘值、空值、重复记录
|
||
5. 抽样比对:随机抽样 100 条记录,逐字段与上游 API 源数据比对
|
||
|
||
用法:
|
||
cd apps/etl/connectors/feiqiu
|
||
python -m scripts.debug.debug_blackbox [--sample-size 100]
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import json
|
||
import logging
|
||
import sys
|
||
import time
|
||
import traceback
|
||
from dataclasses import asdict, dataclass, field
|
||
from datetime import datetime
|
||
from decimal import Decimal
|
||
from pathlib import Path
|
||
from typing import Any
|
||
from zoneinfo import ZoneInfo
|
||
|
||
# ── 确保项目根目录在 sys.path ──
|
||
_FEIQIU_ROOT = Path(__file__).resolve().parents[2]
|
||
if str(_FEIQIU_ROOT) not in sys.path:
|
||
sys.path.insert(0, str(_FEIQIU_ROOT))
|
||
|
||
from config.settings import AppConfig
|
||
from database.connection import DatabaseConnection
|
||
from api.client import APIClient
|
||
from tasks.ods.ods_tasks import ODS_TASK_SPECS, OdsTaskSpec
|
||
from tasks.dwd.dwd_load_task import DwdLoadTask
|
||
from orchestration.task_registry import default_registry
|
||
|
||
|
||
# ── 数据模型 ──────────────────────────────────────────────────
|
||
|
||
@dataclass
class BlackboxCheckResult:
    """Result of one blackbox validation check for a single table pair."""
    layer: str = ""  # "API_ODS" / "ODS_DWD" / "DWD_DWS"
    source_table: str = ""  # upstream side (or "API:<endpoint>" for step 1)
    target_table: str = ""  # downstream table being validated
    source_count: int = 0  # row count observed on the source side
    target_count: int = 0  # row count observed on the target side
    count_diff: int = 0  # target_count - source_count in every check layer
    amount_diffs: list = field(default_factory=list)  # per-column sum discrepancies (dicts)
    missing_keys: list = field(default_factory=list)  # sample keys missing downstream
    mismatch_count: int = 0  # number of field-level mismatches, when applicable
    status: str = ""  # PASS / WARN / FAIL / ERROR / SKIP
    message: str = ""  # human-readable summary used in logs and the report
    details: dict = field(default_factory=dict)  # extra structured info (e.g. date samples)
|
||
|
||
|
||
@dataclass
class SuspectRecord:
    """One finding from the suspicious-value scan (step 4)."""
    table: str = ""  # fully-qualified table the finding belongs to
    check_type: str = ""  # "null_pk" / "duplicate" / "edge_value" / "negative_amount"
    count: int = 0  # number of affected rows (or duplicate key groups)
    sample_keys: list = field(default_factory=list)  # up to 5 offending key tuples as evidence
    message: str = ""  # human-readable description
|
||
|
||
|
||
@dataclass
class SampleMismatch:
    """One field-level difference found by the API-vs-ODS sampling (step 5)."""
    table: str = ""  # ODS table the sampled row came from
    pk_value: Any = None  # sampled primary-key ("id") value
    field_name: str = ""  # differing field; "__missing__" when the API record is absent
    api_value: Any = None  # value on the API side (stringified, truncated to 200 chars)
    ods_value: Any = None  # value in the ODS payload (stringified, truncated to 200 chars)
|
||
|
||
|
||
# ── 工具函数 ──────────────────────────────────────────────────
|
||
|
||
def _setup_logging() -> logging.Logger:
|
||
logger = logging.getLogger("debug_blackbox")
|
||
logger.setLevel(logging.INFO)
|
||
if not logger.handlers:
|
||
handler = logging.StreamHandler(sys.stdout)
|
||
handler.setFormatter(logging.Formatter(
|
||
"%(asctime)s [%(levelname)s] %(message)s", datefmt="%H:%M:%S"
|
||
))
|
||
logger.addHandler(handler)
|
||
return logger
|
||
|
||
|
||
def _build_components(config: AppConfig, logger: logging.Logger):
    """Instantiate the database connection and API client from *config*."""
    db_cfg = config["db"]
    api_cfg = config["api"]
    database = DatabaseConnection(
        dsn=db_cfg["dsn"],
        session=db_cfg.get("session"),
        connect_timeout=db_cfg.get("connect_timeout_sec"),
    )
    client = APIClient(
        base_url=api_cfg["base_url"],
        token=api_cfg["token"],
        timeout=api_cfg.get("timeout_sec", 20),
        retry_max=api_cfg.get("retries", {}).get("max_attempts", 3),
        headers_extra=api_cfg.get("headers_extra"),
    )
    return database, client
|
||
|
||
|
||
def _query_count(db_conn: DatabaseConnection, table: str) -> int:
|
||
"""查询表的总行数。"""
|
||
rows = db_conn.query(f"SELECT COUNT(*) AS cnt FROM {table}")
|
||
return int(rows[0]["cnt"]) if rows else 0
|
||
|
||
|
||
def _table_exists(db_conn: DatabaseConnection, table: str) -> bool:
|
||
"""检查表是否存在。"""
|
||
rows = db_conn.query("SELECT to_regclass(%s) AS reg", (table,))
|
||
return bool(rows and rows[0].get("reg"))
|
||
|
||
|
||
def _has_column(db_conn: DatabaseConnection, table: str, column: str) -> bool:
|
||
"""检查表是否包含指定列。"""
|
||
sql = """
|
||
SELECT 1 FROM information_schema.columns
|
||
WHERE table_schema || '.' || table_name = %s
|
||
AND column_name = %s
|
||
LIMIT 1
|
||
"""
|
||
rows = db_conn.query(sql, (table, column))
|
||
return bool(rows)
|
||
|
||
|
||
def _get_numeric_columns(db_conn: DatabaseConnection, table: str) -> list[str]:
|
||
"""获取表中所有 numeric/decimal 类型的列名(金额列候选)。"""
|
||
sql = """
|
||
SELECT column_name FROM information_schema.columns
|
||
WHERE table_schema || '.' || table_name = %s
|
||
AND data_type IN ('numeric', 'decimal', 'money')
|
||
ORDER BY ordinal_position
|
||
"""
|
||
rows = db_conn.query(sql, (table,))
|
||
return [r["column_name"] for r in rows]
|
||
|
||
|
||
def _get_pk_columns(db_conn: DatabaseConnection, table: str) -> list[str]:
|
||
"""获取表的主键列。"""
|
||
parts = table.split(".")
|
||
schema = parts[0] if len(parts) == 2 else "public"
|
||
tbl = parts[1] if len(parts) == 2 else parts[0]
|
||
sql = """
|
||
SELECT a.attname
|
||
FROM pg_index i
|
||
JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
|
||
WHERE i.indrelid = %s::regclass AND i.indisprimary
|
||
ORDER BY array_position(i.indkey, a.attnum)
|
||
"""
|
||
rows = db_conn.query(sql, (table,))
|
||
return [r["attname"] for r in rows]
|
||
|
||
|
||
def _sanitize_for_json(obj):
|
||
"""递归处理不可序列化的值。"""
|
||
if isinstance(obj, dict):
|
||
return {k: _sanitize_for_json(v) for k, v in obj.items()}
|
||
if isinstance(obj, (list, tuple)):
|
||
return [_sanitize_for_json(v) for v in obj]
|
||
if isinstance(obj, datetime):
|
||
return obj.isoformat()
|
||
if isinstance(obj, Decimal):
|
||
return float(obj)
|
||
if isinstance(obj, set):
|
||
return list(obj)
|
||
return obj
|
||
|
||
|
||
# ── ODS task specs that are actually enabled (kept in sync with task_registry) ──
# Filters the full ODS_TASK_SPECS list down to the task codes the registry
# reports for the "ODS" layer; registry codes are upper-cased before matching.
_ENABLED_ODS_SPECS: list[OdsTaskSpec] = [
    spec for spec in ODS_TASK_SPECS
    if spec.code in {m.upper() for m in default_registry.get_tasks_by_layer("ODS")}
]
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════
|
||
# 步骤 1: API → ODS 记录数对比
|
||
# ══════════════════════════════════════════════════════════════
|
||
|
||
def _check_api_vs_ods(
    db_conn: DatabaseConnection,
    api_client: APIClient,
    config: AppConfig,
    logger: logging.Logger,
) -> list[BlackboxCheckResult]:
    """Step 1 — pull each enabled endpoint from the API and compare its
    record count against the corresponding ODS table.

    One BlackboxCheckResult per enabled ODS spec: PASS on an exact count
    match, WARN within 1% (min 5 rows) of drift, FAIL beyond that, ERROR
    on a failed query/API call, SKIP for the special per-row ticket task.
    """
    logger.info("=" * 60)
    logger.info("步骤 1: API → ODS 记录数对比")
    logger.info("=" * 60)

    store_id = int(config.get("app.store_id"))
    results: list[BlackboxCheckResult] = []

    for spec in _ENABLED_ODS_SPECS:
        result = BlackboxCheckResult(
            layer="API_ODS",
            source_table=f"API:{spec.endpoint}",
            target_table=spec.table_name,
        )
        logger.info(" ▶ %s → %s", spec.code, spec.table_name)

        # Count rows in the ODS table (ERROR out early if it is missing).
        try:
            if not _table_exists(db_conn, spec.table_name):
                result.status = "ERROR"
                result.message = f"ODS 表不存在: {spec.table_name}"
                logger.warning(" ✗ %s", result.message)
                results.append(result)
                continue
            ods_count = _query_count(db_conn, spec.table_name)
            result.target_count = ods_count
        except Exception as exc:
            result.status = "ERROR"
            result.message = f"查询 ODS 表失败: {exc}"
            logger.error(" ✗ %s", result.message)
            results.append(result)
            continue

        # Pull the record count from the API.
        # ODS_SETTLEMENT_TICKET is a special task (fetched row by row), so
        # the API comparison is skipped for it.
        if spec.code == "ODS_SETTLEMENT_TICKET":
            result.status = "SKIP"
            result.message = f"特殊任务跳过 API 对比, ODS 行数={ods_count}"
            logger.info(" ⊘ %s", result.message)
            results.append(result)
            continue

        try:
            params: dict[str, Any] = {}
            if spec.include_site_id:
                params["siteId"] = store_id

            # No time-window parameters: fetch everything so the count is
            # comparable with the full ODS table.
            all_records, _ = api_client.get_paginated(
                endpoint=spec.endpoint,
                params=params,
                page_size=200,
                data_path=spec.data_path,
                list_key=spec.list_key,
            )
            api_count = len(all_records)
            result.source_count = api_count
        except Exception as exc:
            result.status = "ERROR"
            result.message = f"API 拉取失败: {exc}"
            logger.error(" ✗ %s", result.message)
            results.append(result)
            continue

        # Compare counts.
        diff = ods_count - api_count
        result.count_diff = diff

        if diff == 0:
            result.status = "PASS"
            result.message = f"记录数一致: API={api_count}, ODS={ods_count}"
        elif abs(diff) <= max(5, int(api_count * 0.01)):
            # Tolerate up to 1% (min 5 rows) of drift (historical data,
            # soft-delete markers, etc.).
            result.status = "WARN"
            result.message = f"记录数微差: API={api_count}, ODS={ods_count}, diff={diff}"
        else:
            result.status = "FAIL"
            result.message = f"记录数差异较大: API={api_count}, ODS={ods_count}, diff={diff}"

        icon = {"PASS": "✓", "WARN": "⚠", "FAIL": "✗"}.get(result.status, "?")
        logger.info(" %s %s", icon, result.message)
        results.append(result)

    db_conn.ensure_open()

    return results
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════
|
||
# 步骤 2: ODS → DWD 记录数 + 金额对比
|
||
# ══════════════════════════════════════════════════════════════
|
||
|
||
def _check_ods_vs_dwd(
    db_conn: DatabaseConnection,
    logger: logging.Logger,
) -> list[BlackboxCheckResult]:
    """Step 2 — for every DWD→ODS pair in ``DwdLoadTask.TABLE_MAP``,
    compare row counts and the sums of the numeric columns shared by the
    two tables.

    Dimension tables (name containing "dim_") may legitimately have more
    DWD rows than ODS rows because of SCD2 history versions; fact tables
    are expected to match within 2% (min 5 rows).
    """
    logger.info("")
    logger.info("=" * 60)
    logger.info("步骤 2: ODS → DWD 记录数 + 金额对比")
    logger.info("=" * 60)

    table_map = DwdLoadTask.TABLE_MAP
    results: list[BlackboxCheckResult] = []

    # Iterate DWD→ODS pairs (several DWD tables may map to the same ODS table).
    for dwd_table, ods_table in sorted(table_map.items()):
        result = BlackboxCheckResult(
            layer="ODS_DWD",
            source_table=ods_table,
            target_table=dwd_table,
        )
        logger.info(" ▶ %s → %s", ods_table, dwd_table)

        # Both tables must exist before anything can be compared.
        for tbl, label in [(ods_table, "ODS"), (dwd_table, "DWD")]:
            if not _table_exists(db_conn, tbl):
                result.status = "ERROR"
                result.message = f"{label} 表不存在: {tbl}"
                logger.warning(" ✗ %s", result.message)
                break
        if result.status == "ERROR":
            results.append(result)
            continue

        try:
            ods_count = _query_count(db_conn, ods_table)
            dwd_count = _query_count(db_conn, dwd_table)
            result.source_count = ods_count
            result.target_count = dwd_count
            result.count_diff = dwd_count - ods_count
        except Exception as exc:
            result.status = "ERROR"
            result.message = f"查询记录数失败: {exc}"
            logger.error(" ✗ %s", result.message)
            results.append(result)
            continue

        # Dimension tables (dim_*) may have DWD rows >= ODS rows (SCD2 history).
        is_dim = "dim_" in dwd_table.split(".")[-1]

        # Amount comparison: sum every numeric column the two tables share.
        try:
            ods_num_cols = set(_get_numeric_columns(db_conn, ods_table))
            dwd_num_cols = set(_get_numeric_columns(db_conn, dwd_table))
            common_amount_cols = sorted(ods_num_cols & dwd_num_cols)

            amount_diffs = []
            for col in common_amount_cols[:5]:  # compare at most 5 amount columns
                try:
                    ods_sum_rows = db_conn.query(
                        f'SELECT COALESCE(SUM("{col}"), 0) AS s FROM {ods_table}'
                    )
                    dwd_sum_rows = db_conn.query(
                        f'SELECT COALESCE(SUM("{col}"), 0) AS s FROM {dwd_table}'
                    )
                    ods_sum = float(ods_sum_rows[0]["s"]) if ods_sum_rows else 0.0
                    dwd_sum = float(dwd_sum_rows[0]["s"]) if dwd_sum_rows else 0.0
                    diff = round(dwd_sum - ods_sum, 2)
                    if abs(diff) > 0.01:
                        amount_diffs.append({
                            "column": col,
                            "ods_sum": ods_sum,
                            "dwd_sum": dwd_sum,
                            "diff": diff,
                        })
                except Exception:
                    pass  # skip columns that cannot be aggregated

            result.amount_diffs = amount_diffs
        except Exception:
            pass  # amount comparison is best-effort; row counts decide status

        # Status decision.
        if is_dim:
            # Dimension table: DWD >= ODS is expected (SCD2 history rows).
            if dwd_count >= ods_count:
                result.status = "PASS"
                result.message = (
                    f"维度表 SCD2: ODS={ods_count}, DWD={dwd_count} "
                    f"(+{dwd_count - ods_count} 历史版本)"
                )
            else:
                result.status = "WARN"
                result.message = f"维度表 DWD < ODS: ODS={ods_count}, DWD={dwd_count}"
        else:
            # Fact table: counts should roughly match (2% / 5-row tolerance).
            if result.count_diff == 0:
                result.status = "PASS"
                result.message = f"记录数一致: {ods_count}"
            elif abs(result.count_diff) <= max(5, int(ods_count * 0.02)):
                result.status = "WARN"
                result.message = (
                    f"记录数微差: ODS={ods_count}, DWD={dwd_count}, "
                    f"diff={result.count_diff}"
                )
            else:
                result.status = "FAIL"
                result.message = (
                    f"记录数差异较大: ODS={ods_count}, DWD={dwd_count}, "
                    f"diff={result.count_diff}"
                )

        if result.amount_diffs:
            # Escalate to at least WARN when any shared amount column
            # disagrees; the lambda ranks statuses PASS < WARN < FAIL < ERROR
            # (unknown statuses rank lowest) so a FAIL/ERROR is never demoted.
            result.status = max(result.status, "WARN", key=lambda s: ["PASS", "WARN", "FAIL", "ERROR"].index(s) if s in ["PASS", "WARN", "FAIL", "ERROR"] else 0)
            result.message += f" | {len(result.amount_diffs)} 个金额列有差异"

        icon = {"PASS": "✓", "WARN": "⚠", "FAIL": "✗", "ERROR": "✗"}.get(result.status, "?")
        logger.info(" %s %s", icon, result.message)
        results.append(result)

    return results
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════
|
||
# 步骤 3: DWD → DWS 聚合一致性
|
||
# ══════════════════════════════════════════════════════════════
|
||
|
||
# Known DWS→DWD aggregation mappings.
# Keys are fully-qualified DWS tables; each value names the DWD source table
# plus the date columns describing how the aggregate relates to the detail.
# NOTE(review): "dwd_date_col" / "dwd_date_cast" are declared here but not
# read by _check_dwd_vs_dws in this file (only "dwd_source", "description"
# and "dws_date_col" are used) — confirm whether a DWD-side per-date
# comparison was intended.
_DWS_DWD_MAP: dict[str, dict] = {
    "dws.dws_assistant_daily_detail": {
        "dwd_source": "dwd.dwd_assistant_service_log",
        "dws_date_col": "stat_date",
        "dwd_date_col": "service_date",
        "description": "助教日度明细 vs DWD 服务流水",
    },
    "dws.dws_finance_daily_summary": {
        "dwd_source": "dwd.dwd_settlement_head",
        "dws_date_col": "stat_date",
        "dwd_date_col": "pay_time",
        "dwd_date_cast": "::date",
        "description": "财务日度汇总 vs DWD 结账记录",
    },
    "dws.dws_member_visit_detail": {
        "dwd_source": "dwd.dwd_settlement_head",
        "dws_date_col": "visit_date",
        "dwd_date_col": "pay_time",
        "dwd_date_cast": "::date",
        "description": "会员到店明细 vs DWD 结账记录",
    },
    "dws.dws_member_consumption_summary": {
        "dwd_source": "dwd.dwd_settlement_head",
        "dws_date_col": "stat_month",
        "dwd_date_col": "pay_time",
        "dwd_date_cast": "date_trunc('month', %col%)::date",
        "description": "会员消费汇总 vs DWD 结账记录",
    },
    "dws.dws_finance_recharge_summary": {
        "dwd_source": "dwd.dwd_recharge_order",
        "dws_date_col": "stat_date",
        "dwd_date_col": "pay_time",
        "dwd_date_cast": "::date",
        "description": "充值汇总 vs DWD 充值订单",
    },
}
|
||
|
||
|
||
def _check_dwd_vs_dws(
    db_conn: DatabaseConnection,
    config: AppConfig,
    logger: logging.Logger,
) -> list[BlackboxCheckResult]:
    """Step 3 — sanity-check each DWS summary table against its DWD source.

    DWS target tables are discovered through the task registry.  Tables
    with an entry in ``_DWS_DWD_MAP`` get a row-count comparison plus a
    per-date row sample on the DWS side; unmapped tables only get a basic
    non-empty row-count check.
    """
    logger.info("")
    logger.info("=" * 60)
    logger.info("步骤 3: DWD → DWS 聚合一致性")
    logger.info("=" * 60)

    results: list[BlackboxCheckResult] = []

    # Discover the target table of every DWS-layer task via the registry.
    dws_codes = default_registry.get_tasks_by_layer("DWS")
    dws_tables: list[str] = []
    for code in sorted(dws_codes):
        meta = default_registry.get_metadata(code)
        if meta is None:
            continue
        try:
            # Instantiate the task only to ask it for its target table.
            inst = meta.task_class(config, db_conn, None, logging.getLogger("noop"))
            raw = inst.get_target_table()
            # Qualify bare table names with the dws schema.
            full = f"dws.{raw}" if raw and "." not in raw else raw
            if full:
                dws_tables.append(full)
        except Exception:
            pass  # best-effort discovery: skip tasks that fail to instantiate

    for dws_table in sorted(set(dws_tables)):
        result = BlackboxCheckResult(layer="DWD_DWS", target_table=dws_table)

        if not _table_exists(db_conn, dws_table):
            result.status = "SKIP"
            result.message = f"DWS 表不存在: {dws_table}"
            logger.info(" ⊘ %s", result.message)
            results.append(result)
            continue

        mapping = _DWS_DWD_MAP.get(dws_table)
        if not mapping:
            # No known mapping: only check that the table has rows.
            try:
                dws_count = _query_count(db_conn, dws_table)
                result.target_count = dws_count
                result.status = "PASS" if dws_count > 0 else "WARN"
                result.message = f"无映射关系, DWS 行数={dws_count}"
            except Exception as exc:
                result.status = "ERROR"
                result.message = f"查询失败: {exc}"
            logger.info(" ℹ %s: %s", dws_table, result.message)
            results.append(result)
            continue

        dwd_source = mapping["dwd_source"]
        result.source_table = dwd_source
        logger.info(" ▶ %s → %s (%s)", dwd_source, dws_table, mapping["description"])

        if not _table_exists(db_conn, dwd_source):
            result.status = "ERROR"
            result.message = f"DWD 源表不存在: {dwd_source}"
            logger.warning(" ✗ %s", result.message)
            results.append(result)
            continue

        try:
            dws_count = _query_count(db_conn, dws_table)
            dwd_count = _query_count(db_conn, dwd_source)
            result.source_count = dwd_count
            result.target_count = dws_count
            result.count_diff = dws_count - dwd_count

            # DWS is an aggregate layer, so its row count is usually <= DWD;
            # only one side being empty while the other has data is flagged.
            if dws_count == 0 and dwd_count > 0:
                result.status = "WARN"
                result.message = f"DWS 无数据但 DWD 有 {dwd_count} 行"
            elif dws_count > 0 and dwd_count == 0:
                result.status = "WARN"
                result.message = f"DWS 有 {dws_count} 行但 DWD 无数据"
            else:
                result.status = "PASS"
                result.message = f"DWD={dwd_count}, DWS={dws_count}"

            # Date sample: record DWS row counts for its 3 most recent dates.
            # NOTE(review): only the DWS side is sampled here; the mapping's
            # dwd_date_col/dwd_date_cast fields are not consulted — confirm
            # whether a matching DWD-side per-date count was intended.
            dws_date_col = mapping.get("dws_date_col")
            if dws_date_col and _has_column(db_conn, dws_table, dws_date_col):
                sample_sql = f"""
                SELECT DISTINCT "{dws_date_col}" AS d
                FROM {dws_table}
                ORDER BY d DESC LIMIT 3
                """
                date_rows = db_conn.query(sample_sql)
                date_checks = []
                for dr in date_rows:
                    d = dr["d"]
                    dws_day_rows = db_conn.query(
                        f'SELECT COUNT(*) AS cnt FROM {dws_table} WHERE "{dws_date_col}" = %s',
                        (d,),
                    )
                    dws_day = int(dws_day_rows[0]["cnt"]) if dws_day_rows else 0
                    date_checks.append({"date": str(d), "dws_rows": dws_day})
                result.details["date_samples"] = date_checks

        except Exception as exc:
            result.status = "ERROR"
            result.message = f"对比失败: {exc}"

        icon = {"PASS": "✓", "WARN": "⚠", "FAIL": "✗", "ERROR": "✗", "SKIP": "⊘"}.get(result.status, "?")
        logger.info(" %s %s", icon, result.message)
        results.append(result)

    return results
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════
|
||
# 步骤 4: 可疑值检测
|
||
# ══════════════════════════════════════════════════════════════
|
||
|
||
def _detect_suspects(
    db_conn: DatabaseConnection,
    logger: logging.Logger,
) -> list[SuspectRecord]:
    """Step 4 — scan every ODS and DWD table for suspicious data.

    Per table:
      4a. NULL values in primary-key columns,
      4b. duplicate primary keys (at most 10 groups fetched),
      4c. negative and extreme (abs > 1,000,000) values in amount-like
          numeric columns (at most 5 columns per table).

    All checks are best-effort: individual query failures are swallowed so
    one broken table does not stop the scan.
    """
    logger.info("")
    logger.info("=" * 60)
    logger.info("步骤 4: 可疑值检测")
    logger.info("=" * 60)

    suspects: list[SuspectRecord] = []

    # Collect every ODS and DWD table to scan (deduplicated below).
    tables_to_scan: list[str] = []
    for spec in _ENABLED_ODS_SPECS:
        tables_to_scan.append(spec.table_name)
    for dwd_table in sorted(DwdLoadTask.TABLE_MAP.keys()):
        tables_to_scan.append(dwd_table)

    for table in sorted(set(tables_to_scan)):
        if not _table_exists(db_conn, table):
            continue

        logger.info(" 扫描 %s ...", table)
        pk_cols = _get_pk_columns(db_conn, table)

        # 4a. NULL values in primary-key columns.
        if pk_cols:
            for pk in pk_cols:
                try:
                    null_sql = f'SELECT COUNT(*) AS cnt FROM {table} WHERE "{pk}" IS NULL'
                    rows = db_conn.query(null_sql)
                    null_count = int(rows[0]["cnt"]) if rows else 0
                    if null_count > 0:
                        suspects.append(SuspectRecord(
                            table=table,
                            check_type="null_pk",
                            count=null_count,
                            message=f"主键列 {pk} 有 {null_count} 个 NULL 值",
                        ))
                        logger.warning(" ⚠ %s: 主键 %s 有 %d 个 NULL", table, pk, null_count)
                except Exception:
                    pass

        # 4b. Duplicate primary keys.
        if pk_cols:
            pk_expr = ", ".join(f'"{c}"' for c in pk_cols)
            try:
                dup_sql = f"""
                SELECT {pk_expr}, COUNT(*) AS cnt
                FROM {table}
                GROUP BY {pk_expr}
                HAVING COUNT(*) > 1
                LIMIT 10
                """
                dup_rows = db_conn.query(dup_sql)
                if dup_rows:
                    dup_count = len(dup_rows)
                    # Keep up to 5 offending key tuples as evidence.
                    sample_keys = [
                        {c: r[c] for c in pk_cols if c in r}
                        for r in dup_rows[:5]
                    ]
                    suspects.append(SuspectRecord(
                        table=table,
                        check_type="duplicate",
                        count=dup_count,
                        sample_keys=sample_keys,
                        message=f"发现 {dup_count} 组重复主键",
                    ))
                    logger.warning(" ⚠ %s: %d 组重复主键", table, dup_count)
            except Exception:
                pass

        # 4c. Negative / extreme values in amount-like numeric columns.
        try:
            num_cols = _get_numeric_columns(db_conn, table)
            # Only columns whose name contains an amount-related keyword.
            amount_keywords = ("amount", "money", "price", "fee", "sum", "balance", "cost")
            amount_cols = [c for c in num_cols if any(k in c.lower() for k in amount_keywords)]

            for col in amount_cols[:5]:
                try:
                    neg_sql = f'SELECT COUNT(*) AS cnt FROM {table} WHERE "{col}" < 0'
                    neg_rows = db_conn.query(neg_sql)
                    neg_count = int(neg_rows[0]["cnt"]) if neg_rows else 0
                    if neg_count > 0:
                        # Logged at INFO: negatives may be legitimate (refunds etc.).
                        suspects.append(SuspectRecord(
                            table=table,
                            check_type="negative_amount",
                            count=neg_count,
                            message=f"金额列 {col} 有 {neg_count} 个负值",
                        ))
                        logger.info(" ℹ %s.%s: %d 个负值", table, col, neg_count)
                except Exception:
                    pass

                # Extreme values: absolute amount above one million.
                try:
                    edge_sql = f'SELECT COUNT(*) AS cnt FROM {table} WHERE ABS("{col}") > 1000000'
                    edge_rows = db_conn.query(edge_sql)
                    edge_count = int(edge_rows[0]["cnt"]) if edge_rows else 0
                    if edge_count > 0:
                        suspects.append(SuspectRecord(
                            table=table,
                            check_type="edge_value",
                            count=edge_count,
                            message=f"金额列 {col} 有 {edge_count} 个超百万值",
                        ))
                        logger.warning(" ⚠ %s.%s: %d 个超百万值", table, col, edge_count)
                except Exception:
                    pass
        except Exception:
            pass

    db_conn.ensure_open()

    logger.info(" 可疑值检测完成, 共发现 %d 项", len(suspects))
    return suspects
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════
|
||
# 步骤 5: 抽样比对 API vs ODS
|
||
# ══════════════════════════════════════════════════════════════
|
||
|
||
def _sample_compare_api_vs_ods(
    db_conn: DatabaseConnection,
    api_client: APIClient,
    config: AppConfig,
    logger: logging.Logger,
    sample_size: int = 100,
) -> list[SampleMismatch]:
    """Step 5 — randomly sample ODS rows and diff them field-by-field
    against the upstream API records.

    Only tables whose spec has an "id" primary-key column participate;
    the sample budget is split evenly across them.  For each sampled id
    the stored ODS ``payload`` JSON is compared against the API record
    using the loose ``_values_match`` rules; rows missing from the API
    pull are reported with the pseudo field name "__missing__".
    """
    logger.info("")
    logger.info("=" * 60)
    logger.info("步骤 5: 抽样比对 API vs ODS (%d 条)", sample_size)
    logger.info("=" * 60)

    store_id = int(config.get("app.store_id"))
    mismatches: list[SampleMismatch] = []
    total_sampled = 0

    # ODS tasks suitable for sampling: an explicit "id" primary key and
    # not the special per-row settlement-ticket task.
    sample_specs = [
        s for s in _ENABLED_ODS_SPECS
        if s.code != "ODS_SETTLEMENT_TICKET"
        and any(pk.column == "id" for pk in s.pk_columns)
    ]

    # Even split of the sample budget across the eligible tables.
    per_table = max(1, sample_size // max(len(sample_specs), 1))
    remaining = sample_size

    for spec in sample_specs:
        if remaining <= 0:
            break

        n = min(per_table, remaining)
        logger.info(" ▶ %s: 抽样 %d 条", spec.table_name, n)

        if not _table_exists(db_conn, spec.table_name):
            continue

        # Draw n random ids from the ODS table.
        try:
            sample_sql = f"""
            SELECT id FROM {spec.table_name}
            ORDER BY RANDOM()
            LIMIT %s
            """
            id_rows = db_conn.query(sample_sql, (n,))
            if not id_rows:
                logger.info(" ℹ 表为空,跳过")
                continue
            sample_ids = [r["id"] for r in id_rows]
        except Exception as exc:
            logger.warning(" ⚠ 抽样失败: %s", exc)
            continue

        # Fetch the full endpoint once and index it in memory for lookups.
        try:
            params: dict[str, Any] = {}
            if spec.include_site_id:
                params["siteId"] = store_id

            api_records, _ = api_client.get_paginated(
                endpoint=spec.endpoint,
                params=params,
                page_size=200,
                data_path=spec.data_path,
                list_key=spec.list_key,
            )
            # Index API records by integer id.
            api_by_id: dict[int, dict] = {}
            for rec in api_records:
                rec_id = rec.get("id")
                if rec_id is not None:
                    try:
                        api_by_id[int(rec_id)] = rec
                    except (ValueError, TypeError):
                        pass
        except Exception as exc:
            logger.warning(" ⚠ API 拉取失败: %s", exc)
            continue

        # Compare each sampled row.
        table_mismatches = 0
        for sid in sample_ids:
            try:
                sid_int = int(sid)
            except (ValueError, TypeError):
                continue

            # Load the raw payload stored for this row in ODS.
            try:
                ods_row = db_conn.query(
                    f"SELECT payload FROM {spec.table_name} WHERE id = %s LIMIT 1",
                    (sid_int,),
                )
                if not ods_row or not ods_row[0].get("payload"):
                    continue
                ods_payload = ods_row[0]["payload"]
                # payload may arrive as a JSON string or an already-parsed dict.
                if isinstance(ods_payload, str):
                    ods_payload = json.loads(ods_payload)
            except Exception:
                continue

            api_rec = api_by_id.get(sid_int)
            if api_rec is None:
                # Row exists in ODS but is absent from the API pull.
                mismatches.append(SampleMismatch(
                    table=spec.table_name,
                    pk_value=sid_int,
                    field_name="__missing__",
                    api_value=None,
                    ods_value="exists",
                ))
                table_mismatches += 1
                continue

            # Field-by-field comparison (top-level API fields only).
            for key, api_val in api_rec.items():
                ods_val = ods_payload.get(key)
                if not _values_match(api_val, ods_val):
                    mismatches.append(SampleMismatch(
                        table=spec.table_name,
                        pk_value=sid_int,
                        field_name=key,
                        api_value=str(api_val)[:200],
                        ods_value=str(ods_val)[:200],
                    ))
                    table_mismatches += 1

            total_sampled += 1

        # The budget is charged for every drawn id, including ones skipped above,
        # so total_sampled may end up below sample_size.
        remaining -= len(sample_ids)
        if table_mismatches > 0:
            logger.info(" ⚠ %d 个字段不一致", table_mismatches)
        else:
            logger.info(" ✓ 抽样一致")

    db_conn.ensure_open()

    logger.info(" 抽样比对完成: 共抽样 %d 条, 发现 %d 处不一致", total_sampled, len(mismatches))
    return mismatches
|
||
|
||
|
||
def _values_match(api_val: Any, ods_val: Any) -> bool:
|
||
"""宽松比较两个值是否一致。
|
||
|
||
处理常见的类型差异:int vs str、None vs 空字符串、浮点精度等。
|
||
"""
|
||
if api_val is None and ods_val is None:
|
||
return True
|
||
if api_val is None and ods_val in ("", 0, "0", False):
|
||
return True
|
||
if ods_val is None and api_val in ("", 0, "0", False):
|
||
return True
|
||
|
||
# 都转字符串比较(处理 int/str 差异)
|
||
str_api = str(api_val).strip() if api_val is not None else ""
|
||
str_ods = str(ods_val).strip() if ods_val is not None else ""
|
||
|
||
if str_api == str_ods:
|
||
return True
|
||
|
||
# 数值比较(处理浮点精度)
|
||
try:
|
||
if abs(float(str_api) - float(str_ods)) < 0.01:
|
||
return True
|
||
except (ValueError, TypeError):
|
||
pass
|
||
|
||
# 嵌套对象/列表:转 JSON 比较
|
||
if isinstance(api_val, (dict, list)) or isinstance(ods_val, (dict, list)):
|
||
try:
|
||
return json.dumps(api_val, sort_keys=True, default=str) == json.dumps(ods_val, sort_keys=True, default=str)
|
||
except Exception:
|
||
pass
|
||
|
||
return False
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════
|
||
# 报告生成
|
||
# ══════════════════════════════════════════════════════════════
|
||
|
||
def _generate_report(
|
||
api_ods_results: list[BlackboxCheckResult],
|
||
ods_dwd_results: list[BlackboxCheckResult],
|
||
dwd_dws_results: list[BlackboxCheckResult],
|
||
suspects: list[SuspectRecord],
|
||
sample_mismatches: list[SampleMismatch],
|
||
config: AppConfig,
|
||
tz: ZoneInfo,
|
||
) -> dict:
|
||
"""汇总所有校验结果为结构化报告。"""
|
||
now = datetime.now(tz)
|
||
|
||
all_results = api_ods_results + ods_dwd_results + dwd_dws_results
|
||
pass_count = sum(1 for r in all_results if r.status == "PASS")
|
||
warn_count = sum(1 for r in all_results if r.status == "WARN")
|
||
fail_count = sum(1 for r in all_results if r.status == "FAIL")
|
||
error_count = sum(1 for r in all_results if r.status == "ERROR")
|
||
skip_count = sum(1 for r in all_results if r.status == "SKIP")
|
||
|
||
report = {
|
||
"title": "黑盒数据完整性校验报告",
|
||
"generated_at": now.isoformat(),
|
||
"environment": {
|
||
"store_id": config.get("app.store_id"),
|
||
"db_name": config.get("db.name", ""),
|
||
"api_base": config.get("api.base_url", ""),
|
||
},
|
||
"summary": {
|
||
"total_checks": len(all_results),
|
||
"pass": pass_count,
|
||
"warn": warn_count,
|
||
"fail": fail_count,
|
||
"error": error_count,
|
||
"skip": skip_count,
|
||
"suspect_count": len(suspects),
|
||
"sample_mismatch_count": len(sample_mismatches),
|
||
},
|
||
"api_ods": [asdict(r) for r in api_ods_results],
|
||
"ods_dwd": [asdict(r) for r in ods_dwd_results],
|
||
"dwd_dws": [asdict(r) for r in dwd_dws_results],
|
||
"suspects": [asdict(s) for s in suspects],
|
||
"sample_mismatches": [asdict(m) for m in sample_mismatches[:200]],
|
||
}
|
||
return report
|
||
|
||
|
||
def _generate_markdown_report(report: dict) -> str:
    """Render the structured report dict (from _generate_report) as Markdown.

    Sections: environment info, summary table, one result table per check
    step, amount-diff details, suspicious values, and up to 50 of the
    sample mismatches.
    """
    lines: list[str] = []
    summary = report["summary"]

    lines.append(f"# {report['title']}")
    lines.append("")
    lines.append(f"生成时间: {report['generated_at']}")
    lines.append("")

    # Environment info.
    env = report["environment"]
    lines.append("## 环境信息")
    lines.append(f"- 门店 ID: {env.get('store_id')}")
    lines.append(f"- 数据库: {env.get('db_name')}")
    lines.append(f"- API: {env.get('api_base')}")
    lines.append("")

    # Summary table.
    lines.append("## 校验汇总")
    lines.append(f"| 指标 | 数值 |")
    lines.append(f"|------|------|")
    lines.append(f"| 总检查项 | {summary['total_checks']} |")
    lines.append(f"| ✓ PASS | {summary['pass']} |")
    lines.append(f"| ⚠ WARN | {summary['warn']} |")
    lines.append(f"| ✗ FAIL | {summary['fail']} |")
    lines.append(f"| ✗ ERROR | {summary['error']} |")
    lines.append(f"| ⊘ SKIP | {summary['skip']} |")
    lines.append(f"| 可疑值 | {summary['suspect_count']} |")
    lines.append(f"| 抽样不一致 | {summary['sample_mismatch_count']} |")
    lines.append("")

    # Step 1: API → ODS.
    lines.append("## 步骤 1: API → ODS 记录数对比")
    lines.append("")
    _append_result_table(lines, report.get("api_ods", []))

    # Step 2: ODS → DWD.
    lines.append("## 步骤 2: ODS → DWD 记录数 + 金额对比")
    lines.append("")
    _append_result_table(lines, report.get("ods_dwd", []))

    # Per-column amount differences, when any step-2 check reported them.
    amount_issues = [r for r in report.get("ods_dwd", []) if r.get("amount_diffs")]
    if amount_issues:
        lines.append("### 金额差异详情")
        lines.append("")
        for r in amount_issues:
            lines.append(f"**{r['source_table']} → {r['target_table']}**")
            lines.append("")
            lines.append("| 列名 | ODS 汇总 | DWD 汇总 | 差异 |")
            lines.append("|------|----------|----------|------|")
            for ad in r["amount_diffs"]:
                lines.append(
                    f"| {ad['column']} | {ad['ods_sum']:.2f} | {ad['dwd_sum']:.2f} | {ad['diff']:.2f} |"
                )
            lines.append("")

    # Step 3: DWD → DWS.
    lines.append("## 步骤 3: DWD → DWS 聚合一致性")
    lines.append("")
    _append_result_table(lines, report.get("dwd_dws", []))

    # Step 4: suspicious values.
    lines.append("## 步骤 4: 可疑值检测")
    lines.append("")
    suspects_data = report.get("suspects", [])
    if suspects_data:
        lines.append("| 表 | 类型 | 数量 | 说明 |")
        lines.append("|---|------|------|------|")
        for s in suspects_data:
            lines.append(f"| {s['table']} | {s['check_type']} | {s['count']} | {s['message']} |")
        lines.append("")
    else:
        lines.append("未发现可疑值。")
        lines.append("")

    # Step 5: sample comparison.
    lines.append("## 步骤 5: 抽样比对 API vs ODS")
    lines.append("")
    sample_data = report.get("sample_mismatches", [])
    if sample_data:
        lines.append(f"共发现 {len(sample_data)} 处不一致:")
        lines.append("")
        lines.append("| 表 | 主键 | 字段 | API 值 | ODS 值 |")
        lines.append("|---|------|------|--------|--------|")
        for m in sample_data[:50]:  # show at most 50 rows
            lines.append(
                f"| {m['table']} | {m['pk_value']} | {m['field_name']} "
                f"| {str(m.get('api_value', ''))[:60]} | {str(m.get('ods_value', ''))[:60]} |"
            )
        if len(sample_data) > 50:
            lines.append(f"| ... | 共 {len(sample_data)} 条,仅展示前 50 条 | | | |")
        lines.append("")
    else:
        lines.append("抽样比对全部一致。")
        lines.append("")

    return "\n".join(lines)
|
||
|
||
|
||
def _append_result_table(lines: list[str], results: list[dict]):
|
||
"""向 Markdown 追加校验结果表格。"""
|
||
if not results:
|
||
lines.append("无数据。")
|
||
lines.append("")
|
||
return
|
||
|
||
lines.append("| 状态 | 源 | 目标 | 源行数 | 目标行数 | 差异 | 说明 |")
|
||
lines.append("|------|---|------|--------|----------|------|------|")
|
||
for r in results:
|
||
icon = {"PASS": "✓", "WARN": "⚠", "FAIL": "✗", "ERROR": "✗", "SKIP": "⊘"}.get(r.get("status", ""), "?")
|
||
lines.append(
|
||
f"| {icon} {r.get('status', '')} "
|
||
f"| {r.get('source_table', '')} "
|
||
f"| {r.get('target_table', '')} "
|
||
f"| {r.get('source_count', '')} "
|
||
f"| {r.get('target_count', '')} "
|
||
f"| {r.get('count_diff', '')} "
|
||
f"| {r.get('message', '')} |"
|
||
)
|
||
lines.append("")
|
||
|
||
|
||
# ══════════════════════════════════════════════════════════════
|
||
# 主流程
|
||
# ══════════════════════════════════════════════════════════════
|
||
|
||
def run_blackbox_check(
    sample_size: int = 100,
) -> dict:
    """Execute the complete blackbox data-integrity check.

    Runs the five verification steps (API→ODS counts, ODS→DWD counts and
    amount sums, DWD→DWS aggregation consistency, suspect-value scan, and
    field-level API-vs-ODS sampling), writes a JSON and a Markdown report,
    then logs a summary.

    Args:
        sample_size: number of records to sample for the API-vs-ODS
            field-level comparison in step 5.

    Returns:
        The structured check report dict (with ``duration_sec`` added).
    """
    logger = _setup_logging()
    logger.info("╔" + "═" * 58 + "╗")
    logger.info("║ 黑盒数据完整性校验 ║")
    logger.info("╚" + "═" * 58 + "╝")

    # Load configuration.
    config = AppConfig.load()
    tz = ZoneInfo(config.get("app.timezone", "Asia/Shanghai"))

    logger.info("门店 ID: %s", config.get("app.store_id"))
    logger.info("数据库: %s", config.get("db.name", ""))
    logger.info("API: %s", config.get("api.base_url", ""))
    logger.info("")

    # Build shared components (DB connection + API client).
    db_conn, api_client = _build_components(config, logger)

    # FIX: the original only closed db_conn on the happy path; an exception
    # from report generation or file output leaked the connection. Wrap the
    # whole run in try/finally so the connection is always released.
    try:
        t0 = time.monotonic()

        # Step 1: API → ODS. Each step is isolated so one failure does not
        # abort the remaining checks; on error the step yields an empty list.
        try:
            api_ods_results = _check_api_vs_ods(db_conn, api_client, config, logger)
        except Exception as exc:
            logger.error("步骤 1 异常: %s", exc)
            logger.error(traceback.format_exc())
            api_ods_results = []

        # A failed step may have broken the connection; re-open if needed.
        db_conn.ensure_open()

        # Step 2: ODS → DWD
        try:
            ods_dwd_results = _check_ods_vs_dwd(db_conn, logger)
        except Exception as exc:
            logger.error("步骤 2 异常: %s", exc)
            logger.error(traceback.format_exc())
            ods_dwd_results = []

        db_conn.ensure_open()

        # Step 3: DWD → DWS
        try:
            dwd_dws_results = _check_dwd_vs_dws(db_conn, config, logger)
        except Exception as exc:
            logger.error("步骤 3 异常: %s", exc)
            logger.error(traceback.format_exc())
            dwd_dws_results = []

        db_conn.ensure_open()

        # Step 4: suspect-value detection
        try:
            suspects = _detect_suspects(db_conn, logger)
        except Exception as exc:
            logger.error("步骤 4 异常: %s", exc)
            logger.error(traceback.format_exc())
            suspects = []

        db_conn.ensure_open()

        # Step 5: sampled field-level comparison
        try:
            sample_mismatches = _sample_compare_api_vs_ods(
                db_conn, api_client, config, logger, sample_size=sample_size,
            )
        except Exception as exc:
            logger.error("步骤 5 异常: %s", exc)
            logger.error(traceback.format_exc())
            sample_mismatches = []

        total_sec = round(time.monotonic() - t0, 1)

        # Assemble the structured report.
        report = _generate_report(
            api_ods_results, ods_dwd_results, dwd_dws_results,
            suspects, sample_mismatches, config, tz,
        )
        report["duration_sec"] = total_sec

        # Write the JSON report.
        output_dir = _FEIQIU_ROOT / "scripts" / "debug" / "output"
        output_dir.mkdir(parents=True, exist_ok=True)
        ts = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
        json_path = output_dir / f"blackbox_{ts}.json"
        json_path.write_text(
            json.dumps(_sanitize_for_json(report), ensure_ascii=False, indent=2, default=str),
            encoding="utf-8",
        )
        logger.info("")
        logger.info("JSON 报告: %s", json_path)

        # Write the Markdown report.
        reports_dir = _FEIQIU_ROOT / "docs" / "reports"
        reports_dir.mkdir(parents=True, exist_ok=True)
        md_path = reports_dir / f"blackbox_report_{ts}.md"
        md_content = _generate_markdown_report(report)
        md_path.write_text(md_content, encoding="utf-8")
        logger.info("Markdown 报告: %s", md_path)

        # Log the summary.
        logger.info("")
        logger.info("=" * 60)
        logger.info("黑盒校验汇总")
        logger.info("=" * 60)
        s = report["summary"]
        logger.info(" 总检查项: %d", s["total_checks"])
        logger.info(" ✓ PASS: %d", s["pass"])
        logger.info(" ⚠ WARN: %d", s["warn"])
        logger.info(" ✗ FAIL: %d", s["fail"])
        logger.info(" ✗ ERROR: %d", s["error"])
        logger.info(" ⊘ SKIP: %d", s["skip"])
        logger.info(" 可疑值: %d 项", s["suspect_count"])
        logger.info(" 抽样不一致: %d 处", s["sample_mismatch_count"])
        logger.info(" 总耗时: %.1f 秒", total_sec)

        return report
    finally:
        # Always release the DB connection.
        db_conn.close()
|
||
|
||
|
||
# ── CLI 入口 ──────────────────────────────────────────────────
|
||
|
||
def parse_args():
|
||
parser = argparse.ArgumentParser(description="黑盒数据完整性校验")
|
||
parser.add_argument("--sample-size", type=int, default=100,
|
||
help="抽样比对记录数(默认 100)")
|
||
return parser.parse_args()
|
||
|
||
|
||
def main():
    """CLI entry point: run the blackbox check and set the exit code."""
    options = parse_args()
    report = run_blackbox_check(sample_size=options.sample_size)

    # Exit code: non-zero when any check FAILed.
    exit_code = 1 if report["summary"]["fail"] > 0 else 0
    sys.exit(exit_code)
|
||
|
||
|
||
# Script entry point: exit code is set inside main() via sys.exit.
if __name__ == "__main__":
    main()
|