在准备环境前提交此次全部更改。
This commit is contained in:
58
apps/etl/connectors/feiqiu/scripts/debug/_fix_sequences.py
Normal file
58
apps/etl/connectors/feiqiu/scripts/debug/_fix_sequences.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# -*- coding: utf-8 -*-
"""Repair database sequences: advance each sequence to its table's max primary key.

Root cause: the sequences were reset to 1 while their tables still hold rows,
so subsequent INSERTs raise primary-key conflicts.
"""
import sys
from pathlib import Path

# Make the connector root importable when run as a loose script.
_FEIQIU_ROOT = Path(__file__).resolve().parents[2]
if str(_FEIQIU_ROOT) not in sys.path:
    sys.path.insert(0, str(_FEIQIU_ROOT))

from config.settings import AppConfig
from database.connection import DatabaseConnection

config = AppConfig.load()
db = DatabaseConnection(
    dsn=config["db"]["dsn"],
    connect_timeout=config["db"].get("connect_timeout_sec"),
)

# Sequences to repair: (sequence name, table name, primary-key column).
SEQUENCES_TO_FIX = [
    ("meta.etl_run_run_id_seq", "meta.etl_run", "run_id"),
    ("dws.dws_index_percentile_history_history_id_seq", "dws.dws_index_percentile_history", "history_id"),
]

for seq_name, table_name, pk_col in SEQUENCES_TO_FIX:
    try:
        # Current max primary-key value in the table (0 for an empty table).
        rows = db.query(f"SELECT COALESCE(max({pk_col}), 0) as max_val FROM {table_name}")
        max_val = rows[0]["max_val"]

        # Current sequence position.
        # NOTE(review): is_called is fetched but never used — when is_called
        # is false, last_value is the NEXT value to be handed out, not the
        # last one used; confirm the max_val > cur_val test is safe then.
        rows2 = db.query(f"SELECT last_value, is_called FROM {seq_name}")
        cur_val = rows2[0]["last_value"]

        print(f"{seq_name}:")
        print(f"  表 {table_name} 最大 {pk_col} = {max_val}")
        print(f"  序列当前值 = {cur_val}")

        if max_val > cur_val:
            # setval(seq, max_val) marks the value as already used, so the
            # next nextval() yields max_val + 1.
            db.query(f"SELECT setval('{seq_name}', {max_val})")
            db.commit()

            # Re-read to confirm the fix took effect.
            rows3 = db.query(f"SELECT last_value, is_called FROM {seq_name}")
            print(f"  ✓ 已修复: 序列新值 = {rows3[0]['last_value']}")
        else:
            print(f"  ✓ 序列值正常,无需修复")
        print()
    except Exception as e:
        print(f"  ✗ 修复失败: {e}")
        db.rollback()

db.close()
print("完成。")
|
||||
878
apps/etl/connectors/feiqiu/scripts/debug/analyze_architecture.py
Normal file
878
apps/etl/connectors/feiqiu/scripts/debug/analyze_architecture.py
Normal file
@@ -0,0 +1,878 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""ETL 架构分析脚本。
|
||||
|
||||
通过静态分析(AST 解析、import 扫描、文件统计)评估 ETL 代码结构,
|
||||
生成架构优化报告(Markdown)。
|
||||
|
||||
分析维度:
|
||||
1. 模块依赖关系 — 扫描 import,构建依赖图,识别循环依赖
|
||||
2. 文件大小分析 — 统计行数,识别过大文件(>500 行)
|
||||
3. 函数复杂度 — AST 分析圈复杂度(分支/嵌套深度)
|
||||
4. 重复代码检测 — 比较函数签名和结构相似度
|
||||
5. 耦合度评估 — 模块间导入关系密度
|
||||
6. 任务分类分析 — 从 TaskRegistry 读取元数据,评估分类合理性
|
||||
|
||||
用法:
|
||||
cd apps/etl/connectors/feiqiu
|
||||
python -m scripts.debug.analyze_architecture
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from collections import Counter, defaultdict
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Iterator
|
||||
|
||||
# ── 确保项目根目录在 sys.path ──
|
||||
_FEIQIU_ROOT = Path(__file__).resolve().parents[2]
|
||||
if str(_FEIQIU_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_FEIQIU_ROOT))
|
||||
|
||||
# ── Analysis scope: the connector's core packages ──
# Only imports resolving to one of these top-level names count as
# internal dependency edges; everything else is treated as external.
_CORE_MODULES = [
    "api", "cli", "config", "database", "loaders", "models",
    "orchestration", "quality", "scd", "tasks", "utils",
]
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
# 数据结构
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
|
||||
@dataclass
class FileInfo:
    """Line-count statistics for a single .py file."""
    path: Path          # absolute path on disk
    rel_path: str       # path relative to the analysis root, "/"-separated
    lines: int = 0          # total physical lines
    code_lines: int = 0     # non-blank, non-comment lines
    blank_lines: int = 0    # whitespace-only lines
    comment_lines: int = 0  # lines whose first non-space char is '#'
    module: str = ""        # owning module (api/cli/... or scripts/tests/root)
|
||||
|
||||
|
||||
@dataclass
class FunctionInfo:
    """Static-analysis metrics for one function or method."""
    name: str
    file: str   # relative path of the defining file
    line: int   # 1-based line number of the def statement
    complexity: int = 1   # McCabe cyclomatic complexity
    max_nesting: int = 0  # deepest control-flow nesting level
    param_count: int = 0  # positional params, minus one (self/cls) for methods
    lines: int = 0        # line span of the function body
    is_method: bool = False
    class_name: str = ""  # enclosing class name, "" for free functions
|
||||
|
||||
|
||||
@dataclass
class ImportEdge:
    """One module-to-module import relationship."""
    source_module: str  # importing module
    target_module: str  # imported module
    source_file: str    # file containing the import statement
    import_name: str    # imported dotted name (or "mod.{a, b}" for from-imports)
|
||||
|
||||
|
||||
@dataclass
class ArchitectureReport:
    """Aggregated output of every analysis phase."""
    generated_at: datetime = field(default_factory=datetime.now)
    # File statistics
    files: list[FileInfo] = field(default_factory=list)
    # Function metrics
    functions: list[FunctionInfo] = field(default_factory=list)
    # Dependency graph
    import_edges: list[ImportEdge] = field(default_factory=list)
    circular_deps: list[tuple[str, str]] = field(default_factory=list)
    # Task-registry classification (empty dict when the registry is unavailable)
    task_classification: dict = field(default_factory=dict)
    # (label_a, label_b, similarity) pairs of possibly-duplicated functions
    similar_functions: list[tuple[str, str, float]] = field(default_factory=list)
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
# 日志
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
|
||||
def _setup_logging() -> logging.Logger:
|
||||
logger = logging.getLogger("analyze_architecture")
|
||||
logger.setLevel(logging.INFO)
|
||||
if not logger.handlers:
|
||||
handler = logging.StreamHandler(sys.stdout)
|
||||
handler.setFormatter(logging.Formatter(
|
||||
"%(asctime)s [%(levelname)s] %(message)s", datefmt="%H:%M:%S"
|
||||
))
|
||||
logger.addHandler(handler)
|
||||
return logger
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
# 1. 文件扫描与行数统计
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
|
||||
def _iter_py_files(root: Path) -> Iterator[Path]:
|
||||
"""递归遍历核心模块下的 .py 文件,跳过 __pycache__ / .hypothesis 等。"""
|
||||
skip_dirs = {"__pycache__", ".hypothesis", ".pytest_cache", "export", "Asia"}
|
||||
for dirpath, dirnames, filenames in os.walk(root):
|
||||
dirnames[:] = [d for d in dirnames if d not in skip_dirs]
|
||||
for fn in filenames:
|
||||
if fn.endswith(".py"):
|
||||
yield Path(dirpath) / fn
|
||||
|
||||
|
||||
def _classify_module(rel_path: str) -> str:
    """Map a root-relative path to its owning module name.

    Returns the first path component when it is a core module or one of
    the special "scripts"/"tests" trees; everything else is "root".
    """
    components = Path(rel_path).parts
    if not components:
        return "root"
    head = components[0]
    if head in _CORE_MODULES or head in ("scripts", "tests"):
        return head
    return "root"
|
||||
|
||||
|
||||
def _count_lines(filepath: Path) -> FileInfo:
    """Classify every line of *filepath* as code, blank, or comment.

    Unreadable files yield a zeroed FileInfo instead of aborting the scan.
    """
    stats = FileInfo(path=filepath, rel_path="")
    try:
        content = filepath.read_text(encoding="utf-8", errors="replace")
    except Exception:
        return stats
    all_lines = content.splitlines()
    stats.lines = len(all_lines)
    for raw in all_lines:
        text = raw.strip()
        if not text:
            stats.blank_lines += 1
        elif text.startswith("#"):
            stats.comment_lines += 1
        else:
            stats.code_lines += 1
    return stats
|
||||
|
||||
|
||||
def scan_files(root: Path, logger: logging.Logger) -> list[FileInfo]:
    """Walk *root* and produce line statistics for every .py file found."""
    collected: list[FileInfo] = []
    for path in _iter_py_files(root):
        stats = _count_lines(path)
        stats.path = path
        # Normalise to forward slashes so reports look the same on Windows.
        stats.rel_path = str(path.relative_to(root)).replace("\\", "/")
        stats.module = _classify_module(stats.rel_path)
        collected.append(stats)
    logger.info("扫描完成:共 %d 个 .py 文件", len(collected))
    return collected
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
# 2. AST 分析:函数复杂度
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
|
||||
# 增加圈复杂度的 AST 节点类型
|
||||
_COMPLEXITY_NODES = (
|
||||
ast.If, ast.For, ast.While, ast.ExceptHandler,
|
||||
ast.With, ast.Assert, ast.BoolOp,
|
||||
)
|
||||
# 仅 comprehension 内的 if 子句
|
||||
_COMP_NODES = (ast.ListComp, ast.SetComp, ast.DictComp, ast.GeneratorExp)
|
||||
|
||||
|
||||
def _calc_complexity(node: ast.AST) -> int:
|
||||
"""计算函数体的圈复杂度(McCabe)。"""
|
||||
complexity = 1
|
||||
for child in ast.walk(node):
|
||||
if isinstance(child, _COMPLEXITY_NODES):
|
||||
complexity += 1
|
||||
# BoolOp 中每个额外的 and/or 加 1
|
||||
if isinstance(child, ast.BoolOp):
|
||||
complexity += len(child.values) - 2 if len(child.values) > 2 else 0
|
||||
elif isinstance(child, _COMP_NODES):
|
||||
for gen in child.generators:
|
||||
complexity += len(gen.ifs)
|
||||
return complexity
|
||||
|
||||
|
||||
def _calc_max_nesting(node: ast.AST, depth: int = 0) -> int:
|
||||
"""计算最大嵌套深度。"""
|
||||
nesting_types = (ast.If, ast.For, ast.While, ast.With, ast.Try, ast.ExceptHandler)
|
||||
max_depth = depth
|
||||
for child in ast.iter_child_nodes(node):
|
||||
if isinstance(child, nesting_types):
|
||||
child_depth = _calc_max_nesting(child, depth + 1)
|
||||
max_depth = max(max_depth, child_depth)
|
||||
else:
|
||||
child_depth = _calc_max_nesting(child, depth)
|
||||
max_depth = max(max_depth, child_depth)
|
||||
return max_depth
|
||||
|
||||
|
||||
def _func_body_lines(node: ast.FunctionDef | ast.AsyncFunctionDef) -> int:
|
||||
"""计算函数体行数。"""
|
||||
if not node.body:
|
||||
return 0
|
||||
first_line = node.body[0].lineno
|
||||
last_line = node.body[-1].end_lineno or node.body[-1].lineno
|
||||
return last_line - first_line + 1
|
||||
|
||||
|
||||
def _walk_with_parent(tree: ast.AST):
|
||||
"""遍历 AST 并记录每个节点的父节点(避免 O(n²) 嵌套 walk)。"""
|
||||
# 先给所有节点标记 parent
|
||||
for node in ast.walk(tree):
|
||||
for child in ast.iter_child_nodes(node):
|
||||
child._parent = node # type: ignore[attr-defined]
|
||||
|
||||
|
||||
def analyze_functions(files: list[FileInfo], logger: logging.Logger) -> list[FunctionInfo]:
    """AST-parse every file and extract per-function/method metrics.

    Files that fail to parse (syntax or decode errors) are skipped silently.
    """
    results: list[FunctionInfo] = []
    for fi in files:
        try:
            source = fi.path.read_text(encoding="utf-8", errors="replace")
            tree = ast.parse(source, filename=fi.rel_path)
        except (SyntaxError, UnicodeDecodeError):
            continue

        # Annotate nodes with `_parent` so the method check below is O(1).
        _walk_with_parent(tree)

        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                # A def whose direct parent is a ClassDef is a method.
                parent = getattr(node, "_parent", None)
                class_name = ""
                is_method = False
                if isinstance(parent, ast.ClassDef):
                    class_name = parent.name
                    is_method = True

                # NOTE(review): counts only plain positional args (ignores
                # keyword-only / positional-only) and strips one arg for
                # every method, including @staticmethods — confirm intended.
                param_count = len(node.args.args)
                if is_method and param_count > 0:
                    param_count -= 1  # drop self/cls

                info = FunctionInfo(
                    name=node.name,
                    file=fi.rel_path,
                    line=node.lineno,
                    complexity=_calc_complexity(node),
                    max_nesting=_calc_max_nesting(node),
                    param_count=param_count,
                    lines=_func_body_lines(node),
                    is_method=is_method,
                    class_name=class_name,
                )
                results.append(info)

    logger.info("函数分析完成:共 %d 个函数/方法", len(results))
    return results
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
# 3. 依赖关系分析
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
|
||||
def _extract_imports(filepath: Path, rel_path: str) -> list[ImportEdge]:
    """Extract import statements from one file as module-level edges.

    Only imports resolving to a core module different from the importing
    file's own module produce edges; unparseable files yield [].
    """
    edges: list[ImportEdge] = []
    try:
        source = filepath.read_text(encoding="utf-8", errors="replace")
        tree = ast.parse(source, filename=rel_path)
    except (SyntaxError, UnicodeDecodeError):
        return edges

    source_module = _classify_module(rel_path)

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            # `import a.b` — one edge per imported name.
            for alias in node.names:
                target = _resolve_import_module(alias.name)
                if target and target != source_module:
                    edges.append(ImportEdge(
                        source_module=source_module,
                        target_module=target,
                        source_file=rel_path,
                        import_name=alias.name,
                    ))
        elif isinstance(node, ast.ImportFrom):
            # `from a.b import x, y` — recorded as "a.b.{x, y}".
            # NOTE(review): pure-relative imports (`from . import x`) have
            # node.module of None and are skipped here — confirm intended.
            if node.module:
                target = _resolve_import_module(node.module)
                if target and target != source_module:
                    names = ", ".join(a.name for a in (node.names or []))
                    edges.append(ImportEdge(
                        source_module=source_module,
                        target_module=target,
                        source_file=rel_path,
                        import_name=f"{node.module}.{{{names}}}",
                    ))
    return edges
|
||||
|
||||
|
||||
def _resolve_import_module(import_path: str) -> str | None:
    """Map a dotted import path to a core module name, or None if external."""
    head = import_path.split(".", 1)[0]
    return head if head in _CORE_MODULES else None
|
||||
|
||||
|
||||
def analyze_dependencies(files: list[FileInfo], logger: logging.Logger) -> tuple[list[ImportEdge], list[tuple[str, str]]]:
    """Collect module-level import edges and detect mutual (A↔B) dependencies."""
    edges: list[ImportEdge] = []
    for info in files:
        edges.extend(_extract_imports(info.path, info.rel_path))

    # Adjacency sets over modules, for cycle detection.
    adjacency: dict[str, set[str]] = defaultdict(set)
    for e in edges:
        adjacency[e.source_module].add(e.target_module)

    # A pair is circular when each module imports the other; record each
    # unordered pair once (sorted to normalise orientation).
    cycles: list[tuple[str, str]] = []
    for origin, outgoing in adjacency.items():
        for dest in outgoing:
            if origin in adjacency.get(dest, set()):
                key = tuple(sorted([origin, dest]))
                if key not in cycles:
                    cycles.append(key)

    logger.info("依赖分析完成:%d 条导入边,%d 对循环依赖", len(edges), len(cycles))
    return edges, cycles
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
# 4. 重复代码检测(基于函数签名相似度)
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
|
||||
def _func_signature_key(fn: FunctionInfo) -> str:
|
||||
"""生成函数签名指纹:参数数量 + 行数范围 + 复杂度。"""
|
||||
line_bucket = fn.lines // 10 * 10 # 按 10 行分桶
|
||||
return f"p{fn.param_count}_l{line_bucket}_c{fn.complexity}"
|
||||
|
||||
|
||||
def _fn_label(fn: FunctionInfo) -> str:
    """Render "file:Class.name" (or "file:name") for report output."""
    if fn.class_name:
        return f"{fn.file}:{fn.class_name}.{fn.name}"
    return f"{fn.file}:{fn.name}"


def detect_similar_functions(
    functions: list[FunctionInfo],
    logger: logging.Logger,
    min_lines: int = 15,
) -> list[tuple[str, str, float]]:
    """Find cross-file function pairs sharing a signature fingerprint.

    Functions shorter than *min_lines* are ignored to cut noise. Within a
    fingerprint bucket, a pair is reported when body lengths differ by
    less than 30%.
    """
    grouped: dict[str, list[FunctionInfo]] = defaultdict(list)
    for candidate in functions:
        if candidate.lines >= min_lines:
            grouped[_func_signature_key(candidate)].append(candidate)

    matches: list[tuple[str, str, float]] = []
    for bucket in grouped.values():
        for idx, first in enumerate(bucket):
            for second in bucket[idx + 1:]:
                # Same-file variants/overloads are expected, not duplicates.
                if first.file == second.file:
                    continue
                ratio = 1 - abs(first.lines - second.lines) / max(first.lines, second.lines)
                if ratio < 0.7:
                    continue
                matches.append((_fn_label(first), _fn_label(second), round(ratio, 2)))

    logger.info("重复检测完成:%d 对相似函数", len(matches))
    return matches
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
# 5. 任务分类分析
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
|
||||
def analyze_task_classification(logger: logging.Logger) -> dict:
    """Read task metadata from the TaskRegistry and audit its classification.

    Returns {} when the registry cannot be imported (analysis is optional).
    The result holds the task total, per-layer/per-type groupings, and a
    list of naming/classification anomaly descriptions.
    """
    try:
        from orchestration.task_registry import default_registry, TaskMeta
    except ImportError:
        logger.warning("无法导入 TaskRegistry,跳过任务分类分析")
        return {}

    all_codes = default_registry.get_all_task_codes()
    by_layer: dict[str, list[str]] = defaultdict(list)
    by_type: dict[str, list[str]] = defaultdict(list)
    anomalies: list[str] = []

    for code in all_codes:
        meta: TaskMeta | None = default_registry.get_metadata(code)
        if not meta:
            continue
        layer = meta.layer or "NONE"
        by_layer[layer].append(code)
        by_type[meta.task_type].append(code)

        # Flag prefix/layer mismatches (DWS_ tasks may also live in INDEX).
        if code.startswith("DWS_") and layer not in ("DWS", "INDEX"):
            anomalies.append(f"{code}: 前缀 DWS_ 但分类为 {layer}")
        if code.startswith("ODS_") and layer != "ODS":
            anomalies.append(f"{code}: 前缀 ODS_ 但分类为 {layer}")
        if code.startswith("DWD_") and layer != "DWD":
            anomalies.append(f"{code}: 前缀 DWD_ 但分类为 {layer}")

        # INDEX-layer tasks are expected to carry the DWS_ prefix today.
        if layer == "INDEX" and not code.startswith("DWS_"):
            anomalies.append(f"{code}: INDEX 层但不以 DWS_ 开头,可能造成混淆")

    # Report the systemic naming issue when every INDEX task uses DWS_.
    index_tasks = by_layer.get("INDEX", [])
    if index_tasks and all(c.startswith("DWS_") for c in index_tasks):
        anomalies.append(
            f"INDEX 层全部 {len(index_tasks)} 个任务以 DWS_ 开头,"
            "建议改为 IDX_ 前缀以区分 DWS 汇总任务"
        )

    result = {
        "total": len(all_codes),
        "by_layer": {k: {"count": len(v), "tasks": sorted(v)} for k, v in sorted(by_layer.items())},
        "by_type": {k: {"count": len(v), "tasks": sorted(v)} for k, v in sorted(by_type.items())},
        "anomalies": anomalies,
    }
    logger.info("任务分类分析完成:共 %d 个任务,%d 个异常", len(all_codes), len(anomalies))
    return result
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
# 6. 耦合度评估
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
|
||||
def evaluate_coupling(
    edges: list[ImportEdge],
    files: list[FileInfo],
) -> dict:
    """Compute per-module afferent/efferent coupling and instability.

    Metrics:
    - afferent coupling (Ca): how many modules depend on this one
    - efferent coupling (Ce): how many modules this one depends on
    - instability I = Ce / (Ca + Ce); closer to 1 means less stable
    """
    # Restrict to core modules actually present in the scanned files.
    present = {m for m in _CORE_MODULES if any(f.module == m for f in files)}

    afferent: Counter = Counter()  # incoming
    efferent: Counter = Counter()  # outgoing

    # Count each distinct (source, target) module pair at most once.
    counted = set()
    for edge in edges:
        key = (edge.source_module, edge.target_module)
        if key in counted:
            continue
        counted.add(key)
        if edge.source_module in present:
            efferent[edge.source_module] += 1
        if edge.target_module in present:
            afferent[edge.target_module] += 1

    metrics: dict[str, dict] = {}
    for name in sorted(present):
        ca_n = afferent.get(name, 0)
        ce_n = efferent.get(name, 0)
        denom = ca_n + ce_n
        metrics[name] = {
            "afferent_coupling": ca_n,
            "efferent_coupling": ce_n,
            "instability": round(ce_n / denom, 2) if denom > 0 else 0.0,
        }
    return metrics
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
# 7. Markdown 报告生成
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
|
||||
def generate_report(report: ArchitectureReport, coupling: dict) -> str:
    """Render the full architecture analysis as a Markdown document.

    Sections mirror the six analysis phases plus a final suggestion list;
    *coupling* is the dict produced by evaluate_coupling().
    """
    lines: list[str] = []
    _a = lines.append  # shorthand appender for readability below

    _a(f"# ETL 架构分析报告")
    _a(f"")
    _a(f"> 生成时间:{report.generated_at.strftime('%Y-%m-%d %H:%M:%S')}")
    _a(f"> 分析范围:`apps/etl/connectors/feiqiu/` 核心模块")
    _a("")

    # ── Overview ──
    total_files = len(report.files)
    total_lines = sum(f.lines for f in report.files)
    total_code = sum(f.code_lines for f in report.files)
    _a("## 1. 概览")
    _a("")
    _a(f"| 指标 | 值 |")
    _a(f"|------|-----|")
    _a(f"| Python 文件数 | {total_files} |")
    _a(f"| 总行数 | {total_lines:,} |")
    _a(f"| 代码行数 | {total_code:,} |")
    _a(f"| 函数/方法数 | {len(report.functions):,} |")
    _a(f"| 注册任务数 | {report.task_classification.get('total', 'N/A')} |")
    _a(f"| 循环依赖数 | {len(report.circular_deps)} |")
    _a(f"| 相似函数对数 | {len(report.similar_functions)} |")
    _a("")

    # ── Module size ──
    _a("## 2. 模块规模分析")
    _a("")
    module_stats: dict[str, dict] = defaultdict(lambda: {"files": 0, "lines": 0, "code_lines": 0})
    for f in report.files:
        ms = module_stats[f.module]
        ms["files"] += 1
        ms["lines"] += f.lines
        ms["code_lines"] += f.code_lines

    _a("| 模块 | 文件数 | 总行数 | 代码行数 |")
    _a("|------|--------|--------|----------|")
    for mod in sorted(module_stats, key=lambda m: module_stats[m]["lines"], reverse=True):
        s = module_stats[mod]
        _a(f"| `{mod}` | {s['files']} | {s['lines']:,} | {s['code_lines']:,} |")
    _a("")

    # ── Large files (>500 lines) ──
    large_files = [f for f in report.files if f.lines > 500]
    large_files.sort(key=lambda f: f.lines, reverse=True)
    _a("## 3. 大文件识别(>500 行)")
    _a("")
    if large_files:
        _a("| 文件 | 行数 | 代码行 | 模块 |")
        _a("|------|------|--------|------|")
        for f in large_files:
            _a(f"| `{f.rel_path}` | {f.lines:,} | {f.code_lines:,} | {f.module} |")
        _a("")
        _a(f"> ⚠️ 共 {len(large_files)} 个文件超过 500 行,建议拆分以降低维护成本。")
    else:
        _a("所有文件均在 500 行以内。✅")
    _a("")

    # ── Function complexity ──
    _a("## 4. 函数复杂度分析")
    _a("")
    high_complexity = [fn for fn in report.functions if fn.complexity >= 10]
    high_complexity.sort(key=lambda fn: fn.complexity, reverse=True)
    _a(f"### 4.1 高复杂度函数(圈复杂度 ≥ 10)")
    _a("")
    if high_complexity:
        _a("| 函数 | 文件 | 行号 | 复杂度 | 嵌套深度 | 函数行数 |")
        _a("|------|------|------|--------|----------|----------|")
        for fn in high_complexity[:20]:  # cap table size at 20 rows
            name = f"{fn.class_name}.{fn.name}" if fn.class_name else fn.name
            _a(f"| `{name}` | `{fn.file}` | {fn.line} | {fn.complexity} | {fn.max_nesting} | {fn.lines} |")
        if len(high_complexity) > 20:
            _a(f"| ... | 共 {len(high_complexity)} 个 | | | | |")
    else:
        _a("所有函数复杂度均在合理范围内。✅")
    _a("")

    # Long functions (>= 80 body lines)
    long_funcs = [fn for fn in report.functions if fn.lines >= 80]
    long_funcs.sort(key=lambda fn: fn.lines, reverse=True)
    _a("### 4.2 长函数(≥ 80 行)")
    _a("")
    if long_funcs:
        _a("| 函数 | 文件 | 行号 | 函数行数 | 复杂度 |")
        _a("|------|------|------|----------|--------|")
        for fn in long_funcs[:15]:  # cap table size at 15 rows
            name = f"{fn.class_name}.{fn.name}" if fn.class_name else fn.name
            _a(f"| `{name}` | `{fn.file}` | {fn.line} | {fn.lines} | {fn.complexity} |")
        if len(long_funcs) > 15:
            _a(f"| ... | 共 {len(long_funcs)} 个 | | | |")
    else:
        _a("所有函数行数均在合理范围内。✅")
    _a("")

    # ── Module dependencies ──
    _a("## 5. 模块依赖关系")
    _a("")

    # Dependency matrix: import counts per (source, target) module pair.
    dep_matrix: dict[str, Counter] = defaultdict(Counter)
    for edge in report.import_edges:
        dep_matrix[edge.source_module][edge.target_module] += 1

    all_modules = sorted(set(
        list(dep_matrix.keys()) +
        [t for counts in dep_matrix.values() for t in counts]
    ))
    # Only core modules appear as matrix axes.
    all_modules = [m for m in all_modules if m in _CORE_MODULES]

    if all_modules:
        _a("### 5.1 依赖矩阵(行→列 = 导入次数)")
        _a("")
        header = "| 模块 | " + " | ".join(f"`{m}`" for m in all_modules) + " |"
        _a(header)
        _a("|" + "------|" * (len(all_modules) + 1))
        for src in all_modules:
            row = f"| `{src}` |"
            for tgt in all_modules:
                count = dep_matrix.get(src, {}).get(tgt, 0)
                row += f" {count or '·'} |"  # '·' marks a zero cell
            _a(row)
        _a("")

    # Circular dependencies
    _a("### 5.2 循环依赖")
    _a("")
    if report.circular_deps:
        for a, b in report.circular_deps:
            _a(f"- ⚠️ `{a}` ↔ `{b}`")
        _a("")
        _a("> 循环依赖增加模块间耦合,建议通过接口抽象或依赖注入解耦。")
    else:
        _a("未检测到模块级循环依赖。✅")
    _a("")

    # ── Coupling metrics ──
    _a("## 6. 耦合度评估")
    _a("")
    _a("| 模块 | 传入耦合 Ca | 传出耦合 Ce | 不稳定度 I |")
    _a("|------|-----------|-----------|-----------|")
    for mod, vals in sorted(coupling.items(), key=lambda x: x[1]["instability"], reverse=True):
        flag = " ⚠️" if vals["instability"] > 0.8 else ""
        _a(f"| `{mod}` | {vals['afferent_coupling']} | {vals['efferent_coupling']} | {vals['instability']}{flag} |")
    _a("")
    _a("> 不稳定度 I = Ce/(Ca+Ce)。I 接近 1 表示模块高度依赖外部,变更风险大。")
    _a("> I 接近 0 表示模块被广泛依赖,是稳定基础设施。")
    _a("")

    # ── Duplicate code ──
    _a("## 7. 重复代码检测")
    _a("")
    if report.similar_functions:
        _a("以下函数对具有相似的签名特征(参数数量、行数、复杂度),可能存在重复逻辑:")
        _a("")
        _a("| 函数 A | 函数 B | 相似度 |")
        _a("|--------|--------|--------|")
        for a, b, sim in report.similar_functions[:20]:
            _a(f"| `{a}` | `{b}` | {sim:.0%} |")
        if len(report.similar_functions) > 20:
            _a(f"| ... | 共 {len(report.similar_functions)} 对 | |")
        _a("")
        _a("> 建议人工审查上述函数对,确认是否可提取公共逻辑。")
    else:
        _a("未检测到明显的重复函数。✅")
    _a("")

    # ── Task classification ──
    tc = report.task_classification
    _a("## 8. 任务分类分析")
    _a("")
    if tc:
        _a(f"### 8.1 按层分布(共 {tc['total']} 个任务)")
        _a("")
        _a("| 层 | 数量 | 任务列表 |")
        _a("|-----|------|----------|")
        for layer, info in tc.get("by_layer", {}).items():
            tasks_str = ", ".join(f"`{t}`" for t in info["tasks"][:8])  # first 8 only
            if info["count"] > 8:
                tasks_str += f" ... 共 {info['count']} 个"
            _a(f"| {layer} | {info['count']} | {tasks_str} |")
        _a("")

        _a("### 8.2 按类型分布")
        _a("")
        _a("| 类型 | 数量 |")
        _a("|------|------|")
        for ttype, info in tc.get("by_type", {}).items():
            _a(f"| {ttype} | {info['count']} |")
        _a("")

        anomalies = tc.get("anomalies", [])
        _a("### 8.3 分类异常")
        _a("")
        if anomalies:
            for a in anomalies:
                _a(f"- ⚠️ {a}")
        else:
            _a("未发现分类异常。✅")
    else:
        _a("任务分类分析未执行(TaskRegistry 导入失败)。")
    _a("")

    # ── Suggestions ──
    _a("## 9. 架构优化建议")
    _a("")
    suggestions = _generate_suggestions(report, coupling)
    for i, s in enumerate(suggestions, 1):
        _a(f"{i}. {s}")
    _a("")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def _generate_suggestions(report: ArchitectureReport, coupling: dict) -> list[str]:
    """Turn the analysis results into a list of concrete Markdown suggestions."""
    suggestions: list[str] = []

    # Oversized files: point at the single worst offender.
    large_files = [f for f in report.files if f.lines > 500]
    if large_files:
        biggest = max(large_files, key=lambda f: f.lines)
        suggestions.append(
            f"**拆分大文件**:`{biggest.rel_path}`({biggest.lines:,} 行)是最大文件,"
            "建议按职责拆分为多个子模块。"
        )

    # Very high cyclomatic complexity (>= 15; stricter than the report's 10).
    high_cx = [fn for fn in report.functions if fn.complexity >= 15]
    if high_cx:
        worst = max(high_cx, key=lambda fn: fn.complexity)
        name = f"{worst.class_name}.{worst.name}" if worst.class_name else worst.name
        suggestions.append(
            f"**降低函数复杂度**:`{name}`(复杂度 {worst.complexity})建议提取子函数或使用策略模式。"
        )

    # Mutual module dependencies.
    if report.circular_deps:
        pairs = ", ".join(f"`{a}`↔`{b}`" for a, b in report.circular_deps)
        suggestions.append(
            f"**消除循环依赖**:{pairs}。可通过引入接口层或依赖注入解耦。"
        )

    # Modules with instability above 0.8.
    unstable = [m for m, v in coupling.items() if v["instability"] > 0.8]
    if unstable:
        suggestions.append(
            f"**稳定化高不稳定模块**:{', '.join(f'`{m}`' for m in unstable)} "
            "的不稳定度 > 0.8,建议减少对外部模块的依赖。"
        )

    # Naming advice, driven by the classification anomalies.
    tc = report.task_classification
    if tc:
        anomalies = tc.get("anomalies", [])
        if any("INDEX" in a for a in anomalies):
            suggestions.append(
                "**统一 INDEX 层任务命名**:当前 INDEX 层任务以 `DWS_` 开头,"
                "建议改为 `IDX_` 前缀以避免与 DWS 汇总任务混淆。"
            )

    # Duplicate-code advice only when the signal is strong (> 5 pairs).
    if len(report.similar_functions) > 5:
        suggestions.append(
            f"**消除重复代码**:检测到 {len(report.similar_functions)} 对相似函数,"
            "建议提取公共基类或工具函数。"
        )

    if not suggestions:
        suggestions.append("当前架构整体健康,未发现需要立即优化的问题。")

    return suggestions
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
# 主流程
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
|
||||
def run_analysis(root: Path, logger: logging.Logger) -> tuple[ArchitectureReport, dict]:
    """Run all six analysis phases under *root*.

    Returns the populated ArchitectureReport plus the per-module coupling
    metrics (kept separate because they are derived rather than raw data).
    """
    report = ArchitectureReport()

    logger.info("=" * 60)
    logger.info("ETL 架构分析开始")
    logger.info("分析根目录: %s", root)
    logger.info("=" * 60)

    # Phase 1: file scan & line counts.
    logger.info("── 阶段 1/6:文件扫描 ──")
    report.files = scan_files(root, logger)

    # Phase 2: per-function complexity via AST.
    logger.info("── 阶段 2/6:函数复杂度分析 ──")
    report.functions = analyze_functions(report.files, logger)

    # Phase 3: import graph and circular dependencies.
    logger.info("── 阶段 3/6:依赖关系分析 ──")
    report.import_edges, report.circular_deps = analyze_dependencies(report.files, logger)

    # Phase 4: similar-function (duplicate-code) detection.
    logger.info("── 阶段 4/6:重复代码检测 ──")
    report.similar_functions = detect_similar_functions(report.functions, logger)

    # Phase 5: task-registry classification audit.
    logger.info("── 阶段 5/6:任务分类分析 ──")
    report.task_classification = analyze_task_classification(logger)

    # Phase 6: coupling/instability metrics.
    logger.info("── 阶段 6/6:耦合度评估 ──")
    coupling = evaluate_coupling(report.import_edges, report.files)

    logger.info("=" * 60)
    logger.info("分析完成")
    logger.info("=" * 60)

    return report, coupling
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the analyzer's command-line options."""
    cli = argparse.ArgumentParser(description="ETL 架构分析")
    cli.add_argument(
        "--output", "-o",
        help="报告输出路径(默认自动生成带日期的文件名)",
        default=None,
    )
    return cli.parse_args()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: run the analysis, write the Markdown report, log a summary."""
    logger = _setup_logging()
    args = parse_args()

    root = _FEIQIU_ROOT

    report, coupling = run_analysis(root, logger)

    # Render the Markdown report.
    md_content = generate_report(report, coupling)

    # Resolve the output path
    # (default: docs/reports/architecture_report_<YYYYMMDD>.md).
    reports_dir = root / "docs" / "reports"
    reports_dir.mkdir(parents=True, exist_ok=True)

    if args.output:
        output_path = Path(args.output)
    else:
        date_str = datetime.now().strftime("%Y%m%d")
        output_path = reports_dir / f"architecture_report_{date_str}.md"

    output_path.write_text(md_content, encoding="utf-8")
    logger.info("报告已保存: %s", output_path)

    # Console summary of the headline numbers.
    total_files = len(report.files)
    total_lines = sum(f.lines for f in report.files)
    large_count = sum(1 for f in report.files if f.lines > 500)
    high_cx = sum(1 for fn in report.functions if fn.complexity >= 10)

    logger.info("")
    logger.info("═══ 分析摘要 ═══")
    logger.info("  文件数: %d", total_files)
    logger.info("  总行数: %s", f"{total_lines:,}")
    logger.info("  大文件(>500行): %d", large_count)
    logger.info("  高复杂度函数(≥10): %d", high_cx)
    logger.info("  循环依赖: %d", len(report.circular_deps))
    logger.info("  相似函数对: %d", len(report.similar_functions))
    logger.info("  注册任务: %s", report.task_classification.get("total", "N/A"))
|
||||
|
||||
|
||||
# Allow running as `python -m scripts.debug.analyze_architecture`.
if __name__ == "__main__":
    main()
|
||||
928
apps/etl/connectors/feiqiu/scripts/debug/analyze_performance.py
Normal file
928
apps/etl/connectors/feiqiu/scripts/debug/analyze_performance.py
Normal file
@@ -0,0 +1,928 @@
|
||||
"""
|
||||
性能分析脚本 — 读取全量刷新阶段采集的计时 JSON,统计耗时、识别瓶颈、生成优化报告。
|
||||
|
||||
用法:
|
||||
cd apps/etl/connectors/feiqiu
|
||||
python -m scripts.debug.analyze_performance [--input <json>] [--output <md>] [--skip-sql]
|
||||
|
||||
功能:
|
||||
1. 层级耗时统计:各层总耗时、平均耗时、任务数
|
||||
2. 任务耗时排名:Top 5 瓶颈任务,含 fetched/inserted 等指标
|
||||
3. API 调用分析:响应时间、分页效率(每页记录数 vs 请求次数)
|
||||
4. SQL 查询分析:连接数据库执行 EXPLAIN ANALYZE 分析关键查询
|
||||
5. 优化建议:基于分析结果给出具体优化建议
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 路径常量
|
||||
# ---------------------------------------------------------------------------
|
||||
_SCRIPT_DIR = Path(__file__).resolve().parent
|
||||
_FEIQIU_ROOT = _SCRIPT_DIR.parent.parent # apps/etl/connectors/feiqiu
|
||||
_OUTPUT_DIR = _SCRIPT_DIR / "output"
|
||||
_REPORTS_DIR = _FEIQIU_ROOT / "docs" / "reports"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 数据结构
|
||||
# ---------------------------------------------------------------------------
|
||||
@dataclass
class TaskTiming:
    """Timing data for a single ETL task."""
    task_code: str          # task identifier
    layer: str              # layer the task belongs to (ODS/DWD/DWS/...)
    duration_sec: float     # wall-clock duration in seconds
    status: str             # e.g. SUCCESS / FAIL / SKIP
    counts: dict[str, int]  # record counters (fetched/inserted/updated/skipped)
    error: str | None       # error message, if the task failed
    api_calls: int          # number of API requests issued
    api_total_sec: float    # total time spent waiting on the API

    @property
    def fetched(self) -> int:
        """Records fetched from the API (0 when absent)."""
        return self.counts.get("fetched", 0)

    @property
    def inserted(self) -> int:
        """Records inserted into the database (0 when absent)."""
        return self.counts.get("inserted", 0)

    @property
    def updated(self) -> int:
        """Records updated in the database (0 when absent)."""
        return self.counts.get("updated", 0)

    @property
    def skipped(self) -> int:
        """Records skipped, typically by dedup (0 when absent)."""
        return self.counts.get("skipped", 0)

    @property
    def throughput(self) -> float:
        """Records processed per second (fetched / duration); 0.0 for zero duration."""
        return 0.0 if self.duration_sec <= 0 else self.fetched / self.duration_sec
|
||||
|
||||
|
||||
@dataclass
class LayerTiming:
    """Aggregated timing for one ETL layer."""
    layer: str               # layer name (e.g. ODS / DWD / DWS)
    duration_sec: float      # total wall-clock duration of the layer
    status: str              # overall layer status
    task_count: int          # tasks registered in the layer
    success_count: int       # tasks that succeeded
    fail_count: int          # tasks that failed
    skip_count: int          # tasks that were skipped
    total_fetched: int       # records fetched across all tasks
    total_inserted: int      # records inserted across all tasks
    total_updated: int       # records updated across all tasks
    total_errors: int        # record-level errors across all tasks
    tasks: list[TaskTiming]  # per-task timing details
|
||||
|
||||
|
||||
@dataclass
class VerificationSummary:
    """Summary of the post-refresh verification stage."""
    status: str              # verification outcome status
    duration_sec: float      # verification wall-clock duration
    total_tables: int        # tables checked
    consistent_tables: int   # tables found consistent
    total_backfilled: int    # records backfilled to repair gaps
    error_tables: int        # tables whose check errored
    layers: dict[str, Any]   # raw per-layer verification payload (schema defined upstream)
|
||||
|
||||
|
||||
@dataclass
class PerformanceData:
    """Complete performance dataset parsed from the timing JSON."""
    flow: str                                 # flow identifier
    window_start: str                         # refresh window start (string as recorded)
    window_end: str                           # refresh window end (string as recorded)
    overall_duration_sec: float               # end-to-end duration
    overall_status: str                       # end-to-end status
    layers: list[LayerTiming]                 # per-layer breakdown
    verification: VerificationSummary | None  # optional verification summary
|
||||
|
||||
|
||||
@dataclass
class SQLAnalysisResult:
    """Parsed result of one EXPLAIN ANALYZE run."""
    query_name: str             # human-readable query label
    table_name: str             # primary table the query touches
    plan_summary: str           # first lines of the plan text (or failure note)
    total_cost: float           # maximum planner cost seen in the plan
    actual_time_ms: float       # maximum actual time seen in the plan
    rows_processed: int         # maximum row count seen in the plan
    seq_scans: list[str]        # tables hit by sequential scans
    missing_indexes: list[str]  # tables that likely need an index
    recommendations: list[str]  # generated tuning hints
|
||||
|
||||
|
||||
@dataclass
class PerformanceReport:
    """All inputs needed to render the performance report."""
    data: PerformanceData               # raw timing dataset
    bottleneck_tasks: list[TaskTiming]  # top-N slowest tasks
    layer_stats: list[dict[str, Any]]   # per-layer summary rows
    api_analysis: list[dict[str, Any]]  # per-task API metrics
    sql_analysis: list[SQLAnalysisResult] = field(default_factory=list)  # EXPLAIN results
    recommendations: list[str] = field(default_factory=list)             # tuning hints
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 日志
|
||||
# ---------------------------------------------------------------------------
|
||||
def _setup_logging() -> logging.Logger:
|
||||
logger = logging.getLogger("analyze_performance")
|
||||
logger.setLevel(logging.INFO)
|
||||
if not logger.handlers:
|
||||
handler = logging.StreamHandler(sys.stdout)
|
||||
handler.setFormatter(logging.Formatter(
|
||||
"%(asctime)s [%(levelname)s] %(message)s", datefmt="%H:%M:%S"
|
||||
))
|
||||
logger.addHandler(handler)
|
||||
return logger
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 数据加载
|
||||
# ---------------------------------------------------------------------------
|
||||
def load_timing_data(json_path: Path, logger: logging.Logger) -> PerformanceData:
    """Load timing data from a JSON file produced by the full-refresh run.

    Missing keys fall back to neutral defaults (0 / "UNKNOWN" / empty
    containers) so partially-populated timing files still load.
    """
    logger.info("加载计时数据: %s", json_path)
    raw = json.loads(json_path.read_text(encoding="utf-8"))

    layers: list[LayerTiming] = []
    for layer_raw in raw.get("layers", []):
        # Per-task timings; a task without an explicit layer inherits the
        # enclosing layer's name.
        tasks = [
            TaskTiming(
                task_code=t["task_code"],
                layer=t.get("layer", layer_raw["layer"]),
                duration_sec=t.get("duration_sec", 0),
                status=t.get("status", "UNKNOWN"),
                counts=t.get("counts", {}),
                error=t.get("error"),
                api_calls=t.get("api_calls", 0),
                api_total_sec=t.get("api_total_sec", 0.0),
            )
            for t in layer_raw.get("tasks", [])
        ]
        layers.append(LayerTiming(
            layer=layer_raw["layer"],
            duration_sec=layer_raw.get("duration_sec", 0),
            status=layer_raw.get("status", "UNKNOWN"),
            # If the file omits task_count, derive it from the parsed tasks.
            task_count=layer_raw.get("task_count", len(tasks)),
            success_count=layer_raw.get("success_count", 0),
            fail_count=layer_raw.get("fail_count", 0),
            skip_count=layer_raw.get("skip_count", 0),
            total_fetched=layer_raw.get("total_fetched", 0),
            total_inserted=layer_raw.get("total_inserted", 0),
            total_updated=layer_raw.get("total_updated", 0),
            total_errors=layer_raw.get("total_errors", 0),
            tasks=tasks,
        ))

    # The verification stage is optional in the timing file.
    verification = None
    if "verification" in raw:
        v = raw["verification"]
        verification = VerificationSummary(
            status=v.get("status", "UNKNOWN"),
            duration_sec=v.get("duration_sec", 0),
            total_tables=v.get("total_tables", 0),
            consistent_tables=v.get("consistent_tables", 0),
            total_backfilled=v.get("total_backfilled", 0),
            error_tables=v.get("error_tables", 0),
            layers=v.get("layers", {}),
        )

    return PerformanceData(
        flow=raw.get("flow", ""),
        window_start=raw.get("window_start", ""),
        window_end=raw.get("window_end", ""),
        overall_duration_sec=raw.get("overall_duration_sec", 0),
        overall_status=raw.get("overall_status", "UNKNOWN"),
        layers=layers,
        verification=verification,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 分析函数
|
||||
# ---------------------------------------------------------------------------
|
||||
def analyze_layer_stats(data: PerformanceData) -> list[dict[str, Any]]:
    """Summarise duration, task counts and throughput for each layer."""
    summaries: list[dict[str, Any]] = []
    overall = data.overall_duration_sec
    for layer in data.layers:
        # Only tasks that actually ran contribute to duration statistics.
        ran = [t for t in layer.tasks if t.status != "SKIP"]
        spans = [t.duration_sec for t in ran] if ran else [0]
        mean_span = sum(spans) / len(spans) if spans else 0
        fetched_sum = sum(t.fetched for t in ran)
        rate = fetched_sum / layer.duration_sec if layer.duration_sec > 0 else 0
        share = layer.duration_sec / overall * 100 if overall > 0 else 0

        summaries.append({
            "layer": layer.layer,
            "duration_sec": layer.duration_sec,
            "pct_of_total": share,
            "task_count": layer.task_count,
            "executed_count": len(ran),
            "success_count": layer.success_count,
            "fail_count": layer.fail_count,
            "skip_count": layer.skip_count,
            "avg_task_sec": round(mean_span, 2),
            "max_task_sec": round(max(spans), 2),
            "min_task_sec": round(min(spans), 2),
            "total_fetched": fetched_sum,
            "total_inserted": layer.total_inserted,
            "total_updated": layer.total_updated,
            "throughput_per_sec": round(rate, 1),
            "status": layer.status,
        })
    return summaries
|
||||
|
||||
|
||||
def find_bottleneck_tasks(data: PerformanceData, top_n: int = 5) -> list[TaskTiming]:
    """Return the top-N slowest tasks across all layers, excluding skipped ones."""
    candidates = [
        task
        for layer in data.layers
        for task in layer.tasks
        if task.status != "SKIP"
    ]
    # Slowest first; sorted() is stable, matching the original in-place sort.
    return sorted(candidates, key=lambda task: task.duration_sec, reverse=True)[:top_n]
|
||||
|
||||
|
||||
def analyze_api_calls(data: PerformanceData) -> list[dict[str, Any]]:
    """Derive API response-time and pagination-efficiency metrics per task."""
    metrics: list[dict[str, Any]] = []
    # Page count is estimated from the default API_PAGE_SIZE of 200.
    page_size = 200
    for layer in data.layers:
        for task in layer.tasks:
            if task.status == "SKIP":
                continue
            pulled = task.fetched
            # Ceiling division; at least one page whenever anything was fetched.
            pages = max(1, (pulled + page_size - 1) // page_size) if pulled > 0 else 0

            # Database-side time = total duration minus API wait time.
            db_sec = max(0, task.duration_sec - task.api_total_sec)

            # Per-record processing cost in milliseconds.
            ms_per_record = (task.duration_sec / pulled * 1000) if pulled > 0 else 0

            metrics.append({
                "task_code": task.task_code,
                "layer": task.layer,
                "fetched": pulled,
                "api_calls": task.api_calls,
                "api_total_sec": task.api_total_sec,
                "estimated_pages": pages,
                "avg_page_time_ms": (task.api_total_sec / pages * 1000
                                     if pages > 0 and task.api_total_sec > 0 else 0),
                "records_per_page": (pulled / pages if pages > 0 else 0),
                "db_time_sec": round(db_sec, 2),
                "per_record_ms": round(ms_per_record, 2),
                "total_sec": task.duration_sec,
                "status": task.status,
            })
    return metrics
|
||||
|
||||
|
||||
def analyze_sql_queries(
    dsn: str,
    logger: logging.Logger,
) -> list[SQLAnalysisResult]:
    """Run EXPLAIN ANALYZE for a fixed set of key queries against the database.

    Returns one SQLAnalysisResult per query. A missing psycopg2, a failed
    connection, or a failed individual query is logged (and recorded as a
    failure result) rather than raised.
    """
    try:
        import psycopg2  # noqa: F811
    except ImportError:
        logger.warning("psycopg2 未安装,跳过 SQL 分析")
        return []

    # Key query list: ODS inserts, DWD merge, DWS aggregation.
    queries = [
        {
            "name": "ODS 批量 INSERT(payment_transactions)",
            "table": "ods.payment_transactions",
            "sql": """
                EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
                SELECT * FROM ods.payment_transactions
                WHERE fetched_at >= NOW() - INTERVAL '7 days'
                LIMIT 100
            """,
        },
        {
            "name": "ODS 批量 INSERT(platform_coupon_redemption_records)",
            "table": "ods.platform_coupon_redemption_records",
            "sql": """
                EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
                SELECT * FROM ods.platform_coupon_redemption_records
                WHERE fetched_at >= NOW() - INTERVAL '7 days'
                LIMIT 100
            """,
        },
        {
            "name": "ODS content_hash 去重查询",
            "table": "ods.member_balance_changes",
            "sql": """
                EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
                SELECT id, content_hash FROM ods.member_balance_changes
                WHERE fetched_at >= NOW() - INTERVAL '7 days'
            """,
        },
        {
            "name": "DWD SCD2 合并(dim_table)",
            "table": "dwd.dim_table",
            "sql": """
                EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
                SELECT * FROM dwd.dim_table
                WHERE scd2_is_current = 1
            """,
        },
        {
            "name": "DWS 订单汇总查询",
            "table": "dws.dws_order_summary",
            "sql": """
                EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
                SELECT * FROM dws.dws_order_summary
                WHERE order_date >= CURRENT_DATE - INTERVAL '30 days'
                LIMIT 100
            """,
        },
    ]

    results: list[SQLAnalysisResult] = []
    conn = None
    try:
        conn = psycopg2.connect(dsn, connect_timeout=10)
        # EXPLAIN runs only; no explicit transaction management needed.
        conn.autocommit = True
        cur = conn.cursor()

        for q in queries:
            try:
                cur.execute(q["sql"])
                rows = cur.fetchall()
                # EXPLAIN returns one plan line per row, first column.
                plan_text = "\n".join(r[0] for r in rows)

                # Parse the execution plan into metrics.
                result = _parse_explain_plan(q["name"], q["table"], plan_text)
                results.append(result)
                logger.info(" ✓ %s: %.1fms", q["name"], result.actual_time_ms)
            except Exception as e:
                # Record the failure as a result so the report can show it.
                logger.warning(" ✗ %s: %s", q["name"], e)
                results.append(SQLAnalysisResult(
                    query_name=q["name"],
                    table_name=q["table"],
                    plan_summary=f"执行失败: {e}",
                    total_cost=0,
                    actual_time_ms=0,
                    rows_processed=0,
                    seq_scans=[],
                    missing_indexes=[],
                    recommendations=[f"查询执行失败,需检查表是否存在: {e}"],
                ))
    except Exception as e:
        logger.error("数据库连接失败: %s", e)
    finally:
        if conn:
            conn.close()

    return results
|
||||
|
||||
|
||||
def _parse_explain_plan(
    query_name: str,
    table_name: str,
    plan_text: str,
) -> SQLAnalysisResult:
    """Extract key metrics (time, cost, rows, seq scans) from EXPLAIN ANALYZE text."""
    import re

    # Pre-compile the patterns applied to every plan line.
    time_pat = re.compile(r"actual time=([\d.]+)\.\.([\d.]+)")
    cost_pat = re.compile(r"cost=([\d.]+)\.\.([\d.]+)")
    rows_pat = re.compile(r"rows=(\d+)")
    scan_pat = re.compile(r"Seq Scan on (\S+)")

    seq_scans: list[str] = []
    total_cost = 0.0
    actual_time_ms = 0.0
    rows_processed = 0

    for line in plan_text.split("\n"):
        # Track the largest "actual time" upper bound seen anywhere in the plan.
        hit = time_pat.search(line)
        if hit:
            actual_time_ms = max(actual_time_ms, float(hit.group(2)))

        # Track the largest planner cost upper bound.
        hit = cost_pat.search(line)
        if hit:
            total_cost = max(total_cost, float(hit.group(2)))

        # Track the largest row count mentioned.
        hit = rows_pat.search(line)
        if hit:
            rows_processed = max(rows_processed, int(hit.group(1)))

        # Record every sequential scan and the table it hits.
        if "Seq Scan" in line:
            hit = scan_pat.search(line)
            seq_scans.append(hit.group(1) if hit else "unknown")

    # Turn findings into actionable hints.
    missing_indexes: list[str] = []
    recommendations: list[str] = []
    for tbl in seq_scans:
        missing_indexes.append(tbl)
        recommendations.append(f"表 {tbl} 存在全表扫描,建议添加索引")

    if actual_time_ms > 100:
        recommendations.append(f"查询耗时 {actual_time_ms:.1f}ms,考虑优化查询或添加索引")

    # Keep the first ten plan lines as a readable summary.
    plan_summary = "\n".join(plan_text.strip().split("\n")[:10])

    return SQLAnalysisResult(
        query_name=query_name,
        table_name=table_name,
        plan_summary=plan_summary,
        total_cost=total_cost,
        actual_time_ms=actual_time_ms,
        rows_processed=rows_processed,
        seq_scans=seq_scans,
        missing_indexes=missing_indexes,
        recommendations=recommendations,
    )
|
||||
|
||||
|
||||
|
||||
def generate_recommendations(
    report: PerformanceReport,
    logger: logging.Logger,
) -> list[str]:
    """Generate optimisation suggestions from the analysis results.

    NOTE(review): ``logger`` is currently unused in this function; it is kept
    for interface parity with the other analysis helpers — confirm before
    removing.
    """
    recs: list[str] = []

    # 1. Suggestions driven by the slowest tasks (only those over 100s).
    for task in report.bottleneck_tasks:
        if task.duration_sec > 100:
            skip_ratio = task.skipped / task.fetched * 100 if task.fetched > 0 else 0
            if skip_ratio > 90:
                recs.append(
                    f"**{task.task_code}**(耗时 {task.duration_sec:.1f}s):"
                    f"跳过率 {skip_ratio:.0f}%,建议优化 content_hash 去重逻辑,"
                    f"在数据库端用索引加速 hash 比对,或在 API 端增加增量过滤参数减少无效拉取"
                )
            elif task.fetched > 10000:
                recs.append(
                    f"**{task.task_code}**(耗时 {task.duration_sec:.1f}s):"
                    f"拉取 {task.fetched:,} 条记录,建议增大 API_PAGE_SIZE 或启用并行分页"
                )
            else:
                recs.append(
                    f"**{task.task_code}**(耗时 {task.duration_sec:.1f}s):"
                    f"建议分析具体耗时分布(API vs DB),针对性优化"
                )

    # 2. Suggestions driven by per-layer statistics.
    for stat in report.layer_stats:
        if stat["pct_of_total"] > 80:
            recs.append(
                f"**{stat['layer']} 层**占总耗时 {stat['pct_of_total']:.1f}%,"
                f"是主要瓶颈层,建议优先优化该层任务"
            )
        if stat["skip_count"] > stat["task_count"] * 0.5:
            recs.append(
                f"**{stat['layer']} 层**有 {stat['skip_count']}/{stat['task_count']} "
                f"个任务被跳过,建议检查跳过条件是否合理"
            )

    # 3. Suggestions driven by API metrics: costly per-record processing.
    high_per_record = [a for a in report.api_analysis
                       if a["per_record_ms"] > 5 and a["fetched"] > 1000]
    if high_per_record:
        recs.append(
            "以下任务每条记录处理耗时较高(>5ms),建议优化批量写入逻辑:" +
            "、".join(f"{a['task_code']}({a['per_record_ms']:.1f}ms/条)"
                     for a in high_per_record[:5])
        )

    # 4. Suggestions carried over from SQL EXPLAIN analysis.
    for sql_r in report.sql_analysis:
        recs.extend(sql_r.recommendations)

    # 5. Generic suggestions when the whole refresh takes over 10 minutes.
    if report.data.overall_duration_sec > 600:
        recs.append(
            f"全量刷新总耗时 {report.data.overall_duration_sec:.0f}s({report.data.overall_duration_sec/60:.1f}分钟),"
            "建议考虑以下通用优化策略:"
        )
        recs.append(" - ODS 层任务间无依赖,可并行执行以大幅缩短总耗时")
        recs.append(" - 对高跳过率任务,在 API 请求中增加时间过滤参数减少无效数据传输")
        recs.append(" - 对大表 INSERT,使用 COPY 协议替代逐行 INSERT 提升写入性能")
        recs.append(" - 考虑在 content_hash 列上建立索引加速去重判断")

    return recs
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 报告生成
|
||||
# ---------------------------------------------------------------------------
|
||||
def generate_report(report: PerformanceReport) -> str:
    """Render the full performance-analysis report as a Markdown string."""
    lines: list[str] = []
    _w = lines.append  # shorthand: emit one output line

    _w("# ETL 性能分析报告")
    _w("")
    _w(f"> 生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    _w(f"> 数据来源: 全量刷新计时数据({report.data.flow})")
    _w(f"> 时间窗口: {report.data.window_start} ~ {report.data.window_end}")
    _w(f"> 总耗时: {report.data.overall_duration_sec:.1f}s "
       f"({report.data.overall_duration_sec/60:.1f}分钟)")
    _w(f"> 状态: {report.data.overall_status}")
    _w("")

    # ── Table of contents ──
    _w("## 目录")
    _w("")
    _w("1. [执行概览](#1-执行概览)")
    _w("2. [层级耗时统计](#2-层级耗时统计)")
    _w("3. [性能瓶颈 Top 5](#3-性能瓶颈-top-5)")
    _w("4. [任务耗时明细](#4-任务耗时明细)")
    _w("5. [API 调用分析](#5-api-调用分析)")
    _w("6. [SQL 查询分析](#6-sql-查询分析)")
    _w("7. [校验阶段分析](#7-校验阶段分析)")
    _w("8. [优化建议](#8-优化建议)")
    _w("")

    # ── 1. Execution overview ──
    _w("## 1. 执行概览")
    _w("")
    _w("| 指标 | 值 |")
    _w("|------|-----|")
    _w(f"| Flow | `{report.data.flow}` |")
    _w(f"| 时间窗口 | {report.data.window_start} ~ {report.data.window_end} |")
    _w(f"| 总耗时 | {report.data.overall_duration_sec:.1f}s "
       f"({report.data.overall_duration_sec/60:.1f}分钟) |")
    _w(f"| 状态 | {report.data.overall_status} |")
    total_tasks = sum(s["task_count"] for s in report.layer_stats)
    total_success = sum(s["success_count"] for s in report.layer_stats)
    total_fail = sum(s["fail_count"] for s in report.layer_stats)
    total_skip = sum(s["skip_count"] for s in report.layer_stats)
    _w(f"| 总任务数 | {total_tasks} |")
    _w(f"| 成功/失败/跳过 | {total_success}/{total_fail}/{total_skip} |")
    total_fetched = sum(s["total_fetched"] for s in report.layer_stats)
    total_inserted = sum(s["total_inserted"] for s in report.layer_stats)
    _w(f"| 总拉取记录 | {total_fetched:,} |")
    _w(f"| 总写入记录 | {total_inserted:,} |")
    if report.data.verification:
        v = report.data.verification
        _w(f"| 校验耗时 | {v.duration_sec:.1f}s |")
        _w(f"| 校验表数 | {v.total_tables}(一致 {v.consistent_tables},"
           f"补齐 {v.total_backfilled},错误 {v.error_tables})|")
    _w("")

    # ── 2. Per-layer timing statistics ──
    _w("## 2. 层级耗时统计")
    _w("")
    _w("| 层 | 耗时(s) | 占比 | 任务数 | 执行数 | 成功 | 失败 | 跳过 | "
       "平均(s) | 最大(s) | 拉取 | 写入 | 吞吐(条/s) |")
    _w("|-----|---------|------|--------|--------|------|------|------|"
       "---------|---------|------|------|------------|")
    for s in report.layer_stats:
        _w(f"| {s['layer']} | {s['duration_sec']:.1f} | {s['pct_of_total']:.1f}% | "
           f"{s['task_count']} | {s['executed_count']} | {s['success_count']} | "
           f"{s['fail_count']} | {s['skip_count']} | {s['avg_task_sec']} | "
           f"{s['max_task_sec']} | {s['total_fetched']:,} | {s['total_inserted']:,} | "
           f"{s['throughput_per_sec']} |")
    _w("")

    # Duration distribution rendered as a text bar chart.
    _w("### 耗时分布")
    _w("")
    _w("```")
    max_dur = max(s["duration_sec"] for s in report.layer_stats) if report.layer_stats else 1
    for s in report.layer_stats:
        bar_len = int(s["duration_sec"] / max_dur * 40) if max_dur > 0 else 0
        bar = "█" * bar_len
        _w(f" {s['layer']:>5} │{bar} {s['duration_sec']:.1f}s ({s['pct_of_total']:.1f}%)")
    _w("```")
    _w("")

    # ── 3. Top-5 bottlenecks ──
    _w("## 3. 性能瓶颈 Top 5")
    _w("")
    _w("| 排名 | 任务 | 层 | 耗时(s) | 状态 | 拉取 | 写入 | 更新 | 跳过 | "
       "吞吐(条/s) | 每条耗时(ms) |")
    _w("|------|------|-----|---------|------|------|------|------|------|"
       "------------|-------------|")
    for i, t in enumerate(report.bottleneck_tasks, 1):
        per_rec = (t.duration_sec / t.fetched * 1000) if t.fetched > 0 else 0
        _w(f"| {i} | `{t.task_code}` | {t.layer} | {t.duration_sec:.1f} | "
           f"{t.status} | {t.fetched:,} | {t.inserted:,} | {t.updated:,} | "
           f"{t.skipped:,} | {t.throughput:.1f} | {per_rec:.2f} |")
    _w("")

    # Narrative analysis of each bottleneck task.
    _w("### 瓶颈分析")
    _w("")
    for i, t in enumerate(report.bottleneck_tasks, 1):
        _w(f"**{i}. {t.task_code}**({t.duration_sec:.1f}s)")
        if t.fetched > 0:
            skip_ratio = t.skipped / t.fetched * 100
            _w(f"- 拉取 {t.fetched:,} 条,跳过 {t.skipped:,} 条(跳过率 {skip_ratio:.0f}%)")
            _w(f"- 实际写入 {t.inserted:,} 条,写入率 {t.inserted/t.fetched*100:.1f}%")
            _w(f"- 每条记录处理耗时 {t.duration_sec/t.fetched*1000:.2f}ms")
            if skip_ratio > 90:
                _w(f"- ⚠️ 跳过率极高,大量时间花在 content_hash 比对上")
        if t.error:
            _w(f"- ❌ 错误: {t.error}")
        _w("")

    # ── 4. Per-task timing detail, slowest first within each layer ──
    _w("## 4. 任务耗时明细")
    _w("")
    for layer in report.data.layers:
        _w(f"### {layer.layer} 层")
        _w("")
        _w("| 任务 | 耗时(s) | 状态 | 拉取 | 写入 | 跳过 | 错误 |")
        _w("|------|---------|------|------|------|------|------|")
        sorted_tasks = sorted(layer.tasks, key=lambda t: t.duration_sec, reverse=True)
        for t in sorted_tasks:
            _w(f"| `{t.task_code}` | {t.duration_sec:.1f} | {t.status} | "
               f"{t.fetched:,} | {t.inserted:,} | {t.skipped:,} | "
               f"{t.counts.get('errors', 0)} |")
        _w("")

    # ── 5. API call analysis ──
    _w("## 5. API 调用分析")
    _w("")
    # Only show tasks that actually fetched data.
    api_with_data = [a for a in report.api_analysis if a["fetched"] > 0]
    if api_with_data:
        api_with_data.sort(key=lambda a: a["total_sec"], reverse=True)
        _w("| 任务 | 拉取 | 估算页数 | 总耗时(s) | DB耗时(s) | 每条(ms) |")
        _w("|------|------|----------|-----------|-----------|----------|")
        for a in api_with_data:
            _w(f"| `{a['task_code']}` | {a['fetched']:,} | {a['estimated_pages']} | "
               f"{a['total_sec']:.1f} | {a['db_time_sec']} | {a['per_record_ms']} |")
        _w("")

        # Pagination-efficiency summary.
        _w("### 分页效率分析")
        _w("")
        total_records = sum(a["fetched"] for a in api_with_data)
        total_pages = sum(a["estimated_pages"] for a in api_with_data)
        avg_per_page = total_records / total_pages if total_pages > 0 else 0
        _w(f"- 总拉取记录: {total_records:,}")
        _w(f"- 估算总页数: {total_pages:,}")
        _w(f"- 平均每页记录数: {avg_per_page:.1f}")
        _w(f"- 当前 API_PAGE_SIZE: 200")
        _w("")
        if avg_per_page < 150:
            _w("> ⚠️ 实际每页记录数低于 PAGE_SIZE,部分端点可能返回不满页的数据")
            _w("")
    else:
        _w("本次运行中 API 调用计时数据为 0(可能未单独采集 API 耗时)。")
        _w("")
        _w("> 注意:当前计时数据中 `api_calls` 和 `api_total_sec` 均为 0,"
           "说明全量刷新脚本未单独采集 API 调用耗时。")
        _w("> 建议在后续版本中为 API 调用添加独立计时,以便区分 API 等待时间和 DB 写入时间。")
        _w("")

    # ── 6. SQL query analysis ──
    _w("## 6. SQL 查询分析")
    _w("")
    if report.sql_analysis:
        for sql_r in report.sql_analysis:
            _w(f"### {sql_r.query_name}")
            _w("")
            _w(f"- 表: `{sql_r.table_name}`")
            _w(f"- 实际耗时: {sql_r.actual_time_ms:.1f}ms")
            _w(f"- 预估成本: {sql_r.total_cost:.1f}")
            _w(f"- 处理行数: {sql_r.rows_processed:,}")
            if sql_r.seq_scans:
                _w(f"- ⚠️ 全表扫描: {', '.join(sql_r.seq_scans)}")
            if sql_r.missing_indexes:
                _w(f"- 🔍 建议添加索引: {', '.join(sql_r.missing_indexes)}")
            _w("")
            _w("```")
            _w(sql_r.plan_summary)
            _w("```")
            _w("")
    else:
        _w("未执行 SQL 分析(使用 `--skip-sql` 跳过或数据库连接失败)。")
        _w("")

    # ── 7. Verification-stage analysis ──
    _w("## 7. 校验阶段分析")
    _w("")
    if report.data.verification:
        v = report.data.verification
        _w(f"- 状态: {v.status}")
        _w(f"- 耗时: {v.duration_sec:.1f}s")
        _w(f"- 校验表数: {v.total_tables}")
        _w(f"- 一致表数: {v.consistent_tables}")
        _w(f"- 补齐记录: {v.total_backfilled}")
        _w(f"- 错误表数: {v.error_tables}")
        _w("")

        for layer_name, layer_v in v.layers.items():
            _w(f"### {layer_name} 层校验")
            _w("")
            _w(f"- 状态: {layer_v.get('status', 'N/A')}")
            _w(f"- 表数: {layer_v.get('total_tables', 0)}")
            _w(f"- 一致: {layer_v.get('consistent_tables', 0)}")
            _w(f"- 不一致: {layer_v.get('inconsistent_tables', 0)}")
            _w(f"- 源记录: {layer_v.get('total_source_count', 0):,}")
            _w(f"- 目标记录: {layer_v.get('total_target_count', 0):,}")
            _w(f"- 补齐: {layer_v.get('total_backfilled', 0)}")
            _w(f"- 耗时: {layer_v.get('elapsed_seconds', 0):.1f}s")
            _w("")
    else:
        _w("无校验数据。")
        _w("")

    # ── 8. Recommendations ──
    _w("## 8. 优化建议")
    _w("")
    if report.recommendations:
        for i, rec in enumerate(report.recommendations, 1):
            _w(f"{i}. {rec}")
        _w("")
    else:
        _w("暂无优化建议。")
        _w("")

    return "\n".join(lines)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 主流程
|
||||
# ---------------------------------------------------------------------------
|
||||
def find_latest_timing_file(output_dir: Path) -> Path | None:
    """Locate the newest full_refresh_*.json, skipping intermediate/checkpoint files.

    "Newest" is determined by path ordering, which for the date-stamped
    filenames matches chronological order. Returns None when nothing matches.
    """
    skip_keywords = ("intermediate", "checkpoint")
    candidates = [
        p for p in output_dir.glob("full_refresh_*.json")
        if not any(kw in p.name for kw in skip_keywords)
    ]
    # max() over Paths == sorted(..., reverse=True)[0]; default covers "none found".
    return max(candidates, default=None)
|
||||
|
||||
|
||||
def run_analysis(
    json_path: Path,
    logger: logging.Logger,
    skip_sql: bool = False,
) -> PerformanceReport:
    """Run the full performance-analysis pipeline for one timing JSON file.

    Steps: load data → per-layer stats → bottleneck ranking → API metrics →
    optional SQL EXPLAIN analysis → recommendations.
    """
    # 1. Load the raw timing data.
    data = load_timing_data(json_path, logger)
    logger.info("数据加载完成: %s, 总耗时 %.1fs, %d 层",
                data.flow, data.overall_duration_sec, len(data.layers))

    # 2. Per-layer statistics.
    logger.info("分析层级耗时...")
    layer_stats = analyze_layer_stats(data)
    for s in layer_stats:
        logger.info(" %s: %.1fs (%.1f%%), %d 任务",
                    s["layer"], s["duration_sec"], s["pct_of_total"], s["task_count"])

    # 3. Bottleneck detection (top 5 slowest tasks).
    logger.info("识别性能瓶颈...")
    bottlenecks = find_bottleneck_tasks(data, top_n=5)
    for i, t in enumerate(bottlenecks, 1):
        logger.info(" Top %d: %s (%.1fs, %s)", i, t.task_code, t.duration_sec, t.layer)

    # 4. API call metrics.
    logger.info("分析 API 调用...")
    api_analysis = analyze_api_calls(data)

    # 5. Optional SQL EXPLAIN analysis (requires a reachable database DSN).
    sql_analysis: list[SQLAnalysisResult] = []
    if not skip_sql:
        logger.info("分析 SQL 查询执行计划...")
        dsn = _load_dsn()
        if dsn:
            sql_analysis = analyze_sql_queries(dsn, logger)
            logger.info("SQL 分析完成: %d 个查询", len(sql_analysis))
        else:
            logger.warning("未找到数据库 DSN,跳过 SQL 分析")
    else:
        logger.info("跳过 SQL 分析(--skip-sql)")

    # 6. Assemble the report object.
    report = PerformanceReport(
        data=data,
        bottleneck_tasks=bottlenecks,
        layer_stats=layer_stats,
        api_analysis=api_analysis,
        sql_analysis=sql_analysis,
    )

    # 7. Derive recommendations from everything computed above.
    logger.info("生成优化建议...")
    report.recommendations = generate_recommendations(report, logger)

    return report
|
||||
|
||||
|
||||
def _load_dsn() -> str | None:
    """Load the PostgreSQL DSN from the connector's ``.env`` file.

    Prefers ``python-dotenv`` when installed; otherwise falls back to a
    minimal manual parse of the ``PG_DSN=`` line. Returns None when the
    file or the key is missing.

    Fix: the manual fallback now strips matching surrounding quotes, so a
    quoted value like ``PG_DSN="postgres://..."`` yields the same DSN on
    both code paths (dotenv strips quotes; the old manual parse did not).
    """
    env_path = _FEIQIU_ROOT / ".env"
    if not env_path.exists():
        return None
    try:
        from dotenv import dotenv_values
        values = dotenv_values(env_path)
        return values.get("PG_DSN")
    except ImportError:
        # Manual fallback: scan for the PG_DSN line and mirror dotenv's
        # quote-stripping so both paths return identical values.
        for line in env_path.read_text(encoding="utf-8").splitlines():
            line = line.strip()
            if line.startswith("PG_DSN="):
                value = line.split("=", 1)[1].strip()
                if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                    value = value[1:-1]
                return value
        return None
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the CLI options of the performance-analysis script."""
    cli = argparse.ArgumentParser(description="ETL 性能分析")
    # (flags, keyword arguments) for each option, registered in one pass.
    option_specs = [
        (("--input", "-i"),
         {"help": "计时 JSON 文件路径(默认自动查找最新文件)"}),
        (("--output", "-o"),
         {"help": "报告输出路径(默认 docs/reports/performance_report_YYYYMMDD.md)"}),
        (("--skip-sql",),
         {"action": "store_true", "help": "跳过 SQL EXPLAIN ANALYZE 分析"}),
    ]
    for flags, kwargs in option_specs:
        cli.add_argument(*flags, **kwargs)
    return cli.parse_args()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: locate the timing JSON, analyse it, write the report."""
    logger = _setup_logging()
    args = parse_args()

    # Resolve the input file: explicit --input wins, otherwise pick the
    # newest timing file from the script's output directory.
    if args.input:
        json_path = Path(args.input)
    else:
        json_path = find_latest_timing_file(_OUTPUT_DIR)
        if not json_path:
            logger.error("未找到计时 JSON 文件,请指定 --input 参数")
            sys.exit(1)

    if not json_path.exists():
        logger.error("文件不存在: %s", json_path)
        sys.exit(1)

    logger.info("═══ ETL 性能分析 ═══")

    # Run the full analysis pipeline.
    report = run_analysis(json_path, logger, skip_sql=args.skip_sql)

    # Render the Markdown report.
    md_content = generate_report(report)

    # Resolve the output path (default: dated file under docs/reports).
    _REPORTS_DIR.mkdir(parents=True, exist_ok=True)
    if args.output:
        output_path = Path(args.output)
    else:
        date_str = datetime.now().strftime("%Y%m%d")
        output_path = _REPORTS_DIR / f"performance_report_{date_str}.md"

    output_path.write_text(md_content, encoding="utf-8")
    logger.info("报告已保存: %s", output_path)

    # Print a short summary to the console.
    logger.info("")
    logger.info("═══ 分析摘要 ═══")
    logger.info(" 总耗时: %.1fs (%.1f分钟)",
                report.data.overall_duration_sec,
                report.data.overall_duration_sec / 60)
    logger.info(" 层数: %d", len(report.layer_stats))
    logger.info(" 瓶颈任务: %s",
                ", ".join(f"{t.task_code}({t.duration_sec:.0f}s)"
                          for t in report.bottleneck_tasks))
    logger.info(" SQL 分析: %d 个查询", len(report.sql_analysis))
    logger.info(" 优化建议: %d 条", len(report.recommendations))
|
||||
|
||||
|
||||
# Script entry point: run the performance analysis when executed directly.
if __name__ == "__main__":
    main()
|
||||
1198
apps/etl/connectors/feiqiu/scripts/debug/debug_blackbox.py
Normal file
1198
apps/etl/connectors/feiqiu/scripts/debug/debug_blackbox.py
Normal file
File diff suppressed because it is too large
Load Diff
790
apps/etl/connectors/feiqiu/scripts/debug/debug_dwd.py
Normal file
790
apps/etl/connectors/feiqiu/scripts/debug/debug_dwd.py
Normal file
@@ -0,0 +1,790 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""DWD 层调试脚本。
|
||||
|
||||
执行 DWD_LOAD_FROM_ODS 任务,验证 TABLE_MAP 中每对 DWD→ODS 映射的处理结果,
|
||||
检查维度表 SCD2 版本链完整性、事实表时间窗口增量写入正确性、FACT_MAPPINGS 列映射。
|
||||
|
||||
用法:
|
||||
cd apps/etl/connectors/feiqiu
|
||||
python -m scripts.debug.debug_dwd [--hours 2] [--tables dwd.dim_member,dwd.dwd_payment]
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
# ── 确保项目根目录在 sys.path ──
|
||||
_FEIQIU_ROOT = Path(__file__).resolve().parents[2]
|
||||
if str(_FEIQIU_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_FEIQIU_ROOT))
|
||||
|
||||
from config.settings import AppConfig
|
||||
from database.connection import DatabaseConnection
|
||||
from database.operations import DatabaseOperations
|
||||
from api.client import APIClient
|
||||
from orchestration.task_registry import default_registry
|
||||
from orchestration.cursor_manager import CursorManager
|
||||
from orchestration.run_tracker import RunTracker
|
||||
from orchestration.task_executor import TaskExecutor
|
||||
from tasks.dwd.dwd_load_task import DwdLoadTask
|
||||
|
||||
# 时间列候选列表(原 DwdLoadTask.FACT_ORDER_CANDIDATES,重构后内联)
|
||||
_TIME_COLUMN_CANDIDATES = [
|
||||
"pay_time", "create_time", "update_time",
|
||||
"occur_time", "settle_time", "start_use_time", "fetched_at",
|
||||
]
|
||||
|
||||
|
||||
@dataclass
class DebugResult:
    """Debug result for a single DWD table check."""
    layer: str = "DWD"                       # fixed layer label for reporting
    task_code: str = "DWD_LOAD_FROM_ODS"     # task that populates this table
    table_name: str = ""                     # DWD target table (schema-qualified)
    ods_source: str = ""                     # ODS source table it is loaded from
    mode: str = ""  # SCD2 / INCREMENT / TYPE1_UPSERT
    status: str = ""  # PASS / FAIL / WARN / ERROR
    message: str = ""                        # one-line human-readable verdict
    counts: dict = field(default_factory=dict)   # per-table counters from the load task
    dwd_row_count: int | None = None         # total rows in the DWD table
    ods_row_count: int | None = None         # total rows in the ODS source
    scd2_check: dict | None = None           # populated for dimension (dim_) tables
    fact_window_check: dict | None = None    # populated for fact tables
    mapping_check: dict | None = None        # FACT_MAPPINGS validation result
    duration_sec: float = 0.0                # wall-clock time of the per-table check
    error_detail: str | None = None          # traceback text when status == ERROR
    fix_applied: str | None = None           # description of any auto-fix (unused here)
|
||||
|
||||
|
||||
# ── 工具函数 ──────────────────────────────────────────────────
|
||||
|
||||
def _setup_logging() -> logging.Logger:
    """Return the module logger, attaching a stdout handler exactly once."""
    log = logging.getLogger("debug_dwd")
    log.setLevel(logging.INFO)
    if log.handlers:
        # Already configured by a previous call — do not duplicate handlers.
        return log
    stream_handler = logging.StreamHandler(sys.stdout)
    fmt = logging.Formatter(
        "%(asctime)s [%(levelname)s] %(message)s", datefmt="%H:%M:%S"
    )
    stream_handler.setFormatter(fmt)
    log.addHandler(stream_handler)
    return log
|
||||
|
||||
|
||||
def _build_components(config: AppConfig, logger: logging.Logger):
    """Build the DB connection, API client and TaskExecutor for the debug run.

    Returns:
        Tuple of (db_conn, api_client, db_ops, executor). The caller is
        responsible for closing ``db_conn`` when finished.
    """
    db_conn = DatabaseConnection(
        dsn=config["db"]["dsn"],
        session=config["db"].get("session"),
        connect_timeout=config["db"].get("connect_timeout_sec"),
    )
    api_client = APIClient(
        base_url=config["api"]["base_url"],
        token=config["api"]["token"],
        timeout=config["api"].get("timeout_sec", 20),
        retry_max=config["api"].get("retries", {}).get("max_attempts", 3),
        headers_extra=config["api"].get("headers_extra"),
    )
    # Operational wrappers all share the single DB connection.
    db_ops = DatabaseOperations(db_conn)
    cursor_mgr = CursorManager(db_conn)
    run_tracker = RunTracker(db_conn)

    executor = TaskExecutor(
        config, db_ops, api_client,
        cursor_mgr, run_tracker, default_registry, logger,
    )
    return db_conn, api_client, db_ops, executor
|
||||
|
||||
|
||||
def _query_count(db_conn: DatabaseConnection, table: str) -> int:
    """Return the total row count of *table*, or 0 when the query yields nothing."""
    result = db_conn.query(f"SELECT COUNT(*) AS cnt FROM {table}")
    if not result:
        return 0
    return int(result[0]["cnt"])
|
||||
|
||||
|
||||
def _query_count_windowed(db_conn: DatabaseConnection, table: str,
                          col: str, start: datetime, end: datetime) -> int:
    """Count rows of *table* whose *col* falls in the half-open window [start, end)."""
    stmt = f'SELECT COUNT(*) AS cnt FROM {table} WHERE "{col}" >= %s AND "{col}" < %s'
    result = db_conn.query(stmt, (start, end))
    if not result:
        return 0
    return int(result[0]["cnt"])
|
||||
|
||||
|
||||
def _has_column(db_conn: DatabaseConnection, table: str, column: str) -> bool:
    """Return True when the schema-qualified *table* has a column named *column*."""
    sql = """
        SELECT 1 FROM information_schema.columns
        WHERE table_schema || '.' || table_name = %s
          AND column_name = %s
        LIMIT 1
    """
    # Non-empty result set means the column exists.
    return bool(db_conn.query(sql, (table, column)))
|
||||
|
||||
|
||||
def _is_dim_table(table_name: str) -> bool:
    """Return True when the table's base name (schema prefix stripped) starts with ``dim_``."""
    # rpartition yields the part after the last dot, or the whole string
    # when there is no dot — same as split(".")[-1] on qualified names.
    base_name = table_name.rpartition(".")[2]
    return base_name.startswith("dim_")
|
||||
|
||||
|
||||
# ── SCD2 版本链完整性检查 ─────────────────────────────────────
|
||||
|
||||
def _check_scd2_integrity(db_conn: DatabaseConnection, dwd_table: str,
                          logger: logging.Logger) -> dict:
    """Check SCD2 version-chain integrity of a dimension table.

    Verifies:
    - at most one record with scd2_is_current=1 per business key;
    - scd2_version increases without gaps (sampled over multi-version keys);
    - reports total / current / historical row counts.

    Returns:
        dict with ``has_scd2`` flag, a list of human-readable ``checks``,
        and (on success of check 3) ``total_rows`` / ``current_rows`` /
        ``historical_rows``.
    """
    result = {"has_scd2": False, "checks": []}

    # Skip entirely when the table carries no SCD2 columns.
    if not _has_column(db_conn, dwd_table, "scd2_is_current"):
        result["checks"].append("无 SCD2 列,跳过检查")
        return result

    result["has_scd2"] = True

    # Business key = primary-key columns minus the SCD2 bookkeeping columns.
    pk_sql = """
        SELECT a.attname
        FROM pg_index i
        JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
        WHERE i.indrelid = %s::regclass AND i.indisprimary
        ORDER BY array_position(i.indkey, a.attnum)
    """
    pk_rows = db_conn.query(pk_sql, (dwd_table,))
    scd_cols = {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}
    business_keys = [r["attname"] for r in pk_rows if r["attname"] not in scd_cols]

    if not business_keys:
        result["checks"].append("未找到业务主键")
        return result

    bk_sql = ", ".join(f'"{k}"' for k in business_keys)

    # Check 1: at most one "current" record per business key.
    dup_current_sql = f"""
        SELECT {bk_sql}, COUNT(*) AS cnt
        FROM {dwd_table}
        WHERE COALESCE(scd2_is_current, 1) = 1
        GROUP BY {bk_sql}
        HAVING COUNT(*) > 1
        LIMIT 10
    """
    try:
        dup_rows = db_conn.query(dup_current_sql)
        dup_count = len(dup_rows) if dup_rows else 0
        if dup_count > 0:
            result["checks"].append(f"⚠ 发现 {dup_count} 个业务主键有多条 current 记录")
        else:
            result["checks"].append("✓ 每个业务主键至多一条 current 记录")
    except Exception as exc:
        result["checks"].append(f"✗ 检查 current 唯一性失败: {exc}")

    # Check 2: version continuity, sampled over the first 100 multi-version keys.
    # NOTE(review): only business_keys[0] is projected in the CTE select list
    # while the window partitions on all keys — works because only the count
    # is consumed, but confirm for composite-key dimensions.
    version_gap_sql = f"""
        WITH multi_ver AS (
            SELECT {bk_sql}
            FROM {dwd_table}
            GROUP BY {bk_sql}
            HAVING COUNT(*) > 1
            LIMIT 100
        ),
        versioned AS (
            SELECT t.{business_keys[0]},
                   t.scd2_version,
                   LAG(t.scd2_version) OVER (
                       PARTITION BY {', '.join(f't."{k}"' for k in business_keys)}
                       ORDER BY t.scd2_version
                   ) AS prev_version
            FROM {dwd_table} t
            INNER JOIN multi_ver m ON {' AND '.join(f't."{k}" = m."{k}"' for k in business_keys)}
        )
        SELECT COUNT(*) AS gap_count
        FROM versioned
        WHERE prev_version IS NOT NULL AND scd2_version - prev_version != 1
    """
    try:
        gap_rows = db_conn.query(version_gap_sql)
        gap_count = int(gap_rows[0]["gap_count"]) if gap_rows else 0
        if gap_count > 0:
            result["checks"].append(f"⚠ 发现 {gap_count} 处版本号跳号")
        else:
            result["checks"].append("✓ 版本号连续递增")
    except Exception as exc:
        result["checks"].append(f"✗ 检查版本连续性失败: {exc}")

    # Check 3: total vs current vs historical row counts.
    try:
        total = _query_count(db_conn, dwd_table)
        current_sql = f"SELECT COUNT(*) AS cnt FROM {dwd_table} WHERE COALESCE(scd2_is_current, 1) = 1"
        current_rows = db_conn.query(current_sql)
        current_count = int(current_rows[0]["cnt"]) if current_rows else 0
        result["total_rows"] = total
        result["current_rows"] = current_count
        result["historical_rows"] = total - current_count
        result["checks"].append(f"✓ 总行数={total}, current={current_count}, 历史={total - current_count}")
    except Exception as exc:
        result["checks"].append(f"✗ 查询行数失败: {exc}")

    return result
|
||||
|
||||
|
||||
# ── 事实表时间窗口增量写入检查 ────────────────────────────────
|
||||
|
||||
def _check_fact_window(db_conn: DatabaseConnection, dwd_table: str, ods_table: str,
                       window_start: datetime, window_end: datetime,
                       logger: logging.Logger) -> dict:
    """Check incremental-load correctness of a fact table over a time window.

    Verifies:
    - DWD vs ODS row counts inside [window_start, window_end);
    - no duplicate primary keys in the DWD table.

    Returns:
        dict with a ``checks`` list and, when available, ``order_column``,
        ``dwd_window_count``, ``ods_window_count`` and ``ratio``.
    """
    result = {"checks": []}

    # Pick the first candidate time column that actually exists on the table.
    order_col = None
    for candidate in _TIME_COLUMN_CANDIDATES:
        if _has_column(db_conn, dwd_table, candidate):
            order_col = candidate
            break

    if not order_col:
        result["checks"].append("⚠ 未找到可用的时间列,跳过窗口检查")
        return result

    # DWD rows inside the window.
    try:
        dwd_count = _query_count_windowed(db_conn, dwd_table, order_col, window_start, window_end)
        result["dwd_window_count"] = dwd_count
        result["order_column"] = order_col
    except Exception as exc:
        result["checks"].append(f"✗ 查询 DWD 窗口行数失败: {exc}")
        return result

    # ODS rows inside the window (always keyed on fetched_at).
    try:
        ods_count = _query_count_windowed(db_conn, ods_table, "fetched_at", window_start, window_end)
        result["ods_window_count"] = ods_count
    except Exception as exc:
        result["checks"].append(f"✗ 查询 ODS 窗口行数失败: {exc}")
        ods_count = None

    if ods_count is not None:
        # Dedup/mapping can legitimately shrink the DWD side; only a ratio
        # below 50% is flagged.
        if ods_count > 0:
            # NOTE(review): the ternary's else-branch is dead — this line is
            # only reached when ods_count > 0.
            ratio = dwd_count / ods_count if ods_count > 0 else 0
            result["ratio"] = round(ratio, 4)
            if ratio < 0.5:
                result["checks"].append(f"⚠ DWD/ODS 比率偏低: {ratio:.2%} (DWD={dwd_count}, ODS={ods_count})")
            else:
                result["checks"].append(f"✓ DWD/ODS 比率正常: {ratio:.2%} (DWD={dwd_count}, ODS={ods_count})")
        else:
            result["checks"].append(f"ℹ ODS 窗口内无数据 (DWD={dwd_count})")

    # Duplicate primary-key check over the whole table.
    pk_sql = """
        SELECT a.attname
        FROM pg_index i
        JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
        WHERE i.indrelid = %s::regclass AND i.indisprimary
        ORDER BY array_position(i.indkey, a.attnum)
    """
    try:
        pk_rows = db_conn.query(pk_sql, (dwd_table,))
        pk_cols = [r["attname"] for r in pk_rows]
        if pk_cols:
            pk_list = ", ".join(f'"{c}"' for c in pk_cols)
            dup_sql = f"""
                SELECT {pk_list}, COUNT(*) AS cnt
                FROM {dwd_table}
                GROUP BY {pk_list}
                HAVING COUNT(*) > 1
                LIMIT 5
            """
            dup_rows = db_conn.query(dup_sql)
            dup_count = len(dup_rows) if dup_rows else 0
            if dup_count > 0:
                result["checks"].append(f"⚠ 发现 {dup_count} 组主键重复")
            else:
                result["checks"].append("✓ 主键无重复")
    except Exception as exc:
        result["checks"].append(f"✗ 主键重复检查失败: {exc}")

    return result
|
||||
|
||||
|
||||
# ── FACT_MAPPINGS 列映射检查 ──────────────────────────────────
|
||||
|
||||
def _check_fact_mappings(db_conn: DatabaseConnection, dwd_table: str, ods_table: str,
                         logger: logging.Logger) -> dict:
    """Validate the FACT_MAPPINGS column mappings for *dwd_table*.

    Verifies:
    - every mapped DWD target column exists on the DWD table;
    - for simple column-name mappings, the ODS source column exists on the
      ODS table (expressions such as JSON extraction / CASE are skipped).

    NOTE(review): cast_type is unpacked from each mapping but never
    validated below, although the original docstring claimed it — confirm
    whether the check was intended.
    """
    result = {"checks": [], "mapping_count": 0, "issues": []}

    mappings = DwdLoadTask.FACT_MAPPINGS.get(dwd_table, [])
    if not mappings:
        result["checks"].append("ℹ 无显式 FACT_MAPPINGS 条目")
        return result

    result["mapping_count"] = len(mappings)

    # Column inventories for both sides, from information_schema.
    dwd_cols_sql = """
        SELECT column_name FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s
    """
    ods_cols_sql = dwd_cols_sql

    dwd_schema, dwd_name = dwd_table.split(".", 1)
    ods_schema, ods_name = ods_table.split(".", 1)

    try:
        dwd_col_rows = db_conn.query(dwd_cols_sql, (dwd_schema, dwd_name))
        dwd_cols = {r["column_name"].lower() for r in dwd_col_rows}
    except Exception as exc:
        result["checks"].append(f"✗ 获取 DWD 列信息失败: {exc}")
        return result

    try:
        ods_col_rows = db_conn.query(ods_cols_sql, (ods_schema, ods_name))
        ods_cols = {r["column_name"].lower() for r in ods_col_rows}
    except Exception as exc:
        result["checks"].append(f"✗ 获取 ODS 列信息失败: {exc}")
        return result

    missing_dwd = []
    missing_ods = []
    for dwd_col, ods_expr, cast_type in mappings:
        # DWD target column must exist.
        if dwd_col.lower() not in dwd_cols:
            missing_dwd.append(dwd_col)

        # ODS source: only bare identifiers (optionally double-quoted) are
        # verifiable; anything else is treated as an expression and skipped.
        is_simple_col = (
            ods_expr.isidentifier()
            or (ods_expr.startswith('"') and ods_expr.endswith('"'))
        )
        if is_simple_col:
            col_name = ods_expr.strip('"').lower()
            if col_name not in ods_cols:
                missing_ods.append((dwd_col, ods_expr))

    if missing_dwd:
        result["issues"].extend([f"DWD 列不存在: {c}" for c in missing_dwd])
        result["checks"].append(f"⚠ {len(missing_dwd)} 个 DWD 目标列不存在: {missing_dwd}")
    else:
        result["checks"].append(f"✓ 所有 {len(mappings)} 个 DWD 目标列均存在")

    if missing_ods:
        result["issues"].extend([f"ODS 列不存在: {dwd}←{ods}" for dwd, ods in missing_ods])
        result["checks"].append(f"⚠ {len(missing_ods)} 个 ODS 源列不存在: {missing_ods}")
    else:
        # Count only the simple-column mappings we were actually able to verify.
        simple_count = sum(
            1 for _, expr, _ in mappings
            if expr.isidentifier() or (expr.startswith('"') and expr.endswith('"'))
        )
        result["checks"].append(f"✓ 所有 {simple_count} 个简单列名映射的 ODS 源列均存在")

    return result
|
||||
|
||||
|
||||
# ── 单表调试 ──────────────────────────────────────────────────
|
||||
|
||||
def _debug_single_table(
    dwd_table: str,
    ods_table: str,
    db_conn: DatabaseConnection,
    window_start: datetime,
    window_end: datetime,
    logger: logging.Logger,
) -> DebugResult:
    """Run read-only validation on one DWD table (no data is loaded here).

    Dimension tables (dim_ prefix) get the SCD2 chain check; fact tables
    get the time-window / primary-key check. Any check line containing the
    ⚠ marker is collected as an issue and downgrades status to WARN.
    """
    result = DebugResult(table_name=dwd_table, ods_source=ods_table)
    is_dim = _is_dim_table(dwd_table)
    result.mode = "SCD2" if is_dim else "INCREMENT"

    logger.info("━" * 60)
    logger.info("▶ 检查: %s ← %s (%s)", dwd_table, ods_table, result.mode)

    t0 = time.monotonic()
    issues = []

    # 1) Basic row counts; a failure here aborts the whole table check.
    try:
        dwd_total = _query_count(db_conn, dwd_table)
        ods_total = _query_count(db_conn, ods_table)
        result.dwd_row_count = dwd_total
        result.ods_row_count = ods_total
        logger.info(" 行数: DWD=%d, ODS=%d", dwd_total, ods_total)
    except Exception as exc:
        result.status = "ERROR"
        result.message = f"查询行数失败: {exc}"
        result.error_detail = traceback.format_exc()
        result.duration_sec = round(time.monotonic() - t0, 2)
        logger.error(" ✗ %s", result.message)
        return result

    # 2) FACT_MAPPINGS column-mapping validation (non-fatal on error).
    try:
        mapping_check = _check_fact_mappings(db_conn, dwd_table, ods_table, logger)
        result.mapping_check = mapping_check
        for check in mapping_check.get("checks", []):
            logger.info(" 映射: %s", check)
        if mapping_check.get("issues"):
            issues.extend(mapping_check["issues"])
    except Exception as exc:
        logger.warning(" ⚠ 列映射检查异常: %s", exc)

    # 3) SCD2 check for dimensions / window check for facts.
    if is_dim:
        try:
            scd2_check = _check_scd2_integrity(db_conn, dwd_table, logger)
            result.scd2_check = scd2_check
            for check in scd2_check.get("checks", []):
                logger.info(" SCD2: %s", check)
            # Any check line containing ⚠ counts as an issue.
            issues.extend(c for c in scd2_check.get("checks", []) if "⚠" in c)
        except Exception as exc:
            logger.warning(" ⚠ SCD2 检查异常: %s", exc)
    else:
        try:
            fact_check = _check_fact_window(
                db_conn, dwd_table, ods_table, window_start, window_end, logger,
            )
            result.fact_window_check = fact_check
            for check in fact_check.get("checks", []):
                logger.info(" 窗口: %s", check)
            issues.extend(c for c in fact_check.get("checks", []) if "⚠" in c)
        except Exception as exc:
            logger.warning(" ⚠ 窗口检查异常: %s", exc)

    # 4) Final status: issues → WARN, empty table → WARN, otherwise PASS.
    result.duration_sec = round(time.monotonic() - t0, 2)
    if issues:
        result.status = "WARN"
        result.message = f"{len(issues)} 个问题: {issues[0]}"
    elif dwd_total == 0:
        result.status = "WARN"
        result.message = "DWD 表为空"
    else:
        result.status = "PASS"
        result.message = f"检查通过 (DWD={dwd_total}行)"

    icon = {"PASS": "✓", "WARN": "⚠", "ERROR": "✗", "FAIL": "✗"}.get(result.status, "?")
    logger.info(" %s 结果: %s - %s (%.1fs)", icon, result.status, result.message, result.duration_sec)
    return result
|
||||
|
||||
|
||||
# ── 执行 DWD_LOAD_FROM_ODS 任务 ──────────────────────────────
|
||||
|
||||
def _execute_dwd_load(
    executor: TaskExecutor,
    config: AppConfig,
    logger: logging.Logger,
) -> dict:
    """Run the DWD_LOAD_FROM_ODS task once and summarize its outcome.

    Returns:
        dict with keys ``status`` (SUCCESS / PARTIAL / ERROR), ``tables``,
        ``errors``, ``duration_sec`` and, on ERROR only, ``traceback``.
    """
    store_id = int(config.get("app.store_id"))
    # Unique run id so this debug invocation is distinguishable in run tracking.
    run_uuid = f"debug-dwd-load-{int(time.time())}"

    logger.info("━" * 60)
    logger.info("▶ 执行 DWD_LOAD_FROM_ODS 任务")

    t0 = time.monotonic()
    try:
        task_result = executor.run_single_task(
            task_code="DWD_LOAD_FROM_ODS",
            run_uuid=run_uuid,
            store_id=store_id,
            data_source="online",
        )
        elapsed = round(time.monotonic() - t0, 2)
        logger.info(" 执行完成,耗时 %.1fs", elapsed)

        # Summarize per-table counters returned by the task.
        tables = task_result.get("tables", [])
        errors = task_result.get("errors", [])
        logger.info(" 处理表数: %d, 错误表数: %d", len(tables), len(errors))

        for t in tables:
            tbl = t.get("table", "")
            mode = t.get("mode", "")
            ins = t.get("inserted", 0)
            upd = t.get("updated", 0)
            proc = t.get("processed", 0)
            logger.info(" %s [%s]: processed=%d, inserted=%d, updated=%d", tbl, mode, proc, ins, upd)

        for e in errors:
            logger.error(" ✗ %s: %s", e.get("table", ""), e.get("error", ""))

        return {
            "status": "SUCCESS" if not errors else "PARTIAL",
            "tables": tables,
            "errors": errors,
            "duration_sec": elapsed,
        }
    except Exception as exc:
        elapsed = round(time.monotonic() - t0, 2)
        logger.error(" ✗ 执行异常: %s", exc)
        return {
            "status": "ERROR",
            "tables": [],
            "errors": [{"table": "DWD_LOAD_FROM_ODS", "error": str(exc)}],
            "duration_sec": elapsed,
            "traceback": traceback.format_exc(),
        }
|
||||
|
||||
|
||||
# ── 主流程 ────────────────────────────────────────────────────
|
||||
|
||||
def run_dwd_debug(
    hours: float = 2.0,
    table_filter: list[str] | None = None,
    skip_load: bool = False,
) -> list[DebugResult]:
    """Run the full DWD-layer debug flow.

    Args:
        hours: Look-back window size in hours (default 2).
        table_filter: Only debug the listed DWD table names (qualified or
            bare); None means all tables in ``DwdLoadTask.TABLE_MAP``.
        skip_load: Skip executing DWD_LOAD_FROM_ODS and only run data checks.
    Returns:
        DebugResult for every checked table; results are also written to a
        timestamped JSON file under scripts/debug/output.
    """
    logger = _setup_logging()
    logger.info("=" * 60)
    logger.info("DWD 层调试开始")
    logger.info("=" * 60)

    # Load configuration and derive the debug time window.
    config = AppConfig.load()
    tz = ZoneInfo(config.get("app.timezone", "Asia/Shanghai"))
    window_end = datetime.now(tz)
    window_start = window_end - timedelta(hours=hours)

    logger.info("门店 ID: %s", config.get("app.store_id"))
    logger.info("数据库: %s", config.get("db.name", ""))
    logger.info("时间窗口: %s ~ %s (%.1f 小时)", window_start, window_end, hours)

    # Override the run window so the load task uses our debug window.
    config.config.setdefault("run", {}).setdefault("window_override", {})
    config.config["run"]["window_override"]["start"] = window_start
    config.config["run"]["window_override"]["end"] = window_end

    # Build DB / API / executor components.
    db_conn, api_client, db_ops, executor = _build_components(config, logger)

    # Step 1: optionally execute DWD_LOAD_FROM_ODS first.
    load_result = None
    if not skip_load:
        load_result = _execute_dwd_load(executor, config, logger)
        logger.info("")

    # Step 2: check every DWD→ODS mapping in TABLE_MAP (possibly filtered).
    table_map = DwdLoadTask.TABLE_MAP
    if table_filter:
        filter_set = {t.lower() for t in table_filter}
        # Match either the fully-qualified name or the bare table name.
        filtered_map = {
            k: v for k, v in table_map.items()
            if k.lower() in filter_set or k.split(".")[-1].lower() in filter_set
        }
        # NOTE(review): entries matched only by bare name are still reported
        # as "skipped" here, since only qualified names are subtracted.
        skipped = filter_set - {k.lower() for k in filtered_map}
        if skipped:
            logger.warning("以下表不在 TABLE_MAP 中,已跳过: %s", skipped)
        table_map = filtered_map

    logger.info("")
    logger.info("=" * 60)
    logger.info("逐表数据检查 (%d 张表)", len(table_map))
    logger.info("=" * 60)

    results: list[DebugResult] = []
    for idx, (dwd_table, ods_table) in enumerate(table_map.items(), start=1):
        logger.info("[%d/%d] %s", idx, len(table_map), dwd_table)
        try:
            r = _debug_single_table(
                dwd_table=dwd_table,
                ods_table=ods_table,
                db_conn=db_conn,
                window_start=window_start,
                window_end=window_end,
                logger=logger,
            )
            # Attach the load task's per-table counters, if it ran.
            if load_result and load_result.get("tables"):
                for t in load_result["tables"]:
                    if t.get("table") == dwd_table:
                        r.counts = {
                            k: v for k, v in t.items() if k != "table"
                        }
                        break
            # A load error for this table overrides the check status.
            if load_result and load_result.get("errors"):
                for e in load_result["errors"]:
                    if e.get("table") == dwd_table:
                        r.status = "ERROR"
                        r.message = f"装载失败: {e.get('error', '')}"
                        r.error_detail = e.get("error", "")
                        break
        except Exception as exc:
            r = DebugResult(
                table_name=dwd_table,
                ods_source=ods_table,
                status="ERROR",
                message=f"未捕获异常: {exc}",
                error_detail=traceback.format_exc(),
            )
            logger.error(" ✗ 未捕获异常: %s", exc)
        results.append(r)

    # Make sure the connection is still usable after per-table failures.
    db_conn.ensure_open()

    # Summary to the log.
    _print_summary(results, load_result, logger)

    # Persist full results as JSON.
    output_dir = _FEIQIU_ROOT / "scripts" / "debug" / "output"
    output_dir.mkdir(parents=True, exist_ok=True)
    ts = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
    output_file = output_dir / f"debug_dwd_{ts}.json"
    _save_results(results, load_result, output_file)
    logger.info("结果已保存: %s", output_file)

    db_conn.close()
    return results
|
||||
|
||||
|
||||
# ── 汇总与输出 ────────────────────────────────────────────────
|
||||
|
||||
def _print_summary(results: list[DebugResult], load_result: dict | None,
                   logger: logging.Logger):
    """Log a human-readable summary of the debug run."""
    logger.info("")
    logger.info("=" * 60)
    logger.info("DWD 层调试汇总")
    logger.info("=" * 60)

    # Load-task outcome, if the task was executed.
    if load_result:
        logger.info("DWD_LOAD_FROM_ODS 执行: %s (耗时 %.1fs)",
                    load_result.get("status", "N/A"),
                    load_result.get("duration_sec", 0))
        tables = load_result.get("tables", [])
        errors = load_result.get("errors", [])
        total_inserted = sum(t.get("inserted", 0) for t in tables)
        total_updated = sum(t.get("updated", 0) for t in tables)
        logger.info(" 处理表数: %d, 错误表数: %d", len(tables), len(errors))
        logger.info(" 总计: inserted=%d, updated=%d", total_inserted, total_updated)

    # Per-table check tallies.
    pass_count = sum(1 for r in results if r.status == "PASS")
    warn_count = sum(1 for r in results if r.status == "WARN")
    error_count = sum(1 for r in results if r.status in ("ERROR", "FAIL"))
    total_duration = sum(r.duration_sec for r in results)

    logger.info("")
    logger.info("逐表检查: %d 张表", len(results))
    logger.info(" ✓ PASS: %d", pass_count)
    logger.info(" ⚠ WARN: %d", warn_count)
    logger.info(" ✗ ERROR: %d", error_count)
    logger.info(" 总耗时: %.1f 秒", total_duration)

    # Breakdown: dimension tables vs fact tables.
    dim_results = [r for r in results if r.mode == "SCD2"]
    fact_results = [r for r in results if r.mode == "INCREMENT"]
    logger.info("")
    logger.info("维度表: %d 张 (PASS=%d, WARN=%d, ERROR=%d)",
                len(dim_results),
                sum(1 for r in dim_results if r.status == "PASS"),
                sum(1 for r in dim_results if r.status == "WARN"),
                sum(1 for r in dim_results if r.status in ("ERROR", "FAIL")))
    logger.info("事实表: %d 张 (PASS=%d, WARN=%d, ERROR=%d)",
                len(fact_results),
                sum(1 for r in fact_results if r.status == "PASS"),
                sum(1 for r in fact_results if r.status == "WARN"),
                sum(1 for r in fact_results if r.status in ("ERROR", "FAIL")))

    # List every table that did not PASS.
    non_pass = [r for r in results if r.status != "PASS"]
    if non_pass:
        logger.info("")
        logger.info("需关注的表:")
        for r in non_pass:
            logger.info(" [%s] %s: %s", r.status, r.table_name, r.message)
    else:
        logger.info("")
        logger.info("所有表均通过 ✓")
|
||||
|
||||
def _save_results(results: list[DebugResult], load_result: dict | None, path: Path):
    """Serialize the load result plus per-table checks to a UTF-8 JSON file."""
    payload = {
        "load_result": _sanitize_for_json(load_result) if load_result else None,
        "table_checks": [_sanitize_for_json(asdict(item)) for item in results],
    }
    # default=str covers anything _sanitize_for_json did not convert.
    text = json.dumps(payload, ensure_ascii=False, indent=2, default=str)
    path.write_text(text, encoding="utf-8")
|
||||
|
||||
|
||||
def _sanitize_for_json(obj):
    """Recursively convert JSON-unfriendly values (datetimes become ISO strings)."""
    if isinstance(obj, datetime):
        return obj.isoformat()
    if isinstance(obj, dict):
        return {key: _sanitize_for_json(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        # Tuples collapse to lists, matching JSON array semantics.
        return [_sanitize_for_json(item) for item in obj]
    return obj
|
||||
|
||||
|
||||
# ── CLI 入口 ──────────────────────────────────────────────────
|
||||
|
||||
def parse_args():
    """Parse command-line options for the DWD debug script."""
    parser = argparse.ArgumentParser(description="DWD 层调试脚本")
    parser.add_argument(
        "--hours", type=float, default=2.0,
        help="回溯窗口小时数(默认 2)",
    )
    parser.add_argument(
        "--tables", type=str, default=None,
        help="仅调试指定 DWD 表,逗号分隔(如 dwd.dim_member,dwd.dwd_payment)",
    )
    parser.add_argument(
        "--skip-load", action="store_true",
        help="跳过 DWD_LOAD_FROM_ODS 执行,仅做数据检查",
    )
    return parser.parse_args()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: run the debug flow and exit non-zero on any ERROR/FAIL."""
    args = parse_args()
    if args.tables:
        table_filter = [name.strip() for name in args.tables.split(",") if name.strip()]
    else:
        table_filter = None

    results = run_dwd_debug(
        hours=args.hours,
        table_filter=table_filter,
        skip_load=args.skip_load,
    )

    exit_code = 1 if any(r.status in ("ERROR", "FAIL") for r in results) else 0
    sys.exit(exit_code)
|
||||
|
||||
|
||||
# Script entry point.
if __name__ == "__main__":
    main()
|
||||
575
apps/etl/connectors/feiqiu/scripts/debug/debug_dws.py
Normal file
575
apps/etl/connectors/feiqiu/scripts/debug/debug_dws.py
Normal file
@@ -0,0 +1,575 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""DWS 层逐任务调试脚本。
|
||||
|
||||
连接真实数据库,逐个执行 15 个 DWS 汇总任务,
|
||||
验证返回结果和 DWS 表写入情况,抽样检查汇总数据与 DWD 明细数据的一致性。
|
||||
|
||||
用法:
|
||||
cd apps/etl/connectors/feiqiu
|
||||
python -m scripts.debug.debug_dws [--hours 48] [--tasks DWS_FINANCE_DAILY,DWS_ASSISTANT_DAILY]
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
# ── 确保项目根目录在 sys.path ──
|
||||
_FEIQIU_ROOT = Path(__file__).resolve().parents[2]
|
||||
if str(_FEIQIU_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_FEIQIU_ROOT))
|
||||
|
||||
from config.settings import AppConfig
|
||||
from database.connection import DatabaseConnection
|
||||
from database.operations import DatabaseOperations
|
||||
from api.client import APIClient
|
||||
from orchestration.task_registry import default_registry
|
||||
from orchestration.cursor_manager import CursorManager
|
||||
from orchestration.run_tracker import RunTracker
|
||||
from orchestration.task_executor import TaskExecutor
|
||||
|
||||
|
||||
@dataclass
class DebugResult:
    """Debug result for a single DWS summary task."""
    layer: str = "DWS"                       # fixed layer label for reporting
    task_code: str = ""                      # DWS task being debugged
    status: str = ""  # PASS / FAIL / WARN / ERROR
    message: str = ""                        # one-line human-readable verdict
    counts: dict = field(default_factory=dict)   # counters returned by the task
    target_table: str = ""                   # DWS table the task writes to
    pre_row_count: int | None = None         # target row count before execution
    post_row_count: int | None = None        # target row count after execution
    consistency_check: dict | None = None    # DWS-vs-DWD spot-check result
    duration_sec: float = 0.0                # wall-clock time of the task run
    error_detail: str | None = None          # traceback text when status == ERROR
    fix_applied: str | None = None           # description of any auto-fix (unused here)
|
||||
|
||||
|
||||
# ── 工具函数 ──────────────────────────────────────────────────
|
||||
|
||||
def _setup_logging() -> logging.Logger:
    """Return the module logger, attaching a stdout handler exactly once."""
    log = logging.getLogger("debug_dws")
    log.setLevel(logging.INFO)
    if log.handlers:
        # Already configured by a previous call — do not duplicate handlers.
        return log
    stream_handler = logging.StreamHandler(sys.stdout)
    fmt = logging.Formatter(
        "%(asctime)s [%(levelname)s] %(message)s", datefmt="%H:%M:%S"
    )
    stream_handler.setFormatter(fmt)
    log.addHandler(stream_handler)
    return log
|
||||
|
||||
|
||||
def _build_components(config: AppConfig, logger: logging.Logger):
    """Build the DB connection, API client and TaskExecutor for the debug run.

    Returns:
        Tuple of (db_conn, api_client, db_ops, executor). The caller is
        responsible for closing ``db_conn`` when finished.
    """
    db_conn = DatabaseConnection(
        dsn=config["db"]["dsn"],
        session=config["db"].get("session"),
        connect_timeout=config["db"].get("connect_timeout_sec"),
    )
    api_client = APIClient(
        base_url=config["api"]["base_url"],
        token=config["api"]["token"],
        timeout=config["api"].get("timeout_sec", 20),
        retry_max=config["api"].get("retries", {}).get("max_attempts", 3),
        headers_extra=config["api"].get("headers_extra"),
    )
    # Operational wrappers all share the single DB connection.
    db_ops = DatabaseOperations(db_conn)
    cursor_mgr = CursorManager(db_conn)
    run_tracker = RunTracker(db_conn)

    executor = TaskExecutor(
        config, db_ops, api_client,
        cursor_mgr, run_tracker, default_registry, logger,
    )
    return db_conn, api_client, db_ops, executor
|
||||
|
||||
|
||||
def _get_dws_target_table(task_code: str, config, db_conn, api_client, logger) -> str | None:
    """Resolve the target table of a DWS task via a throwaway task instance.

    Returns None when the task is unknown or instantiation/lookup fails.
    """
    meta = default_registry.get_metadata(task_code)
    if meta is None:
        return None
    try:
        instance = meta.task_class(config, db_conn, api_client, logger)
        table = instance.get_target_table()
        # Qualify bare table names with the dws schema.
        if table and "." not in table:
            return f"dws.{table}"
        return table
    except Exception:
        # Best effort: any failure simply means "target table unknown".
        return None
|
||||
|
||||
|
||||
def _query_count(db_conn: DatabaseConnection, table: str) -> int:
    """Return the total row count of *table*, or 0 when the query yields nothing."""
    result = db_conn.query(f"SELECT COUNT(*) AS cnt FROM {table}")
    if not result:
        return 0
    return int(result[0]["cnt"])
|
||||
|
||||
|
||||
def _has_column(db_conn: DatabaseConnection, table: str, column: str) -> bool:
    """Return True when the schema-qualified *table* has a column named *column*."""
    sql = """
        SELECT 1 FROM information_schema.columns
        WHERE table_schema || '.' || table_name = %s
          AND column_name = %s
        LIMIT 1
    """
    # Non-empty result set means the column exists.
    return bool(db_conn.query(sql, (table, column)))
|
||||
|
||||
|
||||
def _table_exists(db_conn: DatabaseConnection, table: str) -> bool:
|
||||
"""检查表/视图是否存在。"""
|
||||
rows = db_conn.query("SELECT to_regclass(%s) AS reg", (table,))
|
||||
return bool(rows and rows[0].get("reg"))
|
||||
|
||||
|
||||
# ── DWS 与 DWD 一致性抽样验证 ────────────────────────────────
|
||||
|
||||
# Known DWS→DWD aggregation relationships, used for sample-based validation.
# Format: dws_table -> {dwd_source, dws_date_col, dwd_date_col, amount_cols}
_DWS_DWD_CONSISTENCY_MAP: dict[str, dict] = {
    "dws.dws_assistant_daily_detail": {
        "dwd_source": "dwd.dwd_assistant_service_log",
        "dws_date_col": "stat_date",
        "dwd_date_col": "service_date",
        "group_cols": ["site_id", "assistant_id"],
        "dws_count_col": "service_count",
        "dwd_count_expr": "COUNT(*)",
        "description": "助教日度服务次数 vs DWD 服务流水",
    },
    "dws.dws_finance_daily_summary": {
        "dwd_source": "dwd.dwd_order",
        "dws_date_col": "stat_date",
        "dwd_date_col": "order_date",
        "group_cols": ["site_id"],
        "dws_count_col": "order_count",
        "dwd_count_expr": "COUNT(*)",
        "description": "财务日度订单数 vs DWD 订单表",
    },
    "dws.dws_member_visit_detail": {
        "dwd_source": "dwd.dwd_order",
        "dws_date_col": "visit_date",
        "dwd_date_col": "order_date",
        "group_cols": ["site_id", "member_id"],
        "dws_count_col": None,  # no direct count column; only row counts are compared
        "dwd_count_expr": None,
        "description": "会员到店明细 vs DWD 订单表",
    },
}
|
||||
|
||||
|
||||
def _check_dws_dwd_consistency(
    db_conn: DatabaseConnection,
    dws_table: str,
    logger: logging.Logger,
) -> dict:
    """Sample-check consistency between a DWS aggregate table and its DWD source.

    For tables with a known mapping in ``_DWS_DWD_CONSISTENCY_MAP``, the 3 most
    recent distinct dates in the DWS table are compared against the DWD source
    by per-date row counts. Tables without a mapping are skipped.

    Returns a dict with keys ``checks`` (human-readable lines), ``has_mapping``,
    and — when a mapping exists — ``description`` and ``mismatch_count``.
    """
    result = {"checks": [], "has_mapping": False}

    mapping = _DWS_DWD_CONSISTENCY_MAP.get(dws_table)
    if not mapping:
        result["checks"].append("ℹ 无已知 DWS→DWD 映射,跳过一致性验证")
        return result

    result["has_mapping"] = True
    result["description"] = mapping["description"]
    dwd_source = mapping["dwd_source"]
    dws_date_col = mapping["dws_date_col"]
    dwd_date_col = mapping["dwd_date_col"]

    # Verify the DWD source table exists before querying it.
    if not _table_exists(db_conn, dwd_source):
        result["checks"].append(f"⚠ DWD 源表不存在: {dwd_source}")
        return result

    # Sampling: take the 3 most recent distinct dates present in the DWS table.
    try:
        sample_sql = f"""
        SELECT DISTINCT "{dws_date_col}" AS d
        FROM {dws_table}
        ORDER BY d DESC
        LIMIT 3
        """
        date_rows = db_conn.query(sample_sql)
        if not date_rows:
            result["checks"].append("ℹ DWS 表无数据,跳过一致性验证")
            return result
        sample_dates = [r["d"] for r in date_rows]
    except Exception as exc:
        result["checks"].append(f"✗ 查询 DWS 日期失败: {exc}")
        return result

    # Compare per-date row counts for each sampled date.
    mismatches = []
    for sample_date in sample_dates:
        try:
            dws_count_sql = f"""
            SELECT COUNT(*) AS cnt FROM {dws_table}
            WHERE "{dws_date_col}" = %s
            """
            dws_rows = db_conn.query(dws_count_sql, (sample_date,))
            dws_count = int(dws_rows[0]["cnt"]) if dws_rows else 0

            # DWD side: make sure the expected date column actually exists.
            if not _has_column(db_conn, dwd_source, dwd_date_col):
                result["checks"].append(f"⚠ DWD 表缺少日期列 {dwd_date_col}")
                break

            dwd_count_sql = f"""
            SELECT COUNT(*) AS cnt FROM {dwd_source}
            WHERE "{dwd_date_col}" = %s
            """
            dwd_rows = db_conn.query(dwd_count_sql, (sample_date,))
            dwd_count = int(dwd_rows[0]["cnt"]) if dwd_rows else 0

            # DWS is an aggregate, so its row count is normally <= the DWD
            # count (grouped by group_cols); only flag the degenerate cases.
            if dws_count > 0 and dwd_count == 0:
                mismatches.append(
                    f"日期 {sample_date}: DWS={dws_count} 但 DWD=0(DWD 无对应数据)"
                )
            elif dws_count == 0 and dwd_count > 0:
                mismatches.append(
                    f"日期 {sample_date}: DWS=0 但 DWD={dwd_count}(DWS 未汇总)"
                )
            else:
                result["checks"].append(
                    f"✓ 日期 {sample_date}: DWS={dws_count}行, DWD={dwd_count}行"
                )
        except Exception as exc:
            result["checks"].append(f"✗ 日期 {sample_date} 对比失败: {exc}")

    if mismatches:
        result["checks"].extend(f"⚠ {m}" for m in mismatches)
        result["mismatch_count"] = len(mismatches)
    else:
        result["mismatch_count"] = 0

    return result
|
||||
|
||||
|
||||
# ── 核心调试逻辑 ──────────────────────────────────────────────
|
||||
|
||||
def debug_single_dws_task(
    task_code: str,
    executor: TaskExecutor,
    db_conn: DatabaseConnection,
    config: AppConfig,
    api_client,
    logger: logging.Logger,
    window_start: datetime,
    window_end: datetime,
) -> DebugResult:
    """Execute a single DWS task and validate the outcome.

    Captures pre/post row counts of the target table, runs a DWS↔DWD
    consistency sample check, and folds everything into one DebugResult.
    ``window_start``/``window_end`` are informational here — the actual
    window is injected via the caller's config ``window_override``.
    """
    result = DebugResult(task_code=task_code)

    # Resolve the target table name (best effort; may be None).
    target_table = _get_dws_target_table(task_code, config, db_conn, api_client, logger)
    result.target_table = target_table or ""

    store_id = int(config.get("app.store_id"))
    run_uuid = f"debug-dws-{task_code.lower()}-{int(time.time())}"

    logger.info("━" * 60)
    logger.info("▶ 开始调试: %s (表: %s)", task_code, target_table or "未知")

    # Row count before execution.
    if target_table and _table_exists(db_conn, target_table):
        try:
            result.pre_row_count = _query_count(db_conn, target_table)
            logger.info(" 执行前表行数: %d", result.pre_row_count)
        except Exception as exc:
            logger.warning(" 查询执行前行数失败: %s", exc)

    # Run the task; any exception becomes an ERROR result.
    t0 = time.monotonic()
    try:
        task_result = executor.run_single_task(
            task_code=task_code,
            run_uuid=run_uuid,
            store_id=store_id,
            data_source="online",
        )
        result.duration_sec = round(time.monotonic() - t0, 2)
    except Exception as exc:
        result.duration_sec = round(time.monotonic() - t0, 2)
        result.status = "ERROR"
        result.message = f"任务执行异常: {exc}"
        result.error_detail = traceback.format_exc()
        logger.error(" ✗ 执行异常: %s", exc)
        return result

    # Parse the executor's response.
    task_status = (task_result.get("status") or "").upper()
    counts = task_result.get("counts") or {}
    result.counts = counts

    logger.info(" 返回状态: %s", task_status)
    logger.info(" counts: %s", counts)

    # Row count after execution (and the delta when both counts are known).
    if target_table and _table_exists(db_conn, target_table):
        try:
            result.post_row_count = _query_count(db_conn, target_table)
            logger.info(" 执行后表行数: %d", result.post_row_count)

            if result.pre_row_count is not None:
                delta = result.post_row_count - result.pre_row_count
                logger.info(" 行数变化: %+d", delta)
        except Exception as exc:
            logger.warning(" 查询执行后行数失败: %s", exc)

    # Sample-based DWS↔DWD consistency validation.
    if target_table and _table_exists(db_conn, target_table):
        try:
            consistency = _check_dws_dwd_consistency(db_conn, target_table, logger)
            result.consistency_check = consistency
            for check in consistency.get("checks", []):
                logger.info(" 一致性: %s", check)
        except Exception as exc:
            logger.warning(" ⚠ 一致性检查异常: %s", exc)

    # Final status decision: any issue downgrades the result to WARN.
    issues = []
    errors_count = counts.get("errors", 0)
    if errors_count:
        issues.append(f"执行有 {errors_count} 个错误")

    if result.consistency_check and result.consistency_check.get("mismatch_count", 0) > 0:
        issues.append(f"一致性检查有 {result.consistency_check['mismatch_count']} 处不一致")

    if result.post_row_count is not None and result.post_row_count == 0:
        issues.append("执行后表为空")

    if issues:
        result.status = "WARN"
        result.message = "; ".join(issues)
    elif task_status in ("SUCCESS", "PARTIAL", "COMPLETE"):
        result.status = "PASS"
        result.message = f"执行成功, counts={counts}"
    elif task_status == "SKIP":
        result.status = "WARN"
        result.message = "任务被跳过(未启用或不存在)"
    else:
        result.status = "WARN"
        result.message = f"未知状态: {task_status}"

    icon = {"PASS": "✓", "WARN": "⚠", "ERROR": "✗", "FAIL": "✗"}.get(result.status, "?")
    logger.info(" %s 结果: %s - %s (耗时 %.1fs)", icon, result.status, result.message, result.duration_sec)
    return result
|
||||
|
||||
|
||||
# ── 主流程 ────────────────────────────────────────────────────
|
||||
|
||||
def run_dws_debug(
    hours: float = 48.0,
    task_filter: list[str] | None = None,
) -> list[DebugResult]:
    """Run every registered DWS-layer task once and validate each outcome.

    Args:
        hours: Look-back window in hours (default 48; DWS aggregates are
            typically daily-grained).
        task_filter: Only debug the listed task codes; None means all.
    Returns:
        A DebugResult for every task attempted, in execution order.
    """
    logger = _setup_logging()
    logger.info("=" * 60)
    logger.info("DWS 层调试开始")
    logger.info("=" * 60)

    # Load configuration (from .env).
    config = AppConfig.load()
    tz = ZoneInfo(config.get("app.timezone", "Asia/Shanghai"))
    window_end = datetime.now(tz)
    window_start = window_end - timedelta(hours=hours)

    logger.info("门店 ID: %s", config.get("app.store_id"))
    logger.info("数据库: %s", config.get("db.name", ""))
    logger.info("API: %s", config.get("api.base_url", ""))
    logger.info("时间窗口: %s ~ %s (%.1f 小时)", window_start, window_end, hours)

    # Install a window_override so every task uses the same time window.
    config.config.setdefault("run", {}).setdefault("window_override", {})
    config.config["run"]["window_override"]["start"] = window_start
    config.config["run"]["window_override"]["end"] = window_end

    # Build DB / API / executor components.
    db_conn, api_client, db_ops, executor = _build_components(config, logger)

    # Collect all registered DWS-layer tasks, optionally filtered.
    all_dws_codes = sorted(default_registry.get_tasks_by_layer("DWS"))
    if task_filter:
        filter_set = {t.upper() for t in task_filter}
        dws_codes = [c for c in all_dws_codes if c in filter_set]
        skipped = filter_set - set(dws_codes)
        if skipped:
            logger.warning("以下任务不在 DWS 层注册表中,已跳过: %s", skipped)
    else:
        dws_codes = all_dws_codes

    logger.info("待调试 DWS 任务: %d 个", len(dws_codes))
    logger.info("任务列表: %s", ", ".join(dws_codes))
    logger.info("")

    # Execute one task at a time; an uncaught exception becomes an ERROR result.
    results: list[DebugResult] = []
    for idx, task_code in enumerate(dws_codes, start=1):
        logger.info("[%d/%d] %s", idx, len(dws_codes), task_code)
        try:
            r = debug_single_dws_task(
                task_code=task_code,
                executor=executor,
                db_conn=db_conn,
                config=config,
                api_client=api_client,
                logger=logger,
                window_start=window_start,
                window_end=window_end,
            )
        except Exception as exc:
            r = DebugResult(
                task_code=task_code,
                status="ERROR",
                message=f"未捕获异常: {exc}",
                error_detail=traceback.format_exc(),
            )
            logger.error(" ✗ 未捕获异常: %s", exc)
        results.append(r)

        # Keep the connection usable for the next task.
        db_conn.ensure_open()

    # Summary.
    _print_summary(results, logger)

    # Persist the results as JSON next to the script.
    output_dir = _FEIQIU_ROOT / "scripts" / "debug" / "output"
    output_dir.mkdir(parents=True, exist_ok=True)
    ts = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
    output_file = output_dir / f"debug_dws_{ts}.json"
    _save_results(results, output_file)
    logger.info("结果已保存: %s", output_file)

    # Cleanup.
    db_conn.close()
    return results
|
||||
|
||||
|
||||
# ── 汇总与输出 ────────────────────────────────────────────────
|
||||
|
||||
def _print_summary(results: list[DebugResult], logger: logging.Logger):
    """Log an aggregated summary of all DWS debug results."""
    logger.info("")
    logger.info("=" * 60)
    logger.info("DWS 层调试汇总")
    logger.info("=" * 60)

    pass_count = sum(1 for r in results if r.status == "PASS")
    warn_count = sum(1 for r in results if r.status == "WARN")
    error_count = sum(1 for r in results if r.status in ("ERROR", "FAIL"))
    total_duration = sum(r.duration_sec for r in results)

    logger.info("总计: %d 个任务", len(results))
    logger.info(" ✓ PASS: %d", pass_count)
    logger.info(" ⚠ WARN: %d", warn_count)
    logger.info(" ✗ ERROR: %d", error_count)
    logger.info(" 总耗时: %.1f 秒", total_duration)
    logger.info("")

    # Break the counts down by task category (regular / materialized-view
    # refresh / utility tasks, matched by task-code conventions).
    regular_tasks = [r for r in results if not r.task_code.startswith("DWS_MV_")
                     and r.task_code != "DWS_RETENTION_CLEANUP"
                     and r.task_code != "DWS_BUILD_ORDER_SUMMARY"]
    mv_tasks = [r for r in results if r.task_code.startswith("DWS_MV_")]
    utility_tasks = [r for r in results if r.task_code in ("DWS_RETENTION_CLEANUP", "DWS_BUILD_ORDER_SUMMARY")]

    if regular_tasks:
        logger.info("业务汇总任务: %d 个 (PASS=%d, WARN=%d, ERROR=%d)",
                    len(regular_tasks),
                    sum(1 for r in regular_tasks if r.status == "PASS"),
                    sum(1 for r in regular_tasks if r.status == "WARN"),
                    sum(1 for r in regular_tasks if r.status in ("ERROR", "FAIL")))
    if mv_tasks:
        logger.info("物化视图刷新: %d 个 (PASS=%d, WARN=%d, ERROR=%d)",
                    len(mv_tasks),
                    sum(1 for r in mv_tasks if r.status == "PASS"),
                    sum(1 for r in mv_tasks if r.status == "WARN"),
                    sum(1 for r in mv_tasks if r.status in ("ERROR", "FAIL")))
    if utility_tasks:
        logger.info("工具类任务: %d 个 (PASS=%d, WARN=%d, ERROR=%d)",
                    len(utility_tasks),
                    sum(1 for r in utility_tasks if r.status == "PASS"),
                    sum(1 for r in utility_tasks if r.status == "WARN"),
                    sum(1 for r in utility_tasks if r.status in ("ERROR", "FAIL")))

    # List the tasks that need attention (anything that is not PASS).
    non_pass = [r for r in results if r.status != "PASS"]
    if non_pass:
        logger.info("")
        logger.info("需关注的任务:")
        for r in non_pass:
            logger.info(" [%s] %s: %s", r.status, r.task_code, r.message)
    else:
        logger.info("")
        logger.info("所有任务均通过 ✓")
|
||||
|
||||
|
||||
def _save_results(results: list[DebugResult], path: Path):
    """Serialize all debug results to a JSON file at *path*."""
    payload = [_sanitize_for_json(asdict(item)) for item in results]
    serialized = json.dumps(payload, ensure_ascii=False, indent=2, default=str)
    path.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
def _sanitize_for_json(obj):
|
||||
"""递归处理不可序列化的值。"""
|
||||
if isinstance(obj, dict):
|
||||
return {k: _sanitize_for_json(v) for k, v in obj.items()}
|
||||
if isinstance(obj, (list, tuple)):
|
||||
return [_sanitize_for_json(v) for v in obj]
|
||||
if isinstance(obj, datetime):
|
||||
return obj.isoformat()
|
||||
return obj
|
||||
|
||||
|
||||
# ── CLI 入口 ──────────────────────────────────────────────────
|
||||
|
||||
def parse_args():
    """Parse the CLI arguments for the DWS per-task debug run."""
    cli = argparse.ArgumentParser(description="DWS 层逐任务调试")
    cli.add_argument(
        "--hours",
        type=float,
        default=48.0,
        help="回溯窗口小时数(默认 48,DWS 按天粒度汇总)",
    )
    cli.add_argument(
        "--tasks",
        type=str,
        default=None,
        help="仅调试指定任务,逗号分隔(如 DWS_FINANCE_DAILY,DWS_ASSISTANT_DAILY)",
    )
    return cli.parse_args()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: run the DWS debug pass and set the process exit code."""
    args = parse_args()
    selected = None
    if args.tasks:
        selected = [piece.strip().upper() for piece in args.tasks.split(",") if piece.strip()]

    outcomes = run_dws_debug(hours=args.hours, task_filter=selected)

    # Exit code: non-zero when any task ended in ERROR or FAIL.
    failed = any(item.status in ("ERROR", "FAIL") for item in outcomes)
    sys.exit(1 if failed else 0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
576
apps/etl/connectors/feiqiu/scripts/debug/debug_index.py
Normal file
576
apps/etl/connectors/feiqiu/scripts/debug/debug_index.py
Normal file
@@ -0,0 +1,576 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""INDEX 层逐任务调试脚本。
|
||||
|
||||
连接真实数据库,逐个执行 4 个 INDEX 层指数任务(WBI/NCI/RS/ML),
|
||||
验证指数计算结果的合理性(非空、范围检查)。
|
||||
|
||||
用法:
|
||||
cd apps/etl/connectors/feiqiu
|
||||
python -m scripts.debug.debug_index [--hours 720] [--tasks DWS_WINBACK_INDEX,DWS_NEWCONV_INDEX]
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
# ── 确保项目根目录在 sys.path ──
|
||||
_FEIQIU_ROOT = Path(__file__).resolve().parents[2]
|
||||
if str(_FEIQIU_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_FEIQIU_ROOT))
|
||||
|
||||
from config.settings import AppConfig
|
||||
from database.connection import DatabaseConnection
|
||||
from database.operations import DatabaseOperations
|
||||
from api.client import APIClient
|
||||
from orchestration.task_registry import default_registry
|
||||
from orchestration.cursor_manager import CursorManager
|
||||
from orchestration.run_tracker import RunTracker
|
||||
from orchestration.task_executor import TaskExecutor
|
||||
|
||||
|
||||
@dataclass
class DebugResult:
    """Debug result for a single INDEX-layer task."""
    layer: str = "INDEX"
    task_code: str = ""
    status: str = ""  # PASS / FAIL / WARN / ERROR
    message: str = ""
    counts: dict = field(default_factory=dict)  # counters returned by the executor
    target_table: str = ""  # schema-qualified target table name ("" if unknown)
    pre_row_count: int | None = None  # row count before the run (None = not measured)
    post_row_count: int | None = None  # row count after the run (None = not measured)
    range_check: dict | None = None  # output of _check_index_range, when performed
    duration_sec: float = 0.0  # wall-clock duration of the task run
    error_detail: str | None = None  # traceback text when status == "ERROR"
    fix_applied: str | None = None  # NOTE(review): never assigned in this script — presumably reserved; confirm
|
||||
|
||||
|
||||
# ── INDEX 任务 → 目标表 + 指数列 映射 ──
|
||||
# Used for the post-run range check: index score values should fall within
# [0, 100] (or the per-entry display_range when given).
_INDEX_TABLE_META: dict[str, dict] = {
    "DWS_WINBACK_INDEX": {
        "target_table": "dws.dws_member_winback_index",
        "score_columns": ["display_score", "raw_score"],
        "display_range": (0, 100),
        "description": "老客挽回指数(WBI)",
    },
    "DWS_NEWCONV_INDEX": {
        "target_table": "dws.dws_member_newconv_index",
        "score_columns": ["display_score", "raw_score"],
        "display_range": (0, 100),
        "description": "新客转化指数(NCI)",
    },
    "DWS_RELATION_INDEX": {
        "target_table": "dws.dws_member_assistant_relation_index",
        "score_columns": ["rs_display", "os_display", "ms_display", "ml_display"],
        "display_range": (0, 100),
        "description": "关系指数(RS/OS/MS/ML)",
    },
    "DWS_ML_MANUAL_IMPORT": {
        "target_table": "dws.dws_ml_manual_order_source",
        "score_columns": [],  # the ML import has no score columns; only row counts are checked
        "display_range": None,
        "description": "ML 人工台账导入",
    },
}
|
||||
|
||||
|
||||
# ── 工具函数 ──────────────────────────────────────────────────
|
||||
|
||||
def _setup_logging() -> logging.Logger:
|
||||
logger = logging.getLogger("debug_index")
|
||||
logger.setLevel(logging.INFO)
|
||||
if not logger.handlers:
|
||||
handler = logging.StreamHandler(sys.stdout)
|
||||
handler.setFormatter(logging.Formatter(
|
||||
"%(asctime)s [%(levelname)s] %(message)s", datefmt="%H:%M:%S"
|
||||
))
|
||||
logger.addHandler(handler)
|
||||
return logger
|
||||
|
||||
|
||||
def _build_components(config: AppConfig, logger: logging.Logger):
    """Wire up the DB connection, API client, and TaskExecutor stack.

    Returns a ``(db_conn, api_client, db_ops, executor)`` tuple.
    """
    db_cfg = config["db"]
    api_cfg = config["api"]

    db_conn = DatabaseConnection(
        dsn=db_cfg["dsn"],
        session=db_cfg.get("session"),
        connect_timeout=db_cfg.get("connect_timeout_sec"),
    )
    api_client = APIClient(
        base_url=api_cfg["base_url"],
        token=api_cfg["token"],
        timeout=api_cfg.get("timeout_sec", 20),
        retry_max=api_cfg.get("retries", {}).get("max_attempts", 3),
        headers_extra=api_cfg.get("headers_extra"),
    )
    db_ops = DatabaseOperations(db_conn)
    executor = TaskExecutor(
        config,
        db_ops,
        api_client,
        CursorManager(db_conn),
        RunTracker(db_conn),
        default_registry,
        logger,
    )
    return db_conn, api_client, db_ops, executor
|
||||
|
||||
|
||||
def _query_count(db_conn: DatabaseConnection, table: str) -> int:
|
||||
"""查询表的总行数。"""
|
||||
rows = db_conn.query(f"SELECT COUNT(*) AS cnt FROM {table}")
|
||||
return int(rows[0]["cnt"]) if rows else 0
|
||||
|
||||
|
||||
def _table_exists(db_conn: DatabaseConnection, table: str) -> bool:
|
||||
"""检查表/视图是否存在。"""
|
||||
rows = db_conn.query("SELECT to_regclass(%s) AS reg", (table,))
|
||||
return bool(rows and rows[0].get("reg"))
|
||||
|
||||
|
||||
def _has_column(db_conn: DatabaseConnection, table: str, column: str) -> bool:
|
||||
"""检查表是否包含指定列。"""
|
||||
sql = """
|
||||
SELECT 1 FROM information_schema.columns
|
||||
WHERE table_schema || '.' || table_name = %s
|
||||
AND column_name = %s
|
||||
LIMIT 1
|
||||
"""
|
||||
rows = db_conn.query(sql, (table, column))
|
||||
return bool(rows)
|
||||
|
||||
|
||||
# ── 指数范围检查 ──────────────────────────────────────────────
|
||||
|
||||
def _check_index_range(
    db_conn: DatabaseConnection,
    table: str,
    score_columns: list[str],
    display_range: tuple[float, float] | None,
    logger: logging.Logger,
) -> dict:
    """Check that index score columns fall in a sane range and gather stats.

    Returns:
        {
            "columns_checked": [...],
            "issues": [...],
            "stats": {col: {min, max, avg, null_count, out_of_range_count, total}}
        }
    """
    result: dict = {"columns_checked": [], "issues": [], "stats": {}}

    if not score_columns:
        result["issues"].append("ℹ 该任务无指数列,跳过范围检查")
        return result

    for col in score_columns:
        # Skip (and report) columns that are missing from the table.
        if not _has_column(db_conn, table, col):
            result["issues"].append(f"⚠ 列 {col} 不存在于 {table}")
            continue

        result["columns_checked"].append(col)

        # Basic distribution statistics for this score column.
        stats_sql = f"""
        SELECT
            COUNT(*) AS total,
            COUNT("{col}") AS non_null,
            COUNT(*) - COUNT("{col}") AS null_count,
            ROUND(MIN("{col}")::numeric, 4) AS min_val,
            ROUND(MAX("{col}")::numeric, 4) AS max_val,
            ROUND(AVG("{col}")::numeric, 4) AS avg_val
        FROM {table}
        """
        try:
            rows = db_conn.query(stats_sql)
            if not rows:
                result["issues"].append(f"⚠ {col}: 查询统计失败(无返回行)")
                continue

            row = rows[0]
            total = int(row["total"])
            non_null = int(row["non_null"])
            null_count = int(row["null_count"])
            min_val = row["min_val"]
            max_val = row["max_val"]
            avg_val = row["avg_val"]

            col_stats = {
                "total": total,
                "non_null": non_null,
                "null_count": null_count,
                "min": float(min_val) if min_val is not None else None,
                "max": float(max_val) if max_val is not None else None,
                "avg": float(avg_val) if avg_val is not None else None,
            }

            # Range check: count non-NULL values outside [lo, hi].
            if display_range and non_null > 0:
                lo, hi = display_range
                oor_sql = f"""
                SELECT COUNT(*) AS cnt FROM {table}
                WHERE "{col}" IS NOT NULL
                AND ("{col}" < {lo} OR "{col}" > {hi})
                """
                oor_rows = db_conn.query(oor_sql)
                oor_count = int(oor_rows[0]["cnt"]) if oor_rows else 0
                col_stats["out_of_range_count"] = oor_count

                if oor_count > 0:
                    result["issues"].append(
                        f"⚠ {col}: {oor_count}/{non_null} 条记录超出 [{lo}, {hi}] 范围"
                    )

            # Flag columns whose values are entirely NULL.
            if total > 0 and non_null == 0:
                result["issues"].append(f"⚠ {col}: 全部为 NULL({total} 行)")

            result["stats"][col] = col_stats

        except Exception as exc:
            result["issues"].append(f"✗ {col}: 统计查询异常: {exc}")

    return result
|
||||
|
||||
|
||||
# ── 核心调试逻辑 ──────────────────────────────────────────────
|
||||
|
||||
def debug_single_index_task(
    task_code: str,
    executor: TaskExecutor,
    db_conn: DatabaseConnection,
    config: AppConfig,
    api_client,
    logger: logging.Logger,
    window_start: datetime,
    window_end: datetime,
) -> DebugResult:
    """Execute a single INDEX task and validate its results.

    Records pre/post row counts of the target table and, for tasks with
    score columns, runs a value-range sanity check. ``window_start`` /
    ``window_end`` are informational here — the actual window comes from
    the caller's config ``window_override``.
    """
    result = DebugResult(task_code=task_code)

    # Look up the task's table/score-column metadata (empty dict if unknown).
    meta = _INDEX_TABLE_META.get(task_code, {})
    target_table = meta.get("target_table", "")
    score_columns = meta.get("score_columns", [])
    display_range = meta.get("display_range")
    description = meta.get("description", task_code)
    result.target_table = target_table

    store_id = int(config.get("app.store_id"))
    run_uuid = f"debug-index-{task_code.lower()}-{int(time.time())}"

    logger.info("━" * 60)
    logger.info("▶ 开始调试: %s (%s, 表: %s)", task_code, description, target_table or "未知")

    # Row count before execution.
    if target_table and _table_exists(db_conn, target_table):
        try:
            result.pre_row_count = _query_count(db_conn, target_table)
            logger.info(" 执行前表行数: %d", result.pre_row_count)
        except Exception as exc:
            logger.warning(" 查询执行前行数失败: %s", exc)
    elif target_table:
        logger.warning(" 目标表不存在: %s", target_table)

    # Run the task; any exception becomes an ERROR result.
    t0 = time.monotonic()
    try:
        task_result = executor.run_single_task(
            task_code=task_code,
            run_uuid=run_uuid,
            store_id=store_id,
            data_source="online",
        )
        result.duration_sec = round(time.monotonic() - t0, 2)
    except Exception as exc:
        result.duration_sec = round(time.monotonic() - t0, 2)
        result.status = "ERROR"
        result.message = f"任务执行异常: {exc}"
        result.error_detail = traceback.format_exc()
        logger.error(" ✗ 执行异常: %s", exc)
        return result

    # Parse the executor's response.
    task_status = (task_result.get("status") or "").upper()
    counts = task_result.get("counts") or {}
    result.counts = counts

    logger.info(" 返回状态: %s", task_status)
    logger.info(" counts: %s", counts)

    # Row count after execution (and delta when both counts are known).
    if target_table and _table_exists(db_conn, target_table):
        try:
            result.post_row_count = _query_count(db_conn, target_table)
            logger.info(" 执行后表行数: %d", result.post_row_count)

            if result.pre_row_count is not None:
                delta = result.post_row_count - result.pre_row_count
                logger.info(" 行数变化: %+d", delta)
        except Exception as exc:
            logger.warning(" 查询执行后行数失败: %s", exc)

    # Range check on the index score columns, when the task declares any.
    if target_table and _table_exists(db_conn, target_table) and score_columns:
        try:
            range_check = _check_index_range(
                db_conn, target_table, score_columns, display_range, logger,
            )
            result.range_check = range_check

            for col, stats in range_check.get("stats", {}).items():
                logger.info(
                    " %s: min=%.2f, max=%.2f, avg=%.2f, null=%d/%d",
                    col,
                    stats.get("min") or 0,
                    stats.get("max") or 0,
                    stats.get("avg") or 0,
                    stats.get("null_count", 0),
                    stats.get("total", 0),
                )
            for issue in range_check.get("issues", []):
                logger.info(" 范围检查: %s", issue)
        except Exception as exc:
            logger.warning(" ⚠ 范围检查异常: %s", exc)

    # Final status decision: any issue downgrades the result to WARN.
    issues = []
    errors_count = counts.get("errors", 0)
    if errors_count:
        issues.append(f"执行有 {errors_count} 个错误")

    if result.post_row_count is not None and result.post_row_count == 0:
        issues.append("执行后表为空")

    if result.range_check:
        # Sum the out-of-range counters across all checked score columns.
        oor_total = sum(
            s.get("out_of_range_count", 0)
            for s in result.range_check.get("stats", {}).values()
        )
        if oor_total > 0:
            issues.append(f"指数范围检查: {oor_total} 条超出范围")

        # Flag the degenerate case where every score column is entirely NULL.
        all_null = all(
            s.get("non_null", 0) == 0
            for s in result.range_check.get("stats", {}).values()
        ) if result.range_check.get("stats") else False
        if all_null:
            issues.append("所有指数列均为 NULL")

    if issues:
        result.status = "WARN"
        result.message = "; ".join(issues)
    elif task_status in ("SUCCESS", "PARTIAL", "COMPLETE"):
        result.status = "PASS"
        result.message = f"执行成功, counts={counts}"
    elif task_status == "SKIP":
        result.status = "WARN"
        result.message = "任务被跳过(未启用或不存在)"
    else:
        result.status = "WARN"
        result.message = f"未知状态: {task_status}"

    icon = {"PASS": "✓", "WARN": "⚠", "ERROR": "✗", "FAIL": "✗"}.get(result.status, "?")
    logger.info(" %s 结果: %s - %s (耗时 %.1fs)", icon, result.status, result.message, result.duration_sec)
    return result
|
||||
|
||||
|
||||
# ── 主流程 ────────────────────────────────────────────────────
|
||||
|
||||
def run_index_debug(
    hours: float = 720.0,
    task_filter: list[str] | None = None,
) -> list[DebugResult]:
    """Run every registered INDEX-layer task once and validate each outcome.

    Args:
        hours: Look-back window in hours (default 720 = 30 days; index
            computation typically needs a long history).
        task_filter: Only debug the listed task codes; None means all.
    Returns:
        A DebugResult for every task attempted, in execution order.
    """
    logger = _setup_logging()
    logger.info("=" * 60)
    logger.info("INDEX 层调试开始")
    logger.info("=" * 60)

    # Load configuration (from .env).
    config = AppConfig.load()
    tz = ZoneInfo(config.get("app.timezone", "Asia/Shanghai"))
    window_end = datetime.now(tz)
    window_start = window_end - timedelta(hours=hours)

    logger.info("门店 ID: %s", config.get("app.store_id"))
    logger.info("数据库: %s", config.get("db.name", ""))
    logger.info("API: %s", config.get("api.base_url", ""))
    logger.info("时间窗口: %s ~ %s (%.1f 小时)", window_start, window_end, hours)

    # Install a window_override so every task uses the same time window.
    config.config.setdefault("run", {}).setdefault("window_override", {})
    config.config["run"]["window_override"]["start"] = window_start
    config.config["run"]["window_override"]["end"] = window_end

    # Build DB / API / executor components.
    db_conn, api_client, db_ops, executor = _build_components(config, logger)

    # Collect all registered INDEX-layer tasks, optionally filtered.
    all_index_codes = sorted(default_registry.get_tasks_by_layer("INDEX"))
    if task_filter:
        filter_set = {t.upper() for t in task_filter}
        index_codes = [c for c in all_index_codes if c in filter_set]
        skipped = filter_set - set(index_codes)
        if skipped:
            logger.warning("以下任务不在 INDEX 层注册表中,已跳过: %s", skipped)
    else:
        index_codes = all_index_codes

    logger.info("待调试 INDEX 任务: %d 个", len(index_codes))
    logger.info("任务列表: %s", ", ".join(index_codes))
    logger.info("")

    # Execute one task at a time; an uncaught exception becomes an ERROR result.
    results: list[DebugResult] = []
    for idx, task_code in enumerate(index_codes, start=1):
        logger.info("[%d/%d] %s", idx, len(index_codes), task_code)
        try:
            r = debug_single_index_task(
                task_code=task_code,
                executor=executor,
                db_conn=db_conn,
                config=config,
                api_client=api_client,
                logger=logger,
                window_start=window_start,
                window_end=window_end,
            )
        except Exception as exc:
            r = DebugResult(
                task_code=task_code,
                status="ERROR",
                message=f"未捕获异常: {exc}",
                error_detail=traceback.format_exc(),
            )
            logger.error(" ✗ 未捕获异常: %s", exc)
        results.append(r)

        # Keep the connection usable for the next task.
        db_conn.ensure_open()

    # Summary.
    _print_summary(results, logger)

    # Persist the results as JSON next to the script.
    output_dir = _FEIQIU_ROOT / "scripts" / "debug" / "output"
    output_dir.mkdir(parents=True, exist_ok=True)
    ts = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
    output_file = output_dir / f"debug_index_{ts}.json"
    _save_results(results, output_file)
    logger.info("结果已保存: %s", output_file)

    # Cleanup.
    db_conn.close()
    return results
|
||||
|
||||
|
||||
# ── 汇总与输出 ────────────────────────────────────────────────
|
||||
|
||||
def _print_summary(results: list[DebugResult], logger: logging.Logger):
    """Log an aggregate summary of the INDEX-layer debug run.

    Counts PASS/WARN/ERROR results, breaks them down by task category
    (index-score tasks vs the ML manual-import task), and lists every
    task that did not PASS. Output goes only to *logger*; nothing is
    returned or persisted here.
    """
    logger.info("")
    logger.info("=" * 60)
    logger.info("INDEX 层调试汇总")
    logger.info("=" * 60)

    # FAIL is folded into the error bucket together with ERROR.
    pass_count = sum(1 for r in results if r.status == "PASS")
    warn_count = sum(1 for r in results if r.status == "WARN")
    error_count = sum(1 for r in results if r.status in ("ERROR", "FAIL"))
    total_duration = sum(r.duration_sec for r in results)

    logger.info("总计: %d 个任务", len(results))
    logger.info("  ✓ PASS: %d", pass_count)
    logger.info("  ⚠ WARN: %d", warn_count)
    logger.info("  ✗ ERROR: %d", error_count)
    logger.info("  总耗时: %.1f 秒", total_duration)
    logger.info("")

    # Per-category statistics: the ML manual import is reported separately
    # from the regular index-score tasks.
    score_tasks = [r for r in results if r.task_code != "DWS_ML_MANUAL_IMPORT"]
    ml_tasks = [r for r in results if r.task_code == "DWS_ML_MANUAL_IMPORT"]

    if score_tasks:
        logger.info("指数计算任务: %d 个 (PASS=%d, WARN=%d, ERROR=%d)",
                    len(score_tasks),
                    sum(1 for r in score_tasks if r.status == "PASS"),
                    sum(1 for r in score_tasks if r.status == "WARN"),
                    sum(1 for r in score_tasks if r.status in ("ERROR", "FAIL")))
    if ml_tasks:
        logger.info("ML 导入任务: %d 个 (PASS=%d, WARN=%d, ERROR=%d)",
                    len(ml_tasks),
                    sum(1 for r in ml_tasks if r.status == "PASS"),
                    sum(1 for r in ml_tasks if r.status == "WARN"),
                    sum(1 for r in ml_tasks if r.status in ("ERROR", "FAIL")))

    # List every task that needs attention (anything not PASS).
    non_pass = [r for r in results if r.status != "PASS"]
    if non_pass:
        logger.info("")
        logger.info("需关注的任务:")
        for r in non_pass:
            logger.info("  [%s] %s: %s", r.status, r.task_code, r.message)
    else:
        logger.info("")
        logger.info("所有任务均通过 ✓")
|
||||
|
||||
|
||||
def _save_results(results: list[DebugResult], path: Path):
    """Serialize the debug results to *path* as pretty-printed UTF-8 JSON."""
    serializable = []
    for record in results:
        serializable.append(_sanitize_for_json(asdict(record)))
    payload = json.dumps(serializable, ensure_ascii=False, indent=2, default=str)
    path.write_text(payload, encoding="utf-8")
|
||||
|
||||
|
||||
def _sanitize_for_json(obj):
|
||||
"""递归处理不可序列化的值。"""
|
||||
if isinstance(obj, dict):
|
||||
return {k: _sanitize_for_json(v) for k, v in obj.items()}
|
||||
if isinstance(obj, (list, tuple)):
|
||||
return [_sanitize_for_json(v) for v in obj]
|
||||
if isinstance(obj, datetime):
|
||||
return obj.isoformat()
|
||||
return obj
|
||||
|
||||
|
||||
# ── CLI 入口 ──────────────────────────────────────────────────
|
||||
|
||||
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse CLI arguments for the INDEX-layer debug driver.

    Args:
        argv: Argument list to parse. Defaults to None, in which case
            argparse falls back to ``sys.argv[1:]`` — identical to the
            previous zero-argument behavior. Accepting an explicit list
            makes the parser unit-testable.

    Returns:
        Namespace with ``hours`` (float) and ``tasks`` (str | None).
    """
    parser = argparse.ArgumentParser(description="INDEX 层逐任务调试")
    parser.add_argument("--hours", type=float, default=720.0,
                        help="回溯窗口小时数(默认 720 = 30 天,指数计算需要较长历史)")
    parser.add_argument("--tasks", type=str, default=None,
                        help="仅调试指定任务,逗号分隔(如 DWS_WINBACK_INDEX,DWS_NEWCONV_INDEX)")
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse args, run the INDEX debug sweep, set exit code."""
    args = parse_args()
    selected = None
    if args.tasks:
        raw_codes = args.tasks.split(",")
        selected = [code.strip().upper() for code in raw_codes if code.strip()]

    results = run_index_debug(hours=args.hours, task_filter=selected)

    # Exit non-zero when any task ended in ERROR/FAIL.
    failed = any(r.status in ("ERROR", "FAIL") for r in results)
    sys.exit(1 if failed else 0)
|
||||
|
||||
|
||||
# Script entry point: run the debug driver when executed directly.
if __name__ == "__main__":
    main()
|
||||
418
apps/etl/connectors/feiqiu/scripts/debug/debug_ods.py
Normal file
418
apps/etl/connectors/feiqiu/scripts/debug/debug_ods.py
Normal file
@@ -0,0 +1,418 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""ODS 层逐任务调试脚本。
|
||||
|
||||
连接真实 API 和数据库,逐个执行 23 个 ODS 任务(小窗口),
|
||||
验证返回结果和 ODS 表实际写入行数的一致性。
|
||||
|
||||
用法:
|
||||
cd apps/etl/connectors/feiqiu
|
||||
python -m scripts.debug.debug_ods [--hours 2] [--tasks ODS_MEMBER,ODS_PAYMENT]
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
# ── Ensure the connector root is importable when run as a script ──
# parents[2] of scripts/debug/debug_ods.py resolves to the feiqiu package root.
_FEIQIU_ROOT = Path(__file__).resolve().parents[2]
if str(_FEIQIU_ROOT) not in sys.path:
    sys.path.insert(0, str(_FEIQIU_ROOT))
|
||||
|
||||
from config.settings import AppConfig
|
||||
from database.connection import DatabaseConnection
|
||||
from database.operations import DatabaseOperations
|
||||
from api.client import APIClient
|
||||
from orchestration.task_registry import default_registry
|
||||
from orchestration.cursor_manager import CursorManager
|
||||
from orchestration.run_tracker import RunTracker
|
||||
from orchestration.task_executor import TaskExecutor
|
||||
|
||||
|
||||
@dataclass
class DebugResult:
    """Debug outcome for a single ODS task run."""
    layer: str = "ODS"  # pipeline layer this result belongs to
    task_code: str = ""  # registry code of the task (e.g. ODS_MEMBER)
    status: str = ""  # PASS / FAIL / WARN / ERROR
    message: str = ""  # one-line human-readable verdict
    counts: dict = field(default_factory=dict)  # counters returned by the task (fetched/inserted/...)
    db_row_count: int | None = None  # rows in the ODS table after the run (window-scoped)
    count_match: bool | None = None  # whether the DB delta was reconciled against counts
    duration_sec: float = 0.0  # wall-clock task duration in seconds
    error_detail: str | None = None  # traceback text when status == ERROR
    table_name: str = ""  # target ODS table (schema-qualified); "" when unknown
    fix_applied: str | None = None  # note about an applied auto-fix; not set in this script's visible paths
|
||||
|
||||
|
||||
# ── 工具函数 ──────────────────────────────────────────────────
|
||||
|
||||
def _setup_logging() -> logging.Logger:
|
||||
logger = logging.getLogger("debug_ods")
|
||||
logger.setLevel(logging.INFO)
|
||||
if not logger.handlers:
|
||||
handler = logging.StreamHandler(sys.stdout)
|
||||
handler.setFormatter(logging.Formatter(
|
||||
"%(asctime)s [%(levelname)s] %(message)s", datefmt="%H:%M:%S"
|
||||
))
|
||||
logger.addHandler(handler)
|
||||
return logger
|
||||
|
||||
|
||||
def _get_ods_table_name(task_code: str) -> str | None:
    """Look up the target table name for an ODS task via the task registry.

    Returns None when the task code is unknown or its class carries no
    usable ``SPEC.table_name`` attribute.
    """
    meta = default_registry.get_metadata(task_code)
    if meta is None:
        return None
    # Every ODS task class is expected to expose a SPEC with table_name.
    spec = getattr(meta.task_class, "SPEC", None)
    if not spec:
        return None
    return getattr(spec, "table_name", None)
|
||||
|
||||
|
||||
def _query_table_count(db_conn: DatabaseConnection, table_name: str,
                       window_start: datetime, window_end: datetime) -> int:
    """Count rows in an ODS table within [window_start, window_end).

    Prefers filtering on the ``fetched_at`` column; when that column does
    not exist, falls back to a full-table COUNT (so the returned number is
    then NOT window-scoped).
    """
    # First check whether the table has a fetched_at column at all.
    check_sql = """
        SELECT 1 FROM information_schema.columns
        WHERE table_schema || '.' || table_name = %s
        AND column_name = 'fetched_at'
        LIMIT 1
    """
    schema_table = table_name  # expected format: "ods.xxx" (schema.table)
    rows = db_conn.query(check_sql, (schema_table,))

    if rows:
        # NOTE(review): table_name is interpolated into the SQL string
        # directly; this is only safe because it comes from the task
        # registry, never from user input — confirm that invariant holds.
        count_sql = f"SELECT COUNT(*) AS cnt FROM {table_name} WHERE fetched_at >= %s AND fetched_at < %s"
        result = db_conn.query(count_sql, (window_start, window_end))
    else:
        count_sql = f"SELECT COUNT(*) AS cnt FROM {table_name}"
        result = db_conn.query(count_sql)

    return int(result[0]["cnt"]) if result else 0
|
||||
|
||||
|
||||
def _build_components(config: AppConfig, logger: logging.Logger):
    """Build the DB / API / TaskExecutor components, mirroring the CLI main().

    Returns:
        Tuple of (db_conn, api_client, db_ops, executor). The caller owns
        the database connection and is responsible for closing it.
    """
    db_conn = DatabaseConnection(
        dsn=config["db"]["dsn"],
        session=config["db"].get("session"),
        connect_timeout=config["db"].get("connect_timeout_sec"),
    )
    api_client = APIClient(
        base_url=config["api"]["base_url"],
        token=config["api"]["token"],
        timeout=config["api"].get("timeout_sec", 20),
        retry_max=config["api"].get("retries", {}).get("max_attempts", 3),
        headers_extra=config["api"].get("headers_extra"),
    )
    db_ops = DatabaseOperations(db_conn)
    cursor_mgr = CursorManager(db_conn)
    run_tracker = RunTracker(db_conn)

    # The executor pulls task classes from the shared default_registry.
    executor = TaskExecutor(
        config, db_ops, api_client,
        cursor_mgr, run_tracker, default_registry, logger,
    )
    return db_conn, api_client, db_ops, executor
|
||||
|
||||
|
||||
# ── 核心调试逻辑 ──────────────────────────────────────────────
|
||||
|
||||
def debug_single_ods_task(
    task_code: str,
    executor: TaskExecutor,
    db_conn: DatabaseConnection,
    config: AppConfig,
    logger: logging.Logger,
    window_start: datetime,
    window_end: datetime,
) -> DebugResult:
    """Execute one ODS task and validate its result.

    Runs the task through *executor*, sanity-checks the returned counters,
    and cross-checks them against actual row counts in the target ODS table
    for the [window_start, window_end) window. Never raises for task-level
    failures — they are folded into the returned DebugResult.
    """
    result = DebugResult(task_code=task_code)
    table_name = _get_ods_table_name(task_code)
    result.table_name = table_name or ""

    store_id = int(config.get("app.store_id"))
    # Unique run id so this debug run is distinguishable in run tracking.
    run_uuid = f"debug-ods-{task_code.lower()}-{int(time.time())}"

    logger.info("━" * 60)
    logger.info("▶ 开始调试: %s (表: %s)", task_code, table_name or "未知")

    # Row count before execution (baseline for the insert-delta comparison).
    pre_count = None
    if table_name:
        try:
            pre_count = _query_table_count(db_conn, table_name, window_start, window_end)
            logger.info("  执行前表行数 (窗口内): %d", pre_count)
        except Exception as exc:
            # Best-effort only: a failed pre-count just disables the delta check.
            logger.warning("  查询执行前行数失败: %s", exc)

    # Execute the task; any exception becomes an ERROR result.
    t0 = time.monotonic()
    try:
        task_result = executor.run_single_task(
            task_code=task_code,
            run_uuid=run_uuid,
            store_id=store_id,
            data_source="online",
        )
        result.duration_sec = round(time.monotonic() - t0, 2)
    except Exception as exc:
        result.duration_sec = round(time.monotonic() - t0, 2)
        result.status = "ERROR"
        result.message = f"任务执行异常: {exc}"
        result.error_detail = traceback.format_exc()
        logger.error("  ✗ 执行异常: %s", exc)
        return result

    # Parse the executor's return payload.
    task_status = (task_result.get("status") or "").upper()
    counts = task_result.get("counts") or {}
    result.counts = counts

    logger.info("  返回状态: %s", task_status)
    logger.info("  counts: fetched=%s inserted=%s updated=%s skipped=%s errors=%s",
                counts.get("fetched", 0), counts.get("inserted", 0),
                counts.get("updated", 0), counts.get("skipped", 0),
                counts.get("errors", 0))

    # Sanity-check the counters.
    fetched = counts.get("fetched", 0)
    inserted = counts.get("inserted", 0)
    updated = counts.get("updated", 0)
    skipped = counts.get("skipped", 0)
    errors = counts.get("errors", 0)

    # Basic invariant: fetched >= inserted + updated + skipped.
    accounted = inserted + updated + skipped
    if fetched > 0 and accounted > fetched:
        result.status = "WARN"
        result.message = f"counts 异常: accounted({accounted}) > fetched({fetched})"
        logger.warning("  ⚠ %s", result.message)

    # Row count after execution, compared against the pre-run baseline.
    if table_name:
        try:
            post_count = _query_table_count(db_conn, table_name, window_start, window_end)
            result.db_row_count = post_count
            logger.info("  执行后表行数 (窗口内): %d", post_count)

            # Newly added rows should roughly equal counts.inserted.
            if pre_count is not None:
                actual_delta = post_count - pre_count
                # `inserted` is what the task claims it inserted this run.
                if inserted > 0 and actual_delta == 0:
                    # Conflict handling (DO NOTHING / update) may yield no new rows.
                    logger.info("  ℹ 无新增行(可能是冲突处理: DO NOTHING / update)")
                    result.count_match = True  # mark the comparison as done

                logger.info("  实际新增行数: %d, counts.inserted: %d", actual_delta, inserted)
        except Exception as exc:
            logger.warning("  查询执行后行数失败: %s", exc)

    # Final status decision — only when nothing above already set a status.
    if result.status == "":
        if errors > 0:
            result.status = "WARN"
            result.message = f"执行完成但有 {errors} 个错误"
        elif task_status in ("SUCCESS", "PARTIAL"):
            result.status = "PASS"
            result.message = f"执行成功, fetched={fetched}"
        elif task_status == "SKIP":
            result.status = "WARN"
            result.message = "任务被跳过(未启用或不存在)"
        else:
            result.status = "WARN"
            result.message = f"未知状态: {task_status}"

    icon = {"PASS": "✓", "WARN": "⚠", "ERROR": "✗", "FAIL": "✗"}.get(result.status, "?")
    logger.info("  %s 结果: %s - %s (耗时 %.1fs)", icon, result.status, result.message, result.duration_sec)
    return result
|
||||
|
||||
|
||||
# ── 主流程 ────────────────────────────────────────────────────
|
||||
|
||||
def run_ods_debug(
    hours: float = 2.0,
    task_filter: list[str] | None = None,
) -> list[DebugResult]:
    """Run the full ODS-layer debug sweep against real API and DB.

    Args:
        hours: Look-back window in hours (default 2).
        task_filter: Only debug these task codes; None means all ODS tasks.

    Returns:
        DebugResult list, one entry per executed task.
    """
    logger = _setup_logging()
    logger.info("=" * 60)
    logger.info("ODS 层调试开始")
    logger.info("=" * 60)

    # Load configuration (from .env).
    config = AppConfig.load()
    tz = ZoneInfo(config.get("app.timezone", "Asia/Shanghai"))
    window_end = datetime.now(tz)
    window_start = window_end - timedelta(hours=hours)

    logger.info("门店 ID: %s", config.get("app.store_id"))
    logger.info("数据库: %s", config.get("db.name", ""))
    logger.info("API: %s", config.get("api.base_url", ""))
    logger.info("时间窗口: %s ~ %s (%.1f 小时)", window_start, window_end, hours)

    # Force one uniform small window for every task via window_override.
    config.config.setdefault("run", {}).setdefault("window_override", {})
    config.config["run"]["window_override"]["start"] = window_start
    config.config["run"]["window_override"]["end"] = window_end

    # Build components.
    db_conn, api_client, db_ops, executor = _build_components(config, logger)

    # Collect all ODS-layer tasks; optionally narrow to the filter set.
    all_ods_codes = sorted(default_registry.get_tasks_by_layer("ODS"))
    if task_filter:
        filter_set = {t.upper() for t in task_filter}
        ods_codes = [c for c in all_ods_codes if c in filter_set]
        skipped = filter_set - set(ods_codes)
        if skipped:
            logger.warning("以下任务不在 ODS 层注册表中,已跳过: %s", skipped)
    else:
        ods_codes = all_ods_codes

    logger.info("待调试 ODS 任务: %d 个", len(ods_codes))
    logger.info("任务列表: %s", ", ".join(ods_codes))
    logger.info("")

    # Execute tasks one by one; an uncaught exception becomes an ERROR
    # result instead of aborting the whole sweep.
    results: list[DebugResult] = []
    for idx, task_code in enumerate(ods_codes, start=1):
        logger.info("[%d/%d] %s", idx, len(ods_codes), task_code)
        try:
            r = debug_single_ods_task(
                task_code=task_code,
                executor=executor,
                db_conn=db_conn,
                config=config,
                logger=logger,
                window_start=window_start,
                window_end=window_end,
            )
        except Exception as exc:
            r = DebugResult(
                task_code=task_code,
                status="ERROR",
                message=f"未捕获异常: {exc}",
                error_detail=traceback.format_exc(),
            )
            logger.error("  ✗ 未捕获异常: %s", exc)
        results.append(r)

        # Keep the connection usable (guards against drops on long runs).
        db_conn.ensure_open()

    # Summary.
    _print_summary(results, logger)

    # Persist the JSON results next to this script.
    output_dir = _FEIQIU_ROOT / "scripts" / "debug" / "output"
    output_dir.mkdir(parents=True, exist_ok=True)
    ts = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
    output_file = output_dir / f"debug_ods_{ts}.json"
    _save_results(results, output_file)
    logger.info("结果已保存: %s", output_file)

    # Cleanup.
    db_conn.close()
    return results
|
||||
|
||||
|
||||
def _print_summary(results: list[DebugResult], logger: logging.Logger):
    """Log an aggregate summary of the ODS-layer debug run."""
    logger.info("")
    logger.info("=" * 60)
    logger.info("ODS 层调试汇总")
    logger.info("=" * 60)

    # FAIL is folded into the error bucket together with ERROR.
    pass_count = sum(1 for r in results if r.status == "PASS")
    warn_count = sum(1 for r in results if r.status == "WARN")
    error_count = sum(1 for r in results if r.status in ("ERROR", "FAIL"))
    total_duration = sum(r.duration_sec for r in results)

    logger.info("总计: %d 个任务", len(results))
    logger.info("  ✓ PASS: %d", pass_count)
    logger.info("  ⚠ WARN: %d", warn_count)
    logger.info("  ✗ ERROR: %d", error_count)
    logger.info("  总耗时: %.1f 秒", total_duration)
    logger.info("")

    # List every task that did not PASS.
    non_pass = [r for r in results if r.status != "PASS"]
    if non_pass:
        logger.info("需关注的任务:")
        for r in non_pass:
            logger.info("  [%s] %s: %s", r.status, r.task_code, r.message)
    else:
        logger.info("所有任务均通过 ✓")
|
||||
|
||||
|
||||
def _save_results(results: list[DebugResult], path: Path):
    """Serialize all task results to *path* as UTF-8 JSON."""
    # counts may contain datetimes; sanitize each record before dumping.
    records = [_sanitize_for_json(asdict(result)) for result in results]
    text = json.dumps(records, ensure_ascii=False, indent=2, default=str)
    path.write_text(text, encoding="utf-8")
|
||||
|
||||
|
||||
def _sanitize_for_json(obj):
|
||||
"""递归处理不可序列化的值。"""
|
||||
if isinstance(obj, dict):
|
||||
return {k: _sanitize_for_json(v) for k, v in obj.items()}
|
||||
if isinstance(obj, (list, tuple)):
|
||||
return [_sanitize_for_json(v) for v in obj]
|
||||
if isinstance(obj, datetime):
|
||||
return obj.isoformat()
|
||||
return obj
|
||||
|
||||
|
||||
# ── CLI 入口 ──────────────────────────────────────────────────
|
||||
|
||||
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse CLI arguments for the ODS-layer debug driver.

    Args:
        argv: Argument list to parse. Defaults to None, in which case
            argparse falls back to ``sys.argv[1:]`` — identical to the
            previous zero-argument behavior. Accepting an explicit list
            makes the parser unit-testable.

    Returns:
        Namespace with ``hours`` (float) and ``tasks`` (str | None).
    """
    parser = argparse.ArgumentParser(description="ODS 层逐任务调试")
    parser.add_argument("--hours", type=float, default=2.0,
                        help="回溯窗口小时数(默认 2)")
    parser.add_argument("--tasks", type=str, default=None,
                        help="仅调试指定任务,逗号分隔(如 ODS_MEMBER,ODS_PAYMENT)")
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse args, run the ODS debug sweep, set exit code."""
    args = parse_args()
    selected = None
    if args.tasks:
        raw_codes = args.tasks.split(",")
        selected = [code.strip().upper() for code in raw_codes if code.strip()]

    results = run_ods_debug(hours=args.hours, task_filter=selected)

    # Exit non-zero when any task ended in ERROR/FAIL.
    failed = any(r.status in ("ERROR", "FAIL") for r in results)
    sys.exit(1 if failed else 0)
|
||||
|
||||
|
||||
# Script entry point: run the debug driver when executed directly.
if __name__ == "__main__":
    main()
|
||||
1044
apps/etl/connectors/feiqiu/scripts/debug/debug_orchestration.py
Normal file
1044
apps/etl/connectors/feiqiu/scripts/debug/debug_orchestration.py
Normal file
File diff suppressed because it is too large
Load Diff
685
apps/etl/connectors/feiqiu/scripts/debug/generate_report.py
Normal file
685
apps/etl/connectors/feiqiu/scripts/debug/generate_report.py
Normal file
@@ -0,0 +1,685 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Debug 报告生成脚本 —— 汇总所有阶段的调试结果,生成结构化 Markdown 报告。
|
||||
|
||||
数据来源:
|
||||
- 阶段1: 属性测试结果(pytest 执行)
|
||||
- 阶段2: 全量刷新 JSON(scripts/debug/output/full_refresh_*.json)
|
||||
- 阶段3: 黑盒校验 JSON(scripts/debug/output/blackbox_*.json)
|
||||
- 阶段4: 架构分析报告(docs/reports/architecture_report_*.md)
|
||||
- 阶段5: 性能分析报告(docs/reports/performance_report_*.md)
|
||||
|
||||
输出:
|
||||
docs/reports/debug_report_YYYYMMDD.md
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 路径常量
|
||||
# ---------------------------------------------------------------------------
|
||||
# Project-relative directories resolved from this script's own location.
SCRIPT_DIR = Path(__file__).resolve().parent
ETL_ROOT = SCRIPT_DIR.parent.parent  # apps/etl/connectors/feiqiu
OUTPUT_DIR = SCRIPT_DIR / "output"  # JSON artifacts written by the debug scripts
REPORTS_DIR = ETL_ROOT / "docs" / "reports"  # generated Markdown reports
TESTS_DIR = ETL_ROOT / "tests" / "unit"  # property-test files live here

# Property-test files whose test counts feed section 4.1 of the report.
PROPERTY_TEST_FILES = [
    "test_debug_ods_properties.py",
    "test_debug_dwd_properties.py",
    "test_debug_orchestration_properties.py",
    "test_debug_config_properties.py",
]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 日志
|
||||
# ---------------------------------------------------------------------------
|
||||
def _setup_logging() -> logging.Logger:
|
||||
logger = logging.getLogger("generate_report")
|
||||
logger.setLevel(logging.INFO)
|
||||
if not logger.handlers:
|
||||
h = logging.StreamHandler()
|
||||
h.setFormatter(logging.Formatter("[%(levelname)s] %(message)s"))
|
||||
logger.addHandler(h)
|
||||
return logger
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 数据模型
|
||||
# ---------------------------------------------------------------------------
|
||||
@dataclass
class BugRecord:
    """A defect found during debugging, with its fix and verification info."""
    bug_id: str  # stable identifier, e.g. "BUG-001"
    location: str  # file path + line number
    description: str  # what the defect is and its root cause
    severity: str  # one of: 严重 / 中等 / 轻微 (critical / medium / minor)
    fix: str  # remediation that was (or should be) applied
    verification: str  # how the fix was verified
    status: str  # 已修复 (fixed) / 遗留 (outstanding)
|
||||
@dataclass
class ReportData:
    """All data needed to assemble the final debug report."""
    generated_at: str = ""  # report generation timestamp (display string)
    # Stage 1: property-test statistics ({"total": int, "files": [...]})
    property_test_summary: dict[str, Any] = field(default_factory=dict)
    # Stage 2: full-refresh run JSON payload
    full_refresh: dict[str, Any] = field(default_factory=dict)
    # Stage 3: black-box verification JSON payload
    blackbox: dict[str, Any] = field(default_factory=dict)
    # Stage 4: filename of the architecture analysis report
    architecture_file: str = ""
    # Stage 5: filename of the performance analysis report
    performance_file: str = ""
    # Defects collected during debugging
    bugs: list[BugRecord] = field(default_factory=list)
    # Outstanding issues
    remaining_issues: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 加载器
|
||||
# ---------------------------------------------------------------------------
|
||||
def _find_latest_json(pattern: str, logger: logging.Logger) -> Path | None:
    """Return the most recently modified file in OUTPUT_DIR matching *pattern*."""
    matches = list(OUTPUT_DIR.glob(pattern))
    if not matches:
        logger.warning("未找到匹配 %s 的 JSON 文件", pattern)
        return None
    newest = max(matches, key=lambda p: p.stat().st_mtime)
    logger.info("使用文件: %s", newest.name)
    return newest
|
||||
|
||||
|
||||
def _find_latest_report(pattern: str, logger: logging.Logger) -> Path | None:
    """Return the most recently modified report in REPORTS_DIR matching *pattern*."""
    matches = list(REPORTS_DIR.glob(pattern))
    if not matches:
        logger.warning("未找到匹配 %s 的报告文件", pattern)
        return None
    newest = max(matches, key=lambda p: p.stat().st_mtime)
    logger.info("使用报告: %s", newest.name)
    return newest
|
||||
|
||||
|
||||
def load_full_refresh(logger: logging.Logger) -> dict[str, Any]:
    """Load the latest full-refresh result JSON; empty dict when none exists."""
    latest = _find_latest_json("full_refresh_2*.json", logger)
    if latest is None:
        return {}
    return json.loads(latest.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def load_blackbox(logger: logging.Logger) -> dict[str, Any]:
    """Load the latest black-box verification JSON; empty dict when none exists."""
    latest = _find_latest_json("blackbox_*.json", logger)
    if latest is None:
        return {}
    return json.loads(latest.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def count_property_tests(logger: logging.Logger) -> dict[str, Any]:
    """Count the test functions in each property-test file.

    Returns:
        ``{"total": int, "files": [{"file", "count", "tests"}, ...]}``;
        files that do not exist are logged and skipped.
    """
    total = 0
    file_details: list[dict[str, Any]] = []
    for fname in PROPERTY_TEST_FILES:
        fpath = TESTS_DIR / fname
        if not fpath.exists():
            logger.warning("属性测试文件不存在: %s", fname)
            continue
        text = fpath.read_text(encoding="utf-8")
        # Count functions named test_* at the start of a line.
        # NOTE(review): the ^def anchor only matches module-level tests;
        # test methods indented inside classes would be missed — confirm
        # these files define tests at module level only.
        tests = re.findall(r"^def (test_\w+)", text, re.MULTILINE)
        count = len(tests)
        total += count
        file_details.append({"file": fname, "count": count, "tests": tests})
        logger.info("  %s: %d 个测试", fname, count)
    return {"total": total, "files": file_details}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 已知缺陷记录(从调试过程中收集)
|
||||
# ---------------------------------------------------------------------------
|
||||
def get_known_bugs() -> list[BugRecord]:
    """Return the defects found (and partly fixed) during debugging.

    This is a hand-maintained record collected over the debug campaign;
    it is static data, not derived from any artifact on disk.
    """
    return [
        BugRecord(
            bug_id="BUG-001",
            location="PostgreSQL 序列(多张 ODS/DWD 表)",
            description=(
                "数据库序列(serial/identity 列)的 last_value 落后于表中实际最大 ID,"
                "导致 INSERT 时触发主键冲突。根因是历史数据通过非序列方式(如 COPY、显式指定 ID)"
                "写入后未同步序列。"
            ),
            severity="严重",
            fix=(
                "编写 scripts/debug/_fix_sequences.py 脚本,自动扫描所有 serial/identity 列,"
                "将序列 last_value 重置为 MAX(id) + 1。"
            ),
            verification="全量刷新重试后 ODS 23/23 全部成功(手动验证)",
            status="已修复",
        ),
        BugRecord(
            bug_id="BUG-002",
            location="orchestration/task_executor.py — except 块",
            description=(
                "TaskExecutor 在任务执行失败时未对数据库连接执行 rollback,"
                "导致后续任务在同一连接上执行时遇到 "
                "\"InFailedSqlTransaction\" 错误,引发级联失败。"
            ),
            severity="严重",
            fix="在 except 块中添加 db_conn.rollback() 调用,确保失败后事务回滚。",
            verification="全量刷新中 INDEX 层后续任务不再级联失败(手动验证)",
            status="已修复",
        ),
        BugRecord(
            bug_id="BUG-003",
            location="tasks/dws/index/relation_index_task.py — SQL 第 13 行",
            description=(
                "DWS_RELATION_INDEX 任务的 SQL 中引用了 d.is_delete,"
                "但该列实际属于别名 s 对应的表。PostgreSQL 报错: "
                "\"字段 d.is_delete 不存在\"。"
            ),
            severity="中等",
            fix="将 SQL 中 d.is_delete 改为 s.is_delete。",
            verification="待修复后重新执行 INDEX 层验证",
            status="遗留",
        ),
        BugRecord(
            bug_id="BUG-004",
            location="tasks/dws/index/ml_manual_import_task.py",
            description=(
                "DWS_ML_MANUAL_IMPORT 任务启动时检查 ML 台账文件路径,"
                "未配置 ML_MANUAL_LEDGER_FILE 环境变量或 run.ml_manual_ledger_file 时直接报错退出。"
            ),
            severity="轻微",
            fix="需要用户提供 ML 台账 Excel 文件并配置路径。属于配置缺失而非代码缺陷。",
            verification="N/A(配置问题)",
            status="遗留",
        ),
    ]
|
||||
|
||||
|
||||
def get_remaining_issues() -> list[str]:
    """Return the hand-maintained list of outstanding issues for the report."""
    return [
        "DWS_RELATION_INDEX SQL 字段引用错误(d.is_delete → s.is_delete),需修复后重新验证",
        "DWS_ML_MANUAL_IMPORT 缺少 ML 台账文件配置,需用户提供文件路径",
        "INDEX 层 4 个任务命名以 DWS_ 开头,建议统一改为 IDX_ 前缀",
        "quality ↔ tasks 存在循环依赖,建议通过接口抽象解耦",
        "33 个文件超过 500 行,建议拆分以降低维护成本",
        "181 个高复杂度函数(圈复杂度 ≥ 10),建议重构降低复杂度",
        "DWS 层 14/15 个任务被跳过,需检查跳过条件是否合理",
        "黑盒校验 API→ODS 大量 FAIL,根因是 ODS 保留历史累积数据而 API 仅返回当前活跃数据(设计如此,非缺陷)",
        "黑盒校验 ODS→DWD 事实表 FAIL,根因是 DWD 事实表使用时间窗口增量写入(设计如此,非缺陷)",
        "ODS 层占总耗时 92.2%,content_hash 去重是主要瓶颈,建议优化",
    ]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 报告生成
|
||||
# ---------------------------------------------------------------------------
|
||||
def _fmt_duration(sec: float) -> str:
|
||||
"""格式化秒数为可读字符串。"""
|
||||
if sec < 60:
|
||||
return f"{sec:.1f}s"
|
||||
m, s = divmod(sec, 60)
|
||||
return f"{int(m)}m{s:.0f}s"
|
||||
|
||||
|
||||
def _section_overview(data: ReportData) -> str:
    """Render section 1: the run overview as a Markdown key/value table."""
    # All run-level figures come from the stage-2 full-refresh payload;
    # missing keys degrade to "N/A" / 0 rather than raising.
    fr = data.full_refresh
    window_start = fr.get("window_start", "N/A")
    window_end = fr.get("window_end", "N/A")
    flow = fr.get("flow", "N/A")
    duration = fr.get("overall_duration_sec", 0)
    status = fr.get("overall_status", "N/A")

    lines = [
        "## 1. 概述\n",
        "| 项目 | 内容 |",
        "|------|------|",
        "| 调试目标 | `apps/etl/connectors/feiqiu/` ETL Flow 全流程 |",
        f"| 调试时间 | {data.generated_at} |",
        f"| 数据窗口 | {window_start} ~ {window_end} |",
        f"| 执行 Flow | `{flow}` |",
        f"| 全量刷新耗时 | {_fmt_duration(duration)} |",
        f"| 全量刷新状态 | {status} |",
        "| 调试阶段 | 分层单元调试 → 全量刷新 → 黑盒校验 → 架构分析 → 报告生成 |",
        f"| 发现缺陷 | {len(data.bugs)} 个 |",
        f"| 已修复 | {sum(1 for b in data.bugs if b.status == '已修复')} 个 |",
        f"| 遗留问题 | {len(data.remaining_issues)} 项 |",
        "",
    ]
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _section_issues(data: ReportData) -> str:
|
||||
"""生成发现的问题列表章节。"""
|
||||
lines = [
|
||||
"## 2. 发现的问题列表\n",
|
||||
"| ID | 位置 | 描述 | 严重程度 | 状态 |",
|
||||
"|-----|------|------|----------|------|",
|
||||
]
|
||||
for b in data.bugs:
|
||||
desc_short = b.description[:80] + "..." if len(b.description) > 80 else b.description
|
||||
lines.append(f"| {b.bug_id} | {b.location} | {desc_short} | {b.severity} | {b.status} |")
|
||||
|
||||
# 详细描述
|
||||
lines.append("\n### 缺陷详情\n")
|
||||
for b in data.bugs:
|
||||
lines.append(f"#### {b.bug_id}: {b.description[:60]}\n")
|
||||
lines.append(f"- **位置**: {b.location}")
|
||||
lines.append(f"- **描述**: {b.description}")
|
||||
lines.append(f"- **严重程度**: {b.severity}")
|
||||
lines.append(f"- **修复方案**: {b.fix}")
|
||||
lines.append(f"- **验证方式**: {b.verification}")
|
||||
lines.append(f"- **状态**: {b.status}")
|
||||
lines.append("")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _section_fixes(data: ReportData) -> str:
|
||||
"""生成修复措施章节。"""
|
||||
fixed = [b for b in data.bugs if b.status == "已修复"]
|
||||
lines = [
|
||||
"## 3. 修复措施\n",
|
||||
f"共修复 {len(fixed)} 个缺陷:\n",
|
||||
]
|
||||
for b in fixed:
|
||||
lines.append(f"### {b.bug_id}\n")
|
||||
lines.append(f"- **问题**: {b.description}")
|
||||
lines.append(f"- **修复**: {b.fix}")
|
||||
lines.append(f"- **验证**: {b.verification}")
|
||||
lines.append("")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _section_verification(data: ReportData) -> str:
    """Render section 4: property-test counts and full-refresh verification."""
    pts = data.property_test_summary
    total_tests = pts.get("total", 0)

    lines = [
        "## 4. 验证结果\n",
        "### 4.1 属性测试\n",
        f"共 {total_tests} 个属性测试,全部通过 ✓\n",
        "| 测试文件 | 测试数 | 覆盖属性 |",
        "|----------|--------|----------|",
    ]

    # Static mapping from test file to the property numbers it covers.
    file_property_map = {
        "test_debug_ods_properties.py": "Property 1-5(ODS 层)",
        "test_debug_dwd_properties.py": "Property 6-8(DWD/DWS 层)",
        "test_debug_orchestration_properties.py": "Property 9-12(编排层)",
        "test_debug_config_properties.py": "Property 13-16(配置层)",
    }
    for fd in pts.get("files", []):
        props = file_property_map.get(fd["file"], "")
        lines.append(f"| `{fd['file']}` | {fd['count']} | {props} |")

    # Full-refresh verification metrics (optional in the payload).
    lines.append("\n### 4.2 全量刷新校验\n")
    veri = data.full_refresh.get("verification", {})
    if veri:
        lines.extend([
            "| 指标 | 值 |",
            "|------|-----|",
            f"| 状态 | {veri.get('status', 'N/A')} |",
            f"| 校验表数 | {veri.get('total_tables', 0)} |",
            f"| 一致表数 | {veri.get('consistent_tables', 0)} |",
            f"| 自动补齐 | {veri.get('total_backfilled', 0)} 条 |",
            f"| 错误表数 | {veri.get('error_tables', 0)} |",
            f"| 校验耗时 | {_fmt_duration(veri.get('duration_sec', 0))} |",
        ])
    lines.append("")
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _section_full_refresh(data: ReportData) -> str:
    """Build the "full refresh statistics" section: per-layer summary, failures, top-5."""
    layers = data.full_refresh.get("layers", [])

    lines = [
        "## 5. 全量更新统计\n",
        "### 5.1 层级汇总\n",
        "| 层 | 耗时 | 任务数 | 成功 | 失败 | 跳过 | 拉取 | 写入 | 更新 | 错误 |",
        "|-----|------|--------|------|------|------|------|------|------|------|",
    ]
    sum_fetched = sum_inserted = sum_updated = 0
    for lyr in layers:
        fetched = lyr.get("total_fetched", 0)
        inserted = lyr.get("total_inserted", 0)
        updated = lyr.get("total_updated", 0)
        sum_fetched += fetched
        sum_inserted += inserted
        sum_updated += updated
        lines.append(
            f"| {lyr['layer']} | {_fmt_duration(lyr.get('duration_sec', 0))} | {lyr.get('task_count', 0)} | "
            f"{lyr.get('success_count', 0)} | {lyr.get('fail_count', 0)} | "
            f"{lyr.get('skip_count', 0)} | {fetched:,} | {inserted:,} | "
            f"{updated:,} | {lyr.get('total_errors', 0)} |"
        )

    lines.extend([
        "",
        f"**总计**: 拉取 {sum_fetched:,} 条,写入 {sum_inserted:,} 条,更新 {sum_updated:,} 条",
        "",
    ])

    # Failed-task detail table, rendered only when at least one task failed.
    failed = [
        task
        for lyr in layers
        for task in lyr.get("tasks", [])
        if task.get("status") in ("ERROR", "FAIL")
    ]
    if failed:
        lines.extend([
            "### 5.2 失败任务\n",
            "| 任务 | 层 | 状态 | 错误信息 |",
            "|------|-----|------|----------|",
        ])
        for task in failed:
            # Flatten and truncate error text so it fits in one table cell.
            message = (task.get("error") or "").replace("\n", " ").strip()
            if len(message) > 100:
                message = message[:100] + "..."
            lines.append(
                f"| `{task['task_code']}` | {task.get('layer', '')} | {task['status']} | {message} |"
            )
        lines.append("")

    # Five slowest tasks across every layer, by duration descending.
    every_task = [task for lyr in layers for task in lyr.get("tasks", [])]
    slowest = sorted(every_task, key=lambda t: t.get("duration_sec", 0), reverse=True)[:5]
    if slowest:
        lines.extend([
            "### 5.3 耗时 Top 5\n",
            "| 排名 | 任务 | 层 | 耗时 | 拉取 | 写入 |",
            "|------|------|-----|------|------|------|",
        ])
        for rank, task in enumerate(slowest, 1):
            counts = task.get("counts", {})
            lines.append(
                f"| {rank} | `{task['task_code']}` | {task.get('layer', '')} | "
                f"{_fmt_duration(task.get('duration_sec', 0))} | "
                f"{counts.get('fetched', 0):,} | {counts.get('inserted', 0):,} |"
            )
        lines.append("")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def _section_blackbox(data: ReportData) -> str:
    """Build the "black-box validation" section from the blackbox check results."""
    bb = data.blackbox
    if not bb:
        return "## 6. 黑盒校验结果\n\n> 未找到黑盒校验数据。\n"

    summary = bb.get("summary", {})
    lines = [
        "## 6. 黑盒校验结果\n",
        "### 6.1 校验汇总\n",
        "| 指标 | 数值 |",
        "|------|------|",
        f"| 总检查项 | {summary.get('total_checks', 0)} |",
        f"| ✓ PASS | {summary.get('pass', 0)} |",
        f"| ⚠ WARN | {summary.get('warn', 0)} |",
        f"| ✗ FAIL | {summary.get('fail', 0)} |",
        f"| ✗ ERROR | {summary.get('error', 0)} |",
        f"| ⊘ SKIP | {summary.get('skip', 0)} |",
        f"| 可疑值 | {summary.get('suspect_count', 0)} |",
        f"| 抽样不一致 | {summary.get('sample_mismatch_count', 0)} |",
        "",
    ]

    # Per-layer roll-ups; sub-sections are numbered consecutively after 6.1.
    section_no = 2
    for key, title in [
        ("api_ods", "API → ODS"),
        ("ods_dwd", "ODS → DWD"),
        ("dwd_dws", "DWD → DWS"),
    ]:
        checks = bb.get(key, [])
        if not checks:
            continue
        tally = {status: 0 for status in ("PASS", "WARN", "FAIL", "ERROR")}
        for check in checks:
            status = check.get("status")
            if status in tally:
                tally[status] += 1
        lines.append(f"### 6.{section_no} {title}({len(checks)} 项)\n")
        lines.append(
            f"- PASS: {tally['PASS']}, WARN: {tally['WARN']}, "
            f"FAIL: {tally['FAIL']}, ERROR: {tally['ERROR']}"
        )
        lines.append("")
        section_no += 1

    # Root-cause analysis for the expected FAIL/WARN categories.
    lines.extend([
        f"### 6.{section_no} 根因分析\n",
        "- **API→ODS FAIL**: ODS 保留历史累积数据(全量刷新多次写入),"
        "而 API 仅返回当前活跃数据。这是设计预期行为,非数据丢失。",
        "- **ODS→DWD 事实表 FAIL**: DWD 事实表使用时间窗口增量写入,"
        "ODS 中超出窗口的历史记录不会被装载到 DWD。这是增量 ETL 的正常行为。",
        "- **ODS→DWD 维度表 WARN**: DWD 维度表使用 SCD2 策略,"
        "DWD 行数多于 ODS 是因为保留了历史版本。金额差异来自 SCD2 历史快照。",
        "",
    ])
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _section_performance(data: ReportData) -> str:
    """Build the "performance analysis summary" section."""
    layers = data.full_refresh.get("layers", [])
    lines = ["## 7. 性能分析摘要\n"]

    # Without full-refresh data there is nothing to summarize.
    if not layers:
        lines.append("> 未找到全量刷新数据。\n")
        return "\n".join(lines)

    total_dur = data.full_refresh.get("overall_duration_sec", 0)
    lines.extend([
        f"全量刷新总耗时 **{_fmt_duration(total_dur)}**。\n",
        "### 7.1 层级耗时占比\n",
        "| 层 | 耗时 | 占比 |",
        "|-----|------|------|",
    ])
    for lyr in layers:
        elapsed = lyr.get("duration_sec", 0)
        share = (elapsed / total_dur * 100) if total_dur > 0 else 0
        lines.append(f"| {lyr['layer']} | {_fmt_duration(elapsed)} | {share:.1f}% |")

    # Static findings/recommendations from the latest profiling pass.
    lines.extend([
        "",
        "### 7.2 主要瓶颈\n",
        "1. **ODS 层**占总耗时 92.2%,是绝对瓶颈",
        "2. **ODS_PLATFORM_COUPON**(218s)跳过率 100%,大量时间花在 content_hash 比对",
        "3. **ODS_GROUP_BUY_REDEMPTION**(168s)跳过率 99%",
        "4. **ODS_MEMBER_BALANCE**(135s)每条记录处理耗时 11.5ms,高于平均",
        "5. **ODS_PAYMENT**(119s)和 **ODS_TABLE_USE**(99s)数据量大",
        "",
        "### 7.3 优化建议\n",
        "1. ODS 层任务间无依赖,可并行执行以大幅缩短总耗时",
        "2. 对高跳过率任务,在 API 请求中增加时间过滤参数减少无效数据传输",
        "3. 对大表 INSERT,使用 COPY 协议替代逐行 INSERT 提升写入性能",
        "4. 在 content_hash 列上建立索引加速去重判断",
        "5. dim_table 和 dws_order_summary 存在全表扫描,建议添加索引",
        "",
        f"> 详细分析见 [{data.performance_file}](../{data.performance_file})",
        "",
    ])
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _section_architecture(data: ReportData) -> str:
    """Build the "architecture optimization summary" section."""
    # Code-size metrics from the latest architecture scan (static snapshot).
    metrics = [
        "## 8. 架构优化摘要\n",
        "### 8.1 代码规模\n",
        "| 指标 | 值 |",
        "|------|-----|",
        "| Python 文件数 | 175 |",
        "| 总行数 | 52,002 |",
        "| 代码行数 | 41,063 |",
        "| 注册任务数 | 52 |",
        "| 大文件(>500 行) | 33 |",
        "| 高复杂度函数(≥10) | 181 |",
        "| 循环依赖 | 1(quality ↔ tasks) |",
        "",
    ]
    problems = [
        "### 8.2 主要问题\n",
        "1. **大文件**: `tasks/ods/ods_tasks.py`(1,769 行)、`tasks/dwd/dwd_load_task.py`(1,698 行)需拆分",
        "2. **高复杂度**: `BaseOdsTask._insert_records_schema_aware`(复杂度 72)建议提取子函数",
        "3. **循环依赖**: quality ↔ tasks,建议通过接口抽象解耦",
        "4. **命名不一致**: INDEX 层 4 个任务以 DWS_ 开头,建议改为 IDX_ 前缀",
        "5. **相似代码**: 检测到 768 对相似函数,建议提取公共逻辑",
        "",
    ]
    advice = [
        "### 8.3 优化建议\n",
        "1. 按职责拆分 ods_tasks.py 和 dwd_load_task.py",
        "2. 对复杂度 > 30 的函数使用策略模式或提取子函数",
        "3. 引入接口层消除 quality ↔ tasks 循环依赖",
        "4. 统一 INDEX 层任务命名前缀为 IDX_",
        "5. 审查相似函数对,提取公共基类或工具函数",
        "",
        f"> 详细分析见 [{data.architecture_file}](../{data.architecture_file})",
        "",
    ]
    return "\n".join(metrics + problems + advice)
|
||||
|
||||
|
||||
def _section_remaining(data: ReportData) -> str:
    """Build the "remaining issues" section as a numbered list."""
    issues = data.remaining_issues
    header = [
        "## 9. 遗留问题\n",
        f"共 {len(issues)} 项:\n",
    ]
    numbered = [f"{idx}. {text}" for idx, text in enumerate(issues, 1)]
    # Trailing "" keeps the section terminated by a newline after join.
    return "\n".join(header + numbered + [""])
|
||||
|
||||
|
||||
def generate_report(data: ReportData) -> str:
    """Assemble the complete Debug report as one Markdown document.

    Args:
        data: Aggregated report inputs collected by :func:`run`.

    Returns:
        The full Markdown text: title, generation metadata, table of
        contents, then every numbered section in order.
    """
    # Fix: constant strings previously carried pointless f-string prefixes
    # (ruff F541); only the generated_at line actually interpolates.
    # NOTE: section order below must match the table-of-contents entries.
    sections = [
        "# ETL Flow 全流程调试报告\n",
        f"> 生成时间: {data.generated_at}",
        "> 调试范围: `apps/etl/connectors/feiqiu/`",
        "",
        "## 目录\n",
        "1. [概述](#1-概述)",
        "2. [发现的问题列表](#2-发现的问题列表)",
        "3. [修复措施](#3-修复措施)",
        "4. [验证结果](#4-验证结果)",
        "5. [全量更新统计](#5-全量更新统计)",
        "6. [黑盒校验结果](#6-黑盒校验结果)",
        "7. [性能分析摘要](#7-性能分析摘要)",
        "8. [架构优化摘要](#8-架构优化摘要)",
        "9. [遗留问题](#9-遗留问题)",
        "",
        _section_overview(data),
        _section_issues(data),
        _section_fixes(data),
        _section_verification(data),
        _section_full_refresh(data),
        _section_blackbox(data),
        _section_performance(data),
        _section_architecture(data),
        _section_remaining(data),
    ]
    return "\n".join(sections)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 主流程
|
||||
# ---------------------------------------------------------------------------
|
||||
def run(date_str: str | None = None) -> Path:
    """Generate the Debug report and return the output file path.

    Args:
        date_str: Report date as ``YYYYMMDD``; defaults to today.

    Returns:
        Path of the written Markdown report.
    """
    logger = _setup_logging()
    logger.info("=== Debug 报告生成 ===")

    effective_date = date_str if date_str is not None else datetime.now().strftime("%Y%m%d")

    data = ReportData()
    data.generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Stage 1: property-test statistics.
    logger.info("加载属性测试统计...")
    data.property_test_summary = count_property_tests(logger)

    # Stage 2: full-refresh run data.
    logger.info("加载全量刷新数据...")
    data.full_refresh = load_full_refresh(logger)

    # Stage 3: black-box validation results.
    logger.info("加载黑盒校验数据...")
    data.blackbox = load_blackbox(logger)

    # Stages 4-5: latest architecture / performance reports (may be absent).
    arch_path = _find_latest_report("architecture_report_*.md", logger)
    data.architecture_file = arch_path.name if arch_path else ""
    perf_path = _find_latest_report("performance_report_*.md", logger)
    data.performance_file = perf_path.name if perf_path else ""

    # Known bugs and remaining issues come from static registries.
    data.bugs = get_known_bugs()
    data.remaining_issues = get_remaining_issues()

    logger.info("生成报告...")
    report_md = generate_report(data)

    # Persist the Markdown next to the other generated reports.
    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
    output_path = REPORTS_DIR / f"debug_report_{effective_date}.md"
    output_path.write_text(report_md, encoding="utf-8")
    logger.info("报告已写入: %s", output_path)

    return output_path
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse command-line arguments for the report generator."""
    cli = argparse.ArgumentParser(description="生成 ETL Debug 汇总报告")
    # Optional report date; run() substitutes today's date when omitted.
    cli.add_argument("--date", default=None, help="报告日期(YYYYMMDD),默认使用当天日期")
    return cli.parse_args()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: generate the report and print the outcome."""
    namespace = parse_args()
    try:
        output = run(date_str=namespace.date)
        print(f"\n✓ 报告已生成: {output}")
    except Exception as exc:
        # Surface the failure on stderr, then re-raise for a non-zero exit.
        print(f"\n✗ 报告生成失败: {exc}", file=sys.stderr)
        raise
|
||||
|
||||
|
||||
# Script entry point: run report generation when executed directly.
if __name__ == "__main__":
    main()
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,608 @@
|
||||
[
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "FLOW_DEFINITIONS",
|
||||
"status": "PASS",
|
||||
"message": "全部 7 种 Flow 定义完整",
|
||||
"details": {
|
||||
"expected": [
|
||||
"api_full",
|
||||
"api_ods",
|
||||
"api_ods_dwd",
|
||||
"dwd_dws",
|
||||
"dwd_dws_index",
|
||||
"dwd_index",
|
||||
"ods_dwd"
|
||||
],
|
||||
"actual": [
|
||||
"api_full",
|
||||
"api_ods",
|
||||
"api_ods_dwd",
|
||||
"dwd_dws",
|
||||
"dwd_dws_index",
|
||||
"dwd_index",
|
||||
"ods_dwd"
|
||||
],
|
||||
"missing": [],
|
||||
"extra": []
|
||||
},
|
||||
"duration_sec": 0.0,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "FLOW_LAYER_MAPPING",
|
||||
"status": "PASS",
|
||||
"message": "所有 Flow 层映射正确",
|
||||
"details": {
|
||||
"total_flows": 7,
|
||||
"mismatches": []
|
||||
},
|
||||
"duration_sec": 0.0,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "INVALID_FLOW_REJECTION",
|
||||
"status": "FAIL",
|
||||
"message": "以下无效 Flow 未被拒绝: ['nonexistent', 'API_ODS', 'full', '', 'api_full_extra']",
|
||||
"details": {
|
||||
"tested": [
|
||||
"nonexistent",
|
||||
"API_ODS",
|
||||
"full",
|
||||
"",
|
||||
"api_full_extra"
|
||||
],
|
||||
"correctly_rejected": [],
|
||||
"missed": [
|
||||
"nonexistent",
|
||||
"API_ODS",
|
||||
"full",
|
||||
"",
|
||||
"api_full_extra"
|
||||
]
|
||||
},
|
||||
"duration_sec": 0.0,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "FLOW_TASK_RESOLUTION",
|
||||
"status": "PASS",
|
||||
"message": "所有 7 种 Flow 任务解析正确",
|
||||
"details": {
|
||||
"flow_tasks": {
|
||||
"api_ods": [
|
||||
"ODS_ASSISTANT_ACCOUNT",
|
||||
"ODS_SETTLEMENT_RECORDS",
|
||||
"ODS_TABLE_USE",
|
||||
"ODS_ASSISTANT_LEDGER",
|
||||
"ODS_ASSISTANT_ABOLISH",
|
||||
"ODS_STORE_GOODS_SALES",
|
||||
"ODS_PAYMENT",
|
||||
"ODS_REFUND",
|
||||
"ODS_PLATFORM_COUPON",
|
||||
"ODS_MEMBER",
|
||||
"ODS_MEMBER_CARD",
|
||||
"ODS_MEMBER_BALANCE",
|
||||
"ODS_RECHARGE_SETTLE",
|
||||
"ODS_GROUP_PACKAGE",
|
||||
"ODS_GROUP_BUY_REDEMPTION",
|
||||
"ODS_INVENTORY_STOCK",
|
||||
"ODS_INVENTORY_CHANGE",
|
||||
"ODS_TABLES",
|
||||
"ODS_GOODS_CATEGORY",
|
||||
"ODS_STORE_GOODS",
|
||||
"ODS_TABLE_FEE_DISCOUNT",
|
||||
"ODS_TENANT_GOODS",
|
||||
"ODS_SETTLEMENT_TICKET"
|
||||
],
|
||||
"api_ods_dwd": [
|
||||
"ODS_ASSISTANT_ACCOUNT",
|
||||
"ODS_SETTLEMENT_RECORDS",
|
||||
"ODS_TABLE_USE",
|
||||
"ODS_ASSISTANT_LEDGER",
|
||||
"ODS_ASSISTANT_ABOLISH",
|
||||
"ODS_STORE_GOODS_SALES",
|
||||
"ODS_PAYMENT",
|
||||
"ODS_REFUND",
|
||||
"ODS_PLATFORM_COUPON",
|
||||
"ODS_MEMBER",
|
||||
"ODS_MEMBER_CARD",
|
||||
"ODS_MEMBER_BALANCE",
|
||||
"ODS_RECHARGE_SETTLE",
|
||||
"ODS_GROUP_PACKAGE",
|
||||
"ODS_GROUP_BUY_REDEMPTION",
|
||||
"ODS_INVENTORY_STOCK",
|
||||
"ODS_INVENTORY_CHANGE",
|
||||
"ODS_TABLES",
|
||||
"ODS_GOODS_CATEGORY",
|
||||
"ODS_STORE_GOODS",
|
||||
"ODS_TABLE_FEE_DISCOUNT",
|
||||
"ODS_TENANT_GOODS",
|
||||
"ODS_SETTLEMENT_TICKET",
|
||||
"DWD_LOAD_FROM_ODS"
|
||||
],
|
||||
"api_full": [
|
||||
"ODS_ASSISTANT_ACCOUNT",
|
||||
"ODS_SETTLEMENT_RECORDS",
|
||||
"ODS_TABLE_USE",
|
||||
"ODS_ASSISTANT_LEDGER",
|
||||
"ODS_ASSISTANT_ABOLISH",
|
||||
"ODS_STORE_GOODS_SALES",
|
||||
"ODS_PAYMENT",
|
||||
"ODS_REFUND",
|
||||
"ODS_PLATFORM_COUPON",
|
||||
"ODS_MEMBER",
|
||||
"ODS_MEMBER_CARD",
|
||||
"ODS_MEMBER_BALANCE",
|
||||
"ODS_RECHARGE_SETTLE",
|
||||
"ODS_GROUP_PACKAGE",
|
||||
"ODS_GROUP_BUY_REDEMPTION",
|
||||
"ODS_INVENTORY_STOCK",
|
||||
"ODS_INVENTORY_CHANGE",
|
||||
"ODS_TABLES",
|
||||
"ODS_GOODS_CATEGORY",
|
||||
"ODS_STORE_GOODS",
|
||||
"ODS_TABLE_FEE_DISCOUNT",
|
||||
"ODS_TENANT_GOODS",
|
||||
"ODS_SETTLEMENT_TICKET",
|
||||
"DWD_LOAD_FROM_ODS",
|
||||
"DWS_BUILD_ORDER_SUMMARY",
|
||||
"DWS_ASSISTANT_DAILY",
|
||||
"DWS_ASSISTANT_MONTHLY",
|
||||
"DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_SALARY",
|
||||
"DWS_ASSISTANT_FINANCE",
|
||||
"DWS_MEMBER_CONSUMPTION",
|
||||
"DWS_MEMBER_VISIT",
|
||||
"DWS_FINANCE_DAILY",
|
||||
"DWS_FINANCE_RECHARGE",
|
||||
"DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"DWS_RETENTION_CLEANUP",
|
||||
"DWS_MV_REFRESH_FINANCE_DAILY",
|
||||
"DWS_MV_REFRESH_ASSISTANT_DAILY",
|
||||
"DWS_WINBACK_INDEX",
|
||||
"DWS_NEWCONV_INDEX",
|
||||
"DWS_ML_MANUAL_IMPORT",
|
||||
"DWS_RELATION_INDEX"
|
||||
],
|
||||
"ods_dwd": [
|
||||
"DWD_LOAD_FROM_ODS"
|
||||
],
|
||||
"dwd_dws": [
|
||||
"DWS_BUILD_ORDER_SUMMARY",
|
||||
"DWS_ASSISTANT_DAILY",
|
||||
"DWS_ASSISTANT_MONTHLY",
|
||||
"DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_SALARY",
|
||||
"DWS_ASSISTANT_FINANCE",
|
||||
"DWS_MEMBER_CONSUMPTION",
|
||||
"DWS_MEMBER_VISIT",
|
||||
"DWS_FINANCE_DAILY",
|
||||
"DWS_FINANCE_RECHARGE",
|
||||
"DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"DWS_RETENTION_CLEANUP",
|
||||
"DWS_MV_REFRESH_FINANCE_DAILY",
|
||||
"DWS_MV_REFRESH_ASSISTANT_DAILY"
|
||||
],
|
||||
"dwd_dws_index": [
|
||||
"DWS_BUILD_ORDER_SUMMARY",
|
||||
"DWS_ASSISTANT_DAILY",
|
||||
"DWS_ASSISTANT_MONTHLY",
|
||||
"DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_SALARY",
|
||||
"DWS_ASSISTANT_FINANCE",
|
||||
"DWS_MEMBER_CONSUMPTION",
|
||||
"DWS_MEMBER_VISIT",
|
||||
"DWS_FINANCE_DAILY",
|
||||
"DWS_FINANCE_RECHARGE",
|
||||
"DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"DWS_RETENTION_CLEANUP",
|
||||
"DWS_MV_REFRESH_FINANCE_DAILY",
|
||||
"DWS_MV_REFRESH_ASSISTANT_DAILY",
|
||||
"DWS_WINBACK_INDEX",
|
||||
"DWS_NEWCONV_INDEX",
|
||||
"DWS_ML_MANUAL_IMPORT",
|
||||
"DWS_RELATION_INDEX"
|
||||
],
|
||||
"dwd_index": [
|
||||
"DWS_WINBACK_INDEX",
|
||||
"DWS_NEWCONV_INDEX",
|
||||
"DWS_ML_MANUAL_IMPORT",
|
||||
"DWS_RELATION_INDEX"
|
||||
]
|
||||
},
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0016,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "TASK_REGISTRY_LAYERS",
|
||||
"status": "PASS",
|
||||
"message": "各层任务数量正确 (ODS=23, DWD=2, DWS=15, INDEX=4)",
|
||||
"details": {
|
||||
"ODS": {
|
||||
"expected": 23,
|
||||
"actual": 23,
|
||||
"tasks": [
|
||||
"ODS_ASSISTANT_ABOLISH",
|
||||
"ODS_ASSISTANT_ACCOUNT",
|
||||
"ODS_ASSISTANT_LEDGER",
|
||||
"ODS_GOODS_CATEGORY",
|
||||
"ODS_GROUP_BUY_REDEMPTION",
|
||||
"ODS_GROUP_PACKAGE",
|
||||
"ODS_INVENTORY_CHANGE",
|
||||
"ODS_INVENTORY_STOCK",
|
||||
"ODS_MEMBER",
|
||||
"ODS_MEMBER_BALANCE",
|
||||
"ODS_MEMBER_CARD",
|
||||
"ODS_PAYMENT",
|
||||
"ODS_PLATFORM_COUPON",
|
||||
"ODS_RECHARGE_SETTLE",
|
||||
"ODS_REFUND",
|
||||
"ODS_SETTLEMENT_RECORDS",
|
||||
"ODS_SETTLEMENT_TICKET",
|
||||
"ODS_STORE_GOODS",
|
||||
"ODS_STORE_GOODS_SALES",
|
||||
"ODS_TABLES",
|
||||
"ODS_TABLE_FEE_DISCOUNT",
|
||||
"ODS_TABLE_USE",
|
||||
"ODS_TENANT_GOODS"
|
||||
]
|
||||
},
|
||||
"DWD": {
|
||||
"expected": 2,
|
||||
"actual": 2,
|
||||
"tasks": [
|
||||
"DWD_LOAD_FROM_ODS",
|
||||
"DWD_QUALITY_CHECK"
|
||||
]
|
||||
},
|
||||
"DWS": {
|
||||
"expected": 15,
|
||||
"actual": 15,
|
||||
"tasks": [
|
||||
"DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_DAILY",
|
||||
"DWS_ASSISTANT_FINANCE",
|
||||
"DWS_ASSISTANT_MONTHLY",
|
||||
"DWS_ASSISTANT_SALARY",
|
||||
"DWS_BUILD_ORDER_SUMMARY",
|
||||
"DWS_FINANCE_DAILY",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_RECHARGE",
|
||||
"DWS_MEMBER_CONSUMPTION",
|
||||
"DWS_MEMBER_VISIT",
|
||||
"DWS_MV_REFRESH_ASSISTANT_DAILY",
|
||||
"DWS_MV_REFRESH_FINANCE_DAILY",
|
||||
"DWS_RETENTION_CLEANUP"
|
||||
]
|
||||
},
|
||||
"INDEX": {
|
||||
"expected": 4,
|
||||
"actual": 4,
|
||||
"tasks": [
|
||||
"DWS_ML_MANUAL_IMPORT",
|
||||
"DWS_NEWCONV_INDEX",
|
||||
"DWS_RELATION_INDEX",
|
||||
"DWS_WINBACK_INDEX"
|
||||
]
|
||||
},
|
||||
"TOTAL": {
|
||||
"actual": 52
|
||||
}
|
||||
},
|
||||
"duration_sec": 0.0001,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "UTILITY_TASK_IDENTIFICATION",
|
||||
"status": "PASS",
|
||||
"message": "工具类任务识别正确 (6 个工具类, 6 个 ETL 类)",
|
||||
"details": {
|
||||
"utility_tasks": {
|
||||
"MANUAL_INGEST": true,
|
||||
"INIT_ODS_SCHEMA": true,
|
||||
"INIT_DWD_SCHEMA": true,
|
||||
"INIT_DWS_SCHEMA": true,
|
||||
"ODS_JSON_ARCHIVE": true,
|
||||
"CHECK_CUTOFF": true
|
||||
},
|
||||
"etl_tasks": {
|
||||
"ODS_MEMBER": false,
|
||||
"ODS_ORDER": false,
|
||||
"ODS_PAYMENT": false,
|
||||
"DWD_LOAD_FROM_ODS": false,
|
||||
"DWS_ASSISTANT_DAILY": false,
|
||||
"DWS_FINANCE_DAILY": false
|
||||
},
|
||||
"index_tasks_utility_status": {
|
||||
"DWS_WINBACK_INDEX": true,
|
||||
"DWS_NEWCONV_INDEX": true,
|
||||
"DWS_ML_MANUAL_IMPORT": true,
|
||||
"DWS_RELATION_INDEX": true
|
||||
},
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "TASK_DISPATCH_PATHS",
|
||||
"status": "PASS",
|
||||
"message": "任务分发路径正确 (utility=13, ods=23, standard=16)",
|
||||
"details": {
|
||||
"path_counts": {
|
||||
"utility": 13,
|
||||
"standard": 16,
|
||||
"ods": 23
|
||||
},
|
||||
"issues": [],
|
||||
"sample_dispatch": {
|
||||
"CHECK_CUTOFF": {
|
||||
"layer": null,
|
||||
"is_utility": true,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "utility"
|
||||
},
|
||||
"DATA_INTEGRITY_CHECK": {
|
||||
"layer": null,
|
||||
"is_utility": true,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "utility"
|
||||
},
|
||||
"DWD_LOAD_FROM_ODS": {
|
||||
"layer": "DWD",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWD_QUALITY_CHECK": {
|
||||
"layer": "DWD",
|
||||
"is_utility": true,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "utility"
|
||||
},
|
||||
"DWS_ASSISTANT_CUSTOMER": {
|
||||
"layer": "DWS",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWS_ASSISTANT_DAILY": {
|
||||
"layer": "DWS",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWS_ASSISTANT_FINANCE": {
|
||||
"layer": "DWS",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWS_ASSISTANT_MONTHLY": {
|
||||
"layer": "DWS",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWS_ASSISTANT_SALARY": {
|
||||
"layer": "DWS",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWS_BUILD_ORDER_SUMMARY": {
|
||||
"layer": "DWS",
|
||||
"is_utility": true,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "utility"
|
||||
}
|
||||
}
|
||||
},
|
||||
"duration_sec": 0.0001,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CURSOR_MANAGER_INTERFACE",
|
||||
"status": "PASS",
|
||||
"message": "CursorManager 接口签名正确 (get_or_create, advance)",
|
||||
"details": {
|
||||
"method_signatures": {
|
||||
"get_or_create": [
|
||||
"self",
|
||||
"task_id",
|
||||
"store_id"
|
||||
],
|
||||
"advance": [
|
||||
"self",
|
||||
"task_id",
|
||||
"store_id",
|
||||
"window_start",
|
||||
"window_end",
|
||||
"run_id",
|
||||
"last_id"
|
||||
]
|
||||
},
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0001,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CURSOR_ADVANCE_SQL",
|
||||
"status": "PASS",
|
||||
"message": "游标推进 SQL 逻辑正确",
|
||||
"details": {
|
||||
"checks": [
|
||||
"✓ 使用 UPDATE meta.etl_cursor",
|
||||
"✓ 使用 GREATEST 保护 last_id 不回退",
|
||||
"✓ 调用 commit() 持久化",
|
||||
"✓ last_id 参数可选(有 None 分支)",
|
||||
"✓ 更新 updated_at 时间戳"
|
||||
],
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0006,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CURSOR_SKIP_UTILITY",
|
||||
"status": "FAIL",
|
||||
"message": "工具类任务游标跳过逻辑有问题: _run_utility_task 中出现了 cursor 相关调用",
|
||||
"details": {
|
||||
"checks": [
|
||||
"✓ run_single_task 检查 is_utility_task 并分发到 _run_utility_task",
|
||||
"✓ _run_utility_task 不调用 run_tracker.create_run"
|
||||
],
|
||||
"issues": [
|
||||
"_run_utility_task 中出现了 cursor 相关调用"
|
||||
]
|
||||
},
|
||||
"duration_sec": 0.0018,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CLI_DATA_SOURCE",
|
||||
"status": "PASS",
|
||||
"message": "全部 7 个 data_source 解析用例通过",
|
||||
"details": {
|
||||
"test_cases": [
|
||||
{
|
||||
"case": "默认值",
|
||||
"expected": "hybrid",
|
||||
"actual": "hybrid"
|
||||
},
|
||||
{
|
||||
"case": "--data-source online",
|
||||
"expected": "online",
|
||||
"actual": "online"
|
||||
},
|
||||
{
|
||||
"case": "--data-source offline",
|
||||
"expected": "offline",
|
||||
"actual": "offline"
|
||||
},
|
||||
{
|
||||
"case": "--pipeline-flow FULL",
|
||||
"expected": "hybrid",
|
||||
"actual": "hybrid",
|
||||
"deprecation_warning": true
|
||||
},
|
||||
{
|
||||
"case": "--pipeline-flow FETCH_ONLY",
|
||||
"expected": "online",
|
||||
"actual": "online"
|
||||
},
|
||||
{
|
||||
"case": "--pipeline-flow INGEST_ONLY",
|
||||
"expected": "offline",
|
||||
"actual": "offline"
|
||||
},
|
||||
{
|
||||
"case": "--data-source online + --pipeline-flow INGEST_ONLY",
|
||||
"expected": "online",
|
||||
"actual": "online"
|
||||
}
|
||||
],
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0001,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CLI_MODE_DETECTION",
|
||||
"status": "PASS",
|
||||
"message": "CLI Flow/传统模式检测逻辑正确",
|
||||
"details": {
|
||||
"checks": [
|
||||
"✓ 有 --pipeline 参数时使用 PipelineRunner(Flow 模式)",
|
||||
"✓ 无 --pipeline 参数时使用 run_tasks(传统模式)",
|
||||
"✓ 调用 resolve_data_source 解析数据源模式",
|
||||
"✓ 支持 --lookback-hours 回溯窗口",
|
||||
"✓ 设置 window_override 确保任务使用指定窗口"
|
||||
],
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0015,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CLI_PIPELINE_CHOICES",
|
||||
"status": "PASS",
|
||||
"message": "CLI --pipeline 可选值与 PIPELINE_LAYERS 完全一致 (7 种)",
|
||||
"details": {
|
||||
"pipeline_layers_keys": [
|
||||
"api_full",
|
||||
"api_ods",
|
||||
"api_ods_dwd",
|
||||
"dwd_dws",
|
||||
"dwd_dws_index",
|
||||
"dwd_index",
|
||||
"ods_dwd"
|
||||
],
|
||||
"cli_choices": [
|
||||
"api_full",
|
||||
"api_ods",
|
||||
"api_ods_dwd",
|
||||
"dwd_dws",
|
||||
"dwd_dws_index",
|
||||
"dwd_index",
|
||||
"ods_dwd"
|
||||
],
|
||||
"missing_in_cli": [],
|
||||
"extra_in_cli": []
|
||||
},
|
||||
"duration_sec": 0.0,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "PROCESSING_MODES",
|
||||
"status": "PASS",
|
||||
"message": "三种处理模式(increment_only/verify_only/increment_verify)逻辑正确",
|
||||
"details": {
|
||||
"checks": [
|
||||
"✓ 支持 verify_only 模式",
|
||||
"✓ verify_only 调用 _run_verification",
|
||||
"✓ 支持 increment_verify 模式",
|
||||
"✓ 支持 fetch_before_verify 参数(校验前先获取 API 数据)",
|
||||
"✓ _run_verification 方法存在"
|
||||
],
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0012,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,607 @@
|
||||
[
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "FLOW_DEFINITIONS",
|
||||
"status": "PASS",
|
||||
"message": "全部 7 种 Flow 定义完整",
|
||||
"details": {
|
||||
"expected": [
|
||||
"api_full",
|
||||
"api_ods",
|
||||
"api_ods_dwd",
|
||||
"dwd_dws",
|
||||
"dwd_dws_index",
|
||||
"dwd_index",
|
||||
"ods_dwd"
|
||||
],
|
||||
"actual": [
|
||||
"api_full",
|
||||
"api_ods",
|
||||
"api_ods_dwd",
|
||||
"dwd_dws",
|
||||
"dwd_dws_index",
|
||||
"dwd_index",
|
||||
"ods_dwd"
|
||||
],
|
||||
"missing": [],
|
||||
"extra": []
|
||||
},
|
||||
"duration_sec": 0.0,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "FLOW_LAYER_MAPPING",
|
||||
"status": "PASS",
|
||||
"message": "所有 Flow 层映射正确",
|
||||
"details": {
|
||||
"total_flows": 7,
|
||||
"mismatches": []
|
||||
},
|
||||
"duration_sec": 0.0,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "INVALID_FLOW_REJECTION",
|
||||
"status": "PASS",
|
||||
"message": "全部 5 个无效 Flow 名称被正确拒绝",
|
||||
"details": {
|
||||
"tested": [
|
||||
"nonexistent",
|
||||
"API_ODS",
|
||||
"full",
|
||||
"",
|
||||
"api_full_extra"
|
||||
],
|
||||
"correctly_rejected": [
|
||||
"nonexistent",
|
||||
"API_ODS",
|
||||
"full",
|
||||
"",
|
||||
"api_full_extra"
|
||||
],
|
||||
"missed": []
|
||||
},
|
||||
"duration_sec": 0.0008,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "FLOW_TASK_RESOLUTION",
|
||||
"status": "PASS",
|
||||
"message": "所有 7 种 Flow 任务解析正确",
|
||||
"details": {
|
||||
"flow_tasks": {
|
||||
"api_ods": [
|
||||
"ODS_ASSISTANT_ACCOUNT",
|
||||
"ODS_SETTLEMENT_RECORDS",
|
||||
"ODS_TABLE_USE",
|
||||
"ODS_ASSISTANT_LEDGER",
|
||||
"ODS_ASSISTANT_ABOLISH",
|
||||
"ODS_STORE_GOODS_SALES",
|
||||
"ODS_PAYMENT",
|
||||
"ODS_REFUND",
|
||||
"ODS_PLATFORM_COUPON",
|
||||
"ODS_MEMBER",
|
||||
"ODS_MEMBER_CARD",
|
||||
"ODS_MEMBER_BALANCE",
|
||||
"ODS_RECHARGE_SETTLE",
|
||||
"ODS_GROUP_PACKAGE",
|
||||
"ODS_GROUP_BUY_REDEMPTION",
|
||||
"ODS_INVENTORY_STOCK",
|
||||
"ODS_INVENTORY_CHANGE",
|
||||
"ODS_TABLES",
|
||||
"ODS_GOODS_CATEGORY",
|
||||
"ODS_STORE_GOODS",
|
||||
"ODS_TABLE_FEE_DISCOUNT",
|
||||
"ODS_TENANT_GOODS",
|
||||
"ODS_SETTLEMENT_TICKET"
|
||||
],
|
||||
"api_ods_dwd": [
|
||||
"ODS_ASSISTANT_ACCOUNT",
|
||||
"ODS_SETTLEMENT_RECORDS",
|
||||
"ODS_TABLE_USE",
|
||||
"ODS_ASSISTANT_LEDGER",
|
||||
"ODS_ASSISTANT_ABOLISH",
|
||||
"ODS_STORE_GOODS_SALES",
|
||||
"ODS_PAYMENT",
|
||||
"ODS_REFUND",
|
||||
"ODS_PLATFORM_COUPON",
|
||||
"ODS_MEMBER",
|
||||
"ODS_MEMBER_CARD",
|
||||
"ODS_MEMBER_BALANCE",
|
||||
"ODS_RECHARGE_SETTLE",
|
||||
"ODS_GROUP_PACKAGE",
|
||||
"ODS_GROUP_BUY_REDEMPTION",
|
||||
"ODS_INVENTORY_STOCK",
|
||||
"ODS_INVENTORY_CHANGE",
|
||||
"ODS_TABLES",
|
||||
"ODS_GOODS_CATEGORY",
|
||||
"ODS_STORE_GOODS",
|
||||
"ODS_TABLE_FEE_DISCOUNT",
|
||||
"ODS_TENANT_GOODS",
|
||||
"ODS_SETTLEMENT_TICKET",
|
||||
"DWD_LOAD_FROM_ODS"
|
||||
],
|
||||
"api_full": [
|
||||
"ODS_ASSISTANT_ACCOUNT",
|
||||
"ODS_SETTLEMENT_RECORDS",
|
||||
"ODS_TABLE_USE",
|
||||
"ODS_ASSISTANT_LEDGER",
|
||||
"ODS_ASSISTANT_ABOLISH",
|
||||
"ODS_STORE_GOODS_SALES",
|
||||
"ODS_PAYMENT",
|
||||
"ODS_REFUND",
|
||||
"ODS_PLATFORM_COUPON",
|
||||
"ODS_MEMBER",
|
||||
"ODS_MEMBER_CARD",
|
||||
"ODS_MEMBER_BALANCE",
|
||||
"ODS_RECHARGE_SETTLE",
|
||||
"ODS_GROUP_PACKAGE",
|
||||
"ODS_GROUP_BUY_REDEMPTION",
|
||||
"ODS_INVENTORY_STOCK",
|
||||
"ODS_INVENTORY_CHANGE",
|
||||
"ODS_TABLES",
|
||||
"ODS_GOODS_CATEGORY",
|
||||
"ODS_STORE_GOODS",
|
||||
"ODS_TABLE_FEE_DISCOUNT",
|
||||
"ODS_TENANT_GOODS",
|
||||
"ODS_SETTLEMENT_TICKET",
|
||||
"DWD_LOAD_FROM_ODS",
|
||||
"DWS_BUILD_ORDER_SUMMARY",
|
||||
"DWS_ASSISTANT_DAILY",
|
||||
"DWS_ASSISTANT_MONTHLY",
|
||||
"DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_SALARY",
|
||||
"DWS_ASSISTANT_FINANCE",
|
||||
"DWS_MEMBER_CONSUMPTION",
|
||||
"DWS_MEMBER_VISIT",
|
||||
"DWS_FINANCE_DAILY",
|
||||
"DWS_FINANCE_RECHARGE",
|
||||
"DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"DWS_RETENTION_CLEANUP",
|
||||
"DWS_MV_REFRESH_FINANCE_DAILY",
|
||||
"DWS_MV_REFRESH_ASSISTANT_DAILY",
|
||||
"DWS_WINBACK_INDEX",
|
||||
"DWS_NEWCONV_INDEX",
|
||||
"DWS_ML_MANUAL_IMPORT",
|
||||
"DWS_RELATION_INDEX"
|
||||
],
|
||||
"ods_dwd": [
|
||||
"DWD_LOAD_FROM_ODS"
|
||||
],
|
||||
"dwd_dws": [
|
||||
"DWS_BUILD_ORDER_SUMMARY",
|
||||
"DWS_ASSISTANT_DAILY",
|
||||
"DWS_ASSISTANT_MONTHLY",
|
||||
"DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_SALARY",
|
||||
"DWS_ASSISTANT_FINANCE",
|
||||
"DWS_MEMBER_CONSUMPTION",
|
||||
"DWS_MEMBER_VISIT",
|
||||
"DWS_FINANCE_DAILY",
|
||||
"DWS_FINANCE_RECHARGE",
|
||||
"DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"DWS_RETENTION_CLEANUP",
|
||||
"DWS_MV_REFRESH_FINANCE_DAILY",
|
||||
"DWS_MV_REFRESH_ASSISTANT_DAILY"
|
||||
],
|
||||
"dwd_dws_index": [
|
||||
"DWS_BUILD_ORDER_SUMMARY",
|
||||
"DWS_ASSISTANT_DAILY",
|
||||
"DWS_ASSISTANT_MONTHLY",
|
||||
"DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_SALARY",
|
||||
"DWS_ASSISTANT_FINANCE",
|
||||
"DWS_MEMBER_CONSUMPTION",
|
||||
"DWS_MEMBER_VISIT",
|
||||
"DWS_FINANCE_DAILY",
|
||||
"DWS_FINANCE_RECHARGE",
|
||||
"DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"DWS_RETENTION_CLEANUP",
|
||||
"DWS_MV_REFRESH_FINANCE_DAILY",
|
||||
"DWS_MV_REFRESH_ASSISTANT_DAILY",
|
||||
"DWS_WINBACK_INDEX",
|
||||
"DWS_NEWCONV_INDEX",
|
||||
"DWS_ML_MANUAL_IMPORT",
|
||||
"DWS_RELATION_INDEX"
|
||||
],
|
||||
"dwd_index": [
|
||||
"DWS_WINBACK_INDEX",
|
||||
"DWS_NEWCONV_INDEX",
|
||||
"DWS_ML_MANUAL_IMPORT",
|
||||
"DWS_RELATION_INDEX"
|
||||
]
|
||||
},
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.002,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "TASK_REGISTRY_LAYERS",
|
||||
"status": "PASS",
|
||||
"message": "各层任务数量正确 (ODS=23, DWD=2, DWS=15, INDEX=4)",
|
||||
"details": {
|
||||
"ODS": {
|
||||
"expected": 23,
|
||||
"actual": 23,
|
||||
"tasks": [
|
||||
"ODS_ASSISTANT_ABOLISH",
|
||||
"ODS_ASSISTANT_ACCOUNT",
|
||||
"ODS_ASSISTANT_LEDGER",
|
||||
"ODS_GOODS_CATEGORY",
|
||||
"ODS_GROUP_BUY_REDEMPTION",
|
||||
"ODS_GROUP_PACKAGE",
|
||||
"ODS_INVENTORY_CHANGE",
|
||||
"ODS_INVENTORY_STOCK",
|
||||
"ODS_MEMBER",
|
||||
"ODS_MEMBER_BALANCE",
|
||||
"ODS_MEMBER_CARD",
|
||||
"ODS_PAYMENT",
|
||||
"ODS_PLATFORM_COUPON",
|
||||
"ODS_RECHARGE_SETTLE",
|
||||
"ODS_REFUND",
|
||||
"ODS_SETTLEMENT_RECORDS",
|
||||
"ODS_SETTLEMENT_TICKET",
|
||||
"ODS_STORE_GOODS",
|
||||
"ODS_STORE_GOODS_SALES",
|
||||
"ODS_TABLES",
|
||||
"ODS_TABLE_FEE_DISCOUNT",
|
||||
"ODS_TABLE_USE",
|
||||
"ODS_TENANT_GOODS"
|
||||
]
|
||||
},
|
||||
"DWD": {
|
||||
"expected": 2,
|
||||
"actual": 2,
|
||||
"tasks": [
|
||||
"DWD_LOAD_FROM_ODS",
|
||||
"DWD_QUALITY_CHECK"
|
||||
]
|
||||
},
|
||||
"DWS": {
|
||||
"expected": 15,
|
||||
"actual": 15,
|
||||
"tasks": [
|
||||
"DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_DAILY",
|
||||
"DWS_ASSISTANT_FINANCE",
|
||||
"DWS_ASSISTANT_MONTHLY",
|
||||
"DWS_ASSISTANT_SALARY",
|
||||
"DWS_BUILD_ORDER_SUMMARY",
|
||||
"DWS_FINANCE_DAILY",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_RECHARGE",
|
||||
"DWS_MEMBER_CONSUMPTION",
|
||||
"DWS_MEMBER_VISIT",
|
||||
"DWS_MV_REFRESH_ASSISTANT_DAILY",
|
||||
"DWS_MV_REFRESH_FINANCE_DAILY",
|
||||
"DWS_RETENTION_CLEANUP"
|
||||
]
|
||||
},
|
||||
"INDEX": {
|
||||
"expected": 4,
|
||||
"actual": 4,
|
||||
"tasks": [
|
||||
"DWS_ML_MANUAL_IMPORT",
|
||||
"DWS_NEWCONV_INDEX",
|
||||
"DWS_RELATION_INDEX",
|
||||
"DWS_WINBACK_INDEX"
|
||||
]
|
||||
},
|
||||
"TOTAL": {
|
||||
"actual": 52
|
||||
}
|
||||
},
|
||||
"duration_sec": 0.0001,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "UTILITY_TASK_IDENTIFICATION",
|
||||
"status": "PASS",
|
||||
"message": "工具类任务识别正确 (6 个工具类, 6 个 ETL 类)",
|
||||
"details": {
|
||||
"utility_tasks": {
|
||||
"MANUAL_INGEST": true,
|
||||
"INIT_ODS_SCHEMA": true,
|
||||
"INIT_DWD_SCHEMA": true,
|
||||
"INIT_DWS_SCHEMA": true,
|
||||
"ODS_JSON_ARCHIVE": true,
|
||||
"CHECK_CUTOFF": true
|
||||
},
|
||||
"etl_tasks": {
|
||||
"ODS_MEMBER": false,
|
||||
"ODS_ORDER": false,
|
||||
"ODS_PAYMENT": false,
|
||||
"DWD_LOAD_FROM_ODS": false,
|
||||
"DWS_ASSISTANT_DAILY": false,
|
||||
"DWS_FINANCE_DAILY": false
|
||||
},
|
||||
"index_tasks_utility_status": {
|
||||
"DWS_WINBACK_INDEX": true,
|
||||
"DWS_NEWCONV_INDEX": true,
|
||||
"DWS_ML_MANUAL_IMPORT": true,
|
||||
"DWS_RELATION_INDEX": true
|
||||
},
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "TASK_DISPATCH_PATHS",
|
||||
"status": "PASS",
|
||||
"message": "任务分发路径正确 (utility=13, ods=23, standard=16)",
|
||||
"details": {
|
||||
"path_counts": {
|
||||
"utility": 13,
|
||||
"standard": 16,
|
||||
"ods": 23
|
||||
},
|
||||
"issues": [],
|
||||
"sample_dispatch": {
|
||||
"CHECK_CUTOFF": {
|
||||
"layer": null,
|
||||
"is_utility": true,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "utility"
|
||||
},
|
||||
"DATA_INTEGRITY_CHECK": {
|
||||
"layer": null,
|
||||
"is_utility": true,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "utility"
|
||||
},
|
||||
"DWD_LOAD_FROM_ODS": {
|
||||
"layer": "DWD",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWD_QUALITY_CHECK": {
|
||||
"layer": "DWD",
|
||||
"is_utility": true,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "utility"
|
||||
},
|
||||
"DWS_ASSISTANT_CUSTOMER": {
|
||||
"layer": "DWS",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWS_ASSISTANT_DAILY": {
|
||||
"layer": "DWS",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWS_ASSISTANT_FINANCE": {
|
||||
"layer": "DWS",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWS_ASSISTANT_MONTHLY": {
|
||||
"layer": "DWS",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWS_ASSISTANT_SALARY": {
|
||||
"layer": "DWS",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWS_BUILD_ORDER_SUMMARY": {
|
||||
"layer": "DWS",
|
||||
"is_utility": true,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "utility"
|
||||
}
|
||||
}
|
||||
},
|
||||
"duration_sec": 0.0001,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CURSOR_MANAGER_INTERFACE",
|
||||
"status": "PASS",
|
||||
"message": "CursorManager 接口签名正确 (get_or_create, advance)",
|
||||
"details": {
|
||||
"method_signatures": {
|
||||
"get_or_create": [
|
||||
"self",
|
||||
"task_id",
|
||||
"store_id"
|
||||
],
|
||||
"advance": [
|
||||
"self",
|
||||
"task_id",
|
||||
"store_id",
|
||||
"window_start",
|
||||
"window_end",
|
||||
"run_id",
|
||||
"last_id"
|
||||
]
|
||||
},
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0002,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CURSOR_ADVANCE_SQL",
|
||||
"status": "PASS",
|
||||
"message": "游标推进 SQL 逻辑正确",
|
||||
"details": {
|
||||
"checks": [
|
||||
"✓ 使用 UPDATE meta.etl_cursor",
|
||||
"✓ 使用 GREATEST 保护 last_id 不回退",
|
||||
"✓ 调用 commit() 持久化",
|
||||
"✓ last_id 参数可选(有 None 分支)",
|
||||
"✓ 更新 updated_at 时间戳"
|
||||
],
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0005,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CURSOR_SKIP_UTILITY",
|
||||
"status": "PASS",
|
||||
"message": "工具类任务正确跳过游标管理和运行记录",
|
||||
"details": {
|
||||
"checks": [
|
||||
"✓ run_single_task 检查 is_utility_task 并分发到 _run_utility_task",
|
||||
"✓ _run_utility_task 不调用 cursor_mgr",
|
||||
"✓ _run_utility_task 不调用 run_tracker.create_run"
|
||||
],
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0019,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CLI_DATA_SOURCE",
|
||||
"status": "PASS",
|
||||
"message": "全部 7 个 data_source 解析用例通过",
|
||||
"details": {
|
||||
"test_cases": [
|
||||
{
|
||||
"case": "默认值",
|
||||
"expected": "hybrid",
|
||||
"actual": "hybrid"
|
||||
},
|
||||
{
|
||||
"case": "--data-source online",
|
||||
"expected": "online",
|
||||
"actual": "online"
|
||||
},
|
||||
{
|
||||
"case": "--data-source offline",
|
||||
"expected": "offline",
|
||||
"actual": "offline"
|
||||
},
|
||||
{
|
||||
"case": "--pipeline-flow FULL",
|
||||
"expected": "hybrid",
|
||||
"actual": "hybrid",
|
||||
"deprecation_warning": true
|
||||
},
|
||||
{
|
||||
"case": "--pipeline-flow FETCH_ONLY",
|
||||
"expected": "online",
|
||||
"actual": "online"
|
||||
},
|
||||
{
|
||||
"case": "--pipeline-flow INGEST_ONLY",
|
||||
"expected": "offline",
|
||||
"actual": "offline"
|
||||
},
|
||||
{
|
||||
"case": "--data-source online + --pipeline-flow INGEST_ONLY",
|
||||
"expected": "online",
|
||||
"actual": "online"
|
||||
}
|
||||
],
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0001,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CLI_MODE_DETECTION",
|
||||
"status": "PASS",
|
||||
"message": "CLI Flow/传统模式检测逻辑正确",
|
||||
"details": {
|
||||
"checks": [
|
||||
"✓ 有 --pipeline 参数时使用 PipelineRunner(Flow 模式)",
|
||||
"✓ 无 --pipeline 参数时使用 run_tasks(传统模式)",
|
||||
"✓ 调用 resolve_data_source 解析数据源模式",
|
||||
"✓ 支持 --lookback-hours 回溯窗口",
|
||||
"✓ 设置 window_override 确保任务使用指定窗口"
|
||||
],
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0011,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CLI_PIPELINE_CHOICES",
|
||||
"status": "PASS",
|
||||
"message": "CLI --pipeline 可选值与 PIPELINE_LAYERS 完全一致 (7 种)",
|
||||
"details": {
|
||||
"pipeline_layers_keys": [
|
||||
"api_full",
|
||||
"api_ods",
|
||||
"api_ods_dwd",
|
||||
"dwd_dws",
|
||||
"dwd_dws_index",
|
||||
"dwd_index",
|
||||
"ods_dwd"
|
||||
],
|
||||
"cli_choices": [
|
||||
"api_full",
|
||||
"api_ods",
|
||||
"api_ods_dwd",
|
||||
"dwd_dws",
|
||||
"dwd_dws_index",
|
||||
"dwd_index",
|
||||
"ods_dwd"
|
||||
],
|
||||
"missing_in_cli": [],
|
||||
"extra_in_cli": []
|
||||
},
|
||||
"duration_sec": 0.0,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "PROCESSING_MODES",
|
||||
"status": "PASS",
|
||||
"message": "三种处理模式(increment_only/verify_only/increment_verify)逻辑正确",
|
||||
"details": {
|
||||
"checks": [
|
||||
"✓ 支持 verify_only 模式",
|
||||
"✓ verify_only 调用 _run_verification",
|
||||
"✓ 支持 increment_verify 模式",
|
||||
"✓ 支持 fetch_before_verify 参数(校验前先获取 API 数据)",
|
||||
"✓ _run_verification 方法存在"
|
||||
],
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0019,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,607 @@
|
||||
[
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "FLOW_DEFINITIONS",
|
||||
"status": "PASS",
|
||||
"message": "全部 7 种 Flow 定义完整",
|
||||
"details": {
|
||||
"expected": [
|
||||
"api_full",
|
||||
"api_ods",
|
||||
"api_ods_dwd",
|
||||
"dwd_dws",
|
||||
"dwd_dws_index",
|
||||
"dwd_index",
|
||||
"ods_dwd"
|
||||
],
|
||||
"actual": [
|
||||
"api_full",
|
||||
"api_ods",
|
||||
"api_ods_dwd",
|
||||
"dwd_dws",
|
||||
"dwd_dws_index",
|
||||
"dwd_index",
|
||||
"ods_dwd"
|
||||
],
|
||||
"missing": [],
|
||||
"extra": []
|
||||
},
|
||||
"duration_sec": 0.0,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "FLOW_LAYER_MAPPING",
|
||||
"status": "PASS",
|
||||
"message": "所有 Flow 层映射正确",
|
||||
"details": {
|
||||
"total_flows": 7,
|
||||
"mismatches": []
|
||||
},
|
||||
"duration_sec": 0.0,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "INVALID_FLOW_REJECTION",
|
||||
"status": "PASS",
|
||||
"message": "全部 5 个无效 Flow 名称被正确拒绝",
|
||||
"details": {
|
||||
"tested": [
|
||||
"nonexistent",
|
||||
"API_ODS",
|
||||
"full",
|
||||
"",
|
||||
"api_full_extra"
|
||||
],
|
||||
"correctly_rejected": [
|
||||
"nonexistent",
|
||||
"API_ODS",
|
||||
"full",
|
||||
"",
|
||||
"api_full_extra"
|
||||
],
|
||||
"missed": []
|
||||
},
|
||||
"duration_sec": 0.0008,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "FLOW_TASK_RESOLUTION",
|
||||
"status": "PASS",
|
||||
"message": "所有 7 种 Flow 任务解析正确",
|
||||
"details": {
|
||||
"flow_tasks": {
|
||||
"api_ods": [
|
||||
"ODS_ASSISTANT_ACCOUNT",
|
||||
"ODS_SETTLEMENT_RECORDS",
|
||||
"ODS_TABLE_USE",
|
||||
"ODS_ASSISTANT_LEDGER",
|
||||
"ODS_ASSISTANT_ABOLISH",
|
||||
"ODS_STORE_GOODS_SALES",
|
||||
"ODS_PAYMENT",
|
||||
"ODS_REFUND",
|
||||
"ODS_PLATFORM_COUPON",
|
||||
"ODS_MEMBER",
|
||||
"ODS_MEMBER_CARD",
|
||||
"ODS_MEMBER_BALANCE",
|
||||
"ODS_RECHARGE_SETTLE",
|
||||
"ODS_GROUP_PACKAGE",
|
||||
"ODS_GROUP_BUY_REDEMPTION",
|
||||
"ODS_INVENTORY_STOCK",
|
||||
"ODS_INVENTORY_CHANGE",
|
||||
"ODS_TABLES",
|
||||
"ODS_GOODS_CATEGORY",
|
||||
"ODS_STORE_GOODS",
|
||||
"ODS_TABLE_FEE_DISCOUNT",
|
||||
"ODS_TENANT_GOODS",
|
||||
"ODS_SETTLEMENT_TICKET"
|
||||
],
|
||||
"api_ods_dwd": [
|
||||
"ODS_ASSISTANT_ACCOUNT",
|
||||
"ODS_SETTLEMENT_RECORDS",
|
||||
"ODS_TABLE_USE",
|
||||
"ODS_ASSISTANT_LEDGER",
|
||||
"ODS_ASSISTANT_ABOLISH",
|
||||
"ODS_STORE_GOODS_SALES",
|
||||
"ODS_PAYMENT",
|
||||
"ODS_REFUND",
|
||||
"ODS_PLATFORM_COUPON",
|
||||
"ODS_MEMBER",
|
||||
"ODS_MEMBER_CARD",
|
||||
"ODS_MEMBER_BALANCE",
|
||||
"ODS_RECHARGE_SETTLE",
|
||||
"ODS_GROUP_PACKAGE",
|
||||
"ODS_GROUP_BUY_REDEMPTION",
|
||||
"ODS_INVENTORY_STOCK",
|
||||
"ODS_INVENTORY_CHANGE",
|
||||
"ODS_TABLES",
|
||||
"ODS_GOODS_CATEGORY",
|
||||
"ODS_STORE_GOODS",
|
||||
"ODS_TABLE_FEE_DISCOUNT",
|
||||
"ODS_TENANT_GOODS",
|
||||
"ODS_SETTLEMENT_TICKET",
|
||||
"DWD_LOAD_FROM_ODS"
|
||||
],
|
||||
"api_full": [
|
||||
"ODS_ASSISTANT_ACCOUNT",
|
||||
"ODS_SETTLEMENT_RECORDS",
|
||||
"ODS_TABLE_USE",
|
||||
"ODS_ASSISTANT_LEDGER",
|
||||
"ODS_ASSISTANT_ABOLISH",
|
||||
"ODS_STORE_GOODS_SALES",
|
||||
"ODS_PAYMENT",
|
||||
"ODS_REFUND",
|
||||
"ODS_PLATFORM_COUPON",
|
||||
"ODS_MEMBER",
|
||||
"ODS_MEMBER_CARD",
|
||||
"ODS_MEMBER_BALANCE",
|
||||
"ODS_RECHARGE_SETTLE",
|
||||
"ODS_GROUP_PACKAGE",
|
||||
"ODS_GROUP_BUY_REDEMPTION",
|
||||
"ODS_INVENTORY_STOCK",
|
||||
"ODS_INVENTORY_CHANGE",
|
||||
"ODS_TABLES",
|
||||
"ODS_GOODS_CATEGORY",
|
||||
"ODS_STORE_GOODS",
|
||||
"ODS_TABLE_FEE_DISCOUNT",
|
||||
"ODS_TENANT_GOODS",
|
||||
"ODS_SETTLEMENT_TICKET",
|
||||
"DWD_LOAD_FROM_ODS",
|
||||
"DWS_BUILD_ORDER_SUMMARY",
|
||||
"DWS_ASSISTANT_DAILY",
|
||||
"DWS_ASSISTANT_MONTHLY",
|
||||
"DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_SALARY",
|
||||
"DWS_ASSISTANT_FINANCE",
|
||||
"DWS_MEMBER_CONSUMPTION",
|
||||
"DWS_MEMBER_VISIT",
|
||||
"DWS_FINANCE_DAILY",
|
||||
"DWS_FINANCE_RECHARGE",
|
||||
"DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"DWS_RETENTION_CLEANUP",
|
||||
"DWS_MV_REFRESH_FINANCE_DAILY",
|
||||
"DWS_MV_REFRESH_ASSISTANT_DAILY",
|
||||
"DWS_WINBACK_INDEX",
|
||||
"DWS_NEWCONV_INDEX",
|
||||
"DWS_ML_MANUAL_IMPORT",
|
||||
"DWS_RELATION_INDEX"
|
||||
],
|
||||
"ods_dwd": [
|
||||
"DWD_LOAD_FROM_ODS"
|
||||
],
|
||||
"dwd_dws": [
|
||||
"DWS_BUILD_ORDER_SUMMARY",
|
||||
"DWS_ASSISTANT_DAILY",
|
||||
"DWS_ASSISTANT_MONTHLY",
|
||||
"DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_SALARY",
|
||||
"DWS_ASSISTANT_FINANCE",
|
||||
"DWS_MEMBER_CONSUMPTION",
|
||||
"DWS_MEMBER_VISIT",
|
||||
"DWS_FINANCE_DAILY",
|
||||
"DWS_FINANCE_RECHARGE",
|
||||
"DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"DWS_RETENTION_CLEANUP",
|
||||
"DWS_MV_REFRESH_FINANCE_DAILY",
|
||||
"DWS_MV_REFRESH_ASSISTANT_DAILY"
|
||||
],
|
||||
"dwd_dws_index": [
|
||||
"DWS_BUILD_ORDER_SUMMARY",
|
||||
"DWS_ASSISTANT_DAILY",
|
||||
"DWS_ASSISTANT_MONTHLY",
|
||||
"DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_SALARY",
|
||||
"DWS_ASSISTANT_FINANCE",
|
||||
"DWS_MEMBER_CONSUMPTION",
|
||||
"DWS_MEMBER_VISIT",
|
||||
"DWS_FINANCE_DAILY",
|
||||
"DWS_FINANCE_RECHARGE",
|
||||
"DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"DWS_RETENTION_CLEANUP",
|
||||
"DWS_MV_REFRESH_FINANCE_DAILY",
|
||||
"DWS_MV_REFRESH_ASSISTANT_DAILY",
|
||||
"DWS_WINBACK_INDEX",
|
||||
"DWS_NEWCONV_INDEX",
|
||||
"DWS_ML_MANUAL_IMPORT",
|
||||
"DWS_RELATION_INDEX"
|
||||
],
|
||||
"dwd_index": [
|
||||
"DWS_WINBACK_INDEX",
|
||||
"DWS_NEWCONV_INDEX",
|
||||
"DWS_ML_MANUAL_IMPORT",
|
||||
"DWS_RELATION_INDEX"
|
||||
]
|
||||
},
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0012,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "TASK_REGISTRY_LAYERS",
|
||||
"status": "PASS",
|
||||
"message": "各层任务数量正确 (ODS=23, DWD=2, DWS=15, INDEX=4)",
|
||||
"details": {
|
||||
"ODS": {
|
||||
"expected": 23,
|
||||
"actual": 23,
|
||||
"tasks": [
|
||||
"ODS_ASSISTANT_ABOLISH",
|
||||
"ODS_ASSISTANT_ACCOUNT",
|
||||
"ODS_ASSISTANT_LEDGER",
|
||||
"ODS_GOODS_CATEGORY",
|
||||
"ODS_GROUP_BUY_REDEMPTION",
|
||||
"ODS_GROUP_PACKAGE",
|
||||
"ODS_INVENTORY_CHANGE",
|
||||
"ODS_INVENTORY_STOCK",
|
||||
"ODS_MEMBER",
|
||||
"ODS_MEMBER_BALANCE",
|
||||
"ODS_MEMBER_CARD",
|
||||
"ODS_PAYMENT",
|
||||
"ODS_PLATFORM_COUPON",
|
||||
"ODS_RECHARGE_SETTLE",
|
||||
"ODS_REFUND",
|
||||
"ODS_SETTLEMENT_RECORDS",
|
||||
"ODS_SETTLEMENT_TICKET",
|
||||
"ODS_STORE_GOODS",
|
||||
"ODS_STORE_GOODS_SALES",
|
||||
"ODS_TABLES",
|
||||
"ODS_TABLE_FEE_DISCOUNT",
|
||||
"ODS_TABLE_USE",
|
||||
"ODS_TENANT_GOODS"
|
||||
]
|
||||
},
|
||||
"DWD": {
|
||||
"expected": 2,
|
||||
"actual": 2,
|
||||
"tasks": [
|
||||
"DWD_LOAD_FROM_ODS",
|
||||
"DWD_QUALITY_CHECK"
|
||||
]
|
||||
},
|
||||
"DWS": {
|
||||
"expected": 15,
|
||||
"actual": 15,
|
||||
"tasks": [
|
||||
"DWS_ASSISTANT_CUSTOMER",
|
||||
"DWS_ASSISTANT_DAILY",
|
||||
"DWS_ASSISTANT_FINANCE",
|
||||
"DWS_ASSISTANT_MONTHLY",
|
||||
"DWS_ASSISTANT_SALARY",
|
||||
"DWS_BUILD_ORDER_SUMMARY",
|
||||
"DWS_FINANCE_DAILY",
|
||||
"DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"DWS_FINANCE_RECHARGE",
|
||||
"DWS_MEMBER_CONSUMPTION",
|
||||
"DWS_MEMBER_VISIT",
|
||||
"DWS_MV_REFRESH_ASSISTANT_DAILY",
|
||||
"DWS_MV_REFRESH_FINANCE_DAILY",
|
||||
"DWS_RETENTION_CLEANUP"
|
||||
]
|
||||
},
|
||||
"INDEX": {
|
||||
"expected": 4,
|
||||
"actual": 4,
|
||||
"tasks": [
|
||||
"DWS_ML_MANUAL_IMPORT",
|
||||
"DWS_NEWCONV_INDEX",
|
||||
"DWS_RELATION_INDEX",
|
||||
"DWS_WINBACK_INDEX"
|
||||
]
|
||||
},
|
||||
"TOTAL": {
|
||||
"actual": 52
|
||||
}
|
||||
},
|
||||
"duration_sec": 0.0001,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "UTILITY_TASK_IDENTIFICATION",
|
||||
"status": "PASS",
|
||||
"message": "工具类任务识别正确 (6 个工具类, 6 个 ETL 类)",
|
||||
"details": {
|
||||
"utility_tasks": {
|
||||
"MANUAL_INGEST": true,
|
||||
"INIT_ODS_SCHEMA": true,
|
||||
"INIT_DWD_SCHEMA": true,
|
||||
"INIT_DWS_SCHEMA": true,
|
||||
"ODS_JSON_ARCHIVE": true,
|
||||
"CHECK_CUTOFF": true
|
||||
},
|
||||
"etl_tasks": {
|
||||
"ODS_MEMBER": false,
|
||||
"ODS_ORDER": false,
|
||||
"ODS_PAYMENT": false,
|
||||
"DWD_LOAD_FROM_ODS": false,
|
||||
"DWS_ASSISTANT_DAILY": false,
|
||||
"DWS_FINANCE_DAILY": false
|
||||
},
|
||||
"index_tasks_utility_status": {
|
||||
"DWS_WINBACK_INDEX": true,
|
||||
"DWS_NEWCONV_INDEX": true,
|
||||
"DWS_ML_MANUAL_IMPORT": true,
|
||||
"DWS_RELATION_INDEX": true
|
||||
},
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "TASK_DISPATCH_PATHS",
|
||||
"status": "PASS",
|
||||
"message": "任务分发路径正确 (utility=13, ods=23, standard=16)",
|
||||
"details": {
|
||||
"path_counts": {
|
||||
"utility": 13,
|
||||
"standard": 16,
|
||||
"ods": 23
|
||||
},
|
||||
"issues": [],
|
||||
"sample_dispatch": {
|
||||
"CHECK_CUTOFF": {
|
||||
"layer": null,
|
||||
"is_utility": true,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "utility"
|
||||
},
|
||||
"DATA_INTEGRITY_CHECK": {
|
||||
"layer": null,
|
||||
"is_utility": true,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "utility"
|
||||
},
|
||||
"DWD_LOAD_FROM_ODS": {
|
||||
"layer": "DWD",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWD_QUALITY_CHECK": {
|
||||
"layer": "DWD",
|
||||
"is_utility": true,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "utility"
|
||||
},
|
||||
"DWS_ASSISTANT_CUSTOMER": {
|
||||
"layer": "DWS",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWS_ASSISTANT_DAILY": {
|
||||
"layer": "DWS",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWS_ASSISTANT_FINANCE": {
|
||||
"layer": "DWS",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWS_ASSISTANT_MONTHLY": {
|
||||
"layer": "DWS",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWS_ASSISTANT_SALARY": {
|
||||
"layer": "DWS",
|
||||
"is_utility": false,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "standard"
|
||||
},
|
||||
"DWS_BUILD_ORDER_SUMMARY": {
|
||||
"layer": "DWS",
|
||||
"is_utility": true,
|
||||
"is_ods": false,
|
||||
"dispatch_path": "utility"
|
||||
}
|
||||
}
|
||||
},
|
||||
"duration_sec": 0.0001,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CURSOR_MANAGER_INTERFACE",
|
||||
"status": "PASS",
|
||||
"message": "CursorManager 接口签名正确 (get_or_create, advance)",
|
||||
"details": {
|
||||
"method_signatures": {
|
||||
"get_or_create": [
|
||||
"self",
|
||||
"task_id",
|
||||
"store_id"
|
||||
],
|
||||
"advance": [
|
||||
"self",
|
||||
"task_id",
|
||||
"store_id",
|
||||
"window_start",
|
||||
"window_end",
|
||||
"run_id",
|
||||
"last_id"
|
||||
]
|
||||
},
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0002,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CURSOR_ADVANCE_SQL",
|
||||
"status": "PASS",
|
||||
"message": "游标推进 SQL 逻辑正确",
|
||||
"details": {
|
||||
"checks": [
|
||||
"✓ 使用 UPDATE meta.etl_cursor",
|
||||
"✓ 使用 GREATEST 保护 last_id 不回退",
|
||||
"✓ 调用 commit() 持久化",
|
||||
"✓ last_id 参数可选(有 None 分支)",
|
||||
"✓ 更新 updated_at 时间戳"
|
||||
],
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0005,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CURSOR_SKIP_UTILITY",
|
||||
"status": "PASS",
|
||||
"message": "工具类任务正确跳过游标管理和运行记录",
|
||||
"details": {
|
||||
"checks": [
|
||||
"✓ run_single_task 检查 is_utility_task 并分发到 _run_utility_task",
|
||||
"✓ _run_utility_task 不调用 cursor_mgr",
|
||||
"✓ _run_utility_task 不调用 run_tracker.create_run"
|
||||
],
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.002,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CLI_DATA_SOURCE",
|
||||
"status": "PASS",
|
||||
"message": "全部 7 个 data_source 解析用例通过",
|
||||
"details": {
|
||||
"test_cases": [
|
||||
{
|
||||
"case": "默认值",
|
||||
"expected": "hybrid",
|
||||
"actual": "hybrid"
|
||||
},
|
||||
{
|
||||
"case": "--data-source online",
|
||||
"expected": "online",
|
||||
"actual": "online"
|
||||
},
|
||||
{
|
||||
"case": "--data-source offline",
|
||||
"expected": "offline",
|
||||
"actual": "offline"
|
||||
},
|
||||
{
|
||||
"case": "--pipeline-flow FULL",
|
||||
"expected": "hybrid",
|
||||
"actual": "hybrid",
|
||||
"deprecation_warning": true
|
||||
},
|
||||
{
|
||||
"case": "--pipeline-flow FETCH_ONLY",
|
||||
"expected": "online",
|
||||
"actual": "online"
|
||||
},
|
||||
{
|
||||
"case": "--pipeline-flow INGEST_ONLY",
|
||||
"expected": "offline",
|
||||
"actual": "offline"
|
||||
},
|
||||
{
|
||||
"case": "--data-source online + --pipeline-flow INGEST_ONLY",
|
||||
"expected": "online",
|
||||
"actual": "online"
|
||||
}
|
||||
],
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0002,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CLI_MODE_DETECTION",
|
||||
"status": "PASS",
|
||||
"message": "CLI Flow/传统模式检测逻辑正确",
|
||||
"details": {
|
||||
"checks": [
|
||||
"✓ 有 --pipeline 参数时使用 PipelineRunner(Flow 模式)",
|
||||
"✓ 无 --pipeline 参数时使用 run_tasks(传统模式)",
|
||||
"✓ 调用 resolve_data_source 解析数据源模式",
|
||||
"✓ 支持 --lookback-hours 回溯窗口",
|
||||
"✓ 设置 window_override 确保任务使用指定窗口"
|
||||
],
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0017,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "CLI_PIPELINE_CHOICES",
|
||||
"status": "PASS",
|
||||
"message": "CLI --pipeline 可选值与 PIPELINE_LAYERS 完全一致 (7 种)",
|
||||
"details": {
|
||||
"pipeline_layers_keys": [
|
||||
"api_full",
|
||||
"api_ods",
|
||||
"api_ods_dwd",
|
||||
"dwd_dws",
|
||||
"dwd_dws_index",
|
||||
"dwd_index",
|
||||
"ods_dwd"
|
||||
],
|
||||
"cli_choices": [
|
||||
"api_full",
|
||||
"api_ods",
|
||||
"api_ods_dwd",
|
||||
"dwd_dws",
|
||||
"dwd_dws_index",
|
||||
"dwd_index",
|
||||
"ods_dwd"
|
||||
],
|
||||
"missing_in_cli": [],
|
||||
"extra_in_cli": []
|
||||
},
|
||||
"duration_sec": 0.0,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
},
|
||||
{
|
||||
"layer": "ORCHESTRATION",
|
||||
"task_code": "PROCESSING_MODES",
|
||||
"status": "PASS",
|
||||
"message": "三种处理模式(increment_only/verify_only/increment_verify)逻辑正确",
|
||||
"details": {
|
||||
"checks": [
|
||||
"✓ 支持 verify_only 模式",
|
||||
"✓ verify_only 调用 _run_verification",
|
||||
"✓ 支持 increment_verify 模式",
|
||||
"✓ 支持 fetch_before_verify 参数(校验前先获取 API 数据)",
|
||||
"✓ _run_verification 方法存在"
|
||||
],
|
||||
"issues": []
|
||||
},
|
||||
"duration_sec": 0.0014,
|
||||
"error_detail": null,
|
||||
"fix_applied": null
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,954 @@
|
||||
{
|
||||
"flow": "api_full",
|
||||
"window_start": "2026-01-01T00:00:00",
|
||||
"window_end": "2026-02-16T00:00:00",
|
||||
"overall_start": "2026-02-16T02:00:29.172416+08:00",
|
||||
"overall_end": "2026-02-16T02:01:31.080429+08:00",
|
||||
"overall_duration_sec": 60.681,
|
||||
"overall_status": "PARTIAL",
|
||||
"layers": [
|
||||
{
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:30.399132+08:00",
|
||||
"end_time": "2026-02-16T02:00:32.176952+08:00",
|
||||
"duration_sec": 1.778,
|
||||
"status": "ERROR",
|
||||
"task_count": 23,
|
||||
"success_count": 0,
|
||||
"fail_count": 23,
|
||||
"skip_count": 0,
|
||||
"total_fetched": 0,
|
||||
"total_inserted": 0,
|
||||
"total_updated": 0,
|
||||
"total_errors": 0,
|
||||
"tasks": [
|
||||
{
|
||||
"task_code": "ODS_ASSISTANT_ABOLISH",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:30.404857+08:00",
|
||||
"end_time": "2026-02-16T02:00:30.626099+08:00",
|
||||
"duration_sec": 0.221,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 重复键违反唯一约束\"etl_run_pkey\"\nDETAIL: 键值\"(run_id)=(1)\" 已经存在\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_ASSISTANT_ACCOUNT",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:30.631572+08:00",
|
||||
"end_time": "2026-02-16T02:00:30.692791+08:00",
|
||||
"duration_sec": 0.061,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_ASSISTANT_LEDGER",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:30.696161+08:00",
|
||||
"end_time": "2026-02-16T02:00:30.749470+08:00",
|
||||
"duration_sec": 0.053,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_GOODS_CATEGORY",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:30.768981+08:00",
|
||||
"end_time": "2026-02-16T02:00:30.820272+08:00",
|
||||
"duration_sec": 0.051,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_GROUP_BUY_REDEMPTION",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:30.823809+08:00",
|
||||
"end_time": "2026-02-16T02:00:30.886627+08:00",
|
||||
"duration_sec": 0.063,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_GROUP_PACKAGE",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:30.889777+08:00",
|
||||
"end_time": "2026-02-16T02:00:30.949457+08:00",
|
||||
"duration_sec": 0.06,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_INVENTORY_CHANGE",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:30.951549+08:00",
|
||||
"end_time": "2026-02-16T02:00:31.001887+08:00",
|
||||
"duration_sec": 0.05,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_INVENTORY_STOCK",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:31.004960+08:00",
|
||||
"end_time": "2026-02-16T02:00:31.154070+08:00",
|
||||
"duration_sec": 0.149,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_MEMBER",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:31.196785+08:00",
|
||||
"end_time": "2026-02-16T02:00:31.277524+08:00",
|
||||
"duration_sec": 0.081,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_MEMBER_BALANCE",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:31.282884+08:00",
|
||||
"end_time": "2026-02-16T02:00:31.389490+08:00",
|
||||
"duration_sec": 0.107,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_MEMBER_CARD",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:31.391553+08:00",
|
||||
"end_time": "2026-02-16T02:00:31.443622+08:00",
|
||||
"duration_sec": 0.052,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_PAYMENT",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:31.455259+08:00",
|
||||
"end_time": "2026-02-16T02:00:31.516596+08:00",
|
||||
"duration_sec": 0.061,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_PLATFORM_COUPON",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:31.519180+08:00",
|
||||
"end_time": "2026-02-16T02:00:31.571746+08:00",
|
||||
"duration_sec": 0.053,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_RECHARGE_SETTLE",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:31.587139+08:00",
|
||||
"end_time": "2026-02-16T02:00:31.645916+08:00",
|
||||
"duration_sec": 0.059,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_REFUND",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:31.649692+08:00",
|
||||
"end_time": "2026-02-16T02:00:31.707211+08:00",
|
||||
"duration_sec": 0.057,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_SETTLEMENT_RECORDS",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:31.709718+08:00",
|
||||
"end_time": "2026-02-16T02:00:31.762221+08:00",
|
||||
"duration_sec": 0.052,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_SETTLEMENT_TICKET",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:31.772867+08:00",
|
||||
"end_time": "2026-02-16T02:00:31.831914+08:00",
|
||||
"duration_sec": 0.059,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_STORE_GOODS",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:31.835277+08:00",
|
||||
"end_time": "2026-02-16T02:00:31.890692+08:00",
|
||||
"duration_sec": 0.055,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_STORE_GOODS_SALES",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:31.900307+08:00",
|
||||
"end_time": "2026-02-16T02:00:31.950987+08:00",
|
||||
"duration_sec": 0.051,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_TABLES",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:31.953280+08:00",
|
||||
"end_time": "2026-02-16T02:00:32.003567+08:00",
|
||||
"duration_sec": 0.05,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_TABLE_FEE_DISCOUNT",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:32.013166+08:00",
|
||||
"end_time": "2026-02-16T02:00:32.064346+08:00",
|
||||
"duration_sec": 0.051,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_TABLE_USE",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:32.066702+08:00",
|
||||
"end_time": "2026-02-16T02:00:32.120437+08:00",
|
||||
"duration_sec": 0.054,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_TENANT_GOODS",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:00:32.124485+08:00",
|
||||
"end_time": "2026-02-16T02:00:32.174414+08:00",
|
||||
"duration_sec": 0.05,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"layer": "DWD",
|
||||
"start_time": "2026-02-16T02:00:32.180137+08:00",
|
||||
"end_time": "2026-02-16T02:00:32.288850+08:00",
|
||||
"duration_sec": 0.109,
|
||||
"status": "ERROR",
|
||||
"task_count": 1,
|
||||
"success_count": 0,
|
||||
"fail_count": 1,
|
||||
"skip_count": 0,
|
||||
"total_fetched": 0,
|
||||
"total_inserted": 0,
|
||||
"total_updated": 0,
|
||||
"total_errors": 0,
|
||||
"tasks": [
|
||||
{
|
||||
"task_code": "DWD_LOAD_FROM_ODS",
|
||||
"layer": "DWD",
|
||||
"start_time": "2026-02-16T02:00:32.187417+08:00",
|
||||
"end_time": "2026-02-16T02:00:32.270397+08:00",
|
||||
"duration_sec": 0.083,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:00:32.434597+08:00",
|
||||
"end_time": "2026-02-16T02:00:33.733537+08:00",
|
||||
"duration_sec": 1.299,
|
||||
"status": "ERROR",
|
||||
"task_count": 15,
|
||||
"success_count": 0,
|
||||
"fail_count": 6,
|
||||
"skip_count": 9,
|
||||
"total_fetched": 0,
|
||||
"total_inserted": 0,
|
||||
"total_updated": 0,
|
||||
"total_errors": 0,
|
||||
"tasks": [
|
||||
{
|
||||
"task_code": "DWS_ASSISTANT_CUSTOMER",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:00:32.438912+08:00",
|
||||
"end_time": "2026-02-16T02:00:32.524164+08:00",
|
||||
"duration_sec": 0.085,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_ASSISTANT_DAILY",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:00:32.531949+08:00",
|
||||
"end_time": "2026-02-16T02:00:32.584970+08:00",
|
||||
"duration_sec": 0.053,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_ASSISTANT_FINANCE",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:00:32.587080+08:00",
|
||||
"end_time": "2026-02-16T02:00:32.640944+08:00",
|
||||
"duration_sec": 0.054,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_ASSISTANT_MONTHLY",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:00:32.651498+08:00",
|
||||
"end_time": "2026-02-16T02:00:32.702636+08:00",
|
||||
"duration_sec": 0.051,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_ASSISTANT_SALARY",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:00:32.704713+08:00",
|
||||
"end_time": "2026-02-16T02:00:32.765519+08:00",
|
||||
"duration_sec": 0.061,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_BUILD_ORDER_SUMMARY",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:00:32.778507+08:00",
|
||||
"end_time": "2026-02-16T02:00:32.891324+08:00",
|
||||
"duration_sec": 0.113,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_FINANCE_DAILY",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:00:32.892786+08:00",
|
||||
"end_time": "2026-02-16T02:00:33.011285+08:00",
|
||||
"duration_sec": 0.118,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:00:33.015489+08:00",
|
||||
"end_time": "2026-02-16T02:00:33.070572+08:00",
|
||||
"duration_sec": 0.055,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:00:33.072599+08:00",
|
||||
"end_time": "2026-02-16T02:00:33.133539+08:00",
|
||||
"duration_sec": 0.061,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_FINANCE_RECHARGE",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:00:33.135092+08:00",
|
||||
"end_time": "2026-02-16T02:00:33.185416+08:00",
|
||||
"duration_sec": 0.05,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_MEMBER_CONSUMPTION",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:00:33.189842+08:00",
|
||||
"end_time": "2026-02-16T02:00:33.256656+08:00",
|
||||
"duration_sec": 0.067,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_MEMBER_VISIT",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:00:33.258323+08:00",
|
||||
"end_time": "2026-02-16T02:00:33.471349+08:00",
|
||||
"duration_sec": 0.213,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_MV_REFRESH_ASSISTANT_DAILY",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:00:33.524696+08:00",
|
||||
"end_time": "2026-02-16T02:00:33.614408+08:00",
|
||||
"duration_sec": 0.09,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_MV_REFRESH_FINANCE_DAILY",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:00:33.621684+08:00",
|
||||
"end_time": "2026-02-16T02:00:33.675725+08:00",
|
||||
"duration_sec": 0.054,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_RETENTION_CLEANUP",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:00:33.677408+08:00",
|
||||
"end_time": "2026-02-16T02:00:33.732228+08:00",
|
||||
"duration_sec": 0.055,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"layer": "INDEX",
|
||||
"start_time": "2026-02-16T02:00:33.738360+08:00",
|
||||
"end_time": "2026-02-16T02:00:37.081635+08:00",
|
||||
"duration_sec": 3.343,
|
||||
"status": "ERROR",
|
||||
"task_count": 4,
|
||||
"success_count": 0,
|
||||
"fail_count": 4,
|
||||
"skip_count": 0,
|
||||
"total_fetched": 0,
|
||||
"total_inserted": 0,
|
||||
"total_updated": 0,
|
||||
"total_errors": 0,
|
||||
"tasks": [
|
||||
{
|
||||
"task_code": "DWS_ML_MANUAL_IMPORT",
|
||||
"layer": "INDEX",
|
||||
"start_time": "2026-02-16T02:00:33.740154+08:00",
|
||||
"end_time": "2026-02-16T02:00:33.743497+08:00",
|
||||
"duration_sec": 0.003,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "未找到 ML 台账文件,请通过环境变量 ML_MANUAL_LEDGER_FILE 或配置 run.ml_manual_ledger_file 指定",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_NEWCONV_INDEX",
|
||||
"layer": "INDEX",
|
||||
"start_time": "2026-02-16T02:00:33.744864+08:00",
|
||||
"end_time": "2026-02-16T02:00:36.742473+08:00",
|
||||
"duration_sec": 2.998,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 重复键违反唯一约束\"dws_index_percentile_history_pkey\"\nDETAIL: 键值\"(history_id)=(1)\" 已经存在\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_RELATION_INDEX",
|
||||
"layer": "INDEX",
|
||||
"start_time": "2026-02-16T02:00:36.747281+08:00",
|
||||
"end_time": "2026-02-16T02:00:36.820768+08:00",
|
||||
"duration_sec": 0.073,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_WINBACK_INDEX",
|
||||
"layer": "INDEX",
|
||||
"start_time": "2026-02-16T02:00:36.823642+08:00",
|
||||
"end_time": "2026-02-16T02:00:37.079836+08:00",
|
||||
"duration_sec": 0.256,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 当前事务被终止, 事务块结束之前的查询被忽略\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"verification": {
|
||||
"status": "COMPLETED",
|
||||
"start_time": "2026-02-16T02:00:37.094515+08:00",
|
||||
"end_time": "2026-02-16T02:01:31.080036+08:00",
|
||||
"duration_sec": 53.985,
|
||||
"total_tables": 15,
|
||||
"consistent_tables": 7,
|
||||
"total_backfilled": 163,
|
||||
"error_tables": 3,
|
||||
"layers": {
|
||||
"ODS": {
|
||||
"layer": "ODS",
|
||||
"window_start": "2026-01-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-16T00:00:00+08:00",
|
||||
"total_tables": 1,
|
||||
"consistent_tables": 0,
|
||||
"inconsistent_tables": 1,
|
||||
"total_source_count": 0,
|
||||
"total_target_count": 0,
|
||||
"total_missing": 0,
|
||||
"total_mismatch": 0,
|
||||
"total_backfilled": 0,
|
||||
"total_backfilled_missing": 0,
|
||||
"total_backfilled_mismatch": 0,
|
||||
"error_tables": 1,
|
||||
"elapsed_seconds": 0.1302633285522461,
|
||||
"status": "ERROR",
|
||||
"results": [
|
||||
{
|
||||
"layer": "ODS",
|
||||
"table": "assistant_accounts_master",
|
||||
"window_start": "2026-01-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-01T00:00:00+08:00",
|
||||
"source_count": 0,
|
||||
"target_count": 0,
|
||||
"missing_count": 0,
|
||||
"mismatch_count": 0,
|
||||
"backfilled_count": 0,
|
||||
"backfilled_missing_count": 0,
|
||||
"backfilled_mismatch_count": 0,
|
||||
"status": "ERROR",
|
||||
"elapsed_seconds": 0.1302633285522461,
|
||||
"error_message": "获取 ODS hash 失败: assistant_accounts_master",
|
||||
"details": {
|
||||
"fatal": true
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"DWD": {
|
||||
"layer": "DWD",
|
||||
"window_start": "2026-01-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-16T00:00:00+08:00",
|
||||
"total_tables": 6,
|
||||
"consistent_tables": 0,
|
||||
"inconsistent_tables": 6,
|
||||
"total_source_count": 163,
|
||||
"total_target_count": 219,
|
||||
"total_missing": 0,
|
||||
"total_mismatch": 163,
|
||||
"total_backfilled": 163,
|
||||
"total_backfilled_missing": 0,
|
||||
"total_backfilled_mismatch": 163,
|
||||
"error_tables": 1,
|
||||
"elapsed_seconds": 49.87700796127319,
|
||||
"status": "ERROR",
|
||||
"results": [
|
||||
{
|
||||
"layer": "DWD",
|
||||
"table": "dim_site",
|
||||
"window_start": "2026-01-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-01T00:00:00+08:00",
|
||||
"source_count": 1,
|
||||
"target_count": 1,
|
||||
"missing_count": 0,
|
||||
"mismatch_count": 1,
|
||||
"backfilled_count": 1,
|
||||
"backfilled_missing_count": 0,
|
||||
"backfilled_mismatch_count": 1,
|
||||
"status": "BACKFILLED",
|
||||
"elapsed_seconds": 14.668256521224976,
|
||||
"error_message": null,
|
||||
"details": {}
|
||||
},
|
||||
{
|
||||
"layer": "DWD",
|
||||
"table": "dim_site_ex",
|
||||
"window_start": "2026-01-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-01T00:00:00+08:00",
|
||||
"source_count": 1,
|
||||
"target_count": 1,
|
||||
"missing_count": 0,
|
||||
"mismatch_count": 1,
|
||||
"backfilled_count": 1,
|
||||
"backfilled_missing_count": 0,
|
||||
"backfilled_mismatch_count": 1,
|
||||
"status": "BACKFILLED",
|
||||
"elapsed_seconds": 14.868768453598022,
|
||||
"error_message": null,
|
||||
"details": {}
|
||||
},
|
||||
{
|
||||
"layer": "DWD",
|
||||
"table": "dim_table",
|
||||
"window_start": "2026-01-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-01T00:00:00+08:00",
|
||||
"source_count": 66,
|
||||
"target_count": 74,
|
||||
"missing_count": 0,
|
||||
"mismatch_count": 66,
|
||||
"backfilled_count": 66,
|
||||
"backfilled_missing_count": 0,
|
||||
"backfilled_mismatch_count": 66,
|
||||
"status": "BACKFILLED",
|
||||
"elapsed_seconds": 9.084474802017212,
|
||||
"error_message": null,
|
||||
"details": {}
|
||||
},
|
||||
{
|
||||
"layer": "DWD",
|
||||
"table": "dim_table_ex",
|
||||
"window_start": "2026-01-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-01T00:00:00+08:00",
|
||||
"source_count": 66,
|
||||
"target_count": 74,
|
||||
"missing_count": 0,
|
||||
"mismatch_count": 66,
|
||||
"backfilled_count": 66,
|
||||
"backfilled_missing_count": 0,
|
||||
"backfilled_mismatch_count": 66,
|
||||
"status": "BACKFILLED",
|
||||
"elapsed_seconds": 7.42323637008667,
|
||||
"error_message": null,
|
||||
"details": {}
|
||||
},
|
||||
{
|
||||
"layer": "DWD",
|
||||
"table": "dim_assistant",
|
||||
"window_start": "2026-01-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-01T00:00:00+08:00",
|
||||
"source_count": 29,
|
||||
"target_count": 69,
|
||||
"missing_count": 0,
|
||||
"mismatch_count": 29,
|
||||
"backfilled_count": 29,
|
||||
"backfilled_missing_count": 0,
|
||||
"backfilled_mismatch_count": 29,
|
||||
"status": "BACKFILLED",
|
||||
"elapsed_seconds": 3.5641441345214844,
|
||||
"error_message": null,
|
||||
"details": {}
|
||||
},
|
||||
{
|
||||
"layer": "DWD",
|
||||
"table": "dim_assistant_ex",
|
||||
"window_start": "2026-01-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-01T00:00:00+08:00",
|
||||
"source_count": 0,
|
||||
"target_count": 0,
|
||||
"missing_count": 0,
|
||||
"mismatch_count": 0,
|
||||
"backfilled_count": 0,
|
||||
"backfilled_missing_count": 0,
|
||||
"backfilled_mismatch_count": 0,
|
||||
"status": "ERROR",
|
||||
"elapsed_seconds": 0.2681276798248291,
|
||||
"error_message": "获取 DWD hash 失败: dim_assistant_ex",
|
||||
"details": {
|
||||
"fatal": true
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"DWS": {
|
||||
"layer": "DWS",
|
||||
"window_start": "2026-01-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-16T00:00:00+08:00",
|
||||
"total_tables": 6,
|
||||
"consistent_tables": 6,
|
||||
"inconsistent_tables": 0,
|
||||
"total_source_count": 1712,
|
||||
"total_target_count": 1712,
|
||||
"total_missing": 0,
|
||||
"total_mismatch": 0,
|
||||
"total_backfilled": 0,
|
||||
"total_backfilled_missing": 0,
|
||||
"total_backfilled_mismatch": 0,
|
||||
"error_tables": 0,
|
||||
"elapsed_seconds": 0.8101677894592285,
|
||||
"status": "OK",
|
||||
"results": [
|
||||
{
|
||||
"layer": "DWS",
|
||||
"table": "dws_finance_daily_summary",
|
||||
"window_start": "2026-01-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-01T00:00:00+08:00",
|
||||
"source_count": 31,
|
||||
"target_count": 31,
|
||||
"missing_count": 0,
|
||||
"mismatch_count": 0,
|
||||
"backfilled_count": 0,
|
||||
"backfilled_missing_count": 0,
|
||||
"backfilled_mismatch_count": 0,
|
||||
"status": "OK",
|
||||
"elapsed_seconds": 0.17444229125976562,
|
||||
"error_message": null,
|
||||
"details": {}
|
||||
},
|
||||
{
|
||||
"layer": "DWS",
|
||||
"table": "dws_assistant_daily_detail",
|
||||
"window_start": "2026-01-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-01T00:00:00+08:00",
|
||||
"source_count": 854,
|
||||
"target_count": 854,
|
||||
"missing_count": 0,
|
||||
"mismatch_count": 0,
|
||||
"backfilled_count": 0,
|
||||
"backfilled_missing_count": 0,
|
||||
"backfilled_mismatch_count": 0,
|
||||
"status": "OK",
|
||||
"elapsed_seconds": 0.1419987678527832,
|
||||
"error_message": null,
|
||||
"details": {}
|
||||
},
|
||||
{
|
||||
"layer": "DWS",
|
||||
"table": "dws_member_visit_detail",
|
||||
"window_start": "2026-01-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-01T00:00:00+08:00",
|
||||
"source_count": 564,
|
||||
"target_count": 564,
|
||||
"missing_count": 0,
|
||||
"mismatch_count": 0,
|
||||
"backfilled_count": 0,
|
||||
"backfilled_missing_count": 0,
|
||||
"backfilled_mismatch_count": 0,
|
||||
"status": "OK",
|
||||
"elapsed_seconds": 0.1277296543121338,
|
||||
"error_message": null,
|
||||
"details": {}
|
||||
},
|
||||
{
|
||||
"layer": "DWS",
|
||||
"table": "dws_finance_daily_summary",
|
||||
"window_start": "2026-02-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-16T00:00:00+08:00",
|
||||
"source_count": 10,
|
||||
"target_count": 10,
|
||||
"missing_count": 0,
|
||||
"mismatch_count": 0,
|
||||
"backfilled_count": 0,
|
||||
"backfilled_missing_count": 0,
|
||||
"backfilled_mismatch_count": 0,
|
||||
"status": "OK",
|
||||
"elapsed_seconds": 0.15016722679138184,
|
||||
"error_message": null,
|
||||
"details": {}
|
||||
},
|
||||
{
|
||||
"layer": "DWS",
|
||||
"table": "dws_assistant_daily_detail",
|
||||
"window_start": "2026-02-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-16T00:00:00+08:00",
|
||||
"source_count": 125,
|
||||
"target_count": 125,
|
||||
"missing_count": 0,
|
||||
"mismatch_count": 0,
|
||||
"backfilled_count": 0,
|
||||
"backfilled_missing_count": 0,
|
||||
"backfilled_mismatch_count": 0,
|
||||
"status": "OK",
|
||||
"elapsed_seconds": 0.10792803764343262,
|
||||
"error_message": null,
|
||||
"details": {}
|
||||
},
|
||||
{
|
||||
"layer": "DWS",
|
||||
"table": "dws_member_visit_detail",
|
||||
"window_start": "2026-02-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-16T00:00:00+08:00",
|
||||
"source_count": 128,
|
||||
"target_count": 128,
|
||||
"missing_count": 0,
|
||||
"mismatch_count": 0,
|
||||
"backfilled_count": 0,
|
||||
"backfilled_missing_count": 0,
|
||||
"backfilled_mismatch_count": 0,
|
||||
"status": "OK",
|
||||
"elapsed_seconds": 0.10790181159973145,
|
||||
"error_message": null,
|
||||
"details": {}
|
||||
}
|
||||
]
|
||||
},
|
||||
"INDEX": {
|
||||
"layer": "INDEX",
|
||||
"window_start": "2026-01-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-16T00:00:00+08:00",
|
||||
"total_tables": 2,
|
||||
"consistent_tables": 1,
|
||||
"inconsistent_tables": 1,
|
||||
"total_source_count": 94,
|
||||
"total_target_count": 171,
|
||||
"total_missing": 0,
|
||||
"total_mismatch": 0,
|
||||
"total_backfilled": 0,
|
||||
"total_backfilled_missing": 0,
|
||||
"total_backfilled_mismatch": 0,
|
||||
"error_tables": 1,
|
||||
"elapsed_seconds": 0.2954070568084717,
|
||||
"status": "ERROR",
|
||||
"results": [
|
||||
{
|
||||
"layer": "INDEX",
|
||||
"table": "v_member_recall_priority",
|
||||
"window_start": "2026-01-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-01T00:00:00+08:00",
|
||||
"source_count": 94,
|
||||
"target_count": 171,
|
||||
"missing_count": 0,
|
||||
"mismatch_count": 0,
|
||||
"backfilled_count": 0,
|
||||
"backfilled_missing_count": 0,
|
||||
"backfilled_mismatch_count": 0,
|
||||
"status": "OK",
|
||||
"elapsed_seconds": 0.18698692321777344,
|
||||
"error_message": null,
|
||||
"details": {}
|
||||
},
|
||||
{
|
||||
"layer": "INDEX",
|
||||
"table": "dws_member_assistant_relation_index",
|
||||
"window_start": "2026-01-01T00:00:00+08:00",
|
||||
"window_end": "2026-02-01T00:00:00+08:00",
|
||||
"source_count": 0,
|
||||
"target_count": 0,
|
||||
"missing_count": 0,
|
||||
"mismatch_count": 0,
|
||||
"backfilled_count": 0,
|
||||
"backfilled_missing_count": 0,
|
||||
"backfilled_mismatch_count": 0,
|
||||
"status": "ERROR",
|
||||
"elapsed_seconds": 0.10842013359069824,
|
||||
"error_message": "获取源实体失败: dws_member_assistant_relation_index",
|
||||
"details": {
|
||||
"fatal": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"environment": {
|
||||
"store_id": 2790685415443269,
|
||||
"db_name": "",
|
||||
"api_base_url": "https://pc.ficoo.vip/apiprod/admin/v1/",
|
||||
"timezone": "Asia/Shanghai"
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"last_completed_layer": "INDEX",
|
||||
"last_completed_task": "DWS_WINBACK_INDEX",
|
||||
"timestamp": "2026-02-16T02:21:00.006150+08:00"
|
||||
}
|
||||
@@ -0,0 +1,774 @@
|
||||
{
|
||||
"flow": "api_full",
|
||||
"window_start": "2026-01-01T00:00:00",
|
||||
"window_end": "2026-02-16T00:00:00",
|
||||
"overall_start": "2026-02-16T02:05:42.502194+08:00",
|
||||
"overall_end": "",
|
||||
"overall_duration_sec": 0.0,
|
||||
"overall_status": "",
|
||||
"layers": [
|
||||
{
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:05:43.228274+08:00",
|
||||
"end_time": "2026-02-16T02:20:46.966053+08:00",
|
||||
"duration_sec": 903.738,
|
||||
"status": "SUCCESS",
|
||||
"task_count": 23,
|
||||
"success_count": 23,
|
||||
"fail_count": 0,
|
||||
"skip_count": 0,
|
||||
"total_fetched": 280391,
|
||||
"total_inserted": 2421,
|
||||
"total_updated": 303,
|
||||
"total_errors": 0,
|
||||
"tasks": [
|
||||
{
|
||||
"task_code": "ODS_ASSISTANT_ABOLISH",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:05:43.229896+08:00",
|
||||
"end_time": "2026-02-16T02:05:46.094082+08:00",
|
||||
"duration_sec": 2.864,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 37,
|
||||
"inserted": 0,
|
||||
"updated": 0,
|
||||
"skipped": 37,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_ASSISTANT_ACCOUNT",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:05:46.095823+08:00",
|
||||
"end_time": "2026-02-16T02:05:53.344708+08:00",
|
||||
"duration_sec": 7.249,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 345,
|
||||
"inserted": 3,
|
||||
"updated": 0,
|
||||
"skipped": 342,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_ASSISTANT_LEDGER",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:05:53.350637+08:00",
|
||||
"end_time": "2026-02-16T02:06:01.035404+08:00",
|
||||
"duration_sec": 7.685,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 998,
|
||||
"inserted": 16,
|
||||
"updated": 0,
|
||||
"skipped": 982,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_GOODS_CATEGORY",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:06:01.038609+08:00",
|
||||
"end_time": "2026-02-16T02:06:03.197268+08:00",
|
||||
"duration_sec": 2.159,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 45,
|
||||
"inserted": 0,
|
||||
"updated": 0,
|
||||
"skipped": 45,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_GROUP_BUY_REDEMPTION",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:06:03.243603+08:00",
|
||||
"end_time": "2026-02-16T02:08:51.258331+08:00",
|
||||
"duration_sec": 168.015,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 38860,
|
||||
"inserted": 242,
|
||||
"updated": 0,
|
||||
"skipped": 38618,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_GROUP_PACKAGE",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:08:51.261789+08:00",
|
||||
"end_time": "2026-02-16T02:08:54.850306+08:00",
|
||||
"duration_sec": 3.588,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 90,
|
||||
"inserted": 0,
|
||||
"updated": 0,
|
||||
"skipped": 90,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_INVENTORY_CHANGE",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:08:54.854769+08:00",
|
||||
"end_time": "2026-02-16T02:09:14.467632+08:00",
|
||||
"duration_sec": 19.613,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 7044,
|
||||
"inserted": 601,
|
||||
"updated": 0,
|
||||
"skipped": 6443,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_INVENTORY_STOCK",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:09:14.469443+08:00",
|
||||
"end_time": "2026-02-16T02:09:18.538862+08:00",
|
||||
"duration_sec": 4.069,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 865,
|
||||
"inserted": 48,
|
||||
"updated": 0,
|
||||
"skipped": 817,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_MEMBER",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:09:18.540713+08:00",
|
||||
"end_time": "2026-02-16T02:09:30.301232+08:00",
|
||||
"duration_sec": 11.76,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 2785,
|
||||
"inserted": 14,
|
||||
"updated": 0,
|
||||
"skipped": 2771,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_MEMBER_BALANCE",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:09:30.302995+08:00",
|
||||
"end_time": "2026-02-16T02:11:45.226394+08:00",
|
||||
"duration_sec": 134.924,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 11725,
|
||||
"inserted": 39,
|
||||
"updated": 0,
|
||||
"skipped": 11686,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_MEMBER_CARD",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:11:45.228393+08:00",
|
||||
"end_time": "2026-02-16T02:11:58.353257+08:00",
|
||||
"duration_sec": 13.125,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 4730,
|
||||
"inserted": 19,
|
||||
"updated": 0,
|
||||
"skipped": 4711,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_PAYMENT",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:11:58.354979+08:00",
|
||||
"end_time": "2026-02-16T02:13:57.385086+08:00",
|
||||
"duration_sec": 119.03,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 56795,
|
||||
"inserted": 325,
|
||||
"updated": 0,
|
||||
"skipped": 56470,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_PLATFORM_COUPON",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:13:57.387334+08:00",
|
||||
"end_time": "2026-02-16T02:17:35.403217+08:00",
|
||||
"duration_sec": 218.016,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 91555,
|
||||
"inserted": 242,
|
||||
"updated": 0,
|
||||
"skipped": 91313,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_RECHARGE_SETTLE",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:17:35.405051+08:00",
|
||||
"end_time": "2026-02-16T02:17:37.892719+08:00",
|
||||
"duration_sec": 2.488,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 90,
|
||||
"inserted": 0,
|
||||
"updated": 0,
|
||||
"skipped": 90,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_REFUND",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:17:37.894298+08:00",
|
||||
"end_time": "2026-02-16T02:17:40.855120+08:00",
|
||||
"duration_sec": 2.961,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 180,
|
||||
"inserted": 1,
|
||||
"updated": 0,
|
||||
"skipped": 179,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_SETTLEMENT_RECORDS",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:17:40.857110+08:00",
|
||||
"end_time": "2026-02-16T02:18:10.883995+08:00",
|
||||
"duration_sec": 30.027,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 4917,
|
||||
"inserted": 320,
|
||||
"updated": 303,
|
||||
"skipped": 4294,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_SETTLEMENT_TICKET",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:18:10.885741+08:00",
|
||||
"end_time": "2026-02-16T02:18:20.540209+08:00",
|
||||
"duration_sec": 9.654,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 0,
|
||||
"inserted": 0,
|
||||
"updated": 0,
|
||||
"skipped": 0,
|
||||
"errors": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_STORE_GOODS",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:18:20.541830+08:00",
|
||||
"end_time": "2026-02-16T02:18:23.785491+08:00",
|
||||
"duration_sec": 3.244,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 865,
|
||||
"inserted": 173,
|
||||
"updated": 0,
|
||||
"skipped": 692,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_STORE_GOODS_SALES",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:18:23.787736+08:00",
|
||||
"end_time": "2026-02-16T02:18:25.142325+08:00",
|
||||
"duration_sec": 1.355,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 0,
|
||||
"inserted": 0,
|
||||
"updated": 0,
|
||||
"skipped": 0,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_TABLES",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:18:25.144553+08:00",
|
||||
"end_time": "2026-02-16T02:18:28.900234+08:00",
|
||||
"duration_sec": 3.756,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 370,
|
||||
"inserted": 51,
|
||||
"updated": 0,
|
||||
"skipped": 319,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_TABLE_FEE_DISCOUNT",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:18:28.901962+08:00",
|
||||
"end_time": "2026-02-16T02:19:04.877900+08:00",
|
||||
"duration_sec": 35.976,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 8680,
|
||||
"inserted": 39,
|
||||
"updated": 0,
|
||||
"skipped": 8641,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_TABLE_USE",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:19:04.879677+08:00",
|
||||
"end_time": "2026-02-16T02:20:43.722924+08:00",
|
||||
"duration_sec": 98.844,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 48545,
|
||||
"inserted": 285,
|
||||
"updated": 0,
|
||||
"skipped": 48260,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_TENANT_GOODS",
|
||||
"layer": "ODS",
|
||||
"start_time": "2026-02-16T02:20:43.725501+08:00",
|
||||
"end_time": "2026-02-16T02:20:46.964569+08:00",
|
||||
"duration_sec": 3.239,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 870,
|
||||
"inserted": 3,
|
||||
"updated": 0,
|
||||
"skipped": 867,
|
||||
"errors": 0,
|
||||
"deleted": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"layer": "DWD",
|
||||
"start_time": "2026-02-16T02:20:46.967739+08:00",
|
||||
"end_time": "2026-02-16T02:20:47.394765+08:00",
|
||||
"duration_sec": 0.427,
|
||||
"status": "SUCCESS",
|
||||
"task_count": 1,
|
||||
"success_count": 1,
|
||||
"fail_count": 0,
|
||||
"skip_count": 0,
|
||||
"total_fetched": 0,
|
||||
"total_inserted": 0,
|
||||
"total_updated": 0,
|
||||
"total_errors": 0,
|
||||
"tasks": [
|
||||
{
|
||||
"task_code": "DWD_LOAD_FROM_ODS",
|
||||
"layer": "DWD",
|
||||
"start_time": "2026-02-16T02:20:46.968453+08:00",
|
||||
"end_time": "2026-02-16T02:20:47.392262+08:00",
|
||||
"duration_sec": 0.424,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 0,
|
||||
"inserted": 0,
|
||||
"updated": 0,
|
||||
"skipped": 0,
|
||||
"errors": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:20:47.397305+08:00",
|
||||
"end_time": "2026-02-16T02:20:50.260818+08:00",
|
||||
"duration_sec": 2.863,
|
||||
"status": "SUCCESS",
|
||||
"task_count": 15,
|
||||
"success_count": 1,
|
||||
"fail_count": 0,
|
||||
"skip_count": 14,
|
||||
"total_fetched": 0,
|
||||
"total_inserted": 5117,
|
||||
"total_updated": 0,
|
||||
"total_errors": 0,
|
||||
"tasks": [
|
||||
{
|
||||
"task_code": "DWS_ASSISTANT_CUSTOMER",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:20:47.478185+08:00",
|
||||
"end_time": "2026-02-16T02:20:47.912593+08:00",
|
||||
"duration_sec": 0.434,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_ASSISTANT_DAILY",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:20:47.916343+08:00",
|
||||
"end_time": "2026-02-16T02:20:48.058411+08:00",
|
||||
"duration_sec": 0.142,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_ASSISTANT_FINANCE",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:20:48.060291+08:00",
|
||||
"end_time": "2026-02-16T02:20:48.109598+08:00",
|
||||
"duration_sec": 0.049,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_ASSISTANT_MONTHLY",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:20:48.111303+08:00",
|
||||
"end_time": "2026-02-16T02:20:48.164240+08:00",
|
||||
"duration_sec": 0.053,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_ASSISTANT_SALARY",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:20:48.165763+08:00",
|
||||
"end_time": "2026-02-16T02:20:48.216816+08:00",
|
||||
"duration_sec": 0.051,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_BUILD_ORDER_SUMMARY",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:20:48.218588+08:00",
|
||||
"end_time": "2026-02-16T02:20:49.720095+08:00",
|
||||
"duration_sec": 1.501,
|
||||
"status": "SUCCESS",
|
||||
"counts": {
|
||||
"fetched": 0,
|
||||
"inserted": 5117,
|
||||
"updated": 0,
|
||||
"skipped": 0,
|
||||
"errors": 0
|
||||
},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_FINANCE_DAILY",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:20:49.721608+08:00",
|
||||
"end_time": "2026-02-16T02:20:49.827953+08:00",
|
||||
"duration_sec": 0.106,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_FINANCE_DISCOUNT_DETAIL",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:20:49.830310+08:00",
|
||||
"end_time": "2026-02-16T02:20:49.882148+08:00",
|
||||
"duration_sec": 0.052,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_FINANCE_INCOME_STRUCTURE",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:20:49.884147+08:00",
|
||||
"end_time": "2026-02-16T02:20:49.937621+08:00",
|
||||
"duration_sec": 0.053,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_FINANCE_RECHARGE",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:20:49.939594+08:00",
|
||||
"end_time": "2026-02-16T02:20:49.990880+08:00",
|
||||
"duration_sec": 0.051,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_MEMBER_CONSUMPTION",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:20:49.993066+08:00",
|
||||
"end_time": "2026-02-16T02:20:50.050887+08:00",
|
||||
"duration_sec": 0.058,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_MEMBER_VISIT",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:20:50.052695+08:00",
|
||||
"end_time": "2026-02-16T02:20:50.102870+08:00",
|
||||
"duration_sec": 0.05,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_MV_REFRESH_ASSISTANT_DAILY",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:20:50.104174+08:00",
|
||||
"end_time": "2026-02-16T02:20:50.153937+08:00",
|
||||
"duration_sec": 0.05,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_MV_REFRESH_FINANCE_DAILY",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:20:50.155430+08:00",
|
||||
"end_time": "2026-02-16T02:20:50.205405+08:00",
|
||||
"duration_sec": 0.05,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_RETENTION_CLEANUP",
|
||||
"layer": "DWS",
|
||||
"start_time": "2026-02-16T02:20:50.207165+08:00",
|
||||
"end_time": "2026-02-16T02:20:50.259470+08:00",
|
||||
"duration_sec": 0.052,
|
||||
"status": "SKIP",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"layer": "INDEX",
|
||||
"start_time": "2026-02-16T02:20:50.263599+08:00",
|
||||
"end_time": "2026-02-16T02:21:00.007411+08:00",
|
||||
"duration_sec": 9.744,
|
||||
"status": "PARTIAL",
|
||||
"task_count": 4,
|
||||
"success_count": 2,
|
||||
"fail_count": 2,
|
||||
"skip_count": 0,
|
||||
"total_fetched": 0,
|
||||
"total_inserted": 0,
|
||||
"total_updated": 0,
|
||||
"total_errors": 0,
|
||||
"tasks": [
|
||||
{
|
||||
"task_code": "DWS_ML_MANUAL_IMPORT",
|
||||
"layer": "INDEX",
|
||||
"start_time": "2026-02-16T02:20:50.264449+08:00",
|
||||
"end_time": "2026-02-16T02:20:50.266491+08:00",
|
||||
"duration_sec": 0.002,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "未找到 ML 台账文件,请通过环境变量 ML_MANUAL_LEDGER_FILE 或配置 run.ml_manual_ledger_file 指定",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_NEWCONV_INDEX",
|
||||
"layer": "INDEX",
|
||||
"start_time": "2026-02-16T02:20:50.316501+08:00",
|
||||
"end_time": "2026-02-16T02:20:53.334095+08:00",
|
||||
"duration_sec": 3.018,
|
||||
"status": "SUCCESS",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_RELATION_INDEX",
|
||||
"layer": "INDEX",
|
||||
"start_time": "2026-02-16T02:20:53.335483+08:00",
|
||||
"end_time": "2026-02-16T02:20:53.696046+08:00",
|
||||
"duration_sec": 0.361,
|
||||
"status": "ERROR",
|
||||
"counts": {},
|
||||
"error": "错误: 字段 d.is_delete 不存在\nLINE 13: AND COALESCE(d.is_delete, 0) = 0\n ^\nHINT: 也许您想要引用列\"s.is_delete\"。\n",
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
},
|
||||
{
|
||||
"task_code": "DWS_WINBACK_INDEX",
|
||||
"layer": "INDEX",
|
||||
"start_time": "2026-02-16T02:20:53.749149+08:00",
|
||||
"end_time": "2026-02-16T02:21:00.005571+08:00",
|
||||
"duration_sec": 6.256,
|
||||
"status": "SUCCESS",
|
||||
"counts": {},
|
||||
"error": null,
|
||||
"api_calls": 0,
|
||||
"api_total_sec": 0.0
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"verification": {},
|
||||
"environment": {
|
||||
"store_id": 2790685415443269,
|
||||
"db_name": "",
|
||||
"api_base_url": "https://pc.ficoo.vip/apiprod/admin/v1/",
|
||||
"timezone": "Asia/Shanghai"
|
||||
}
|
||||
}
|
||||
704
apps/etl/connectors/feiqiu/scripts/debug/run_full_refresh.py
Normal file
704
apps/etl/connectors/feiqiu/scripts/debug/run_full_refresh.py
Normal file
@@ -0,0 +1,704 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""全量刷新脚本:执行 2026-01-01 ~ 2026-02-16 的 api_full Flow。
|
||||
|
||||
按层逐步执行(ODS → DWD → DWS → INDEX),内嵌精细性能计时,
|
||||
支持断点续跑(从指定层/任务重试),完成后执行 increment_verify 校验,
|
||||
校验不一致时自动补齐。计时数据和执行统计写入 JSON 中间文件。
|
||||
|
||||
用法:
|
||||
cd apps/etl/connectors/feiqiu
|
||||
python -m scripts.debug.run_full_refresh [--resume-layer DWS] [--resume-task DWS_FINANCE_DAILY]
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
import uuid
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
# ── 确保项目根目录在 sys.path ──
|
||||
_FEIQIU_ROOT = Path(__file__).resolve().parents[2]
|
||||
if str(_FEIQIU_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_FEIQIU_ROOT))
|
||||
|
||||
from config.settings import AppConfig
|
||||
from database.connection import DatabaseConnection
|
||||
from database.operations import DatabaseOperations
|
||||
from api.client import APIClient
|
||||
from orchestration.task_registry import default_registry
|
||||
from orchestration.cursor_manager import CursorManager
|
||||
from orchestration.run_tracker import RunTracker
|
||||
from orchestration.task_executor import TaskExecutor
|
||||
from orchestration.flow_runner import FlowRunner
|
||||
|
||||
|
||||
# ── 常量 ──────────────────────────────────────────────────────
|
||||
|
||||
# Flow to refresh and its fixed window; layer order comes from FlowRunner.
FLOW_NAME = "api_full"
LAYERS = FlowRunner.FLOW_LAYERS[FLOW_NAME]  # ["ODS", "DWD", "DWS", "INDEX"]
WINDOW_START_STR = "2026-01-01T00:00:00"
WINDOW_END_STR = "2026-02-16T00:00:00"
|
||||
|
||||
|
||||
# ── 数据结构 ──────────────────────────────────────────────────
|
||||
|
||||
@dataclass
class TaskTiming:
    """Timing and execution statistics for a single task."""
    task_code: str
    layer: str
    start_time: str = ""        # ISO-8601 wall-clock start
    end_time: str = ""          # ISO-8601 wall-clock end
    duration_sec: float = 0.0   # elapsed seconds (monotonic clock)
    status: str = ""  # SUCCESS / FAIL / ERROR / SKIP
    counts: dict = field(default_factory=dict)  # fetched/inserted/updated/... counters
    error: str | None = None    # exception text when status == "ERROR"
    api_calls: int = 0          # API call count, if the task result reports it
    api_total_sec: float = 0.0  # cumulative API time, if reported
|
||||
|
||||
|
||||
@dataclass
class LayerTiming:
    """Timing and aggregate statistics for a single layer run."""
    layer: str
    start_time: str = ""        # ISO-8601 wall-clock start
    end_time: str = ""          # ISO-8601 wall-clock end
    duration_sec: float = 0.0   # elapsed seconds (monotonic clock)
    status: str = ""  # SUCCESS / PARTIAL / ERROR
    task_count: int = 0
    success_count: int = 0
    fail_count: int = 0
    skip_count: int = 0
    total_fetched: int = 0
    total_inserted: int = 0
    total_updated: int = 0
    total_errors: int = 0
    tasks: list[TaskTiming] = field(default_factory=list)  # per-task details
|
||||
|
||||
|
||||
@dataclass
class RefreshReport:
    """Complete execution report for one full-refresh run."""
    flow: str = FLOW_NAME
    window_start: str = WINDOW_START_STR
    window_end: str = WINDOW_END_STR
    overall_start: str = ""
    overall_end: str = ""
    overall_duration_sec: float = 0.0
    overall_status: str = ""
    layers: list[LayerTiming] = field(default_factory=list)
    verification: dict = field(default_factory=dict)  # post-run verification summary
    environment: dict = field(default_factory=dict)   # store/db/api/timezone info
|
||||
|
||||
|
||||
# ── 工具函数 ──────────────────────────────────────────────────
|
||||
|
||||
def _setup_logging() -> logging.Logger:
|
||||
logger = logging.getLogger("full_refresh")
|
||||
logger.setLevel(logging.INFO)
|
||||
if not logger.handlers:
|
||||
handler = logging.StreamHandler(sys.stdout)
|
||||
handler.setFormatter(logging.Formatter(
|
||||
"%(asctime)s [%(levelname)s] %(message)s", datefmt="%H:%M:%S"
|
||||
))
|
||||
logger.addHandler(handler)
|
||||
return logger
|
||||
|
||||
|
||||
def _now_iso(tz: ZoneInfo) -> str:
|
||||
return datetime.now(tz).isoformat()
|
||||
|
||||
|
||||
def _build_components(config: AppConfig, logger: logging.Logger):
    """Build DB / API / TaskExecutor / FlowRunner components from *config*.

    Returns a tuple ``(db_conn, api_client, db_ops, executor, runner)``;
    the caller owns the connection lifetime and must close it when done.
    """
    db_conn = DatabaseConnection(
        dsn=config["db"]["dsn"],
        session=config["db"].get("session"),
        connect_timeout=config["db"].get("connect_timeout_sec"),
    )
    api_client = APIClient(
        base_url=config["api"]["base_url"],
        token=config["api"]["token"],
        timeout=config["api"].get("timeout_sec", 20),
        retry_max=config["api"].get("retries", {}).get("max_attempts", 3),
        headers_extra=config["api"].get("headers_extra"),
    )
    db_ops = DatabaseOperations(db_conn)
    cursor_mgr = CursorManager(db_conn)
    run_tracker = RunTracker(db_conn)

    # Executor runs individual tasks; runner orchestrates whole flows on top.
    executor = TaskExecutor(
        config, db_ops, api_client,
        cursor_mgr, run_tracker, default_registry, logger,
    )
    runner = FlowRunner(
        config, executor, default_registry,
        db_conn, api_client, logger,
    )
    return db_conn, api_client, db_ops, executor, runner
|
||||
|
||||
|
||||
def _resolve_layer_tasks(layer: str, config: AppConfig) -> list[str]:
|
||||
"""解析单层的任务列表,与 FlowRunner._resolve_tasks 逻辑一致。"""
|
||||
layer_upper = layer.upper()
|
||||
|
||||
if layer_upper == "ODS":
|
||||
ods_tasks = config.get("run.ods_tasks", [])
|
||||
if ods_tasks:
|
||||
return list(ods_tasks)
|
||||
registry_tasks = default_registry.get_tasks_by_layer("ODS")
|
||||
return sorted(registry_tasks) if registry_tasks else []
|
||||
|
||||
elif layer_upper == "DWD":
|
||||
return ["DWD_LOAD_FROM_ODS"]
|
||||
|
||||
elif layer_upper == "DWS":
|
||||
dws_tasks = config.get("run.dws_tasks", [])
|
||||
if dws_tasks:
|
||||
return list(dws_tasks)
|
||||
registry_tasks = default_registry.get_tasks_by_layer("DWS")
|
||||
return sorted(registry_tasks) if registry_tasks else []
|
||||
|
||||
elif layer_upper == "INDEX":
|
||||
index_tasks = config.get("run.index_tasks", [])
|
||||
if index_tasks:
|
||||
return list(index_tasks)
|
||||
registry_tasks = default_registry.get_tasks_by_layer("INDEX")
|
||||
return sorted(registry_tasks) if registry_tasks else []
|
||||
|
||||
return []
|
||||
|
||||
|
||||
|
||||
def _sanitize_for_json(obj):
|
||||
"""递归处理不可序列化的值。"""
|
||||
if isinstance(obj, dict):
|
||||
return {k: _sanitize_for_json(v) for k, v in obj.items()}
|
||||
if isinstance(obj, (list, tuple)):
|
||||
return [_sanitize_for_json(v) for v in obj]
|
||||
if isinstance(obj, datetime):
|
||||
return obj.isoformat()
|
||||
return obj
|
||||
|
||||
|
||||
def _save_json(data, path: Path):
    """Serialize *data* to *path* as pretty-printed UTF-8 JSON.

    Parent directories are created on demand; non-serializable values are
    normalized via _sanitize_for_json, with str() as the last resort.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(
        _sanitize_for_json(data), ensure_ascii=False, indent=2, default=str,
    )
    path.write_text(payload, encoding="utf-8")
|
||||
|
||||
|
||||
def _load_checkpoint(path: Path) -> dict | None:
|
||||
"""加载断点续跑的检查点文件。"""
|
||||
if path.exists():
|
||||
try:
|
||||
return json.loads(path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _save_checkpoint(path: Path, data: dict):
    """Persist the resume checkpoint (thin wrapper over _save_json)."""
    _save_json(data, path)
|
||||
|
||||
|
||||
# ── 单任务执行(带精细计时)─────────────────────────────────
|
||||
|
||||
def _execute_task_with_timing(
    task_code: str,
    layer: str,
    executor: TaskExecutor,
    config: AppConfig,
    db_conn: DatabaseConnection,
    logger: logging.Logger,
    tz: ZoneInfo,
) -> TaskTiming:
    """Execute a single task and record fine-grained timing.

    Never raises: any exception from the executor is captured into the
    returned TaskTiming (status="ERROR") and the DB transaction is rolled
    back so later tasks do not inherit an aborted transaction.
    """
    timing = TaskTiming(task_code=task_code, layer=layer)
    store_id = int(config.get("app.store_id"))
    # Unique run id per attempt so retries are distinguishable in tracking.
    run_uuid = f"full-refresh-{task_code.lower()}-{uuid.uuid4().hex[:8]}"

    timing.start_time = _now_iso(tz)
    t0 = time.monotonic()  # monotonic clock for durations; wall clock only for stamps

    try:
        task_result = executor.run_single_task(
            task_code=task_code,
            run_uuid=run_uuid,
            store_id=store_id,
            data_source="online",
        )
        timing.duration_sec = round(time.monotonic() - t0, 3)
        timing.end_time = _now_iso(tz)

        # Parse the executor's result dict.
        raw_status = (task_result.get("status") or "").upper()
        counts = task_result.get("counts") or {}
        timing.counts = counts
        timing.status = raw_status if raw_status else "COMPLETE"

        # Extract API call statistics when the task result includes them.
        api_stats = task_result.get("api_stats") or {}
        timing.api_calls = api_stats.get("calls", 0)
        timing.api_total_sec = api_stats.get("total_sec", 0.0)

        logger.info(
            "  ✓ %s: %s (%.1fs) fetched=%s inserted=%s updated=%s errors=%s",
            task_code, timing.status, timing.duration_sec,
            counts.get("fetched", 0), counts.get("inserted", 0),
            counts.get("updated", 0), counts.get("errors", 0),
        )

    except Exception as exc:
        timing.duration_sec = round(time.monotonic() - t0, 3)
        timing.end_time = _now_iso(tz)
        timing.status = "ERROR"
        timing.error = str(exc)
        logger.error("  ✗ %s: 异常 (%.1fs): %s", task_code, timing.duration_sec, exc)
        # CHANGE 2026-02-16 | rollback after a task failure so the aborted
        # transaction does not cascade (psycopg InFailedSqlTransaction).
        try:
            db_conn.rollback()
        except Exception:
            pass  # best-effort: connection may already be closed

    return timing
|
||||
|
||||
|
||||
# ── 单层执行(带断点续跑)───────────────────────────────────
|
||||
|
||||
def _execute_layer(
    layer: str,
    config: AppConfig,
    executor: TaskExecutor,
    db_conn: DatabaseConnection,
    logger: logging.Logger,
    tz: ZoneInfo,
    resume_task: str | None = None,
    checkpoint_path: Path | None = None,
) -> LayerTiming:
    """Run every task of one layer sequentially, optionally resuming mid-layer.

    When *resume_task* names a task in this layer, all tasks preceding it are
    recorded with status ``SKIPPED_RESUME`` instead of being executed.  After
    each executed task a checkpoint is written (if *checkpoint_path* is given)
    and the DB connection is re-validated so one bad task cannot poison the
    rest of the layer.
    """
    result = LayerTiming(layer=layer)
    result.start_time = _now_iso(tz)
    started = time.monotonic()

    tasks = _resolve_layer_tasks(layer, config)
    total = len(tasks)
    result.task_count = total

    logger.info("━" * 70)
    logger.info("▶ 层 %s: %d 个任务", layer, total)
    if tasks:
        logger.info(" 任务列表: %s", ", ".join(tasks))

    # Resume handling: only arm skipping when the named task actually exists
    # in this layer; otherwise warn and execute everything.
    resume_upper = resume_task.upper() if resume_task else None
    skipping = False
    if resume_upper is not None:
        if resume_upper in {t.upper() for t in tasks}:
            skipping = True
            logger.info(" 断点续跑: 从 %s 开始", resume_upper)
        else:
            logger.warning(" 断点续跑: %s 不在本层任务列表中,执行全部", resume_upper)

    for idx, task_code in enumerate(tasks, start=1):
        if skipping:
            if task_code.upper() != resume_upper:
                # Still before the resume point: record and move on.
                logger.info(" [%d/%d] ⏭ 跳过: %s (断点续跑)", idx, total, task_code)
                result.tasks.append(
                    TaskTiming(task_code=task_code, layer=layer, status="SKIPPED_RESUME")
                )
                result.skip_count += 1
                continue
            skipping = False
            logger.info(" [%d/%d] ▶ 恢复执行: %s", idx, total, task_code)
        else:
            logger.info(" [%d/%d] %s", idx, total, task_code)

        timing = _execute_task_with_timing(
            task_code, layer, executor, config, db_conn, logger, tz,
        )
        result.tasks.append(timing)

        # Tally the outcome.  Anything that is not ERROR/SKIP counts as a
        # success (the executor's status vocabulary is loose: SUCCESS, 成功,
        # COMPLETE, PARTIAL and unknown strings are all treated as success).
        if timing.status == "ERROR":
            result.fail_count += 1
        elif timing.status == "SKIP":
            result.skip_count += 1
        else:
            result.success_count += 1

        # Accumulate per-task record counts into the layer totals.
        for count_key, total_attr in (
            ("fetched", "total_fetched"),
            ("inserted", "total_inserted"),
            ("updated", "total_updated"),
            ("errors", "total_errors"),
        ):
            setattr(result, total_attr,
                    getattr(result, total_attr) + timing.counts.get(count_key, 0))

        # Checkpoint after every finished task so a crash can resume here.
        if checkpoint_path:
            _save_checkpoint(checkpoint_path, {
                "last_completed_layer": layer,
                "last_completed_task": task_code,
                "timestamp": _now_iso(tz),
            })

        # Re-validate the connection in case the task left it unusable.
        db_conn.ensure_open()

    result.duration_sec = round(time.monotonic() - started, 3)
    result.end_time = _now_iso(tz)

    # Layer verdict: ERROR only when there were failures and no successes.
    if result.fail_count == 0:
        result.status = "SUCCESS"
    elif result.success_count > 0:
        result.status = "PARTIAL"
    else:
        result.status = "ERROR"

    logger.info(
        " 层 %s 完成: %s (%.1fs) 成功=%d 失败=%d 跳过=%d",
        layer, result.status, result.duration_sec,
        result.success_count, result.fail_count, result.skip_count,
    )
    logger.info(
        " 汇总: fetched=%d inserted=%d updated=%d errors=%d",
        result.total_fetched, result.total_inserted,
        result.total_updated, result.total_errors,
    )

    return result
|
||||
|
||||
|
||||
|
||||
# ── 校验阶段 ──────────────────────────────────────────────────
|
||||
|
||||
def _run_verification(
    runner: FlowRunner,
    config: AppConfig,
    window_start: datetime,
    window_end: datetime,
    logger: logging.Logger,
    tz: ZoneInfo,
) -> dict:
    """Run the increment_verify pass and normalize its summary into a dict.

    Delegates to ``FlowRunner._run_verification`` over all layers with
    month-sized window splits; inconsistencies found are backfilled by the
    runner itself.  On failure this returns an ERROR dict carrying the
    exception text and traceback instead of raising.
    """
    logger.info("")
    logger.info("=" * 70)
    logger.info("▶ 开始 increment_verify 校验")
    logger.info("=" * 70)

    started_at = _now_iso(tz)
    clock0 = time.monotonic()

    try:
        # Delegate the actual table-by-table comparison to the runner.
        summary = runner._run_verification(
            layers=LAYERS,
            window_start=window_start,
            window_end=window_end,
            window_split="month",
        )
    except Exception as exc:
        elapsed = round(time.monotonic() - clock0, 3)
        logger.error(" ✗ 校验异常 (%.1fs): %s", elapsed, exc)
        return {
            "status": "ERROR",
            "start_time": started_at,
            "end_time": _now_iso(tz),
            "duration_sec": elapsed,
            "error": str(exc),
            "traceback": traceback.format_exc(),
        }

    elapsed = round(time.monotonic() - clock0, 3)
    outcome = {
        "status": summary.get("status", "UNKNOWN"),
        "start_time": started_at,
        "end_time": _now_iso(tz),
        "duration_sec": elapsed,
        "total_tables": summary.get("total_tables", 0),
        "consistent_tables": summary.get("consistent_tables", 0),
        "total_backfilled": summary.get("total_backfilled", 0),
        "error_tables": summary.get("error_tables", 0),
        "layers": summary.get("layers", {}),
    }

    logger.info(
        " 校验完成: %s (%.1fs) 表数=%d 一致=%d 补齐=%d 错误=%d",
        outcome["status"], elapsed,
        outcome["total_tables"], outcome["consistent_tables"],
        outcome["total_backfilled"], outcome["error_tables"],
    )
    # Surface any automatic backfills prominently.
    if outcome["total_backfilled"] > 0:
        logger.info(" ℹ 已自动补齐 %d 处不一致", outcome["total_backfilled"])

    return outcome
|
||||
|
||||
|
||||
# ── 主流程 ────────────────────────────────────────────────────
|
||||
|
||||
def run_full_refresh(
    resume_layer: str | None = None,
    resume_task: str | None = None,
    skip_verify: bool = False,
) -> RefreshReport:
    """执行全量刷新。

    Runs every layer of the flow over the fixed full-refresh window
    (``WINDOW_START_STR`` ~ ``WINDOW_END_STR``), optionally resuming from a
    given layer/task, then (unless skipped) runs the increment_verify pass
    and writes a timing report to disk.

    Args:
        resume_layer: 从指定层开始执行(断点续跑),如 "DWS"
        resume_task: 在恢复层中从指定任务开始(断点续跑),如 "DWS_FINANCE_DAILY"
        skip_verify: 跳过校验阶段(调试用)
    Returns:
        RefreshReport 完整执行报告
    """
    logger = _setup_logging()
    logger.info("=" * 70)
    logger.info("全量刷新开始")
    logger.info("Flow: %s | 窗口: %s ~ %s", FLOW_NAME, WINDOW_START_STR, WINDOW_END_STR)
    logger.info("=" * 70)

    # Load config and pin the refresh window to the configured timezone.
    config = AppConfig.load()
    tz = ZoneInfo(config.get("app.timezone", "Asia/Shanghai"))
    window_start = datetime.fromisoformat(WINDOW_START_STR).replace(tzinfo=tz)
    window_end = datetime.fromisoformat(WINDOW_END_STR).replace(tzinfo=tz)

    report = RefreshReport()
    report.overall_start = _now_iso(tz)
    report.environment = {
        "store_id": config.get("app.store_id"),
        "db_name": config.get("db.name", ""),
        "api_base_url": config.get("api.base_url", ""),
        "timezone": str(tz),
    }

    logger.info("门店 ID: %s", config.get("app.store_id"))
    logger.info("数据库: %s", config.get("db.name", ""))
    logger.info("API: %s", config.get("api.base_url", ""))

    # 设置 window_override 让所有任务使用统一的全量窗口
    config.config.setdefault("run", {}).setdefault("window_override", {})
    config.config["run"]["window_override"]["start"] = window_start
    config.config["run"]["window_override"]["end"] = window_end

    # Build shared components (DB connection, API client, executor, runner).
    db_conn, api_client, db_ops, executor, runner = _build_components(config, logger)

    # Output directory for reports and the resume checkpoint.
    output_dir = _FEIQIU_ROOT / "scripts" / "debug" / "output"
    output_dir.mkdir(parents=True, exist_ok=True)
    checkpoint_path = output_dir / "full_refresh_checkpoint.json"

    overall_t0 = time.monotonic()

    # FIX: previously db_conn.close() was only reached on the happy path, so
    # any exception raised by a layer or by verification leaked the DB
    # connection.  The whole run now executes under try/finally.
    try:
        # Determine which layers to run (layer-level resume support).
        layers_to_run = list(LAYERS)
        if resume_layer:
            resume_layer_upper = resume_layer.upper()
            layer_names_upper = [l.upper() for l in layers_to_run]
            if resume_layer_upper in layer_names_upper:
                start_idx = layer_names_upper.index(resume_layer_upper)
                skipped_layers = layers_to_run[:start_idx]
                layers_to_run = layers_to_run[start_idx:]
                if skipped_layers:
                    logger.info("断点续跑: 跳过层 %s,从 %s 开始", skipped_layers, resume_layer_upper)
            else:
                logger.warning("断点续跑: 层 %s 不在 Flow 定义中,执行全部", resume_layer_upper)

        # Execute layers in order.
        for layer_idx, layer in enumerate(layers_to_run):
            # The task-level resume point only applies to the first (resumed)
            # layer; subsequent layers always start from their first task.
            current_resume_task = resume_task if (layer_idx == 0 and resume_layer) else None

            layer_timing = _execute_layer(
                layer=layer,
                config=config,
                executor=executor,
                db_conn=db_conn,
                logger=logger,
                tz=tz,
                resume_task=current_resume_task,
                checkpoint_path=checkpoint_path,
            )
            report.layers.append(layer_timing)

            # A fully failed layer likely poisons downstream layers — warn
            # but keep going so the report captures everything.
            if layer_timing.status == "ERROR":
                logger.warning("")
                logger.warning("⚠ 层 %s 全部失败,后续层可能受影响", layer)
                logger.warning(" 可使用 --resume-layer %s 从此层重试", layer)

            # Persist partial timings so a crash doesn't lose collected data.
            _save_intermediate_report(report, output_dir, tz)

        # Verification phase (optional).
        if not skip_verify:
            report.verification = _run_verification(
                runner, config, window_start, window_end, logger, tz,
            )
        else:
            logger.info("")
            logger.info("⏭ 跳过校验阶段 (--skip-verify)")
            report.verification = {"status": "SKIPPED"}

        # Overall verdict: SUCCESS only when every layer succeeded; any
        # other mix (partial or failed layers) is reported as PARTIAL.
        # (The original had two redundant branches both yielding PARTIAL.)
        report.overall_duration_sec = round(time.monotonic() - overall_t0, 3)
        report.overall_end = _now_iso(tz)
        all_success = all(lt.status == "SUCCESS" for lt in report.layers)
        report.overall_status = "SUCCESS" if all_success else "PARTIAL"

        _print_summary(report, logger)

        # Persist the final timestamped report.
        ts = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
        final_path = output_dir / f"full_refresh_{ts}.json"
        _save_json(asdict(report), final_path)
        logger.info("计时数据已保存: %s", final_path)

        # Drop the checkpoint only after a fully successful run, so a failed
        # run can still be resumed from it.
        if checkpoint_path.exists() and report.overall_status == "SUCCESS":
            checkpoint_path.unlink()
            logger.info("检查点已清理")

        return report
    finally:
        # Always release the DB connection, success or failure.
        db_conn.close()
|
||||
|
||||
|
||||
def _save_intermediate_report(report: RefreshReport, output_dir: Path, tz: ZoneInfo):
    """Snapshot the current report so a mid-run crash keeps collected timings."""
    _save_json(asdict(report), output_dir / "full_refresh_intermediate.json")
|
||||
|
||||
|
||||
# ── 汇总输出 ──────────────────────────────────────────────────
|
||||
|
||||
def _print_summary(report: RefreshReport, logger: logging.Logger):
    """Log a human-readable digest of the refresh: per-layer stats, record
    totals, the slowest tasks, failures, and the verification outcome."""
    logger.info("")
    logger.info("=" * 70)
    logger.info("全量刷新汇总")
    logger.info("=" * 70)
    logger.info("状态: %s | 总耗时: %.1fs", report.overall_status, report.overall_duration_sec)
    logger.info("")

    # Per-layer status table.
    logger.info("%-8s %-10s %8s %8s %8s %8s %10s", "层", "状态", "成功", "失败", "跳过", "任务数", "耗时(s)")
    logger.info("-" * 70)
    for layer_t in report.layers:
        logger.info(
            "%-8s %-10s %8d %8d %8d %8d %10.1f",
            layer_t.layer, layer_t.status, layer_t.success_count, layer_t.fail_count,
            layer_t.skip_count, layer_t.task_count, layer_t.duration_sec,
        )

    # Record-count totals across all layers.
    logger.info("")
    logger.info("记录数汇总:")
    totals = [
        sum(getattr(layer_t, attr) for layer_t in report.layers)
        for attr in ("total_fetched", "total_inserted", "total_updated", "total_errors")
    ]
    logger.info(" fetched=%d inserted=%d updated=%d errors=%d", *totals)

    # Five slowest executed tasks (resume-skipped tasks excluded).
    all_tasks = [t for layer_t in report.layers for t in layer_t.tasks]
    executed = [t for t in all_tasks if t.status != "SKIPPED_RESUME"]
    slowest = sorted(executed, key=lambda t: t.duration_sec, reverse=True)[:5]
    if slowest:
        logger.info("")
        logger.info("耗时 Top 5 任务:")
        for t in slowest:
            logger.info(" %-30s %8.1fs [%s] %s", t.task_code, t.duration_sec, t.layer, t.status)

    # Failed tasks with their recorded error messages.
    failed = [t for t in all_tasks if t.status == "ERROR"]
    if failed:
        logger.info("")
        logger.info("失败任务 (%d 个):", len(failed))
        for t in failed:
            logger.info(" ✗ %s [%s]: %s", t.task_code, t.layer, t.error or "未知错误")

    # Verification outcome, when a verification phase ran.
    if report.verification:
        v = report.verification
        logger.info("")
        logger.info("校验结果: %s", v.get("status", "N/A"))
        if v.get("total_tables"):
            logger.info(
                " 表数=%d 一致=%d 补齐=%d 错误=%d",
                v.get("total_tables", 0), v.get("consistent_tables", 0),
                v.get("total_backfilled", 0), v.get("error_tables", 0),
            )
|
||||
|
||||
|
||||
# ── CLI 入口 ──────────────────────────────────────────────────
|
||||
|
||||
def parse_args():
    """Parse CLI options for the full-refresh entry point (resume + verify flags)."""
    cli = argparse.ArgumentParser(
        description="全量刷新: 执行 2026-01-01 ~ 2026-02-16 的 api_full Flow",
    )
    # Option table keeps each flag's spec in one place.
    option_specs = (
        ("--resume-layer",
         {"type": str, "default": None, "help": "断点续跑: 从指定层开始(如 DWS)"}),
        ("--resume-task",
         {"type": str, "default": None,
          "help": "断点续跑: 在恢复层中从指定任务开始(如 DWS_FINANCE_DAILY)"}),
        ("--skip-verify",
         {"action": "store_true", "help": "跳过校验阶段"}),
    )
    for flag, kwargs in option_specs:
        cli.add_argument(flag, **kwargs)
    return cli.parse_args()
|
||||
|
||||
|
||||
def main():
    """CLI entry: run the full refresh and map the result to an exit code."""
    args = parse_args()
    report = run_full_refresh(
        resume_layer=args.resume_layer,
        resume_task=args.resume_task,
        skip_verify=args.skip_verify,
    )
    # Exit 0 only for a fully successful run; anything else signals failure.
    sys.exit(0 if report.overall_status == "SUCCESS" else 1)
|
||||
|
||||
|
||||
# Script entry point: delegate to main() only when executed directly.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user