初始提交:飞球 ETL 系统全量代码
This commit is contained in:
247
utils/reporting.py
Normal file
247
utils/reporting.py
Normal file
@@ -0,0 +1,247 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""任务结果汇总与格式化工具。
|
||||
|
||||
提供多种格式的任务报告输出:
|
||||
- 简单文本格式
|
||||
- 详细表格格式(ASCII)
|
||||
- 任务总结报告
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Iterable, List, Optional
|
||||
|
||||
|
||||
def summarize_counts(task_results: Iterable[dict]) -> dict:
    """Aggregate the ``counts`` of several task results.

    Args:
        task_results: Iterable of dicts shaped like
            ``{"task_code": str, "counts": {...}}``. The task code may also be
            stored under ``"code"``. ``None`` (for the iterable itself or for a
            single entry) is treated as "no data" instead of raising.

    Returns:
        ``{"total": {...}, "details": [...]}`` where ``total`` holds the summed
        ``fetched`` / ``inserted`` / ``updated`` / ``skipped`` / ``errors``
        counters and ``details`` holds one row per task with the same counters
        plus ``task_code``.
    """
    totals = {"fetched": 0, "inserted": 0, "updated": 0, "skipped": 0, "errors": 0}
    details = []

    if task_results is None:
        task_results = ()

    for res in task_results:
        res = res or {}
        code = res.get("task_code") or res.get("code") or "UNKNOWN"
        counts = res.get("counts") or {}
        row = {"task_code": code}
        # Only values are reassigned below, so iterating the dict is safe.
        for key in totals:
            # Missing, None and empty values all count as zero.
            val = int(counts.get(key) or 0)
            row[key] = val
            totals[key] += val
        details.append(row)

    return {"total": totals, "details": details}
|
||||
|
||||
|
||||
def format_report(summary: dict) -> str:
    """Render the output of ``summarize_counts`` as plain text (simple format).

    Args:
        summary: Dict with a ``"total"`` counter dict and a ``"details"`` list
            of per-task rows. Missing or ``None`` entries are treated as empty.

    Returns:
        One ``TOTAL ...`` line followed by one ``<task_code>: ...`` line per
        detail row, joined with newlines.
    """
    summary = summary or {}
    counter_keys = ("fetched", "inserted", "updated", "skipped", "errors")

    def render(prefix: str, counters: dict) -> str:
        # Same "key=value" layout for the total line and every task line.
        fields = " ".join(f"{key}={counters.get(key, 0)}" for key in counter_keys)
        return f"{prefix} {fields}"

    lines = [render("TOTAL", summary.get("total") or {})]
    lines.extend(
        render(f"{row.get('task_code', 'UNKNOWN')}:", row)
        for row in summary.get("details") or []
    )
    return "\n".join(lines)
|
||||
|
||||
|
||||
# Number of terminal columns between the left and right box borders.
_BOX_INNER_WIDTH = 62
_BOX_TOP = "╔" + "═" * _BOX_INNER_WIDTH + "╗"
_BOX_SEPARATOR = "╠" + "═" * _BOX_INNER_WIDTH + "╣"
_BOX_BOTTOM = "╚" + "═" * _BOX_INNER_WIDTH + "╝"
# Columns reserved for the "   - <label>:" part of a statistics row; wide
# enough for the longest label used below ("不一致补齐").
_STAT_LABEL_WIDTH = 17
# Width of the right-aligned number field in a statistics row.
_STAT_NUMBER_WIDTH = 12
# The pipeline summary lists at most this many tasks individually.
_MAX_LISTED_TASKS = 10


def _display_width(text: str) -> int:
    """Return the number of terminal columns *text* occupies.

    East Asian wide/fullwidth characters take two columns; everything else is
    counted as one. ``len()`` counts code points, which is why padding based
    on it misaligns rows that contain CJK text.
    """
    # Function-scope import so the module's import header stays untouched.
    import unicodedata

    return sum(2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1 for ch in text)


def _clip_to_width(text: str, max_width: int) -> str:
    """Truncate *text* so it occupies at most *max_width* columns."""
    used = 0
    for index, ch in enumerate(text):
        used += _display_width(ch)
        if used > max_width:
            return text[:index]
    return text


def _pad_to_width(text: str, width: int) -> str:
    """Right-pad *text* with spaces until it occupies *width* columns."""
    return text + " " * max(0, width - _display_width(text))


def _box_row(content: str = "") -> str:
    """Wrap *content* in vertical borders, padded to the box width."""
    return f"║{_pad_to_width(content, _BOX_INNER_WIDTH)}║"


def _box_title(title: str) -> str:
    """Return a bordered row with *title* centred by display width."""
    free = max(0, _BOX_INNER_WIDTH - _display_width(title))
    left = free // 2
    return f"║{' ' * left}{title}{' ' * (free - left)}║"


def _as_int(value: Any) -> int:
    """Coerce a counter to ``int``; ``None`` and empty values become 0."""
    return int(value or 0)


def _box_stat(label: str, value: Any) -> str:
    """Return a bordered ``- label: number`` row with aligned number column."""
    head = _pad_to_width(f"   - {label}:", _STAT_LABEL_WIDTH)
    return _box_row(f"{head}{_as_int(value):>{_STAT_NUMBER_WIDTH},}")


def _format_timestamp(value: Any, time_only: bool = False) -> str:
    """Render a start/end time given as ``datetime`` or string.

    Strings are sliced rather than parsed: the first 19 characters for the
    full form, characters 11-19 for the time-only form (the string is returned
    unchanged when it is shorter than 19 characters). Any other type yields
    ``"-"``.
    """
    if isinstance(value, datetime):
        return value.strftime("%H:%M:%S" if time_only else "%Y-%m-%d %H:%M:%S")
    if isinstance(value, str):
        if not time_only:
            return value[:19]
        return value[11:19] if len(value) >= 19 else value
    return "-"


def format_task_summary(result: dict) -> str:
    """Build the boxed summary report for a single task run.

    Args:
        result: Task result dict. Keys read here:
            - task_code (falls back to ``code``, then ``"UNKNOWN"``)
            - status
            - start_time / end_time (``datetime`` or string)
            - elapsed_seconds
            - counts: dict with fetched/inserted/updated/skipped/errors
            - verification_result (optional)
            - error_message (optional)
            Keys that are missing or ``None`` fall back to defaults.

    Returns:
        The report as a newline-joined string framed with box-drawing
        characters. Every row is padded by display width, so the right border
        stays aligned when values contain CJK text.
    """
    task_code = str(result.get("task_code") or result.get("code") or "UNKNOWN")
    status = str(result.get("status") or "未知")
    counts = result.get("counts") or {}
    verification = result.get("verification_result")
    error_message = result.get("error_message")

    start_str = _format_timestamp(result.get("start_time"))
    end_str = _format_timestamp(result.get("end_time"), time_only=True)
    elapsed_str = _format_duration(result.get("elapsed_seconds") or 0)

    lines = [
        _BOX_TOP,
        _box_title("任务执行总结"),
        _BOX_SEPARATOR,
        _box_row(f" 任务代码: {task_code}"),
        _box_row(f" 执行状态: {status}"),
        _box_row(f" 执行时间: {start_str} ~ {end_str} ({elapsed_str})"),
        _BOX_SEPARATOR,
        _box_row(" 数据统计"),
        _box_stat("获取记录", counts.get("fetched")),
        _box_stat("新增记录", counts.get("inserted")),
        _box_stat("更新记录", counts.get("updated")),
        _box_stat("跳过记录", counts.get("skipped")),
        _box_stat("错误记录", counts.get("errors")),
    ]

    # Verification section (only when a verification result is present).
    if verification:
        backfilled_missing = verification.get(
            "backfilled_missing_count", verification.get("backfilled_count", 0)
        )
        backfilled_mismatch = verification.get("backfilled_mismatch_count", 0)
        lines.extend([
            _BOX_SEPARATOR,
            _box_row(" 校验结果"),
            _box_stat("源数据量", verification.get("source_count")),
            _box_stat("目标数据量", verification.get("target_count")),
            _box_stat("缺失补齐", backfilled_missing),
            _box_stat("不一致补齐", backfilled_mismatch),
        ])

    # Error section: collapse whitespace/newlines so the message stays on one
    # row, then cut it by display width so it cannot cross the border.
    if error_message:
        error_str = _clip_to_width(" ".join(str(error_message).split()), 48)
        lines.extend([
            _BOX_SEPARATOR,
            _box_row(f" 错误信息: {error_str}"),
        ])

    lines.append(_BOX_BOTTOM)

    return "\n".join(lines)


def format_pipeline_summary(
    pipeline_name: str,
    task_results: List[dict],
    start_time: datetime,
    end_time: datetime,
    verification_summary: Optional[dict] = None,
) -> str:
    """Build the boxed summary report for a whole pipeline run.

    Args:
        pipeline_name: Name of the pipeline.
        task_results: Result dicts of the individual tasks; ``None`` is
            treated as an empty list.
        start_time: Pipeline start time.
        end_time: Pipeline end time.
        verification_summary: Optional aggregated verification figures.

    Returns:
        The report as a newline-joined string framed with box-drawing
        characters. At most ten tasks are listed individually; the remainder
        is summarised in one trailing row.
    """
    task_results = list(task_results or [])
    elapsed_str = _format_duration((end_time - start_time).total_seconds())

    # Aggregate counters across all tasks.
    totals = summarize_counts(task_results).get("total", {})

    # A task counts as successful only when its status is exactly "成功".
    success_count = sum(1 for r in task_results if r.get("status") == "成功")
    fail_count = len(task_results) - success_count

    started = start_time.strftime("%Y-%m-%d %H:%M:%S")
    finished = end_time.strftime("%H:%M:%S")

    lines = [
        _BOX_TOP,
        _box_title("管道执行总结"),
        _BOX_SEPARATOR,
        _box_row(f" 管道名称: {pipeline_name}"),
        _box_row(f" 任务数量: {len(task_results)} (成功: {success_count}, 失败: {fail_count})"),
        _box_row(f" 执行时间: {started} ~ {finished} ({elapsed_str})"),
        _BOX_SEPARATOR,
        _box_row(" 数据汇总"),
        _box_stat("总获取", totals.get("fetched")),
        _box_stat("总新增", totals.get("inserted")),
        _box_stat("总更新", totals.get("updated")),
        _box_stat("总跳过", totals.get("skipped")),
        _box_stat("总错误", totals.get("errors")),
    ]

    # Verification section (only when a summary is supplied).
    if verification_summary:
        total_backfilled_missing = verification_summary.get(
            "total_backfilled_missing",
            verification_summary.get("total_backfilled", 0),
        )
        total_backfilled_mismatch = verification_summary.get("total_backfilled_mismatch", 0)
        lines.extend([
            _BOX_SEPARATOR,
            _box_row(" 校验汇总"),
            _box_stat("校验表数", verification_summary.get("total_tables")),
            _box_stat("一致表数", verification_summary.get("consistent_tables")),
            _box_stat("总补齐数", verification_summary.get("total_backfilled")),
            _box_stat("缺失补齐", total_backfilled_missing),
            _box_stat("不一致补齐", total_backfilled_mismatch),
        ])

    # Per-task detail rows.
    lines.extend([
        _BOX_SEPARATOR,
        _box_row(" 任务明细"),
    ])

    for result in task_results[:_MAX_LISTED_TASKS]:
        raw_code = str(result.get("task_code") or result.get("code") or "UNKNOWN")
        # Clip and pad by display width so the "获取" column lines up.
        task_code = _pad_to_width(_clip_to_width(raw_code, 25), 25)
        status = "✓" if result.get("status") == "成功" else "✗"
        fetched = _as_int((result.get("counts") or {}).get("fetched"))
        lines.append(_box_row(f" {status} {task_code} 获取:{fetched:>6,}"))

    if len(task_results) > _MAX_LISTED_TASKS:
        lines.append(_box_row(f" ... 还有 {len(task_results) - _MAX_LISTED_TASKS} 个任务 ..."))

    lines.append(_BOX_BOTTOM)

    return "\n".join(lines)


def _format_duration(seconds: float) -> str:
    """Format a duration in seconds as a short human-readable string.

    Durations that display as less than a minute use one decimal
    (``"12.3秒"``), durations under an hour use ``"M分S秒"`` and longer ones
    use ``"H时M分"``. The value is rounded before it is split into units, so
    a seconds field of 60 (e.g. ``"1分60秒"``) can never be produced.
    """
    # Compare the rounded value: 59.96 would otherwise be shown as "60.0秒".
    if round(seconds, 1) < 60:
        return f"{seconds:.1f}秒"

    total = int(round(seconds))
    if total < 3600:
        mins, secs = divmod(total, 60)
        return f"{mins}分{secs}秒"

    hours, remainder = divmod(total, 3600)
    return f"{hours}时{remainder // 60}分"
|
||||
Reference in New Issue
Block a user