Files
ZQYY.FQ-ETL/utils/reporting.py

248 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
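"""Helpers for aggregating and formatting task results.

Several report formats are provided:
- a simple plain-text format
- a detailed boxed table format (ASCII-style borders)
- task / pipeline summary reports
"""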
from __future__ import annotations
from datetime import datetime
from typing import Any, Dict, Iterable, List, Optional
def summarize_counts(task_results: Iterable[dict]) -> dict:
    """Aggregate the per-task ``counts`` into a grand total plus a detail list.

    Args:
        task_results: Iterable of dicts shaped like
            ``{"task_code": str, "counts": {...}}``. ``code`` is accepted as
            a fallback for ``task_code``; a missing/empty identifier becomes
            ``"UNKNOWN"``.

    Returns:
        ``{"total": {...}, "details": [...]}`` where ``total`` holds the sum
        of every metric and ``details`` holds one row per task.
    """
    metric_names = ("fetched", "inserted", "updated", "skipped", "errors")
    grand_total = dict.fromkeys(metric_names, 0)
    per_task = []

    for item in task_results:
        task_id = item.get("task_code") or item.get("code") or "UNKNOWN"
        raw_counts = item.get("counts") or {}
        entry = {"task_code": task_id}
        for name in metric_names:
            # Missing or falsy values (None, "", 0) all count as zero.
            amount = int(raw_counts.get(name, 0) or 0)
            entry[name] = amount
            grand_total[name] += amount
        per_task.append(entry)

    return {"total": grand_total, "details": per_task}
def format_report(summary: dict) -> str:
    """Render the output of ``summarize_counts`` as plain text (simple format).

    The first line carries the grand totals; each following line carries one
    task's numbers. Missing metrics are shown as 0 and a missing task code as
    ``UNKNOWN``.
    """
    metric_names = ("fetched", "inserted", "updated", "skipped", "errors")

    def render(prefix, source):
        # One "name=value" pair per metric, space separated, after the prefix.
        pairs = " ".join(f"{name}={source.get(name, 0)}" for name in metric_names)
        return prefix + pairs

    output = [render("TOTAL ", summary.get("total", {}))]
    for entry in summary.get("details", []):
        label = entry.get("task_code", "UNKNOWN")
        output.append(render(f"{label}: ", entry))
    return "\n".join(output)
def format_task_summary(result: dict) -> str:
    """Build the boxed, multi-line summary report for a single task run.

    Args:
        result: Task execution result. Keys read here:
            - task_code: task identifier (falls back to "UNKNOWN")
            - status: execution status text (falls back to "未知")
            - start_time / end_time: ``datetime`` objects or timestamp strings
            - elapsed_seconds: run time in seconds
            - counts: mapping with fetched/inserted/updated/skipped/errors
            - verification_result: optional verification mapping
            - error_message: optional error text

    Returns:
        The report as one newline-joined string.
    """

    def as_int(value: Any) -> int:
        # Same coercion summarize_counts applies: None / "" / missing -> 0,
        # so a null counter cannot crash the ``,`` number formatting below.
        return int(value or 0)

    # ``dict.get(key, default)`` only covers *missing* keys; ``or`` also
    # covers keys that are present but None, and str() keeps the ``:<50``
    # padding valid for non-string values.
    task_code = str(result.get("task_code") or "UNKNOWN")
    status = str(result.get("status") or "未知")
    counts = result.get("counts") or {}
    verification = result.get("verification_result")
    error_message = result.get("error_message")

    # Timing
    start_time = result.get("start_time")
    end_time = result.get("end_time")
    elapsed = result.get("elapsed_seconds") or 0

    if isinstance(start_time, str):
        # Keeps the first 19 characters; presumably "YYYY-MM-DD HH:MM:SS"
        # style timestamps -- confirm against the producer.
        start_str = start_time[:19]
    elif isinstance(start_time, datetime):
        start_str = start_time.strftime("%Y-%m-%d %H:%M:%S")
    else:
        start_str = "-"

    if isinstance(end_time, str):
        # Only the time-of-day slice is shown when the string is long enough.
        end_str = end_time[11:19] if len(end_time) >= 19 else end_time
    elif isinstance(end_time, datetime):
        end_str = end_time.strftime("%H:%M:%S")
    else:
        end_str = "-"

    elapsed_str = _format_duration(elapsed)
    # A negative repeat count yields "", so long durations simply get no pad.
    elapsed_pad = " " * (31 - len(elapsed_str))

    # Report body
    lines = [
        "╔══════════════════════════════════════════════════════════════╗",
        "║ 任务执行总结 ║",
        "╠══════════════════════════════════════════════════════════════╣",
        f"║ 任务代码: {task_code:<50}",
        f"║ 执行状态: {status:<50}",
        f"║ 执行时间: {start_str} ~ {end_str} ({elapsed_str}){elapsed_pad}",
        "╠══════════════════════════════════════════════════════════════╣",
        "║ 数据统计 ║",
        f"║ - 获取记录: {as_int(counts.get('fetched')):>10,}",
        f"║ - 新增记录: {as_int(counts.get('inserted')):>10,}",
        f"║ - 更新记录: {as_int(counts.get('updated')):>10,}",
        f"║ - 跳过记录: {as_int(counts.get('skipped')):>10,}",
        f"║ - 错误记录: {as_int(counts.get('errors')):>10,}",
    ]

    # Verification section (optional)
    if verification:
        # Newer key first, legacy ``backfilled_count`` as fallback.
        backfilled_missing = verification.get(
            "backfilled_missing_count", verification.get("backfilled_count", 0)
        )
        backfilled_mismatch = verification.get("backfilled_mismatch_count", 0)
        lines.extend([
            "╠══════════════════════════════════════════════════════════════╣",
            "║ 校验结果 ║",
            f"║ - 源数据量: {as_int(verification.get('source_count')):>10,}",
            f"║ - 目标数据量: {as_int(verification.get('target_count')):>10,}",
            f"║ - 缺失补齐: {as_int(backfilled_missing):>10,}",
            f"║ - 不一致补齐: {as_int(backfilled_mismatch):>10,}",
        ])

    # Error section (optional)
    if error_message:
        # Collapse newlines/tabs so a multi-line traceback cannot break the
        # box, then truncate to the width the row was designed for.
        error_str = " ".join(str(error_message).split())[:48]
        lines.extend([
            "╠══════════════════════════════════════════════════════════════╣",
            f"║ 错误信息: {error_str:<50}",
        ])

    lines.append("╚══════════════════════════════════════════════════════════════╝")
    return "\n".join(lines)
def format_pipeline_summary(
    pipeline_name: str,
    task_results: List[dict],
    start_time: datetime,
    end_time: datetime,
    verification_summary: Optional[dict] = None,
) -> str:
    """Build the boxed, multi-line summary report for a whole pipeline run.

    Args:
        pipeline_name: Name of the pipeline.
        task_results: Execution results of the individual tasks.
        start_time: When the pipeline started.
        end_time: When the pipeline finished.
        verification_summary: Optional aggregated verification figures.

    Returns:
        The report as one newline-joined string. At most the first 10 tasks
        are listed individually; the rest are summarised in one line.
    """
    max_listed = 10

    # Materialise once: the results are iterated several times and also
    # measured/sliced, which a one-shot iterable would not survive.
    task_results = list(task_results)

    elapsed_str = _format_duration((end_time - start_time).total_seconds())

    # Aggregate counters
    totals = summarize_counts(task_results).get("total", {})

    # Success / failure tally: anything whose status is not "成功" is counted
    # as failed.
    task_total = len(task_results)
    success_count = sum(1 for r in task_results if r.get("status") == "成功")
    fail_count = task_total - success_count

    # Padding shrinks as the numbers get wider; negative widths yield "".
    count_pad = " " * (
        32 - len(str(task_total)) - len(str(success_count)) - len(str(fail_count))
    )
    elapsed_pad = " " * (31 - len(elapsed_str))
    started = start_time.strftime("%Y-%m-%d %H:%M:%S")
    finished = end_time.strftime("%H:%M:%S")
    # str() keeps the ``:<50`` padding valid even for a None / non-str name.
    name_str = str(pipeline_name)

    lines = [
        "╔══════════════════════════════════════════════════════════════╗",
        "║ 管道执行总结 ║",
        "╠══════════════════════════════════════════════════════════════╣",
        f"║ 管道名称: {name_str:<50}",
        f"║ 任务数量: {task_total} (成功: {success_count}, 失败: {fail_count}){count_pad}",
        f"║ 执行时间: {started} ~ {finished} ({elapsed_str}){elapsed_pad}",
        "╠══════════════════════════════════════════════════════════════╣",
        "║ 数据汇总 ║",
        f"║ - 总获取: {totals.get('fetched', 0):>12,}",
        f"║ - 总新增: {totals.get('inserted', 0):>12,}",
        f"║ - 总更新: {totals.get('updated', 0):>12,}",
        f"║ - 总跳过: {totals.get('skipped', 0):>12,}",
        f"║ - 总错误: {totals.get('errors', 0):>12,}",
    ]

    # Verification section (optional)
    if verification_summary:
        # Newer key first, legacy ``total_backfilled`` as fallback.
        total_backfilled_missing = verification_summary.get(
            "total_backfilled_missing",
            verification_summary.get("total_backfilled", 0),
        )
        total_backfilled_mismatch = verification_summary.get("total_backfilled_mismatch", 0)
        lines.extend([
            "╠══════════════════════════════════════════════════════════════╣",
            "║ 校验汇总 ║",
            f"║ - 校验表数: {verification_summary.get('total_tables', 0):>10,}",
            f"║ - 一致表数: {verification_summary.get('consistent_tables', 0):>10,}",
            f"║ - 总补齐数: {verification_summary.get('total_backfilled', 0):>10,}",
            f"║ - 缺失补齐: {total_backfilled_missing:>10,}",
            f"║ - 不一致补齐: {total_backfilled_mismatch:>8,}",
        ])

    # Per-task detail rows
    lines.extend([
        "╠══════════════════════════════════════════════════════════════╣",
        "║ 任务明细 ║",
    ])
    for result in task_results[:max_listed]:
        # ``or`` also covers a key that is present but None.
        task_code = str(result.get("task_code") or "UNKNOWN")[:25]
        # Both branches used to be the empty string, so success and failure
        # rows looked identical. These two markers are GBK-encodable.
        marker = "√" if result.get("status") == "成功" else "×"
        counts = result.get("counts") or {}
        fetched = int(counts.get("fetched", 0) or 0)
        # Row starts with the same left border as every other body row.
        lines.append(f"║ {marker} {task_code:<25} 获取:{fetched:>6,}")

    if task_total > max_listed:
        lines.append(f"║ ... 还有 {task_total - max_listed} 个任务 ... ║")

    lines.append("╚══════════════════════════════════════════════════════════════╝")
    return "\n".join(lines)
def _format_duration(seconds: float) -> str:
"""格式化时长"""
if seconds < 60:
return f"{seconds:.1f}"
elif seconds < 3600:
mins = int(seconds // 60)
secs = seconds % 60
return f"{mins}{secs:.0f}"
else:
hours = int(seconds // 3600)
mins = int((seconds % 3600) // 60)
return f"{hours}{mins}"