# -*- coding: utf-8 -*-
"""Task result aggregation and report formatting utilities.

Provides task report output in several formats:
- simple text format
- detailed boxed table format
- task / flow summary report
"""
from __future__ import annotations

import unicodedata
from datetime import datetime
from typing import Any, Dict, Iterable, List, Optional

# Counter names every task result may report, in display order.
_COUNT_KEYS = ("fetched", "inserted", "updated", "skipped", "errors")

# Shared "k=v" rendering used by both the TOTAL line and the per-task lines.
_COUNTS_TEMPLATE = (
    "fetched={fetched} inserted={inserted} updated={updated} "
    "skipped={skipped} errors={errors}"
)

# Box geometry. Every data row is padded to _BOX_INNER_WIDTH terminal columns
# so that its right border lines up with the horizontal rules.
_BOX_INNER_WIDTH = 62
_BOX_RULE = "═" * _BOX_INNER_WIDTH
_BOX_TOP = f"╔{_BOX_RULE}╗"
_BOX_SEPARATOR = f"╠{_BOX_RULE}╣"
_BOX_BOTTOM = f"╚{_BOX_RULE}╝"

# Column (relative to the left border) where right-aligned numbers end.
_STAT_RIGHT_EDGE = 23
# Maximum number of terminal columns kept from an error message.
_ERROR_MAX_WIDTH = 48
# Width of the task-code column in the flow detail list.
_DETAIL_CODE_WIDTH = 25
# Maximum number of tasks listed individually in the flow summary.
_MAX_DETAIL_ROWS = 10


def _to_int(value: Any) -> int:
    """Coerce a counter value to ``int``, treating ``None``/empty as 0."""
    return int(value or 0)


def _display_width(text: str) -> int:
    """Return the number of terminal columns *text* occupies.

    East Asian wide/full-width characters take two columns and combining
    marks take none; everything else is counted as one column.
    """
    width = 0
    for char in text:
        if unicodedata.combining(char):
            continue
        width += 2 if unicodedata.east_asian_width(char) in ("W", "F") else 1
    return width


def _clip_to_width(text: str, max_width: int) -> str:
    """Return the longest prefix of *text* that fits in *max_width* columns."""
    used = 0
    for index, char in enumerate(text):
        used += _display_width(char)
        if used > max_width:
            return text[:index]
    return text


def _pad_to_width(text: str, width: int) -> str:
    """Right-pad *text* with spaces up to *width* terminal columns."""
    return text + " " * max(0, width - _display_width(text))


def _box_row(content: str) -> str:
    """Wrap *content* in vertical borders, padded to the box width.

    Content wider than the box is not cut (no information is lost); it simply
    pushes the right border out for that row.
    """
    return f"║{_pad_to_width(content, _BOX_INNER_WIDTH)}║"


def _stat_row(label: str, value: Any) -> str:
    """Render one ``- label: number`` row with the number right-aligned.

    The number's field width is derived from the label's display width so
    that all numbers end in the same column regardless of label length.
    """
    prefix = f" - {label}: "
    number_width = max(1, _STAT_RIGHT_EDGE - _display_width(prefix))
    return _box_row(f"{prefix}{_to_int(value):>{number_width},}")


def _task_code_of(result: dict) -> str:
    """Return the task code of *result*, using the same fallbacks as summarize_counts."""
    return str(result.get("task_code") or result.get("code") or "UNKNOWN")


def _counts_text(row: dict) -> str:
    """Render the five counters of *row* as ``fetched=.. inserted=.. ...``."""
    return _COUNTS_TEMPLATE.format(**{key: row.get(key, 0) for key in _COUNT_KEYS})


def summarize_counts(task_results: Iterable[dict]) -> dict:
    """Aggregate the counters of several task results.

    Args:
        task_results: Iterable of dicts shaped like
            ``{"task_code": str, "counts": {...}}``. ``code`` is accepted as a
            fallback for ``task_code``; missing or ``None`` counters count as 0.

    Returns:
        ``{"total": {...}, "details": [...]}`` where ``total`` holds the summed
        counters and ``details`` holds one row per task (task code + counters).
    """
    totals: Dict[str, int] = {key: 0 for key in _COUNT_KEYS}
    details: List[dict] = []
    for res in task_results:
        counts = res.get("counts") or {}
        row: Dict[str, Any] = {
            "task_code": res.get("task_code") or res.get("code") or "UNKNOWN"
        }
        for key in _COUNT_KEYS:
            value = _to_int(counts.get(key, 0))
            row[key] = value
            totals[key] += value
        details.append(row)
    return {"total": totals, "details": details}


def format_report(summary: dict) -> str:
    """Format the output of summarize_counts as plain text (simple format).

    The first line is the grand total, followed by one line per task.
    """
    lines = ["TOTAL " + _counts_text(summary.get("total") or {})]
    lines.extend(
        f"{row.get('task_code', 'UNKNOWN')}: " + _counts_text(row)
        for row in summary.get("details") or []
    )
    return "\n".join(lines)


def format_task_summary(result: dict) -> str:
    """Build the boxed summary report for a single task run.

    Args:
        result: Task execution result dict. Keys read here:
            - task_code (or code): task identifier
            - status: execution status
            - start_time / end_time: ``str`` or ``datetime``
            - elapsed_seconds: duration in seconds
            - counts: counter dict
            - verification_result: verification data (optional)
            - error_message: error text (optional)
            Keys that are missing or ``None`` fall back to neutral defaults.

    Returns:
        The report as a multi-line string framed with box-drawing characters.
    """
    task_code = _task_code_of(result)
    status = str(result.get("status") or "未知")
    counts = result.get("counts") or {}
    verification = result.get("verification_result")
    error_message = result.get("error_message")

    start_time = result.get("start_time")
    if isinstance(start_time, str):
        start_str = start_time[:19]
    elif isinstance(start_time, datetime):
        start_str = start_time.strftime("%Y-%m-%d %H:%M:%S")
    else:
        start_str = "-"

    end_time = result.get("end_time")
    if isinstance(end_time, str):
        # Keep only the HH:MM:SS part when the string is long enough to have one.
        end_str = end_time[11:19] if len(end_time) >= 19 else end_time
    elif isinstance(end_time, datetime):
        end_str = end_time.strftime("%H:%M:%S")
    else:
        end_str = "-"

    elapsed_str = _format_duration(result.get("elapsed_seconds") or 0)

    lines = [
        _BOX_TOP,
        _box_row(" 任务执行总结"),
        _BOX_SEPARATOR,
        _box_row(f" 任务代码: {task_code}"),
        _box_row(f" 执行状态: {status}"),
        _box_row(f" 执行时间: {start_str} ~ {end_str} ({elapsed_str})"),
        _BOX_SEPARATOR,
        _box_row(" 数据统计"),
        _stat_row("获取记录", counts.get("fetched")),
        _stat_row("新增记录", counts.get("inserted")),
        _stat_row("更新记录", counts.get("updated")),
        _stat_row("跳过记录", counts.get("skipped")),
        _stat_row("错误记录", counts.get("errors")),
    ]

    if verification:
        # "backfilled_count" is accepted as a fallback key for the missing-row figure.
        backfilled_missing = verification.get(
            "backfilled_missing_count",
            verification.get("backfilled_count", 0),
        )
        backfilled_mismatch = verification.get("backfilled_mismatch_count", 0)
        lines.extend([
            _BOX_SEPARATOR,
            _box_row(" 校验结果"),
            _stat_row("源数据量", verification.get("source_count")),
            _stat_row("目标数据量", verification.get("target_count")),
            _stat_row("缺失补齐", backfilled_missing),
            _stat_row("不一致补齐", backfilled_mismatch),
        ])

    if error_message:
        # Flatten newlines/tabs so a multi-line error cannot break the box,
        # then cut by display width so wide characters do not overflow it.
        flattened = " ".join(str(error_message).split())
        error_str = _clip_to_width(flattened, _ERROR_MAX_WIDTH)
        lines.extend([
            _BOX_SEPARATOR,
            _box_row(f" 错误信息: {error_str}"),
        ])

    lines.append(_BOX_BOTTOM)
    return "\n".join(lines)


def format_flow_summary(
    flow_name: str,
    task_results: List[dict],
    start_time: datetime,
    end_time: datetime,
    verification_summary: Optional[dict] = None,
) -> str:
    """Build the boxed summary report for a whole flow run.

    Args:
        flow_name: Name of the flow.
        task_results: Execution results of the individual tasks.
        start_time: Flow start time.
        end_time: Flow end time.
        verification_summary: Aggregated verification data (optional).

    Returns:
        The report as a multi-line string framed with box-drawing characters.
        At most 10 tasks are listed individually; the rest are summarised in
        a single "... N more ..." row.
    """
    # Materialise once: the results are counted, summed and sliced below.
    task_results = list(task_results or [])

    elapsed_str = _format_duration((end_time - start_time).total_seconds())
    start_str = start_time.strftime("%Y-%m-%d %H:%M:%S")
    end_str = end_time.strftime("%H:%M:%S")

    totals = summarize_counts(task_results)["total"]

    # Anything whose status is not exactly "成功" is reported as failed.
    success_count = sum(1 for r in task_results if r.get("status") == "成功")
    fail_count = len(task_results) - success_count

    lines = [
        _BOX_TOP,
        _box_row(" Flow 执行总结"),
        _BOX_SEPARATOR,
        _box_row(f" Flow 名称: {flow_name}"),
        _box_row(
            f" 任务数量: {len(task_results)} "
            f"(成功: {success_count}, 失败: {fail_count})"
        ),
        _box_row(f" 执行时间: {start_str} ~ {end_str} ({elapsed_str})"),
        _BOX_SEPARATOR,
        _box_row(" 数据汇总"),
        _stat_row("总获取", totals["fetched"]),
        _stat_row("总新增", totals["inserted"]),
        _stat_row("总更新", totals["updated"]),
        _stat_row("总跳过", totals["skipped"]),
        _stat_row("总错误", totals["errors"]),
    ]

    if verification_summary:
        # "total_backfilled" is accepted as a fallback key for the missing-row figure.
        total_backfilled_missing = verification_summary.get(
            "total_backfilled_missing",
            verification_summary.get("total_backfilled", 0),
        )
        total_backfilled_mismatch = verification_summary.get(
            "total_backfilled_mismatch", 0
        )
        lines.extend([
            _BOX_SEPARATOR,
            _box_row(" 校验汇总"),
            _stat_row("校验表数", verification_summary.get("total_tables")),
            _stat_row("一致表数", verification_summary.get("consistent_tables")),
            _stat_row("总补齐数", verification_summary.get("total_backfilled")),
            _stat_row("缺失补齐", total_backfilled_missing),
            _stat_row("不一致补齐", total_backfilled_mismatch),
        ])

    lines.extend([
        _BOX_SEPARATOR,
        _box_row(" 任务明细"),
    ])
    for result in task_results[:_MAX_DETAIL_ROWS]:
        code = _pad_to_width(
            _clip_to_width(_task_code_of(result), _DETAIL_CODE_WIDTH),
            _DETAIL_CODE_WIDTH,
        )
        mark = "✓" if result.get("status") == "成功" else "✗"
        fetched = _to_int((result.get("counts") or {}).get("fetched"))
        lines.append(_box_row(f" {mark} {code} 获取:{fetched:>6,}"))

    hidden = len(task_results) - _MAX_DETAIL_ROWS
    if hidden > 0:
        lines.append(_box_row(f" ... 还有 {hidden} 个任务 ..."))

    lines.append(_BOX_BOTTOM)
    return "\n".join(lines)


def _format_duration(seconds: float) -> str:
    """Format a duration in seconds as a short human-readable string.

    Below one minute the value is shown with one decimal; below one hour as
    minutes and whole seconds; otherwise as hours and minutes. ``None`` is
    treated as 0.
    """
    seconds = float(seconds or 0)
    # Compare the value as it will be printed, so 59.96 becomes a minute
    # instead of "60.0" seconds.
    if round(seconds, 1) < 60:
        return f"{seconds:.1f}秒"
    # Round to whole seconds before splitting into units; rounding the
    # remainder afterwards could yield an impossible "60" seconds.
    whole_seconds = round(seconds)
    if whole_seconds < 3600:
        mins, secs = divmod(whole_seconds, 60)
        return f"{mins}分{secs}秒"
    hours, remainder = divmod(whole_seconds, 3600)
    return f"{hours}时{remainder // 60}分"