ZQYY.FQ-ETL/utils/reporting.py

# -*- coding: utf-8 -*-
"""任务结果汇总与格式化工具。

提供多种格式的任务报告输出：
- 简单文本格式
- 详细表格格式（ASCII）
- 任务总结报告
"""
from __future__ import annotations

import unicodedata
from datetime import datetime
from typing import Any, Dict, Iterable, List, Optional


def summarize_counts(task_results: Iterable[dict]) -> dict:
    """
    Aggregate per-task counters into grand totals plus a per-task breakdown.

    Args:
        task_results: Iterable of dicts shaped like
            ``{"task_code": str, "counts": {...}}``. The task identifier is
            read from ``task_code``, then ``code``, and falls back to
            ``"UNKNOWN"``. A missing or falsy ``counts`` is treated as empty.

    Returns:
        ``{"total": {...}, "details": [...]}`` where ``total`` holds the sum of
        each counter and ``details`` holds one row per task (``task_code`` plus
        the five counters, each coerced with ``int``).
    """
    metric_names = ("fetched", "inserted", "updated", "skipped", "errors")
    grand_total = dict.fromkeys(metric_names, 0)
    breakdown = []

    for item in task_results:
        identifier = item.get("task_code") or item.get("code") or "UNKNOWN"
        raw = item.get("counts") or {}
        entry = {"task_code": identifier}
        # Falsy values (None, "", 0) count as zero before the int() coercion.
        entry.update((name, int(raw.get(name, 0) or 0)) for name in metric_names)
        for name in metric_names:
            grand_total[name] += entry[name]
        breakdown.append(entry)

    return {"total": grand_total, "details": breakdown}


def format_report(summary: dict) -> str:
    """Render the output of ``summarize_counts`` as plain text (simple format).

    The first line carries the grand totals; each following line carries one
    task's counters. Counters missing from the input are printed as 0 and a
    row without ``task_code`` is labelled ``UNKNOWN``.
    """
    count_keys = ("fetched", "inserted", "updated", "skipped", "errors")

    overall = summary.get("total", {})
    header = "TOTAL fetched={fetched} inserted={inserted} updated={updated} skipped={skipped} errors={errors}".format(
        **{key: overall.get(key, 0) for key in count_keys}
    )

    per_task = [
        "{task_code}: fetched={fetched} inserted={inserted} updated={updated} skipped={skipped} errors={errors}".format(
            task_code=entry.get("task_code", "UNKNOWN"),
            **{key: entry.get(key, 0) for key in count_keys},
        )
        for entry in summary.get("details", [])
    ]

    return "\n".join([header, *per_task])


def format_task_summary(result: dict) -> str:
    """
    Build a framed, human-readable summary report for a single task run.

    Every row is padded to the frame width by *display* width (East Asian
    wide/fullwidth characters count as two columns), so the right border lines
    up even when values contain CJK text. A row whose content is wider than
    the frame is emitted unpadded rather than truncated.

    Args:
        result: Task execution result dict. Keys read:
            - task_code (or code): task identifier; "UNKNOWN" when absent/empty
            - status: execution status text; "未知" when absent/empty
            - start_time: ``datetime`` or string (first 19 characters shown)
            - end_time: ``datetime`` or string (characters 11-19 shown when
              the string has at least 19 characters)
            - elapsed_seconds: run time in seconds; missing/None counts as 0
            - counts: dict with fetched/inserted/updated/skipped/errors;
              missing/None values count as 0
            - verification_result: optional dict with verification counters
            - error_message: optional error text (whitespace collapsed and
              clipped to 48 display columns)

    Returns:
        The report as one newline-joined string framed with box-drawing
        characters.
    """
    inner_width = 62  # display columns between the left and right border

    def cols(text: str) -> int:
        # Wide ("W") and fullwidth ("F") characters take two terminal columns.
        return sum(2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1 for ch in text)

    def clip(text: str, limit: int) -> str:
        # Keep the longest prefix that fits in `limit` display columns.
        used, kept = 0, []
        for ch in text:
            used += cols(ch)
            if used > limit:
                break
            kept.append(ch)
        return "".join(kept)

    def row(text: str) -> str:
        # Pad by display width so the closing border always lands in the same column.
        return f"║{text}{' ' * max(inner_width - cols(text), 0)}║"

    def as_int(value: object) -> int:
        # Same coercion as summarize_counts: falsy values (None, "") become 0.
        return int(value or 0)

    task_code = str(result.get("task_code") or result.get("code") or "UNKNOWN")
    status = str(result.get("status") or "未知")
    counts = result.get("counts") or {}
    verification = result.get("verification_result")
    error_message = result.get("error_message")

    # Time range: full timestamp for the start, time-of-day only for the end.
    start_time = result.get("start_time")
    end_time = result.get("end_time")

    if isinstance(start_time, str):
        start_str = start_time[:19]
    elif isinstance(start_time, datetime):
        start_str = start_time.strftime("%Y-%m-%d %H:%M:%S")
    else:
        start_str = "-"

    if isinstance(end_time, str):
        end_str = end_time[11:19] if len(end_time) >= 19 else end_time
    elif isinstance(end_time, datetime):
        end_str = end_time.strftime("%H:%M:%S")
    else:
        end_str = "-"

    elapsed_str = _format_duration(result.get("elapsed_seconds") or 0)

    rule = "═" * inner_width
    divider = f"╠{rule}╣"

    lines = [
        f"╔{rule}╗",
        row(" " * 21 + "任务执行总结"),
        divider,
        row(f" 任务代码: {task_code}"),
        row(f" 执行状态: {status}"),
        row(f" 执行时间: {start_str} ~ {end_str} ({elapsed_str})"),
        divider,
        row(" 数据统计"),
    ]
    for label, key in (
        ("获取记录", "fetched"),
        ("新增记录", "inserted"),
        ("更新记录", "updated"),
        ("跳过记录", "skipped"),
        ("错误记录", "errors"),
    ):
        lines.append(row(f"   - {label}: {as_int(counts.get(key)):>10,}"))

    # Verification section (only when a non-empty verification result is present).
    if verification:
        # Older results report a single "backfilled_count"; use it as the fallback.
        backfilled_missing = as_int(
            verification.get("backfilled_missing_count", verification.get("backfilled_count", 0))
        )
        backfilled_mismatch = as_int(verification.get("backfilled_mismatch_count", 0))
        lines.extend([
            divider,
            row(" 校验结果"),
            row(f"   - 源数据量: {as_int(verification.get('source_count')):>10,}"),
            row(f"   - 目标数据量: {as_int(verification.get('target_count')):>10,}"),
            row(f"   - 缺失补齐: {backfilled_missing:>10,}"),
            row(f"   - 不一致补齐: {backfilled_mismatch:>10,}"),
        ])

    # Error section.
    if error_message:
        # Collapse newlines/tabs so a multi-line message cannot break the frame.
        error_str = clip(" ".join(str(error_message).split()), 48)
        lines.extend([
            divider,
            row(f" 错误信息: {error_str}"),
        ])

    lines.append(f"╚{rule}╝")

    return "\n".join(lines)


def format_pipeline_summary(
    pipeline_name: str,
    task_results: List[dict],
    start_time: datetime,
    end_time: datetime,
    verification_summary: Optional[dict] = None,
) -> str:
    """
    Build a framed, human-readable summary report for a whole pipeline run.

    Every row is padded to the frame width by *display* width (East Asian
    wide/fullwidth characters count as two columns), so the right border lines
    up regardless of CJK content or the number of digits in a value. A row
    whose content is wider than the frame is emitted unpadded.

    Args:
        pipeline_name: Name shown in the header.
        task_results: Per-task result dicts. ``status`` equal to "成功" counts
            as a success, anything else as a failure. ``task_code`` (or
            ``code``) and ``counts["fetched"]`` are shown in the task list.
        start_time: Pipeline start time.
        end_time: Pipeline end time.
        verification_summary: Optional dict with verification totals.

    Returns:
        The report as one newline-joined string framed with box-drawing
        characters. At most 10 tasks are listed; the rest are summarised in a
        single "... 还有 N 个任务 ..." row.
    """
    inner_width = 62  # display columns between the left and right border
    max_listed = 10   # number of tasks shown in the detail section
    code_cols = 25    # display columns reserved for the task code

    def cols(text: str) -> int:
        # Wide ("W") and fullwidth ("F") characters take two terminal columns.
        return sum(2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1 for ch in text)

    def clip(text: str, limit: int) -> str:
        # Keep the longest prefix that fits in `limit` display columns.
        used, kept = 0, []
        for ch in text:
            used += cols(ch)
            if used > limit:
                break
            kept.append(ch)
        return "".join(kept)

    def row(text: str) -> str:
        # Pad by display width so the closing border always lands in the same column.
        return f"║{text}{' ' * max(inner_width - cols(text), 0)}║"

    def as_int(value: object) -> int:
        # Same coercion as summarize_counts: falsy values (None, "") become 0.
        return int(value or 0)

    # Materialise once: the results are iterated, counted and sliced below.
    task_results = list(task_results)

    elapsed_str = _format_duration((end_time - start_time).total_seconds())

    # Aggregate counters across all tasks.
    totals = summarize_counts(task_results).get("total", {})

    # Success / failure tally.
    success_count = sum(1 for r in task_results if r.get("status") == "成功")
    fail_count = len(task_results) - success_count

    rule = "═" * inner_width
    divider = f"╠{rule}╣"

    lines = [
        f"╔{rule}╗",
        row(" " * 21 + "管道执行总结"),
        divider,
        row(f" 管道名称: {pipeline_name}"),
        row(f" 任务数量: {len(task_results)} (成功: {success_count}, 失败: {fail_count})"),
        row(
            f" 执行时间: {start_time.strftime('%Y-%m-%d %H:%M:%S')}"
            f" ~ {end_time.strftime('%H:%M:%S')} ({elapsed_str})"
        ),
        divider,
        row(" 数据汇总"),
    ]
    for label, key in (
        ("总获取", "fetched"),
        ("总新增", "inserted"),
        ("总更新", "updated"),
        ("总跳过", "skipped"),
        ("总错误", "errors"),
    ):
        lines.append(row(f"   - {label}: {as_int(totals.get(key)):>12,}"))

    # Verification section (only when a non-empty summary is supplied).
    if verification_summary:
        total_backfilled = as_int(verification_summary.get("total_backfilled", 0))
        # Older summaries only carry "total_backfilled"; use it as the fallback.
        total_backfilled_missing = as_int(
            verification_summary.get("total_backfilled_missing", total_backfilled)
        )
        total_backfilled_mismatch = as_int(verification_summary.get("total_backfilled_mismatch", 0))
        lines.extend([
            divider,
            row(" 校验汇总"),
            row(f"   - 校验表数: {as_int(verification_summary.get('total_tables')):>10,}"),
            row(f"   - 一致表数: {as_int(verification_summary.get('consistent_tables')):>10,}"),
            row(f"   - 总补齐数: {total_backfilled:>10,}"),
            row(f"   - 缺失补齐: {total_backfilled_missing:>10,}"),
            row(f"   - 不一致补齐: {total_backfilled_mismatch:>8,}"),
        ])

    # Per-task detail section.
    lines.extend([divider, row(" 任务明细")])

    for result in task_results[:max_listed]:
        task_code = clip(
            str(result.get("task_code") or result.get("code") or "UNKNOWN"),
            code_cols,
        )
        # Pad the code column by display width so the counters line up.
        task_code += " " * (code_cols - cols(task_code))
        status = "✓" if result.get("status") == "成功" else "✗"
        fetched = as_int((result.get("counts") or {}).get("fetched"))
        lines.append(row(f"   {status} {task_code} 获取:{fetched:>6,}"))

    if len(task_results) > max_listed:
        lines.append(row(f"   ... 还有 {len(task_results) - max_listed} 个任务 ..."))

    lines.append(f"╚{rule}╝")

    return "\n".join(lines)


def _format_duration(seconds: float) -> str:
    """Render a duration given in seconds as a short Chinese-labelled string.

    - below one minute: seconds with one decimal place, e.g. ``"12.3秒"``
    - below one hour: whole minutes and seconds, e.g. ``"1分30秒"``
    - otherwise: hours and whole minutes, seconds dropped, e.g. ``"2时5分"``

    The value is rounded before it is split into units, so a rounding carry
    moves into the next unit (59.96 -> ``"1分0秒"``, 3599.7 -> ``"1时0分"``)
    instead of producing ``"60.0秒"`` or ``"59分60秒"``.
    """
    # Compare the value as it will be printed (one decimal place) so that
    # e.g. 59.96 is not shown as "60.0秒".
    if round(seconds, 1) < 60:
        return f"{seconds:.1f}秒"

    if seconds < 3600:
        # Round once, then split, so a carry in the seconds reaches the minutes.
        whole = int(round(seconds))
        if whole < 3600:
            mins, secs = divmod(whole, 60)
            return f"{mins}分{secs}秒"
        # 3599.5 and above rounds up to exactly one hour.
        seconds = whole

    hours = int(seconds // 3600)
    mins = int((seconds % 3600) // 60)
    return f"{hours}时{mins}分"