# -*- coding: utf-8 -*- """导出第六次 ETL 执行结果报告,分析所有任务的成功/失败状态。""" import json import re from pathlib import Path from dotenv import load_dotenv load_dotenv(Path(__file__).resolve().parents[2] / ".env") from _env_paths import get_output_path raw_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_raw_v6.json" data = json.loads(raw_path.read_text(encoding="utf-8")) error_log = data.get("error_log", "") lines = error_log.strip().split("\n") print(f"日志总行数: {len(lines)}") # 解析任务结果 success_tasks = [] failed_tasks = [] # 匹配成功模式 success_patterns = [ r"任务完成:\s*(\S+)", r"工具类任务执行成功:\s*(\S+)", r"(\S+)\s*完成,统计=", ] # 匹配失败模式 fail_pattern = re.compile(r"任务\s+(\S+)\s+失败:\s*(.*)") for line in lines: # 成功 for pat in success_patterns: m = re.search(pat, line) if m: task = m.group(1).strip() if task not in success_tasks: success_tasks.append(task) break # 失败 m = fail_pattern.search(line) if m: task = m.group(1).strip() err_msg = m.group(2).strip()[:120] # 分类错误 if "InFailedSqlTransaction" in err_msg: err_type = "InFailedSqlTransaction(级联)" elif "UndefinedColumn" in err_msg: err_type = f"UndefinedColumn: {err_msg}" elif "UniqueViolation" in err_msg: err_type = "UniqueViolation" elif "UndefinedTable" in err_msg: err_type = f"UndefinedTable: {err_msg}" else: err_type = err_msg if task not in [t for t, _ in failed_tasks]: failed_tasks.append((task, err_type)) # 去掉成功列表中也出现在失败列表中的(可能先成功后失败) fail_names = {t for t, _ in failed_tasks} success_only = [t for t in success_tasks if t not in fail_names] print(f"\n成功: {len(success_only)}, 失败: {len(failed_tasks)}") print("\n--- 成功任务 ---") for i, t in enumerate(success_only, 1): print(f" {i}. {t}") print("\n--- 失败任务 ---") for i, (t, e) in enumerate(failed_tasks, 1): print(f" {i}. {t} → {e}") # 找出根因(非级联的失败) root_failures = [(t, e) for t, e in failed_tasks if "级联" not in e] cascade_failures = [(t, e) for t, e in failed_tasks if "级联" in e] print(f"\n--- 根因失败({len(root_failures)} 个)---") for t, e in root_failures: print(f" {t} → {e}") print(f"\n--- 级联失败({len(cascade_failures)} 个)---") for t, _ in cascade_failures: print(f" {t}") # 生成报告 report = f"""# 第六次 ETL 执行结果报告 - execution_id: `d9443781-e4ac-4df6-9f87-11c45d72e5ba` - 执行时间: 2026-02-21 20:45:18 ~ 21:14:45(29 分 26 秒) - exit_code: 0 - status: success - 总任务数: 31 - 数据统计: 获取 171,961 / 新增 13,662 / 更新 171,595 / 跳过 0 / 错误 0 ## 成功任务({len(success_only)} 个) | # | 任务 | |---|------| """ for i, t in enumerate(success_only, 1): report += f"| {i} | {t} |\n" if failed_tasks: report += f""" ## 失败任务({len(failed_tasks)} 个) | # | 任务 | 错误类型 | |---|------|----------| """ for i, (t, e) in enumerate(failed_tasks, 1): report += f"| {i} | {t} | {e} |\n" if root_failures: report += f""" ## 根因分析({len(root_failures)} 个非级联失败) """ for t, e in root_failures: report += f"- `{t}`: {e}\n" if cascade_failures: report += f""" ## 级联失败({len(cascade_failures)} 个) 由根因失败导致 psycopg2 连接进入 InFailedSqlTransaction 状态,后续任务全部级联失败。 """ report += """ ## 与前次对比 | 轮次 | 成功 | 失败 | 耗时 | 修复的 BUG | |------|------|------|------|-----------| """ report += f"| v1 | 10 | 31 | 9m51s | — |\n" report += f"| v2 | — | — | 2m30s | BUG 1 |\n" report += f"| v3 | 9 | 22 | 11m21s | BUG 2+3 |\n" report += f"| v4 | 10 | 21 | 11m55s | BUG 4 |\n" report += f"| v5 | 10 | 21 | 11m37s | BUG 5 |\n" report += f"| v6 | {len(success_only)} | {len(failed_tasks)} | 29m26s | BUG 5+6+7 |\n" out_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_result_v6.md" out_path.write_text(report, encoding="utf-8") print(f"\n报告已导出: {out_path}")