148 lines
4.3 KiB
Python
148 lines
4.3 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""导出第六次 ETL 执行结果报告,分析所有任务的成功/失败状态。"""
|
||
import json
|
||
import re
|
||
from pathlib import Path
|
||
|
||
from dotenv import load_dotenv
|
||
|
||
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
|
||
|
||
from _env_paths import get_output_path
|
||
|
||
# Load the raw JSON dump of the ETL run from the SYSTEM_LOG_ROOT output dir.
raw_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_raw_v6.json"
data = json.loads(raw_path.read_text(encoding="utf-8"))

# A JSON null under "error_log" is returned by .get() as None (the default
# only applies to a missing key), and None has no .strip(); fall back to an
# empty string in both cases.
error_log = data.get("error_log") or ""

# "".split("\n") yields [""], which would be reported as one log line, so an
# empty (or whitespace-only) log is mapped to an empty list instead.
_stripped_log = error_log.strip()
lines = _stripped_log.split("\n") if _stripped_log else []

print(f"日志总行数: {len(lines)}")
|
||
|
||
# Parse per-task results out of the log lines.

# Success patterns: the first capture group is the name of the task.
success_patterns = [
    r"任务完成:\s*(\S+)",
    r"工具类任务执行成功:\s*(\S+)",
    r"(\S+)\s*完成,统计=",
]

# Failure pattern: group 1 is the task name, group 2 the error text.
fail_pattern = re.compile(r"任务\s+(\S+)\s+失败:\s*(.*)")


def _classify_error(err_msg):
    """Return the report label for an (already truncated) error message.

    The checks run in a fixed priority order; a message that matches none of
    the known error names is returned unchanged.
    """
    if "InFailedSqlTransaction" in err_msg:
        # The "级联" marker is what the later root/cascade split keys on.
        return "InFailedSqlTransaction(级联)"
    if "UndefinedColumn" in err_msg:
        return f"UndefinedColumn: {err_msg}"
    if "UniqueViolation" in err_msg:
        return "UniqueViolation"
    if "UndefinedTable" in err_msg:
        return f"UndefinedTable: {err_msg}"
    return err_msg


def _parse_task_results(log_lines):
    """Scan *log_lines* and return ``(success_tasks, failed_tasks)``.

    ``success_tasks`` is a list of task names in first-seen order.
    ``failed_tasks`` is a list of ``(task, err_type)`` tuples in first-seen
    order; only the first failure line of each task is kept.
    """
    compiled_success = [re.compile(pattern) for pattern in success_patterns]
    # Dicts preserve insertion order and give O(1) membership tests, so they
    # deduplicate without rescanning a list on every matching line.
    succeeded = {}
    failed = {}

    for log_line in log_lines:
        # Success: the first pattern that matches wins for this line.
        for pattern in compiled_success:
            hit = pattern.search(log_line)
            if hit:
                succeeded.setdefault(hit.group(1).strip(), None)
                break

        # Failure: checked on every line, independently of the success match.
        hit = fail_pattern.search(log_line)
        if hit:
            task = hit.group(1).strip()
            if task not in failed:
                # The message is cut to 120 characters before classification,
                # so the label is derived from the truncated text.
                failed[task] = _classify_error(hit.group(2).strip()[:120])

    return list(succeeded), list(failed.items())


success_tasks, failed_tasks = _parse_task_results(lines)

# Drop tasks from the success list that also appear in the failure list
# (a task may log success first and fail afterwards).
fail_names = {t for t, _ in failed_tasks}
success_only = [t for t in success_tasks if t not in fail_names]
|
||
|
||
# Console summary: overall counts first, then each task list.
print(f"\n成功: {len(success_only)}, 失败: {len(failed_tasks)}")

print("\n--- 成功任务 ---")
for position, task_name in enumerate(success_only, start=1):
    print(f" {position}. {task_name}")

print("\n--- 失败任务 ---")
for position, (task_name, err_label) in enumerate(failed_tasks, start=1):
    print(f" {position}. {task_name} → {err_label}")

# Split failures into root causes and cascades: an entry whose error label
# contains the "级联" marker is a cascade, everything else is a root cause.
root_failures = []
cascade_failures = []
for failure in failed_tasks:
    if "级联" in failure[1]:
        cascade_failures.append(failure)
    else:
        root_failures.append(failure)

print(f"\n--- 根因失败({len(root_failures)} 个)---")
for task_name, err_label in root_failures:
    print(f" {task_name} → {err_label}")

print(f"\n--- 级联失败({len(cascade_failures)} 个)---")
for task_name, _unused in cascade_failures:
    print(f" {task_name}")
|
||
|
||
# Build the Markdown report and write it next to the raw log.


def _md_cell(text):
    """Escape pipe characters so *text* stays inside one Markdown table cell."""
    return text.replace("|", "\\|")


# The run metadata in the header (execution id, times, exit code, status,
# total task count, data statistics) is written as literals for this run;
# only the task counts and the task tables come from the parsed log.
report_parts = [
    f"""# 第六次 ETL 执行结果报告

- execution_id: `d9443781-e4ac-4df6-9f87-11c45d72e5ba`
- 执行时间: 2026-02-21 20:45:18 ~ 21:14:45(29 分 26 秒)
- exit_code: 0
- status: success
- 总任务数: 31
- 数据统计: 获取 171,961 / 新增 13,662 / 更新 171,595 / 跳过 0 / 错误 0

## 成功任务({len(success_only)} 个)

| # | 任务 |
|---|------|
"""
]
report_parts.extend(
    f"| {i} | {_md_cell(t)} |\n" for i, t in enumerate(success_only, 1)
)

if failed_tasks:
    report_parts.append(
        f"""
## 失败任务({len(failed_tasks)} 个)

| # | 任务 | 错误类型 |
|---|------|----------|
"""
    )
    # Error text comes straight from the log and may contain "|", which
    # would otherwise split the row into extra columns.
    report_parts.extend(
        f"| {i} | {_md_cell(t)} | {_md_cell(e)} |\n"
        for i, (t, e) in enumerate(failed_tasks, 1)
    )

if root_failures:
    report_parts.append(
        f"""
## 根因分析({len(root_failures)} 个非级联失败)

"""
    )
    # Bullet list, not a table, so no pipe escaping is needed here.
    report_parts.extend(f"- `{t}`: {e}\n" for t, e in root_failures)

if cascade_failures:
    report_parts.append(
        f"""
## 级联失败({len(cascade_failures)} 个)

由根因失败导致 psycopg2 连接进入 InFailedSqlTransaction 状态,后续任务全部级联失败。
"""
    )

# Comparison with earlier runs: rows v1-v5 are fixed literals, only the v6
# row is filled in from this run's parsed counts.
report_parts.append(
    """
## 与前次对比

| 轮次 | 成功 | 失败 | 耗时 | 修复的 BUG |
|------|------|------|------|-----------|
"""
)
report_parts.extend(
    [
        "| v1 | 10 | 31 | 9m51s | — |\n",
        "| v2 | — | — | 2m30s | BUG 1 |\n",
        "| v3 | 9 | 22 | 11m21s | BUG 2+3 |\n",
        "| v4 | 10 | 21 | 11m55s | BUG 4 |\n",
        "| v5 | 10 | 21 | 11m37s | BUG 5 |\n",
        f"| v6 | {len(success_only)} | {len(failed_tasks)} | 29m26s | BUG 5+6+7 |\n",
    ]
)

report = "".join(report_parts)

out_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_result_v6.md"
out_path.write_text(report, encoding="utf-8")
print(f"\n报告已导出: {out_path}")
|