Files
Neo-ZQYY/scripts/ops/export_v6_report.py

148 lines
4.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""导出第六次 ETL 执行结果报告,分析所有任务的成功/失败状态。"""
import json
import re
from pathlib import Path
from dotenv import load_dotenv
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
from _env_paths import get_output_path
raw_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_raw_v6.json"
data = json.loads(raw_path.read_text(encoding="utf-8"))
error_log = data.get("error_log", "")
lines = error_log.strip().split("\n")
print(f"日志总行数: {len(lines)}")
# 解析任务结果
success_tasks = []
failed_tasks = []
# 匹配成功模式
success_patterns = [
r"任务完成:\s*(\S+)",
r"工具类任务执行成功:\s*(\S+)",
r"(\S+)\s*完成,统计=",
]
# 匹配失败模式
fail_pattern = re.compile(r"任务\s+(\S+)\s+失败:\s*(.*)")
for line in lines:
# 成功
for pat in success_patterns:
m = re.search(pat, line)
if m:
task = m.group(1).strip()
if task not in success_tasks:
success_tasks.append(task)
break
# 失败
m = fail_pattern.search(line)
if m:
task = m.group(1).strip()
err_msg = m.group(2).strip()[:120]
# 分类错误
if "InFailedSqlTransaction" in err_msg:
err_type = "InFailedSqlTransaction级联"
elif "UndefinedColumn" in err_msg:
err_type = f"UndefinedColumn: {err_msg}"
elif "UniqueViolation" in err_msg:
err_type = "UniqueViolation"
elif "UndefinedTable" in err_msg:
err_type = f"UndefinedTable: {err_msg}"
else:
err_type = err_msg
if task not in [t for t, _ in failed_tasks]:
failed_tasks.append((task, err_type))
# 去掉成功列表中也出现在失败列表中的(可能先成功后失败)
fail_names = {t for t, _ in failed_tasks}
success_only = [t for t in success_tasks if t not in fail_names]
print(f"\n成功: {len(success_only)}, 失败: {len(failed_tasks)}")
print("\n--- 成功任务 ---")
for i, t in enumerate(success_only, 1):
print(f" {i}. {t}")
print("\n--- 失败任务 ---")
for i, (t, e) in enumerate(failed_tasks, 1):
print(f" {i}. {t}{e}")
# 找出根因(非级联的失败)
root_failures = [(t, e) for t, e in failed_tasks if "级联" not in e]
cascade_failures = [(t, e) for t, e in failed_tasks if "级联" in e]
print(f"\n--- 根因失败({len(root_failures)} 个)---")
for t, e in root_failures:
print(f" {t}{e}")
print(f"\n--- 级联失败({len(cascade_failures)} 个)---")
for t, _ in cascade_failures:
print(f" {t}")
# 生成报告
report = f"""# 第六次 ETL 执行结果报告
- execution_id: `d9443781-e4ac-4df6-9f87-11c45d72e5ba`
- 执行时间: 2026-02-21 20:45:18 ~ 21:14:4529 分 26 秒)
- exit_code: 0
- status: success
- 总任务数: 31
- 数据统计: 获取 171,961 / 新增 13,662 / 更新 171,595 / 跳过 0 / 错误 0
## 成功任务({len(success_only)} 个)
| # | 任务 |
|---|------|
"""
for i, t in enumerate(success_only, 1):
report += f"| {i} | {t} |\n"
if failed_tasks:
report += f"""
## 失败任务({len(failed_tasks)} 个)
| # | 任务 | 错误类型 |
|---|------|----------|
"""
for i, (t, e) in enumerate(failed_tasks, 1):
report += f"| {i} | {t} | {e} |\n"
if root_failures:
report += f"""
## 根因分析({len(root_failures)} 个非级联失败)
"""
for t, e in root_failures:
report += f"- `{t}`: {e}\n"
if cascade_failures:
report += f"""
## 级联失败({len(cascade_failures)} 个)
由根因失败导致 psycopg2 连接进入 InFailedSqlTransaction 状态,后续任务全部级联失败。
"""
report += """
## 与前次对比
| 轮次 | 成功 | 失败 | 耗时 | 修复的 BUG |
|------|------|------|------|-----------|
"""
report += f"| v1 | 10 | 31 | 9m51s | — |\n"
report += f"| v2 | — | — | 2m30s | BUG 1 |\n"
report += f"| v3 | 9 | 22 | 11m21s | BUG 2+3 |\n"
report += f"| v4 | 10 | 21 | 11m55s | BUG 4 |\n"
report += f"| v5 | 10 | 21 | 11m37s | BUG 5 |\n"
report += f"| v6 | {len(success_only)} | {len(failed_tasks)} | 29m26s | BUG 5+6+7 |\n"
out_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_result_v6.md"
out_path.write_text(report, encoding="utf-8")
print(f"\n报告已导出: {out_path}")