# Neo-ZQYY/scripts/ops/analyze_v4.py

# -*- coding: utf-8 -*-
"""分析第四次执行结果。"""
import json
import re
from pathlib import Path
from dotenv import load_dotenv

load_dotenv(Path(__file__).resolve().parents[2] / ".env")
from _env_paths import get_output_path

def extract_task_names(error_log):
    """Return the task names listed on the "开始运行任务: [...]" log line.

    Args:
        error_log: Full text of the run log.

    Returns:
        Task names in log order with surrounding whitespace and single
        quotes removed; an empty list when the log has no task-list line.
    """
    match = re.search(r"开始运行任务: \[([^\]]+)\]", error_log)
    if match is None:
        return []
    names = (part.strip().strip("'") for part in match.group(1).split(","))
    # Drop empty entries (e.g. from a trailing comma): an empty name would
    # make every completion pattern match trivially.
    return [name for name in names if name]


def classify_tasks(error_log, tasks):
    """Split tasks into successes and failures based on log markers.

    A task is a success when a completion marker is present and no failure
    line exists for it. A task with a failure line is a failure, carrying
    the first 120 characters of its message. A task with neither marker is
    recorded as a failure with the message "未知状态".

    Args:
        error_log: Full text of the run log.
        tasks: Task names to look up.

    Returns:
        A ``(success_tasks, failed_tasks)`` tuple; ``failed_tasks`` holds
        ``(task, message)`` pairs.
    """
    success_tasks = []
    failed_tasks = []
    for task in tasks:
        # Escape the name so regex metacharacters in it are matched literally.
        name = re.escape(task)
        completed = re.search(
            name + r"(?:: 完成(?:，|, )统计=| ODS 任务完成:|: 工具类任务执行成功)",
            error_log,
        )
        # MULTILINE lets "$" end the message at the end of its own line;
        # without it only a failure on the very last line could match.
        # A literal backslash-n sequence still terminates the message too.
        failed = re.search(
            r"任务 " + name + r" 失败: (.+?)(?:\\n|$)",
            error_log,
            re.MULTILINE,
        )
        if failed:
            failed_tasks.append((task, failed.group(1).strip()[:120]))
        elif completed:
            success_tasks.append(task)
        else:
            failed_tasks.append((task, "未知状态"))
    return success_tasks, failed_tasks


def split_root_causes(failed_tasks):
    """Separate root-cause failures from cascaded transaction failures.

    Failures whose message contains "InFailedSqlTransaction" are counted as
    cascades; all other failures are returned as root causes.

    Args:
        failed_tasks: ``(task, message)`` pairs.

    Returns:
        A ``(root_causes, cascade_count)`` tuple.
    """
    root_causes = [
        (task, err)
        for task, err in failed_tasks
        if "InFailedSqlTransaction" not in err
    ]
    return root_causes, len(failed_tasks) - len(root_causes)


def print_report(success_tasks, failed_tasks):
    """Print the success list, the failure summary and the root causes."""
    print(f"\n✅ 成功: {len(success_tasks)} 个")
    for task in success_tasks:
        print(f"  {task}")

    print(f"\n❌ 失败: {len(failed_tasks)} 个")
    # Root causes are the failures that are not InFailedSqlTransaction cascades.
    root_causes, cascade_count = split_root_causes(failed_tasks)
    for task, err in root_causes:
        print(f"  🔴 {task}: {err}")

    print(f"\n  级联失败 (InFailedSqlTransaction): {cascade_count} 个")

    if root_causes:
        print("\n根因分析:")
        for task, err in root_causes:
            print(f"  {task}: {err}")


def main():
    """Load the v4 raw run log and print a per-task success/failure report."""
    log_root = get_output_path("SYSTEM_LOG_ROOT")
    raw_path = log_root / "2026-02-21__etl_run_raw_v4.json"
    raw = json.loads(raw_path.read_text(encoding="utf-8"))

    # A missing or null "error_log" is treated as an empty log.
    error_log = raw.get("error_log") or ""

    tasks = extract_task_names(error_log)
    if not tasks:
        # Previously this case crashed with NameError on the undefined list.
        print("未在日志中找到任务列表，无法分析")
        return
    print(f"总任务数: {len(tasks)}")

    success_tasks, failed_tasks = classify_tasks(error_log, tasks)
    print_report(success_tasks, failed_tasks)


if __name__ == "__main__":
    main()