Files
Neo-ZQYY/scripts/ops/export_v5_report.py

121 lines
3.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""导出第五次 ETL 执行结果报告。"""
import json
from pathlib import Path
from datetime import datetime
from dotenv import load_dotenv
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
from _env_paths import get_output_path
# Load the raw record of the fifth ETL run and split its error log into lines.
raw_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_raw_v5.json"
data = json.loads(raw_path.read_text(encoding="utf-8"))
# "error_log" may be missing or JSON null; normalise both to "" so that
# .strip() below cannot fail on None.
error_log = data.get("error_log") or ""
lines = error_log.strip().split("\n")
# Parse task outcomes from the log lines.
tasks_success = []
tasks_failed = []
tasks_skipped = []

# Exception names looked for in a failure line, in priority order, paired
# with the label recorded for them.
known_errors = (
    ("UndefinedColumn", "UndefinedColumn"),
    ("InFailedSqlTransaction", "InFailedSqlTransaction级联"),
    ("UniqueViolation", "UniqueViolation"),
)

for entry in lines:
    succeeded = any(
        marker in entry
        for marker in ("完成,统计=", "任务完成:", "工具类任务执行成功")
    )
    if succeeded:
        segments = [seg.strip() for seg in entry.split("|")]
        # Fallback name: text before the first ":" of the last "|" segment,
        # or "?" when the line contains no "|" at all.
        name = segments[-1].split(":")[0].strip() if "|" in entry else "?"
        # Prefer the first segment that starts with a DWS_/ODS_/DWD_ prefix.
        for seg in segments:
            if seg.startswith(("DWS_", "ODS_", "DWD_")):
                name = seg.split(":")[0].strip()
                break
        tasks_success.append(name)
        continue

    if "任务" not in entry or "失败:" not in entry:
        continue

    # The task name is the first space-delimited token after "任务 ";
    # failure lines without that marker are ignored.
    marker_pos = entry.find("任务 ")
    if marker_pos < 0:
        continue
    tail = entry[marker_pos + 3:]
    name = tail.split(" ")[0].strip()

    # Classify the error: a known exception name anywhere in the line wins,
    # otherwise keep up to 80 characters of the text after "失败:".
    reason = next(
        (label for needle, label in known_errors if needle in entry),
        None,
    )
    if reason is None:
        if "失败:" in tail:
            reason = tail.split("失败:")[1].strip()[:80]
        else:
            reason = "未知"
    tasks_failed.append((name, reason))
# De-duplicate while keeping first-seen order. For failed tasks the first
# recorded error is the one that is kept.
seen_success = list(dict.fromkeys(tasks_success))
seen_failed = {}
for failed_task, failure in tasks_failed:
    seen_failed.setdefault(failed_task, failure)
# Run window. The duration printed in the report is derived from these two
# values so the three figures cannot drift out of sync.
start_time = "20:19:52"
end_time = "20:31:29"
_elapsed = datetime.strptime(end_time, "%H:%M:%S") - datetime.strptime(start_time, "%H:%M:%S")
# Modulo one day keeps the value non-negative if the window crosses midnight.
_minutes, _seconds = divmod(int(_elapsed.total_seconds()) % 86400, 60)
duration = f"{_minutes}m{_seconds:02d}s"

# Report header plus the heading of the success table.
# Fix: the duration parenthesis is now closed.
report_head = f"""# 第五次 ETL 执行结果报告
- execution_id: `fe87144a-687d-4ce0-9b79-6bd0186b2be3`
- 执行时间: 2026-02-21 {start_time} ~ {end_time}(约 {duration})
- exit_code: 0
- 总任务数: 31
## 成功任务({len(seen_success)} 个)
| # | 任务 |
|---|------|
"""

# One Markdown table row per successful task, numbered from 1.
success_rows = "".join(
    f"| {i} | {t} |\n" for i, t in enumerate(seen_success, 1)
)

failed_head = f"""
## 失败任务({len(seen_failed)} 个)
| # | 任务 | 错误类型 |
|---|------|----------|
"""

# One Markdown table row per failed task with its recorded error label.
failed_rows = "".join(
    f"| {i} | {t} | {e} |\n" for i, (t, e) in enumerate(seen_failed.items(), 1)
)

# Static root-cause / remediation section.
# Fix: the parenthesis after "BUG 5" is now opened.
analysis = """
## 根因分析
BUG 6: `DWS_MEMBER_VISIT` → `_extract_table_info()` 方法中 SQL 引用了 `dwd.dim_table.site_table_id`
但该表的主键字段实际为 `table_id`(参考 `db/etl_feiqiu/schemas/dwd.sql`)。
错误发生后psycopg2 连接进入 InFailedSqlTransaction 状态,导致后续所有任务级联失败。
## 修复措施
1. `member_visit_task.py` → `_extract_table_info()`:
- `site_table_id AS table_id` → `table_id AS table_id`
- `site_table_name AS table_name` → `table_name AS table_name`
2. `finance_income_task.py` → `_extract_income_by_area()`:
- JOIN 条件 `dt.site_table_id = tfl.site_table_id` → `dt.table_id = tfl.site_table_id`
- JOIN 条件 `dt.site_table_id = asl.site_table_id` → `dt.table_id = asl.site_table_id`
## BUG 5 验证
BUG 5(birthday 字段)的修复已部署,但被 BUG 6 遮蔽,无法在本次执行中验证。
需要第六次执行来同时验证 BUG 5 + BUG 6 + BUG 7。
"""

# Assemble once instead of growing the string with repeated "+=".
report = "".join([report_head, success_rows, failed_head, failed_rows, analysis])
# Write the Markdown report under SYSTEM_LOG_ROOT and echo a short summary.
out_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_result_v5.md"
out_path.write_text(report, encoding="utf-8")
summary = (
    f"报告已导出: {out_path}",
    f"成功: {len(seen_success)}, 失败: {len(seen_failed)}",
)
print("\n".join(summary))