121 lines
3.8 KiB
Python
121 lines
3.8 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""Export the result report for the fifth ETL run.

Reads the raw JSON dump of the run, scans its ``error_log`` text for
per-task success/failure lines, and writes a Markdown summary report.
"""
|
||
import json
|
||
from pathlib import Path
|
||
from datetime import datetime
|
||
|
||
from dotenv import load_dotenv
|
||
|
||
# Load the ".env" file that lives two directories above the folder containing
# this script (parents[0] is the script's own folder).
# NOTE(review): this call sits between the imports, presumably so the
# environment is populated before `_env_paths` is imported below — confirm
# before reordering.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
|
||
|
||
from _env_paths import get_output_path
|
||
|
||
# Locate and parse the raw JSON dump captured for this ETL run.
raw_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_raw_v5.json"
data = json.loads(raw_path.read_text(encoding="utf-8"))

# A missing "error_log" key and an explicit JSON null are both treated as an
# empty log; `.get(key, "")` alone would return None for null and make the
# `.strip()` call below raise AttributeError.
error_log = data.get("error_log") or ""
lines = error_log.strip().split("\n")
|
||
|
||
# Parse per-task outcomes out of the log lines.
tasks_success = []
tasks_failed = []
tasks_skipped = []  # defined for completeness; nothing below fills it

# Substrings that mark a log line as reporting a successful task.
_SUCCESS_MARKERS = ("完成,统计=", "任务完成:", "工具类任务执行成功")
# A "|"-separated segment starting with one of these carries the task name.
_LAYER_PREFIXES = ("DWS_", "ODS_", "DWD_")
# (substring searched in the whole line, label written to the report);
# checked in order, first hit wins.
_KNOWN_ERRORS = (
    ("UndefinedColumn", "UndefinedColumn"),
    ("InFailedSqlTransaction", "InFailedSqlTransaction(级联)"),
    ("UniqueViolation", "UniqueViolation"),
)

for entry in lines:
    if any(marker in entry for marker in _SUCCESS_MARKERS):
        segments = [segment.strip() for segment in entry.split("|")]
        # Fallback name: text before the first ":" of the last segment, or
        # "?" when the line has no "|" separator at all.
        fallback = segments[-1].split(":")[0].strip() if "|" in entry else "?"
        # Preferred name: the first segment that starts with a layer prefix.
        task = next(
            (s.split(":")[0].strip() for s in segments if s.startswith(_LAYER_PREFIXES)),
            fallback,
        )
        tasks_success.append(task)
        continue

    if "任务" not in entry or "失败:" not in entry:
        continue

    marker_pos = entry.find("任务 ")
    if marker_pos < 0:
        # The line mentions a task failure but not in the "任务 <name> ..." form.
        continue

    # Skip the 3-character marker "任务 "; the task name is the next token.
    tail = entry[marker_pos + 3:]
    task = tail.split(" ")[0].strip()

    # Classify the error; fall back to the first 80 characters after "失败:".
    for needle, label in _KNOWN_ERRORS:
        if needle in entry:
            reason = label
            break
    else:
        reason = tail.split("失败:")[1].strip()[:80] if "失败:" in tail else "未知"
    tasks_failed.append((task, reason))
|
||
|
||
# De-duplicate successful task names, keeping first-seen order
# (dict keys preserve insertion order).
seen_success = list(dict.fromkeys(tasks_success))

# For failures, keep only the first error recorded for each task.
seen_failed = {}
for failed_task, failure_reason in tasks_failed:
    seen_failed.setdefault(failed_task, failure_reason)
|
||
|
||
# Run window, hard-coded for this specific execution. The "约 11m37s" duration
# in the report header is the hand-computed difference of these two times and
# must be updated together with them.
start_time = "20:19:52"
end_time = "20:31:29"


def _md_cell(value):
    """Return *value* as text that is safe inside one Markdown table cell.

    Task names and error snippets come straight from log text. A literal
    ``|`` would be read as a column separator and shift the rest of the row,
    so it is backslash-escaped.
    """
    return str(value).replace("|", "\\|")


# Assemble the report from parts and join once at the end.
report_parts = [
    f"""# 第五次 ETL 执行结果报告

- execution_id: `fe87144a-687d-4ce0-9b79-6bd0186b2be3`
- 执行时间: 2026-02-21 {start_time} ~ {end_time}(约 11m37s)
- exit_code: 0
- 总任务数: 31

## 成功任务({len(seen_success)} 个)

| # | 任务 |
|---|------|
"""
]

# One table row per successful task, numbered from 1.
report_parts.extend(
    f"| {i} | {_md_cell(t)} |\n" for i, t in enumerate(seen_success, 1)
)

report_parts.append(
    f"""
## 失败任务({len(seen_failed)} 个)

| # | 任务 | 错误类型 |
|---|------|----------|
"""
)

# One table row per failed task with its (first) error label.
report_parts.extend(
    f"| {i} | {_md_cell(t)} | {_md_cell(e)} |\n"
    for i, (t, e) in enumerate(seen_failed.items(), 1)
)

# Static analysis / remediation notes for this run.
report_parts.append(
    """
## 根因分析

BUG 6: `DWS_MEMBER_VISIT` → `_extract_table_info()` 方法中 SQL 引用了 `dwd.dim_table.site_table_id`,
但该表的主键字段实际为 `table_id`(参考 `db/etl_feiqiu/schemas/dwd.sql`)。

错误发生后,psycopg2 连接进入 InFailedSqlTransaction 状态,导致后续所有任务级联失败。

## 修复措施

1. `member_visit_task.py` → `_extract_table_info()`:
- `site_table_id AS table_id` → `table_id AS table_id`
- `site_table_name AS table_name` → `table_name AS table_name`

2. `finance_income_task.py` → `_extract_income_by_area()`:
- JOIN 条件 `dt.site_table_id = tfl.site_table_id` → `dt.table_id = tfl.site_table_id`
- JOIN 条件 `dt.site_table_id = asl.site_table_id` → `dt.table_id = asl.site_table_id`

## BUG 5 验证

BUG 5(birthday 字段)的修复已部署,但被 BUG 6 遮蔽,无法在本次执行中验证。
需要第六次执行来同时验证 BUG 5 + BUG 6 + BUG 7。
"""
)

report = "".join(report_parts)
|
||
|
||
# Write the Markdown report under the SYSTEM_LOG_ROOT output path, then echo
# where it went and the success/failure counts.
report_filename = "2026-02-21__etl_run_result_v5.md"
out_path = get_output_path("SYSTEM_LOG_ROOT") / report_filename
out_path.write_text(report, encoding="utf-8")

success_count = len(seen_success)
failed_count = len(seen_failed)
print(f"报告已导出: {out_path}")
print(f"成功: {success_count}, 失败: {failed_count}")
|