在前后端开发联调前 的提交20260223
This commit is contained in:
120
scripts/ops/export_v5_report.py
Normal file
120
scripts/ops/export_v5_report.py
Normal file
@@ -0,0 +1,120 @@
|
||||
# -*- coding: utf-8 -*-
"""Export the result report for the fifth ETL run.

Reads the raw run record (a JSON file) from the ``SYSTEM_LOG_ROOT`` output
directory and splits its ``error_log`` text into individual log lines.
"""
import json
from pathlib import Path
from datetime import datetime

from dotenv import load_dotenv

# The .env file is looked up at parents[2] of this script's resolved path.
# It is loaded before _env_paths is imported, keeping the original statement
# order. NOTE(review): presumably _env_paths reads environment variables at
# import time - confirm before reordering these statements.
_env_file = Path(__file__).resolve().parents[2] / ".env"
load_dotenv(_env_file)

from _env_paths import get_output_path

# Raw record of the run, stored as UTF-8 JSON under SYSTEM_LOG_ROOT.
raw_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_raw_v5.json"
raw_text = raw_path.read_text(encoding="utf-8")
data = json.loads(raw_text)

# A missing "error_log" key falls back to an empty string.
error_log = data.get("error_log", "")
stripped_log = error_log.strip()
lines = stripped_log.split("\n")

# Parse per-task outcomes out of the log lines.
#
# NOTE(review): indentation was lost in the extracted source. The failure
# handling below is assumed to sit entirely under the ``idx >= 0`` check -
# confirm against the repository copy.
tasks_success = []
tasks_failed = []
tasks_skipped = []  # declared but never filled in this section

# Substrings that mark a log line as reporting a successful task.
_success_markers = ("完成,统计=", "任务完成:", "工具类任务执行成功")
# A "|"-separated field starting with one of these prefixes holds the task name.
_layer_prefixes = ("DWS_", "ODS_", "DWD_")
# (substring searched in the line, label stored as the error type), in
# priority order.
_known_errors = (
    ("UndefinedColumn", "UndefinedColumn"),
    ("InFailedSqlTransaction", "InFailedSqlTransaction(级联)"),
    ("UniqueViolation", "UniqueViolation"),
)

for line in lines:
    if any(marker in line for marker in _success_markers):
        fields = line.split("|")
        # Fallback name: text before the first ":" in the last field, or "?"
        # when the line has no "|" separator at all.
        if "|" in line:
            task_name = fields[-1].strip().split(":")[0].strip()
        else:
            task_name = "?"
        # Prefer the first field that starts with a known layer prefix.
        for field in fields:
            field = field.strip()
            if field.startswith(_layer_prefixes):
                task_name = field.split(":")[0].strip()
                break
        tasks_success.append(task_name)
        continue

    if "任务" not in line or "失败:" not in line:
        continue

    idx = line.find("任务 ")
    if idx < 0:
        continue

    # "任务 " is three characters long; skip past it to reach the task name.
    rest = line[idx + 3:]
    task_name = rest.split(" ")[0].strip()

    # Classify the error: known exception names first, otherwise the first
    # 80 characters after "失败:" (or "未知" when that marker is not in `rest`).
    for needle, label in _known_errors:
        if needle in line:
            err = label
            break
    else:
        if "失败:" in rest:
            err = rest.split("失败:")[1].strip()[:80]
        else:
            err = "未知"
    tasks_failed.append((task_name, err))

# De-duplicate while keeping first-seen order.
# dict.fromkeys keeps the first occurrence of each key in insertion order.
seen_success = list(dict.fromkeys(tasks_success))

# For failures, keep only the first error type recorded for each task.
seen_failed = {}
for failed_name, failed_err in tasks_failed:
    seen_failed.setdefault(failed_name, failed_err)

# Run window; hard-coded for this specific execution.
start_time = "20:19:52"
end_time = "20:31:29"

# NOTE(review): leading whitespace inside the literals below could not be
# recovered from the extracted source; the sub-bullets under the numbered
# items may originally have been indented - confirm against the repository.

# Header plus the opening of the success table.
_summary_section = f"""# 第五次 ETL 执行结果报告

- execution_id: `fe87144a-687d-4ce0-9b79-6bd0186b2be3`
- 执行时间: 2026-02-21 {start_time} ~ {end_time}(约 11m37s)
- exit_code: 0
- 总任务数: 31

## 成功任务({len(seen_success)} 个)

| # | 任务 |
|---|------|
"""

# One Markdown table row per successful task, numbered from 1.
_success_rows = [
    f"| {row_no} | {name} |\n" for row_no, name in enumerate(seen_success, 1)
]

# Opening of the failure table.
_failed_heading = f"""
## 失败任务({len(seen_failed)} 个)

| # | 任务 | 错误类型 |
|---|------|----------|
"""

# One Markdown table row per failed task with its error type, numbered from 1.
_failed_rows = [
    f"| {row_no} | {name} | {err_type} |\n"
    for row_no, (name, err_type) in enumerate(seen_failed.items(), 1)
]

# Static root-cause / fix / verification notes (plain string, no substitution).
_analysis_section = """
## 根因分析

BUG 6: `DWS_MEMBER_VISIT` → `_extract_table_info()` 方法中 SQL 引用了 `dwd.dim_table.site_table_id`,
但该表的主键字段实际为 `table_id`(参考 `db/etl_feiqiu/schemas/dwd.sql`)。

错误发生后,psycopg2 连接进入 InFailedSqlTransaction 状态,导致后续所有任务级联失败。

## 修复措施

1. `member_visit_task.py` → `_extract_table_info()`:
- `site_table_id AS table_id` → `table_id AS table_id`
- `site_table_name AS table_name` → `table_name AS table_name`

2. `finance_income_task.py` → `_extract_income_by_area()`:
- JOIN 条件 `dt.site_table_id = tfl.site_table_id` → `dt.table_id = tfl.site_table_id`
- JOIN 条件 `dt.site_table_id = asl.site_table_id` → `dt.table_id = asl.site_table_id`

## BUG 5 验证

BUG 5(birthday 字段)的修复已部署,但被 BUG 6 遮蔽,无法在本次执行中验证。
需要第六次执行来同时验证 BUG 5 + BUG 6 + BUG 7。
"""

# Assemble the pieces in document order.
report = "".join(
    [
        _summary_section,
        *_success_rows,
        _failed_heading,
        *_failed_rows,
        _analysis_section,
    ]
)

# Write the Markdown report into the SYSTEM_LOG_ROOT output directory as
# UTF-8, then print the destination path and the success/failure counts.
_report_dir = get_output_path("SYSTEM_LOG_ROOT")
out_path = _report_dir / "2026-02-21__etl_run_result_v5.md"
out_path.write_text(report, encoding="utf-8")

_success_total = len(seen_success)
_failed_total = len(seen_failed)
print(f"报告已导出: {out_path}")
print(f"成功: {_success_total}, 失败: {_failed_total}")
Reference in New Issue
Block a user