# -*- coding: utf-8 -*-
"""Export the result report for the fifth ETL run.

Reads the raw run dump (JSON) from the ``SYSTEM_LOG_ROOT`` output directory,
scans its ``error_log`` text for per-task success / failure lines, and writes
a Markdown summary into the same directory.
"""
import json
from pathlib import Path
from datetime import datetime

from dotenv import load_dotenv

# The .env file is loaded before _env_paths is imported, in the same order as
# the original script.
# NOTE(review): presumably _env_paths depends on these variables - confirm
# before reordering.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")

from _env_paths import get_output_path

# Substrings that mark a log line as "task finished successfully".
_SUCCESS_MARKERS = ("完成,统计=", "任务完成:", "工具类任务执行成功")
# Prefixes identifying the log field that carries the task name.
_TASK_PREFIXES = ("DWS_", "ODS_", "DWD_")
# Marker preceding the task name in a failure line, and the failure marker.
_TASK_MARKER = "任务 "
_FAIL_MARKER = "失败:"
# (substring searched in the line, label written to the report), in priority
# order: the first match wins.
_KNOWN_ERRORS = (
    ("UndefinedColumn", "UndefinedColumn"),
    ("InFailedSqlTransaction", "InFailedSqlTransaction(级联)"),
    ("UniqueViolation", "UniqueViolation"),
)


def _success_task_name(line):
    """Return the task name found in a success log line.

    The first ``|``-separated field starting with a known task prefix wins.
    Otherwise the text before the first ``:`` of the last field is used, or
    ``"?"`` when the line contains no ``|`` at all.
    """
    fields = [field.strip() for field in line.split("|")]
    for field in fields:
        if field.startswith(_TASK_PREFIXES):
            return field.split(":")[0].strip()
    if "|" in line:
        return fields[-1].split(":")[0].strip()
    return "?"


def _failed_task(line):
    """Return ``(task_name, error_label)`` for a failure line, or ``None``.

    ``None`` is returned when the task-name marker is absent, so the caller
    never records a name left over from an earlier line.
    """
    start = line.find(_TASK_MARKER)
    if start < 0:
        return None
    rest = line[start + len(_TASK_MARKER):]
    task_name = rest.split(" ")[0].strip()
    for needle, label in _KNOWN_ERRORS:
        if needle in line:
            return task_name, label
    if _FAIL_MARKER in rest:
        # Unknown error: keep the first 80 characters of the message.
        return task_name, rest.split(_FAIL_MARKER)[1].strip()[:80]
    return task_name, "未知"


def _md_cell(text):
    """Escape ``|`` so free-form log text cannot split a Markdown table cell."""
    return str(text).replace("|", "\\|")


raw_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_raw_v5.json"
data = json.loads(raw_path.read_text(encoding="utf-8"))

# ``or ""`` also covers an explicit JSON null, which .get()'s default does not.
error_log = data.get("error_log") or ""
lines = error_log.strip().split("\n")

# Parse per-task results.
tasks_success = []
tasks_failed = []
# Declared by the original script; nothing below populates it.
tasks_skipped = []

for line in lines:
    if any(marker in line for marker in _SUCCESS_MARKERS):
        tasks_success.append(_success_task_name(line))
    elif "任务" in line and _FAIL_MARKER in line:
        failure = _failed_task(line)
        if failure is not None:
            tasks_failed.append(failure)

# De-duplicate, keeping first-seen order; for failures the first error wins.
seen_success = list(dict.fromkeys(tasks_success))
seen_failed = {}
for task_name, err in tasks_failed:
    seen_failed.setdefault(task_name, err)

# Run window (fixed values for this specific run).
start_time = "20:19:52"
end_time = "20:31:29"

report_parts = [
    f"""# 第五次 ETL 执行结果报告

- execution_id: `fe87144a-687d-4ce0-9b79-6bd0186b2be3`
- 执行时间: 2026-02-21 {start_time} ~ {end_time}(约 11m37s)
- exit_code: 0
- 总任务数: 31

## 成功任务({len(seen_success)} 个)

| # | 任务 |
|---|------|
"""
]
report_parts.extend(
    f"| {i} | {_md_cell(t)} |\n" for i, t in enumerate(seen_success, 1)
)

report_parts.append(
    f"""
## 失败任务({len(seen_failed)} 个)

| # | 任务 | 错误类型 |
|---|------|----------|
"""
)
report_parts.extend(
    f"| {i} | {_md_cell(t)} | {_md_cell(e)} |\n"
    for i, (t, e) in enumerate(seen_failed.items(), 1)
)

report_parts.append(
    """
## 根因分析

BUG 6: `DWS_MEMBER_VISIT` → `_extract_table_info()` 方法中 SQL 引用了 `dwd.dim_table.site_table_id`,
但该表的主键字段实际为 `table_id`(参考 `db/etl_feiqiu/schemas/dwd.sql`)。

错误发生后,psycopg2 连接进入 InFailedSqlTransaction 状态,导致后续所有任务级联失败。

## 修复措施

1. `member_visit_task.py` → `_extract_table_info()`:
   - `site_table_id AS table_id` → `table_id AS table_id`
   - `site_table_name AS table_name` → `table_name AS table_name`

2. `finance_income_task.py` → `_extract_income_by_area()`:
   - JOIN 条件 `dt.site_table_id = tfl.site_table_id` → `dt.table_id = tfl.site_table_id`
   - JOIN 条件 `dt.site_table_id = asl.site_table_id` → `dt.table_id = asl.site_table_id`

## BUG 5 验证

BUG 5(birthday 字段)的修复已部署,但被 BUG 6 遮蔽,无法在本次执行中验证。
需要第六次执行来同时验证 BUG 5 + BUG 6 + BUG 7。
"""
)

# Assemble once instead of growing the string with repeated +=.
report = "".join(report_parts)

out_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_result_v5.md"
out_path.write_text(report, encoding="utf-8")
print(f"报告已导出: {out_path}")
print(f"成功: {len(seen_success)}, 失败: {len(seen_failed)}")