在前后端开发联调前 的提交20260223

This commit is contained in:
Neo
2026-02-23 23:02:20 +08:00
parent 254ccb1e77
commit fafc95e64c
1142 changed files with 10366960 additions and 36957 deletions

View File

@@ -0,0 +1,120 @@
# -*- coding: utf-8 -*-
"""导出第五次 ETL 执行结果报告。"""
import json
from pathlib import Path
from datetime import datetime
from dotenv import load_dotenv
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
from _env_paths import get_output_path
# Raw JSON dump of the fifth ETL run, located under the directory that
# get_output_path("SYSTEM_LOG_ROOT") returns.
raw_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_raw_v5.json"
with raw_path.open(encoding="utf-8") as raw_file:
    data = json.load(raw_file)

# The captured log is stored as one string under "error_log" (empty string
# when the key is absent); trim it and split it into individual lines.
error_log = data.get("error_log", "")
lines = error_log.strip().split("\n")
# Classify every log line as a task success or a task failure.
tasks_success = []
tasks_failed = []
tasks_skipped = []  # never filled by this loop; kept so the name stays defined

# Substrings that mark a log line as reporting a successful task.
_SUCCESS_MARKERS = ("完成,统计=", "任务完成:", "工具类任务执行成功")
# A "|"-separated segment starting with one of these is taken as the task name.
_TASK_PREFIXES = ("DWS_", "ODS_", "DWD_")
# Exception names looked for in a failure line, paired with the label reported.
_KNOWN_ERRORS = (
    ("UndefinedColumn", "UndefinedColumn"),
    ("InFailedSqlTransaction", "InFailedSqlTransaction级联"),
    ("UniqueViolation", "UniqueViolation"),
)

for entry in lines:
    if any(marker in entry for marker in _SUCCESS_MARKERS):
        segments = [segment.strip() for segment in entry.split("|")]
        # Fallback name: the text before the first ":" of the last segment,
        # or "?" when the line has no "|" separator at all.
        if len(segments) > 1:
            succeeded = segments[-1].split(":")[0].strip()
        else:
            succeeded = "?"
        # Prefer the first segment that carries a recognised task prefix.
        prefixed = next(
            (segment for segment in segments if segment.startswith(_TASK_PREFIXES)),
            None,
        )
        if prefixed is not None:
            succeeded = prefixed.split(":")[0].strip()
        tasks_success.append(succeeded)
    elif "任务" in entry and "失败:" in entry:
        marker_at = entry.find("任务 ")
        if marker_at < 0:
            # No "任务 " marker to anchor the task name on: ignore the line.
            continue
        # Skip the 3-character marker; the task name is the next
        # space-delimited token.
        tail = entry[marker_at + 3:]
        failed = tail.split(" ")[0].strip()
        # Map the line to a known error label, else fall back to at most
        # 80 characters of the text following "失败:".
        for needle, label in _KNOWN_ERRORS:
            if needle in entry:
                reason = label
                break
        else:
            pieces = tail.split("失败:")
            reason = pieces[1].strip()[:80] if len(pieces) > 1 else "未知"
        tasks_failed.append((failed, reason))
# De-duplicate while keeping first-seen order.
# dict.fromkeys keeps one key per distinct task name, in insertion order.
seen_success = list(dict.fromkeys(tasks_success))

# For failures, keep only the first error recorded for each task name.
seen_failed = {}
for failed_task, failure_reason in tasks_failed:
    seen_failed.setdefault(failed_task, failure_reason)
# Wall-clock window of the run, as printed in the report header.
start_time = "20:19:52"
end_time = "20:31:29"

# Derive the elapsed time from the two timestamps instead of hard-coding it,
# so the figure in the report cannot drift from them. The modulo keeps the
# result positive should a run ever cross midnight.
_clock_format = "%H:%M:%S"
_elapsed = datetime.strptime(end_time, _clock_format) - datetime.strptime(
    start_time, _clock_format
)
_minutes, _seconds = divmod(int(_elapsed.total_seconds()) % 86400, 60)
duration = f"{_minutes}m{_seconds:02d}s"

# Header plus the opening of the "succeeded tasks" table.
# Fix: the duration parenthesis is now closed in the emitted Markdown.
report = f"""# 第五次 ETL 执行结果报告
- execution_id: `fe87144a-687d-4ce0-9b79-6bd0186b2be3`
- 执行时间: 2026-02-21 {start_time} ~ {end_time}(约 {duration})
- exit_code: 0
- 总任务数: 31
## 成功任务({len(seen_success)} 个)
| # | 任务 |
|---|------|
"""
# One numbered table row per successful task.
report += "".join(
    f"| {number} | {task} |\n" for number, task in enumerate(seen_success, 1)
)

# "Failed tasks" table: one numbered row per task with its error label.
report += f"""
## 失败任务({len(seen_failed)} 个)
| # | 任务 | 错误类型 |
|---|------|----------|
"""
report += "".join(
    f"| {number} | {task} | {error} |\n"
    for number, (task, error) in enumerate(seen_failed.items(), 1)
)

# Static root-cause analysis and remediation notes.
# Fix: the opening parenthesis after "BUG 5" is restored.
report += """
## 根因分析
BUG 6: `DWS_MEMBER_VISIT` → `_extract_table_info()` 方法中 SQL 引用了 `dwd.dim_table.site_table_id`
但该表的主键字段实际为 `table_id`(参考 `db/etl_feiqiu/schemas/dwd.sql`)。
错误发生后psycopg2 连接进入 InFailedSqlTransaction 状态,导致后续所有任务级联失败。
## 修复措施
1. `member_visit_task.py` → `_extract_table_info()`:
- `site_table_id AS table_id` → `table_id AS table_id`
- `site_table_name AS table_name` → `table_name AS table_name`
2. `finance_income_task.py` → `_extract_income_by_area()`:
- JOIN 条件 `dt.site_table_id = tfl.site_table_id` → `dt.table_id = tfl.site_table_id`
- JOIN 条件 `dt.site_table_id = asl.site_table_id` → `dt.table_id = asl.site_table_id`
## BUG 5 验证
BUG 5(birthday 字段)的修复已部署,但被 BUG 6 遮蔽,无法在本次执行中验证。
需要第六次执行来同时验证 BUG 5 + BUG 6 + BUG 7。
"""
# Write the Markdown report next to the raw dump, i.e. under the directory
# that get_output_path("SYSTEM_LOG_ROOT") returns.
out_path = get_output_path("SYSTEM_LOG_ROOT") / "2026-02-21__etl_run_result_v5.md"
with out_path.open("w", encoding="utf-8") as report_file:
    report_file.write(report)

# Console summary: where the report went and how many distinct tasks
# succeeded and failed.
success_count = len(seen_success)
failure_count = len(seen_failed)
print(f"报告已导出: {out_path}")
print(f"成功: {success_count}, 失败: {failure_count}")