Files
Neo-ZQYY/scripts/ops/export_etl_result_v3.py

160 lines
6.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""解析第三次 ETL 执行日志,生成结果报告。"""
from __future__ import annotations
import json
import re
from datetime import datetime
from pathlib import Path
from _env_paths import get_output_path
# Directory that holds the raw ETL run dump read below.
LOG_DIR = get_output_path("SYSTEM_LOG_ROOT")

# Raw JSON dump of the ETL run; only its "error_log" field is used here.
raw = json.loads(
    (LOG_DIR / "2026-02-21__etl_run_raw_v3.json").read_text(encoding="utf-8")
)

# "error_log" may be missing *or* present as JSON null. Fall back to an empty
# log in both cases so the split below never runs on None.
error_log = raw.get("error_log") or ""
lines = error_log.split("\n")
# Task codes to look up in the log. The order of this list is the order in
# which tasks are scanned and the order of the rows in the per-task table.
task_order = [
    "DWS_ASSISTANT_DAILY",
    "DWS_ASSISTANT_MONTHLY",
    "DWS_ASSISTANT_CUSTOMER",
    "DWS_ASSISTANT_SALARY",
    "DWS_ASSISTANT_FINANCE",
    "ODS_SETTLEMENT_RECORDS",
    "ODS_PAYMENT",
    "ODS_REFUND",
    "DWS_BUILD_ORDER_SUMMARY",
    "DWS_MEMBER_CONSUMPTION",
    "DWS_MEMBER_VISIT",
    "ODS_GOODS_CATEGORY",
    "ODS_STORE_GOODS",
    "ODS_STORE_GOODS_SALES",
    "ODS_TENANT_GOODS",
    "ODS_PLATFORM_COUPON",
    "ODS_GROUP_PACKAGE",
    "ODS_GROUP_BUY_REDEMPTION",
    "ODS_INVENTORY_STOCK",
    "ODS_INVENTORY_CHANGE",
    "DWS_GOODS_STOCK_DAILY",
    "DWS_GOODS_STOCK_WEEKLY",
    "DWS_GOODS_STOCK_MONTHLY",
    "DWS_FINANCE_DAILY",
    "DWS_FINANCE_RECHARGE",
    "DWS_FINANCE_INCOME_STRUCTURE",
    "DWS_FINANCE_DISCOUNT_DETAIL",
    "DWS_WINBACK_INDEX",
    "DWS_NEWCONV_INDEX",
    "DWS_RELATION_INDEX",
    "DWD_LOAD_FROM_ODS",
]
def _classify_failure(message):
    """Build the result entry for a task whose failure line was found.

    Args:
        message: Text captured after "失败: " on the task's failure line.

    Returns:
        A dict with "status" (a label starting with "❌") and "stats" (the
        bare exception name for cascade failures, otherwise the message cut
        to 120 characters).
    """
    # Classify on the full message: the exception name may sit beyond the
    # 120-character cut that is applied to the stored text.
    if "InFailedSqlTransaction" in message:
        # Labelled as a cascade failure; the report's root-cause scan skips it.
        return {"status": "❌ 级联失败", "stats": "InFailedSqlTransaction"}
    shown = message[:120]
    if "UndefinedColumn" in message:
        return {"status": "❌ 字段错误", "stats": shown}
    if "UniqueViolation" in message:
        return {"status": "❌ 唯一约束", "stats": shown}
    return {"status": "❌ 失败", "stats": shown}


def _parse_task_result(task, log_lines):
    """Scan the log for the first line that decides the outcome of *task*.

    Lines are checked in order and the first one matching any of these wins:
      * "<task>: 完成,统计=<stats>"   -> success, stats taken from that line
      * "<task>: 工具类任务执行成功"   -> success, stats taken from the first
        "<task>: 结果统计: <stats>" line anywhere in the log ("" if none)
      * "任务 <task> 失败: <message>" -> failure, see _classify_failure

    Args:
        task: Task code to look for.
        log_lines: The log, one string per line.

    Returns:
        A dict with "status" and "stats" keys. When no line matches, the
        status is "⚠️ 未知".
    """
    # Escape the task code so it is always matched literally.
    name = re.escape(task)
    # NOTE(review): the pattern uses an ASCII comma after 完成 - confirm this
    # matches the punctuation the ETL logger actually emits.
    done_re = re.compile(rf"{name}: 完成,统计=(.+)")
    tool_ok_re = re.compile(rf"{name}: 工具类任务执行成功")
    tool_stats_re = re.compile(rf"{name}: 结果统计: (.+)")
    failed_re = re.compile(rf"任务 {name} 失败: (.+)")

    for line in log_lines:
        hit = done_re.search(line)
        if hit:
            return {"status": "✅ 成功", "stats": hit.group(1)[:120]}

        if tool_ok_re.search(line):
            # Utility tasks report their statistics on a separate line.
            for candidate in log_lines:
                stats_hit = tool_stats_re.search(candidate)
                if stats_hit:
                    return {"status": "✅ 成功", "stats": stats_hit.group(1)[:120]}
            return {"status": "✅ 成功", "stats": ""}

        hit = failed_re.search(line)
        if hit:
            return _classify_failure(hit.group(1))

    return {"status": "⚠️ 未知", "stats": "日志中未找到"}


# One entry per task, keyed by task code, in task_order order.
results = {task: _parse_task_result(task, lines) for task in task_order}
# Statuses that count as a root cause. "❌ 级联失败" (cascade failure) is left
# out on purpose so the scan below stops at the first non-cascade failure.
_ROOT_CAUSE_STATUSES = ("❌ 字段错误", "❌ 唯一约束", "❌ 失败")

# First task, in task_order order, whose failure is not a cascade failure;
# stored as (task_code, result_entry), or None when there is none.
root_cause = None
for task in task_order:
    entry = results.get(task, {})
    # .get() keeps a task that is missing from `results` from raising KeyError.
    if entry.get("status") in _ROOT_CAUSE_STATUSES:
        root_cause = (task, entry)
        break

statuses = [entry["status"] for entry in results.values()]
success_count = sum(status == "✅ 成功" for status in statuses)
# Every failure label starts with "❌". The previous test, `"" in status`, is
# true for every string, so it counted all tasks as failed.
fail_count = sum(status.startswith("❌") for status in statuses)
# "\u26a0" is the warning sign that starts the "unknown" label; matching the
# base character works with or without a trailing variation selector.
unknown_count = sum(status.startswith("\u26a0") for status in statuses)
def _status_of(task):
    """Return the parsed status label of *task*, or "?" when it has no entry."""
    return results.get(task, {}).get("status", "?")


def _md_cell(text, limit=80):
    """Cut *text* to *limit* characters and escape "|" for a Markdown table cell."""
    return text[:limit].replace("|", "\\|")


# Build the Markdown report.
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
total_tasks = len(task_order)

# Rows of the bug-fix verification table.
daily_status = _status_of("DWS_ASSISTANT_DAILY")
monthly_status = _status_of("DWS_ASSISTANT_MONTHLY")
customer_status = _status_of("DWS_ASSISTANT_CUSTOMER")
# NOTE(review): the BUG 1 verdict is left blank when the task did not succeed,
# unlike BUG 2/3 - confirm that the empty fallback is intended.
daily_verdict = "✅ 持续通过" if "成功" in daily_status else ""
monthly_verdict = "✅ 修复验证通过" if "成功" in monthly_status else "❌ 仍失败"
customer_verdict = "✅ 修复验证通过" if "成功" in customer_status else "❌ 仍失败"

# Root-cause cell of the three-run comparison table.
if not root_cause:
    root_cause_label = ""
elif "MEMBER_CONSUMPTION" in root_cause[0]:
    root_cause_label = "DWS_MEMBER_CONSUMPTION site_id"
else:
    root_cause_label = root_cause[0]

# The run metadata below (times, duration, exit code, row counts) is fixed
# text for this specific run; it is not read from the raw dump.
# Blank lines separate the Markdown blocks so quotes, rules, headings and
# tables are not parsed as continuations of each other.
parts = [
    f"""# ETL 回归执行结果报告(第三次)

> 生成时间:{now}
> execution_id:abc94b2d-615f-42ea-83cc-ce687524a6ea
> 目的:验证 BUG 2(DWS_ASSISTANT_MONTHLY UniqueViolation)和 BUG 3(DWS_ASSISTANT_CUSTOMER UndefinedColumn)修复

---

## 执行概览

| 项目 | 值 |
|------|-----|
| 状态 | success |
| 开始时间 | 2026-02-21 19:41:02 |
| 结束时间 | 2026-02-21 19:52:22 |
| 总时长 | 681.2s (11m19s) |
| 退出码 | 0 |
| 任务总数 | {total_tasks} |
| 成功 | {success_count} |
| 失败 | {fail_count} |
| 未知 | {unknown_count} |
| 数据统计 | 获取 52,982 / 新增 13,296 / 更新 52,982 |

## BUG 修复验证

| BUG | 任务 | 第二次结果 | 第三次结果 | 验证 |
|-----|------|-----------|-----------|------|
| BUG 1 | DWS_ASSISTANT_DAILY | ✅ 已修复 | {daily_status} | {daily_verdict} |
| BUG 2 | DWS_ASSISTANT_MONTHLY | ❌ UniqueViolation | {monthly_status} | {monthly_verdict} |
| BUG 3 | DWS_ASSISTANT_CUSTOMER | ❌ UndefinedColumn | {customer_status} | {customer_verdict} |

## 逐任务结果

| # | 任务 | 状态 | 统计/错误 |
|---|------|------|----------|
"""
]

# One table row per task, in task_order order.
for i, task in enumerate(task_order, 1):
    entry = results.get(task, {"status": "?", "stats": "?"})
    parts.append(f"| {i} | {task} | {entry['status']} | {_md_cell(entry['stats'])} |\n")

# The root-cause section is emitted only when a non-cascade failure was found.
if root_cause:
    culprit, culprit_entry = root_cause
    parts.append(
        f"""
## 根因分析

本次新发现的根因错误:

- 任务:`{culprit}`
- 错误:{culprit_entry["stats"]}
- 影响:后续所有任务因 `InFailedSqlTransaction` 级联失败
"""
    )

# Columns for the first and second run are fixed historical figures.
parts.append(
    f"""
## 三次执行对比

| 项目 | 第一次 | 第二次 | 第三次(本次) |
|------|--------|--------|---------------|
| 任务数 | 41 | 31 | {total_tasks} |
| 耗时 | 590.7s | 150.4s | 681.2s |
| 成功 | 10/41 | 3/31 | {success_count}/{total_tasks} |
| 失败 | 31/41 | 28/31 | {fail_count}/{total_tasks} |
| 根因 | DWS_ASSISTANT_DAILY SQL 字段 | DWS_ASSISTANT_MONTHLY UK + DWS_ASSISTANT_CUSTOMER site_id | {root_cause_label} |
"""
)

report = "".join(parts)
# Write the report into LOG_DIR and echo a short summary to the console.
out_path = LOG_DIR / "2026-02-21__etl_run_result_v3.md"
out_path.write_text(report, encoding="utf-8")
print(f"报告已保存: {out_path}")
print(f"\n成功: {success_count}, 失败: {fail_count}, 未知: {unknown_count}")
if root_cause:
    culprit, culprit_entry = root_cause
    # Separate the task code from the error text; previously the two were
    # printed back to back with nothing between them.
    print(f"根因: {culprit} - {culprit_entry['stats'][:80]}")