# -*- coding: utf-8 -*- """解析第三次 ETL 执行日志,生成结果报告。""" from __future__ import annotations import json import re from datetime import datetime from pathlib import Path from _env_paths import get_output_path LOG_DIR = get_output_path("SYSTEM_LOG_ROOT") raw = json.loads((LOG_DIR / "2026-02-21__etl_run_raw_v3.json").read_text("utf-8")) error_log = raw.get("error_log", "") lines = error_log.split("\n") # 解析每个任务的结果 task_order = [ "DWS_ASSISTANT_DAILY", "DWS_ASSISTANT_MONTHLY", "DWS_ASSISTANT_CUSTOMER", "DWS_ASSISTANT_SALARY", "DWS_ASSISTANT_FINANCE", "ODS_SETTLEMENT_RECORDS", "ODS_PAYMENT", "ODS_REFUND", "DWS_BUILD_ORDER_SUMMARY", "DWS_MEMBER_CONSUMPTION", "DWS_MEMBER_VISIT", "ODS_GOODS_CATEGORY", "ODS_STORE_GOODS", "ODS_STORE_GOODS_SALES", "ODS_TENANT_GOODS", "ODS_PLATFORM_COUPON", "ODS_GROUP_PACKAGE", "ODS_GROUP_BUY_REDEMPTION", "ODS_INVENTORY_STOCK", "ODS_INVENTORY_CHANGE", "DWS_GOODS_STOCK_DAILY", "DWS_GOODS_STOCK_WEEKLY", "DWS_GOODS_STOCK_MONTHLY", "DWS_FINANCE_DAILY", "DWS_FINANCE_RECHARGE", "DWS_FINANCE_INCOME_STRUCTURE", "DWS_FINANCE_DISCOUNT_DETAIL", "DWS_WINBACK_INDEX", "DWS_NEWCONV_INDEX", "DWS_RELATION_INDEX", "DWD_LOAD_FROM_ODS", ] results = {} # 成功的任务:匹配 "完成,统计=" 或 "工具类任务执行成功" for task in task_order: # 检查成功 pat_ok = re.compile(rf"{task}: 完成,统计=(.+)") pat_ok2 = re.compile(rf"{task}: 工具类任务执行成功") pat_ok3 = re.compile(rf"{task}: 结果统计: (.+)") pat_fail = re.compile(rf"任务 {task} 失败: (.+)") for line in lines: m = pat_ok.search(line) if m: results[task] = {"status": "✅ 成功", "stats": m.group(1)[:120]} break m2 = pat_ok2.search(line) if m2: # 找统计行 for line2 in lines: m3 = pat_ok3.search(line2) if m3: results[task] = {"status": "✅ 成功", "stats": m3.group(1)[:120]} break else: results[task] = {"status": "✅ 成功", "stats": "—"} break m4 = pat_fail.search(line) if m4: err_msg = m4.group(1)[:120] # 判断是否是级联失败 if "InFailedSqlTransaction" in err_msg: results[task] = {"status": "❌ 级联失败", "stats": "InFailedSqlTransaction"} elif "UndefinedColumn" in err_msg: results[task] = {"status": "❌ 字段错误", "stats": err_msg} elif "UniqueViolation" in err_msg: results[task] = {"status": "❌ 唯一约束", "stats": err_msg} else: results[task] = {"status": "❌ 失败", "stats": err_msg} break else: results[task] = {"status": "⚠️ 未知", "stats": "日志中未找到"} # 找根因错误(第一个非级联失败) root_cause = None for task in task_order: r = results.get(task, {}) if r["status"] in ("❌ 字段错误", "❌ 唯一约束", "❌ 失败"): root_cause = (task, r) break success_count = sum(1 for r in results.values() if r["status"] == "✅ 成功") fail_count = sum(1 for r in results.values() if "❌" in r["status"]) unknown_count = sum(1 for r in results.values() if "⚠️" in r["status"]) # 生成报告 now = datetime.now().strftime("%Y-%m-%d %H:%M:%S") report = f"""# ETL 回归执行结果报告(第三次) > 生成时间:{now} > execution_id:abc94b2d-615f-42ea-83cc-ce687524a6ea > 目的:验证 BUG 2(DWS_ASSISTANT_MONTHLY UniqueViolation)和 BUG 3(DWS_ASSISTANT_CUSTOMER UndefinedColumn)修复 --- ## 执行概览 | 项目 | 值 | |------|-----| | 状态 | success | | 开始时间 | 2026-02-21 19:41:02 | | 结束时间 | 2026-02-21 19:52:22 | | 总时长 | 681.2s (11m19s) | | 退出码 | 0 | | 任务总数 | 31 | | 成功 | {success_count} | | 失败 | {fail_count} | | 未知 | {unknown_count} | | 数据统计 | 获取 52,982 / 新增 13,296 / 更新 52,982 | ## BUG 修复验证 | BUG | 任务 | 第二次结果 | 第三次结果 | 验证 | |-----|------|-----------|-----------|------| | BUG 1 | DWS_ASSISTANT_DAILY | ✅ 已修复 | {results.get("DWS_ASSISTANT_DAILY", {}).get("status", "?")} | {"✅ 持续通过" if "成功" in results.get("DWS_ASSISTANT_DAILY", {}).get("status", "") else "❌"} | | BUG 2 | DWS_ASSISTANT_MONTHLY | ❌ UniqueViolation | {results.get("DWS_ASSISTANT_MONTHLY", {}).get("status", "?")} | {"✅ 修复验证通过" if "成功" in results.get("DWS_ASSISTANT_MONTHLY", {}).get("status", "") else "❌ 仍失败"} | | BUG 3 | DWS_ASSISTANT_CUSTOMER | ❌ UndefinedColumn | {results.get("DWS_ASSISTANT_CUSTOMER", {}).get("status", "?")} | {"✅ 修复验证通过" if "成功" in results.get("DWS_ASSISTANT_CUSTOMER", {}).get("status", "") else "❌ 仍失败"} | ## 逐任务结果 | # | 任务 | 状态 | 统计/错误 | |---|------|------|----------| """ for i, task in enumerate(task_order, 1): r = results.get(task, {"status": "?", "stats": "?"}) report += f"| {i} | {task} | {r['status']} | {r['stats'][:80]} |\n" if root_cause: report += f""" ## 根因分析 本次新发现的根因错误: - 任务:`{root_cause[0]}` - 错误:{root_cause[1]["stats"]} - 影响:后续所有任务因 `InFailedSqlTransaction` 级联失败 """ report += f""" ## 三次执行对比 | 项目 | 第一次 | 第二次 | 第三次(本次) | |------|--------|--------|---------------| | 任务数 | 41 | 31 | 31 | | 耗时 | 590.7s | 150.4s | 681.2s | | 成功 | 10/41 | 3/31 | {success_count}/31 | | 失败 | 31/41 | 28/31 | {fail_count}/31 | | 根因 | DWS_ASSISTANT_DAILY SQL 字段 | DWS_ASSISTANT_MONTHLY UK + DWS_ASSISTANT_CUSTOMER site_id | {"DWS_MEMBER_CONSUMPTION site_id" if root_cause and "MEMBER_CONSUMPTION" in root_cause[0] else root_cause[0] if root_cause else "无"} | """ out_path = LOG_DIR / "2026-02-21__etl_run_result_v3.md" out_path.write_text(report, encoding="utf-8") print(f"报告已保存: {out_path}") print(f"\n成功: {success_count}, 失败: {fail_count}, 未知: {unknown_count}") if root_cause: print(f"根因: {root_cause[0]} — {root_cause[1]['stats'][:80]}")