微信小程序页面迁移校验之前 P5任务处理之前
This commit is contained in:
285
scripts/ops/_etl_integration_report.py
Normal file
285
scripts/ops/_etl_integration_report.py
Normal file
@@ -0,0 +1,285 @@
|
||||
"""ETL 全流程联调报告生成脚本
|
||||
解析日志文件,提取计时数据、错误/警告,生成综合报告。
|
||||
输出到 SYSTEM_LOG_ROOT 环境变量指定的目录。
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load the .env file that sits two directory levels above this script's folder.
_env_file = Path(__file__).resolve().parents[2] / ".env"
load_dotenv(_env_file)

# Directory the report is written to; the script refuses to run without it.
SYSTEM_LOG_ROOT = os.getenv("SYSTEM_LOG_ROOT")
if not SYSTEM_LOG_ROOT:
    raise RuntimeError("SYSTEM_LOG_ROOT 环境变量未设置")

# Log file to analyse (fixed path of one specific run).
LOG_FILE = Path(
    r"C:\NeoZQYY\export\ETL-Connectors\feiqiu\LOGS\2681a85399e64c76a040163f956e1907.log"
)
if not LOG_FILE.exists():
    raise FileNotFoundError(f"日志文件不存在: {LOG_FILE}")
|
||||
|
||||
# ── Log parsing ──────────────────────────────────────────────
# A log record has the shape "<YYYY-MM-DD HH:MM:SS> [<LEVEL>] <logger>: <message>".
# Groups: (1) timestamp text, (2) level name, (3) message.
TS_RE = re.compile(r"^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) \[(\w+)\] [\w.]+: (.+)$")

# Every pattern below is meant for the message part of a record; group 1 is
# the non-whitespace token that precedes the first ": ".

# Start markers.
TASK_START_RE = re.compile(r"^(\S+): 抓取阶段开始")
TASK_TOOL_START_RE = re.compile(r"^(\S+): 开始执行工具类任务")

# Completion markers; TASK_DONE_RE group 2 is the raw statistics text.
TASK_DONE_RE = re.compile(r"^(\S+): 完成,统计=(.+)$")
TASK_TOOL_DONE_RE = re.compile(r"^(\S+): 工具类任务执行成功$")

# Window slices: groups 2 and 3 are "<index>/<total>"; WINDOW_START_RE
# group 4 is the bracketed window text.
WINDOW_START_RE = re.compile(r"^(\S+): 开始执行\((\d+)/(\d+)\),窗口\[(.+)\]$")
WINDOW_DONE_RE = re.compile(r"^(\S+): 完成\((\d+)/(\d+)\),已处理")

# Any message emitted under the DWD_LOAD_FROM_ODS prefix; group 1 is the rest.
DWD_LOAD_RE = re.compile(r"^DWD_LOAD_FROM_ODS: (.+)$")
|
||||
|
||||
# Decode leniently (errors="replace") so an undecodable byte cannot abort the run.
lines = LOG_FILE.read_text(encoding="utf-8", errors="replace").splitlines()

errors = []  # (timestamp text, message) of ERROR / CRITICAL records
warnings = []  # (timestamp text, message) of WARNING records
task_timings = {}  # task_name -> {"start": datetime, "end": datetime or None, "stats": str or None}
window_timings = defaultdict(list)  # task_name -> [{"idx", "total", "start", "end"}, ...]
first_ts = None  # timestamp of the first parsed record
last_ts = None  # timestamp of the last parsed record
all_tasks_done_ts = None  # timestamp of the "所有任务执行完成" marker, if seen
dwd_first_ts = None  # first record whose message mentions DWD_LOAD_FROM_ODS
dwd_last_ts = None  # last record whose message mentions DWD_LOAD_FROM_ODS

for line in lines:
    m = TS_RE.match(line)
    if not m:
        # Lines without the "timestamp [LEVEL] logger:" prefix are ignored.
        continue
    ts_str, level, msg = m.groups()
    ts = datetime.strptime(ts_str, "%Y-%m-%d %H:%M:%S")

    if first_ts is None:
        first_ts = ts
    last_ts = ts

    if level in ("ERROR", "CRITICAL"):
        errors.append((ts_str, msg))
    elif level == "WARNING":
        warnings.append((ts_str, msg))

    if msg.strip() == "所有任务执行完成":
        all_tasks_done_ts = ts

    # Task start: a repeated start marker only moves the start time and keeps
    # whatever end/stats were already recorded.
    sm = TASK_START_RE.match(msg)
    if sm:
        tname = sm.group(1)
        if tname not in task_timings:
            task_timings[tname] = {"start": ts, "end": None, "stats": None}
        else:
            task_timings[tname]["start"] = ts

    # Tool-type task start: always resets the whole entry.
    sm2 = TASK_TOOL_START_RE.match(msg)
    if sm2:
        tname = sm2.group(1)
        task_timings[tname] = {"start": ts, "end": None, "stats": None}

    # Task completion: only recorded for tasks whose start was seen.
    dm = TASK_DONE_RE.match(msg)
    if dm:
        tname = dm.group(1)
        if tname in task_timings:
            task_timings[tname]["end"] = ts
            task_timings[tname]["stats"] = dm.group(2)

    dm2 = TASK_TOOL_DONE_RE.match(msg)
    if dm2:
        tname = dm2.group(1)
        if tname in task_timings:
            task_timings[tname]["end"] = ts

    # Window slices.
    wm = WINDOW_START_RE.match(msg)
    if wm:
        tname, idx = wm.group(1), int(wm.group(2))
        window_timings[tname].append({"idx": idx, "total": int(wm.group(3)), "start": ts, "end": None})

    wd = WINDOW_DONE_RE.match(msg)
    if wd:
        tname, idx = wd.group(1), int(wd.group(2))
        # .get() avoids creating an empty defaultdict entry for a task that
        # never logged a slice start.
        for w in window_timings.get(tname, ()):
            # Close the first still-open slice carrying this index.
            if w["idx"] == idx and w["end"] is None:
                w["end"] = ts
                break

    # Remember the first and last record that mentions DWD_LOAD_FROM_ODS so
    # the special case below needs no second pass over the log.
    if "DWD_LOAD_FROM_ODS" in msg:
        if dwd_first_ts is None:
            dwd_first_ts = ts
        dwd_last_ts = ts

# DWD_LOAD_FROM_ODS special case: its end time is the last record mentioning
# it; if no start marker created an entry, the first mention is the start.
if dwd_first_ts is not None:
    dwd_entry = task_timings.setdefault(
        "DWD_LOAD_FROM_ODS", {"start": dwd_first_ts, "end": None, "stats": None}
    )
    dwd_entry["end"] = dwd_last_ts
|
||||
|
||||
# ── 分层统计 ──────────────────────────────────────────────
|
||||
def classify_layer(name):
    """Return the layer label for a task name.

    The label follows the name's prefix: ``ODS_`` -> ``"ODS"``,
    ``DWD_`` -> ``"DWD"``, ``DWS_`` -> ``"DWS"``.  A ``DWS_`` name that
    contains ``INDEX`` anywhere is labelled ``"INDEX"`` instead.  Every
    other name is ``"OTHER"``.
    """
    if name.startswith("DWS_"):
        return "INDEX" if "INDEX" in name else "DWS"
    for layer in ("ODS", "DWD"):
        if name.startswith(layer + "_"):
            return layer
    return "OTHER"
|
||||
|
||||
# Bucket every (task name, timing info) pair under the layer its name maps to.
layer_tasks = defaultdict(list)
for task_name, timing in task_timings.items():
    bucket = classify_layer(task_name)
    layer_tasks[bucket].append((task_name, timing))
|
||||
|
||||
# ── Report generation ──────────────────────────────────────────────
# Wall-clock span between the first and last parsed log record (0 when the
# log contained no parsable record).
if first_ts and last_ts:
    total_duration = (last_ts - first_ts).total_seconds()
else:
    total_duration = 0
total_min = total_duration / 60

# Make sure the output directory exists before the report is written.
out_dir = Path(SYSTEM_LOG_ROOT)
out_dir.mkdir(parents=True, exist_ok=True)
report_path = out_dir / "20260227__etl_integration_report.md"
|
||||
|
||||
|
||||
# Report lines are collected in this list and joined once at the very end.
rpt = [
    "# ETL 全流程联调报告",
    "",
    f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
    f"日志文件: `{LOG_FILE}`",
    "",
]

# ── Run summary ──
# The flow / mode / window / slicing / force_full rows are fixed text, not
# values derived from the log.
started_at = first_ts.strftime('%Y-%m-%d %H:%M:%S') if first_ts else 'N/A'
finished_at = last_ts.strftime('%Y-%m-%d %H:%M:%S') if last_ts else 'N/A'
final_status = '✅ 成功' if all_tasks_done_ts else '❌ 未检测到完成标记'
rpt.extend([
    "## 执行概要",
    "",
    "| 项目 | 值 |",
    "|------|-----|",
    "| Flow | `api_full` |",
    "| 处理模式 | `full_window` |",
    "| 时间窗口 | 2025-11-01 ~ 2026-02-27 |",
    "| 窗口切分 | 30 天 |",
    "| force_full | ✅ |",
    f"| 开始时间 | {started_at} |",
    f"| 结束时间 | {finished_at} |",
    f"| 总耗时 | {total_min:.1f} 分钟 ({total_duration:.0f} 秒) |",
    f"| 任务总数 | {len(task_timings)} |",
    f"| 错误数 | {len(errors)} |",
    f"| 警告数 | {len(warnings)} |",
    f"| 最终状态 | {final_status} |",
    "",
])
|
||||
|
||||
# ── Performance report ──
rpt.append("## 性能报告")
rpt.append("")

# One table per layer.  "OTHER" is listed as well so that tasks whose names
# match no layer prefix are not silently dropped from the breakdown while
# still being counted in the task total and the Top-5 ranking.
for layer in ["ODS", "DWD", "DWS", "INDEX", "OTHER"]:
    tasks = layer_tasks.get(layer, [])
    if not tasks:
        continue
    rpt.append(f"### {layer} 层({len(tasks)} 个任务)")
    rpt.append("")
    rpt.append("| 任务 | 开始 | 结束 | 耗时(秒) | 统计 |")
    rpt.append("|------|------|------|---------|------|")
    # Rows are ordered by start time; a missing start sorts first.
    for tname, info in sorted(tasks, key=lambda x: x[1]["start"] if x[1]["start"] else datetime.min):
        s = info["start"].strftime("%H:%M:%S") if info["start"] else "-"
        e = info["end"].strftime("%H:%M:%S") if info["end"] else "-"
        dur = ""
        if info["start"] and info["end"]:
            dur = f"{(info['end'] - info['start']).total_seconds():.0f}"
        stats_short = ""
        if info["stats"]:
            # Pull the inserted / deleted / errors counters out of the raw
            # statistics text.
            ins_m = re.search(r"'inserted': (\d+)", info["stats"])
            del_m = re.search(r"'deleted': (\d+)", info["stats"])
            err_m = re.search(r"'errors': (\d+)", info["stats"])
            parts = []
            if ins_m:
                parts.append(f"ins={ins_m.group(1)}")
            if del_m:
                parts.append(f"del={del_m.group(1)}")
            # The error counter is only shown when it is non-zero.
            if err_m and int(err_m.group(1)) > 0:
                parts.append(f"err={err_m.group(1)}")
            stats_short = ", ".join(parts)
        rpt.append(f"| {tname} | {s} | {e} | {dur} | {stats_short} |")
    rpt.append("")
|
||||
|
||||
# Top-5 longest-running tasks (only tasks with both a start and an end time).
rpt.append("### Top-5 耗时任务")
rpt.append("")
ranked = [
    (task_name, (timing["end"] - timing["start"]).total_seconds())
    for task_name, timing in task_timings.items()
    if timing["start"] and timing["end"]
]
# Longest first; ties keep their original relative order.
ranked.sort(key=lambda item: item[1], reverse=True)
rpt.append("| 排名 | 任务 | 耗时(秒) | 耗时(分) |")
rpt.append("|------|------|---------|---------|")
for rank, (task_name, seconds) in enumerate(ranked[:5], 1):
    rpt.append(f"| {rank} | {task_name} | {seconds:.0f} | {seconds/60:.1f} |")
rpt.append("")
|
||||
|
||||
# Window-slice timings.  Only tasks with more than one slice get a table, so
# the section heading is emitted only when at least one such task exists;
# otherwise the heading would be followed by nothing.
has_windows = any(len(v) > 1 for v in window_timings.values())
if has_windows:
    rpt.append("### 窗口切片耗时(部分任务)")
    rpt.append("")
    for tname in sorted(window_timings.keys()):
        slices = window_timings[tname]
        if len(slices) <= 1:
            continue
        rpt.append(f"**{tname}** ({len(slices)} 个切片)")
        rpt.append("")
        rpt.append("| 切片 | 开始 | 结束 | 耗时(秒) |")
        rpt.append("|------|------|------|---------|")
        for w in slices:
            s = w["start"].strftime("%H:%M:%S")
            # A slice whose completion line was never seen shows "-".
            e = w["end"].strftime("%H:%M:%S") if w["end"] else "-"
            dur = f"{(w['end'] - w['start']).total_seconds():.0f}" if w["end"] else "-"
            rpt.append(f"| {w['idx']}/{w['total']} | {s} | {e} | {dur} |")
        rpt.append("")
|
||||
|
||||
# ── DEBUG report ──
rpt.append("## DEBUG 报告")
rpt.append("")

# Errors: one bullet per ERROR / CRITICAL record, or a "none" note.
if not errors:
    rpt.extend(["### 错误", "", "无错误。", ""])
else:
    rpt.append(f"### 错误({len(errors)} 条)")
    rpt.append("")
    rpt.extend(f"- `{stamp}` {text}" for stamp, text in errors)
    rpt.append("")

# Warnings: same bullets, wrapped in a collapsible <details> element.
if not warnings:
    rpt.extend(["### 警告", "", "无警告。", ""])
else:
    rpt.append(f"### 警告({len(warnings)} 条)")
    rpt.append("")
    rpt.append("<details>")
    rpt.append("<summary>展开查看全部警告</summary>")
    rpt.append("")
    rpt.extend(f"- `{stamp}` {text}" for stamp, text in warnings)
    rpt.append("")
    rpt.append("</details>")
    rpt.append("")
|
||||
|
||||
# ── Black-box test report placeholder ──
# Only a heading and a placeholder line are written for this section.
rpt.extend(["## 黑盒测试报告", "", "(待 Task 5.3 追加)", ""])

# Write the report as UTF-8 and echo a short summary to stdout.
report_text = "\n".join(rpt)
report_path.write_text(report_text, encoding="utf-8")
print(
    f"报告已生成: {report_path}",
    f"总耗时: {total_min:.1f} 分钟",
    f"任务数: {len(task_timings)}",
    f"错误: {len(errors)}, 警告: {len(warnings)}",
    sep="\n",
)
|
||||
Reference in New Issue
Block a user