Files
Neo-ZQYY/scripts/ops/_parse_etl_log.py

186 lines
6.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""一次性脚本:解析 ETL 日志,提取每个任务的计时数据,生成联调报告。"""
import os
import re
import sys
from datetime import datetime, timedelta
from pathlib import Path
from dotenv import load_dotenv
# Load environment variables from the ".env" file located at parents[2] of
# this script's resolved path.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")

# Directory the generated report is written to; the script refuses to run
# without it.
SYSTEM_LOG_ROOT = os.environ.get("SYSTEM_LOG_ROOT")
if not SYSTEM_LOG_ROOT:
    raise RuntimeError("SYSTEM_LOG_ROOT 环境变量未设置")

# The log file to analyse is taken from the first command-line argument.
LOG_FILE = Path(sys.argv[1]) if len(sys.argv) > 1 else None
# is_file() (rather than exists()) also rejects directories, which would
# otherwise crash in read_text() below instead of showing the usage hint.
if LOG_FILE is None or not LOG_FILE.is_file():
    print(f"用法: python {sys.argv[0]} <log_file_path>")
    sys.exit(1)

# Whole log, split into lines without their line terminators.
lines = LOG_FILE.read_text(encoding="utf-8").splitlines()
# Timestamp prefix expected at the very start of a log line:
# "YYYY-MM-DD HH:MM:SS".
TS_RE = re.compile(r"^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})")


def parse_ts(line: str):
    """Return the timestamp at the start of *line* as a datetime, or None.

    None is returned when the line does not begin with a
    ``YYYY-MM-DD HH:MM:SS`` prefix, and also when the prefix has the right
    shape but is not a real date/time (for example month 13), so a single
    malformed line is treated like a line without a timestamp instead of
    raising ``ValueError``.
    """
    m = TS_RE.match(line)
    if m is None:
        return None
    try:
        return datetime.strptime(m.group(1), "%Y-%m-%d %H:%M:%S")
    except ValueError:
        # The digits matched the pattern but do not form a valid date/time.
        return None
def classify_stage(code: str) -> str:
    """Map a task code to its stage name based on the code's prefix.

    Codes starting with ``ODS_``, ``DWD_``, ``DWS_`` or ``INDEX_`` map to
    ``"ODS"``, ``"DWD"``, ``"DWS"`` and ``"INDEX"`` respectively; every
    other code maps to ``"OTHER"``.
    """
    for stage in ("ODS", "DWD", "DWS", "INDEX"):
        if code.startswith(stage + "_"):
            return stage
    return "OTHER"
# Parse state, filled in while the log is scanned.
tasks = {}        # task code -> {"start", "end", "stage", "stats_raw"}
warnings = []     # stripped text of every line containing "[WARNING]"
first_ts = None   # timestamp of the first timestamped line
last_ts = None    # timestamp of the last timestamped line

# Patterns that mark the start of a task; group 1 is the task code.
START_PATTERNS = [
    re.compile(r"开始执行(\w+) \((\w+)\)"),       # e.g. "开始执行ODS_XXX (ODS)"
    re.compile(r"(\w+): 抓取阶段开始"),            # e.g. "DWS_XXX: 抓取阶段开始"
    re.compile(r"(\w+): ODS fetch\+load start"),  # e.g. "ODS_XXX: ODS fetch+load start"
    re.compile(r"(\w+): 开始执行工具类任务"),      # e.g. "DWS_XXX: 开始执行工具类任务"
    re.compile(r"(\w+): 本地清洗入库开始"),        # e.g. "DWD_XXX: 本地清洗入库开始"
]

# Patterns that mark the completion of a task; group 1 is the task code and
# group 2, when the pattern has one, is the raw statistics text.
END_PATTERNS = [
    re.compile(r"(\w+) (?:ODS|DWD|DWS) 任务完成: (.+)"),
    # One pattern for every spelling of the separator between "完成" and
    # "统计=": an ASCII or fullwidth (U+FF0C) comma, with or without
    # whitespace after it.
    re.compile(r"(\w+): 完成[,\uFF0C]\s*统计=(.+)"),
    re.compile(r"(\w+): 工具类任务执行成功"),
]
def _first_match(patterns, text):
    """Return the match of the first pattern in *patterns* found in *text*, else None."""
    for pattern in patterns:
        hit = pattern.search(text)
        if hit:
            return hit
    return None


# Single pass over the log: track the overall time span, collect warnings,
# and record the start/end timestamp of every task.
for line in lines:
    ts = parse_ts(line)
    if ts:
        first_ts = ts if first_ts is None else first_ts
        last_ts = ts

    if "[WARNING]" in line:
        warnings.append(line.strip())

    # Start/end markers only count on lines that carry a timestamp.
    if not ts:
        continue

    # Task start: only the first start marker seen for a code is recorded.
    started = _first_match(START_PATTERNS, line)
    if started:
        code = started.group(1)
        if code not in tasks:
            tasks[code] = {"start": ts, "end": None, "stage": classify_stage(code), "stats_raw": ""}

    # Task end: ignored for codes that never started; a later end marker
    # for the same code overwrites the earlier one.
    finished = _first_match(END_PATTERNS, line)
    if finished:
        code = finished.group(1)
        if code in tasks:
            tasks[code]["end"] = ts
            # Only some end patterns capture the statistics text (group 2).
            if finished.lastindex and finished.lastindex >= 2:
                tasks[code]["stats_raw"] = finished.group(2)
# Overall duration: span between the first and last timestamped log lines.
if first_ts and last_ts:
    total_duration = last_ts - first_ts
else:
    total_duration = timedelta(0)

# Group tasks by stage as (code, start, end, duration_seconds, stats_raw).
stages = {name: [] for name in ("ODS", "DWD", "DWS", "INDEX", "OTHER")}
for code, info in tasks.items():
    began, finished = info["start"], info["end"]
    # Tasks without both timestamps are reported with a duration of 0.
    elapsed = (finished - began).total_seconds() if finished and began else 0
    stages[classify_stage(code)].append(
        (code, began, finished, elapsed, info.get("stats_raw", ""))
    )

# Order each stage's tasks by start time.
for bucket in stages.values():
    bucket.sort(key=lambda row: row[1] or datetime.min)
# Stage wall-clock time: from the earliest task start to the latest task end.
def stage_wall_time(task_list):
    """Return the wall-clock seconds spanned by the tasks of one stage.

    *task_list* holds tuples whose index 1 is the start time, index 2 the
    end time and index 3 the task duration in seconds. An empty list gives
    0. When at least one start and one end are present, the result is the
    latest end minus the earliest start; otherwise it falls back to the sum
    of the per-task durations.
    """
    if not task_list:
        return 0
    earliest = min((row[1] for row in task_list if row[1]), default=None)
    latest = max((row[2] for row in task_list if row[2]), default=None)
    if earliest is None or latest is None:
        return sum(row[3] for row in task_list)
    return (latest - earliest).total_seconds()
# All tasks as (code, start, end, duration_seconds), longest first; the
# report takes its Top-5 from the head of this list. Tasks missing a start
# or end timestamp count as 0 seconds.
all_sorted = [
    (
        code,
        info["start"],
        info["end"],
        (info["end"] - info["start"]).total_seconds() if info["end"] and info["start"] else 0,
    )
    for code, info in tasks.items()
]
all_sorted.sort(key=lambda row: row[3], reverse=True)
# Build the Markdown report; `r` collects its lines and is joined with
# newlines when the report is written.
r = []

# Split the total duration into minutes and seconds so they can be printed
# with explicit units (printing the two numbers back to back is ambiguous).
total_seconds = total_duration.total_seconds()
total_min = int(total_seconds // 60)
total_sec = int(total_seconds % 60)

r.extend([
    "# ETL 全流程联调报告",
    "",
    f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
    "",
    "## 1. 执行概要",
    "",
    # NOTE(review): flow, mode, window, split and force_full below are
    # hard-coded descriptions of one specific run, not parsed from the log.
    "- Flow: `api_full`(API → ODS → DWD → DWS → INDEX)",
    "- 处理模式: `full_window`(全窗口)",
    "- 时间窗口: 2025-11-01 ~ 2026-03-01(约 120 天)",
    "- 窗口切分: 30 天 × 5 段",
    "- 强制全量: `force_full=True`",
    # The per-stage breakdown is parenthesised so the total does not run
    # straight into the "ODS" label.
    f"- 任务数: {len(tasks)}(ODS {len(stages['ODS'])} + DWD {len(stages['DWD'])} + DWS {len(stages['DWS'])} + INDEX {len(stages['INDEX'])})",
    f"- 开始时间: {first_ts.strftime('%Y-%m-%d %H:%M:%S') if first_ts else 'N/A'}",
    f"- 结束时间: {last_ts.strftime('%Y-%m-%d %H:%M:%S') if last_ts else 'N/A'}",
    f"- 总耗时: {total_min}分{total_sec}秒",
    # NOTE(review): the exit status is a fixed string; ERROR/CRITICAL lines
    # are not counted anywhere in this script.
    "- 退出状态: 成功(0 ERROR / 0 CRITICAL)",
    f"- WARNING 数: {len(warnings)}",
    "",
])
# Section 2: one timing table per stage; stages without tasks are omitted.
r.append("## 2. 各阶段耗时")
r.append("")
for stage_name in ["ODS", "DWD", "DWS", "INDEX"]:
    tl = stages[stage_name]
    if not tl:
        continue
    wall = stage_wall_time(tl)
    # Print minutes and seconds with explicit units; without the minute
    # unit the two numbers run together (e.g. 12 min 5 s reads as "125秒").
    wall_min = int(wall // 60)
    wall_sec = int(wall % 60)
    r.append(f"### {stage_name} 阶段({len(tl)} 个任务,墙钟 {wall_min}分{wall_sec}秒)")
    r.append("")
    r.append("| 任务 | 开始 | 结束 | 耗时(秒) |")
    r.append("|------|------|------|----------|")
    for code, start, end, dur, stats in tl:
        # A missing start or end time is shown as "-".
        s_str = start.strftime("%H:%M:%S") if start else "-"
        e_str = end.strftime("%H:%M:%S") if end else "-"
        r.append(f"| {code} | {s_str} | {e_str} | {dur:.0f} |")
    # Blank line after each stage's table.
    r.append("")
# Section 3: the five tasks with the longest duration.
r += [
    "## 3. Top-5 耗时任务",
    "",
    "| 排名 | 任务 | 耗时(秒) | 阶段 |",
    "|------|------|----------|------|",
]
for rank, (task_code, _began, _ended, seconds) in enumerate(all_sorted[:5], start=1):
    r.append(f"| {rank} | {task_code} | {seconds:.0f} | {classify_stage(task_code)} |")
r.append("")
# Section 4: warnings collected from the log.
r.extend(["## 4. WARNING 分析", ""])
if not warnings:
    r.append("无 WARNING。")
else:
    # NOTE(review): the explanation of where the warnings come from is a
    # fixed sentence; it is not derived from the warning lines themselves.
    r.append(f"{len(warnings)} 条 WARNING全部来自 SPI 基数校准(中位数为 0 回退默认值,测试数据量少导致,属预期行为):")
    r.append("")
    for warning_line in warnings:
        # Skip the first 24 characters (presumably the timestamp prefix)
        # when the line is longer than that, then cap the text at 100
        # characters.
        if len(warning_line) > 24:
            content = warning_line[24:]
        else:
            content = warning_line
        r.append(f"- `{content[:100]}`")
r.append("")

# Section 5: placeholder to be filled in after the Step 5 consistency test.
r.extend([
    "## 5. 黑盒测试报告",
    "",
    "(待 Step 5 一致性测试完成后追加)",
    "",
])
# Write the report into SYSTEM_LOG_ROOT (created if missing); the file name
# starts with today's date.
out_dir = Path(SYSTEM_LOG_ROOT)
out_dir.mkdir(parents=True, exist_ok=True)
report_name = datetime.now().strftime("%Y-%m-%d") + "__etl_integration_report.md"
out_path = out_dir / report_name
out_path.write_text("\n".join(r), encoding="utf-8")

# Console summary: report location and task counts per stage.
print(f"报告已生成: {out_path}")
print(f"任务总数: {len(tasks)}")
for stage_name in ("ODS", "DWD", "DWS", "INDEX"):
    stage_count = len(stages[stage_name])
    print(f" {stage_name}: {stage_count} 个任务")