在前后端开发联调前 的提交20260223
This commit is contained in:
364
scripts/ops/export_etl_result.py
Normal file
364
scripts/ops/export_etl_result.py
Normal file
@@ -0,0 +1,364 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
从后端 API 获取 ETL 执行日志,解析各任务结果,导出执行结果报告。
|
||||
|
||||
用法:python scripts/ops/export_etl_result.py
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from _env_paths import get_output_path
|
||||
|
||||
# Base URL of the backend API queried for execution history and logs.
BACKEND_URL = "http://localhost:8000"
# ID of the single execution this script reports on (hard-coded).
EXECUTION_ID = "dbf0c29a-253a-4705-a1ef-35cd71243d48"
# File next to this script from which the bearer token is read.
TOKEN_FILE = Path(__file__).parent / ".monitor_token"
|
||||
|
||||
|
||||
def get_token() -> str:
    """Return the bearer token stored in ``TOKEN_FILE``.

    The file is read as UTF-8 and surrounding whitespace is stripped.
    An empty string is returned when the file does not exist.
    """
    if not TOKEN_FILE.exists():
        return ""
    raw_token = TOKEN_FILE.read_text(encoding="utf-8")
    return raw_token.strip()
|
||||
|
||||
|
||||
def fetch_history(token: str) -> dict:
    """Fetch the history record for ``EXECUTION_ID`` from the backend.

    Requests the five most recent entries from ``/api/execution/history`` and
    returns the one whose ``id`` equals ``EXECUTION_ID``. When no entry
    matches, the first entry of the response is returned instead, or an empty
    dict when the response is empty.

    Args:
        token: Bearer token sent in the ``Authorization`` header.

    Returns:
        The selected history item, or ``{}``.

    Raises:
        requests.HTTPError: If the backend answers with an error status.
    """
    response = requests.get(
        f"{BACKEND_URL}/api/execution/history",
        headers={"Authorization": f"Bearer {token}"},
        params={"limit": 5},
        timeout=10,
    )
    response.raise_for_status()

    # Decode the body once: every Response.json() call parses the payload again.
    history = response.json()
    for item in history:
        if item.get("id") == EXECUTION_ID:
            return item
    return history[0] if history else {}
|
||||
|
||||
|
||||
def fetch_logs(token: str) -> dict:
    """Download the log payload of execution ``EXECUTION_ID``.

    Args:
        token: Bearer token sent in the ``Authorization`` header.

    Returns:
        The decoded JSON body of ``/api/execution/<id>/logs``.

    Raises:
        requests.HTTPError: If the backend answers with an error status.
    """
    url = f"{BACKEND_URL}/api/execution/{EXECUTION_ID}/logs"
    auth_headers = {"Authorization": f"Bearer {token}"}
    response = requests.get(url, headers=auth_headers, timeout=30)
    response.raise_for_status()
    return response.json()
|
||||
|
||||
|
||||
|
||||
def parse_log(error_log: str) -> list[dict]:
    """Parse per-task results and timings out of the stderr log text.

    The log is scanned line by line. Start markers and window-split markers
    are remembered per task code; an "ODS task finished" line produces a
    success record and a "task ... failed" line produces a failure record.
    Afterwards, the first line mentioning a finished ``DWD_LOAD_FROM_ODS``
    adds a success record for that task if none exists yet.

    Only those three kinds of lines create records; completions of other
    layers are not recognised by this parser.

    Args:
        error_log: Raw log text; ``None`` or an empty string yields ``[]``.

    Returns:
        A list of dicts with the keys ``task``, ``layer``, ``status``,
        ``start``, ``end``, ``windows`` and either ``stats`` (success) or
        ``error`` (failure, truncated to 120 characters). Timestamps are the
        bracketed ``YYYY-MM-DD HH:MM:SS`` values found on the matching lines,
        or ``""`` when a line carries none.
    """
    lines = error_log.split("\n") if error_log else []

    # Bracketed timestamp at any position of the line.
    ts_re = re.compile(r"\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\]")
    # Generic task start.
    start_re = re.compile(r"开始执行(\S+)\s+\((\w+)\)")
    # ODS task finished, followed by a stats dict.
    ods_done_re = re.compile(r"(\S+)\s+ODS 任务完成:\s+(\{.*\})")
    # Task failed, followed by the error message.
    fail_re = re.compile(r"任务\s+(\S+)\s+失败:\s+(.*)")
    # DWS fetch phase started.
    dws_start_re = re.compile(r"(\S+):\s+抓取阶段开始")
    # Utility task started.
    util_start_re = re.compile(r"(\S+):\s+开始执行工具类任务")
    # Time window split into N segments.
    window_re = re.compile(r"(\S+):\s+窗口拆分为\s+(\d+)\s+段")

    results: list[dict] = []
    recorded: set[str] = set()  # task codes that already have a record
    task_starts: dict[str, str] = {}  # task_code -> start timestamp
    task_windows: dict[str, int] = {}  # task_code -> window count

    for line in lines:
        ts_match = ts_re.search(line)
        ts = ts_match.group(1) if ts_match else ""

        # A generic start marker always overwrites the remembered start.
        m = start_re.search(line)
        if m:
            task_starts[m.group(1)] = ts
            continue

        # DWS / utility start markers only fill in a start that is still unknown.
        m = dws_start_re.search(line) or util_start_re.search(line)
        if m:
            task_starts.setdefault(m.group(1), ts)
            continue

        m = window_re.search(line)
        if m:
            task_windows[m.group(1)] = int(m.group(2))
            continue

        m = ods_done_re.search(line)
        if m:
            task_code = m.group(1)
            results.append({
                "task": task_code,
                "layer": "ODS",
                "status": "success",
                "start": task_starts.get(task_code, ""),
                "end": ts,
                "windows": task_windows.get(task_code, 0),
                "stats": m.group(2),
            })
            recorded.add(task_code)
            continue

        m = fail_re.search(line)
        if m:
            task_code = m.group(1)
            # Cascading errors repeat the failure line; keep the first record only.
            if task_code not in recorded:
                results.append({
                    "task": task_code,
                    "layer": guess_layer(task_code),
                    "status": "failed",
                    "start": task_starts.get(task_code, ""),
                    "end": ts,
                    "windows": task_windows.get(task_code, 0),
                    "error": m.group(2).strip()[:120],
                })
                recorded.add(task_code)

    # DWD_LOAD_FROM_ODS has no dedicated pattern above: look for its completion
    # marker separately and append it after all other records.
    dwd_task = "DWD_LOAD_FROM_ODS"
    if dwd_task not in recorded:
        for line in lines:
            if dwd_task in line and "完成" in line:
                ts_match = ts_re.search(line)
                results.append({
                    "task": dwd_task,
                    "layer": "DWD",
                    "status": "success",
                    "start": task_starts.get(dwd_task, ""),
                    "end": ts_match.group(1) if ts_match else "",
                    "windows": 0,
                    "stats": "",
                })
                break

    return results
|
||||
|
||||
|
||||
def guess_layer(task_code: str) -> str:
    """Derive the layer name from the prefix of a task code.

    Returns ``"ODS"``, ``"DWD"``, ``"DWS"`` or ``"INDEX"`` when the code
    starts with that name followed by an underscore, otherwise ``"OTHER"``.
    """
    for layer in ("ODS", "DWD", "DWS", "INDEX"):
        if task_code.startswith(layer + "_"):
            return layer
    return "OTHER"
|
||||
|
||||
|
||||
|
||||
def calc_duration(start: str, end: str) -> str:
    """Format the time elapsed between two timestamps.

    Args:
        start: Start timestamp in ``YYYY-MM-DD HH:MM:SS`` form.
        end: End timestamp in the same form.

    Returns:
        The difference with one decimal, in seconds (``"30.0s"``), minutes
        (``"1.5m"``) or hours (``"2.0h"``). ``"—"`` is returned when either
        value is empty or cannot be parsed.
    """
    if not start or not end:
        return "—"

    fmt = "%Y-%m-%d %H:%M:%S"
    # Only the parsing can fail; keep the handler limited to it.
    try:
        began = datetime.strptime(start, fmt)
        ended = datetime.strptime(end, fmt)
    except (TypeError, ValueError):
        return "—"

    seconds = (ended - began).total_seconds()
    # Choose the unit by magnitude so that a negative span (end before start)
    # is scaled like a positive one instead of always being printed in seconds.
    magnitude = abs(seconds)
    if magnitude < 60:
        return f"{seconds:.1f}s"
    if magnitude < 3600:
        return f"{seconds / 60:.1f}m"
    return f"{seconds / 3600:.1f}h"
|
||||
|
||||
|
||||
def _time_of_day(stamp: str) -> str:
    """Return the time part of a ``YYYY-MM-DD HH:MM:SS`` stamp, or "—" if empty."""
    if not stamp:
        return "—"
    if len(stamp) > 8 and " " in stamp:
        return stamp.split(" ")[-1]
    return stamp


def generate_report(execution: dict, task_results: list[dict]) -> str:
    """Render the Markdown report for one ETL execution.

    The report consists of an overview table built from ``execution``, a
    per-task table built from ``task_results``, a failure analysis that treats
    the first failed task as the root cause and all later ones as cascading
    failures, and a "next steps" list.

    Note that the ``run_uuid`` line, the root-cause explanation, the fix
    status and the next-steps list are fixed texts; they are not derived from
    the arguments.

    Args:
        execution: History item of the execution; the keys ``status``,
            ``started_at``, ``finished_at``, ``duration_ms``, ``exit_code``
            and ``task_codes`` are read, all optional.
        task_results: Records with the keys ``task``, ``layer``, ``status``
            and optionally ``start``, ``end``, ``windows``, ``stats`` and
            ``error``.

    Returns:
        The complete report as a single newline-joined string.
    """
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    status = execution.get("status", "unknown")
    started = execution.get("started_at", "—")
    finished = execution.get("finished_at", "—")
    duration_ms = execution.get("duration_ms", 0)
    exit_code = execution.get("exit_code", "—")
    # The key may be present with a null value; count that as "no tasks".
    task_codes = execution.get("task_codes") or []

    if duration_ms:
        dur_str = f"{duration_ms / 1000:.1f}s ({duration_ms / 60000:.1f}m)"
    else:
        dur_str = "—"

    failed_tasks = [r for r in task_results if r["status"] == "failed"]
    success_count = sum(1 for r in task_results if r["status"] == "success")
    failed_count = len(failed_tasks)

    lines = [
        "# ETL 执行结果报告",
        "",
        f"> 生成时间:{now}",
        f"> execution_id:{EXECUTION_ID}",
        "> run_uuid:4ba9d2d365ee4a858f1c4104b1942dc2",
        "",
        "---",
        "",
        "## 执行概览",
        "",
        "| 项目 | 值 |",
        "|------|-----|",
        f"| 状态 | {status} |",
        f"| 开始时间 | {started} |",
        f"| 结束时间 | {finished} |",
        f"| 总时长 | {dur_str} |",
        f"| 退出码 | {exit_code} |",
        f"| 任务总数 | {len(task_codes)} |",
        f"| 成功 | {success_count} |",
        f"| 失败 | {failed_count} |",
        "",
        "---",
        "",
        "## 任务级结果",
        "",
        "| # | 任务 | 层 | 状态 | 开始 | 结束 | 耗时 | 窗口数 | 备注 |",
        "|---|------|-----|------|------|------|------|--------|------|",
    ]

    for i, r in enumerate(task_results, 1):
        dur = calc_duration(r.get("start", ""), r.get("end", ""))
        note = r.get("stats", r.get("error", ""))
        if len(note) > 60:
            note = note[:57] + "..."
        # An unescaped pipe would be read as a cell separator and break the row.
        note = note.replace("|", "\\|")
        win = r.get("windows", 0)
        win_str = str(win) if win else "—"
        # Show only the time of day; missing stamps become "—" like other gaps.
        start_short = _time_of_day(r.get("start", ""))
        end_short = _time_of_day(r.get("end", ""))

        status_emoji = "✅" if r["status"] == "success" else "❌"
        lines.append(
            f"| {i} | {r['task']} | {r['layer']} | {status_emoji} {r['status']} "
            f"| {start_short} | {end_short} | {dur} | {win_str} | {note} |"
        )

    lines.extend([
        "",
        "---",
        "",
        "## 失败任务分析",
        "",
    ])

    if failed_tasks:
        # The earliest failure is reported as root cause, the rest as cascade.
        root_cause = failed_tasks[0]
        cascade = failed_tasks[1:]

        lines.extend([
            f"### 根因:{root_cause['task']}",
            "",
            f"错误:`{root_cause.get('error', '未知')}`",
            "",
            "原因:`_extract_trash_records` SQL 引用了 `dwd_assistant_trash_event` 中不存在的字段 `assistant_service_id`。",
            "",
            "### 级联失败",
            "",
        ])

        if cascade:
            lines.extend(
                f"- {r['task']}:InFailedSqlTransaction(事务污染)" for r in cascade
            )
        else:
            lines.append("无级联失败。")

        lines.extend([
            "",
            "### 修复状态",
            "",
            "代码已修复(4 处改动),待下次执行验证。",
            "详见:`export/SYSTEM/LOGS/2026-02-21__dws_assistant_daily_bug_fix.md`",
        ])
    else:
        lines.append("无失败任务。")

    lines.extend([
        "",
        "---",
        "",
        "## 下一步",
        "",
        "1. 重新提交包含 9 个失败任务的执行,验证修复",
        "2. 运行 ETL Data Consistency Check",
        "3. 运行 /audit 审计",
    ])

    return "\n".join(lines)
|
||||
|
||||
|
||||
def main() -> None:
    """Fetch the execution data, parse its log and write the two export files.

    Writes ``2026-02-21__etl_run_result.md`` (the Markdown report) and
    ``2026-02-21__etl_run_raw.json`` (the history item, the log length and the
    parsed task records) into the directory returned by
    ``get_output_path("SYSTEM_LOG_ROOT")``, printing progress along the way.
    """
    out_dir = get_output_path("SYSTEM_LOG_ROOT")
    token = get_token()

    print("获取执行历史...")
    execution = fetch_history(token)
    # The key may be present with a null value; fall back to 0 before dividing.
    duration_ms = execution.get("duration_ms") or 0
    print(f" 状态: {execution.get('status')}, 时长: {duration_ms / 1000:.1f}s")

    print("获取执行日志...")
    logs = fetch_logs(token)
    # A null error_log is treated like an empty one so len() below cannot fail.
    error_log = logs.get("error_log") or ""
    print(f" error_log 长度: {len(error_log)} 字符")

    print("解析任务结果...")
    task_results = parse_log(error_log)
    print(f" 解析到 {len(task_results)} 个任务结果")

    print("生成报告...")
    report = generate_report(execution, task_results)
    out_file = out_dir / "2026-02-21__etl_run_result.md"
    out_file.write_text(report, encoding="utf-8")
    print(f"执行结果报告已导出: {out_file}")

    # Keep the raw API data next to the report.
    raw_file = out_dir / "2026-02-21__etl_run_raw.json"
    raw_data = {
        "execution": execution,
        "error_log_length": len(error_log),
        "task_results_parsed": task_results,
    }
    raw_file.write_text(
        json.dumps(raw_data, ensure_ascii=False, indent=2, default=str),
        encoding="utf-8",
    )
    print(f"原始数据已导出: {raw_file}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user