在前后端开发联调前 的提交20260223

This commit is contained in:
Neo
2026-02-23 23:02:20 +08:00
parent 254ccb1e77
commit fafc95e64c
1142 changed files with 10366960 additions and 36957 deletions

View File

@@ -0,0 +1,328 @@
# -*- coding: utf-8 -*-
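"""
Monitor the current ETL execution and, once it finishes, export a result report to SYSTEM_LOG_ROOT.
Polls the execution history through the backend API to detect whether the run identified by run_uuid has finished.
After completion, extracts task-level results from the browser log or the API and generates a Markdown report.
Usage: python scripts/ops/monitor_etl_run.py
"""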
from __future__ import annotations
import json
import sys
import time
from datetime import datetime
from pathlib import Path
import requests
sys.path.insert(0, str(Path(__file__).parent))
from _env_paths import get_output_path
# Base URL of the backend whose /api/execution/* endpoints this script queries.
BACKEND_URL = "http://localhost:8000"
# run_uuid of the ETL execution this script looks for in the execution history.
TARGET_RUN_UUID = "4ba9d2d365ee4a858f1c4104b1942dc2"
# Pause between two consecutive polls of the backend.
POLL_INTERVAL = 30  # seconds
def get_auth_token() -> str:
    """Return a JWT for the backend, reusing a cached one when it still works.

    A token stored in ``.monitor_token`` next to this script is probed against
    the execution-history endpoint; if the backend answers 200 it is returned
    as-is. Otherwise the user is prompted on stdin for a token, which is cached
    to the same file when non-empty.

    Returns:
        The token string, or ``""`` when the user leaves the prompt blank.
    """
    cache_path = Path(__file__).parent / ".monitor_token"

    if cache_path.exists():
        cached = cache_path.read_text(encoding="utf-8").strip()
        probe_ok = False
        try:
            response = requests.get(
                f"{BACKEND_URL}/api/execution/history",
                headers={"Authorization": f"Bearer {cached}"},
                params={"limit": 1},
                timeout=5,
            )
            probe_ok = response.status_code == 200
        except Exception:
            # Any failure while probing just means the cached token is unusable;
            # fall through to the interactive prompt below.
            probe_ok = False
        if probe_ok:
            return cached

    # Cached token missing or rejected: ask the operator for a fresh one.
    for prompt_line in (
        "需要登录后端获取 token。请在浏览器中登录后",
        "从浏览器 DevTools > Application > Local Storage 中复制 token",
        "或直接输入(留空跳过,使用无认证模式):",
    ):
        print(prompt_line)

    entered = input("JWT Token: ").strip()
    if entered:
        cache_path.write_text(entered, encoding="utf-8")
    return entered
def poll_execution_status(token: str) -> dict | None:
    """Fetch recent execution records and pick the one to report on.

    Requests the five most recent entries from the execution-history endpoint.
    The record whose ``run_uuid`` equals ``TARGET_RUN_UUID`` is preferred; when
    none matches, the first record of the response is returned instead.

    Args:
        token: Bearer token; an empty string sends the request unauthenticated.

    Returns:
        The selected execution record, or ``None`` when the request fails,
        the backend answers with a non-200 status, or no records come back.
    """
    auth_headers = {"Authorization": f"Bearer {token}"} if token else {}

    try:
        response = requests.get(
            f"{BACKEND_URL}/api/execution/history",
            headers=auth_headers,
            params={"limit": 5},
            timeout=10,
        )
        if response.status_code != 200:
            print(f" API 返回 {response.status_code}: {response.text[:200]}")
            return None

        payload = response.json()
        # The endpoint may answer with a bare list or wrap it under "items"/"data".
        if isinstance(payload, list):
            records = payload
        else:
            records = payload.get("items", payload.get("data", []))

        exact = next(
            (rec for rec in records if rec.get("run_uuid") == TARGET_RUN_UUID),
            None,
        )
        if exact is not None:
            return exact

        # No exact match: fall back to the first (latest) record, if any.
        return records[0] if records else None
    except requests.exceptions.ConnectionError:
        print(" 后端连接失败,可能已停止")
        return None
    except Exception as e:
        print(f" API 请求异常: {e}")
        return None
def extract_log_from_api(token: str) -> str | None:
    """Best-effort download of the execution log for ``TARGET_RUN_UUID``.

    Failures never raise: the caller treats a missing log as "no task-level
    results". Unlike a silent swallow, the reason is printed so the operator
    can tell why the report has no task table.

    Args:
        token: Bearer token; an empty string sends the request unauthenticated.

    Returns:
        The response body as text on HTTP 200, otherwise ``None``.
    """
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    try:
        r = requests.get(
            f"{BACKEND_URL}/api/execution/log/{TARGET_RUN_UUID}",
            headers=headers,
            timeout=30,
        )
        if r.status_code == 200:
            return r.text
        # Surface the rejection instead of dropping it silently.
        print(f" 日志 API 返回 {r.status_code}: {r.text[:200]}")
    except Exception as e:
        # Still best-effort (no re-raise), but say what went wrong.
        print(f" 获取执行日志异常: {e}")
    return None
def _first_task_name(line: str) -> str | None:
    """Return the first whitespace-separated token starting with ODS_/DWS_/DWD_, without trailing colons."""
    for word in line.split():
        if word.startswith(("ODS_", "DWS_", "DWD_")):
            return word.rstrip(":")
    return None


def parse_task_results_from_log(log_text: str) -> list[dict]:
    """Parse per-task outcomes out of the raw execution log.

    The log is scanned line by line for three kinds of markers:

    * a start line (contains "开始执行" and one of ODS/DWS/DWD) remembers the
      task name and its start timestamp;
    * a completion line (contains "任务完成" and the remembered task name)
      emits a ``success`` record carrying the stats extracted from the line;
    * a failure line (contains both "任务" and "失败" plus a task-name token)
      emits a ``failed`` record carrying the text after "失败:".

    Args:
        log_text: Full log text; ``None`` or empty yields an empty list.

    Returns:
        Records in log order. Every record has ``task``, ``status``, ``start``
        and ``end``; success records add ``stats``, failed records add ``error``.
    """
    results: list[dict] = []
    current_task = None
    task_start_time = None

    for line in log_text.split("\n") if log_text else []:
        # Task start. The layer test must be grouped: without parentheses
        # `and` binds tighter than `or`, so any line mentioning DWS/DWD
        # (including completion lines) would overwrite the start time.
        if "开始执行" in line and ("ODS" in line or "DWS" in line or "DWD" in line):
            started = _first_task_name(line)
            if started is not None:
                current_task = started
                task_start_time = extract_timestamp(line)

        # Task completion for the task currently being tracked.
        if current_task and "任务完成" in line and current_task in line:
            results.append({
                "task": current_task,
                "status": "success",
                "start": task_start_time,
                "end": extract_timestamp(line),
                "stats": extract_stats(line),
            })
            current_task = None

        # Task failure; may concern a task other than the tracked one.
        if "任务" in line and "失败" in line:
            failed = _first_task_name(line)
            if failed is not None:
                ts = extract_timestamp(line)
                error_msg = line.split("失败:")[-1].strip() if "失败:" in line else "未知错误"
                results.append({
                    "task": failed,
                    "status": "failed",
                    # Only trust the remembered start time if it belongs to this task.
                    "start": task_start_time if current_task == failed else ts,
                    "end": ts,
                    "error": error_msg,
                })
                if current_task == failed:
                    current_task = None

    return results
def extract_timestamp(line: str) -> str:
    """Return the text inside the first ``[...]`` pair of a log line.

    Expected log format: ``[2026-02-21 15:29:21] ...``.

    Args:
        line: A single log line.

    Returns:
        The text between the first "[" and the next "]" after it, or ``""``
        when the line has no such pair. A "]" that only appears before the
        first "[" counts as "no pair" rather than raising ``ValueError``.
    """
    start = line.find("[")
    if start == -1:
        return ""
    end = line.find("]", start + 1)
    if end == -1:
        return ""
    return line[start + 1:end]
def extract_stats(line: str) -> str:
    """Return the first brace-delimited ``{...}`` segment of a log line.

    The scan starts at the first "{" and tracks nesting depth, so a nested
    segment such as ``{'a': {'b': 1}}`` is returned whole and a stray "}"
    earlier in the line is ignored.

    Args:
        line: A single log line.

    Returns:
        The segment including its braces, or ``""`` when the line has no "{"
        or no "}" after it. If the braces never balance, the segment up to
        the first "}" after the opening brace is returned.
    """
    start = line.find("{")
    if start == -1:
        return ""

    depth = 0
    for pos in range(start, len(line)):
        ch = line[pos]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return line[start:pos + 1]

    # Unbalanced braces: fall back to the first closing brace after the opener.
    end = line.find("}", start)
    return line[start:end + 1] if end != -1 else ""
def generate_report(execution: dict, task_results: list[dict]) -> str:
    """Render the Markdown report for one execution.

    The report has an overview table built from ``execution``, an optional
    per-task table built from ``task_results``, and fixed "known issues" and
    "next steps" sections.

    Args:
        execution: Execution record from the backend. Reads ``status``,
            ``started_at``/``start_time``, ``ended_at``/``end_time``,
            ``duration``, ``exit_code`` and ``run_uuid``; all are optional.
        task_results: Records with ``task`` and ``status`` plus optional
            ``start``, ``end`` and ``stats`` or ``error``. An empty list
            omits the per-task section.

    Returns:
        The complete report as one newline-joined string.
    """
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    status = execution.get("status", "unknown")
    start_time = execution.get("started_at", execution.get("start_time", ""))
    end_time = execution.get("ended_at", execution.get("end_time", ""))
    duration = execution.get("duration", "")
    exit_code = execution.get("exit_code", "")
    # Label the report with the run actually described by `execution`; the
    # record handed in is not guaranteed to be the configured target run.
    run_uuid = execution.get("run_uuid") or TARGET_RUN_UUID

    lines = [
        "# ETL 执行结果报告",
        "",
        f"> 生成时间:{now}",
        f"> run_uuid:{run_uuid}",
        "",
        "---",
        "",
        "## 执行概览",
        "",
        "| 项目 | 值 |",
        "|------|-----|",
        f"| 状态 | {status} |",
        f"| 开始时间 | {start_time} |",
        f"| 结束时间 | {end_time} |",
        f"| 时长 | {duration} |",
        f"| 退出码 | {exit_code} |",
        "",
    ]

    # Per-task results
    if task_results:
        success_count = sum(1 for r in task_results if r["status"] == "success")
        failed_count = sum(1 for r in task_results if r["status"] == "failed")
        lines.extend([
            "## 任务级结果",
            "",
            f"成功:{success_count} | 失败:{failed_count} | 总计:{len(task_results)}",
            "",
            "| # | 任务 | 状态 | 开始 | 结束 | 备注 |",
            "|---|------|------|------|------|------|",
        ])
        for i, r in enumerate(task_results, 1):
            raw_note = r.get("stats", r.get("error", ""))
            note = "" if raw_note is None else str(raw_note)
            if len(note) > 80:
                note = note[:77] + "..."
            # Escape after truncating so a literal "|" in an error message
            # cannot split the table cell.
            note = note.replace("|", "\\|")
            lines.append(
                f"| {i} | {r['task']} | {r['status']} | {r.get('start', '')} | {r.get('end', '')} | {note} |"
            )
        lines.append("")

    # Known issues and follow-up steps (static text)
    lines.extend([
        "## 已知问题",
        "",
        "### DWS_ASSISTANT_DAILY 字段引用错误(已修复)",
        "",
        "根因:`_extract_trash_records` SQL 引用了 `dwd_assistant_trash_event` 中不存在的字段。",
        "级联影响9 个任务失败DWS_ASSISTANT_DAILY 及其下游 + ODS_SETTLEMENT_RECORDS/PAYMENT/REFUND/BUILD_ORDER_SUMMARY",
        "修复状态:代码已修复,待下次执行验证。",
        "详见:`export/SYSTEM/LOGS/2026-02-21__dws_assistant_daily_bug_fix.md`",
        "",
        "---",
        "",
        "## 下一步",
        "",
        "1. 重新提交包含失败任务的执行,验证修复",
        "2. 运行 ETL Data Consistency Check",
        "3. 运行 /audit 审计",
    ])
    return "\n".join(lines)
def main():
    """Poll the backend until the execution reaches a terminal status, then export results.

    Uses the token cached in ``.monitor_token`` next to this script when that
    file exists, otherwise polls unauthenticated. Polls up to 120 times,
    sleeping ``POLL_INTERVAL`` seconds between attempts. On a terminal status
    it writes the Markdown report and the raw execution record as JSON into
    the SYSTEM_LOG_ROOT output directory and returns.
    """
    out_dir = get_output_path("SYSTEM_LOG_ROOT")

    print(f"ETL 执行监控启动")
    print(f" 目标 run_uuid: {TARGET_RUN_UUID}")
    print(f" 轮询间隔: {POLL_INTERVAL}s")
    print(f" 输出目录: {out_dir}")
    print()

    # Non-interactive: use the cached token if there is one, else no token.
    token_path = Path(__file__).parent / ".monitor_token"
    token = token_path.read_text(encoding="utf-8").strip() if token_path.exists() else ""

    max_polls = 120  # at most 60 minutes of polling
    terminal_statuses = ("success", "failed", "completed", "error", "stopped")

    for attempt in range(1, max_polls + 1):
        stamp = datetime.now().strftime("%H:%M:%S")
        print(f"[{stamp}] 轮询 #{attempt}...", end=" ")

        execution = poll_execution_status(token)
        if execution is None:
            print("未获取到执行信息")
            time.sleep(POLL_INTERVAL)
            continue

        status = execution.get("status", "unknown")
        print(f"状态: {status}")
        if status not in terminal_statuses:
            time.sleep(POLL_INTERVAL)
            continue

        print(f"\n执行已完成,状态: {status}")

        # Task-level detail is optional: no log means an empty task list.
        log_text = extract_log_from_api(token)
        task_results = parse_task_results_from_log(log_text) if log_text else []

        # Markdown report
        report = generate_report(execution, task_results)
        out_file = out_dir / "2026-02-21__etl_run_result.md"
        out_file.write_text(report, encoding="utf-8")
        print(f"\n执行结果报告已导出: {out_file}")

        # Raw API record alongside the report
        raw_file = out_dir / "2026-02-21__etl_run_raw.json"
        raw_payload = json.dumps(execution, ensure_ascii=False, indent=2, default=str)
        raw_file.write_text(raw_payload, encoding="utf-8")
        print(f"原始数据已导出: {raw_file}")
        return

    print(f"\n超过最大轮询次数 ({max_polls}),退出监控")


if __name__ == "__main__":
    main()