"""
Re-run only the DWS/INDEX tasks that failed in the integration test, to verify the bugfix.

Uses the same parameters as the integration test:
- flow: api_full (but only the DWS/INDEX layers are run)
- processing_mode: full_window
- window: 2025-11-01 ~ 2026-02-26
- window_split_days: 30
- force_full: True

Tasks are submitted through the backend API, the same path the integration test uses.
"""
import json
import os
import sys
import time
from datetime import datetime
from pathlib import Path
from zoneinfo import ZoneInfo

import requests
from dotenv import load_dotenv

# Load environment variables from the .env file located two levels above
# this script's directory.
load_dotenv(Path(__file__).resolve().parents[2] / ".env")

# Timezone used when timestamping a run.
TZ = ZoneInfo("Asia/Shanghai")
# Base URL of the backend API the tasks are submitted to.
BASE_URL = "http://localhost:8000"

# Previously failed tasks (DWS_MEMBER_VISIT is the root cause; the others
# are cascading failures).
FAILED_TASKS = [
    # Root-cause tasks
    "DWS_MEMBER_VISIT",
    "DWS_MEMBER_CONSUMPTION",  # _extract_card_balances has the same bug; needs verification
    # DWS tasks that failed as a cascade
    "DWS_FINANCE_DAILY",
    "DWS_FINANCE_RECHARGE",
    "DWS_FINANCE_INCOME_STRUCTURE",
    "DWS_FINANCE_DISCOUNT_DETAIL",
    "DWS_ASSISTANT_MONTHLY",
    "DWS_ASSISTANT_FINANCE",
    # INDEX layer (depends on DWS)
    "DWS_WINBACK_INDEX",
    "DWS_NEWCONV_INDEX",
    "DWS_RELATION_INDEX",
    "DWS_SPENDING_POWER_INDEX",
]

def login() -> str:
    """Log in to the backend and return a JWT access token.

    Returns:
        The ``access_token`` field of the login response.

    Raises:
        requests.HTTPError: If the backend answers with an error status.
        requests.RequestException: On connection failure or timeout.
        KeyError: If the response body has no ``access_token`` field.
    """
    # NOTE(review): credentials are hard-coded; consider reading them from
    # the environment instead.
    resp = requests.post(
        f"{BASE_URL}/api/auth/login",
        json={
            "username": "admin",
            "password": "admin123",
        },
        # Bound the wait so an unresponsive backend cannot hang the script.
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()["access_token"]

def submit_task(token: str, tasks: list[str]) -> dict:
    """Submit an ETL run for the given tasks through the backend API.

    Args:
        token: JWT sent as a Bearer token.
        tasks: Task codes to include in the run.

    Returns:
        The decoded JSON body of the submit response.

    Raises:
        requests.HTTPError: If the backend answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    headers = {"Authorization": f"Bearer {token}"}
    # Fixed run parameters: full-window processing over a custom date range.
    payload = {
        "flow": "api_full",
        "processing_mode": "full_window",
        "window_mode": "custom",
        "window_start": "2025-11-01 00:00",
        "window_end": "2026-02-26 23:59",
        "window_split": "day",
        "window_split_days": 30,
        "force_full": True,
        "dry_run": False,
        "tasks": tasks,
    }
    resp = requests.post(
        f"{BASE_URL}/api/execution/run",
        json=payload,
        headers=headers,
        # Bound the wait so an unresponsive backend cannot hang the script.
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()

def poll_execution(token: str, execution_id: str, timeout_minutes: int = 60) -> dict:
    """Poll the backend until the execution finishes or the timeout elapses.

    The execution queue is queried every 15 seconds. While the execution is
    listed there, a one-line progress indicator is refreshed and newly fetched
    ERROR/CRITICAL/WARNING log lines are printed. Once the execution is no
    longer listed, the not-yet-seen log lines are printed and polling stops.
    Request errors (including timeouts) are printed and polling continues.

    Args:
        token: JWT sent as a Bearer token.
        execution_id: Identifier of the execution to watch.
        timeout_minutes: Maximum time to wait before giving up.

    Returns:
        ``{"status": "timeout"}`` when the deadline passes;
        ``{"status": "completed", "elapsed": ...}`` when the execution is not
        (or no longer) in the queue;
        ``{"status": ..., "exit_code": ..., "elapsed": ...}`` when the queue
        reports ``completed``, ``failed`` or ``cancelled``.
    """
    headers = {"Authorization": f"Bearer {token}"}
    logs_url = f"{BASE_URL}/api/execution/{execution_id}/logs"
    # Monotonic clock: the elapsed time is unaffected by wall-clock changes.
    start = time.monotonic()
    last_log_count = 0

    while True:
        elapsed = time.monotonic() - start
        if elapsed > timeout_minutes * 60:
            print(f"\n超时({timeout_minutes}分钟),停止等待")
            return {"status": "timeout"}

        try:
            # Look the execution up in the queue.
            resp = requests.get(
                f"{BASE_URL}/api/execution/queue", headers=headers, timeout=30
            )
            resp.raise_for_status()
            queue = resp.json()

            current = next(
                (
                    item
                    for item in queue.get("items", [])
                    if item.get("execution_id") == execution_id
                ),
                None,
            )

            if current is None:
                # Not in the queue: treated as finished; dump the remaining logs.
                resp2 = requests.get(
                    logs_url,
                    headers=headers,
                    params={"offset": 0, "limit": 5000},
                    timeout=60,
                )
                if resp2.status_code == 200:
                    logs = resp2.json().get("logs", [])
                    # Only print lines that were not fetched in earlier rounds.
                    for log in logs[last_log_count:]:
                        ts = log.get("timestamp", "")
                        # A null message would break the substring checks below.
                        msg = log.get("message") or ""
                        level = log.get("level", "INFO")
                        if level in ("ERROR", "CRITICAL"):
                            print(f" ❌ [{ts}] {msg}")
                        elif level == "WARNING":
                            print(f" ⚠️ [{ts}] {msg}")
                        elif "成功" in msg or "完成" in msg or "SUCCESS" in msg.upper():
                            print(f" ✅ [{ts}] {msg}")
                        else:
                            print(f" [{ts}] {msg}")
                    last_log_count = len(logs)
                # NOTE(review): nesting was reconstructed from a flattened
                # source; the return is assumed to apply whether or not the
                # log request succeeded - confirm.
                print(f"\n执行已结束({elapsed:.0f}s)")
                return {"status": "completed", "elapsed": elapsed}

            status = current.get("status", "unknown")
            progress = current.get("progress", "")
            mins, secs = divmod(int(elapsed), 60)
            # "\r" rewrites the same console line on every poll.
            print(f"\r [{mins:02d}:{secs:02d}] 状态={status} {progress}", end="", flush=True)

            # Fetch log lines added since the previous poll.
            resp3 = requests.get(
                logs_url,
                headers=headers,
                params={"offset": last_log_count, "limit": 200},
                timeout=30,
            )
            if resp3.status_code == 200:
                logs = resp3.json().get("logs", [])
                # While running, only problems are surfaced.
                for log in logs:
                    ts = log.get("timestamp", "")
                    msg = log.get("message", "")
                    level = log.get("level", "INFO")
                    if level in ("ERROR", "CRITICAL"):
                        print(f"\n ❌ [{ts}] {msg}")
                    elif level == "WARNING":
                        print(f"\n ⚠️ [{ts}] {msg}")
                last_log_count += len(logs)

            if status in ("completed", "failed", "cancelled"):
                exit_code = current.get("exit_code")
                print(f"\n执行结束: status={status}, exit_code={exit_code}, 耗时={elapsed:.0f}s")
                return {"status": status, "exit_code": exit_code, "elapsed": elapsed}

        except requests.RequestException as e:
            # Transient request failures must not abort the monitoring loop.
            print(f"\n 请求异常: {e}")

        time.sleep(15)

def get_final_logs(token: str, execution_id: str) -> list[dict]:
    """Fetch the log entries of an execution in a single request.

    Args:
        token: JWT sent as a Bearer token.
        execution_id: Identifier of the execution whose logs are wanted.

    Returns:
        The ``logs`` list of the response (at most 10000 entries are
        requested), or an empty list when the status code is not 200.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    headers = {"Authorization": f"Bearer {token}"}
    resp = requests.get(
        f"{BASE_URL}/api/execution/{execution_id}/logs",
        headers=headers,
        params={"offset": 0, "limit": 10000},
        # Bound the wait so an unresponsive backend cannot hang the script.
        timeout=60,
    )
    if resp.status_code == 200:
        return resp.json().get("logs", [])
    return []

def analyze_logs(logs: list[dict], tasks: "list[str] | None" = None) -> dict:
    """Summarise log entries into errors, warnings and per-task outcomes.

    A task is marked ``"SUCCESS"`` when a message mentioning it contains
    "任务完成" or "success" (case-insensitive), and ``"FAILED"`` when a message
    mentioning it contains "失败", "failed" or "error" (case-insensitive).
    If one message matches both, ``"FAILED"`` wins; across messages, the
    last matching one wins.

    Args:
        logs: Log entries; the ``message`` and ``level`` keys are read.
        tasks: Task codes to look for. Defaults to ``FAILED_TASKS``.

    Returns:
        A dict with ``errors`` (messages at ERROR/CRITICAL level),
        ``warnings`` (messages at WARNING level) and ``task_results``
        (task code -> ``"SUCCESS"`` / ``"FAILED"``).
    """
    if tasks is None:
        tasks = FAILED_TASKS

    errors = []
    warnings = []
    task_results = {}

    for log in logs:
        # A null message is treated as empty so the substring checks cannot fail.
        msg = log.get("message") or ""
        level = log.get("level", "INFO")

        if level in ("ERROR", "CRITICAL"):
            errors.append(msg)
        elif level == "WARNING":
            warnings.append(msg)

        # Derive task outcomes from the message text.
        mentioned = [task for task in tasks if task in msg]
        if not mentioned:
            continue

        upper_msg = msg.upper()
        if "任务完成" in msg or "SUCCESS" in upper_msg:
            for task in mentioned:
                task_results[task] = "SUCCESS"
        # Checked second on purpose: a failure marker overrides a success one.
        if "失败" in msg or "FAILED" in upper_msg or "ERROR" in upper_msg:
            for task in mentioned:
                task_results[task] = "FAILED"

    return {
        "errors": errors,
        "warnings": warnings,
        "task_results": task_results,
    }


def main():
    """Re-run the previously failed tasks and report the outcome.

    Steps: check that the backend answers, log in, submit ``FAILED_TASKS``,
    poll until the run ends, fetch and analyse the logs, print a summary and,
    when the ``SYSTEM_LOG_ROOT`` environment variable is set, save the summary
    as JSON in that directory.

    Exits with status 1 when the backend is unreachable or when the submit
    response carries no ``execution_id``.
    """
    now = datetime.now(TZ)
    print("=== 失败任务重跑验证 ===")
    print(f"时间: {now.isoformat()}")
    print(f"任务数: {len(FAILED_TASKS)}")
    print(f"任务列表: {', '.join(FAILED_TASKS)}")
    print()

    # 1. Make sure the backend is reachable.
    try:
        resp = requests.get(f"{BASE_URL}/api/health", timeout=5)
        print(f"后端状态: {resp.status_code}")
    except requests.RequestException:
        print("❌ 后端未启动,请先启动后端服务")
        print(" cd apps/backend && uvicorn app.main:app --reload --port 8000")
        sys.exit(1)

    # 2. Log in.
    print("登录中...")
    token = login()
    print("登录成功")

    # 3. Submit the tasks.
    print(f"\n提交 {len(FAILED_TASKS)} 个失败任务重跑...")
    result = submit_task(token, FAILED_TASKS)
    execution_id = result.get("execution_id")
    print(f"execution_id: {execution_id}")
    if not execution_id:
        # Without an id the poll would query ".../None/logs" and wrongly
        # report the run as completed.
        print(f"❌ 提交响应中缺少 execution_id: {result}")
        sys.exit(1)

    # 4. Monitor the execution.
    print("\n开始监控执行...")
    poll_result = poll_execution(token, execution_id, timeout_minutes=60)

    # 5. Fetch the logs and analyse them.
    print("\n获取完整日志...")
    logs = get_final_logs(token, execution_id)
    print(f"日志行数: {len(logs)}")

    analysis = analyze_logs(logs)

    # 6. Print the summary.
    print(f"\n{'='*60}")
    print("=== 重跑结果 ===")
    print(f"{'='*60}")
    print(f"执行状态: {poll_result.get('status')}")
    print(f"退出码: {poll_result.get('exit_code', 'N/A')}")
    print(f"耗时: {poll_result.get('elapsed', 0):.0f}s")
    print(f"错误数: {len(analysis['errors'])}")
    print(f"警告数: {len(analysis['warnings'])}")

    print("\n--- 任务级结果 ---")
    icons = {"SUCCESS": "✅", "FAILED": "❌"}
    for task in FAILED_TASKS:
        status = analysis['task_results'].get(task, "未检测到")
        # Tasks with no detected outcome get a question mark.
        icon = icons.get(status, "❓")
        print(f" {icon} {task}: {status}")

    if analysis['errors']:
        print("\n--- 错误详情 ---")
        # Cap the output: first 20 errors, 200 characters each.
        for i, err in enumerate(analysis['errors'][:20], 1):
            print(f" {i}. {err[:200]}")

    if analysis['warnings']:
        print("\n--- 警告详情(前10条)---")
        for i, warn in enumerate(analysis['warnings'][:10], 1):
            print(f" {i}. {warn[:200]}")

    # 7. Save the summary to a file when a log directory is configured.
    log_root = os.environ.get("SYSTEM_LOG_ROOT")
    if log_root:
        log_dir = Path(log_root)
        log_dir.mkdir(parents=True, exist_ok=True)
        log_file = log_dir / f"{now.strftime('%Y%m%d')}_rerun_failed_tasks.json"
        with open(log_file, "w", encoding="utf-8") as f:
            json.dump({
                "execution_id": execution_id,
                "tasks": FAILED_TASKS,
                "poll_result": poll_result,
                "analysis": analysis,
                "log_count": len(logs),
            }, f, ensure_ascii=False, indent=2, default=str)
        print(f"\n日志已保存: {log_file}")


# Run the re-run workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()