微信小程序页面迁移校验之前 P5任务处理之前
This commit is contained in:
@@ -17,19 +17,24 @@ from __future__ import annotations
|
||||
import asyncio
|
||||
import logging
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from ..config import ETL_PROJECT_PATH
|
||||
# CHANGE 2026-03-07 | 只保留模块引用,execute() 中实时读取属性值
|
||||
# 禁止 from ..config import ETL_PROJECT_PATH(值拷贝,reload 后过期)
|
||||
from .. import config as _config_module
|
||||
from ..database import get_connection
|
||||
from ..schemas.tasks import TaskConfigSchema
|
||||
from ..services.cli_builder import cli_builder
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 实例标识:用于区分多后端实例写入同一 DB 的记录
|
||||
import platform as _platform
|
||||
_INSTANCE_HOST = _platform.node() # hostname
|
||||
|
||||
|
||||
class TaskExecutor:
|
||||
"""管理 ETL CLI 子进程的生命周期"""
|
||||
@@ -112,21 +117,58 @@ class TaskExecutor:
|
||||
execution_id: str,
|
||||
queue_id: str | None = None,
|
||||
site_id: int | None = None,
|
||||
schedule_id: str | None = None,
|
||||
) -> None:
|
||||
"""以子进程方式调用 ETL CLI。
|
||||
|
||||
使用 subprocess.Popen + 线程读取,兼容 Windows(避免
|
||||
asyncio.create_subprocess_exec 在 Windows 上的 NotImplementedError)。
|
||||
"""
|
||||
# CHANGE 2026-03-07 | 实时从 config 模块读取,避免 import 时复制的值过期
|
||||
etl_path = _config_module.ETL_PROJECT_PATH
|
||||
etl_python = _config_module.ETL_PYTHON_EXECUTABLE
|
||||
|
||||
cmd = cli_builder.build_command(
|
||||
config, ETL_PROJECT_PATH, python_executable=sys.executable
|
||||
config, etl_path, python_executable=etl_python
|
||||
)
|
||||
command_str = " ".join(cmd)
|
||||
|
||||
# CHANGE 2026-03-07 | 运行时防护:拒绝执行包含非预期路径的命令
|
||||
# 检测两种异常:
|
||||
# 1. D 盘路径(junction 穿透)
|
||||
# 2. 多环境子目录(test/repo、prod/repo)
|
||||
_cmd_normalized = command_str.replace("/", "\\")
|
||||
_bad_patterns = []
|
||||
if "D:\\" in command_str or "D:/" in command_str:
|
||||
_bad_patterns.append("D盘路径")
|
||||
if "\\test\\repo" in _cmd_normalized or "\\prod\\repo" in _cmd_normalized:
|
||||
_bad_patterns.append("多环境子目录(test/repo或prod/repo)")
|
||||
|
||||
if _bad_patterns:
|
||||
_issues = " + ".join(_bad_patterns)
|
||||
logger.error(
|
||||
"路径防护触发:命令包含 %s,拒绝执行。"
|
||||
" command=%s | ETL_PY=%s | ETL_PATH=%s"
|
||||
" | NEOZQYY_ROOT=%s | config.__file__=%s",
|
||||
_issues, command_str, etl_python, etl_path,
|
||||
__import__('os').environ.get("NEOZQYY_ROOT", "<未设置>"),
|
||||
_config_module.__file__,
|
||||
)
|
||||
raise RuntimeError(
|
||||
f"ETL 命令包含异常路径({_issues}),拒绝执行。"
|
||||
f" 请检查 .env 中 ETL_PYTHON_EXECUTABLE 和 ETL_PROJECT_PATH 配置。"
|
||||
f" 当前值: ETL_PY={etl_python}, ETL_PATH={etl_path}"
|
||||
)
|
||||
|
||||
effective_site_id = site_id or config.store_id
|
||||
|
||||
# CHANGE 2026-03-07 | 在 command 前缀中注入实例标识,
|
||||
# 便于在多后端实例共享同一 DB 时区分记录来源
|
||||
command_str_with_host = f"[{_INSTANCE_HOST}] {command_str}"
|
||||
|
||||
logger.info(
|
||||
"启动 ETL 子进程 [%s]: %s (cwd=%s)",
|
||||
execution_id, command_str, ETL_PROJECT_PATH,
|
||||
execution_id, command_str, etl_path,
|
||||
)
|
||||
|
||||
self._log_buffers[execution_id] = []
|
||||
@@ -140,7 +182,8 @@ class TaskExecutor:
|
||||
task_codes=config.tasks,
|
||||
status="running",
|
||||
started_at=started_at,
|
||||
command=command_str,
|
||||
command=command_str_with_host,
|
||||
schedule_id=schedule_id,
|
||||
)
|
||||
|
||||
exit_code: int | None = None
|
||||
@@ -226,7 +269,7 @@ class TaskExecutor:
|
||||
cmd,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
cwd=ETL_PROJECT_PATH,
|
||||
cwd=_config_module.ETL_PROJECT_PATH,
|
||||
env=env,
|
||||
text=True,
|
||||
encoding="utf-8",
|
||||
@@ -302,18 +345,30 @@ class TaskExecutor:
|
||||
status: str,
|
||||
started_at: datetime,
|
||||
command: str,
|
||||
schedule_id: str | None = None,
|
||||
) -> None:
|
||||
"""插入一条执行日志记录(running 状态)。"""
|
||||
try:
|
||||
conn = get_connection()
|
||||
try:
|
||||
with conn.cursor() as cur:
|
||||
# 如果调用方未传 schedule_id,尝试从 task_queue 回查
|
||||
effective_schedule_id = schedule_id
|
||||
if effective_schedule_id is None and queue_id is not None:
|
||||
cur.execute(
|
||||
"SELECT schedule_id FROM task_queue WHERE id = %s",
|
||||
(queue_id,),
|
||||
)
|
||||
row = cur.fetchone()
|
||||
if row and row[0]:
|
||||
effective_schedule_id = str(row[0])
|
||||
|
||||
cur.execute(
|
||||
"""
|
||||
INSERT INTO task_execution_log
|
||||
(id, queue_id, site_id, task_codes, status,
|
||||
started_at, command)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s)
|
||||
started_at, command, schedule_id)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
|
||||
""",
|
||||
(
|
||||
execution_id,
|
||||
@@ -323,6 +378,7 @@ class TaskExecutor:
|
||||
status,
|
||||
started_at,
|
||||
command,
|
||||
effective_schedule_id,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
|
||||
Reference in New Issue
Block a user