feat: 累积功能变更 — 聊天集成、租户管理、小程序更新、ETL 增强、迁移脚本
包含多个会话的累积代码变更: - backend: AI 聊天服务、触发器调度、认证增强、WebSocket、调度器最小间隔 - admin-web: ETL 状态页、任务管理、调度配置、登录优化 - miniprogram: 看板页面、聊天集成、UI 组件、导航更新 - etl: DWS 新任务(finance_area_daily/board_cache)、连接器增强 - tenant-admin: 项目初始化 - db: 19 个迁移脚本(etl_feiqiu 11 + zqyy_app 8) - packages/shared: 枚举和工具函数更新 - tools: 数据库工具、报表生成、健康检查 - docs: PRD/架构/部署/合约文档更新 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -26,6 +26,7 @@ from typing import Any
|
||||
# 禁止 from ..config import ETL_PROJECT_PATH(值拷贝,reload 后过期)
|
||||
from .. import config as _config_module
|
||||
from ..database import get_connection
|
||||
from psycopg2.extras import Json
|
||||
from ..schemas.tasks import TaskConfigSchema
|
||||
from ..services.cli_builder import cli_builder
|
||||
|
||||
@@ -184,6 +185,7 @@ class TaskExecutor:
|
||||
started_at=started_at,
|
||||
command=command_str_with_host,
|
||||
schedule_id=schedule_id,
|
||||
config_json=config.model_dump(mode="json"),
|
||||
)
|
||||
|
||||
exit_code: int | None = None
|
||||
@@ -249,6 +251,9 @@ class TaskExecutor:
|
||||
error_log="\n".join(stderr_lines),
|
||||
)
|
||||
|
||||
# CHANGE 2026-03-22 | 释放内存缓冲区,防止长期运行内存泄漏
|
||||
self.cleanup(execution_id)
|
||||
|
||||
def _run_subprocess(
|
||||
self,
|
||||
cmd: list[str],
|
||||
@@ -379,6 +384,7 @@ class TaskExecutor:
|
||||
started_at: datetime,
|
||||
command: str,
|
||||
schedule_id: str | None = None,
|
||||
config_json: dict | None = None,
|
||||
) -> None:
|
||||
"""插入一条执行日志记录(running 状态)。"""
|
||||
try:
|
||||
@@ -396,12 +402,13 @@ class TaskExecutor:
|
||||
if row and row[0]:
|
||||
effective_schedule_id = str(row[0])
|
||||
|
||||
# CHANGE 2026-03-22 | 存储完整 TaskConfig JSON,供 rerun 还原原始参数
|
||||
cur.execute(
|
||||
"""
|
||||
INSERT INTO task_execution_log
|
||||
(id, queue_id, site_id, task_codes, status,
|
||||
started_at, command, schedule_id)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
|
||||
started_at, command, schedule_id, config)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||||
""",
|
||||
(
|
||||
execution_id,
|
||||
@@ -412,6 +419,7 @@ class TaskExecutor:
|
||||
started_at,
|
||||
command,
|
||||
effective_schedule_id,
|
||||
Json(config_json) if config_json else None,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
@@ -475,6 +483,115 @@ class TaskExecutor:
|
||||
self._log_buffers.pop(execution_id, None)
|
||||
self._subscribers.pop(execution_id, None)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 优雅关闭:终止所有子进程并回写状态
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def shutdown(self, timeout: float = 3.0) -> int:
    """Gracefully stop every tracked subprocess.

    Sends ``terminate()`` to each process in ``self._processes`` that is
    still running, waits up to ``timeout`` seconds (one shared deadline for
    all processes) for them to exit, then ``kill()``s whatever is still
    alive.

    The wait polls with ``asyncio.sleep`` instead of calling the blocking
    ``proc.wait()``: this coroutine runs on the event loop, and blocking it
    would prevent any other coroutine from making progress while we wait
    (in particular the cleanup the wait is meant to give a chance to run).

    Args:
        timeout: Seconds to wait for the processes to exit before they are
            force-killed.

    Returns:
        The number of processes that were tracked when shutdown started
        (0 when there were none). Processes that had already exited on
        their own are included in this count.
    """
    import asyncio
    import time

    # Snapshot the table: once we start awaiting, other coroutines may add
    # or remove entries in self._processes.
    targets = dict(self._processes)
    if not targets:
        return 0

    logger.info(
        "优雅关闭:终止 %d 个运行中的子进程,超时 %.1fs",
        len(targets), timeout,
    )

    # Phase 1: ask every live process to terminate.
    for eid, proc in targets.items():
        if proc.poll() is not None:
            continue
        try:
            proc.terminate()
            logger.info("已发送 terminate 信号: %s (pid=%s)", eid, proc.pid)
        except ProcessLookupError:
            # The process vanished between poll() and terminate().
            pass

    # Phase 2: wait for the processes to exit without blocking the loop.
    deadline = time.monotonic() + timeout
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        if all(proc.poll() is not None for proc in targets.values()):
            break
        await asyncio.sleep(min(0.05, remaining))

    # Phase 3: force-kill anything still alive. The live table is merged in
    # so that entries registered while we were waiting are covered as well.
    for eid, proc in {**targets, **dict(self._processes)}.items():
        if proc.poll() is not None:
            continue
        try:
            proc.kill()
            logger.warning("强制 kill: %s (pid=%s)", eid, proc.pid)
        except ProcessLookupError:
            pass

    # No status write-back happens here: per the original author's note,
    # execute()'s finally block performs it once its executor call returns,
    # and recover_stale() is the fallback on the next start if it does not
    # get to run.
    count = len(targets)
    logger.info("优雅关闭完成,已终止 %d 个子进程", count)
    return count
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 启动时僵尸任务清理
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def recover_stale(self) -> int:
    """Mark this host's leftover ``running`` execution logs as ``interrupted``.

    Startup cleanup: every ``task_execution_log`` row with
    ``status = 'running'`` whose ``command`` starts with this instance's
    ``[hostname]`` tag is set to ``interrupted``, gets ``finished_at = NOW()``
    and a note appended to ``error_log``. No process liveness check is made;
    the host-tag prefix is the only filter.

    The tag is escaped before it is used in the ``LIKE`` pattern, so a
    hostname containing ``_`` or ``%`` cannot act as a wildcard and match
    rows that belong to another host.

    Returns:
        The number of rows marked as interrupted. Returns 0 when nothing
        matched or when the cleanup failed (the failure is logged, not
        raised).
    """
    host_tag = f"[{_INSTANCE_HOST}]"
    # Escape LIKE metacharacters with '!' (declared via ESCAPE in the query);
    # the escape character itself must be doubled first.
    escaped_tag = (
        host_tag.replace("!", "!!").replace("%", "!%").replace("_", "!_")
    )
    like_prefix = f"{escaped_tag}%"

    try:
        conn = get_connection()
        try:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    UPDATE task_execution_log
                    SET status = 'interrupted',
                        finished_at = NOW(),
                        error_log = COALESCE(error_log, '')
                            || E'\n[recover_stale] 后端重启,进程已丢失,标记为 interrupted'
                    WHERE status = 'running'
                      AND command LIKE %s ESCAPE '!'
                    RETURNING id
                    """,
                    (like_prefix,),
                )
                rows = cur.fetchall()
            conn.commit()
        finally:
            conn.close()

        count = len(rows)
        if count > 0:
            ids = [str(row[0]) for row in rows]
            logger.warning(
                "启动清理:%d 条僵尸任务标记为 interrupted: %s",
                count, ", ".join(ids),
            )
        else:
            logger.info("启动清理:无僵尸任务")
        return count
    except Exception:
        # Best effort: a failed cleanup must not prevent startup.
        logger.exception("启动清理僵尸任务失败")
        return 0
|
||||
|
||||
|
||||
# Global singleton
|
||||
task_executor = TaskExecutor()
|
||||
|
||||
Reference in New Issue
Block a user