feat: 累积功能变更 — 聊天集成、租户管理、小程序更新、ETL 增强、迁移脚本

包含多个会话的累积代码变更:
- backend: AI 聊天服务、触发器调度、认证增强、WebSocket、调度器最小间隔
- admin-web: ETL 状态页、任务管理、调度配置、登录优化
- miniprogram: 看板页面、聊天集成、UI 组件、导航更新
- etl: DWS 新任务(finance_area_daily/board_cache)、连接器增强
- tenant-admin: 项目初始化
- db: 19 个迁移脚本(etl_feiqiu 11 + zqyy_app 8)
- packages/shared: 枚举和工具函数更新
- tools: 数据库工具、报表生成、健康检查
- docs: PRD/架构/部署/合约文档更新

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Neo
2026-04-06 00:03:48 +08:00
parent 70324d8542
commit 6f8f12314f
515 changed files with 76604 additions and 7456 deletions

View File

@@ -26,6 +26,7 @@ from typing import Any
# 禁止 from ..config import ETL_PROJECT_PATH(值拷贝,reload 后过期)
from .. import config as _config_module
from ..database import get_connection
from psycopg2.extras import Json
from ..schemas.tasks import TaskConfigSchema
from ..services.cli_builder import cli_builder
@@ -184,6 +185,7 @@ class TaskExecutor:
started_at=started_at,
command=command_str_with_host,
schedule_id=schedule_id,
config_json=config.model_dump(mode="json"),
)
exit_code: int | None = None
@@ -249,6 +251,9 @@ class TaskExecutor:
error_log="\n".join(stderr_lines),
)
# CHANGE 2026-03-22 | 释放内存缓冲区,防止长期运行内存泄漏
self.cleanup(execution_id)
def _run_subprocess(
self,
cmd: list[str],
@@ -379,6 +384,7 @@ class TaskExecutor:
started_at: datetime,
command: str,
schedule_id: str | None = None,
config_json: dict | None = None,
) -> None:
"""插入一条执行日志记录running 状态)。"""
try:
@@ -396,12 +402,13 @@ class TaskExecutor:
if row and row[0]:
effective_schedule_id = str(row[0])
# CHANGE 2026-03-22 | 存储完整 TaskConfig JSON供 rerun 还原原始参数
cur.execute(
"""
INSERT INTO task_execution_log
(id, queue_id, site_id, task_codes, status,
started_at, command, schedule_id)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
started_at, command, schedule_id, config)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
""",
(
execution_id,
@@ -412,6 +419,7 @@ class TaskExecutor:
started_at,
command,
effective_schedule_id,
Json(config_json) if config_json else None,
),
)
conn.commit()
@@ -475,6 +483,115 @@ class TaskExecutor:
self._log_buffers.pop(execution_id, None)
self._subscribers.pop(execution_id, None)
# ------------------------------------------------------------------
# 优雅关闭:终止所有子进程并回写状态
# ------------------------------------------------------------------
async def shutdown(self, timeout: float = 3.0) -> int:
    """Gracefully shut down: terminate all running child processes.

    Sends SIGTERM to every live child, waits up to ``timeout`` seconds for
    them to exit (polling without blocking the event loop), then SIGKILLs
    any survivors.

    Args:
        timeout: Seconds to wait for children to exit before force-kill.

    Returns:
        Number of processes that were asked to terminate.
    """
    import asyncio
    import time

    running_ids = list(self._processes.keys())
    if not running_ids:
        return 0
    logger.info(
        "优雅关闭:终止 %d 个运行中的子进程,超时 %.1fs",
        len(running_ids), timeout,
    )
    # First pass: polite SIGTERM so children get a chance to clean up.
    for eid, proc in list(self._processes.items()):
        if proc.poll() is None:
            try:
                proc.terminate()
                logger.info("已发送 terminate 信号: %s (pid=%s)", eid, proc.pid)
            except ProcessLookupError:
                # Process exited between poll() and terminate(); ignore.
                pass
    # BUGFIX: the previous implementation called the blocking
    # Popen.wait(timeout=...) inside this `async def`, freezing the asyncio
    # event loop for up to `timeout` seconds and stalling every other
    # coroutine (status write-back, websockets) during shutdown. Poll the
    # processes with await asyncio.sleep instead so the loop keeps running.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if all(p.poll() is not None for p in list(self._processes.values())):
            break
        await asyncio.sleep(0.05)
    # Force-kill anything still alive after the grace period.
    for eid, proc in list(self._processes.items()):
        if proc.poll() is None:
            try:
                proc.kill()
                logger.warning("强制 kill: %s (pid=%s)", eid, proc.pid)
            except ProcessLookupError:
                pass
    # NOTE: execute()'s finally block runs after run_in_executor returns and
    # writes back the final status; if it never gets the chance,
    # recover_stale() covers it on the next startup.
    count = len(running_ids)
    logger.info("优雅关闭完成,已终止 %d 个子进程", count)
    return count
# ------------------------------------------------------------------
# 启动时僵尸任务清理
# ------------------------------------------------------------------
def recover_stale(self) -> int:
    """Clean up this host's zombie tasks at startup.

    A zombie is a task_execution_log row still marked ``running`` whose
    backing process no longer exists (e.g. after a backend restart). Only
    rows whose command carries this host's ``[hostname]`` tag are touched.

    Returns:
        Number of rows flipped to ``interrupted``.
    """
    # CHANGE 2026-03-22 | startup zombie cleanup, restricted to this host
    marker = f"[{_INSTANCE_HOST}]"
    try:
        conn = get_connection()
        try:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    UPDATE task_execution_log
                    SET status = 'interrupted',
                        finished_at = NOW(),
                        error_log = COALESCE(error_log, '')
                            || E'\n[recover_stale] 后端重启,进程已丢失,标记为 interrupted'
                    WHERE status = 'running'
                      AND command LIKE %s
                    RETURNING id
                    """,
                    (f"{marker}%",),
                )
                stale_rows = cur.fetchall()
                total = len(stale_rows)
                conn.commit()
        finally:
            conn.close()
        if total > 0:
            logger.warning(
                "启动清理:%d 条僵尸任务标记为 interrupted: %s",
                total,
                ", ".join(str(row[0]) for row in stale_rows),
            )
        else:
            logger.info("启动清理:无僵尸任务")
        return total
    except Exception:
        # Best-effort: a failed cleanup must never prevent startup.
        logger.exception("启动清理僵尸任务失败")
        return 0
# Module-level singleton shared by the whole backend process.
task_executor = TaskExecutor()