在准备环境前提交一次全部更改。
This commit is contained in:
303
apps/backend/app/services/scheduler.py
Normal file
303
apps/backend/app/services/scheduler.py
Normal file
@@ -0,0 +1,303 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Scheduler service.

A background asyncio loop checks for due scheduled tasks every 30 seconds
and enqueues their TaskConfig into the TaskQueue.

Core pieces:
- check_and_enqueue(): query scheduled tasks with enabled=true and next_run_at <= now
- start() / stop(): manage the lifecycle of the background loop
- calculate_next_run(): compute the next run time from a ScheduleConfig
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from ..database import get_connection
|
||||
from ..schemas.schedules import ScheduleConfigSchema
|
||||
from ..schemas.tasks import TaskConfigSchema
|
||||
from .task_queue import task_queue
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 调度器轮询间隔(秒)
|
||||
SCHEDULER_POLL_INTERVAL = 30
|
||||
|
||||
|
||||
def _parse_time(time_str: str) -> tuple[int, int]:
    """Parse an ``HH:MM`` time-of-day string into ``(hour, minute)``.

    Only the first two colon-separated components are used, so a value such
    as ``"09:30:00"`` is accepted and its seconds are discarded.

    Args:
        time_str: Time of day, e.g. ``"09:30"``.

    Returns:
        ``(hour, minute)`` with ``0 <= hour <= 23`` and ``0 <= minute <= 59``.

    Raises:
        ValueError: If the value is not a string, has no ``:`` separator,
            contains non-integer components, or is out of range.
    """
    if not isinstance(time_str, str):
        raise ValueError(f"无效的时间字符串(应为 HH:MM): {time_str!r}")

    parts = time_str.strip().split(":")
    if len(parts) < 2:
        raise ValueError(f"无效的时间字符串(应为 HH:MM): {time_str!r}")

    try:
        hour, minute = int(parts[0]), int(parts[1])
    except ValueError:
        raise ValueError(f"无效的时间字符串(应为 HH:MM): {time_str!r}") from None

    # Reject out-of-range values here so the caller gets a clear message
    # instead of a later failure inside datetime.replace().
    if not (0 <= hour <= 23 and 0 <= minute <= 59):
        raise ValueError(f"时间超出范围(00:00-23:59): {time_str!r}")

    return hour, minute
|
||||
|
||||
|
||||
def calculate_next_run(
    schedule_config: ScheduleConfigSchema,
    now: datetime | None = None,
) -> datetime | None:
    """Compute the next run time for a schedule configuration.

    Behaviour per ``schedule_config.schedule_type``:

    - ``"once"``: returns ``None`` (a one-shot task is not rescheduled).
    - ``"interval"``: ``now`` plus ``interval_value`` of ``interval_unit``
      (``"minutes"``, ``"hours"`` or ``"days"``). An unknown unit or a
      missing / non-positive value logs a warning and returns ``None``.
    - ``"daily"``: the next occurrence of ``daily_time`` strictly after
      ``now`` (today if that time is still ahead, otherwise tomorrow).
    - ``"weekly"``: the next occurrence of ``weekly_time`` strictly after
      ``now`` that falls on one of ``weekly_days`` (ISO weekdays,
      1=Monday ... 7=Sunday). Values outside 1..7 wrap modulo 7, so ``0``
      means Sunday. An empty list defaults to Monday.
    - ``"cron"``: delegated to ``_parse_simple_cron``.
    - anything else: logs a warning and returns ``None``.

    Wall-clock fields are applied in the timezone of ``now``.

    Args:
        schedule_config: The schedule configuration.
        now: Reference time; defaults to the current UTC time. Injectable
            for tests.

    Returns:
        The next run time, or ``None`` when the task should not run again.

    Raises:
        ValueError: Propagated from ``_parse_time`` when ``daily_time`` or
            ``weekly_time`` cannot be parsed.
    """
    if now is None:
        now = datetime.now(timezone.utc)

    stype = schedule_config.schedule_type

    if stype == "once":
        # One-shot tasks are never rescheduled after they ran.
        return None

    if stype == "interval":
        unit = schedule_config.interval_unit
        value = schedule_config.interval_value
        if unit not in ("minutes", "hours", "days"):
            logger.warning("未知的 interval_unit: %s", unit)
            return None
        if value is None or value <= 0:
            # A zero or negative interval would yield a next run that is not
            # in the future, so the task would be due again on every poll.
            logger.warning("无效的 interval_value: %s", value)
            return None
        # Build only the requested timedelta; building one per unit up front
        # can overflow for a unit that was never asked for.
        return now + timedelta(**{unit: value})

    if stype == "daily":
        hour, minute = _parse_time(schedule_config.daily_time)
        candidate = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
        if candidate <= now:
            # Today's slot has already passed (or is exactly now).
            candidate += timedelta(days=1)
        return candidate

    if stype == "weekly":
        hour, minute = _parse_time(schedule_config.weekly_time)
        raw_days = schedule_config.weekly_days or [1]
        # Normalise to ISO weekdays 1..7; wrapping keeps 0 meaning Sunday.
        wanted = {(day - 1) % 7 + 1 for day in raw_days}
        today_target = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
        # Offsets 0..7 cover today's remaining slot, the other six weekdays,
        # and the same weekday next week, so a match always exists.
        candidates = (today_target + timedelta(days=offset) for offset in range(8))
        return next(
            slot for slot in candidates
            if slot > now and slot.isoweekday() in wanted
        )

    if stype == "cron":
        # Only a simple subset of cron is supported by the helper; a full
        # implementation (e.g. croniter) can be introduced later.
        return _parse_simple_cron(schedule_config.cron_expression, now)

    logger.warning("未知的 schedule_type: %s", stype)
    return None
|
||||
|
||||
|
||||
def _parse_simple_cron(expression: str, now: datetime) -> datetime | None:
    """Compute the next run time for a simple 5-field cron expression.

    Supported forms:

    - ``"M H * * *"``: every day at H:M.
    - ``"M H * * D"``: every week on day ``D`` at H:M, where ``D`` is 0-7
      and both 0 and 7 mean Sunday.

    A ``*`` in the minute or hour field is treated as 0. The day-of-month
    and month fields are not evaluated. Ranges, lists and steps are not
    supported; full cron support would need a library such as croniter.

    The result is always strictly after ``now``: today's slot is used when
    it is still ahead, otherwise the next matching day.

    Fallbacks:

    - Not a string, not exactly five fields, or a minute/hour that is not an
      integer within range: tomorrow at 04:00, with a warning.
    - Day-of-week that is not an integer in 0-7: treated as a daily
      schedule, with a warning.

    Args:
        expression: The cron expression.
        now: Reference time; wall-clock fields are applied in its timezone.

    Returns:
        The next run time. This implementation never returns ``None``.
    """

    def _fallback() -> datetime:
        # Shared fallback for expressions that cannot be understood.
        return (now + timedelta(days=1)).replace(
            hour=4, minute=0, second=0, microsecond=0
        )

    parts = expression.strip().split() if isinstance(expression, str) else []
    if len(parts) != 5:
        logger.warning("无法解析 cron 表达式: %s,回退到明天 04:00", expression)
        return _fallback()

    minute_str, hour_str, dom, month, dow = parts

    try:
        minute = int(minute_str) if minute_str != "*" else 0
        hour = int(hour_str) if hour_str != "*" else 0
        if not (0 <= minute <= 59 and 0 <= hour <= 23):
            # Route out-of-range values to the same fallback instead of
            # letting datetime.replace() raise further down.
            raise ValueError("cron time field out of range")
    except ValueError:
        logger.warning("cron 表达式时间字段无法解析: %s,回退到明天 04:00", expression)
        return _fallback()

    if dom != "*" or month != "*":
        logger.warning("cron 表达式的日/月字段暂不支持,已忽略: %s", expression)

    today_target = now.replace(hour=hour, minute=minute, second=0, microsecond=0)

    iso_dow = None
    if dow != "*":
        try:
            cron_dow = int(dow)  # cron: 0=Sunday, 1=Monday, ..., 6=Saturday
        except ValueError:
            cron_dow = -1
        if 0 <= cron_dow <= 7:
            # Convert to ISO weekday (1=Monday ... 7=Sunday).
            iso_dow = 7 if cron_dow == 0 else cron_dow
        else:
            logger.warning("cron 表达式星期字段无法解析: %s,按每天执行处理", expression)

    if iso_dow is None:
        # Daily schedule: today if the slot is still ahead, else tomorrow.
        if today_target > now:
            return today_target
        return today_target + timedelta(days=1)

    # Weekly schedule: days until the requested weekday (0 means today).
    delta_days = (iso_dow - now.isoweekday()) % 7
    candidate = today_target + timedelta(days=delta_days)
    if candidate <= now:
        # Only reachable when the weekday is today and the slot has passed.
        candidate += timedelta(days=7)
    return candidate
|
||||
|
||||
|
||||
class Scheduler:
    """PostgreSQL-backed periodic scheduler.

    A background asyncio task wakes up every ``SCHEDULER_POLL_INTERVAL``
    seconds, looks up the scheduled tasks that are due and enqueues their
    task configuration into the task queue.
    """

    def __init__(self) -> None:
        # Loop flag read by _loop(); cleared by stop().
        self._running = False
        # Handle of the background task created by start(), if any.
        self._loop_task: asyncio.Task | None = None

    # ------------------------------------------------------------------
    # Core: find due tasks and enqueue them
    # ------------------------------------------------------------------

    def check_and_enqueue(self) -> int:
        """Enqueue every enabled scheduled task whose ``next_run_at`` is due.

        For each due row the stored task/schedule configuration is decoded,
        the task is enqueued, and the row's ``last_run_at``, ``run_count``,
        ``next_run_at`` and ``last_status`` are updated.

        A row whose configuration cannot be decoded, or whose enqueue fails,
        is logged and skipped without aborting the round. If the next run
        time cannot be computed after a successful enqueue, ``next_run_at``
        is set to NULL so the task is not enqueued again on every poll.

        Any other error is logged and the open transaction rolled back; the
        connection is always closed.

        Returns:
            Number of tasks enqueued in this round.
        """
        conn = get_connection()
        enqueued = 0
        try:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    SELECT id, site_id, task_config, schedule_config
                    FROM scheduled_tasks
                    WHERE enabled = TRUE
                      AND next_run_at IS NOT NULL
                      AND next_run_at <= NOW()
                    ORDER BY next_run_at ASC
                    """
                )
                rows = cur.fetchall()

            for row in rows:
                task_id = str(row[0])
                site_id = row[1]

                # JSON decoding sits inside the per-task guard so one
                # malformed row cannot abort the rest of the round.
                try:
                    task_config_raw = (
                        row[2] if isinstance(row[2], dict) else json.loads(row[2])
                    )
                    schedule_config_raw = (
                        row[3] if isinstance(row[3], dict) else json.loads(row[3])
                    )
                    config = TaskConfigSchema(**task_config_raw)
                    schedule_cfg = ScheduleConfigSchema(**schedule_config_raw)
                except Exception:
                    logger.exception("调度任务 [%s] 配置反序列化失败,跳过", task_id)
                    continue

                # Enqueue
                try:
                    queue_id = task_queue.enqueue(config, site_id)
                except Exception:
                    logger.exception("调度任务 [%s] 入队失败", task_id)
                    continue
                logger.info(
                    "调度任务 [%s] 入队成功 → queue_id=%s site_id=%s",
                    task_id, queue_id, site_id,
                )
                enqueued += 1

                # The task is already enqueued at this point, so the row
                # must be updated even when the next run time is unknown.
                try:
                    next_run = calculate_next_run(
                        schedule_cfg, datetime.now(timezone.utc)
                    )
                except Exception:
                    logger.exception(
                        "调度任务 [%s] 计算下次执行时间失败,不再继续调度", task_id
                    )
                    next_run = None

                with conn.cursor() as cur:
                    cur.execute(
                        """
                        UPDATE scheduled_tasks
                        SET last_run_at = NOW(),
                            run_count = run_count + 1,
                            next_run_at = %s,
                            last_status = 'enqueued',
                            updated_at = NOW()
                        WHERE id = %s
                        """,
                        (next_run, task_id),
                    )
                # Commit per task so a later failure cannot roll back the
                # state of tasks that were already enqueued.
                # NOTE(review): assumes task_queue.enqueue() does not share
                # this connection's transaction - confirm.
                conn.commit()

        except Exception:
            logger.exception("check_and_enqueue 执行异常")
            try:
                conn.rollback()
            except Exception:
                # Best effort: the connection may already be unusable.
                logger.warning("check_and_enqueue 回滚失败", exc_info=True)
        finally:
            conn.close()

        if enqueued > 0:
            logger.info("本轮调度检查:%d 个任务入队", enqueued)
        return enqueued

    # ------------------------------------------------------------------
    # Background loop
    # ------------------------------------------------------------------

    async def _loop(self) -> None:
        """Run ``check_and_enqueue`` every ``SCHEDULER_POLL_INTERVAL`` seconds.

        The synchronous database work runs in the default executor so it
        does not block the event loop. Exceptions from a single iteration
        are logged and the loop continues.
        """
        self._running = True
        logger.info("Scheduler 后台循环启动(间隔 %ds)", SCHEDULER_POLL_INTERVAL)

        loop = asyncio.get_running_loop()
        while self._running:
            try:
                await loop.run_in_executor(None, self.check_and_enqueue)
            except Exception:
                logger.exception("Scheduler 循环迭代异常")

            await asyncio.sleep(SCHEDULER_POLL_INTERVAL)

        logger.info("Scheduler 后台循环停止")

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def start(self) -> None:
        """Start the background loop unless one is already running.

        Uses ``asyncio.create_task``, so it must be called while an event
        loop is running (e.g. from the FastAPI lifespan handler).
        """
        if self._loop_task is None or self._loop_task.done():
            self._loop_task = asyncio.create_task(self._loop())
            logger.info("Scheduler 已启动")

    async def stop(self) -> None:
        """Stop the background loop and wait for its task to finish."""
        self._running = False
        if self._loop_task and not self._loop_task.done():
            self._loop_task.cancel()
            try:
                await self._loop_task
            except asyncio.CancelledError:
                # Expected: the task was cancelled just above.
                pass
        self._loop_task = None
        logger.info("Scheduler 已停止")
|
||||
|
||||
|
||||
# 全局单例
|
||||
scheduler = Scheduler()
|
||||
Reference in New Issue
Block a user