304 lines
11 KiB
Python
304 lines
11 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""调度器服务
|
||
|
||
A background asyncio loop checks for due scheduled tasks every 30 seconds
and enqueues their TaskConfig into the TaskQueue.

Core pieces:
- check_and_enqueue(): selects scheduled tasks with enabled=true and next_run_at <= now
- start() / stop(): manage the lifecycle of the background loop
- calculate_next_run(): computes the next run time from a ScheduleConfig
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import json
|
||
import logging
|
||
from datetime import datetime, timedelta, timezone
|
||
|
||
from ..database import get_connection
|
||
from ..schemas.schedules import ScheduleConfigSchema
|
||
from ..schemas.tasks import TaskConfigSchema
|
||
from .task_queue import task_queue
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
# Scheduler polling interval, in seconds.
|
||
SCHEDULER_POLL_INTERVAL = 30
|
||
|
||
|
||
def _parse_time(time_str: str) -> tuple[int, int]:
|
||
"""解析 HH:MM 格式的时间字符串,返回 (hour, minute)。"""
|
||
parts = time_str.split(":")
|
||
return int(parts[0]), int(parts[1])
|
||
|
||
|
||
def calculate_next_run(
    schedule_config: ScheduleConfigSchema,
    now: datetime | None = None,
) -> datetime | None:
    """Work out when a schedule should fire next, relative to ``now``.

    Args:
        schedule_config: Schedule definition. Which fields are read depends
            on ``schedule_type``: ``interval_value`` / ``interval_unit``,
            ``daily_time``, ``weekly_time`` / ``weekly_days``, or
            ``cron_expression``.
        now: Reference instant. Defaults to the current UTC time; exposed so
            tests can inject a fixed clock.

    Returns:
        The next run time (same tzinfo as the reference instant), or
        ``None`` when the schedule should not run again: "once" schedules,
        an unknown ``interval_unit``, or an unknown ``schedule_type``.
    """
    reference = datetime.now(timezone.utc) if now is None else now
    kind = schedule_config.schedule_type

    # A one-shot schedule is never rescheduled.
    if kind == "once":
        return None

    if kind == "interval":
        amount = schedule_config.interval_value
        # All three candidate deltas are built up front, so an unusable
        # amount raises no matter which unit is configured.
        steps = {
            unit: timedelta(**{unit: amount})
            for unit in ("minutes", "hours", "days")
        }
        step = steps.get(schedule_config.interval_unit)
        if step is not None:
            return reference + step
        logger.warning("未知的 interval_unit: %s", schedule_config.interval_unit)
        return None

    if kind == "daily":
        hh, mm = _parse_time(schedule_config.daily_time)
        # Always the following calendar day, even when today's slot has
        # not been reached yet.
        next_day = reference + timedelta(days=1)
        return next_day.replace(hour=hh, minute=mm, second=0, microsecond=0)

    if kind == "weekly":
        hh, mm = _parse_time(schedule_config.weekly_time)
        # Weekdays use ISO numbering (1 = Monday ... 7 = Sunday); an empty
        # selection falls back to Monday.
        weekdays = sorted(schedule_config.weekly_days) if schedule_config.weekly_days else [1]
        today = reference.isoweekday()

        # First configured weekday strictly after today; the current day is
        # never chosen, regardless of the time of day.
        upcoming = next((wd for wd in weekdays if wd > today), None)
        if upcoming is not None:
            offset = upcoming - today
        else:
            # Nothing left this week: wrap to the earliest weekday next week.
            offset = 7 - today + weekdays[0]
        target = reference + timedelta(days=offset)
        return target.replace(hour=hh, minute=mm, second=0, microsecond=0)

    if kind == "cron":
        # Only a restricted cron subset is understood; see _parse_simple_cron.
        return _parse_simple_cron(schedule_config.cron_expression, reference)

    logger.warning("未知的 schedule_type: %s", kind)
    return None
|
||
|
||
|
||
def _parse_simple_cron(expression: str, now: datetime) -> datetime | None:
|
||
"""简单 cron 解析器,支持基本的 5 字段格式。
|
||
|
||
支持的格式:
|
||
- "M H * * *" → 每天 H:M
|
||
- "M H * * D" → 每周 D 的 H:M(D 为 0-6,0=Sunday)
|
||
- 其他格式回退到每天 04:00
|
||
|
||
不支持范围、列表、步进等高级语法。如需完整 cron 支持,
|
||
可在 pyproject.toml 中添加 croniter 依赖。
|
||
"""
|
||
parts = expression.strip().split()
|
||
if len(parts) != 5:
|
||
logger.warning("无法解析 cron 表达式: %s,回退到明天 04:00", expression)
|
||
tomorrow = now + timedelta(days=1)
|
||
return tomorrow.replace(hour=4, minute=0, second=0, microsecond=0)
|
||
|
||
minute_str, hour_str, dom, month, dow = parts
|
||
|
||
try:
|
||
minute = int(minute_str) if minute_str != "*" else 0
|
||
hour = int(hour_str) if hour_str != "*" else 0
|
||
except ValueError:
|
||
logger.warning("cron 表达式时间字段无法解析: %s,回退到明天 04:00", expression)
|
||
tomorrow = now + timedelta(days=1)
|
||
return tomorrow.replace(hour=4, minute=0, second=0, microsecond=0)
|
||
|
||
# 如果指定了 day-of-week(非 *)
|
||
if dow != "*":
|
||
try:
|
||
cron_dow = int(dow) # 0=Sunday, 1=Monday, ..., 6=Saturday
|
||
except ValueError:
|
||
tomorrow = now + timedelta(days=1)
|
||
return tomorrow.replace(hour=hour, minute=minute, second=0, microsecond=0)
|
||
|
||
# 转换为 ISO weekday(1=Monday, 7=Sunday)
|
||
iso_dow = 7 if cron_dow == 0 else cron_dow
|
||
current_iso = now.isoweekday()
|
||
|
||
if iso_dow > current_iso:
|
||
delta_days = iso_dow - current_iso
|
||
elif iso_dow < current_iso:
|
||
delta_days = 7 - current_iso + iso_dow
|
||
else:
|
||
# 同一天,看时间是否已过
|
||
target_today = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
|
||
if now < target_today:
|
||
delta_days = 0
|
||
else:
|
||
delta_days = 7
|
||
|
||
next_dt = now + timedelta(days=delta_days)
|
||
return next_dt.replace(hour=hour, minute=minute, second=0, microsecond=0)
|
||
|
||
# 每天定时(dom=* month=* dow=*)
|
||
tomorrow = now + timedelta(days=1)
|
||
return tomorrow.replace(hour=hour, minute=minute, second=0, microsecond=0)
|
||
|
||
|
||
class Scheduler:
    """PostgreSQL-backed periodic scheduler.

    A background asyncio task wakes every ``SCHEDULER_POLL_INTERVAL``
    seconds, looks up due rows in ``scheduled_tasks`` and pushes each row's
    TaskConfig onto the task queue.
    """

    def __init__(self) -> None:
        # Loop-continuation flag; set by _loop() and cleared by stop().
        self._running = False
        # Handle of the background polling task, if one has been started.
        self._loop_task: asyncio.Task | None = None

    # ------------------------------------------------------------------
    # Core: find due tasks and enqueue them
    # ------------------------------------------------------------------

    def check_and_enqueue(self) -> int:
        """Enqueue every enabled scheduled task whose ``next_run_at`` has passed.

        Due rows are processed one at a time, oldest ``next_run_at`` first.
        Each row's bookkeeping update is committed on its own, so a failure
        on one row neither aborts the remaining rows nor rolls back the
        updates of rows that were already enqueued (which would cause them
        to be enqueued again on the next poll).

        Exceptions are logged rather than raised; the connection is always
        closed before returning.

        Returns:
            Number of tasks enqueued during this call.
        """
        conn = get_connection()
        enqueued = 0
        try:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    SELECT id, site_id, task_config, schedule_config
                    FROM scheduled_tasks
                    WHERE enabled = TRUE
                      AND next_run_at IS NOT NULL
                      AND next_run_at <= NOW()
                    ORDER BY next_run_at ASC
                    """
                )
                rows = cur.fetchall()

            for row in rows:
                if self._process_due_row(conn, row):
                    enqueued += 1

        except Exception:
            logger.exception("check_and_enqueue 执行异常")
            try:
                conn.rollback()
            except Exception:
                # Best effort: the connection is closed below regardless,
                # but record why the rollback itself failed.
                logger.warning("check_and_enqueue 回滚失败", exc_info=True)
        finally:
            conn.close()

        if enqueued > 0:
            logger.info("本轮调度检查:%d 个任务入队", enqueued)
        return enqueued

    def _process_due_row(self, conn, row) -> bool:
        """Enqueue one due row and persist its bookkeeping.

        Args:
            conn: Open database connection used for the UPDATE and commit.
            row: ``(id, site_id, task_config, schedule_config)`` as selected
                by ``check_and_enqueue``.

        Returns:
            True if the task was handed to the task queue, False if the row
            was skipped because of a configuration, scheduling or enqueue
            error.
        """
        task_id = str(row[0])
        site_id = row[1]

        # The JSON columns may arrive already decoded (dict) or as text.
        try:
            task_config_raw = row[2] if isinstance(row[2], dict) else json.loads(row[2])
            schedule_config_raw = row[3] if isinstance(row[3], dict) else json.loads(row[3])
            config = TaskConfigSchema(**task_config_raw)
            schedule_cfg = ScheduleConfigSchema(**schedule_config_raw)
        except Exception:
            logger.exception("调度任务 [%s] 配置反序列化失败,跳过", task_id)
            return False

        # Resolve the next run time before enqueueing: if the schedule is
        # unusable the row is skipped, instead of being enqueued and then
        # left due so that it is enqueued again on every poll.
        try:
            next_run = calculate_next_run(schedule_cfg, datetime.now(timezone.utc))
        except Exception:
            logger.exception("调度任务 [%s] 计算下次执行时间失败,跳过", task_id)
            return False

        try:
            queue_id = task_queue.enqueue(config, site_id)
        except Exception:
            logger.exception("调度任务 [%s] 入队失败", task_id)
            return False
        logger.info(
            "调度任务 [%s] 入队成功 → queue_id=%s site_id=%s",
            task_id, queue_id, site_id,
        )

        # Record the run and commit immediately so a later row's failure
        # cannot undo this row's update.
        try:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    UPDATE scheduled_tasks
                    SET last_run_at = NOW(),
                        run_count = run_count + 1,
                        next_run_at = %s,
                        last_status = 'enqueued',
                        updated_at = NOW()
                    WHERE id = %s
                    """,
                    (next_run, task_id),
                )
            conn.commit()
        except Exception:
            logger.exception("调度任务 [%s] 状态更新失败", task_id)
            # Clear the failed transaction so the following rows can still
            # use this connection.
            conn.rollback()
        return True

    # ------------------------------------------------------------------
    # Background loop
    # ------------------------------------------------------------------

    async def _loop(self) -> None:
        """Poll for due tasks every ``SCHEDULER_POLL_INTERVAL`` seconds."""
        self._running = True
        logger.info("Scheduler 后台循环启动(间隔 %ds)", SCHEDULER_POLL_INTERVAL)

        loop = asyncio.get_running_loop()
        while self._running:
            try:
                # The database work is synchronous; run it in the default
                # executor so the event loop is not blocked.
                await loop.run_in_executor(None, self.check_and_enqueue)
            except Exception:
                logger.exception("Scheduler 循环迭代异常")

            await asyncio.sleep(SCHEDULER_POLL_INTERVAL)

        logger.info("Scheduler 后台循环停止")

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def start(self) -> None:
        """Start the background polling task unless one is already active.

        Intended to be called from the FastAPI lifespan hook; it uses
        ``asyncio.create_task`` and therefore needs a running event loop.
        """
        if self._loop_task is None or self._loop_task.done():
            self._loop_task = asyncio.create_task(self._loop())
            logger.info("Scheduler 已启动")

    async def stop(self) -> None:
        """Cancel the background polling task and wait for it to finish."""
        self._running = False
        if self._loop_task and not self._loop_task.done():
            self._loop_task.cancel()
            try:
                await self._loop_task
            except asyncio.CancelledError:
                # Expected outcome of cancelling the task just above.
                pass
        self._loop_task = None
        logger.info("Scheduler 已停止")
|
||
|
||
|
||
# Module-level singleton instance.
|
||
scheduler = Scheduler()
|