# -*- coding: utf-8 -*- """调度器服务 后台 asyncio 循环,每 30 秒检查一次到期的调度任务, 将其 TaskConfig 入队到 TaskQueue。 核心逻辑: - check_and_enqueue():查询 enabled=true 且 next_run_at <= now 的调度任务 - start() / stop():管理后台循环生命周期 - _calculate_next_run():根据 ScheduleConfig 计算下次执行时间 """ from __future__ import annotations import asyncio import json import logging from datetime import datetime, timedelta, timezone from ..database import get_connection from ..schemas.schedules import ScheduleConfigSchema from ..schemas.tasks import TaskConfigSchema from .task_queue import task_queue logger = logging.getLogger(__name__) # 调度器轮询间隔(秒) SCHEDULER_POLL_INTERVAL = 30 def _parse_time(time_str: str) -> tuple[int, int]: """解析 HH:MM 格式的时间字符串,返回 (hour, minute)。""" parts = time_str.split(":") return int(parts[0]), int(parts[1]) def calculate_next_run( schedule_config: ScheduleConfigSchema, now: datetime | None = None, ) -> datetime | None: """根据调度配置计算下次执行时间。 Args: schedule_config: 调度配置 now: 当前时间(默认 UTC now),方便测试注入 Returns: 下次执行时间(UTC),once 类型返回 None 表示不再执行 """ if now is None: now = datetime.now(timezone.utc) stype = schedule_config.schedule_type if stype == "once": # 一次性任务执行后不再调度 return None if stype == "interval": unit_map = { "minutes": timedelta(minutes=schedule_config.interval_value), "hours": timedelta(hours=schedule_config.interval_value), "days": timedelta(days=schedule_config.interval_value), } delta = unit_map.get(schedule_config.interval_unit) if delta is None: logger.warning("未知的 interval_unit: %s", schedule_config.interval_unit) return None return now + delta if stype == "daily": hour, minute = _parse_time(schedule_config.daily_time) # 计算明天的 daily_time tomorrow = now + timedelta(days=1) return tomorrow.replace(hour=hour, minute=minute, second=0, microsecond=0) if stype == "weekly": hour, minute = _parse_time(schedule_config.weekly_time) days = sorted(schedule_config.weekly_days) if schedule_config.weekly_days else [1] # ISO weekday: 1=Monday ... 7=Sunday current_weekday = now.isoweekday() # 找到下一个匹配的 weekday for day in days: if day > current_weekday: delta_days = day - current_weekday next_dt = now + timedelta(days=delta_days) return next_dt.replace(hour=hour, minute=minute, second=0, microsecond=0) # 本周没有更晚的 weekday,跳到下周第一个 first_day = days[0] delta_days = 7 - current_weekday + first_day next_dt = now + timedelta(days=delta_days) return next_dt.replace(hour=hour, minute=minute, second=0, microsecond=0) if stype == "cron": # 简单 cron 解析:仅支持 "minute hour * * *" 格式(每日定时) # 复杂 cron 表达式可后续引入 croniter 库 return _parse_simple_cron(schedule_config.cron_expression, now) logger.warning("未知的 schedule_type: %s", stype) return None def _parse_simple_cron(expression: str, now: datetime) -> datetime | None: """简单 cron 解析器,支持基本的 5 字段格式。 支持的格式: - "M H * * *" → 每天 H:M - "M H * * D" → 每周 D 的 H:M(D 为 0-6,0=Sunday) - 其他格式回退到每天 04:00 不支持范围、列表、步进等高级语法。如需完整 cron 支持, 可在 pyproject.toml 中添加 croniter 依赖。 """ parts = expression.strip().split() if len(parts) != 5: logger.warning("无法解析 cron 表达式: %s,回退到明天 04:00", expression) tomorrow = now + timedelta(days=1) return tomorrow.replace(hour=4, minute=0, second=0, microsecond=0) minute_str, hour_str, dom, month, dow = parts try: minute = int(minute_str) if minute_str != "*" else 0 hour = int(hour_str) if hour_str != "*" else 0 except ValueError: logger.warning("cron 表达式时间字段无法解析: %s,回退到明天 04:00", expression) tomorrow = now + timedelta(days=1) return tomorrow.replace(hour=4, minute=0, second=0, microsecond=0) # 如果指定了 day-of-week(非 *) if dow != "*": try: cron_dow = int(dow) # 0=Sunday, 1=Monday, ..., 6=Saturday except ValueError: tomorrow = now + timedelta(days=1) return tomorrow.replace(hour=hour, minute=minute, second=0, microsecond=0) # 转换为 ISO weekday(1=Monday, 7=Sunday) iso_dow = 7 if cron_dow == 0 else cron_dow current_iso = now.isoweekday() if iso_dow > current_iso: delta_days = iso_dow - current_iso elif iso_dow < current_iso: delta_days = 7 - current_iso + iso_dow else: # 同一天,看时间是否已过 target_today = now.replace(hour=hour, minute=minute, second=0, microsecond=0) if now < target_today: delta_days = 0 else: delta_days = 7 next_dt = now + timedelta(days=delta_days) return next_dt.replace(hour=hour, minute=minute, second=0, microsecond=0) # 每天定时(dom=* month=* dow=*) tomorrow = now + timedelta(days=1) return tomorrow.replace(hour=hour, minute=minute, second=0, microsecond=0) class Scheduler: """基于 PostgreSQL 的定时调度器 后台 asyncio 循环每 SCHEDULER_POLL_INTERVAL 秒检查一次到期任务, 将其 TaskConfig 入队到 TaskQueue。 """ def __init__(self) -> None: self._running = False self._loop_task: asyncio.Task | None = None # ------------------------------------------------------------------ # 核心:检查到期任务并入队 # ------------------------------------------------------------------ def check_and_enqueue(self) -> int: """查询 enabled=true 且 next_run_at <= now 的调度任务,将其入队。 Returns: 本次入队的任务数量 """ conn = get_connection() enqueued = 0 try: with conn.cursor() as cur: cur.execute( """ SELECT id, site_id, task_config, schedule_config FROM scheduled_tasks WHERE enabled = TRUE AND next_run_at IS NOT NULL AND next_run_at <= NOW() ORDER BY next_run_at ASC """ ) rows = cur.fetchall() for row in rows: task_id = str(row[0]) site_id = row[1] task_config_raw = row[2] if isinstance(row[2], dict) else json.loads(row[2]) schedule_config_raw = row[3] if isinstance(row[3], dict) else json.loads(row[3]) try: config = TaskConfigSchema(**task_config_raw) schedule_cfg = ScheduleConfigSchema(**schedule_config_raw) except Exception: logger.exception("调度任务 [%s] 配置反序列化失败,跳过", task_id) continue # 入队 try: queue_id = task_queue.enqueue(config, site_id) logger.info( "调度任务 [%s] 入队成功 → queue_id=%s site_id=%s", task_id, queue_id, site_id, ) enqueued += 1 except Exception: logger.exception("调度任务 [%s] 入队失败", task_id) continue # 更新调度任务状态 now = datetime.now(timezone.utc) next_run = calculate_next_run(schedule_cfg, now) with conn.cursor() as cur: cur.execute( """ UPDATE scheduled_tasks SET last_run_at = NOW(), run_count = run_count + 1, next_run_at = %s, last_status = 'enqueued', updated_at = NOW() WHERE id = %s """, (next_run, task_id), ) conn.commit() except Exception: logger.exception("check_and_enqueue 执行异常") try: conn.rollback() except Exception: pass finally: conn.close() if enqueued > 0: logger.info("本轮调度检查:%d 个任务入队", enqueued) return enqueued # ------------------------------------------------------------------ # 后台循环 # ------------------------------------------------------------------ async def _loop(self) -> None: """后台 asyncio 循环,每 SCHEDULER_POLL_INTERVAL 秒检查一次。""" self._running = True logger.info("Scheduler 后台循环启动(间隔 %ds)", SCHEDULER_POLL_INTERVAL) while self._running: try: # 在线程池中执行同步数据库操作,避免阻塞事件循环 loop = asyncio.get_running_loop() await loop.run_in_executor(None, self.check_and_enqueue) except Exception: logger.exception("Scheduler 循环迭代异常") await asyncio.sleep(SCHEDULER_POLL_INTERVAL) logger.info("Scheduler 后台循环停止") # ------------------------------------------------------------------ # 生命周期 # ------------------------------------------------------------------ def start(self) -> None: """启动后台调度循环(在 FastAPI lifespan 中调用)。""" if self._loop_task is None or self._loop_task.done(): self._loop_task = asyncio.create_task(self._loop()) logger.info("Scheduler 已启动") async def stop(self) -> None: """停止后台调度循环。""" self._running = False if self._loop_task and not self._loop_task.done(): self._loop_task.cancel() try: await self._loop_task except asyncio.CancelledError: pass self._loop_task = None logger.info("Scheduler 已停止") # 全局单例 scheduler = Scheduler()