feiqiu-ETL/etl_billiards/orchestration/scheduler.py

# -*- coding: utf-8 -*-
"""ETL调度器"""
import uuid
from datetime import datetime
from pathlib import Path
from zoneinfo import ZoneInfo

from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
from api.client import APIClient
from orchestration.cursor_manager import CursorManager
from orchestration.run_tracker import RunTracker
from orchestration.task_registry import default_registry

class ETLScheduler:
    """Sequentially run ETL tasks and record the outcome of each run.

    The scheduler wires together a database connection, an API client, a
    cursor manager, a run tracker and the default task registry. It can be
    used as a context manager, in which case the database connection is
    closed when the ``with`` block exits.
    """

    def __init__(self, config, logger):
        """Build the scheduler's collaborators from *config*.

        Args:
            config: Configuration object. It is read both through
                ``config.get("dotted.key", default)`` and through nested
                subscripts such as ``config["db"]["dsn"]``.
            logger: Logger used for progress, warning and error messages.
        """
        self.config = config
        self.logger = logger
        self.tz = ZoneInfo(config.get("app.timezone", "Asia/Taipei"))

        # Database access layer.
        self.db_conn = DatabaseConnection(
            dsn=config["db"]["dsn"],
            session=config["db"].get("session"),
            connect_timeout=config["db"].get("connect_timeout_sec")
        )
        self.db_ops = DatabaseOperations(self.db_conn)

        # Upstream API client.
        self.api_client = APIClient(
            base_url=config["api"]["base_url"],
            token=config["api"]["token"],
            timeout=config["api"]["timeout_sec"],
            retry_max=config["api"]["retries"]["max_attempts"],
            headers_extra=config["api"].get("headers_extra")
        )

        self.cursor_mgr = CursorManager(self.db_conn)
        self.run_tracker = RunTracker(self.db_conn)
        self.task_registry = default_registry

    def __enter__(self):
        """Return the scheduler itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the scheduler on exit; exceptions are never suppressed."""
        self.close()

    def run_tasks(self, task_codes: "list | None" = None):
        """Run the given task codes one after another.

        Args:
            task_codes: Task codes to run. When empty or ``None``, the list
                is read from the ``run.tasks`` configuration key.

        A failing task is logged and does not stop the remaining tasks. All
        tasks of one call share a single generated ``run_uuid``.
        """
        run_uuid = uuid.uuid4().hex
        store_id = self.config.get("app.store_id")

        if not task_codes:
            task_codes = self.config.get("run.tasks", [])

        self.logger.info(f"开始运行任务: {task_codes}, run_uuid={run_uuid}")

        for task_code in task_codes:
            try:
                self._run_single_task(task_code, run_uuid, store_id)
            except Exception as e:
                # Deliberate best-effort: log and move on to the next task.
                self.logger.error(f"任务 {task_code} 失败: {e}", exc_info=True)
                continue

        self.logger.info("所有任务执行完成")

    def _run_single_task(self, task_code: str, run_uuid: str, store_id: int):
        """Run one task and record its result through the run tracker.

        Args:
            task_code: Code used to create the task and look up its config.
            run_uuid: Identifier shared by all tasks of the current run.
            store_id: Store whose task configuration is loaded.

        Raises:
            Exception: Whatever the task (or the success-path bookkeeping)
                raised. The run is marked ``FAIL`` on a best-effort basis
                before the original exception is re-raised.
        """
        # Instantiate the task.
        task = self.task_registry.create_task(
            task_code, self.config, self.db_ops, self.api_client, self.logger
        )

        # Load the task configuration from the database.
        task_cfg = self._load_task_config(task_code, store_id)
        if not task_cfg:
            self.logger.warning(f"任务 {task_code} 未启用或不存在")
            return

        task_id = task_cfg["task_id"]

        # Create the run record; exports are grouped into a per-day folder.
        export_dir = Path(self.config["io"]["export_root"]) / datetime.now(self.tz).strftime("%Y%m%d")
        log_path = str(Path(self.config["io"]["log_root"]) / f"{run_uuid}.log")

        run_id = self.run_tracker.create_run(
            task_id=task_id,
            store_id=store_id,
            run_uuid=run_uuid,
            export_dir=str(export_dir),
            log_path=log_path,
            status="RUNNING"
        )

        # Execute the task.
        try:
            result = task.execute()

            # Update the run record with the task's own counts and status.
            self.run_tracker.update_run(
                run_id=run_id,
                counts=result["counts"],
                status=result["status"],
                ended_at=datetime.now(self.tz)
            )

            # Advance the cursor.
            if result["status"] == "SUCCESS":
                # TODO: obtain the window information from the task result
                pass

        except Exception as e:
            self._record_failure(task_code, run_id, e)
            # Bare raise re-raises the original task error, not a
            # bookkeeping error from the tracker.
            raise

    def _record_failure(self, task_code: str, run_id, error: Exception) -> None:
        """Mark a run as ``FAIL`` without masking the error that caused it.

        If the run tracker itself fails while recording, that secondary
        error is logged and swallowed so the caller can re-raise *error*.
        """
        try:
            self.run_tracker.update_run(
                run_id=run_id,
                counts={},
                status="FAIL",
                ended_at=datetime.now(self.tz),
                error_message=str(error)
            )
        except Exception as tracker_err:
            self.logger.error(
                f"任务 {task_code} 运行记录更新失败: {tracker_err}",
                exc_info=True
            )

    def _load_task_config(self, task_code: str, store_id: int) -> "dict | None":
        """Load the enabled configuration row for a task from the database.

        Args:
            task_code: Task code to look up.
            store_id: Store the task belongs to.

        Returns:
            The first matching row, or ``None`` when the query returns no
            rows (task missing or not enabled).
        """
        sql = """
        SELECT task_id, task_code, store_id, enabled, cursor_field,
               window_minutes_default, overlap_seconds, page_size, retry_max, params
        FROM etl_admin.etl_task
        WHERE store_id = %s AND task_code = %s AND enabled = TRUE
        """

        rows = self.db_conn.query(sql, (store_id, task_code))
        return rows[0] if rows else None

    def close(self):
        """Close the database connection."""
        self.db_conn.close()