# -*- coding: utf-8 -*-
"""Unified task logger.

Provides one consistent log output format for tasks, with support for:

- task start / end records
- progress tracking
- statistic counters
- a formatted end-of-task summary
"""

import logging
import time
import unicodedata
from datetime import datetime
from typing import Any, Dict, Optional

# Unified log format
UNIFIED_LOG_FORMAT = "[%(asctime)s] %(levelname)-5s | %(name)s | %(message)s"
UNIFIED_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# Geometry of the summary box, measured in terminal columns.
_BOX_INNER_WIDTH = 62   # columns between the two vertical borders
_BOX_VALUE_WIDTH = 50   # maximum columns kept from an error message
_STAT_LABEL_WIDTH = 16  # columns reserved for " - <label>: " in statistic rows

# (label shown in the summary, key in TaskLogger.counts), in display order.
_COUNT_LABELS = (
    ("获取记录", "fetched"),
    ("新增记录", "inserted"),
    ("更新记录", "updated"),
    ("跳过记录", "skipped"),
    ("错误记录", "errors"),
)


def _display_width(text: str) -> int:
    """Return the column count of *text*, counting wide/fullwidth chars as two."""
    return sum(
        2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1
        for ch in text
    )


def _pad_to_width(text: str, width: int) -> str:
    """Right-pad *text* with spaces up to *width* columns (never truncates)."""
    return text + " " * max(width - _display_width(text), 0)


def _center_to_width(text: str, width: int) -> str:
    """Center *text* inside *width* columns (never truncates)."""
    gap = max(width - _display_width(text), 0)
    left = gap // 2
    return " " * left + text + " " * (gap - left)


def _truncate_to_width(text: str, width: int) -> str:
    """Return the longest prefix of *text* that fits in *width* columns."""
    used = 0
    kept = []
    for ch in text:
        ch_width = 2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1
        if used + ch_width > width:
            break
        kept.append(ch)
        used += ch_width
    return "".join(kept)


def _box_row(text: str) -> str:
    """Wrap *text* in vertical borders, padded to the box's inner width."""
    return f"║{_pad_to_width(text, _BOX_INNER_WIDTH)}║"


def _format_count(value: Any) -> str:
    """Render a counter right-aligned in 10 columns.

    Numbers get thousands separators; anything else (e.g. ``None`` stored by a
    caller) is shown via ``str()`` instead of raising a formatting error.
    """
    if isinstance(value, (int, float)):
        return f"{value:>10,}"
    return f"{value!s:>10}"


def _stat_row(label: str, value: Any) -> str:
    """Build one ``- label: value`` row of the summary box."""
    head = _pad_to_width(f" - {label}: ", _STAT_LABEL_WIDTH)
    return _box_row(head + _format_count(value))


def _is_positive(value: Any) -> bool:
    """Return True when *value* compares greater than zero.

    Values that cannot be compared with ``0`` (``None``, strings, ...) are
    treated as not positive instead of raising ``TypeError``.
    """
    try:
        return bool(value > 0)
    except TypeError:
        return False


class TaskLogger:
    """Task logger that unifies print-style and ``logging`` output."""

    def __init__(
        self,
        task_code: str,
        logger: Optional[logging.Logger] = None,
    ):
        """Initialise the task logger.

        Args:
            task_code: Task code; prefixed to every message.
            logger: Underlying logger. When omitted, the logger named
                ``task.<task_code>`` is used.
        """
        self.task_code = task_code
        self.logger = (
            logger if logger is not None
            else logging.getLogger(f"task.{task_code}")
        )

        # Task state
        self.start_time: Optional[datetime] = None
        self.end_time: Optional[datetime] = None
        self.status: str = "pending"

        # Statistic counters
        self.counts: Dict[str, int] = {
            "fetched": 0,
            "inserted": 0,
            "updated": 0,
            "skipped": 0,
            "errors": 0,
        }

        # Extra information collected from progress() keyword arguments
        self.extra_info: Dict[str, Any] = {}

        # Verification result, if any
        self.verification_result: Optional[dict] = None

    def _prefixed(self, message: str, args: tuple) -> str:
        """Return *message* prefixed with the task code.

        ``logging`` only %-formats the message when args are supplied, so the
        task code is %-escaped in exactly that case; otherwise a ``%`` in the
        task code would be read as a format directive.
        """
        prefix = str(self.task_code)
        if args:
            prefix = prefix.replace("%", "%%")
        return f"{prefix} | {message}"

    def start(self, message: str = "任务开始"):
        """Record the task start and switch the status to ``running``.

        Args:
            message: Start message.
        """
        self.start_time = datetime.now()
        self.status = "running"
        self.logger.info(
            "%s | %s | 开始时间: %s",
            self.task_code,
            message,
            self.start_time.strftime(UNIFIED_DATE_FORMAT),
        )

    def progress(self, message: str, **kwargs):
        """Record progress and update counters.

        Args:
            message: Progress message.
            **kwargs: Extra statistics. For a known counter key an ``int``
                value is added to the counter and any other value replaces
                it; unknown keys are stored in ``extra_info``.
        """
        for key, value in kwargs.items():
            if key not in self.counts:
                self.extra_info[key] = value
            elif isinstance(value, int):
                self.counts[key] += value
            else:
                self.counts[key] = value

        # Only positive counters are listed; non-comparable values are skipped
        # rather than crashing the log call.
        counts_str = ", ".join(
            f"{k}={v}" for k, v in self.counts.items() if _is_positive(v)
        )
        if counts_str:
            self.logger.info("%s | %s | %s", self.task_code, message, counts_str)
        else:
            self.logger.info("%s | %s", self.task_code, message)

    def info(self, message: str, *args):
        """Log at INFO level; *args* are lazy %-format arguments for *message*."""
        self.logger.info(self._prefixed(message, args), *args)

    def warning(self, message: str, *args):
        """Log at WARNING level; *args* are lazy %-format arguments for *message*."""
        self.logger.warning(self._prefixed(message, args), *args)

    def error(self, message: str, *args, exc_info: bool = False):
        """Log at ERROR level and increment the ``errors`` counter.

        Args:
            message: Log message (may contain %-format directives).
            *args: Lazy %-format arguments for *message*.
            exc_info: Passed through to ``Logger.error``.
        """
        self.counts["errors"] += 1
        self.logger.error(self._prefixed(message, args), *args, exc_info=exc_info)

    def set_counts(self, **counts):
        """Set known counters directly; unknown keys are ignored."""
        for key, value in counts.items():
            if key in self.counts:
                self.counts[key] = value

    def add_counts(self, **counts):
        """Add to known counters; unknown keys are ignored."""
        for key, value in counts.items():
            if key in self.counts:
                self.counts[key] += value

    def set_verification_result(self, result: dict):
        """Store the verification result shown in the summary."""
        self.verification_result = result

    def end(self, status: str = "成功", error_message: Optional[str] = None) -> str:
        """Record the task end and return the formatted summary.

        Args:
            status: Status ("成功" / "失败" / "取消").
            error_message: Error message (when the task failed).

        Returns:
            The formatted task summary string. It is logged at INFO level when
            *status* is "成功" and at ERROR level otherwise.
        """
        self.end_time = datetime.now()
        self.status = status

        # Elapsed time is only known when start() was called.
        if self.start_time:
            elapsed = (self.end_time - self.start_time).total_seconds()
            elapsed_str = self._format_duration(elapsed)
        else:
            elapsed_str = "-"

        summary = self._format_summary(status, elapsed_str, error_message)

        if status == "成功":
            self.logger.info("\n%s", summary)
        else:
            self.logger.error("\n%s", summary)

        return summary

    def _format_duration(self, seconds: float) -> str:
        """Format a duration in seconds as seconds, minutes+seconds or hours+minutes.

        The value is rounded before it is split into units, so results such as
        "1分60秒" cannot occur. Negative input is clamped to zero.
        """
        seconds = max(seconds, 0.0)

        short_form = f"{seconds:.1f}"
        if float(short_form) < 60:
            return f"{short_form}秒"

        total = int(round(seconds))
        if total < 3600:
            mins, secs = divmod(total, 60)
            return f"{mins}分{secs}秒"

        hours, remainder = divmod(total, 3600)
        return f"{hours}时{remainder // 60}分"

    def _format_summary(
        self,
        status: str,
        elapsed_str: str,
        error_message: Optional[str] = None,
    ) -> str:
        """Build the boxed task summary.

        Rows are padded by display width (wide CJK characters count as two
        columns) so the right border stays aligned. Over-long values are not
        truncated, except the error message.

        Args:
            status: Status text shown in the box.
            elapsed_str: Pre-formatted elapsed time.
            error_message: Optional error text; whitespace (including line
                breaks) is collapsed and the text is cut to fit one row.

        Returns:
            The summary as a newline-joined string.
        """
        rule = "═" * _BOX_INNER_WIDTH
        separator = f"╠{rule}╣"

        lines = [
            f"╔{rule}╗",
            _box_row(_center_to_width("任务执行总结", _BOX_INNER_WIDTH)),
            separator,
            _box_row(f" 任务代码: {self.task_code}"),
            _box_row(f" 执行状态: {status}"),
        ]

        if self.start_time and self.end_time:
            time_range = (
                f"{self.start_time.strftime('%Y-%m-%d %H:%M:%S')}"
                f" ~ {self.end_time.strftime('%H:%M:%S')} ({elapsed_str})"
            )
            lines.append(_box_row(f" 执行时间: {time_range}"))

        lines.append(separator)
        lines.append(_box_row(" 数据统计"))
        lines.extend(
            _stat_row(label, self.counts[key]) for label, key in _COUNT_LABELS
        )

        # Verification result
        if self.verification_result:
            result = self.verification_result
            # "backfilled_count" is read as a fallback key for the missing count.
            backfilled_missing = result.get(
                "backfilled_missing_count",
                result.get("backfilled_count", 0),
            )
            backfilled_mismatch = result.get("backfilled_mismatch_count", 0)
            lines.extend([
                separator,
                _box_row(" 校验结果"),
                _stat_row("源数据量", result.get("source_count", 0)),
                _stat_row("目标数据量", result.get("target_count", 0)),
                _stat_row("缺失补齐", backfilled_missing),
                _stat_row("不一致补齐", backfilled_mismatch),
            ])

        # Error message
        if error_message:
            flattened = " ".join(str(error_message).split())
            shown = _truncate_to_width(flattened, _BOX_VALUE_WIDTH)
            lines.extend([
                separator,
                _box_row(f" 错误信息: {shown}"),
            ])

        lines.append(f"╚{rule}╝")

        return "\n".join(lines)

    def get_result(self) -> dict:
        """Return the task result as a dictionary.

        ``counts`` and ``extra_info`` are shallow copies; ``elapsed_seconds``
        is 0 unless both start and end times are set.
        """
        elapsed = 0
        if self.start_time and self.end_time:
            elapsed = (self.end_time - self.start_time).total_seconds()

        return {
            "task_code": self.task_code,
            "status": self.status,
            "start_time": self.start_time.isoformat() if self.start_time else None,
            "end_time": self.end_time.isoformat() if self.end_time else None,
            "elapsed_seconds": elapsed,
            "counts": self.counts.copy(),
            "extra_info": self.extra_info.copy(),
            "verification_result": self.verification_result,
        }


def configure_task_logging(
    name: str = "fq_etl",
    level: str = "INFO",
) -> logging.Logger:
    """Configure a task logger with a single console handler.

    Existing handlers on the logger are removed, a ``StreamHandler`` using the
    unified format is attached, and propagation is disabled.

    Args:
        name: Logger name.
        level: Log level name (case-insensitive) or a numeric level. Names
            that do not resolve to a numeric level fall back to INFO.

    Returns:
        The configured logger.
    """
    if isinstance(level, int):
        resolved_level = level
    else:
        # getattr can hit non-level attributes (e.g. BASIC_FORMAT is a str);
        # only accept integers so setLevel never raises on a bad name.
        resolved_level = getattr(logging, str(level).upper(), None)
        if not isinstance(resolved_level, int):
            resolved_level = logging.INFO

    logger = logging.getLogger(name)
    logger.setLevel(resolved_level)

    # Remove existing handlers
    for existing in list(logger.handlers):
        logger.removeHandler(existing)

    # Add the console handler
    handler = logging.StreamHandler()
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(
        logging.Formatter(UNIFIED_LOG_FORMAT, UNIFIED_DATE_FORMAT)
    )

    logger.addHandler(handler)
    logger.propagate = False

    return logger