# -*- coding: utf-8 -*-
"""Automatic log cleanup.

Provides synchronous cleanup functions and a scheduler factory:

- cleanup_old_logs: keep the newest N date directories (ranked by the date in
  the directory name, not by calendar days elapsed) and prune further when the
  total size exceeds a cap.
- cleanup_date_range: delete the date directories inside an inclusive range.
- schedule_daily_cleanup: return an async function that runs the cleanup
  every day at 02:00 local time.
"""

from __future__ import annotations

import asyncio
import json
import logging
import shutil
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Callable, Coroutine

from app.trace.config import get_trace_config

logger = logging.getLogger(__name__)

# Format of the date directory names.
_DATE_FORMAT = "%Y-%m-%d"


def _is_date_dir(name: str) -> bool:
    """Return True when *name* parses as a ``YYYY-MM-DD`` date."""
    try:
        datetime.strptime(name, _DATE_FORMAT)
    except ValueError:
        return False
    return True


def _parse_date(name: str) -> datetime:
    """Parse a ``YYYY-MM-DD`` directory name into a datetime.

    Raises:
        ValueError: if *name* does not match the date format.
    """
    return datetime.strptime(name, _DATE_FORMAT)


def _dir_size(path: Path) -> int:
    """Return the total size in bytes of all files below *path*.

    Best effort: a file that cannot be inspected (for example because it
    disappeared during the scan) is skipped instead of aborting the whole
    count, and a failure of the directory walk itself returns the partial sum.
    """
    total = 0
    try:
        for item in path.rglob("*"):
            try:
                if item.is_file():
                    total += item.stat().st_size
            except OSError:
                # Skip only this entry; keep counting the rest.
                continue
    except OSError:
        logger.debug("计算目录大小失败: %s", path, exc_info=True)
    return total


def _list_date_dirs(base_path: Path) -> list[Path]:
    """Return the date directories directly under *base_path*, oldest first.

    Ordering uses the parsed date rather than the raw name, because strptime
    also accepts non-zero-padded names such as ``2026-3-5`` which would sort
    after ``2026-03-20`` lexicographically. The name is the tie-breaker so the
    order stays deterministic when two names parse to the same date.
    """
    dated: list[tuple[datetime, str, Path]] = []
    for child in base_path.iterdir():
        if not child.is_dir():
            continue
        try:
            parsed = _parse_date(child.name)
        except ValueError:
            continue
        dated.append((parsed, child.name, child))
    dated.sort(key=lambda item: (item[0], item[1]))
    return [child for _, _, child in dated]


def _remove_dir(entry: Path, size: int | None = None) -> int | None:
    """Delete the directory tree *entry*.

    Args:
        entry: directory to delete.
        size: already-measured size in bytes; measured here when omitted.

    Returns:
        The number of bytes counted for the directory, or None when the
        deletion failed (the failure is logged, not raised).
    """
    if size is None:
        size = _dir_size(entry)
    try:
        shutil.rmtree(entry)
    except OSError:
        logger.warning("清理日期目录失败: %s", entry, exc_info=True)
        return None
    return size


def _build_result(deleted_dirs: list[str], freed_bytes: int) -> dict[str, Any]:
    """Assemble the result dict shared by the cleanup functions."""
    return {
        "deleted_dirs": deleted_dirs,
        "deleted_count": len(deleted_dirs),
        "freed_bytes": freed_bytes,
    }


def _update_root_index(base_dir: Path, deleted_dirs: list[str]) -> None:
    """Drop references to deleted directories from ``<base_dir>/_index.json``.

    Two layouts are handled:

    - nested: ``{"dates": {"2026-03-20": {...}, "2026-03-21": {...}}}``
    - flat: the date strings are top-level keys of the index.

    The update is best effort: a missing, unreadable, undecodable or
    non-object index is left untouched, and a failed write is logged rather
    than raised so that the directories already deleted are still reported
    to the caller.
    """
    index_path = base_dir / "_index.json"
    if not index_path.exists():
        return

    try:
        index = json.loads(index_path.read_text(encoding="utf-8"))
    except (ValueError, OSError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return
    if not isinstance(index, dict):
        return

    changed = False

    # Nested layout: remove the dates from the "dates" mapping.
    dates = index.get("dates")
    if isinstance(dates, dict):
        for name in deleted_dirs:
            if name in dates:
                del dates[name]
                changed = True

    # Flat layout: the date itself is a top-level key.
    for name in deleted_dirs:
        if name != "dates" and name in index:
            del index[name]
            changed = True

    if not changed:
        return

    try:
        index_path.write_text(
            json.dumps(index, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
    except OSError:
        logger.warning("更新根索引失败: %s", index_path, exc_info=True)


def cleanup_old_logs(
    base_dir: str | None = None,
    retention_days: int | None = None,
    max_total_mb: int = 200,
) -> dict[str, Any]:
    """Keep the newest N date directories and enforce a total size cap.

    Strategy: date directories are ordered by the date in their name (not by
    calendar days elapsed) and the newest ``retention_days`` of them are kept,
    whether or not the dates are consecutive. If the kept directories still
    exceed ``max_total_mb``, further directories are deleted starting from the
    oldest until the total is at or below the cap; this second step is not
    limited by ``retention_days``.

    Args:
        base_dir: log root directory; read from the trace config when None.
        retention_days: number of date directories to keep; read from the
            trace config when None.
        max_total_mb: size cap in MB for the kept directories.

    Returns:
        A dict of the form::

            {
                "deleted_dirs": ["2026-03-15", ...],
                "deleted_count": 2,
                "freed_bytes": 12345,
            }
    """
    # Only load the config when a value actually has to come from it.
    if base_dir is None or retention_days is None:
        cfg = get_trace_config()
        if base_dir is None:
            base_dir = cfg.log_dir
        if retention_days is None:
            retention_days = cfg.retention_days

    base_path = Path(base_dir)
    if not base_path.is_dir():
        return _build_result([], 0)

    date_dirs = _list_date_dirs(base_path)
    deleted_dirs: list[str] = []
    freed_bytes = 0

    # Step 1: keep the newest `retention_days` directories, delete the rest.
    # A zero or negative retention keeps nothing.
    cutoff = max(len(date_dirs) - max(retention_days, 0), 0)
    expired, kept = date_dirs[:cutoff], date_dirs[cutoff:]
    for entry in expired:
        freed = _remove_dir(entry)
        if freed is not None:
            deleted_dirs.append(entry.name)
            freed_bytes += freed

    # Step 2: total size cap. Each directory is measured once and the
    # measurement is reused for both the total and the per-directory delta.
    max_bytes = max_total_mb * 1024 * 1024
    sized = [(entry, _dir_size(entry)) for entry in kept]
    total_size = sum(size for _, size in sized)
    if total_size > max_bytes:
        logger.warning(
            "日志总量 %.1f MB 超过上限 %d MB,启动额外清理",
            total_size / (1024 * 1024),
            max_total_mb,
        )
        for entry, size in sized:  # oldest first
            if total_size <= max_bytes:
                break
            if _remove_dir(entry, size) is None:
                continue
            deleted_dirs.append(entry.name)
            freed_bytes += size
            total_size -= size

    if deleted_dirs:
        _update_root_index(base_path, deleted_dirs)

    return _build_result(deleted_dirs, freed_bytes)


def cleanup_date_range(
    start_date: str,
    end_date: str,
    base_dir: str | None = None,
) -> dict[str, Any]:
    """Delete the date directories whose date lies in an inclusive range.

    Args:
        start_date: first date to delete (inclusive), ``YYYY-MM-DD``.
        end_date: last date to delete (inclusive), ``YYYY-MM-DD``.
        base_dir: log root directory; read from the trace config when None.

    Returns:
        A result dict with the same shape as the one from cleanup_old_logs.

    Raises:
        ValueError: if the log root exists and either date does not match
            ``YYYY-MM-DD``.
    """
    if base_dir is None:
        base_dir = get_trace_config().log_dir

    base_path = Path(base_dir)
    if not base_path.is_dir():
        return _build_result([], 0)

    start_dt = _parse_date(start_date)
    end_dt = _parse_date(end_date)

    deleted_dirs: list[str] = []
    freed_bytes = 0

    for entry in _list_date_dirs(base_path):
        if not start_dt <= _parse_date(entry.name) <= end_dt:
            continue
        freed = _remove_dir(entry)
        if freed is not None:
            deleted_dirs.append(entry.name)
            freed_bytes += freed

    if deleted_dirs:
        _update_root_index(base_path, deleted_dirs)

    return _build_result(deleted_dirs, freed_bytes)


def schedule_daily_cleanup() -> Callable[[], Coroutine[Any, Any, None]]:
    """Return an async function that runs cleanup_old_logs daily at 02:00.

    The returned coroutine function loops forever: it sleeps with
    asyncio.sleep until the next 02:00 (naive local time from datetime.now),
    runs the cleanup, and repeats. It is meant to be started as a background
    task, e.g. from an application lifespan hook.
    """

    async def _daily_cleanup_loop() -> None:
        """Sleep until the next 02:00, run the cleanup, then loop again."""
        while True:
            now = datetime.now()
            # Next 02:00: today if it is still ahead, otherwise tomorrow.
            target = now.replace(hour=2, minute=0, second=0, microsecond=0)
            if now >= target:
                target += timedelta(days=1)

            wait_seconds = (target - now).total_seconds()
            logger.info("日志清理定时任务:将在 %.0f 秒后执行(%s)", wait_seconds, target.isoformat())
            await asyncio.sleep(wait_seconds)

            try:
                # The cleanup does blocking filesystem work (directory walks
                # and rmtree); run it in the default executor so the event
                # loop keeps serving other tasks meanwhile.
                loop = asyncio.get_running_loop()
                result = await loop.run_in_executor(None, cleanup_old_logs)
                if result["deleted_count"] > 0:
                    logger.info(
                        "日志自动清理完成:删除 %d 个目录,释放 %d 字节",
                        result["deleted_count"],
                        result["freed_bytes"],
                    )
                else:
                    logger.debug("日志自动清理:无过期目录需要清理")
            except asyncio.CancelledError:
                # Never swallow task cancellation (CancelledError derives
                # from Exception on Python 3.7).
                raise
            except Exception:
                logger.warning("日志自动清理失败", exc_info=True)

    return _daily_cleanup_loop