Files
Neo-ZQYY/apps/backend/app/trace/cleanup.py
Neo 6f8f12314f feat: 累积功能变更 — 聊天集成、租户管理、小程序更新、ETL 增强、迁移脚本
包含多个会话的累积代码变更:
- backend: AI 聊天服务、触发器调度、认证增强、WebSocket、调度器最小间隔
- admin-web: ETL 状态页、任务管理、调度配置、登录优化
- miniprogram: 看板页面、聊天集成、UI 组件、导航更新
- etl: DWS 新任务(finance_area_daily/board_cache)、连接器增强
- tenant-admin: 项目初始化
- db: 19 个迁移脚本(etl_feiqiu 11 + zqyy_app 8)
- packages/shared: 枚举和工具函数更新
- tools: 数据库工具、报表生成、健康检查
- docs: PRD/架构/部署/合约文档更新

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-06 00:03:48 +08:00

278 lines
8.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
日志自动清理模块
提供同步清理函数和定时任务调度器:
- cleanup_old_logs: 保留最新 N 个日期目录(按记录日期排序,非日历天数),
超出总量上限时额外清理
- cleanup_date_range: 按日期范围清理指定目录
- schedule_daily_cleanup: 返回 async 函数,每天凌晨 2:00 执行清理
"""
from __future__ import annotations
import asyncio
import json
import logging
import shutil
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any
from app.trace.config import get_trace_config
# Module-level logger; all cleanup warnings/progress go through it.
logger = logging.getLogger(__name__)
# Directory-name format for per-day log folders (e.g. "2026-03-21").
_DATE_FORMAT = "%Y-%m-%d"
def _is_date_dir(name: str) -> bool:
"""判断目录名是否为 YYYY-MM-DD 格式的日期目录。"""
try:
datetime.strptime(name, _DATE_FORMAT)
return True
except ValueError:
return False
def _parse_date(name: str) -> datetime:
"""将 YYYY-MM-DD 格式的目录名解析为 datetime。"""
return datetime.strptime(name, _DATE_FORMAT)
def _dir_size(path: Path) -> int:
"""递归计算目录总大小(字节)。"""
total = 0
try:
for f in path.rglob("*"):
if f.is_file():
total += f.stat().st_size
except OSError:
pass
return total
def _update_root_index(base_dir: Path, deleted_dirs: list[str]) -> None:
"""更新根目录的 _index.json移除已删除目录的引用。
根索引结构示例:
{
"dates": {
"2026-03-20": { "files": {...} },
"2026-03-21": { "files": {...} }
}
}
也兼容日期目录内的 _index.jsonfiles 字典)。
"""
index_path = base_dir / "_index.json"
if not index_path.exists():
return
try:
index: dict[str, Any] = json.loads(index_path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
return
changed = False
# 移除 dates 字典中的已删除日期
if "dates" in index and isinstance(index["dates"], dict):
for d in deleted_dirs:
if d in index["dates"]:
del index["dates"][d]
changed = True
# 兼容:如果根索引直接用日期作为 key扁平结构
for d in deleted_dirs:
if d in index and d != "dates":
del index[d]
changed = True
if changed:
index_path.write_text(
json.dumps(index, ensure_ascii=False, indent=2),
encoding="utf-8",
)
def cleanup_old_logs(
    base_dir: str | None = None,
    retention_days: int | None = None,
    max_total_mb: int = 200,
) -> dict[str, Any]:
    """Keep the newest N date directories, delete the rest; then enforce a size cap.

    Strategy: sort date directories by name (i.e. by date, not calendar
    days) and keep the newest ``retention_days`` of them regardless of
    gaps. If the survivors still exceed ``max_total_mb``, keep deleting
    from the oldest until the total drops under the cap.

    Args:
        base_dir: log root directory; defaults to TraceConfig's log_dir
        retention_days: number of date directories to keep; defaults to
            TraceConfig's retention_days
        max_total_mb: total-size cap in MB; extra deletions happen above it

    Returns:
        Result dict::

            {
                "deleted_dirs": ["2026-03-15", ...],
                "deleted_count": 2,
                "freed_bytes": 12345
            }
    """
    # Load config lazily, and only when a default is actually needed —
    # consistent with cleanup_date_range, and avoids touching TraceConfig
    # when the caller supplies both parameters.
    if base_dir is None or retention_days is None:
        cfg = get_trace_config()
        if base_dir is None:
            base_dir = cfg.log_dir
        if retention_days is None:
            retention_days = cfg.retention_days
    base_path = Path(base_dir)
    if not base_path.exists():
        return {"deleted_dirs": [], "deleted_count": 0, "freed_bytes": 0}
    # Collect all date directories, sorted by name == ascending by date.
    date_dirs = sorted(
        (d for d in base_path.iterdir() if d.is_dir() and _is_date_dir(d.name)),
        key=lambda d: d.name,
    )
    deleted_dirs: list[str] = []
    freed_bytes = 0

    def _remove(entry: Path, size: int) -> bool:
        """Delete one directory; record it on success, warn on failure."""
        nonlocal freed_bytes
        try:
            shutil.rmtree(entry)
        except OSError:
            logger.warning("清理日期目录失败: %s", entry, exc_info=True)
            return False
        deleted_dirs.append(entry.name)
        freed_bytes += size
        return True

    # Step 1: keep only the newest ``retention_days`` directories.
    if len(date_dirs) > retention_days:
        cutoff = len(date_dirs) - retention_days
        for entry in date_dirs[:cutoff]:
            _remove(entry, _dir_size(entry))
        date_dirs = date_dirs[cutoff:]
    # Step 2: total-size cap. Sizes are computed once per directory here
    # (the original walked each survivor twice: once for the sum, once
    # again per deletion candidate).
    max_bytes = max_total_mb * 1024 * 1024
    sizes = {entry: _dir_size(entry) for entry in date_dirs}
    total_size = sum(sizes.values())
    if total_size > max_bytes:
        logger.warning(
            "日志总量 %.1f MB 超过上限 %d MB启动额外清理",
            total_size / (1024 * 1024),
            max_total_mb,
        )
        for entry in list(date_dirs):
            if total_size <= max_bytes:
                break
            size = sizes[entry]
            if _remove(entry, size):
                total_size -= size
                date_dirs.remove(entry)
    # Keep the root index consistent with what was deleted.
    if deleted_dirs:
        _update_root_index(base_path, deleted_dirs)
    return {
        "deleted_dirs": deleted_dirs,
        "deleted_count": len(deleted_dirs),
        "freed_bytes": freed_bytes,
    }
def cleanup_date_range(
    start_date: str,
    end_date: str,
    base_dir: str | None = None,
) -> dict[str, Any]:
    """Delete every date directory whose name falls within an inclusive range.

    Args:
        start_date: first date to delete (inclusive), YYYY-MM-DD
        end_date: last date to delete (inclusive), YYYY-MM-DD
        base_dir: log root directory; defaults to TraceConfig's log_dir

    Returns:
        Result dict with the same shape as cleanup_old_logs.
    """
    # Config is consulted only when no explicit root was supplied.
    root = Path(base_dir if base_dir is not None else get_trace_config().log_dir)
    if not root.exists():
        return {"deleted_dirs": [], "deleted_count": 0, "freed_bytes": 0}
    lower = datetime.strptime(start_date, _DATE_FORMAT)
    upper = datetime.strptime(end_date, _DATE_FORMAT)
    removed: list[str] = []
    reclaimed = 0
    for child in sorted(root.iterdir()):
        # Only well-formed date directories are candidates.
        if not (child.is_dir() and _is_date_dir(child.name)):
            continue
        if not (lower <= _parse_date(child.name) <= upper):
            continue
        nbytes = _dir_size(child)
        try:
            shutil.rmtree(child)
        except OSError:
            logger.warning("清理日期目录失败: %s", child, exc_info=True)
        else:
            removed.append(child.name)
            reclaimed += nbytes
    if removed:
        _update_root_index(root, removed)
    return {
        "deleted_dirs": removed,
        "deleted_count": len(removed),
        "freed_bytes": reclaimed,
    }
def schedule_daily_cleanup():
    """Return an async function that can be registered as a lifespan task.

    The returned coroutine function loops forever: it sleeps until the
    next 02:00 local time, runs cleanup_old_logs(), logs the outcome,
    and repeats.
    """

    async def _daily_cleanup_loop() -> None:
        """Timed loop: wait until the next 02:00, clean up, go around again."""
        while True:
            now = datetime.now()
            # Next 02:00 local time.
            target = now.replace(hour=2, minute=0, second=0, microsecond=0)
            if now >= target:
                # Today's 02:00 already passed — wait for tomorrow's.
                target += timedelta(days=1)
            wait_seconds = (target - now).total_seconds()
            # Fixed: the original message opened "(%s" without the closing
            # parenthesis, producing malformed log output.
            logger.info("日志清理定时任务:将在 %.0f 秒后执行(%s)", wait_seconds, target.isoformat())
            await asyncio.sleep(wait_seconds)
            try:
                result = cleanup_old_logs()
                if result["deleted_count"] > 0:
                    logger.info(
                        "日志自动清理完成:删除 %d 个目录,释放 %d 字节",
                        result["deleted_count"],
                        result["freed_bytes"],
                    )
                else:
                    logger.debug("日志自动清理:无过期目录需要清理")
            except Exception:
                # Broad catch is deliberate: a cleanup failure must never
                # kill the scheduler loop.
                logger.warning("日志自动清理失败", exc_info=True)

    return _daily_cleanup_loop