# -*- coding: utf-8 -*-
"""
Automatic log cleanup module.

Provides synchronous cleanup functions and a scheduled-task runner:
- cleanup_old_logs: keep the newest N date directories (sorted by recorded date,
  not calendar days), with extra cleanup when the total size cap is exceeded
- cleanup_date_range: remove date directories within a given date range
- schedule_daily_cleanup: return an async function that runs cleanup at 02:00 every day
"""

from __future__ import annotations

import asyncio
import json
import logging
import shutil
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any

from app.trace.config import get_trace_config

logger = logging.getLogger(__name__)

# Date directory name format
_DATE_FORMAT = "%Y-%m-%d"


def _is_date_dir(name: str) -> bool:
    """Return True if the directory name is a YYYY-MM-DD date directory."""
    try:
        datetime.strptime(name, _DATE_FORMAT)
        return True
    except ValueError:
        return False


def _parse_date(name: str) -> datetime:
    """Parse a YYYY-MM-DD directory name into a datetime."""
    return datetime.strptime(name, _DATE_FORMAT)


def _dir_size(path: Path) -> int:
    """Recursively compute the total size of a directory in bytes."""
    total = 0
    try:
        for f in path.rglob("*"):
            if f.is_file():
                total += f.stat().st_size
    except OSError:
        pass
    return total


def _update_root_index(base_dir: Path, deleted_dirs: list[str]) -> None:
    """Update the root _index.json, removing references to deleted directories.

    Example root index structure:
    {
        "dates": {
            "2026-03-20": { "files": {...} },
            "2026-03-21": { "files": {...} }
        }
    }
    Also compatible with the _index.json inside a date directory (a "files" dict).
    """
    index_path = base_dir / "_index.json"
    if not index_path.exists():
        return

    try:
        index: dict[str, Any] = json.loads(index_path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, OSError):
        return

    changed = False

    # Remove deleted dates from the "dates" dict
    if "dates" in index and isinstance(index["dates"], dict):
        for d in deleted_dirs:
            if d in index["dates"]:
                del index["dates"][d]
                changed = True

    # Compatibility: the root index may use dates directly as keys (flat structure)
    for d in deleted_dirs:
        if d in index and d != "dates":
            del index[d]
            changed = True

    if changed:
        index_path.write_text(
            json.dumps(index, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )


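# Illustration (hypothetical data, not part of the module): how _update_root_index
# prunes the nested root index after the "2026-03-20" directory is deleted.
#
#     before: {"dates": {"2026-03-20": {"files": {}}, "2026-03-21": {"files": {}}}}
#     after:  {"dates": {"2026-03-21": {"files": {}}}}
#
# With the flat layout the date itself is a top-level key and is removed the same way.

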
def cleanup_old_logs(
    base_dir: str | None = None,
    retention_days: int | None = None,
    max_total_mb: int = 200,
) -> dict[str, Any]:
    """Keep the newest N date directories and delete the rest; clean up further if the total size cap is exceeded.

    Strategy: sort date directories by name (not calendar days) and keep the newest
    retention_days of them, whether or not the dates are consecutive. If the total
    size still exceeds max_total_mb afterwards, keep deleting from the oldest until
    it drops below the cap.

    Args:
        base_dir: log root directory, defaults to the value from TraceConfig
        retention_days: number of date directories to keep, defaults to the value from TraceConfig
        max_total_mb: total size cap in MB; extra cleanup starts from the oldest when exceeded

    Returns:
        Cleanup result dict:
        {
            "deleted_dirs": ["2026-03-15", ...],
            "deleted_count": 2,
            "freed_bytes": 12345
        }
    """
    cfg = get_trace_config()
    if base_dir is None:
        base_dir = cfg.log_dir
    if retention_days is None:
        retention_days = cfg.retention_days

    base_path = Path(base_dir)
    if not base_path.exists():
        return {"deleted_dirs": [], "deleted_count": 0, "freed_bytes": 0}

    # Collect all date directories, sorted by name (i.e. ascending by date)
    date_dirs = sorted(
        [d for d in base_path.iterdir() if d.is_dir() and _is_date_dir(d.name)],
        key=lambda d: d.name,
    )

    deleted_dirs: list[str] = []
    freed_bytes = 0

    # Step 1: keep the newest retention_days directories, delete the rest
    if len(date_dirs) > retention_days:
        to_delete = date_dirs[: len(date_dirs) - retention_days]
        for entry in to_delete:
            size = _dir_size(entry)
            try:
                shutil.rmtree(entry)
                deleted_dirs.append(entry.name)
                freed_bytes += size
            except OSError:
                logger.warning("Failed to remove date directory: %s", entry, exc_info=True)
        # Update the remaining list
        date_dirs = date_dirs[len(date_dirs) - retention_days :]

    # Step 2: total size cap protection
    max_bytes = max_total_mb * 1024 * 1024
    total_size = sum(_dir_size(d) for d in date_dirs)
    if total_size > max_bytes:
        logger.warning(
            "Total log size %.1f MB exceeds the %d MB cap, starting extra cleanup",
            total_size / (1024 * 1024),
            max_total_mb,
        )
        for entry in list(date_dirs):
            if total_size <= max_bytes:
                break
            size = _dir_size(entry)
            try:
                shutil.rmtree(entry)
                deleted_dirs.append(entry.name)
                freed_bytes += size
                total_size -= size
                date_dirs.remove(entry)
            except OSError:
                logger.warning("Failed to remove date directory: %s", entry, exc_info=True)

    # Update the root index
    if deleted_dirs:
        _update_root_index(base_path, deleted_dirs)

    return {
        "deleted_dirs": deleted_dirs,
        "deleted_count": len(deleted_dirs),
        "freed_bytes": freed_bytes,
    }


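# Usage sketch (illustrative values only; the real defaults come from TraceConfig):
#
#     result = cleanup_old_logs(base_dir="/var/log/traces", retention_days=7, max_total_mb=200)
#     # e.g. {"deleted_dirs": ["2026-03-14", "2026-03-15"], "deleted_count": 2, "freed_bytes": 1048576}
#
# Retention is applied first, then the size cap, so directories inside the retention
# window can still be removed while the total size stays above max_total_mb.

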
def cleanup_date_range(
    start_date: str,
    end_date: str,
    base_dir: str | None = None,
) -> dict[str, Any]:
    """Remove date directories within a date range.

    Args:
        start_date: start date (inclusive), format YYYY-MM-DD
        end_date: end date (inclusive), format YYYY-MM-DD
        base_dir: log root directory, defaults to the value from TraceConfig

    Returns:
        Cleanup result dict (same shape as cleanup_old_logs)
    """
    if base_dir is None:
        base_dir = get_trace_config().log_dir

    base_path = Path(base_dir)
    if not base_path.exists():
        return {"deleted_dirs": [], "deleted_count": 0, "freed_bytes": 0}

    start_dt = datetime.strptime(start_date, _DATE_FORMAT)
    end_dt = datetime.strptime(end_date, _DATE_FORMAT)

    deleted_dirs: list[str] = []
    freed_bytes = 0

    for entry in sorted(base_path.iterdir()):
        if not entry.is_dir():
            continue
        if not _is_date_dir(entry.name):
            continue

        dir_date = _parse_date(entry.name)
        if start_dt <= dir_date <= end_dt:
            size = _dir_size(entry)
            try:
                shutil.rmtree(entry)
                deleted_dirs.append(entry.name)
                freed_bytes += size
            except OSError:
                logger.warning("Failed to remove date directory: %s", entry, exc_info=True)

    if deleted_dirs:
        _update_root_index(base_path, deleted_dirs)

    return {
        "deleted_dirs": deleted_dirs,
        "deleted_count": len(deleted_dirs),
        "freed_bytes": freed_bytes,
    }


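# Usage sketch (hypothetical dates, both ends inclusive):
#
#     cleanup_date_range("2026-03-01", "2026-03-07")
#
# If either date is not in YYYY-MM-DD format, datetime.strptime raises ValueError on
# the range boundaries before anything is deleted.

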
def schedule_daily_cleanup():
    """Return an async function that can be registered as a scheduled task in the app lifespan.

    Runs cleanup_old_logs at 02:00 every day, waiting in a loop with asyncio.sleep.
    """

    async def _daily_cleanup_loop() -> None:
        """Cleanup loop: compute the seconds until the next 02:00, run cleanup, repeat."""
        while True:
            now = datetime.now()
            # Compute the next 02:00
            target = now.replace(hour=2, minute=0, second=0, microsecond=0)
            if now >= target:
                # Today's 02:00 has already passed, wait until tomorrow
                target += timedelta(days=1)

            wait_seconds = (target - now).total_seconds()
            logger.info("Log cleanup task: running in %.0f seconds (%s)", wait_seconds, target.isoformat())

            await asyncio.sleep(wait_seconds)

            try:
                result = cleanup_old_logs()
                if result["deleted_count"] > 0:
                    logger.info(
                        "Automatic log cleanup done: removed %d directories, freed %d bytes",
                        result["deleted_count"],
                        result["freed_bytes"],
                    )
                else:
                    logger.debug("Automatic log cleanup: no expired directories to remove")
            except Exception:
                logger.warning("Automatic log cleanup failed", exc_info=True)

    return _daily_cleanup_loop
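

# Registration sketch (assumes a FastAPI/Starlette app; names other than
# schedule_daily_cleanup are illustrative and not part of this module):
#
#     from contextlib import asynccontextmanager
#
#     @asynccontextmanager
#     async def lifespan(app):
#         cleanup_task = asyncio.create_task(schedule_daily_cleanup()())
#         try:
#             yield
#         finally:
#             cleanup_task.cancel()
#
# schedule_daily_cleanup() returns the coroutine function, so it is called once more
# to produce the coroutine passed to asyncio.create_task. Note that cleanup_old_logs
# performs blocking file I/O on the event loop thread; wrapping the call in
# asyncio.to_thread is one option if that becomes a concern.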