feat: 累积功能变更 — 聊天集成、租户管理、小程序更新、ETL 增强、迁移脚本
包含多个会话的累积代码变更: - backend: AI 聊天服务、触发器调度、认证增强、WebSocket、调度器最小间隔 - admin-web: ETL 状态页、任务管理、调度配置、登录优化 - miniprogram: 看板页面、聊天集成、UI 组件、导航更新 - etl: DWS 新任务(finance_area_daily/board_cache)、连接器增强 - tenant-admin: 项目初始化 - db: 19 个迁移脚本(etl_feiqiu 11 + zqyy_app 8) - packages/shared: 枚举和工具函数更新 - tools: 数据库工具、报表生成、健康检查 - docs: PRD/架构/部署/合约文档更新 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
277
apps/backend/app/trace/cleanup.py
Normal file
277
apps/backend/app/trace/cleanup.py
Normal file
@@ -0,0 +1,277 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
日志自动清理模块
|
||||
|
||||
提供同步清理函数和定时任务调度器:
|
||||
- cleanup_old_logs: 保留最新 N 个日期目录(按记录日期排序,非日历天数),
|
||||
超出总量上限时额外清理
|
||||
- cleanup_date_range: 按日期范围清理指定目录
|
||||
- schedule_daily_cleanup: 返回 async 函数,每天凌晨 2:00 执行清理
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from app.trace.config import get_trace_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 日期目录名格式
|
||||
_DATE_FORMAT = "%Y-%m-%d"
|
||||
|
||||
|
||||
def _is_date_dir(name: str) -> bool:
    """Return True when *name* parses as a YYYY-MM-DD date-directory name.

    Note: ``strptime`` is lenient about zero-padding, so e.g. "2026-3-5"
    is also accepted — this matches how directories are matched elsewhere.
    """
    try:
        datetime.strptime(name, _DATE_FORMAT)
    except ValueError:
        return False
    return True
|
||||
|
||||
|
||||
def _parse_date(name: str) -> datetime:
    """Parse a YYYY-MM-DD directory name into a :class:`datetime`.

    Raises ValueError if *name* does not match the date format; callers
    are expected to filter with ``_is_date_dir`` first.
    """
    parsed = datetime.strptime(name, _DATE_FORMAT)
    return parsed
|
||||
|
||||
|
||||
def _dir_size(path: Path) -> int:
    """Best-effort recursive size of *path* in bytes.

    Walks every file under *path*. If an OSError interrupts the walk
    (permissions, concurrent deletion), the partial total accumulated so
    far is returned rather than raising.
    """
    size = 0
    try:
        for entry in path.rglob("*"):
            if entry.is_file():
                size += entry.stat().st_size
    except OSError:
        # Deliberate best-effort: keep whatever was counted so far.
        pass
    return size
|
||||
|
||||
|
||||
def _update_root_index(base_dir: Path, deleted_dirs: list[str]) -> None:
    """Drop references to removed date directories from the root ``_index.json``.

    Two index layouts are supported:

    * nested — ``{"dates": {"2026-03-20": {"files": {...}}, ...}}``
    * flat   — date strings stored directly as top-level keys

    A missing, unreadable, or corrupt index file is left untouched; the
    file is rewritten only when at least one entry was actually removed.
    """
    index_file = base_dir / "_index.json"
    if not index_file.exists():
        return

    try:
        data: dict[str, Any] = json.loads(index_file.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, OSError):
        # Corrupt or unreadable index: do nothing rather than destroy it.
        return

    dirty = False

    # Nested layout: prune entries inside the "dates" mapping.
    dates = data.get("dates")
    if isinstance(dates, dict):
        for name in deleted_dirs:
            if name in dates:
                del dates[name]
                dirty = True

    # Flat layout compatibility: date names used directly as top-level keys.
    for name in deleted_dirs:
        if name != "dates" and name in data:
            del data[name]
            dirty = True

    if dirty:
        index_file.write_text(
            json.dumps(data, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
|
||||
|
||||
|
||||
def cleanup_old_logs(
    base_dir: str | None = None,
    retention_days: int | None = None,
    max_total_mb: int = 200,
) -> dict[str, Any]:
    """Keep only the newest *retention_days* date directories; enforce a size cap.

    Directories are ordered by their YYYY-MM-DD names (record dates, not
    calendar age), so gaps between dates do not matter. After the retention
    pass, if the survivors still exceed *max_total_mb* in total, extra
    directories are removed oldest-first until the total drops below the cap.

    Args:
        base_dir: Log root directory; defaults to ``TraceConfig.log_dir``.
        retention_days: How many date directories to keep; defaults to
            ``TraceConfig.retention_days``.
        max_total_mb: Hard cap (MB) on the total size of remaining logs.

    Returns:
        ``{"deleted_dirs": [...], "deleted_count": int, "freed_bytes": int}``
    """
    config = get_trace_config()
    root = Path(base_dir if base_dir is not None else config.log_dir)
    keep = retention_days if retention_days is not None else config.retention_days

    if not root.exists():
        return {"deleted_dirs": [], "deleted_count": 0, "freed_bytes": 0}

    # Ascending name order == ascending date order for YYYY-MM-DD names.
    remaining = sorted(
        (d for d in root.iterdir() if d.is_dir() and _is_date_dir(d.name)),
        key=lambda d: d.name,
    )

    removed: list[str] = []
    reclaimed = 0

    def _remove(entry: Path) -> int:
        """Delete one directory; return its size, or -1 if deletion failed."""
        nonlocal reclaimed
        # Size must be captured before rmtree, or there is nothing to measure.
        size = _dir_size(entry)
        try:
            shutil.rmtree(entry)
        except OSError:
            logger.warning("清理日期目录失败: %s", entry, exc_info=True)
            return -1
        removed.append(entry.name)
        reclaimed += size
        return size

    # Pass 1: retain only the newest `keep` directories.
    if len(remaining) > keep:
        cutoff = len(remaining) - keep
        for entry in remaining[:cutoff]:
            _remove(entry)
        remaining = remaining[cutoff:]

    # Pass 2: total-size cap — delete oldest-first until under the limit.
    limit = max_total_mb * 1024 * 1024
    total = sum(_dir_size(d) for d in remaining)
    if total > limit:
        logger.warning(
            "日志总量 %.1f MB 超过上限 %d MB,启动额外清理",
            total / (1024 * 1024),
            max_total_mb,
        )
        for entry in list(remaining):
            if total <= limit:
                break
            freed = _remove(entry)
            if freed >= 0:
                total -= freed
                remaining.remove(entry)

    if removed:
        _update_root_index(root, removed)

    return {
        "deleted_dirs": removed,
        "deleted_count": len(removed),
        "freed_bytes": reclaimed,
    }
|
||||
|
||||
|
||||
def cleanup_date_range(
    start_date: str,
    end_date: str,
    base_dir: str | None = None,
) -> dict[str, Any]:
    """Delete date directories whose names fall within [start_date, end_date].

    Args:
        start_date: Inclusive lower bound, formatted YYYY-MM-DD.
        end_date: Inclusive upper bound, formatted YYYY-MM-DD.
        base_dir: Log root directory; defaults to ``TraceConfig.log_dir``.

    Returns:
        Result dict with the same shape as :func:`cleanup_old_logs`.
    """
    root = Path(base_dir if base_dir is not None else get_trace_config().log_dir)
    if not root.exists():
        return {"deleted_dirs": [], "deleted_count": 0, "freed_bytes": 0}

    lower = datetime.strptime(start_date, _DATE_FORMAT)
    upper = datetime.strptime(end_date, _DATE_FORMAT)

    removed: list[str] = []
    reclaimed = 0

    for entry in sorted(root.iterdir()):
        if not (entry.is_dir() and _is_date_dir(entry.name)):
            continue
        if not (lower <= _parse_date(entry.name) <= upper):
            continue
        # Capture size before deletion so freed bytes can be reported.
        size = _dir_size(entry)
        try:
            shutil.rmtree(entry)
        except OSError:
            logger.warning("清理日期目录失败: %s", entry, exc_info=True)
        else:
            removed.append(entry.name)
            reclaimed += size

    if removed:
        _update_root_index(root, removed)

    return {
        "deleted_dirs": removed,
        "deleted_count": len(removed),
        "freed_bytes": reclaimed,
    }
|
||||
|
||||
|
||||
def schedule_daily_cleanup():
    """Build an async coroutine function for registration as a lifespan task.

    The returned coroutine loops forever: it sleeps until the next 02:00
    (local time), runs :func:`cleanup_old_logs`, logs the outcome, and
    repeats. Cleanup failures are logged and swallowed so the scheduler
    loop itself never dies.
    """

    async def _daily_cleanup_loop() -> None:
        """Wait until the next 02:00, run the cleanup, and repeat forever."""
        while True:
            now = datetime.now()
            next_run = now.replace(hour=2, minute=0, second=0, microsecond=0)
            # Today's 02:00 has passed (or is exactly now) — schedule tomorrow's.
            if now >= next_run:
                next_run += timedelta(days=1)

            delay = (next_run - now).total_seconds()
            logger.info("日志清理定时任务:将在 %.0f 秒后执行(%s)", delay, next_run.isoformat())

            await asyncio.sleep(delay)

            # Boundary catch-all: one bad run must not kill the loop.
            try:
                outcome = cleanup_old_logs()
                if outcome["deleted_count"] > 0:
                    logger.info(
                        "日志自动清理完成:删除 %d 个目录,释放 %d 字节",
                        outcome["deleted_count"],
                        outcome["freed_bytes"],
                    )
                else:
                    logger.debug("日志自动清理:无过期目录需要清理")
            except Exception:
                logger.warning("日志自动清理失败", exc_info=True)

    return _daily_cleanup_loop
|
||||
Reference in New Issue
Block a user