feat: 累积功能变更 — 聊天集成、租户管理、小程序更新、ETL 增强、迁移脚本

包含多个会话的累积代码变更:
- backend: AI 聊天服务、触发器调度、认证增强、WebSocket、调度器最小间隔
- admin-web: ETL 状态页、任务管理、调度配置、登录优化
- miniprogram: 看板页面、聊天集成、UI 组件、导航更新
- etl: DWS 新任务(finance_area_daily/board_cache)、连接器增强
- tenant-admin: 项目初始化
- db: 19 个迁移脚本(etl_feiqiu 11 + zqyy_app 8)
- packages/shared: 枚举和工具函数更新
- tools: 数据库工具、报表生成、健康检查
- docs: PRD/架构/部署/合约文档更新

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Neo
2026-04-06 00:03:48 +08:00
parent 70324d8542
commit 6f8f12314f
515 changed files with 76604 additions and 7456 deletions

View File

@@ -0,0 +1,277 @@
# -*- coding: utf-8 -*-
"""
日志自动清理模块
提供同步清理函数和定时任务调度器:
- cleanup_old_logs: 保留最新 N 个日期目录(按记录日期排序,非日历天数),
超出总量上限时额外清理
- cleanup_date_range: 按日期范围清理指定目录
- schedule_daily_cleanup: 返回 async 函数,每天凌晨 2:00 执行清理
"""
from __future__ import annotations
import asyncio
import json
import logging
import shutil
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any
from app.trace.config import get_trace_config
logger = logging.getLogger(__name__)
# 日期目录名格式
_DATE_FORMAT = "%Y-%m-%d"
def _is_date_dir(name: str) -> bool:
"""判断目录名是否为 YYYY-MM-DD 格式的日期目录。"""
try:
datetime.strptime(name, _DATE_FORMAT)
return True
except ValueError:
return False
def _parse_date(name: str) -> datetime:
"""将 YYYY-MM-DD 格式的目录名解析为 datetime。"""
return datetime.strptime(name, _DATE_FORMAT)
def _dir_size(path: Path) -> int:
"""递归计算目录总大小(字节)。"""
total = 0
try:
for f in path.rglob("*"):
if f.is_file():
total += f.stat().st_size
except OSError:
pass
return total
def _update_root_index(base_dir: Path, deleted_dirs: list[str]) -> None:
"""更新根目录的 _index.json移除已删除目录的引用。
根索引结构示例:
{
"dates": {
"2026-03-20": { "files": {...} },
"2026-03-21": { "files": {...} }
}
}
也兼容日期目录内的 _index.jsonfiles 字典)。
"""
index_path = base_dir / "_index.json"
if not index_path.exists():
return
try:
index: dict[str, Any] = json.loads(index_path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
return
changed = False
# 移除 dates 字典中的已删除日期
if "dates" in index and isinstance(index["dates"], dict):
for d in deleted_dirs:
if d in index["dates"]:
del index["dates"][d]
changed = True
# 兼容:如果根索引直接用日期作为 key扁平结构
for d in deleted_dirs:
if d in index and d != "dates":
del index[d]
changed = True
if changed:
index_path.write_text(
json.dumps(index, ensure_ascii=False, indent=2),
encoding="utf-8",
)
def cleanup_old_logs(
    base_dir: str | None = None,
    retention_days: int | None = None,
    max_total_mb: int = 200,
) -> dict[str, Any]:
    """Keep the newest N date directories, then enforce a total-size cap.

    Strategy: sort date directories by name (i.e. by recorded date, not by
    calendar-day age) and keep the newest ``retention_days`` of them,
    regardless of whether the dates are contiguous. If the remaining total
    still exceeds ``max_total_mb``, keep deleting from the oldest end until
    it drops below the cap.

    Args:
        base_dir: log root directory; defaults to the TraceConfig log dir
        retention_days: number of date directories to keep; defaults to the
            TraceConfig retention setting
        max_total_mb: total-size cap in MB; exceeding it triggers extra
            deletion from the oldest end

    Returns:
        Result dict::

            {
                "deleted_dirs": ["2026-03-15", ...],
                "deleted_count": 2,
                "freed_bytes": 12345
            }
    """
    # Resolve defaults lazily: a fully-parameterized call should not touch
    # the trace config at all (the original loaded it unconditionally).
    if base_dir is None or retention_days is None:
        cfg = get_trace_config()
        if base_dir is None:
            base_dir = cfg.log_dir
        if retention_days is None:
            retention_days = cfg.retention_days
    base_path = Path(base_dir)
    if not base_path.exists():
        return {"deleted_dirs": [], "deleted_count": 0, "freed_bytes": 0}
    # Collect date directories sorted by name == ascending by date.
    date_dirs = sorted(
        (d for d in base_path.iterdir() if d.is_dir() and _is_date_dir(d.name)),
        key=lambda d: d.name,
    )
    # Measure every directory exactly once; the original re-walked surviving
    # trees (sum + per-entry) during the size-cap pass.
    sizes = {d: _dir_size(d) for d in date_dirs}
    deleted_dirs: list[str] = []
    freed_bytes = 0

    def _remove(entry: Path) -> bool:
        """Delete one date directory, recording name and freed bytes. Returns success."""
        nonlocal freed_bytes
        try:
            shutil.rmtree(entry)
        except OSError:
            logger.warning("清理日期目录失败: %s", entry, exc_info=True)
            return False
        deleted_dirs.append(entry.name)
        freed_bytes += sizes[entry]
        return True

    # Step 1: keep only the newest retention_days directories.
    if len(date_dirs) > retention_days:
        cutoff = len(date_dirs) - retention_days
        for entry in date_dirs[:cutoff]:
            _remove(entry)
        date_dirs = date_dirs[cutoff:]
    # Step 2: total-size cap protection.
    max_bytes = max_total_mb * 1024 * 1024
    total_size = sum(sizes[d] for d in date_dirs)
    if total_size > max_bytes:
        logger.warning(
            "日志总量 %.1f MB 超过上限 %d MB启动额外清理",
            total_size / (1024 * 1024),
            max_total_mb,
        )
        # Delete from the oldest end until we drop under the cap.
        for entry in list(date_dirs):
            if total_size <= max_bytes:
                break
            if _remove(entry):
                total_size -= sizes[entry]
                date_dirs.remove(entry)
    # Keep the root index consistent with what was actually removed.
    if deleted_dirs:
        _update_root_index(base_path, deleted_dirs)
    return {
        "deleted_dirs": deleted_dirs,
        "deleted_count": len(deleted_dirs),
        "freed_bytes": freed_bytes,
    }
def cleanup_date_range(
    start_date: str,
    end_date: str,
    base_dir: str | None = None,
) -> dict[str, Any]:
    """Delete every date directory whose name falls in [start_date, end_date].

    Args:
        start_date: inclusive lower bound, format YYYY-MM-DD
        end_date: inclusive upper bound, format YYYY-MM-DD
        base_dir: log root directory; defaults to the TraceConfig log dir

    Returns:
        Result dict with the same shape as ``cleanup_old_logs``.
    """
    root = Path(base_dir if base_dir is not None else get_trace_config().log_dir)
    if not root.exists():
        return {"deleted_dirs": [], "deleted_count": 0, "freed_bytes": 0}
    lower = datetime.strptime(start_date, _DATE_FORMAT)
    upper = datetime.strptime(end_date, _DATE_FORMAT)
    removed: list[str] = []
    reclaimed = 0
    for child in sorted(root.iterdir()):
        # Only YYYY-MM-DD directories are candidates; skip everything else.
        if not (child.is_dir() and _is_date_dir(child.name)):
            continue
        stamp = _parse_date(child.name)
        if stamp < lower or stamp > upper:
            continue
        nbytes = _dir_size(child)
        try:
            shutil.rmtree(child)
        except OSError:
            logger.warning("清理日期目录失败: %s", child, exc_info=True)
        else:
            removed.append(child.name)
            reclaimed += nbytes
    if removed:
        _update_root_index(root, removed)
    return {
        "deleted_dirs": removed,
        "deleted_count": len(removed),
        "freed_bytes": reclaimed,
    }
def schedule_daily_cleanup():
    """Build the coroutine function for the daily cleanup background task.

    Returns an async function that can be registered in the app lifespan.
    It runs ``cleanup_old_logs`` every day at 02:00 local time, waiting
    between runs with ``asyncio.sleep``.
    """
    async def _daily_cleanup_loop() -> None:
        """Sleep until the next 02:00 local time, run the cleanup, repeat forever."""
        while True:
            now = datetime.now()
            # Next 02:00; if today's 02:00 has passed, wait for tomorrow's.
            target = now.replace(hour=2, minute=0, second=0, microsecond=0)
            if now >= target:
                target += timedelta(days=1)
            wait_seconds = (target - now).total_seconds()
            # BUGFIX: the original format string was missing the closing
            # full-width parenthesis after %s.
            logger.info("日志清理定时任务:将在 %.0f 秒后执行(%s)", wait_seconds, target.isoformat())
            await asyncio.sleep(wait_seconds)
            try:
                result = cleanup_old_logs()
                if result["deleted_count"] > 0:
                    logger.info(
                        "日志自动清理完成:删除 %d 个目录,释放 %d 字节",
                        result["deleted_count"],
                        result["freed_bytes"],
                    )
                else:
                    logger.debug("日志自动清理:无过期目录需要清理")
            except Exception:
                # Broad catch is deliberate: the loop must survive any cleanup
                # failure; the error is logged with its traceback.
                logger.warning("日志自动清理失败", exc_info=True)
    return _daily_cleanup_loop