feat: 累积功能变更 — 聊天集成、租户管理、小程序更新、ETL 增强、迁移脚本
包含多个会话的累积代码变更: - backend: AI 聊天服务、触发器调度、认证增强、WebSocket、调度器最小间隔 - admin-web: ETL 状态页、任务管理、调度配置、登录优化 - miniprogram: 看板页面、聊天集成、UI 组件、导航更新 - etl: DWS 新任务(finance_area_daily/board_cache)、连接器增强 - tenant-admin: 项目初始化 - db: 19 个迁移脚本(etl_feiqiu 11 + zqyy_app 8) - packages/shared: 枚举和工具函数更新 - tools: 数据库工具、报表生成、健康检查 - docs: PRD/架构/部署/合约文档更新 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
188
apps/backend/app/services/ai/cleanup_service.py
Normal file
188
apps/backend/app/services/ai/cleanup_service.py
Normal file
@@ -0,0 +1,188 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
AI 数据清理服务。
|
||||
|
||||
由定时任务每日凌晨 03:00 调用,执行三步清理:
|
||||
1. 删除 90 天前的 ai_run_logs
|
||||
2. 删除 90 天前的 ai_trigger_jobs
|
||||
3. 每个 App 类型(App2~App8)的 ai_cache 保留最新 20,000 条
|
||||
|
||||
永久保留 App1 对话记录(ai_conversations + ai_messages),不清理。
|
||||
|
||||
需求: E1.1, E1.2, E1.3, E1.4, E2.1, E2.2, E2.3
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
from app.trace.decorators import trace_service
|
||||
|
||||
logger = logging.getLogger(__name__)


class AICleanupService:
    """Scheduled cleanup of AI-related tables.

    Invoked by the cron scheduler daily at 03:00. Three steps:

    1. Delete ``biz.ai_run_logs`` rows older than ``RETENTION_DAYS`` days.
    2. Delete ``biz.ai_trigger_jobs`` rows older than ``RETENTION_DAYS`` days.
    3. For each non-conversation app type (App2–App8), keep only the newest
       ``CACHE_LIMIT_PER_APP`` rows in ``biz.ai_cache``.

    App1 conversation history (``ai_conversations`` + ``ai_messages``) is
    retained permanently and is deliberately NOT touched here.

    Requirements: E1.1, E1.2, E1.3, E1.4, E2.1, E2.2, E2.3
    """

    # Rows older than this many days are purged from the two log tables.
    RETENTION_DAYS = 90
    # Per-app row cap for biz.ai_cache (newest rows are kept).
    CACHE_LIMIT_PER_APP = 20_000
    # Statement/lock-wait guard applied before every cleanup statement (ms).
    STATEMENT_TIMEOUT_MS = 300_000
    # App types subject to the cache cap. App1 (conversations) is excluded
    # on purpose — its history is kept forever.
    CACHE_APP_TYPES = [
        "app2_finance",
        "app3_clue",
        "app4_analysis",
        "app5_tactics",
        "app6_note_analysis",
        "app7_customer_analysis",
        "app8_clue_consolidated",
    ]

    async def run_cleanup(self) -> dict:
        """Run all cleanup steps and return per-step deletion counts.

        A failing step is logged and recorded as ``-1`` (or ``{}`` for the
        cache step); the remaining steps still execute.

        NOTE(review): the underlying DB access is synchronous and runs
        directly on the event loop; if this coroutine executes inside the
        main service loop, consider offloading via ``asyncio.to_thread``.
        """
        result: dict = {}

        # Step 1: purge expired ai_run_logs.
        try:
            result["run_logs_deleted"] = await self._cleanup_run_logs()
        except Exception:
            logger.exception("清理 ai_run_logs 失败")
            result["run_logs_deleted"] = -1

        # Step 2: purge expired ai_trigger_jobs.
        try:
            result["trigger_jobs_deleted"] = await self._cleanup_trigger_jobs()
        except Exception:
            logger.exception("清理 ai_trigger_jobs 失败")
            result["trigger_jobs_deleted"] = -1

        # Step 3: cap ai_cache per app type.
        try:
            result["cache_deleted"] = await self._cleanup_cache()
        except Exception:
            logger.exception("清理 ai_cache 失败")
            result["cache_deleted"] = {}

        logger.info("AI 数据清理完成: %s", result)
        return result

    async def _cleanup_run_logs(self) -> int:
        """Delete biz.ai_run_logs rows older than RETENTION_DAYS; return count."""
        return self._purge_expired("ai_run_logs")

    async def _cleanup_trigger_jobs(self) -> int:
        """Delete biz.ai_trigger_jobs rows older than RETENTION_DAYS; return count."""
        return self._purge_expired("ai_trigger_jobs")

    def _purge_expired(self, table: str) -> int:
        """Delete rows in ``biz.<table>`` whose ``created_at`` is past retention.

        *table* comes only from the fixed internal call sites above, so the
        f-string interpolation below is not an injection risk; the retention
        value itself is passed as a bound parameter.

        Raises whatever the DB driver raises; callers decide how to degrade.
        """
        from app.database import get_connection

        conn = get_connection()
        try:
            with conn.cursor() as cur:
                # Guard against long lock waits (5 minutes).
                cur.execute(f"SET statement_timeout = {self.STATEMENT_TIMEOUT_MS}")
                # ``%s * INTERVAL '1 day'`` keeps the retention value a real
                # bind parameter; a placeholder inside a quoted literal
                # ('%s days') only works with client-side interpolation.
                cur.execute(
                    f"""
                    DELETE FROM biz.{table}
                    WHERE created_at < NOW() - (%s * INTERVAL '1 day')
                    """,
                    (self.RETENTION_DAYS,),
                )
                deleted = cur.rowcount
            conn.commit()
            logger.info("清理 %s: 删除 %d 条", table, deleted)
            return deleted
        except Exception:
            conn.rollback()
            raise
        finally:
            conn.close()

    async def _cleanup_cache(self) -> dict[str, int]:
        """Trim biz.ai_cache to CACHE_LIMIT_PER_APP rows per app type.

        Each app type is processed and committed independently, so one
        failure neither aborts the remaining app types nor rolls back
        deletions already counted for earlier ones. Returns a mapping of
        app_type -> rows deleted (-1 for a failed app type).
        """
        from app.database import get_connection

        result: dict[str, int] = {}
        conn = get_connection()
        try:
            with conn.cursor() as cur:
                for app_type in self.CACHE_APP_TYPES:
                    try:
                        # (Re)apply the timeout every iteration: PostgreSQL
                        # reverts SET when the transaction it ran in is
                        # rolled back, so a prior failure would otherwise
                        # leave the remaining DELETEs unguarded.
                        cur.execute(
                            f"SET statement_timeout = {self.STATEMENT_TIMEOUT_MS}"
                        )
                        # Keep the newest CACHE_LIMIT_PER_APP rows for this
                        # app type; delete everything beyond them.
                        cur.execute(
                            """
                            DELETE FROM biz.ai_cache
                            WHERE app_type = %s
                              AND id NOT IN (
                                  SELECT id FROM biz.ai_cache
                                  WHERE app_type = %s
                                  ORDER BY created_at DESC
                                  LIMIT %s
                              )
                            """,
                            (app_type, app_type, self.CACHE_LIMIT_PER_APP),
                        )
                        deleted = cur.rowcount
                        # Commit per app type: a single commit after the loop
                        # would let a later failure's rollback silently undo
                        # deletions this result dict already reports.
                        conn.commit()
                        result[app_type] = deleted
                        if deleted > 0:
                            logger.info(
                                "清理 ai_cache [%s]: 删除 %d 条",
                                app_type,
                                deleted,
                            )
                    except Exception:
                        logger.exception("清理 ai_cache [%s] 失败", app_type)
                        result[app_type] = -1
                        conn.rollback()
            return result
        except Exception:
            conn.rollback()
            raise
        finally:
            conn.close()
@trace_service(description_zh="register_cleanup_job", description_en="Register Cleanup Job")
def register_cleanup_job(scheduler) -> None:  # noqa: ANN001
    """Register the AI data cleanup job with the scheduler (daily 03:00).

    Called from the ``main.py`` lifespan hook, or registered via the
    ``scheduled_tasks`` table. The actual firing is driven by the
    ``trigger_scheduler`` cron machinery:

    - job_type: 'ai_data_cleanup'
    - trigger_condition: 'cron'
    - trigger_config: {"cron_expression": "0 3 * * *"}

    Requirements: E2.1, E2.2, E2.3
    """
    from app.services.trigger_scheduler import register_job

    def _execute(**_ignored):
        """Sync wrapper: run the async cleanup in a fresh event loop."""
        outcome = asyncio.run(AICleanupService().run_cleanup())
        logger.info("定时清理任务完成: %s", outcome)

    register_job("ai_data_cleanup", _execute)
Reference in New Issue
Block a user