Files
Neo-ZQYY/apps/backend/app/services/ai/cleanup_service.py
Neo 6f8f12314f feat: 累积功能变更 — 聊天集成、租户管理、小程序更新、ETL 增强、迁移脚本
包含多个会话的累积代码变更:
- backend: AI 聊天服务、触发器调度、认证增强、WebSocket、调度器最小间隔
- admin-web: ETL 状态页、任务管理、调度配置、登录优化
- miniprogram: 看板页面、聊天集成、UI 组件、导航更新
- etl: DWS 新任务(finance_area_daily/board_cache)、连接器增强
- tenant-admin: 项目初始化
- db: 19 个迁移脚本(etl_feiqiu 11 + zqyy_app 8)
- packages/shared: 枚举和工具函数更新
- tools: 数据库工具、报表生成、健康检查
- docs: PRD/架构/部署/合约文档更新

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-06 00:03:48 +08:00

189 lines
6.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
AI 数据清理服务。
由定时任务每日凌晨 03:00 调用,执行三步清理:
1. 删除 90 天前的 ai_run_logs
2. 删除 90 天前的 ai_trigger_jobs
3. 每个 App 类型App2~App8的 ai_cache 保留最新 20,000 条
永久保留 App1 对话记录ai_conversations + ai_messages不清理。
需求: E1.1, E1.2, E1.3, E1.4, E2.1, E2.2, E2.3
"""
from __future__ import annotations
import asyncio
import logging
from app.trace.decorators import trace_service
logger = logging.getLogger(__name__)
class AICleanupService:
"""AI 数据清理服务,由定时任务调用。"""
RETENTION_DAYS = 90
CACHE_LIMIT_PER_APP = 20_000
CACHE_APP_TYPES = [
"app2_finance",
"app3_clue",
"app4_analysis",
"app5_tactics",
"app6_note_analysis",
"app7_customer_analysis",
"app8_clue_consolidated",
]
async def run_cleanup(self) -> dict:
"""执行全部清理,返回各步骤删除记录数。
单步清理失败记录错误日志,继续执行后续步骤。
"""
result: dict = {}
# 步骤 1清理 ai_run_logs
try:
result["run_logs_deleted"] = await self._cleanup_run_logs()
except Exception:
logger.exception("清理 ai_run_logs 失败")
result["run_logs_deleted"] = -1
# 步骤 2清理 ai_trigger_jobs
try:
result["trigger_jobs_deleted"] = await self._cleanup_trigger_jobs()
except Exception:
logger.exception("清理 ai_trigger_jobs 失败")
result["trigger_jobs_deleted"] = -1
# 步骤 3清理 ai_cache每个 App 类型)
try:
result["cache_deleted"] = await self._cleanup_cache()
except Exception:
logger.exception("清理 ai_cache 失败")
result["cache_deleted"] = {}
logger.info("AI 数据清理完成: %s", result)
return result
async def _cleanup_run_logs(self) -> int:
"""DELETE FROM ai_run_logs WHERE created_at < now() - 90 days。"""
from app.database import get_connection
conn = get_connection()
try:
with conn.cursor() as cur:
# 防止锁等待超时5 分钟)
cur.execute("SET statement_timeout = 300000")
cur.execute(
"""
DELETE FROM biz.ai_run_logs
WHERE created_at < NOW() - INTERVAL '%s days'
""",
(self.RETENTION_DAYS,),
)
deleted = cur.rowcount
conn.commit()
logger.info("清理 ai_run_logs: 删除 %d", deleted)
return deleted
except Exception:
conn.rollback()
raise
finally:
conn.close()
async def _cleanup_trigger_jobs(self) -> int:
"""DELETE FROM ai_trigger_jobs WHERE created_at < now() - 90 days。"""
from app.database import get_connection
conn = get_connection()
try:
with conn.cursor() as cur:
cur.execute("SET statement_timeout = 300000")
cur.execute(
"""
DELETE FROM biz.ai_trigger_jobs
WHERE created_at < NOW() - INTERVAL '%s days'
""",
(self.RETENTION_DAYS,),
)
deleted = cur.rowcount
conn.commit()
logger.info("清理 ai_trigger_jobs: 删除 %d", deleted)
return deleted
except Exception:
conn.rollback()
raise
finally:
conn.close()
async def _cleanup_cache(self) -> dict[str, int]:
"""每个 App 类型保留最新 20,000 条,删除超出部分。"""
from app.database import get_connection
result: dict[str, int] = {}
conn = get_connection()
try:
with conn.cursor() as cur:
cur.execute("SET statement_timeout = 300000")
for app_type in self.CACHE_APP_TYPES:
try:
# 子查询:找到该 app_type 第 20001 条的 created_at 作为截断点
cur.execute(
"""
DELETE FROM biz.ai_cache
WHERE app_type = %s
AND id NOT IN (
SELECT id FROM biz.ai_cache
WHERE app_type = %s
ORDER BY created_at DESC
LIMIT %s
)
""",
(app_type, app_type, self.CACHE_LIMIT_PER_APP),
)
deleted = cur.rowcount
result[app_type] = deleted
if deleted > 0:
logger.info(
"清理 ai_cache [%s]: 删除 %d",
app_type,
deleted,
)
except Exception:
logger.exception("清理 ai_cache [%s] 失败", app_type)
result[app_type] = -1
conn.rollback()
# 重新开始事务以继续后续 app_type
continue
conn.commit()
return result
except Exception:
conn.rollback()
raise
finally:
conn.close()
@trace_service(description_zh="register_cleanup_job", description_en="Register Cleanup Job")
def register_cleanup_job(scheduler) -> None: # noqa: ANN001
"""注册清理定时任务到调度器。每日 03:00 执行。
在 main.py lifespan 中调用,或通过 scheduled_tasks 表注册。
实际调度由 trigger_scheduler 的 cron 机制驱动:
- job_type: 'ai_data_cleanup'
- trigger_condition: 'cron'
- trigger_config: {"cron_expression": "0 3 * * *"}
需求: E2.1, E2.2, E2.3
"""
from app.services.trigger_scheduler import register_job
def _run_cleanup(**_kw):
"""同步包装器:在新事件循环中执行异步清理。"""
result = asyncio.run(AICleanupService().run_cleanup())
logger.info("定时清理任务完成: %s", result)
register_job("ai_data_cleanup", _run_cleanup)