# -*- coding: utf-8 -*- """ AI 数据清理服务。 由定时任务每日凌晨 03:00 调用,执行三步清理: 1. 删除 90 天前的 ai_run_logs 2. 删除 90 天前的 ai_trigger_jobs 3. 每个 App 类型(App2~App8)的 ai_cache 保留最新 20,000 条 永久保留 App1 对话记录(ai_conversations + ai_messages),不清理。 需求: E1.1, E1.2, E1.3, E1.4, E2.1, E2.2, E2.3 """ from __future__ import annotations import asyncio import logging from app.trace.decorators import trace_service logger = logging.getLogger(__name__) class AICleanupService: """AI 数据清理服务,由定时任务调用。""" RETENTION_DAYS = 90 CACHE_LIMIT_PER_APP = 20_000 CACHE_APP_TYPES = [ "app2_finance", "app3_clue", "app4_analysis", "app5_tactics", "app6_note_analysis", "app7_customer_analysis", "app8_clue_consolidated", ] async def run_cleanup(self) -> dict: """执行全部清理,返回各步骤删除记录数。 单步清理失败记录错误日志,继续执行后续步骤。 """ result: dict = {} # 步骤 1:清理 ai_run_logs try: result["run_logs_deleted"] = await self._cleanup_run_logs() except Exception: logger.exception("清理 ai_run_logs 失败") result["run_logs_deleted"] = -1 # 步骤 2:清理 ai_trigger_jobs try: result["trigger_jobs_deleted"] = await self._cleanup_trigger_jobs() except Exception: logger.exception("清理 ai_trigger_jobs 失败") result["trigger_jobs_deleted"] = -1 # 步骤 3:清理 ai_cache(每个 App 类型) try: result["cache_deleted"] = await self._cleanup_cache() except Exception: logger.exception("清理 ai_cache 失败") result["cache_deleted"] = {} logger.info("AI 数据清理完成: %s", result) return result async def _cleanup_run_logs(self) -> int: """DELETE FROM ai_run_logs WHERE created_at < now() - 90 days。""" from app.database import get_connection conn = get_connection() try: with conn.cursor() as cur: # 防止锁等待超时(5 分钟) cur.execute("SET statement_timeout = 300000") cur.execute( """ DELETE FROM biz.ai_run_logs WHERE created_at < NOW() - INTERVAL '%s days' """, (self.RETENTION_DAYS,), ) deleted = cur.rowcount conn.commit() logger.info("清理 ai_run_logs: 删除 %d 条", deleted) return deleted except Exception: conn.rollback() raise finally: conn.close() async def _cleanup_trigger_jobs(self) -> int: """DELETE FROM ai_trigger_jobs WHERE created_at < now() - 90 days。""" from app.database import get_connection conn = get_connection() try: with conn.cursor() as cur: cur.execute("SET statement_timeout = 300000") cur.execute( """ DELETE FROM biz.ai_trigger_jobs WHERE created_at < NOW() - INTERVAL '%s days' """, (self.RETENTION_DAYS,), ) deleted = cur.rowcount conn.commit() logger.info("清理 ai_trigger_jobs: 删除 %d 条", deleted) return deleted except Exception: conn.rollback() raise finally: conn.close() async def _cleanup_cache(self) -> dict[str, int]: """每个 App 类型保留最新 20,000 条,删除超出部分。""" from app.database import get_connection result: dict[str, int] = {} conn = get_connection() try: with conn.cursor() as cur: cur.execute("SET statement_timeout = 300000") for app_type in self.CACHE_APP_TYPES: try: # 子查询:找到该 app_type 第 20001 条的 created_at 作为截断点 cur.execute( """ DELETE FROM biz.ai_cache WHERE app_type = %s AND id NOT IN ( SELECT id FROM biz.ai_cache WHERE app_type = %s ORDER BY created_at DESC LIMIT %s ) """, (app_type, app_type, self.CACHE_LIMIT_PER_APP), ) deleted = cur.rowcount result[app_type] = deleted if deleted > 0: logger.info( "清理 ai_cache [%s]: 删除 %d 条", app_type, deleted, ) except Exception: logger.exception("清理 ai_cache [%s] 失败", app_type) result[app_type] = -1 conn.rollback() # 重新开始事务以继续后续 app_type continue conn.commit() return result except Exception: conn.rollback() raise finally: conn.close() @trace_service(description_zh="register_cleanup_job", description_en="Register Cleanup Job") def register_cleanup_job(scheduler) -> None: # noqa: ANN001 """注册清理定时任务到调度器。每日 03:00 执行。 在 main.py lifespan 中调用,或通过 scheduled_tasks 表注册。 实际调度由 trigger_scheduler 的 cron 机制驱动: - job_type: 'ai_data_cleanup' - trigger_condition: 'cron' - trigger_config: {"cron_expression": "0 3 * * *"} 需求: E2.1, E2.2, E2.3 """ from app.services.trigger_scheduler import register_job def _run_cleanup(**_kw): """同步包装器:在新事件循环中执行异步清理。""" result = asyncio.run(AICleanupService().run_cleanup()) logger.info("定时清理任务完成: %s", result) register_job("ai_data_cleanup", _run_cleanup)