包含多个会话的累积代码变更: - backend: AI 聊天服务、触发器调度、认证增强、WebSocket、调度器最小间隔 - admin-web: ETL 状态页、任务管理、调度配置、登录优化 - miniprogram: 看板页面、聊天集成、UI 组件、导航更新 - etl: DWS 新任务(finance_area_daily/board_cache)、连接器增强 - tenant-admin: 项目初始化 - db: 19 个迁移脚本(etl_feiqiu 11 + zqyy_app 8) - packages/shared: 枚举和工具函数更新 - tools: 数据库工具、报表生成、健康检查 - docs: PRD/架构/部署/合约文档更新 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
189 lines
6.5 KiB
Python
189 lines
6.5 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""
|
||
AI 数据清理服务。
|
||
|
||
由定时任务每日凌晨 03:00 调用,执行三步清理:
|
||
1. 删除 90 天前的 ai_run_logs
|
||
2. 删除 90 天前的 ai_trigger_jobs
|
||
3. 每个 App 类型(App2~App8)的 ai_cache 保留最新 20,000 条
|
||
|
||
永久保留 App1 对话记录(ai_conversations + ai_messages),不清理。
|
||
|
||
需求: E1.1, E1.2, E1.3, E1.4, E2.1, E2.2, E2.3
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import logging
|
||
|
||
from app.trace.decorators import trace_service
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class AICleanupService:
|
||
"""AI 数据清理服务,由定时任务调用。"""
|
||
|
||
RETENTION_DAYS = 90
|
||
CACHE_LIMIT_PER_APP = 20_000
|
||
CACHE_APP_TYPES = [
|
||
"app2_finance",
|
||
"app3_clue",
|
||
"app4_analysis",
|
||
"app5_tactics",
|
||
"app6_note_analysis",
|
||
"app7_customer_analysis",
|
||
"app8_clue_consolidated",
|
||
]
|
||
|
||
async def run_cleanup(self) -> dict:
|
||
"""执行全部清理,返回各步骤删除记录数。
|
||
|
||
单步清理失败记录错误日志,继续执行后续步骤。
|
||
"""
|
||
result: dict = {}
|
||
|
||
# 步骤 1:清理 ai_run_logs
|
||
try:
|
||
result["run_logs_deleted"] = await self._cleanup_run_logs()
|
||
except Exception:
|
||
logger.exception("清理 ai_run_logs 失败")
|
||
result["run_logs_deleted"] = -1
|
||
|
||
# 步骤 2:清理 ai_trigger_jobs
|
||
try:
|
||
result["trigger_jobs_deleted"] = await self._cleanup_trigger_jobs()
|
||
except Exception:
|
||
logger.exception("清理 ai_trigger_jobs 失败")
|
||
result["trigger_jobs_deleted"] = -1
|
||
|
||
# 步骤 3:清理 ai_cache(每个 App 类型)
|
||
try:
|
||
result["cache_deleted"] = await self._cleanup_cache()
|
||
except Exception:
|
||
logger.exception("清理 ai_cache 失败")
|
||
result["cache_deleted"] = {}
|
||
|
||
logger.info("AI 数据清理完成: %s", result)
|
||
return result
|
||
|
||
async def _cleanup_run_logs(self) -> int:
|
||
"""DELETE FROM ai_run_logs WHERE created_at < now() - 90 days。"""
|
||
from app.database import get_connection
|
||
|
||
conn = get_connection()
|
||
try:
|
||
with conn.cursor() as cur:
|
||
# 防止锁等待超时(5 分钟)
|
||
cur.execute("SET statement_timeout = 300000")
|
||
cur.execute(
|
||
"""
|
||
DELETE FROM biz.ai_run_logs
|
||
WHERE created_at < NOW() - INTERVAL '%s days'
|
||
""",
|
||
(self.RETENTION_DAYS,),
|
||
)
|
||
deleted = cur.rowcount
|
||
conn.commit()
|
||
logger.info("清理 ai_run_logs: 删除 %d 条", deleted)
|
||
return deleted
|
||
except Exception:
|
||
conn.rollback()
|
||
raise
|
||
finally:
|
||
conn.close()
|
||
|
||
async def _cleanup_trigger_jobs(self) -> int:
|
||
"""DELETE FROM ai_trigger_jobs WHERE created_at < now() - 90 days。"""
|
||
from app.database import get_connection
|
||
|
||
conn = get_connection()
|
||
try:
|
||
with conn.cursor() as cur:
|
||
cur.execute("SET statement_timeout = 300000")
|
||
cur.execute(
|
||
"""
|
||
DELETE FROM biz.ai_trigger_jobs
|
||
WHERE created_at < NOW() - INTERVAL '%s days'
|
||
""",
|
||
(self.RETENTION_DAYS,),
|
||
)
|
||
deleted = cur.rowcount
|
||
conn.commit()
|
||
logger.info("清理 ai_trigger_jobs: 删除 %d 条", deleted)
|
||
return deleted
|
||
except Exception:
|
||
conn.rollback()
|
||
raise
|
||
finally:
|
||
conn.close()
|
||
|
||
async def _cleanup_cache(self) -> dict[str, int]:
|
||
"""每个 App 类型保留最新 20,000 条,删除超出部分。"""
|
||
from app.database import get_connection
|
||
|
||
result: dict[str, int] = {}
|
||
conn = get_connection()
|
||
try:
|
||
with conn.cursor() as cur:
|
||
cur.execute("SET statement_timeout = 300000")
|
||
for app_type in self.CACHE_APP_TYPES:
|
||
try:
|
||
# 子查询:找到该 app_type 第 20001 条的 created_at 作为截断点
|
||
cur.execute(
|
||
"""
|
||
DELETE FROM biz.ai_cache
|
||
WHERE app_type = %s
|
||
AND id NOT IN (
|
||
SELECT id FROM biz.ai_cache
|
||
WHERE app_type = %s
|
||
ORDER BY created_at DESC
|
||
LIMIT %s
|
||
)
|
||
""",
|
||
(app_type, app_type, self.CACHE_LIMIT_PER_APP),
|
||
)
|
||
deleted = cur.rowcount
|
||
result[app_type] = deleted
|
||
if deleted > 0:
|
||
logger.info(
|
||
"清理 ai_cache [%s]: 删除 %d 条",
|
||
app_type,
|
||
deleted,
|
||
)
|
||
except Exception:
|
||
logger.exception("清理 ai_cache [%s] 失败", app_type)
|
||
result[app_type] = -1
|
||
conn.rollback()
|
||
# 重新开始事务以继续后续 app_type
|
||
continue
|
||
conn.commit()
|
||
return result
|
||
except Exception:
|
||
conn.rollback()
|
||
raise
|
||
finally:
|
||
conn.close()
|
||
|
||
|
||
@trace_service(description_zh="register_cleanup_job", description_en="Register Cleanup Job")
|
||
def register_cleanup_job(scheduler) -> None: # noqa: ANN001
|
||
"""注册清理定时任务到调度器。每日 03:00 执行。
|
||
|
||
在 main.py lifespan 中调用,或通过 scheduled_tasks 表注册。
|
||
实际调度由 trigger_scheduler 的 cron 机制驱动:
|
||
- job_type: 'ai_data_cleanup'
|
||
- trigger_condition: 'cron'
|
||
- trigger_config: {"cron_expression": "0 3 * * *"}
|
||
|
||
需求: E2.1, E2.2, E2.3
|
||
"""
|
||
from app.services.trigger_scheduler import register_job
|
||
|
||
def _run_cleanup(**_kw):
|
||
"""同步包装器:在新事件循环中执行异步清理。"""
|
||
result = asyncio.run(AICleanupService().run_cleanup())
|
||
logger.info("定时清理任务完成: %s", result)
|
||
|
||
register_job("ai_data_cleanup", _run_cleanup)
|