Files
Neo-ZQYY/apps/backend/app/services/ai/cleanup_service.py
Neo 2dfc926f96 feat(ai): W1-AI-CLOSURE 超级 Sprint — 9 APP 全链路收口 + chat 上下文真激活
Phase 2.3 chat 上下文捕获链路从未真正激活到完整工作:
- 14 处 ai-float-button 补 sourcePage,chat.ts 三分支同步设 pageFilters.contextId
- 后端 page_context 4 层 BUG 修(列名错位 + RLS site_id 未重设)
- xcx_chat filters.pop 破坏 body.page_context 引用 — dict() 浅拷贝隔离
- chat 流式 markdown 实时解析(表格/标题/列表/加粗 + KPI 富卡)
- reference_card KPI 富卡接入 SSE 路径,db 真写入
- 维客线索 source 显示规则:AI 来源用机器人 icon 替代长文字

数据库:
- public.member_retention_clue 加 emoji + runtime_mode + sandbox_instance_id
- biz.ai_run_logs 加 assistant_id + 复合索引
- chk_ai_cache_type CHECK 约束 8 类应用名
- cache_type / app_type 命名统一(app6_note / app7_customer / app8_consolidation)
- 历史 emoji 抽取脚本 44/44 成功

后端 silent failure 修:
- cleanup_service WHERE app_type → cache_type(90 天清理 + 20K 上限重新生效)
- _build_ai_insight 字段错位修复(app4 → app7 + 字段对齐 prompt schema)
- task_manager talkingPoints 改 app5_tactics + tactics 字段
- task_manager aiSuggestion 改取 one_line_summary
- cache_service.CACHE_EXPIRY_DAYS 加 app2a_finance_area
- WS /ws/ai-cache 加 token + JWT + site_id 校验(P0 信息泄露漏洞)
- internal_ai token 改 hmac.compare_digest

工具/文档:
- main.py 加 RotatingFileHandler logs/backend.log + uvicorn /health 过滤
- 新建 utils/clue_category.py(VI 6 类配色 + emoji fallback + source 显示规则)
- 新建 utils/markdown.ts(轻量 md 转 rich-text 解析 + streaming 容错)
- audit + 数据库变更说明 + backlog §七 #14 收口 + #15-#38 残余子任务
- backlog 追加 §十一 App1 参数/MCP/沙箱审计 + §十二 百炼/SQL MCP 主任务线

实地 MCP 走查:14 入口数据层 + 5 代表入口 sourcePage 注入 + customer-detail 全模块 + chat md 渲染 + reference_card 富卡 都已验证。9 项预先 BUG/UX 登记 §七 #29-#38 后续修复。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 16:39:07 +08:00

197 lines
7.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
AI 数据清理服务。
由定时任务每日凌晨 03:00 调用,执行三步清理:
1. 删除 90 天前的 ai_run_logs
2. 删除 90 天前的 ai_trigger_jobs
3. 每个 App 类型(App2~App8)的 ai_cache 保留最新 20,000 条
永久保留 App1 对话记录(ai_conversations + ai_messages),不清理。
需求: E1.1, E1.2, E1.3, E1.4, E2.1, E2.2, E2.3
"""
from __future__ import annotations
import asyncio
import logging
from app.trace.decorators import trace_service
logger = logging.getLogger(__name__)
class AICleanupService:
    """AI data cleanup service, invoked by the daily scheduled job.

    Three steps (see run_cleanup):
      1. delete biz.ai_run_logs rows older than RETENTION_DAYS
      2. delete biz.ai_trigger_jobs rows older than RETENTION_DAYS
      3. cap biz.ai_cache at CACHE_LIMIT_PER_APP rows per cache_type

    App1 conversations (ai_conversations + ai_messages) are kept forever
    and never touched here.
    Requirements: E1.1, E1.2, E1.3, E1.4, E2.1, E2.2, E2.3
    """

    # Rows older than this many days are purged from the two log tables.
    RETENTION_DAYS = 90
    # Per-cache_type row cap for biz.ai_cache.
    CACHE_LIMIT_PER_APP = 20_000

    # The 8 app types that go through ai_cache (app1_chat uses ai_messages,
    # never ai_cache). Names match the prompt file names (unified by the
    # W1-AI-CLOSURE group-1 database migration).
    CACHE_TYPES: tuple[str, ...] = (
        "app2_finance",
        "app2a_finance_area",  # P0-11 fix: previously missing, so the 64 area combos were never cleaned
        "app3_clue",
        "app4_analysis",
        "app5_tactics",
        "app6_note",
        "app7_customer",
        "app8_consolidation",
    )

    async def run_cleanup(self) -> dict:
        """Run all cleanup steps; return per-step deletion counts.

        A failing step is logged and reported as -1 (or {} for the cache
        step) and the remaining steps still execute.
        """
        result: dict = {}
        # Step 1: purge old ai_run_logs
        try:
            result["run_logs_deleted"] = await self._cleanup_run_logs()
        except Exception:
            logger.exception("清理 ai_run_logs 失败")
            result["run_logs_deleted"] = -1
        # Step 2: purge old ai_trigger_jobs
        try:
            result["trigger_jobs_deleted"] = await self._cleanup_trigger_jobs()
        except Exception:
            logger.exception("清理 ai_trigger_jobs 失败")
            result["trigger_jobs_deleted"] = -1
        # Step 3: cap ai_cache per cache_type
        try:
            result["cache_deleted"] = await self._cleanup_cache()
        except Exception:
            logger.exception("清理 ai_cache 失败")
            result["cache_deleted"] = {}
        logger.info("AI 数据清理完成: %s", result)
        return result

    async def _purge_old_rows(self, table: str) -> int:
        """Delete biz.<table> rows with created_at older than RETENTION_DAYS.

        `table` is an internal hardcoded identifier (never user input), so
        interpolating it into the SQL text is safe. The retention window is
        still passed as a bound parameter; make_interval() avoids the fragile
        placeholder-inside-a-quoted-literal pattern (INTERVAL '%s days'),
        which only worked because psycopg2 substitutes client-side.

        Returns the number of rows deleted. Re-raises on failure after
        rolling back.
        """
        from app.database import get_connection

        conn = get_connection()
        try:
            with conn.cursor() as cur:
                # Cap lock waits / long scans at 5 minutes.
                cur.execute("SET statement_timeout = 300000")
                cur.execute(
                    f"""
                    DELETE FROM biz.{table}
                    WHERE created_at < NOW() - make_interval(days => %s)
                    """,
                    (self.RETENTION_DAYS,),
                )
                deleted = cur.rowcount
            conn.commit()
            logger.info("清理 %s: 删除 %d", table, deleted)
            return deleted
        except Exception:
            conn.rollback()
            raise
        finally:
            conn.close()

    async def _cleanup_run_logs(self) -> int:
        """Purge biz.ai_run_logs rows older than RETENTION_DAYS."""
        return await self._purge_old_rows("ai_run_logs")

    async def _cleanup_trigger_jobs(self) -> int:
        """Purge biz.ai_trigger_jobs rows older than RETENTION_DAYS."""
        return await self._purge_old_rows("ai_trigger_jobs")

    async def _cleanup_cache(self) -> dict[str, int]:
        """Keep at most CACHE_LIMIT_PER_APP newest rows per cache_type.

        P0-8 fix: the ai_cache column is cache_type, not app_type; the old
        `WHERE app_type=%s` SQL raised UndefinedColumn which was silently
        swallowed, so the 20K cap was dead and biz.ai_cache grew without
        bound in production.

        Each cache_type is committed individually. Previously a single
        commit at the end meant a per-type rollback also discarded the
        deletions of every earlier (successful) type while `result` still
        reported them as deleted.
        """
        from app.database import get_connection

        result: dict[str, int] = {}
        conn = get_connection()
        try:
            with conn.cursor() as cur:
                # Cap lock waits at 5 minutes. NOTE: a rollback reverts a
                # transactional SET, so it is re-issued after any failure.
                cur.execute("SET statement_timeout = 300000")
                for cache_type in self.CACHE_TYPES:
                    try:
                        # Keep the newest CACHE_LIMIT_PER_APP rows of this
                        # cache_type; delete everything outside that window.
                        cur.execute(
                            """
                            DELETE FROM biz.ai_cache
                            WHERE cache_type = %s
                              AND id NOT IN (
                                  SELECT id FROM biz.ai_cache
                                  WHERE cache_type = %s
                                  ORDER BY created_at DESC
                                  LIMIT %s
                              )
                            """,
                            (cache_type, cache_type, self.CACHE_LIMIT_PER_APP),
                        )
                        deleted = cur.rowcount
                        result[cache_type] = deleted
                        # Commit per type so earlier deletions survive a
                        # later type's failure.
                        conn.commit()
                        if deleted > 0:
                            logger.info(
                                "清理 ai_cache [%s]: 删除 %d",
                                cache_type,
                                deleted,
                            )
                    except Exception:
                        logger.exception("清理 ai_cache [%s] 失败", cache_type)
                        result[cache_type] = -1
                        conn.rollback()
                        # The rollback also reverted statement_timeout;
                        # restore it before the next cache_type.
                        cur.execute("SET statement_timeout = 300000")
            return result
        except Exception:
            conn.rollback()
            raise
        finally:
            conn.close()
@trace_service(description_zh="register_cleanup_job", description_en="Register Cleanup Job")
def register_cleanup_job(scheduler) -> None:  # noqa: ANN001
    """Register the cleanup job with the scheduler (daily at 03:00).

    Called from the main.py lifespan, or registered via the
    scheduled_tasks table. The actual schedule is driven by the
    trigger_scheduler cron machinery:
    - job_type: 'ai_data_cleanup'
    - trigger_condition: 'cron'
    - trigger_config: {"cron_expression": "0 3 * * *"}
    Requirements: E2.1, E2.2, E2.3
    """
    from app.services.trigger_scheduler import register_job

    def _sync_runner(**_ignored):
        """Sync wrapper: drive the async cleanup in a fresh event loop."""
        outcome = asyncio.run(AICleanupService().run_cleanup())
        logger.info("定时清理任务完成: %s", outcome)

    register_job("ai_data_cleanup", _sync_runner)