Phase 2.3 chat 上下文捕获链路从未真正激活到完整工作: - 14 处 ai-float-button 补 sourcePage,chat.ts 三分支同步设 pageFilters.contextId - 后端 page_context 4 层 BUG 修(列名错位 + RLS site_id 未重设) - xcx_chat filters.pop 破坏 body.page_context 引用 — dict() 浅拷贝隔离 - chat 流式 markdown 实时解析(表格/标题/列表/加粗 + KPI 富卡) - reference_card KPI 富卡接入 SSE 路径,db 真写入 - 维客线索 source 显示规则:AI 来源用机器人 icon 替代长文字 数据库: - public.member_retention_clue 加 emoji + runtime_mode + sandbox_instance_id - biz.ai_run_logs 加 assistant_id + 复合索引 - chk_ai_cache_type CHECK 约束 8 类应用名 - cache_type / app_type 命名统一(app6_note / app7_customer / app8_consolidation) - 历史 emoji 抽取脚本 44/44 成功 后端 silent failure 修: - cleanup_service WHERE app_type → cache_type(90 天清理 + 20K 上限重新生效) - _build_ai_insight 字段错位修复(app4 → app7 + 字段对齐 prompt schema) - task_manager talkingPoints 改 app5_tactics + tactics 字段 - task_manager aiSuggestion 改取 one_line_summary - cache_service.CACHE_EXPIRY_DAYS 加 app2a_finance_area - WS /ws/ai-cache 加 token + JWT + site_id 校验(P0 信息泄露漏洞) - internal_ai token 改 hmac.compare_digest 工具/文档: - main.py 加 RotatingFileHandler logs/backend.log + uvicorn /health 过滤 - 新建 utils/clue_category.py(VI 6 类配色 + emoji fallback + source 显示规则) - 新建 utils/markdown.ts(轻量 md 转 rich-text 解析 + streaming 容错) - audit + 数据库变更说明 + backlog §七 #14 收口 + #15-#38 残余子任务 - backlog 追加 §十一 App1 参数/MCP/沙箱审计 + §十二 百炼/SQL MCP 主任务线 实地 MCP 走查:14 入口数据层 + 5 代表入口 sourcePage 注入 + customer-detail 全模块 + chat md 渲染 + reference_card 富卡 都已验证。9 项预先 BUG/UX 登记 §七 #29-#38 后续修复。 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
197 lines
7.0 KiB
Python
197 lines
7.0 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""
|
||
AI 数据清理服务。
|
||
|
||
由定时任务每日凌晨 03:00 调用,执行三步清理:
|
||
1. 删除 90 天前的 ai_run_logs
|
||
2. 删除 90 天前的 ai_trigger_jobs
|
||
3. 每个 App 类型(App2~App8)的 ai_cache 保留最新 20,000 条
|
||
|
||
永久保留 App1 对话记录(ai_conversations + ai_messages),不清理。
|
||
|
||
需求: E1.1, E1.2, E1.3, E1.4, E2.1, E2.2, E2.3
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import logging
|
||
|
||
from app.trace.decorators import trace_service
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class AICleanupService:
    """Housekeeping for the AI tables; invoked by the scheduled cleanup job.

    Three independent steps are executed by :meth:`run_cleanup`:

    1. delete ``biz.ai_run_logs`` rows older than ``RETENTION_DAYS``;
    2. delete ``biz.ai_trigger_jobs`` rows older than ``RETENTION_DAYS``;
    3. trim ``biz.ai_cache`` so that each entry of ``CACHE_TYPES`` keeps only
       its newest ``CACHE_LIMIT_PER_APP`` rows.
    """

    RETENTION_DAYS = 90
    CACHE_LIMIT_PER_APP = 20_000
    # The 8 application kinds stored in ai_cache (app1_chat goes to
    # ai_messages and never enters ai_cache).
    # Names match the prompt file names (unified by the W1-AI-CLOSURE
    # group 1 database migration).
    CACHE_TYPES: tuple[str, ...] = (
        "app2_finance",
        # P0-11 fix: previously missing, so the 64 area combinations were
        # never cleaned up.
        "app2a_finance_area",
        "app3_clue",
        "app4_analysis",
        "app5_tactics",
        "app6_note",
        "app7_customer",
        "app8_consolidation",
    )

    # Issued at the start of every transaction: caps a single statement at
    # 5 minutes so a lock wait cannot stall the job indefinitely.
    _STATEMENT_TIMEOUT_SQL = "SET statement_timeout = 300000"

    async def run_cleanup(self) -> dict:
        """Run every cleanup step and return the per-step deletion counts.

        A failing step is logged and does not prevent later steps from
        running.

        Returns:
            A dict with the keys ``run_logs_deleted`` and
            ``trigger_jobs_deleted`` (row count, or ``-1`` if the step
            failed) and ``cache_deleted`` (the mapping produced by
            ``_cleanup_cache``, or ``{}`` if that step failed as a whole).
        """
        # (result key, step coroutine, failure log message, failure value)
        steps = (
            ("run_logs_deleted", self._cleanup_run_logs, "清理 ai_run_logs 失败", -1),
            (
                "trigger_jobs_deleted",
                self._cleanup_trigger_jobs,
                "清理 ai_trigger_jobs 失败",
                -1,
            ),
            ("cache_deleted", self._cleanup_cache, "清理 ai_cache 失败", {}),
        )

        result: dict = {}
        for key, step, failure_message, failure_value in steps:
            try:
                result[key] = await step()
            except Exception:
                logger.exception(failure_message)
                result[key] = failure_value

        logger.info("AI 数据清理完成: %s", result)
        return result

    def _execute_delete(self, statement: str, params: tuple) -> int:
        """Run one DELETE in its own connection and return the row count.

        Commits on success; on any error rolls back and re-raises. The
        connection is always closed.
        """
        from app.database import get_connection

        conn = get_connection()
        try:
            with conn.cursor() as cur:
                cur.execute(self._STATEMENT_TIMEOUT_SQL)
                cur.execute(statement, params)
                deleted = cur.rowcount
            conn.commit()
            return deleted
        except Exception:
            conn.rollback()
            raise
        finally:
            conn.close()

    async def _cleanup_run_logs(self) -> int:
        """Delete ``biz.ai_run_logs`` rows older than ``RETENTION_DAYS`` days.

        Returns:
            The number of rows deleted.
        """
        # The day count is bound as a real parameter and multiplied by a
        # one-day interval; a placeholder inside a quoted literal
        # ('%s days') only works with client-side string interpolation.
        deleted = self._execute_delete(
            """
            DELETE FROM biz.ai_run_logs
            WHERE created_at < NOW() - %s * INTERVAL '1 day'
            """,
            (self.RETENTION_DAYS,),
        )
        logger.info("清理 ai_run_logs: 删除 %d 条", deleted)
        return deleted

    async def _cleanup_trigger_jobs(self) -> int:
        """Delete ``biz.ai_trigger_jobs`` rows older than ``RETENTION_DAYS`` days.

        Returns:
            The number of rows deleted.
        """
        deleted = self._execute_delete(
            """
            DELETE FROM biz.ai_trigger_jobs
            WHERE created_at < NOW() - %s * INTERVAL '1 day'
            """,
            (self.RETENTION_DAYS,),
        )
        logger.info("清理 ai_trigger_jobs: 删除 %d 条", deleted)
        return deleted

    async def _cleanup_cache(self) -> dict[str, int]:
        """Keep the newest ``CACHE_LIMIT_PER_APP`` rows of each cache type.

        P0-8 fix: the ai_cache column is ``cache_type``, not ``app_type``;
        the earlier ``WHERE app_type = %s`` raised UndefinedColumn, the error
        was swallowed, and the table grew without bound.

        Every cache type is trimmed in its own transaction. Previously all
        types shared one transaction, so the rollback triggered by one
        failing type also discarded the deletes of the types processed before
        it (unless the connection runs in autocommit mode) while the result
        still reported them as deleted.

        Returns:
            Mapping of cache type to rows deleted, or ``-1`` for a type whose
            delete failed.
        """
        from app.database import get_connection

        result: dict[str, int] = {}
        conn = get_connection()
        try:
            with conn.cursor() as cur:
                for cache_type in self.CACHE_TYPES:
                    try:
                        # Re-issued per transaction: a rollback also reverts
                        # a SET made inside the rolled-back transaction.
                        cur.execute(self._STATEMENT_TIMEOUT_SQL)
                        # Delete every row of this type whose id is not among
                        # the newest CACHE_LIMIT_PER_APP rows; id breaks
                        # created_at ties so the kept set is deterministic.
                        cur.execute(
                            """
                            DELETE FROM biz.ai_cache
                            WHERE cache_type = %s
                              AND id NOT IN (
                                  SELECT id FROM biz.ai_cache
                                  WHERE cache_type = %s
                                  ORDER BY created_at DESC, id DESC
                                  LIMIT %s
                              )
                            """,
                            (cache_type, cache_type, self.CACHE_LIMIT_PER_APP),
                        )
                        deleted = cur.rowcount
                        conn.commit()
                    except Exception:
                        logger.exception("清理 ai_cache [%s] 失败", cache_type)
                        result[cache_type] = -1
                        # Reset the aborted transaction so the remaining
                        # cache types can still be processed.
                        conn.rollback()
                        continue

                    result[cache_type] = deleted
                    if deleted > 0:
                        logger.info(
                            "清理 ai_cache [%s]: 删除 %d 条",
                            cache_type,
                            deleted,
                        )
            return result
        except Exception:
            conn.rollback()
            raise
        finally:
            conn.close()
|
||
|
||
|
||
@trace_service(description_zh="register_cleanup_job", description_en="Register Cleanup Job")
def register_cleanup_job(scheduler) -> None:  # noqa: ANN001
    """Register the AI data-cleanup handler under job type ``ai_data_cleanup``.

    Intended to be called from the main.py lifespan, or registered through
    the scheduled_tasks table. The actual daily 03:00 schedule is driven by
    the cron mechanism of trigger_scheduler:

    - job_type: 'ai_data_cleanup'
    - trigger_condition: 'cron'
    - trigger_config: {"cron_expression": "0 3 * * *"}

    The ``scheduler`` argument is accepted but not used by this function.

    Requirements: E2.1, E2.2, E2.3
    """
    from app.services.trigger_scheduler import register_job

    def _run_cleanup(**_kw):
        """Synchronous adapter: run the async cleanup on a fresh event loop."""
        service = AICleanupService()
        summary = asyncio.run(service.run_cleanup())
        logger.info("定时清理任务完成: %s", summary)

    register_job("ai_data_cleanup", _run_cleanup)
|