feat: 累积功能变更 — 聊天集成、租户管理、小程序更新、ETL 增强、迁移脚本
包含多个会话的累积代码变更: - backend: AI 聊天服务、触发器调度、认证增强、WebSocket、调度器最小间隔 - admin-web: ETL 状态页、任务管理、调度配置、登录优化 - miniprogram: 看板页面、聊天集成、UI 组件、导航更新 - etl: DWS 新任务(finance_area_daily/board_cache)、连接器增强 - tenant-admin: 项目初始化 - db: 19 个迁移脚本(etl_feiqiu 11 + zqyy_app 8) - packages/shared: 枚举和工具函数更新 - tools: 数据库工具、报表生成、健康检查 - docs: PRD/架构/部署/合约文档更新 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
1
apps/backend/app/services/ai/__init__.py
Normal file
1
apps/backend/app/services/ai/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# AI 监控后台服务层
|
||||
721
apps/backend/app/services/ai/admin_service.py
Normal file
721
apps/backend/app/services/ai/admin_service.py
Normal file
@@ -0,0 +1,721 @@
|
||||
"""AI 监控后台聚合服务层。
|
||||
|
||||
提供 Dashboard 总览、调度任务管理、调用记录查询、缓存失效、
|
||||
Token 预算、批量执行(含成本二次确认)、告警管理等功能。
|
||||
|
||||
所有数据库操作使用 psycopg2 同步连接,方法签名为 async(FastAPI 兼容)。
|
||||
查询强制 site_id 隔离(当 site_id 参数不为 None 时)。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import datetime, timezone, timedelta
|
||||
|
||||
from app.ai.budget_tracker import BudgetTracker
|
||||
from app.database import get_connection
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Batch estimation: assumed average number of tokens consumed per AI call.
AVG_TOKENS_PER_CALL = 2000

# Lifetime, in seconds, of a pending batch kept in memory (10 minutes).
_BATCH_TTL_SECONDS = 10 * 60
|
||||
|
||||
|
||||
class AdminAIService:
    """Aggregation service backing the AI monitoring admin console."""

    def __init__(self, budget_tracker: BudgetTracker | None = None) -> None:
        """Store the optional budget tracker and create the pending-batch store.

        Args:
            budget_tracker: Source of token budget figures. When ``None``,
                ``get_budget`` queries the database directly instead.
        """
        # batch_id -> {"params": ..., "expires_at": ...}
        self._batch_store: dict[str, dict] = {}
        self._budget = budget_tracker
|
||||
|
||||
# ── Dashboard ─────────────────────────────────────────
|
||||
|
||||
async def get_dashboard(self, site_id: int | None = None) -> dict:
    """Assemble every section of the dashboard into one dict.

    Args:
        site_id: Restrict the per-site sections to this site; ``None``
            means no site filter. The budget section is not site-scoped.

    Returns:
        The "today" statistics keys merged with ``trend_7d``,
        ``app_distribution``, ``budget``, ``recent_alerts`` and
        ``app_health``.
    """
    # The sections are fetched one after another, in this fixed order.
    dashboard = dict(await self._get_today_stats(site_id))
    trend = await self._get_7d_trend(site_id)
    distribution = await self._get_app_distribution(site_id)
    health = await self._get_app_health(site_id)
    budget = await self.get_budget()
    alerts = await self._get_recent_alerts(site_id)

    dashboard["trend_7d"] = trend
    dashboard["app_distribution"] = distribution
    dashboard["budget"] = budget
    dashboard["recent_alerts"] = alerts
    dashboard["app_health"] = health
    return dashboard
|
||||
|
||||
async def _get_today_stats(self, site_id: int | None) -> dict:
    """Return today's call count, success rate, token total and mean latency.

    Args:
        site_id: Optional site filter; ``None`` aggregates across all sites.

    Returns:
        Dict with ``today_calls``, ``today_success_rate`` (rounded to 4
        places, 0.0 when there were no calls), ``today_tokens`` and
        ``today_avg_latency_ms`` (rounded to 2 places).
    """
    site_sql, site_args = _site_filter(site_id)
    query = f"""
        SELECT
            COUNT(*) AS total_calls,
            COUNT(*) FILTER (WHERE status = 'success') AS success_count,
            COALESCE(SUM(tokens_used), 0) AS total_tokens,
            COALESCE(AVG(latency_ms) FILTER (WHERE latency_ms IS NOT NULL), 0)
                AS avg_latency
        FROM biz.ai_run_logs
        WHERE created_at >= CURRENT_DATE
          AND created_at < CURRENT_DATE + INTERVAL '1 day'
          {site_clause_placeholder}
    """.replace("{site_clause_placeholder}", site_sql) if False else f"""
        SELECT
            COUNT(*) AS total_calls,
            COUNT(*) FILTER (WHERE status = 'success') AS success_count,
            COALESCE(SUM(tokens_used), 0) AS total_tokens,
            COALESCE(AVG(latency_ms) FILTER (WHERE latency_ms IS NOT NULL), 0)
                AS avg_latency
        FROM biz.ai_run_logs
        WHERE created_at >= CURRENT_DATE
          AND created_at < CURRENT_DATE + INTERVAL '1 day'
          {site_sql}
    """

    conn = get_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(query, site_args)
            stats = cursor.fetchone()
        conn.commit()
    finally:
        conn.close()

    if stats:
        calls, ok_calls, token_sum, latency_avg = stats
    else:
        calls, ok_calls, token_sum, latency_avg = 0, 0, 0, 0

    if calls > 0:
        success_rate = round(ok_calls / calls, 4)
    else:
        success_rate = 0.0

    return {
        "today_calls": calls,
        "today_success_rate": success_rate,
        "today_tokens": int(token_sum),
        "today_avg_latency_ms": round(float(latency_avg), 2),
    }
|
||||
|
||||
async def _get_7d_trend(self, site_id: int | None) -> list[dict]:
    """Return per-day call counts and success rates since six days ago.

    Only days that have at least one row appear in the result; the query
    does not fill gaps.

    Args:
        site_id: Optional site filter; ``None`` aggregates across all sites.

    Returns:
        One dict per day, ordered by date, with ``date`` (ISO string),
        ``calls`` and ``success_rate`` (rounded to 4 places).
    """
    site_sql, site_args = _site_filter(site_id)

    conn = get_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(
                f"""
                SELECT
                    created_at::date AS day,
                    COUNT(*) AS calls,
                    COUNT(*) FILTER (WHERE status = 'success') AS success_count
                FROM biz.ai_run_logs
                WHERE created_at >= CURRENT_DATE - INTERVAL '6 days'
                  {site_sql}
                GROUP BY day
                ORDER BY day
                """,
                site_args,
            )
            daily_rows = cursor.fetchall()
        conn.commit()
    finally:
        conn.close()

    trend: list[dict] = []
    for daily in daily_rows:
        day, calls, ok_calls = daily[0], daily[1], daily[2]
        trend.append(
            {
                "date": day.isoformat(),
                "calls": calls,
                "success_rate": round(ok_calls / calls, 4) if calls > 0 else 0.0,
            }
        )
    return trend
|
||||
|
||||
async def _get_app_distribution(self, site_id: int | None) -> list[dict]:
    """Return each app type's share of calls since six days ago.

    Args:
        site_id: Optional site filter; ``None`` aggregates across all sites.

    Returns:
        Dicts with ``app_type``, ``count`` and ``percentage`` (a fraction
        rounded to 4 places), ordered by descending count.
    """
    site_sql, site_args = _site_filter(site_id)

    conn = get_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(
                f"""
                SELECT app_type, COUNT(*) AS cnt
                FROM biz.ai_run_logs
                WHERE created_at >= CURRENT_DATE - INTERVAL '6 days'
                  {site_sql}
                GROUP BY app_type
                ORDER BY cnt DESC
                """,
                site_args,
            )
            counts = cursor.fetchall()
        conn.commit()
    finally:
        conn.close()

    # A zero grand total is replaced by 1 so the division below is always defined.
    grand_total = sum(entry[1] for entry in counts) or 1

    distribution: list[dict] = []
    for entry in counts:
        distribution.append(
            {
                "app_type": entry[0],
                "count": entry[1],
                "percentage": round(entry[1] / grand_total, 4),
            }
        )
    return distribution
|
||||
|
||||
async def _get_app_health(self, site_id: int | None) -> list[dict]:
    """Return the status of the most recent call for every app type.

    Args:
        site_id: Optional site filter; ``None`` considers all sites.

    Returns:
        Dicts with ``app_type``, ``last_status`` and ``last_call_at``
        (ISO string, or ``None`` when the timestamp is missing).
    """
    site_sql, site_args = _site_filter(site_id)

    conn = get_connection()
    try:
        with conn.cursor() as cursor:
            # DISTINCT ON + ORDER BY created_at DESC keeps the newest row per app_type.
            cursor.execute(
                f"""
                SELECT DISTINCT ON (app_type)
                    app_type,
                    status AS last_status,
                    created_at AS last_call_at
                FROM biz.ai_run_logs
                WHERE TRUE {site_sql}
                ORDER BY app_type, created_at DESC
                """,
                site_args,
            )
            latest_rows = cursor.fetchall()
        conn.commit()
    finally:
        conn.close()

    health: list[dict] = []
    for app_type, last_status, last_call_at in latest_rows:
        called_at = last_call_at.isoformat() if last_call_at else None
        health.append(
            {
                "app_type": app_type,
                "last_status": last_status,
                "last_call_at": called_at,
            }
        )
    return health
|
||||
|
||||
async def _get_recent_alerts(self, site_id: int | None, limit: int = 10) -> list[dict]:
    """Return the newest alert rows for the dashboard.

    An alert is any run log whose status is ``failed``, ``timeout`` or
    ``circuit_open``.

    Args:
        site_id: Optional site filter; ``None`` considers all sites.
        limit: Maximum number of rows to return.

    Returns:
        Row dicts (newest first) with datetimes rendered as ISO strings.
    """
    site_sql, site_args = _site_filter(site_id)
    query_args = (*site_args, limit)

    conn = get_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(
                f"""
                SELECT id, app_type, status, alert_status,
                       error_message, created_at
                FROM biz.ai_run_logs
                WHERE status IN ('failed', 'timeout', 'circuit_open')
                  {site_sql}
                ORDER BY created_at DESC
                LIMIT %s
                """,
                query_args,
            )
            column_names = [column[0] for column in cursor.description]
            alert_rows = cursor.fetchall()
        conn.commit()
    finally:
        conn.close()

    alerts: list[dict] = []
    for alert_row in alert_rows:
        alerts.append(_row_to_dict(column_names, alert_row))
    return alerts
|
||||
|
||||
# ── 调度任务 ──────────────────────────────────────────
|
||||
|
||||
async def list_trigger_jobs(
    self, filters: dict, page: int = 1, page_size: int = 20,
) -> dict:
    """Return one page of ``biz.ai_trigger_jobs`` plus today's skipped-duplicate count.

    Args:
        filters: Optional keys ``event_type``, ``status``, ``site_id``
            (equality filters, applied when not ``None``) and
            ``date_from`` / ``date_to`` (inclusive bounds on ``created_at``,
            applied when truthy).
        page: 1-based page number.
        page_size: Number of rows per page.

    Returns:
        Dict with ``items``, ``total``, ``page``, ``page_size`` and
        ``today_skipped_duplicates``.
    """
    where_parts: list[str] = []
    params: list = []

    for key in ("event_type", "status", "site_id"):
        value = filters.get(key)
        if value is not None:
            where_parts.append(f"{key} = %s")
            params.append(value)

    if filters.get("date_from"):
        where_parts.append("created_at >= %s")
        params.append(filters["date_from"])
    if filters.get("date_to"):
        where_parts.append("created_at <= %s")
        params.append(filters["date_to"])

    where_sql = ("WHERE " + " AND ".join(where_parts)) if where_parts else ""
    offset = (page - 1) * page_size

    # The skipped-duplicate counter must honour the same site isolation as
    # the listing itself; previously it counted rows from every site even
    # when the caller restricted the query to a single site_id.
    skipped_sql = """
        SELECT COUNT(*)
        FROM biz.ai_trigger_jobs
        WHERE status = 'skipped_duplicate'
          AND created_at >= CURRENT_DATE
          AND created_at < CURRENT_DATE + INTERVAL '1 day'
    """
    skipped_params: list = []
    site_id = filters.get("site_id")
    if site_id is not None:
        skipped_sql += " AND site_id = %s"
        skipped_params.append(site_id)

    conn = get_connection()
    try:
        with conn.cursor() as cur:
            # Total number of rows matching the filters.
            cur.execute(
                f"SELECT COUNT(*) FROM biz.ai_trigger_jobs {where_sql}",
                params,
            )
            total = cur.fetchone()[0]

            # The requested page, newest first.
            cur.execute(
                f"""
                SELECT id, event_type, member_id, status, app_chain,
                       is_forced, site_id, started_at, finished_at, created_at
                FROM biz.ai_trigger_jobs
                {where_sql}
                ORDER BY created_at DESC
                LIMIT %s OFFSET %s
                """,
                (*params, page_size, offset),
            )
            cols = [d[0] for d in cur.description]
            rows = cur.fetchall()

            # Jobs skipped today because they were duplicates.
            cur.execute(skipped_sql, skipped_params)
            today_skipped = cur.fetchone()[0]
        conn.commit()
    finally:
        conn.close()

    return {
        "items": [_row_to_dict(cols, r) for r in rows],
        "total": total,
        "page": page,
        "page_size": page_size,
        "today_skipped_duplicates": today_skipped,
    }
|
||||
|
||||
async def get_trigger_job(self, job_id: int) -> dict | None:
    """Fetch the full detail of a single trigger job.

    Args:
        job_id: Primary key in ``biz.ai_trigger_jobs``.

    Returns:
        The row as a dict (datetimes as ISO strings), or ``None`` when no
        row has that id.
    """
    conn = get_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(
                """
                SELECT id, event_type, member_id, status, app_chain,
                       is_forced, site_id, started_at, finished_at,
                       created_at, payload, error_message, connector_type
                FROM biz.ai_trigger_jobs
                WHERE id = %s
                """,
                (job_id,),
            )
            column_names = [column[0] for column in cursor.description]
            job_row = cursor.fetchone()
        conn.commit()
    finally:
        conn.close()

    return None if job_row is None else _row_to_dict(column_names, job_row)
|
||||
|
||||
async def retry_trigger_job(self, job_id: int) -> int:
    """Clone a trigger job as a new forced, pending job.

    Args:
        job_id: Id of the job to retry.

    Returns:
        The id of the newly inserted job.

    Raises:
        ValueError: If ``job_id`` does not exist.
    """
    import json  # local import: only needed to serialise a dict payload

    original = await self.get_trigger_job(job_id)
    if original is None:
        raise ValueError(f"trigger_job {job_id} 不存在")

    # get_trigger_job always selects connector_type, so the key is always
    # present and a dict.get() default would never apply; fall back
    # explicitly when the stored value is NULL.
    connector_type = original.get("connector_type") or "feiqiu"

    # A JSON column comes back from the driver as a dict, which stock
    # psycopg2 cannot adapt as a query parameter; send it as JSON text.
    payload = original.get("payload")
    if isinstance(payload, dict):
        payload = json.dumps(payload, ensure_ascii=False)

    # NOTE(review): app_chain is passed through unchanged. If that column
    # is JSON rather than an array/text type it needs the same treatment —
    # confirm against the schema.
    app_chain = original.get("app_chain")

    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(
                """
                INSERT INTO biz.ai_trigger_jobs
                    (event_type, member_id, site_id, connector_type,
                     payload, app_chain, is_forced, status)
                VALUES (%s, %s, %s, %s, %s, %s, true, 'pending')
                RETURNING id
                """,
                (
                    original["event_type"],
                    original.get("member_id"),
                    original["site_id"],
                    connector_type,
                    payload,
                    app_chain,
                ),
            )
            new_id = cur.fetchone()[0]
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()

    return new_id
|
||||
|
||||
# ── 调用记录 ──────────────────────────────────────────
|
||||
|
||||
async def list_run_logs(
    self, filters: dict, page: int = 1, page_size: int = 20,
) -> dict:
    """Return one page of ``biz.ai_run_logs`` summaries, newest first.

    Args:
        filters: Optional keys ``app_type``, ``status``, ``trigger_type``,
            ``site_id`` (equality filters, applied when not ``None``) and
            ``date_from`` / ``date_to`` (inclusive bounds on ``created_at``,
            applied when truthy).
        page: 1-based page number.
        page_size: Number of rows per page.

    Returns:
        Dict with ``items``, ``total``, ``page`` and ``page_size``.
    """
    conditions: list[str] = []
    args: list = []

    for column in ("app_type", "status", "trigger_type", "site_id"):
        if filters.get(column) is None:
            continue
        conditions.append(f"{column} = %s")
        args.append(filters[column])

    for bound_key, comparison in (("date_from", ">="), ("date_to", "<=")):
        if filters.get(bound_key):
            conditions.append(f"created_at {comparison} %s")
            args.append(filters[bound_key])

    where_sql = ""
    if conditions:
        where_sql = "WHERE " + " AND ".join(conditions)
    skip = (page - 1) * page_size

    conn = get_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(
                f"SELECT COUNT(*) FROM biz.ai_run_logs {where_sql}",
                args,
            )
            match_count = cursor.fetchone()[0]

            cursor.execute(
                f"""
                SELECT id, app_type, trigger_type, member_id,
                       tokens_used, latency_ms, status, site_id, created_at
                FROM biz.ai_run_logs
                {where_sql}
                ORDER BY created_at DESC
                LIMIT %s OFFSET %s
                """,
                (*args, page_size, skip),
            )
            column_names = [column[0] for column in cursor.description]
            page_rows = cursor.fetchall()
        conn.commit()
    finally:
        conn.close()

    items = [_row_to_dict(column_names, page_row) for page_row in page_rows]
    return {
        "items": items,
        "total": match_count,
        "page": page,
        "page_size": page_size,
    }
|
||||
|
||||
async def get_run_log(self, log_id: int) -> dict | None:
    """Fetch one run log in full, including the raw prompt and response.

    The prompt and response columns are returned exactly as stored; no
    masking is applied here.

    Args:
        log_id: Primary key in ``biz.ai_run_logs``.

    Returns:
        The row as a dict (datetimes as ISO strings), or ``None`` when no
        row has that id.
    """
    conn = get_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(
                """
                SELECT id, app_type, trigger_type, member_id,
                       tokens_used, latency_ms, status, site_id,
                       created_at, request_prompt, response_text,
                       error_message, session_id, finished_at
                FROM biz.ai_run_logs
                WHERE id = %s
                """,
                (log_id,),
            )
            column_names = [column[0] for column in cursor.description]
            log_row = cursor.fetchone()
        conn.commit()
    finally:
        conn.close()

    return None if log_row is None else _row_to_dict(column_names, log_row)
|
||||
|
||||
# ── 缓存管理 ──────────────────────────────────────────
|
||||
|
||||
async def invalidate_cache(
    self, site_id: int, app_type: str | None = None, member_id: int | None = None,
) -> int:
    """Mark matching ``biz.ai_cache`` rows as invalidated.

    Args:
        site_id: Site whose cache entries are affected (always applied).
        app_type: When given, only rows whose ``cache_type`` equals it.
        member_id: When given, only rows whose ``target_id`` equals its
            string form.

    Returns:
        Number of rows updated; rows already invalidated are not counted.
    """
    # NOTE(review): this filters on cache_type, whereas AICleanupService
    # filters the same table on app_type — confirm which column exists.
    conditions = ["site_id = %s"]
    args: list = [site_id]

    optional_filters = (
        ("cache_type = %s", app_type, app_type),
        ("target_id = %s", member_id, None if member_id is None else str(member_id)),
    )
    for clause, raw_value, bound_value in optional_filters:
        if raw_value is not None:
            conditions.append(clause)
            args.append(bound_value)

    where_sql = " AND ".join(conditions)

    conn = get_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(
                f"""
                UPDATE biz.ai_cache
                SET status = 'invalidated'
                WHERE {where_sql}
                  AND status != 'invalidated'
                """,
                args,
            )
            updated = cursor.rowcount
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()

    return updated
|
||||
|
||||
# ── Token 预算 ────────────────────────────────────────
|
||||
|
||||
async def get_budget(self) -> dict:
    """Report daily and monthly token usage against their limits.

    When a budget tracker was injected, its figures and limits are used.
    Otherwise usage is summed from successful rows in ``biz.ai_run_logs``
    and compared with the hard-coded fallback limits below.

    Returns:
        Dict with ``daily_used``, ``daily_limit``, ``daily_pct``,
        ``monthly_used``, ``monthly_limit`` and ``monthly_pct``. The
        ``*_pct`` values are fractions rounded to 4 places, and 0.0 when
        the corresponding limit is not positive.
    """

    def _usage(daily_used, daily_limit, monthly_used, monthly_limit) -> dict:
        # Single place that shapes the response for both code paths.
        return {
            "daily_used": daily_used,
            "daily_limit": daily_limit,
            "daily_pct": round(daily_used / daily_limit, 4) if daily_limit > 0 else 0.0,
            "monthly_used": monthly_used,
            "monthly_limit": monthly_limit,
            "monthly_pct": round(monthly_used / monthly_limit, 4) if monthly_limit > 0 else 0.0,
        }

    if self._budget is not None:
        status = self._budget.check_budget()
        return _usage(
            status.daily_used,
            self._budget.daily_limit,
            status.monthly_used,
            self._budget.monthly_limit,
        )

    # No tracker available: compute usage straight from the run logs.
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            # Both FILTER windows lie inside the current month, so bounding
            # the WHERE clause to the month start returns the same sums
            # without aggregating over the entire log history.
            cur.execute(
                """
                SELECT
                    COALESCE(SUM(tokens_used) FILTER (
                        WHERE created_at >= CURRENT_DATE
                          AND created_at < CURRENT_DATE + INTERVAL '1 day'
                    ), 0) AS daily_used,
                    COALESCE(SUM(tokens_used) FILTER (
                        WHERE created_at >= date_trunc('month', CURRENT_DATE)
                          AND created_at < date_trunc('month', CURRENT_DATE) + INTERVAL '1 month'
                    ), 0) AS monthly_used
                FROM biz.ai_run_logs
                WHERE status = 'success'
                  AND created_at >= date_trunc('month', CURRENT_DATE)
                """,
            )
            row = cur.fetchone()
        conn.commit()
    finally:
        conn.close()

    daily_used, monthly_used = (int(row[0]), int(row[1])) if row else (0, 0)
    # Fallback limits used only when no BudgetTracker is configured.
    # NOTE(review): presumably these mirror the tracker's defaults — confirm.
    return _usage(daily_used, 100_000, monthly_used, 2_000_000)
|
||||
|
||||
# ── 批量执行 ──────────────────────────────────────────
|
||||
|
||||
async def estimate_batch(
    self, app_types: list[str], member_ids: list[int], site_id: int,
) -> dict:
    """Register a pending batch and return its cost estimate.

    Expired batches are purged first. The new batch is kept in memory
    under a fresh hex UUID until it expires or is confirmed.

    Args:
        app_types: App types to run.
        member_ids: Members to run them for.
        site_id: Site the batch belongs to.

    Returns:
        Dict with ``batch_id``, ``estimated_calls`` (apps x members) and
        ``estimated_tokens``.
    """
    self._cleanup_expired_batches()

    call_count = len(app_types) * len(member_ids)
    token_estimate = call_count * AVG_TOKENS_PER_CALL
    new_batch_id = uuid.uuid4().hex

    deadline = datetime.now(timezone.utc) + timedelta(seconds=_BATCH_TTL_SECONDS)
    batch_params = {
        "app_types": app_types,
        "member_ids": member_ids,
        "site_id": site_id,
    }
    self._batch_store[new_batch_id] = {"params": batch_params, "expires_at": deadline}

    return {
        "batch_id": new_batch_id,
        "estimated_calls": call_count,
        "estimated_tokens": token_estimate,
    }
|
||||
|
||||
async def confirm_batch(self, batch_id: str) -> None:
    """Consume a pending batch and start it as a background task.

    Args:
        batch_id: Id previously returned by ``estimate_batch``.

    Raises:
        ValueError: If the id is unknown, already consumed, or expired.
    """
    self._cleanup_expired_batches()

    # pop() makes confirmation one-shot: a second call with the same id fails.
    entry = self._batch_store.pop(batch_id, None)
    if entry is None:
        raise ValueError(f"batch_id 无效或已过期: {batch_id}")

    params = entry["params"]
    logger.info(
        "批量执行确认: batch_id=%s apps=%s members=%d site_id=%s",
        batch_id,
        params["app_types"],
        len(params["member_ids"]),
        params["site_id"],
    )

    # Run in the background. The event loop only keeps weak references to
    # tasks, so a strong reference is held until the task finishes;
    # otherwise it could be garbage-collected mid-execution.
    task = asyncio.create_task(
        self._run_batch(params["app_types"], params["member_ids"], params["site_id"])
    )
    background_tasks = getattr(self, "_background_tasks", None)
    if background_tasks is None:
        background_tasks = set()
        self._background_tasks = background_tasks
    background_tasks.add(task)
    task.add_done_callback(background_tasks.discard)
|
||||
|
||||
async def _run_batch(
    self, app_types: list[str], member_ids: list[int], site_id: int,
) -> None:
    """Placeholder for the background batch run; it only writes a log line.

    The service layer deliberately holds no dispatcher instance, so no AI
    call is issued here. According to the original notes, the real work is
    driven from the routing layer via ``dispatcher.handle_trigger``.

    Args:
        app_types: App types included in the batch.
        member_ids: Members included in the batch (only the count is logged).
        site_id: Site the batch belongs to.
    """
    member_count = len(member_ids)
    logger.info(
        "批量执行开始: apps=%s members=%d site_id=%s",
        app_types,
        member_count,
        site_id,
    )
|
||||
|
||||
def _cleanup_expired_batches(self) -> None:
    """Drop every pending batch whose ``expires_at`` is not in the future.

    The store is mutated in place. A debug line is logged only when at
    least one batch was removed.
    """
    cutoff = datetime.now(timezone.utc)

    # Collect first, delete afterwards: the dict must not change size
    # while it is being iterated.
    stale_ids = []
    for batch_id, entry in self._batch_store.items():
        if entry["expires_at"] <= cutoff:
            stale_ids.append(batch_id)

    for batch_id in stale_ids:
        del self._batch_store[batch_id]

    if stale_ids:
        logger.debug("清理过期 batch: %d 个", len(stale_ids))
|
||||
|
||||
# ── 告警管理 ──────────────────────────────────────────
|
||||
|
||||
async def list_alerts(
    self,
    alert_status: str | None = None,
    site_id: int | None = None,
    page: int = 1,
    page_size: int = 20,
) -> dict:
    """Return one page of alerts, newest first.

    An alert is a run log whose status is ``failed``, ``timeout`` or
    ``circuit_open``.

    Args:
        alert_status: Optional filter. ``"pending"`` matches rows whose
            ``alert_status`` is NULL or ``'pending'``; any other value is
            matched exactly.
        site_id: Optional site filter.
        page: 1-based page number.
        page_size: Number of rows per page.

    Returns:
        Dict with ``items``, ``total``, ``page`` and ``page_size``.
    """
    conditions = ["status IN ('failed', 'timeout', 'circuit_open')"]
    args: list = []

    if alert_status == "pending":
        # Rows never triaged have a NULL alert_status and count as pending.
        conditions.append("(alert_status IS NULL OR alert_status = 'pending')")
    elif alert_status is not None:
        conditions.append("alert_status = %s")
        args.append(alert_status)

    if site_id is not None:
        conditions.append("site_id = %s")
        args.append(site_id)

    where_sql = "WHERE " + " AND ".join(conditions)
    skip = (page - 1) * page_size

    conn = get_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(
                f"SELECT COUNT(*) FROM biz.ai_run_logs {where_sql}",
                args,
            )
            match_count = cursor.fetchone()[0]

            cursor.execute(
                f"""
                SELECT id, app_type, status, alert_status,
                       error_message, created_at
                FROM biz.ai_run_logs
                {where_sql}
                ORDER BY created_at DESC
                LIMIT %s OFFSET %s
                """,
                (*args, page_size, skip),
            )
            column_names = [column[0] for column in cursor.description]
            alert_rows = cursor.fetchall()
        conn.commit()
    finally:
        conn.close()

    items = [_row_to_dict(column_names, alert_row) for alert_row in alert_rows]
    return {
        "items": items,
        "total": match_count,
        "page": page,
        "page_size": page_size,
    }
|
||||
|
||||
async def ack_alert(self, log_id: int) -> str:
    """Acknowledge an alert by setting its ``alert_status`` to ``acknowledged``.

    Args:
        log_id: Id of the run log that raised the alert.

    Returns:
        The literal ``"acknowledged"``.
    """
    return await self._set_alert_status(log_id, "acknowledged")


async def ignore_alert(self, log_id: int) -> str:
    """Ignore an alert by setting its ``alert_status`` to ``ignored``.

    Args:
        log_id: Id of the run log that raised the alert.

    Returns:
        The literal ``"ignored"``.
    """
    return await self._set_alert_status(log_id, "ignored")


async def _set_alert_status(self, log_id: int, alert_status: str) -> str:
    """Write ``alert_status`` on one alert row and return the status written.

    Only rows whose status is ``failed``, ``timeout`` or ``circuit_open``
    are updated. The status is returned even when no row matched, as the
    number of affected rows is not inspected.
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(
                """
                UPDATE biz.ai_run_logs
                SET alert_status = %s
                WHERE id = %s
                  AND status IN ('failed', 'timeout', 'circuit_open')
                """,
                (alert_status, log_id),
            )
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()

    return alert_status
|
||||
|
||||
|
||||
# ── 工具函数 ──────────────────────────────────────────────
|
||||
|
||||
|
||||
def _site_filter(site_id: int | None) -> tuple[str, tuple]:
    """Build the optional ``site_id`` SQL fragment and its parameters.

    Args:
        site_id: Site to restrict to, or ``None`` for no restriction.

    Returns:
        ``("AND site_id = %s", (site_id,))`` when a site is given,
        otherwise ``("", ())``. The fragment is meant to be appended to an
        existing WHERE clause.
    """
    if site_id is not None:
        return "AND site_id = %s", (site_id,)
    return "", ()
|
||||
|
||||
|
||||
def _row_to_dict(columns: list[str], row: tuple) -> dict:
    """Pair column names with row values, rendering datetimes as ISO strings.

    Args:
        columns: Column names, in select order.
        row: One fetched row.

    Returns:
        Dict keyed by column name. ``datetime`` values become
        ``isoformat()`` strings; every other value is passed through.
    """
    return {
        name: value.isoformat() if isinstance(value, datetime) else value
        for name, value in zip(columns, row)
    }
|
||||
188
apps/backend/app/services/ai/cleanup_service.py
Normal file
188
apps/backend/app/services/ai/cleanup_service.py
Normal file
@@ -0,0 +1,188 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
AI 数据清理服务。
|
||||
|
||||
由定时任务每日凌晨 03:00 调用,执行三步清理:
|
||||
1. 删除 90 天前的 ai_run_logs
|
||||
2. 删除 90 天前的 ai_trigger_jobs
|
||||
3. 每个 App 类型(App2~App8)的 ai_cache 保留最新 20,000 条
|
||||
|
||||
永久保留 App1 对话记录(ai_conversations + ai_messages),不清理。
|
||||
|
||||
需求: E1.1, E1.2, E1.3, E1.4, E2.1, E2.2, E2.3
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
from app.trace.decorators import trace_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AICleanupService:
    """Cleanup service for AI data, invoked by a scheduled job."""

    # Rows older than this many days are deleted from the log/job tables.
    RETENTION_DAYS = 90
    # Maximum number of ai_cache rows kept per app type.
    CACHE_LIMIT_PER_APP = 20_000
    # App types whose cache is trimmed (App2 through App8).
    CACHE_APP_TYPES = [
        "app2_finance",
        "app3_clue",
        "app4_analysis",
        "app5_tactics",
        "app6_note_analysis",
        "app7_customer_analysis",
        "app8_clue_consolidated",
    ]
|
||||
|
||||
async def run_cleanup(self) -> dict:
    """Run all three cleanup steps and report how much each one deleted.

    A failing step is logged with its traceback and replaced by a
    sentinel; the remaining steps still run.

    Returns:
        Dict with ``run_logs_deleted`` and ``trigger_jobs_deleted`` (row
        counts, or -1 on failure) and ``cache_deleted`` (per-app counts,
        or an empty dict on failure).
    """
    # (result key, method name, failure log message, failure sentinel)
    steps = (
        ("run_logs_deleted", "_cleanup_run_logs", "清理 ai_run_logs 失败", -1),
        ("trigger_jobs_deleted", "_cleanup_trigger_jobs", "清理 ai_trigger_jobs 失败", -1),
        ("cache_deleted", "_cleanup_cache", "清理 ai_cache 失败", None),
    )

    summary: dict = {}
    for result_key, method_name, failure_message, sentinel in steps:
        try:
            summary[result_key] = await getattr(self, method_name)()
        except Exception:
            logger.exception(failure_message)
            # The cache step reports a fresh empty mapping on failure.
            summary[result_key] = {} if sentinel is None else sentinel

    logger.info("AI 数据清理完成: %s", summary)
    return summary
|
||||
|
||||
async def _cleanup_run_logs(self) -> int:
    """Delete ``biz.ai_run_logs`` rows older than ``RETENTION_DAYS``.

    Returns:
        Number of rows deleted.
    """
    return await self._delete_older_than_retention("ai_run_logs")


async def _cleanup_trigger_jobs(self) -> int:
    """Delete ``biz.ai_trigger_jobs`` rows older than ``RETENTION_DAYS``.

    Returns:
        Number of rows deleted.
    """
    return await self._delete_older_than_retention("ai_trigger_jobs")


async def _delete_older_than_retention(self, table: str) -> int:
    """Delete rows of ``biz.<table>`` whose ``created_at`` is past retention.

    Args:
        table: Unqualified table name. It is interpolated into the SQL
            text, so it must be one of the internal constants passed by
            the two callers above, never user input.

    Returns:
        Number of rows deleted.
    """
    from app.database import get_connection

    conn = get_connection()
    try:
        with conn.cursor() as cur:
            # Abort the DELETE if it runs for more than 5 minutes.
            cur.execute("SET statement_timeout = 300000")
            # The placeholder is kept outside the quoted literal: a %s
            # inside '...' only works through client-side string
            # interpolation and is not a real bound parameter.
            cur.execute(
                f"""
                DELETE FROM biz.{table}
                WHERE created_at < NOW() - %s * INTERVAL '1 day'
                """,
                (self.RETENTION_DAYS,),
            )
            deleted = cur.rowcount
        conn.commit()
        logger.info("清理 %s: 删除 %d 条", table, deleted)
        return deleted
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()
|
||||
|
||||
async def _cleanup_cache(self) -> dict[str, int]:
    """Trim ``biz.ai_cache`` to the newest ``CACHE_LIMIT_PER_APP`` rows per app type.

    Each app type is trimmed and committed in its own transaction, so a
    failure for one app type neither undoes nor misreports the others.

    Returns:
        Mapping of app type to rows deleted, or -1 for an app type whose
        cleanup failed.
    """
    from app.database import get_connection

    result: dict[str, int] = {}
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            # Abort any single DELETE that runs for more than 5 minutes.
            # Committed right away so that a later per-app rollback does
            # not revert the setting along with the failed transaction.
            cur.execute("SET statement_timeout = 300000")
            conn.commit()

            # NOTE(review): AdminAIService.invalidate_cache filters this
            # table on cache_type, while this query uses app_type —
            # confirm which column exists.
            for app_type in self.CACHE_APP_TYPES:
                try:
                    # Keep the newest CACHE_LIMIT_PER_APP ids for this
                    # app type and delete every other row of that type.
                    cur.execute(
                        """
                        DELETE FROM biz.ai_cache
                        WHERE app_type = %s
                          AND id NOT IN (
                              SELECT id FROM biz.ai_cache
                              WHERE app_type = %s
                              ORDER BY created_at DESC
                              LIMIT %s
                          )
                        """,
                        (app_type, app_type, self.CACHE_LIMIT_PER_APP),
                    )
                    deleted = cur.rowcount
                    # Commit per app type: previously a rollback for a
                    # later app type silently discarded these deletes
                    # while they were still reported as done.
                    conn.commit()
                except Exception:
                    logger.exception("清理 ai_cache [%s] 失败", app_type)
                    result[app_type] = -1
                    # Leave the aborted transaction and move on to the next app type.
                    conn.rollback()
                    continue

                result[app_type] = deleted
                if deleted > 0:
                    logger.info(
                        "清理 ai_cache [%s]: 删除 %d 条",
                        app_type,
                        deleted,
                    )
        return result
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()
|
||||
|
||||
|
||||
@trace_service(description_zh="register_cleanup_job", description_en="Register Cleanup Job")
def register_cleanup_job(scheduler) -> None:  # noqa: ANN001
    """Register the AI data cleanup handler under the ``ai_data_cleanup`` job type.

    Intended to be called from the application lifespan (``main.py``) or
    to be paired with a row in the scheduled-tasks table. This function
    only registers the handler; the original notes describe the schedule
    as a daily cron run at 03:00 driven by ``trigger_scheduler``:

    - job_type: 'ai_data_cleanup'
    - trigger_condition: 'cron'
    - trigger_config: {"cron_expression": "0 3 * * *"}

    Args:
        scheduler: Accepted but not used by this function.

    Requirements: E2.1, E2.2, E2.3
    """
    from app.services.trigger_scheduler import register_job

    def _run_cleanup(**_kw):
        """Synchronous wrapper: run the async cleanup in a fresh event loop."""
        service = AICleanupService()
        outcome = asyncio.run(service.run_cleanup())
        logger.info("定时清理任务完成: %s", outcome)

    register_job("ai_data_cleanup", _run_cleanup)
|
||||
Reference in New Issue
Block a user