Neo F1-5 反馈: "让沙箱起到其真正的作用. 真正的模拟日期, 仅能看到沙箱设定日期 及之前日期的数据, 并运行 AI 的各个业务." 调研发现 (4 个并行子代理): batch-run 端点 _run_batch 是空壳 stub (只 logger.info, 实际不跑 AI), GUC apply_runtime_session_vars 0 处调用 (dead code), 7 张业务表 6 张有 runtime 复合索引唯独 ai_run_logs 漏建, App2/2a 3 行 _calc_date_range 漏传 ref_date. 本 commit (F1-5a 阶段 A 主体, F1-5b 后续完整 zqyy_app RLS 视图层): 后端核心: - admin_service.py: _run_batch 真实化 (Semaphore(5)+asyncio.gather+ return_exceptions=True+ctx_snapshot 防漂移); estimate 入口抓 RuntimeContext 快照, confirm 取出传给 worker - admin_ai.py: confirm_batch_run lazy 注入 dispatcher - admin_service.retry_trigger_job: INSERT 落 runtime_mode + sandbox_instance_id 列 (用 runtime_insert_columns helper) - runtime_context.py: get_runtime_context 加 bind_to_session 参数, 激活 GUC app.current_business_date / app.current_runtime_mode - run_log_service.create_log: 启用 bind_to_session=True 试点 App2/2a 3 行 ref_date 修复: - app2_finance_prompt.py:817 储值卡余额变化板块 - app2_finance_prompt.py:841 日粒度 series + 异常检测窗口 - app2a_finance_area_prompt.py:466 区域日粒度 series DB: - migrations/20260505__ai_run_logs_runtime_index.sql: 补 (site_id, runtime_mode, sandbox_instance_id, created_at DESC) 复合索引 前端: - AIOperations.tsx: 顶部加 sandbox 模式提示条 (Alert 显示 sandbox_date + sandbox_instance_id + 影响范围 + 切回 live 入口) 未做 (留 F1-5b 完整 zqyy_app RLS 视图层一并): - B1 admin_service 6 处 CURRENT_DATE -> business_date - B2 fdw_queries 异常分支兜底 - GUC 完整传递 (fdw_queries / page_context 等) - 测试 3 套 (.gitignore:71 排除, F2-2 入仓时 commit) - P20 SPEC \xa76/\xa710/\xa711/\xa715 (F1-5b 完整收口后同步更准确) Neo 决策: docs/_overview/wave1-findings/F1-5-impl-decisions.md 详见 docs/audit/changes/2026-05-05__wave1_f1_5a_sandbox_batch_run.md
220 lines
7.9 KiB
Python
220 lines
7.9 KiB
Python
"""AI 运行日志服务 — biz.ai_run_logs 表的 CRUD 操作。
|
||
|
||
每次 Application API 调用前创建 pending 记录,调用过程中更新状态,
|
||
调用结束后记录结果。同时提供日/月 token 聚合查询,实现 UsageProvider 协议
|
||
以便注入 BudgetTracker。
|
||
|
||
request_prompt 写入前截断为前 2000 字符,避免大 prompt 占用过多存储。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from datetime import datetime, timezone
|
||
from typing import Callable
|
||
|
||
import psycopg2.extensions
|
||
|
||
from app.services.runtime_context import LIVE_INSTANCE_ID, MODE_LIVE, MODE_SANDBOX, get_runtime_context
|
||
|
||
# prompt 最大存储长度
|
||
# 2026-04-22:2000→8000。app2_finance 真实 prompt 约 4-8KB(72 组合财务看板 + 中文 key 膨胀),
|
||
# 2000 字符截断会丢掉 optimization-critical 字段(如 discount_items 含团购折扣明细),
|
||
# admin-web 调用详情页无法完整审阅 → 提高到 8000 覆盖绝大部分场景
|
||
_MAX_PROMPT_LENGTH = 8000
|
||
|
||
|
||
def _truncate_prompt(prompt: str | None) -> str | None:
|
||
"""截断 prompt 为 _MAX_PROMPT_LENGTH 字符上限。None 原样返回。"""
|
||
if prompt is None:
|
||
return None
|
||
return prompt[:_MAX_PROMPT_LENGTH]
|
||
|
||
|
||
class AIRunLogService:
|
||
"""AI 运行日志 CRUD,实现 UsageProvider 协议。
|
||
|
||
构造函数接受 get_conn callable,每次操作时获取数据库连接,
|
||
避免长期持有连接导致超时或连接池耗尽。
|
||
"""
|
||
|
||
def __init__(self, get_conn: Callable[[], psycopg2.extensions.connection]) -> None:
|
||
self._get_conn = get_conn
|
||
|
||
# ── 创建 ──────────────────────────────────────────────
|
||
|
||
def create_log(
|
||
self,
|
||
site_id: int,
|
||
app_type: str,
|
||
trigger_type: str,
|
||
*,
|
||
member_id: int | None = None,
|
||
request_prompt: str | None = None,
|
||
session_id: str | None = None,
|
||
) -> int:
|
||
"""创建日志记录(status: pending),返回 log_id。
|
||
|
||
request_prompt 自动截断为前 2000 字符。
|
||
"""
|
||
truncated = _truncate_prompt(request_prompt)
|
||
conn = self._get_conn()
|
||
try:
|
||
# F1-5a: bind_to_session=True 激活 GUC app.current_business_date,
|
||
# 让本事务内所有 ETL 库 app.v_* 视图自动按 business_date 上界裁剪
|
||
# (如果后续 fetch 走 ETL 视图)。
|
||
ctx = get_runtime_context(site_id, conn=conn, bind_to_session=True)
|
||
runtime_mode = MODE_SANDBOX if ctx.is_sandbox else MODE_LIVE
|
||
sandbox_instance_id = ctx.sandbox_instance_id if ctx.is_sandbox else LIVE_INSTANCE_ID
|
||
with conn.cursor() as cur:
|
||
cur.execute(
|
||
"""
|
||
INSERT INTO biz.ai_run_logs
|
||
(site_id, app_type, trigger_type, member_id,
|
||
request_prompt, session_id, status,
|
||
runtime_mode, sandbox_instance_id)
|
||
VALUES (%s, %s, %s, %s, %s, %s, 'pending', %s, %s)
|
||
RETURNING id
|
||
""",
|
||
(site_id, app_type, trigger_type, member_id,
|
||
truncated, session_id, runtime_mode, sandbox_instance_id),
|
||
)
|
||
row = cur.fetchone()
|
||
assert row is not None, "INSERT RETURNING 应返回 id"
|
||
log_id: int = row[0]
|
||
conn.commit()
|
||
return log_id
|
||
except Exception:
|
||
conn.rollback()
|
||
raise
|
||
|
||
# ── 状态转换 ──────────────────────────────────────────
|
||
|
||
def update_running(self, log_id: int) -> None:
|
||
"""更新为 running。"""
|
||
conn = self._get_conn()
|
||
try:
|
||
with conn.cursor() as cur:
|
||
cur.execute(
|
||
"""
|
||
UPDATE biz.ai_run_logs
|
||
SET status = 'running'
|
||
WHERE id = %s
|
||
""",
|
||
(log_id,),
|
||
)
|
||
conn.commit()
|
||
except Exception:
|
||
conn.rollback()
|
||
raise
|
||
|
||
def update_success(
|
||
self,
|
||
log_id: int,
|
||
response_text: str,
|
||
tokens_used: int,
|
||
latency_ms: int,
|
||
) -> None:
|
||
"""更新为 success,记录响应、token 消耗和耗时。"""
|
||
now = datetime.now(timezone.utc)
|
||
conn = self._get_conn()
|
||
try:
|
||
with conn.cursor() as cur:
|
||
cur.execute(
|
||
"""
|
||
UPDATE biz.ai_run_logs
|
||
SET status = 'success',
|
||
response_text = %s,
|
||
tokens_used = %s,
|
||
latency_ms = %s,
|
||
finished_at = %s
|
||
WHERE id = %s
|
||
""",
|
||
(response_text, tokens_used, latency_ms, now, log_id),
|
||
)
|
||
conn.commit()
|
||
except Exception:
|
||
conn.rollback()
|
||
raise
|
||
|
||
def update_failed(
|
||
self,
|
||
log_id: int,
|
||
error_message: str,
|
||
latency_ms: int,
|
||
) -> None:
|
||
"""更新为 failed,记录错误信息和耗时。"""
|
||
now = datetime.now(timezone.utc)
|
||
conn = self._get_conn()
|
||
try:
|
||
with conn.cursor() as cur:
|
||
cur.execute(
|
||
"""
|
||
UPDATE biz.ai_run_logs
|
||
SET status = 'failed',
|
||
error_message = %s,
|
||
latency_ms = %s,
|
||
finished_at = %s
|
||
WHERE id = %s
|
||
""",
|
||
(error_message, latency_ms, now, log_id),
|
||
)
|
||
conn.commit()
|
||
except Exception:
|
||
conn.rollback()
|
||
raise
|
||
|
||
def update_timeout(self, log_id: int, latency_ms: int) -> None:
|
||
"""更新为 timeout。"""
|
||
now = datetime.now(timezone.utc)
|
||
conn = self._get_conn()
|
||
try:
|
||
with conn.cursor() as cur:
|
||
cur.execute(
|
||
"""
|
||
UPDATE biz.ai_run_logs
|
||
SET status = 'timeout',
|
||
latency_ms = %s,
|
||
finished_at = %s
|
||
WHERE id = %s
|
||
""",
|
||
(latency_ms, now, log_id),
|
||
)
|
||
conn.commit()
|
||
except Exception:
|
||
conn.rollback()
|
||
raise
|
||
|
||
# ── UsageProvider 协议实现 ────────────────────────────
|
||
|
||
def get_daily_usage(self) -> int:
|
||
"""聚合今日 token 消耗(status='success',created_at 为今日)。"""
|
||
conn = self._get_conn()
|
||
with conn.cursor() as cur:
|
||
cur.execute(
|
||
"""
|
||
SELECT COALESCE(SUM(tokens_used), 0)
|
||
FROM biz.ai_run_logs
|
||
WHERE status = 'success'
|
||
AND created_at >= CURRENT_DATE
|
||
AND created_at < CURRENT_DATE + INTERVAL '1 day'
|
||
"""
|
||
)
|
||
row = cur.fetchone()
|
||
return int(row[0]) if row else 0
|
||
|
||
def get_monthly_usage(self) -> int:
|
||
"""聚合本月 token 消耗(status='success',created_at 为本月)。"""
|
||
conn = self._get_conn()
|
||
with conn.cursor() as cur:
|
||
cur.execute(
|
||
"""
|
||
SELECT COALESCE(SUM(tokens_used), 0)
|
||
FROM biz.ai_run_logs
|
||
WHERE status = 'success'
|
||
AND created_at >= date_trunc('month', CURRENT_DATE)
|
||
AND created_at < date_trunc('month', CURRENT_DATE) + INTERVAL '1 month'
|
||
"""
|
||
)
|
||
row = cur.fetchone()
|
||
return int(row[0]) if row else 0
|