Files
Neo-ZQYY/apps/backend/app/ai/run_log_service.py
Neo 421e193041 fix(ai): F1-5a 沙箱 batch-run 接入 runtime_context (W1 / 阶段 A 主体)
Neo F1-5 反馈: "让沙箱起到其真正的作用. 真正的模拟日期, 仅能看到沙箱设定日期
及之前日期的数据, 并运行 AI 的各个业务."

调研发现 (4 个并行子代理): batch-run 端点 _run_batch 是空壳 stub
(只 logger.info, 实际不跑 AI), GUC apply_runtime_session_vars 0 处调用
(dead code), 7 张业务表 6 张有 runtime 复合索引唯独 ai_run_logs 漏建,
App2/2a 3 行 _calc_date_range 漏传 ref_date.

本 commit (F1-5a 阶段 A 主体, F1-5b 后续完整 zqyy_app RLS 视图层):

后端核心:
- admin_service.py: _run_batch 真实化 (Semaphore(5)+asyncio.gather+
  return_exceptions=True+ctx_snapshot 防漂移); estimate 入口抓
  RuntimeContext 快照, confirm 取出传给 worker
- admin_ai.py: confirm_batch_run lazy 注入 dispatcher
- admin_service.retry_trigger_job: INSERT 落 runtime_mode +
  sandbox_instance_id 列 (用 runtime_insert_columns helper)
- runtime_context.py: get_runtime_context 加 bind_to_session 参数,
  激活 GUC app.current_business_date / app.current_runtime_mode
- run_log_service.create_log: 启用 bind_to_session=True 试点

App2/2a 3 行 ref_date 修复:
- app2_finance_prompt.py:817 储值卡余额变化板块
- app2_finance_prompt.py:841 日粒度 series + 异常检测窗口
- app2a_finance_area_prompt.py:466 区域日粒度 series

DB:
- migrations/20260505__ai_run_logs_runtime_index.sql:
  补 (site_id, runtime_mode, sandbox_instance_id, created_at DESC) 复合索引

前端:
- AIOperations.tsx: 顶部加 sandbox 模式提示条 (Alert 显示 sandbox_date +
  sandbox_instance_id + 影响范围 + 切回 live 入口)

未做 (留 F1-5b 完整 zqyy_app RLS 视图层一并):
- B1 admin_service 6 处 CURRENT_DATE -> business_date
- B2 fdw_queries 异常分支兜底
- GUC 完整传递 (fdw_queries / page_context 等)
- 测试 3 套 (.gitignore:71 排除, F2-2 入仓时 commit)
- P20 SPEC \xa76/\xa710/\xa711/\xa715 (F1-5b 完整收口后同步更准确)

Neo 决策: docs/_overview/wave1-findings/F1-5-impl-decisions.md

详见 docs/audit/changes/2026-05-05__wave1_f1_5a_sandbox_batch_run.md
2026-05-05 03:01:48 +08:00

220 lines
7.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""AI 运行日志服务 — biz.ai_run_logs 表的 CRUD 操作。
每次 Application API 调用前创建 pending 记录,调用过程中更新状态,
调用结束后记录结果。同时提供日/月 token 聚合查询,实现 UsageProvider 协议
以便注入 BudgetTracker。
request_prompt 写入前截断为前 2000 字符,避免大 prompt 占用过多存储。
"""
from __future__ import annotations
from datetime import datetime, timezone
from typing import Callable
import psycopg2.extensions
from app.services.runtime_context import LIVE_INSTANCE_ID, MODE_LIVE, MODE_SANDBOX, get_runtime_context
# prompt 最大存储长度
# 2026-04-222000→8000。app2_finance 真实 prompt 约 4-8KB72 组合财务看板 + 中文 key 膨胀),
# 2000 字符截断会丢掉 optimization-critical 字段(如 discount_items 含团购折扣明细),
# admin-web 调用详情页无法完整审阅 → 提高到 8000 覆盖绝大部分场景
_MAX_PROMPT_LENGTH = 8000
def _truncate_prompt(prompt: str | None) -> str | None:
"""截断 prompt 为 _MAX_PROMPT_LENGTH 字符上限。None 原样返回。"""
if prompt is None:
return None
return prompt[:_MAX_PROMPT_LENGTH]
class AIRunLogService:
"""AI 运行日志 CRUD实现 UsageProvider 协议。
构造函数接受 get_conn callable每次操作时获取数据库连接
避免长期持有连接导致超时或连接池耗尽。
"""
def __init__(self, get_conn: Callable[[], psycopg2.extensions.connection]) -> None:
self._get_conn = get_conn
# ── 创建 ──────────────────────────────────────────────
def create_log(
self,
site_id: int,
app_type: str,
trigger_type: str,
*,
member_id: int | None = None,
request_prompt: str | None = None,
session_id: str | None = None,
) -> int:
"""创建日志记录status: pending返回 log_id。
request_prompt 自动截断为前 2000 字符。
"""
truncated = _truncate_prompt(request_prompt)
conn = self._get_conn()
try:
# F1-5a: bind_to_session=True 激活 GUC app.current_business_date,
# 让本事务内所有 ETL 库 app.v_* 视图自动按 business_date 上界裁剪
# (如果后续 fetch 走 ETL 视图)。
ctx = get_runtime_context(site_id, conn=conn, bind_to_session=True)
runtime_mode = MODE_SANDBOX if ctx.is_sandbox else MODE_LIVE
sandbox_instance_id = ctx.sandbox_instance_id if ctx.is_sandbox else LIVE_INSTANCE_ID
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO biz.ai_run_logs
(site_id, app_type, trigger_type, member_id,
request_prompt, session_id, status,
runtime_mode, sandbox_instance_id)
VALUES (%s, %s, %s, %s, %s, %s, 'pending', %s, %s)
RETURNING id
""",
(site_id, app_type, trigger_type, member_id,
truncated, session_id, runtime_mode, sandbox_instance_id),
)
row = cur.fetchone()
assert row is not None, "INSERT RETURNING 应返回 id"
log_id: int = row[0]
conn.commit()
return log_id
except Exception:
conn.rollback()
raise
# ── 状态转换 ──────────────────────────────────────────
def update_running(self, log_id: int) -> None:
"""更新为 running。"""
conn = self._get_conn()
try:
with conn.cursor() as cur:
cur.execute(
"""
UPDATE biz.ai_run_logs
SET status = 'running'
WHERE id = %s
""",
(log_id,),
)
conn.commit()
except Exception:
conn.rollback()
raise
def update_success(
self,
log_id: int,
response_text: str,
tokens_used: int,
latency_ms: int,
) -> None:
"""更新为 success记录响应、token 消耗和耗时。"""
now = datetime.now(timezone.utc)
conn = self._get_conn()
try:
with conn.cursor() as cur:
cur.execute(
"""
UPDATE biz.ai_run_logs
SET status = 'success',
response_text = %s,
tokens_used = %s,
latency_ms = %s,
finished_at = %s
WHERE id = %s
""",
(response_text, tokens_used, latency_ms, now, log_id),
)
conn.commit()
except Exception:
conn.rollback()
raise
def update_failed(
self,
log_id: int,
error_message: str,
latency_ms: int,
) -> None:
"""更新为 failed记录错误信息和耗时。"""
now = datetime.now(timezone.utc)
conn = self._get_conn()
try:
with conn.cursor() as cur:
cur.execute(
"""
UPDATE biz.ai_run_logs
SET status = 'failed',
error_message = %s,
latency_ms = %s,
finished_at = %s
WHERE id = %s
""",
(error_message, latency_ms, now, log_id),
)
conn.commit()
except Exception:
conn.rollback()
raise
def update_timeout(self, log_id: int, latency_ms: int) -> None:
"""更新为 timeout。"""
now = datetime.now(timezone.utc)
conn = self._get_conn()
try:
with conn.cursor() as cur:
cur.execute(
"""
UPDATE biz.ai_run_logs
SET status = 'timeout',
latency_ms = %s,
finished_at = %s
WHERE id = %s
""",
(latency_ms, now, log_id),
)
conn.commit()
except Exception:
conn.rollback()
raise
# ── UsageProvider 协议实现 ────────────────────────────
def get_daily_usage(self) -> int:
"""聚合今日 token 消耗status='success'created_at 为今日)。"""
conn = self._get_conn()
with conn.cursor() as cur:
cur.execute(
"""
SELECT COALESCE(SUM(tokens_used), 0)
FROM biz.ai_run_logs
WHERE status = 'success'
AND created_at >= CURRENT_DATE
AND created_at < CURRENT_DATE + INTERVAL '1 day'
"""
)
row = cur.fetchone()
return int(row[0]) if row else 0
def get_monthly_usage(self) -> int:
"""聚合本月 token 消耗status='success'created_at 为本月)。"""
conn = self._get_conn()
with conn.cursor() as cur:
cur.execute(
"""
SELECT COALESCE(SUM(tokens_used), 0)
FROM biz.ai_run_logs
WHERE status = 'success'
AND created_at >= date_trunc('month', CURRENT_DATE)
AND created_at < date_trunc('month', CURRENT_DATE) + INTERVAL '1 month'
"""
)
row = cur.fetchone()
return int(row[0]) if row else 0