包含多个会话的累积代码变更: - backend: AI 聊天服务、触发器调度、认证增强、WebSocket、调度器最小间隔 - admin-web: ETL 状态页、任务管理、调度配置、登录优化 - miniprogram: 看板页面、聊天集成、UI 组件、导航更新 - etl: DWS 新任务(finance_area_daily/board_cache)、连接器增强 - tenant-admin: 项目初始化 - db: 19 个迁移脚本(etl_feiqiu 11 + zqyy_app 8) - packages/shared: 枚举和工具函数更新 - tools: 数据库工具、报表生成、健康检查 - docs: PRD/架构/部署/合约文档更新 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
117 lines
3.9 KiB
Python
117 lines
3.9 KiB
Python
"""熔断器 — 按 app_id 独立的断路保护。
|
||
|
||
状态机:CLOSED → OPEN(连续失败达阈值)→ HALF_OPEN(超时后探测)→ CLOSED/OPEN。
|
||
内存实现,单实例部署,不依赖外部存储。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import enum
|
||
import time
|
||
from dataclasses import dataclass, field
|
||
|
||
|
||
class CircuitState(enum.Enum):
    """The three states a circuit breaker can be in."""

    # Normal operation: requests are let through.
    CLOSED = "closed"
    # Tripped: requests are rejected.
    OPEN = "open"
    # Probing after the recovery timeout: requests are let through as probes.
    HALF_OPEN = "half_open"
|
||
|
||
|
||
@dataclass
class _BreakerState:
    """Mutable bookkeeping record for the breaker of a single ``app_id``.

    Timestamps are ``time.monotonic()`` readings, so they are only meaningful
    relative to each other within the current process.
    """

    # Current position in the CLOSED / OPEN / HALF_OPEN state machine.
    state: CircuitState = field(default=CircuitState.CLOSED)
    # Failures counted since the counter was last reset.
    failure_count: int = field(default=0)
    # Monotonic timestamp of the most recent recorded failure (0.0 = none yet).
    last_failure_time: float = field(default=0.0)
    # Monotonic timestamp of the last state transition; defaults to creation time.
    last_state_change: float = field(default_factory=time.monotonic)
|
||
|
||
|
||
class CircuitBreaker:
    """Circuit breaker that tracks an independent state per ``app_id``.

    All state lives in an in-process dict; a record is created lazily the
    first time an ``app_id`` is seen.

    - ``check()``: report the current state; an OPEN breaker whose recovery
      timeout has elapsed is switched to HALF_OPEN first.
    - ``record_success()``: HALF_OPEN -> CLOSED; in CLOSED it resets the
      failure counter.
    - ``record_failure()``: CLOSED -> OPEN once the failure counter reaches
      the threshold; HALF_OPEN -> OPEN immediately.

    Note: while a breaker is HALF_OPEN, every ``check()`` call returns
    HALF_OPEN; this class does not itself cap the number of probe requests.
    """

    def __init__(
        self,
        failure_threshold: int = 5,
        recovery_timeout: int = 60,
    ) -> None:
        """Create a breaker registry with no tracked ``app_id`` yet.

        Args:
            failure_threshold: Failure-counter value at which a CLOSED
                breaker switches to OPEN.
            recovery_timeout: Seconds (compared against ``time.monotonic()``
                differences) that must pass since the last recorded failure
                before an OPEN breaker becomes HALF_OPEN.
        """
        self._breakers: dict[str, _BreakerState] = {}
        self._recovery_timeout = recovery_timeout
        self._failure_threshold = failure_threshold

    def _get_state(self, app_id: str) -> _BreakerState:
        """Return the record for ``app_id``, creating a default one if absent."""
        try:
            return self._breakers[app_id]
        except KeyError:
            created = _BreakerState()
            self._breakers[app_id] = created
            return created

    def check(self, app_id: str) -> CircuitState:
        """Return the state that applies to a request for ``app_id`` right now.

        - CLOSED / HALF_OPEN: returned unchanged (request may proceed).
        - OPEN, recovery timeout not yet elapsed: returns OPEN (reject).
        - OPEN, recovery timeout elapsed: the breaker is moved to HALF_OPEN
          and HALF_OPEN is returned (probe allowed).
        """
        entry = self._get_state(app_id)

        if entry.state in (CircuitState.CLOSED, CircuitState.HALF_OPEN):
            return entry.state

        # Breaker is OPEN: measure how long ago the last failure was recorded.
        waited = time.monotonic() - entry.last_failure_time
        cooled_down = waited >= self._recovery_timeout
        if not cooled_down:
            return CircuitState.OPEN

        # Recovery timeout elapsed: switch to probing.
        entry.state = CircuitState.HALF_OPEN
        entry.last_state_change = time.monotonic()
        return CircuitState.HALF_OPEN

    def record_success(self, app_id: str) -> None:
        """Register a successful call for ``app_id``.

        - HALF_OPEN: the probe succeeded, so the breaker returns to CLOSED
          and the failure counter is cleared.
        - CLOSED: the failure counter is cleared.
        - OPEN: nothing changes.
        """
        entry = self._get_state(app_id)
        current = entry.state

        if current not in (CircuitState.HALF_OPEN, CircuitState.CLOSED):
            return

        # Both remaining states clear the failure counter.
        entry.failure_count = 0
        if current == CircuitState.HALF_OPEN:
            entry.state = CircuitState.CLOSED
            entry.last_state_change = time.monotonic()

    def record_failure(self, app_id: str) -> None:
        """Register a failed call for ``app_id``.

        - CLOSED: increment the failure counter; on reaching the threshold
          the breaker switches to OPEN.
        - HALF_OPEN: the probe failed, so the breaker switches back to OPEN
          with the counter pinned at the threshold.
        - OPEN: nothing changes.
        """
        entry = self._get_state(app_id)
        stamp = time.monotonic()
        current = entry.state

        if current == CircuitState.HALF_OPEN:
            # Failed probe: trip again straight away.
            entry.failure_count = self._failure_threshold
            tripped = True
        elif current == CircuitState.CLOSED:
            entry.failure_count += 1
            tripped = entry.failure_count >= self._failure_threshold
        else:
            return

        entry.last_failure_time = stamp
        if tripped:
            entry.state = CircuitState.OPEN
            entry.last_state_change = stamp
|