Files
common/middleware/circuit_breaker.go

1398 lines
43 KiB
Go
Raw Normal View History

2025-12-31 23:38:33 +08:00
package middleware
import (
"context"
"fmt"
2026-01-01 13:14:46 +08:00
"strconv"
2025-12-31 23:38:33 +08:00
"strings"
"sync"
2026-01-01 07:38:00 +08:00
"sync/atomic"
2025-12-31 23:38:33 +08:00
"time"
2026-02-24 15:42:36 +08:00
"gitea.com/red-future/common/redis"
2026-01-01 01:33:59 +08:00
"github.com/alibaba/sentinel-golang/api"
"github.com/alibaba/sentinel-golang/core/circuitbreaker"
2025-12-31 23:38:33 +08:00
"github.com/gogf/gf/v2/frame/g"
"github.com/gogf/gf/v2/net/ghttp"
)
2026-01-01 01:33:59 +08:00
// CircuitBreakerState is the string form of a circuit breaker state, as used
// in logs, notifications and the state accessors of CircuitBreakerInfo.
type CircuitBreakerState string

// String-valued breaker states.
const (
	// StateClosed is the state mapped to the numeric code 0 (and to any
	// unknown code).
	StateClosed CircuitBreakerState = "closed"
	// StateOpen is the state mapped to the numeric code 1.
	StateOpen CircuitBreakerState = "open"
	// StateHalfOpen is the state mapped to the numeric code 2.
	StateHalfOpen CircuitBreakerState = "halfopen"
)
2026-01-01 12:05:12 +08:00
// Numeric state codes stored in CircuitBreakerInfo.State (an atomic.Int64).
// The values are fixed: closed=0, open=1, half-open=2.
const (
	stateClosed int64 = iota
	stateOpen
	stateHalfOpen
)
2026-01-01 01:33:59 +08:00
// CircuitBreakerConfig holds the settings of one interface-level circuit
// breaker. The *Parsed fields are derived from their string counterparts when
// the configuration is loaded.
type CircuitBreakerConfig struct {
	// Enabled switches the breaker on; the middleware bypasses disabled breakers.
	Enabled bool
	// MaxFailures is the Sentinel threshold when the error-count strategy is used.
	MaxFailures int
	// Timeout is the textual retry timeout (time.ParseDuration syntax).
	Timeout string
	// TimeoutParsed is Timeout as a duration; it becomes the rule's RetryTimeoutMs.
	TimeoutParsed time.Duration
	// SuccessStatusCodes lists the HTTP status codes treated as success.
	SuccessStatusCodes []int
	// SlowRequestThreshold is the textual slow-request limit.
	SlowRequestThreshold string
	// SlowRequestThresholdParsed is SlowRequestThreshold as a duration.
	SlowRequestThresholdParsed time.Duration
	// EnableSlidingWindow selects the slow-request-ratio strategy instead of
	// the error-count strategy.
	EnableSlidingWindow bool
	// FailureRateThreshold is the ratio threshold (0..1) used by the
	// slow-request-ratio strategy unless the adaptive threshold is enabled.
	FailureRateThreshold float64
	// EnableFallback together with a non-empty FallbackMessage makes the
	// fallback response use FallbackMessage as its body.
	EnableFallback bool
	// FallbackMessage is the custom fallback body.
	FallbackMessage string
	// RequestTimeout is the per-request context timeout in milliseconds;
	// 0 disables it.
	RequestTimeout int
	// DistributedTTL enables distributed state checks/sync when > 0.
	// NOTE(review): presumably a TTL in seconds — confirm against the sync helper.
	DistributedTTL int
	// StatIntervalMs is the Sentinel statistic interval in milliseconds.
	StatIntervalMs int
	// MinRequestAmount is the Sentinel minimum request amount; the loader
	// falls back to MaxFailures when it is 0.
	MinRequestAmount int
	// HalfOpenMaxRequests caps the probe requests admitted while half-open.
	HalfOpenMaxRequests int
	// HalfOpenSuccessThreshold is the success ratio (0..1) that closes a
	// half-open breaker.
	HalfOpenSuccessThreshold float64
	// WarmupDuration is the textual warm-up period after initialisation.
	WarmupDuration string
	// WarmupDurationParsed is WarmupDuration as a duration.
	WarmupDurationParsed time.Duration
	// EnableAdaptiveThreshold replaces FailureRateThreshold by the midpoint of
	// AdaptiveMinThreshold and AdaptiveMaxThreshold at initialisation.
	EnableAdaptiveThreshold bool
	// AdaptiveMinThreshold is the lower adaptive bound (0..1).
	AdaptiveMinThreshold float64
	// AdaptiveMaxThreshold is the upper adaptive bound (0..1).
	AdaptiveMaxThreshold float64
}
// CircuitBreakerMetrics is the set of atomic counters and timestamps kept for
// one circuit breaker. Timestamps are Unix seconds; response times are
// nanoseconds.
type CircuitBreakerMetrics struct {
	// Request counters.
	TotalRequests   atomic.Int64 // requests seen for an enabled breaker
	PassRequests    atomic.Int64 // requests that finished with a success status
	BlockRequests   atomic.Int64 // requests rejected with a fallback response
	FailureRequests atomic.Int64 // requests that finished with a failure status
	SlowRequests    atomic.Int64 // requests slower than the slow-request threshold

	// State transition counters.
	OpenCount     atomic.Int64
	ClosedCount   atomic.Int64
	HalfOpenCount atomic.Int64

	// Timestamps (Unix seconds).
	LastResetTime    atomic.Int64
	LastOpenTime     atomic.Int64
	NextRetryTime    atomic.Int64 // set when the breaker opens
	LastCloseTime    atomic.Int64
	LastHalfOpenTime atomic.Int64

	// Half-open probe bookkeeping.
	HalfOpenRequests atomic.Int64
	HalfOpenPassed   atomic.Int64
	HalfOpenFailed   atomic.Int64

	// Response-time aggregates (nanoseconds).
	TotalResponseTime atomic.Int64
	MinResponseTime   atomic.Int64
	MaxResponseTime   atomic.Int64

	// Rolling window used for the success-rate warning.
	WindowStartTime atomic.Int64 // start of the current window (Unix seconds)
	WindowRequests  atomic.Int64 // requests recorded in the window
	WindowFailures  atomic.Int64 // failures recorded in the window
}
2026-01-01 15:13:36 +08:00
// The methods below make *CircuitBreakerMetrics satisfy HalfOpenMetrics.

// GetHalfOpenRequests returns a pointer to the HalfOpenRequests counter.
func (m *CircuitBreakerMetrics) GetHalfOpenRequests() *atomic.Int64 {
	counter := &m.HalfOpenRequests
	return counter
}

// GetHalfOpenPassed returns a pointer to the HalfOpenPassed counter.
func (m *CircuitBreakerMetrics) GetHalfOpenPassed() *atomic.Int64 {
	counter := &m.HalfOpenPassed
	return counter
}

// GetHalfOpenFailed returns a pointer to the HalfOpenFailed counter.
func (m *CircuitBreakerMetrics) GetHalfOpenFailed() *atomic.Int64 {
	counter := &m.HalfOpenFailed
	return counter
}

// AddHalfOpenRequests atomically adds delta to HalfOpenRequests.
func (m *CircuitBreakerMetrics) AddHalfOpenRequests(delta int64) {
	m.GetHalfOpenRequests().Add(delta)
}

// AddHalfOpenPassed atomically adds delta to HalfOpenPassed.
func (m *CircuitBreakerMetrics) AddHalfOpenPassed(delta int64) {
	m.GetHalfOpenPassed().Add(delta)
}

// AddHalfOpenFailed atomically adds delta to HalfOpenFailed.
func (m *CircuitBreakerMetrics) AddHalfOpenFailed(delta int64) {
	m.GetHalfOpenFailed().Add(delta)
}
// HalfOpenMetrics is the counter access HalfOpenManager needs: for each of
// the three half-open counters, a pointer getter and an atomic adder.
type HalfOpenMetrics interface {
	// Requests counter.
	GetHalfOpenRequests() *atomic.Int64
	AddHalfOpenRequests(delta int64)

	// Passed counter.
	GetHalfOpenPassed() *atomic.Int64
	AddHalfOpenPassed(delta int64)

	// Failed counter.
	GetHalfOpenFailed() *atomic.Int64
	AddHalfOpenFailed(delta int64)
}
// HalfOpenManager serialises updates to the half-open counters so that
// concurrent goroutines cannot interleave a check with the matching update.
// Its slot, result and reset methods all take the write lock.
type HalfOpenManager struct {
	// mu guards every manager operation.
	mu sync.RWMutex
}
// NewHalfOpenManager returns a fresh, ready-to-use HalfOpenManager.
func NewHalfOpenManager() *HalfOpenManager {
	return new(HalfOpenManager)
}
// Lazily created process-wide HalfOpenManager and the guard that makes its
// creation happen exactly once.
var (
	halfOpenManagerInstance *HalfOpenManager
	halfOpenManagerOnce     sync.Once
)

// GetHalfOpenManager returns the shared HalfOpenManager, creating it on the
// first call. Every call returns the same pointer.
func GetHalfOpenManager() *HalfOpenManager {
	create := func() {
		halfOpenManagerInstance = NewHalfOpenManager()
	}
	halfOpenManagerOnce.Do(create)
	return halfOpenManagerInstance
}
// TryAcquireHalfOpenSlot 尝试获取半开状态的请求槽位
func (m *HalfOpenManager) TryAcquireHalfOpenSlot(metrics HalfOpenMetrics, maxRequests int) (bool, int) {
if maxRequests <= 0 {
return false, 0
}
m.mu.Lock()
defer m.mu.Unlock()
currentRequests := int(metrics.GetHalfOpenRequests().Load())
if currentRequests >= maxRequests {
return false, currentRequests
}
2026-01-05 16:14:14 +08:00
// 原子性保证在一个CAS操作中增加计数
// 这样可以确保HalfOpenRequests和HalfOpenPassed的一致性
2026-01-01 15:13:36 +08:00
metrics.AddHalfOpenRequests(1)
metrics.AddHalfOpenPassed(1)
return true, currentRequests + 1
}
// RecordHalfOpenResult 记录半开状态请求结果,并检查是否达到成功阈值
func (m *HalfOpenManager) RecordHalfOpenResult(metrics HalfOpenMetrics, isSuccess bool, successThreshold float64) bool {
if successThreshold < 0 || successThreshold > 1 {
successThreshold = 0.5
}
m.mu.Lock()
defer m.mu.Unlock()
2026-01-05 16:14:14 +08:00
// 原子性:先减少请求计数
2026-01-01 15:13:36 +08:00
metrics.AddHalfOpenRequests(-1)
2026-01-05 16:14:14 +08:00
// 记录结果
2026-01-01 15:13:36 +08:00
if isSuccess {
metrics.AddHalfOpenPassed(1)
} else {
metrics.AddHalfOpenFailed(1)
}
2026-01-05 16:14:14 +08:00
// 在锁保护下检查阈值,确保读取到一致的数据
2026-01-01 15:13:36 +08:00
return m.checkHalfOpenSuccessThreshold(metrics, successThreshold)
}
// checkHalfOpenSuccessThreshold 检查半开状态的成功率是否达到阈值
func (m *HalfOpenManager) checkHalfOpenSuccessThreshold(metrics HalfOpenMetrics, successThreshold float64) bool {
2026-01-05 16:14:14 +08:00
// 原子性:一次性读取所有计数器,避免读取到不一致的数据
2026-01-01 15:13:36 +08:00
passedRequests := metrics.GetHalfOpenPassed().Load()
2026-01-05 16:14:14 +08:00
failedRequests := metrics.GetHalfOpenFailed().Load()
totalRequests := passedRequests + failedRequests
2026-01-01 15:13:36 +08:00
if totalRequests == 0 {
return false
}
2026-01-05 16:28:29 +08:00
// 使用浮点数除法计算成功率,避免整数除法精度丢失问题
// 例如: passedRequests=1, failedRequests=2, 则 successRate = 0.333... 而不是 0
2026-01-01 15:13:36 +08:00
successRate := float64(passedRequests) / float64(totalRequests)
return successRate >= successThreshold
}
// ResetHalfOpenStats sets the three half-open counters (requests, passed,
// failed) back to zero while holding the manager's mutex.
func (m *HalfOpenManager) ResetHalfOpenStats(metrics HalfOpenMetrics) {
	m.mu.Lock()
	defer m.mu.Unlock()

	counters := [...]*atomic.Int64{
		metrics.GetHalfOpenRequests(),
		metrics.GetHalfOpenPassed(),
		metrics.GetHalfOpenFailed(),
	}
	for _, counter := range counters {
		counter.Store(0)
	}
}
2026-01-01 01:33:59 +08:00
// CircuitBreakerInfo 熔断器信息
type CircuitBreakerInfo struct {
2026-01-01 13:14:46 +08:00
ResourceName string
State atomic.Int64
Config *CircuitBreakerConfig
Metrics *CircuitBreakerMetrics
SuccessCodeMap map[int]bool
AdaptiveThreshold float64
WarmupEndTime int64
2025-12-31 23:38:33 +08:00
}
// circuitBreakers maps a resource pattern to its *CircuitBreakerInfo.
var circuitBreakers sync.Map

// stateChangeListeners and stateChangeListenersRegistered are registries for
// state-change listeners.
// NOTE(review): their key/value types are not visible in this part of the
// file — confirm against the listener registration code.
var stateChangeListeners sync.Map
var stateChangeListenersRegistered sync.Map
2026-01-01 13:14:46 +08:00
// Defaults applied by the config loader when a key is missing or unusable.
const (
	// Thresholds and counts.
	defaultMaxFailures              = 5
	defaultHalfOpenMaxRequests      = 5
	defaultHalfOpenSuccessThreshold = 0.5

	// Durations in time.ParseDuration syntax.
	defaultTimeout              = "60s"
	defaultSlowRequestThreshold = "3s"
	defaultWarmupDuration       = "10s"

	// Plain numeric settings.
	defaultStatIntervalMs = 1000  // milliseconds
	defaultRequestTimeout = 30000 // milliseconds
	defaultDistributedTTL = 300   // NOTE(review): presumably seconds — confirm
)
// getState returns the breaker's current state in string form. Any stored
// code other than stateOpen or stateHalfOpen is reported as StateClosed.
func (cb *CircuitBreakerInfo) getState() CircuitBreakerState {
	code := cb.State.Load()
	if code == stateOpen {
		return StateOpen
	}
	if code == stateHalfOpen {
		return StateHalfOpen
	}
	return StateClosed
}
// setState switches the breaker to state, updating the state metrics when the
// state actually changes, and returns the previous state.
func (cb *CircuitBreakerInfo) setState(state CircuitBreakerState) CircuitBreakerState {
	const updateMetrics = true
	previous := cb.setStateWithMetrics(state, updateMetrics)
	return previous
}
// setStateWithMetrics 设置熔断器状态并更新指标
func (cb *CircuitBreakerInfo) setStateWithMetrics(state CircuitBreakerState, updateMetrics bool) CircuitBreakerState {
2026-01-01 13:39:51 +08:00
newState := cb.stateToInt64(state)
oldState := cb.State.Swap(newState)
oldStateEnum := cb.int64ToState(oldState)
// 如果状态发生了变化且需要更新指标
if oldStateEnum != state && updateMetrics {
cb.updateStateMetrics(state)
}
return oldStateEnum
}
// init puts the breaker into the closed state and stamps the reset, close and
// window-start times with the current Unix second.
//
// The clock is read once so the three timestamps are always identical;
// separate reads could straddle a second boundary.
// cb.Metrics must be non-nil.
func (cb *CircuitBreakerInfo) init() {
	now := time.Now().Unix()

	cb.State.Store(stateClosed)
	cb.Metrics.LastResetTime.Store(now)
	cb.Metrics.LastCloseTime.Store(now)
	cb.Metrics.WindowStartTime.Store(now)
}
// stateToInt64 将CircuitBreakerState转换为int64状态
func (cb *CircuitBreakerInfo) stateToInt64(state CircuitBreakerState) int64 {
2026-01-01 13:14:46 +08:00
switch state {
case StateOpen:
2026-01-01 13:39:51 +08:00
return stateOpen
2026-01-01 13:14:46 +08:00
case StateHalfOpen:
2026-01-01 13:39:51 +08:00
return stateHalfOpen
2026-01-01 13:14:46 +08:00
default:
2026-01-01 13:39:51 +08:00
return stateClosed
2026-01-01 13:14:46 +08:00
}
2026-01-01 13:39:51 +08:00
}
2026-01-01 13:14:46 +08:00
2026-01-01 13:39:51 +08:00
// int64ToState 将int64状态转换为CircuitBreakerState
func (cb *CircuitBreakerInfo) int64ToState(state int64) CircuitBreakerState {
switch state {
2026-01-01 13:14:46 +08:00
case stateOpen:
2026-01-01 13:39:51 +08:00
return StateOpen
2026-01-01 13:14:46 +08:00
case stateHalfOpen:
2026-01-01 13:39:51 +08:00
return StateHalfOpen
2026-01-01 13:14:46 +08:00
default:
2026-01-01 13:39:51 +08:00
return StateClosed
2026-01-01 13:14:46 +08:00
}
2026-01-01 13:39:51 +08:00
}
2026-01-01 13:14:46 +08:00
2026-01-01 13:39:51 +08:00
// updateStateMetrics 更新状态相关的指标
func (cb *CircuitBreakerInfo) updateStateMetrics(state CircuitBreakerState) {
now := time.Now().Unix()
2026-01-01 13:14:46 +08:00
2026-01-05 16:14:14 +08:00
// 防护:确保时间戳在合理范围内
// 32位系统上Unix时间戳在2038年1月19日会溢出
// 这里做一些防护,确保存储的时间戳是有效的
if now < 0 || now > 1<<62 {
g.Log().Warningf(context.Background(), "检测到异常时间戳: %d, 将使用当前系统时间", now)
now = time.Now().Unix()
}
2026-01-01 13:39:51 +08:00
// 根据新状态更新计数器
switch state {
case StateOpen:
cb.Metrics.OpenCount.Add(1)
cb.Metrics.LastOpenTime.Store(now)
// 设置下一次重试时间
2026-01-05 16:14:14 +08:00
nextRetry := time.Now().Add(cb.Config.TimeoutParsed).Unix()
if nextRetry < 0 || nextRetry > 1<<62 {
// 如果计算出异常时间,使用当前时间+超时秒数
nextRetry = now + int64(cb.Config.TimeoutParsed.Seconds())
}
cb.Metrics.NextRetryTime.Store(nextRetry)
2026-01-01 13:39:51 +08:00
case StateClosed:
cb.Metrics.ClosedCount.Add(1)
cb.Metrics.LastCloseTime.Store(now)
case StateHalfOpen:
cb.Metrics.HalfOpenCount.Add(1)
cb.Metrics.LastHalfOpenTime.Store(now)
2026-01-01 13:14:46 +08:00
}
}
2026-01-05 15:59:51 +08:00
// getCircuitBreakerInfoByResource 根据资源名获取熔断器信息
// 支持精确匹配和前缀匹配
func getCircuitBreakerInfoByResource(resourceName string) (*CircuitBreakerInfo, *CircuitBreakerConfig) {
// 先尝试精确匹配
if cbInfoVal, ok := circuitBreakers.Load(resourceName); ok {
cbInfo, ok := cbInfoVal.(*CircuitBreakerInfo)
if ok {
return cbInfo, cbInfo.Config
}
2026-01-01 13:54:34 +08:00
}
2026-01-05 15:59:51 +08:00
// 尝试前缀匹配:去掉查询参数部分
if idx := strings.Index(resourceName, "?"); idx > 0 {
prefix := resourceName[:idx]
if cbInfoVal, ok := circuitBreakers.Load(prefix); ok {
cbInfo, ok := cbInfoVal.(*CircuitBreakerInfo)
if ok {
return cbInfo, cbInfo.Config
}
}
2026-01-01 13:54:34 +08:00
}
2026-01-05 15:59:51 +08:00
return nil, nil
2026-01-01 13:54:34 +08:00
}
// updateResponseTimeStats folds one request duration into the breaker's
// response-time metrics: the running total, the minimum and the maximum (all
// in nanoseconds), and the slow-request counter when duration is strictly
// greater than config.SlowRequestThresholdParsed.
func updateResponseTimeStats(cbInfo *CircuitBreakerInfo, duration time.Duration, config *CircuitBreakerConfig) {
	metrics := cbInfo.Metrics
	elapsedNs := duration.Nanoseconds()

	metrics.TotalResponseTime.Add(elapsedNs)
	atomicUpdateMin(&metrics.MinResponseTime, elapsedNs)
	atomicUpdateMax(&metrics.MaxResponseTime, elapsedNs)

	if isSlow := duration > config.SlowRequestThresholdParsed; isSlow {
		metrics.SlowRequests.Add(1)
	}
}
// formatUnixTime 格式化Unix时间戳
func formatUnixTime(timestamp int64) string {
2026-01-01 14:07:14 +08:00
if timestamp <= 0 {
return ""
2026-01-01 13:54:34 +08:00
}
2026-01-01 14:07:14 +08:00
return time.Unix(timestamp, 0).Format("2006-01-02 15:04:05")
2026-01-01 13:54:34 +08:00
}
2026-01-01 01:33:59 +08:00
// InitCircuitBreaker 初始化Sentinel熔断器
func InitCircuitBreaker() error {
ctx := context.Background()
2026-01-01 13:14:46 +08:00
if err := api.InitDefault(); err != nil {
2026-01-01 10:48:47 +08:00
return fmt.Errorf("sentinel初始化失败: %v", err)
2025-12-31 23:38:33 +08:00
}
2026-01-01 10:37:01 +08:00
registerStateChangeListeners()
g.Log().Infof(ctx, "Sentinel熔断器初始化成功")
2026-01-01 07:38:00 +08:00
2026-01-05 15:59:51 +08:00
// 加载接口级别的熔断器配置
configs := g.Cfg().MustGet(ctx, "circuitBreaker.interfaces").Map()
if len(configs) == 0 {
g.Log().Infof(ctx, "未配置任何接口熔断器")
2026-01-01 10:37:01 +08:00
return nil
2025-12-31 23:38:33 +08:00
}
2026-01-01 10:37:01 +08:00
enabledCount := 0
2026-01-05 15:59:51 +08:00
for resourcePattern, configData := range configs {
config, err := loadInterfaceCircuitBreakerConfig(ctx, resourcePattern, configData)
if err != nil {
g.Log().Errorf(ctx, "加载接口 %s 熔断器配置失败: %v", resourcePattern, err)
continue
}
2026-01-01 13:14:46 +08:00
if config != nil && config.Enabled {
2026-01-05 15:59:51 +08:00
if err := initInterfaceCircuitBreaker(resourcePattern, config); err != nil {
g.Log().Errorf(ctx, "接口 %s 熔断器初始化失败: %v", resourcePattern, err)
2026-01-01 07:38:00 +08:00
} else {
2026-01-05 15:59:51 +08:00
g.Log().Infof(ctx, "接口 %s 熔断器初始化成功", resourcePattern)
2026-01-01 10:37:01 +08:00
enabledCount++
2025-12-31 23:38:33 +08:00
}
}
}
2026-01-05 15:59:51 +08:00
g.Log().Infof(ctx, "共初始化 %d 个接口熔断器,其中 %d 个已启用", len(configs), enabledCount)
2026-01-01 01:33:59 +08:00
return nil
2025-12-31 23:38:33 +08:00
}
2026-01-01 07:38:00 +08:00
// ReloadCircuitBreakerConfig 动态重新加载熔断器配置
2026-01-05 15:59:51 +08:00
// loadInterfaceCircuitBreakerConfig 加载接口级别的熔断器配置
func loadInterfaceCircuitBreakerConfig(ctx context.Context, resourcePattern string, configData interface{}) (*CircuitBreakerConfig, error) {
configMap, ok := configData.(map[string]interface{})
if !ok {
return nil, fmt.Errorf("接口 %s 配置格式错误: %v", resourcePattern, configData)
2025-12-31 23:38:33 +08:00
}
2026-01-01 07:38:00 +08:00
2026-01-01 13:14:46 +08:00
config := &CircuitBreakerConfig{
2026-01-05 15:59:51 +08:00
Enabled: getBoolFromMap(configMap, "enabled", true),
MaxFailures: getIntFromMap(configMap, "maxFailures", defaultMaxFailures),
Timeout: getStringFromMap(configMap, "timeout", defaultTimeout),
SlowRequestThreshold: getStringFromMap(configMap, "slowRequestThreshold", defaultSlowRequestThreshold),
EnableSlidingWindow: getBoolFromMap(configMap, "enableSlidingWindow", false),
FailureRateThreshold: getFloatFromMap(configMap, "failureRateThreshold", 0.5),
EnableFallback: getBoolFromMap(configMap, "enableFallback", false),
FallbackMessage: getStringFromMap(configMap, "fallbackMessage", ""),
RequestTimeout: getIntFromMap(configMap, "requestTimeout", defaultRequestTimeout),
DistributedTTL: getIntFromMap(configMap, "distributedTTL", defaultDistributedTTL),
StatIntervalMs: getIntFromMap(configMap, "statIntervalMs", defaultStatIntervalMs),
HalfOpenMaxRequests: getIntFromMap(configMap, "halfOpenMaxRequests", defaultHalfOpenMaxRequests),
HalfOpenSuccessThreshold: getFloatFromMap(configMap, "halfOpenSuccessThreshold", defaultHalfOpenSuccessThreshold),
WarmupDuration: getStringFromMap(configMap, "warmupDuration", defaultWarmupDuration),
EnableAdaptiveThreshold: getBoolFromMap(configMap, "enableAdaptiveThreshold", false),
AdaptiveMinThreshold: getFloatFromMap(configMap, "adaptiveMinThreshold", 0.3),
AdaptiveMaxThreshold: getFloatFromMap(configMap, "adaptiveMaxThreshold", 0.7),
}
config.MinRequestAmount = getIntFromMap(configMap, "minRequestAmount", 0)
2026-01-01 13:14:46 +08:00
if config.MinRequestAmount == 0 {
config.MinRequestAmount = config.MaxFailures
}
// 解析时间 - 使用默认值处理解析错误
2026-01-01 13:39:51 +08:00
config.TimeoutParsed, config.Timeout = parseDurationWithDefault(ctx, config.Timeout, defaultTimeout, "timeout")
config.SlowRequestThresholdParsed, config.SlowRequestThreshold = parseDurationWithDefault(ctx, config.SlowRequestThreshold, defaultSlowRequestThreshold, "slowRequestThreshold")
config.WarmupDurationParsed, config.WarmupDuration = parseDurationWithDefault(ctx, config.WarmupDuration, defaultWarmupDuration, "warmupDuration")
2026-01-01 13:14:46 +08:00
// 解析状态码
2026-01-05 15:59:51 +08:00
successCodes := getStringFromMap(configMap, "successStatusCodes", "200,201,204")
2026-01-01 13:14:46 +08:00
config.SuccessStatusCodes = parseIntSlice(successCodes)
2026-01-05 15:59:51 +08:00
return config, nil
}
2026-01-01 13:14:46 +08:00
2026-01-05 15:59:51 +08:00
// getBoolFromMap returns m[key] as a bool.
//
// A bool value is returned as is. A string value is parsed with
// strconv.ParseBool ("true", "false", "1", "0", …) after trimming surrounding
// white space, mirroring how the int and float helpers accept strings.
// defaultValue is returned when the key is missing, the value has another
// type, or the string cannot be parsed.
func getBoolFromMap(m map[string]interface{}, key string, defaultValue bool) bool {
	val, ok := m[key]
	if !ok {
		return defaultValue
	}
	switch v := val.(type) {
	case bool:
		return v
	case string:
		if b, err := strconv.ParseBool(strings.TrimSpace(v)); err == nil {
			return b
		}
	}
	return defaultValue
}
2026-01-05 15:59:51 +08:00
// getIntFromMap returns m[key] as an int.
//
// All built-in integer and floating-point types are accepted, since config
// decoders differ in which numeric type they produce. Floats are truncated
// toward zero, and unsigned values too large for int yield defaultValue.
// Strings are parsed with strconv.Atoi after trimming white space.
// defaultValue is returned when the key is missing, the value has an
// unsupported type, or the string cannot be parsed.
func getIntFromMap(m map[string]interface{}, key string, defaultValue int) int {
	const maxInt = int(^uint(0) >> 1)

	val, ok := m[key]
	if !ok {
		return defaultValue
	}
	switch v := val.(type) {
	case int:
		return v
	case int8:
		return int(v)
	case int16:
		return int(v)
	case int32:
		return int(v)
	case int64:
		return int(v)
	case uint8:
		return int(v)
	case uint16:
		return int(v)
	case uint32:
		if uint64(v) <= uint64(maxInt) {
			return int(v)
		}
	case uint:
		if uint64(v) <= uint64(maxInt) {
			return int(v)
		}
	case uint64:
		if v <= uint64(maxInt) {
			return int(v)
		}
	case float32:
		return int(v)
	case float64:
		return int(v)
	case string:
		if i, err := strconv.Atoi(strings.TrimSpace(v)); err == nil {
			return i
		}
	}
	return defaultValue
}
2026-01-05 15:59:51 +08:00
// getFloatFromMap returns m[key] as a float64.
//
// All built-in floating-point and integer types are accepted, since config
// decoders differ in which numeric type they produce. Strings are parsed with
// strconv.ParseFloat after trimming white space. defaultValue is returned
// when the key is missing, the value has an unsupported type, or the string
// cannot be parsed.
func getFloatFromMap(m map[string]interface{}, key string, defaultValue float64) float64 {
	val, ok := m[key]
	if !ok {
		return defaultValue
	}
	switch v := val.(type) {
	case float64:
		return v
	case float32:
		return float64(v)
	case int:
		return float64(v)
	case int8:
		return float64(v)
	case int16:
		return float64(v)
	case int32:
		return float64(v)
	case int64:
		return float64(v)
	case uint:
		return float64(v)
	case uint8:
		return float64(v)
	case uint16:
		return float64(v)
	case uint32:
		return float64(v)
	case uint64:
		return float64(v)
	case string:
		if f, err := strconv.ParseFloat(strings.TrimSpace(v), 64); err == nil {
			return f
		}
	}
	return defaultValue
}
// getStringFromMap returns m[key] when it is present and holds a string;
// otherwise it returns defaultValue. Non-string values are not converted.
func getStringFromMap(m map[string]interface{}, key string, defaultValue string) string {
	text, isString := m[key].(string)
	if !isString {
		return defaultValue
	}
	return text
}
// initInterfaceCircuitBreaker 初始化接口级别的熔断器
func initInterfaceCircuitBreaker(resourcePattern string, config *CircuitBreakerConfig) error {
if err := validateCircuitBreakerConfig(config); err != nil {
return err
2026-01-01 13:14:46 +08:00
}
2026-01-05 15:59:51 +08:00
threshold := config.FailureRateThreshold
if config.EnableAdaptiveThreshold {
threshold = (config.AdaptiveMinThreshold + config.AdaptiveMaxThreshold) / 2
}
var rule []*circuitbreaker.Rule
baseRule := &circuitbreaker.Rule{
Resource: resourcePattern,
RetryTimeoutMs: uint32(config.TimeoutParsed.Milliseconds()),
MinRequestAmount: uint64(config.MinRequestAmount),
StatIntervalMs: uint32(config.StatIntervalMs),
}
if config.EnableSlidingWindow {
baseRule.Strategy = circuitbreaker.SlowRequestRatio
baseRule.StatSlidingWindowBucketCount = 10
baseRule.MaxAllowedRtMs = uint64(config.SlowRequestThresholdParsed.Milliseconds())
baseRule.Threshold = threshold
} else {
baseRule.Strategy = circuitbreaker.ErrorCount
baseRule.Threshold = float64(config.MaxFailures)
}
rule = []*circuitbreaker.Rule{baseRule}
if _, err := circuitbreaker.LoadRulesOfResource(resourcePattern, []*circuitbreaker.Rule{}); err != nil {
return fmt.Errorf("清空熔断规则失败: %v", err)
}
if _, err := circuitbreaker.LoadRules(rule); err != nil {
return fmt.Errorf("加载熔断规则失败: %v", err)
}
successCodeMap := make(map[int]bool, len(config.SuccessStatusCodes))
for _, code := range config.SuccessStatusCodes {
successCodeMap[code] = true
}
cbInfo := &CircuitBreakerInfo{
ResourceName: resourcePattern,
Config: config,
Metrics: newCircuitBreakerMetrics(),
SuccessCodeMap: successCodeMap,
AdaptiveThreshold: threshold,
WarmupEndTime: time.Now().Add(config.WarmupDurationParsed).Unix(),
}
cbInfo.init()
circuitBreakers.Store(resourcePattern, cbInfo)
strategy := "error_count"
if config.EnableSlidingWindow {
strategy = "slow_ratio"
}
g.Log().Infof(context.Background(), "接口 %s 熔断器初始化成功: resource=%s, strategy=%s, timeout=%v, threshold=%.2f",
resourcePattern, resourcePattern, strategy, config.TimeoutParsed, rule[0].Threshold)
return nil
}
// parseIntSlice parses a comma-separated list of integers. Surrounding white
// space of each element is ignored and elements that are not valid integers
// are skipped. The result is never nil.
func parseIntSlice(str string) []int {
	result := make([]int, 0, strings.Count(str, ",")+1)

	for rest, more := str, true; more; {
		var field string
		field, rest, more = strings.Cut(rest, ",")
		if n, err := strconv.Atoi(strings.TrimSpace(field)); err == nil {
			result = append(result, n)
		}
	}
	return result
}
2025-12-31 23:38:33 +08:00
2026-01-01 13:39:51 +08:00
// parseDurationWithDefault parses durationStr with time.ParseDuration and
// returns the duration together with the string it came from.
//
// When durationStr cannot be parsed, a warning naming fieldName is logged and
// the parsed defaultStr is returned together with defaultStr itself.
func parseDurationWithDefault(ctx context.Context, durationStr, defaultStr, fieldName string) (time.Duration, string) {
	parsed, err := time.ParseDuration(durationStr)
	if err == nil {
		return parsed, durationStr
	}

	g.Log().Warningf(ctx, "解析%s失败: %s, 使用默认值 %s, error: %v", fieldName, durationStr, defaultStr, err)

	// The parse error of the default is ignored here; an unparsable default
	// yields a zero duration.
	fallback, _ := time.ParseDuration(defaultStr)
	return fallback, defaultStr
}
// atomicUpdateMin 原子更新最小值
func atomicUpdateMin(minValue *atomic.Int64, newValue int64) {
for {
currentMin := minValue.Load()
if newValue >= currentMin {
break
}
if minValue.CompareAndSwap(currentMin, newValue) {
break
}
}
}
// atomicUpdateMax 原子更新最大值
func atomicUpdateMax(maxValue *atomic.Int64, newValue int64) {
for {
currentMax := maxValue.Load()
if newValue <= currentMax {
break
}
if maxValue.CompareAndSwap(currentMax, newValue) {
break
}
}
}
// reset puts every counter back to its initial value.
//
// All request, state-transition, half-open, response-time and window counters
// become 0, except MinResponseTime, which is set to the largest int64 so that
// the first recorded duration becomes the minimum. All three state-transition
// counters are reset together so they stay consistent with each other.
//
// Timestamp fields are left untouched; callers set LastResetTime themselves.
func (m *CircuitBreakerMetrics) reset() {
	// Request counters.
	m.TotalRequests.Store(0)
	m.PassRequests.Store(0)
	m.BlockRequests.Store(0)
	m.FailureRequests.Store(0)
	m.SlowRequests.Store(0)

	// State-transition counters.
	m.OpenCount.Store(0)
	m.ClosedCount.Store(0)
	m.HalfOpenCount.Store(0)

	// Half-open bookkeeping.
	m.HalfOpenRequests.Store(0)
	m.HalfOpenPassed.Store(0)
	m.HalfOpenFailed.Store(0)

	// Response-time aggregates.
	m.TotalResponseTime.Store(0)
	m.MinResponseTime.Store(1<<63 - 1) // max int64 as the initial minimum
	m.MaxResponseTime.Store(0)

	// Rolling window.
	m.WindowRequests.Store(0)
	m.WindowFailures.Store(0)
}
2026-01-01 13:14:46 +08:00
// newCircuitBreakerMetrics 创建并初始化熔断器指标
func newCircuitBreakerMetrics() *CircuitBreakerMetrics {
2026-01-01 13:39:51 +08:00
metrics := &CircuitBreakerMetrics{}
metrics.reset()
2026-01-01 13:14:46 +08:00
return metrics
}
2026-01-01 11:15:53 +08:00
2026-01-01 13:14:46 +08:00
// updateWindowStats 更新窗口统计信息
func (cb *CircuitBreakerInfo) updateWindowStats(isSuccess bool, ctx context.Context) {
now := time.Now().Unix()
windowStart := cb.Metrics.WindowStartTime.Load()
2026-01-01 12:05:12 +08:00
2026-01-01 13:14:46 +08:00
// 默认窗口大小为60秒
windowSize := int64(60)
2026-01-01 12:05:12 +08:00
2026-01-01 13:14:46 +08:00
// 如果超过窗口大小,重置统计
if now-windowStart >= windowSize {
2026-01-05 16:14:14 +08:00
// 使用原子操作重置窗口只有一个goroutine会成功
2026-01-01 13:14:46 +08:00
if cb.Metrics.WindowStartTime.CompareAndSwap(windowStart, now) {
2026-01-05 16:14:14 +08:00
// CAS成功的goroutine负责重置计数器
// 注意:可能有一些请求的累加在重置之前完成,但不会丢失很多数据
2026-01-01 13:14:46 +08:00
cb.Metrics.WindowRequests.Store(0)
cb.Metrics.WindowFailures.Store(0)
}
// 重新获取最新的windowStart
windowStart = cb.Metrics.WindowStartTime.Load()
2026-01-01 12:05:12 +08:00
}
2026-01-01 11:15:53 +08:00
2026-01-01 13:14:46 +08:00
// 原子更新窗口内请求总数
cb.Metrics.WindowRequests.Add(1)
if !isSuccess {
cb.Metrics.WindowFailures.Add(1)
2026-01-01 11:08:55 +08:00
}
2026-01-01 13:14:46 +08:00
// 计算当前窗口内的成功率
total := cb.Metrics.WindowRequests.Load()
failures := cb.Metrics.WindowFailures.Load()
2026-01-01 14:07:14 +08:00
if total >= 10 { // 有足够样本时才记录
2026-01-01 13:14:46 +08:00
successRate := float64(total-failures) / float64(total)
2026-01-01 14:07:14 +08:00
if successRate < 0.5 { // 如果成功率低于50%
2026-01-01 13:14:46 +08:00
g.Log().Warningf(ctx, "熔断器 %s 窗口内成功率较低: %.2f%%, total=%d, failures=%d",
cb.ResourceName, successRate*100, total, failures)
2026-01-01 11:08:55 +08:00
}
}
}
2026-01-01 13:54:34 +08:00
// validateInRange returns nil when min <= value <= max and otherwise an error
// that names the field and the allowed range.
func validateInRange(name string, value, min, max int) error {
	if min <= value && value <= max {
		return nil
	}
	return fmt.Errorf("%s必须在%d-%d之间", name, min, max)
}
// validateFloatInRange returns nil when min <= value <= max and otherwise an
// error that names the field and the allowed range.
//
// The test is written as a negated "inside" check so that NaN, for which
// every comparison is false, is rejected. NaN can reach this function from a
// string config value such as "NaN".
func validateFloatInRange(name string, value, min, max float64) error {
	if !(value >= min && value <= max) {
		return fmt.Errorf("%s必须在%.1f-%.1f之间", name, min, max)
	}
	return nil
}
2026-01-01 13:14:46 +08:00
// validateCircuitBreakerConfig 验证配置
func validateCircuitBreakerConfig(config *CircuitBreakerConfig) error {
if config.MaxFailures <= 0 {
return fmt.Errorf("maxFailures必须大于0")
2026-01-01 10:48:47 +08:00
}
2026-01-01 13:54:34 +08:00
if err := validateFloatInRange("failureRateThreshold", config.FailureRateThreshold, 0.0, 1.0); err != nil {
return err
2026-01-01 13:14:46 +08:00
}
if len(config.SuccessStatusCodes) == 0 {
return fmt.Errorf("successStatusCodes不能为空")
}
2026-01-01 13:54:34 +08:00
if err := validateInRange("requestTimeout", config.RequestTimeout, 0, 300000); err != nil {
return err
2026-01-01 13:14:46 +08:00
}
2026-01-01 13:54:34 +08:00
if err := validateInRange("distributedTTL", config.DistributedTTL, 0, 3600); err != nil {
return err
2026-01-01 13:14:46 +08:00
}
2026-01-01 13:54:34 +08:00
if err := validateInRange("statIntervalMs", config.StatIntervalMs, 100, 60000); err != nil {
return err
2026-01-01 13:14:46 +08:00
}
2026-01-01 13:54:34 +08:00
if err := validateInRange("minRequestAmount", config.MinRequestAmount, 1, 10000); err != nil {
return err
2026-01-01 13:14:46 +08:00
}
2026-01-01 13:54:34 +08:00
if err := validateInRange("halfOpenMaxRequests", config.HalfOpenMaxRequests, 1, 100); err != nil {
return err
2026-01-01 13:14:46 +08:00
}
2026-01-01 13:54:34 +08:00
if err := validateFloatInRange("halfOpenSuccessThreshold", config.HalfOpenSuccessThreshold, 0.0, 1.0); err != nil {
return err
2026-01-01 13:14:46 +08:00
}
if config.EnableAdaptiveThreshold {
2026-01-01 13:54:34 +08:00
if err := validateFloatInRange("adaptiveMinThreshold", config.AdaptiveMinThreshold, 0.0, 1.0); err != nil {
return err
2026-01-01 13:14:46 +08:00
}
2026-01-01 13:54:34 +08:00
if err := validateFloatInRange("adaptiveMaxThreshold", config.AdaptiveMaxThreshold, 0.0, 1.0); err != nil {
return err
2026-01-01 13:14:46 +08:00
}
if config.AdaptiveMinThreshold >= config.AdaptiveMaxThreshold {
return fmt.Errorf("adaptiveMinThreshold必须小于adaptiveMaxThreshold")
2026-01-01 10:48:47 +08:00
}
}
2026-01-01 13:14:46 +08:00
return nil
2026-01-01 10:48:47 +08:00
}
2026-01-01 13:14:46 +08:00
// CircuitBreakerMiddleware 熔断降级中间件
2025-12-31 23:38:33 +08:00
func CircuitBreakerMiddleware(r *ghttp.Request) {
2026-01-01 07:38:00 +08:00
startTime := time.Now()
2026-01-01 10:37:01 +08:00
ctx := r.GetCtx()
2026-01-01 07:38:00 +08:00
2026-01-05 15:59:51 +08:00
// 基于接口地址+请求参数生成熔断资源名
resourceName := generateResourceName(r)
if resourceName == "" {
2025-12-31 23:38:33 +08:00
r.Middleware.Next()
return
}
2026-01-05 15:59:51 +08:00
// 检查是否有该资源的熔断配置
cbInfo, config := getCircuitBreakerInfoByResource(resourceName)
2026-01-01 13:54:34 +08:00
if cbInfo == nil || config == nil || !config.Enabled {
2026-01-01 10:37:01 +08:00
r.Middleware.Next()
return
}
2026-01-01 07:38:00 +08:00
cbInfo.Metrics.TotalRequests.Add(1)
2026-01-01 13:14:46 +08:00
// 预热期检查
if time.Now().Unix() < cbInfo.WarmupEndTime {
r.Middleware.Next()
return
}
2026-01-01 11:15:53 +08:00
2026-01-01 10:37:01 +08:00
if config.RequestTimeout > 0 {
2026-01-01 13:14:46 +08:00
var ctxCancel context.CancelFunc
ctx, ctxCancel = context.WithTimeout(ctx, time.Duration(config.RequestTimeout)*time.Millisecond)
2026-01-01 10:37:01 +08:00
r.SetCtx(ctx)
2026-01-01 13:14:46 +08:00
defer ctxCancel()
2026-01-01 10:37:01 +08:00
}
2026-01-01 13:14:46 +08:00
// 分布式熔断检查
if config.DistributedTTL > 0 && isCircuitBreakerOpenInDistributed(ctx, resourceName) {
cbInfo.Metrics.BlockRequests.Add(1)
g.Log().Warningf(ctx, "分布式熔断触发: %s", resourceName)
2026-01-05 15:59:51 +08:00
sendFallbackResponse(r, resourceName, config, "distributed")
2026-01-01 13:14:46 +08:00
return
}
2026-01-01 15:13:36 +08:00
// 半开状态处理 - 使用HalfOpenManager确保线程安全
2026-01-01 13:14:46 +08:00
currentState := cbInfo.getState()
if currentState == StateHalfOpen {
2026-01-01 15:13:36 +08:00
manager := GetHalfOpenManager()
acquired, _ := manager.TryAcquireHalfOpenSlot(cbInfo.Metrics, config.HalfOpenMaxRequests)
2026-01-01 13:14:46 +08:00
2026-01-01 15:13:36 +08:00
if !acquired {
2026-01-01 07:38:00 +08:00
cbInfo.Metrics.BlockRequests.Add(1)
2026-01-01 13:14:46 +08:00
// 尝试转换为打开状态,如果成功则记录日志
oldState := cbInfo.setState(StateOpen)
if oldState != StateOpen {
g.Log().Warningf(ctx, "半开状态试探请求超限,恢复熔断: %s", resourceName)
if config.DistributedTTL > 0 {
syncCircuitBreakerStateToDistributed(ctx, resourceName, "open", config.DistributedTTL)
}
}
2026-01-05 15:59:51 +08:00
sendFallbackResponse(r, resourceName, config, "halfopen_limit")
2026-01-01 01:33:59 +08:00
return
}
2025-12-31 23:38:33 +08:00
}
2026-01-01 01:33:59 +08:00
entry, blockError := api.Entry(resourceName)
if blockError != nil {
2026-01-01 13:14:46 +08:00
if entry != nil {
entry.Exit()
}
2026-01-01 07:38:00 +08:00
cbInfo.Metrics.BlockRequests.Add(1)
2026-01-01 13:14:46 +08:00
oldState := cbInfo.setStateWithMetrics(StateOpen, true)
if oldState != StateOpen {
2026-01-05 15:59:51 +08:00
notifyStateChange(resourceName, oldState, StateOpen)
2026-01-01 10:37:01 +08:00
}
if config.DistributedTTL > 0 {
syncCircuitBreakerStateToDistributed(ctx, resourceName, "open", config.DistributedTTL)
2026-01-01 07:38:00 +08:00
}
2026-01-05 15:59:51 +08:00
sendFallbackResponse(r, resourceName, config, "blocked")
2026-01-01 01:33:59 +08:00
return
}
2025-12-31 23:38:33 +08:00
2026-01-01 13:14:46 +08:00
if entry != nil {
defer entry.Exit()
}
2026-01-01 01:33:59 +08:00
r.Middleware.Next()
2025-12-31 23:38:33 +08:00
2026-01-01 01:33:59 +08:00
statusCode := r.Response.Status
2026-01-01 13:14:46 +08:00
if statusCode < 100 || statusCode > 599 {
return
}
2026-01-01 07:38:00 +08:00
duration := time.Since(startTime)
2026-01-01 13:14:46 +08:00
// 记录响应时间统计
2026-01-01 13:54:34 +08:00
updateResponseTimeStats(cbInfo, duration, config)
2026-01-01 12:05:12 +08:00
2026-01-01 13:14:46 +08:00
isSuccess := isSuccessStatusCode(cbInfo, statusCode)
// 更新窗口统计
cbInfo.updateWindowStats(isSuccess, ctx)
if !isSuccess {
2026-01-01 07:38:00 +08:00
cbInfo.Metrics.FailureRequests.Add(1)
2026-01-01 13:14:46 +08:00
if entry != nil {
api.TraceError(entry, fmt.Errorf("request failed with status: %d", statusCode))
}
2026-01-05 15:59:51 +08:00
g.Log().Debugf(ctx, "接口 %s 请求失败: status=%d, duration=%v", resourceName, statusCode, duration)
2026-01-01 13:14:46 +08:00
// 重新获取当前状态,避免使用过期状态
currentState := cbInfo.getState()
if currentState == StateHalfOpen {
cbInfo.Metrics.HalfOpenFailed.Add(1)
oldState := cbInfo.setStateWithMetrics(StateOpen, true)
if oldState == StateHalfOpen {
g.Log().Warningf(ctx, "半开状态请求失败,恢复熔断: %s", resourceName)
if config.DistributedTTL > 0 {
syncCircuitBreakerStateToDistributed(ctx, resourceName, "open", config.DistributedTTL)
}
}
}
2026-01-01 07:38:00 +08:00
} else {
cbInfo.Metrics.PassRequests.Add(1)
2026-01-01 13:14:46 +08:00
// 重新获取当前状态
currentState := cbInfo.getState()
if currentState == StateHalfOpen {
2026-01-01 15:13:36 +08:00
manager := GetHalfOpenManager()
// 使用HalfOpenManager记录结果并检查是否达到阈值
if manager.RecordHalfOpenResult(cbInfo.Metrics, true, config.HalfOpenSuccessThreshold) {
// 达到成功阈值,关闭熔断器
oldState := cbInfo.setStateWithMetrics(StateClosed, true)
if oldState == StateHalfOpen {
// 重置半开统计
manager.ResetHalfOpenStats(cbInfo.Metrics)
g.Log().Infof(ctx, "半开状态成功,恢复关闭: %s", resourceName)
// 同步分布式状态
if config.DistributedTTL > 0 {
syncCircuitBreakerStateToDistributed(ctx, resourceName, "closed", config.DistributedTTL)
2026-01-01 13:14:46 +08:00
}
}
}
} else if currentState != StateClosed {
// 如果状态不是关闭但也不是半开,尝试重置为关闭状态
oldState := cbInfo.setStateWithMetrics(StateClosed, true)
if oldState != StateClosed {
2026-01-05 15:59:51 +08:00
notifyStateChange(resourceName, oldState, StateClosed)
2026-01-01 12:05:12 +08:00
}
2026-01-01 10:37:01 +08:00
}
2026-01-01 01:33:59 +08:00
}
}
2025-12-31 23:38:33 +08:00
2026-01-01 13:14:46 +08:00
// sendFallbackResponse 发送降级响应
2026-01-05 15:59:51 +08:00
func sendFallbackResponse(r *ghttp.Request, resourceName string, config *CircuitBreakerConfig, reason string) {
g.Log().Warningf(r.GetCtx(), "熔断器降级: resource=%s, reason=%s", resourceName, reason)
2026-01-01 12:05:12 +08:00
2026-01-01 07:38:00 +08:00
if config.EnableFallback && config.FallbackMessage != "" {
r.Response.WriteStatusExit(503, config.FallbackMessage)
2026-01-01 13:14:46 +08:00
return
}
2026-01-05 15:59:51 +08:00
msg := fmt.Sprintf("接口 '%s' 暂时不可用,请稍后再试", resourceName)
2026-01-01 13:14:46 +08:00
switch reason {
case "blocked":
2026-01-05 15:59:51 +08:00
msg = fmt.Sprintf("接口 '%s' 熔断保护中,请稍后再试", resourceName)
2026-01-01 13:14:46 +08:00
case "distributed":
2026-01-05 15:59:51 +08:00
msg = fmt.Sprintf("接口 '%s' 分布式熔断中", resourceName)
2026-01-01 07:38:00 +08:00
}
2026-01-01 14:07:14 +08:00
r.Response.WriteStatusExit(503, msg)
2026-01-01 07:38:00 +08:00
}
2026-01-01 13:14:46 +08:00
// isSuccessStatusCode 判断HTTP状态码是否成功
2026-01-01 12:05:12 +08:00
func isSuccessStatusCode(cbInfo *CircuitBreakerInfo, statusCode int) bool {
2026-01-01 13:14:46 +08:00
// 验证状态码范围
if statusCode < 100 || statusCode > 599 {
return false
}
if len(cbInfo.SuccessCodeMap) > 0 {
2026-01-01 12:05:12 +08:00
return cbInfo.SuccessCodeMap[statusCode]
2025-12-31 23:38:33 +08:00
}
2026-01-01 01:33:59 +08:00
return statusCode >= 200 && statusCode < 300
2025-12-31 23:38:33 +08:00
}
2026-01-05 15:59:51 +08:00
// generateResourceName 基于接口地址+请求参数生成熔断资源名
func generateResourceName(r *ghttp.Request) string {
method := r.Method
path := r.URL.Path
query := r.URL.Query().Encode()
2026-01-01 11:08:55 +08:00
2026-01-05 16:14:14 +08:00
// 安全限制:防止资源名过长导致内存或存储问题
const maxResourceNameLength = 512
2026-01-05 15:59:51 +08:00
// 生成资源名:方法:路径?查询参数
// 示例: GET:/api/users?userId=123
resourceName := method + ":" + path
2026-01-05 16:14:14 +08:00
// 限制路径长度
if len(resourceName) > maxResourceNameLength/2 {
// 截断路径,保留头部以便识别
resourceName = resourceName[:maxResourceNameLength/2] + "..."
}
2026-01-05 15:59:51 +08:00
if query != "" {
// 对查询参数进行排序以确保相同的参数顺序生成相同的资源名
sortedQuery := sortQueryString(query)
2026-01-05 16:14:14 +08:00
// 限制查询参数长度
maxQueryLength := maxResourceNameLength - len(resourceName) - 1
if len(sortedQuery) > maxQueryLength {
// 截断查询参数
sortedQuery = sortedQuery[:maxQueryLength] + "..."
}
2026-01-05 15:59:51 +08:00
resourceName += "?" + sortedQuery
2026-01-01 12:05:12 +08:00
}
2026-01-05 16:14:14 +08:00
// 最终长度检查
if len(resourceName) > maxResourceNameLength {
resourceName = resourceName[:maxResourceNameLength]
}
2026-01-05 15:59:51 +08:00
return resourceName
2026-01-01 11:08:55 +08:00
}
2026-01-05 15:59:51 +08:00
// sortQueryString 对查询字符串进行排序
func sortQueryString(query string) string {
if query == "" {
return ""
2026-01-01 12:05:12 +08:00
}
2026-01-01 14:07:14 +08:00
2026-01-05 15:59:51 +08:00
params := strings.Split(query, "&")
if len(params) == 0 {
return query
2026-01-01 12:05:12 +08:00
}
2026-01-01 13:14:46 +08:00
2026-01-05 16:14:14 +08:00
// 使用快速排序替代冒泡排序O(n log n) vs O(n²)
// 限制最大参数数量防止DoS攻击
const maxParams = 100
if len(params) > maxParams {
params = params[:maxParams]
2026-01-01 13:14:46 +08:00
}
2026-01-05 15:59:51 +08:00
2026-01-05 16:14:14 +08:00
// 简单的快速排序实现
quickSortStrings(params, 0, len(params)-1)
2026-01-05 15:59:51 +08:00
return strings.Join(params, "&")
2026-01-01 13:14:46 +08:00
}
2026-01-05 16:14:14 +08:00
// quickSortStrings sorts arr[low..high] (both bounds inclusive) in place in
// ascending byte-wise order. It is a no-op when low >= high, which covers
// empty and single-element ranges.
//
// Only the smaller partition is handled recursively; the larger one is
// processed by the loop, which keeps the recursion depth logarithmic in the
// range length.
func quickSortStrings(arr []string, low, high int) {
	for low < high {
		p := partitionStrings(arr, low, high)
		if p-low < high-p {
			quickSortStrings(arr, low, p-1)
			low = p + 1
		} else {
			quickSortStrings(arr, p+1, high)
			high = p - 1
		}
	}
}

// partitionStrings partitions arr[low..high] around a pivot and returns the
// pivot's final index p: every element in arr[low..p-1] is <= arr[p] and every
// element in arr[p+1..high] is >= arr[p].
//
// The pivot is the median of the first, middle and last elements, so input
// that is already sorted or reverse-sorted splits evenly instead of hitting
// the quadratic case of a fixed last-element pivot.
func partitionStrings(arr []string, low, high int) int {
	// Move the median of the three samples into arr[high].
	mid := low + (high-low)/2
	if arr[mid] < arr[low] {
		arr[mid], arr[low] = arr[low], arr[mid]
	}
	if arr[high] < arr[low] {
		arr[high], arr[low] = arr[low], arr[high]
	}
	// arr[low] is now the smallest sample; the median is the smaller of the
	// remaining two.
	if arr[mid] < arr[high] {
		arr[mid], arr[high] = arr[high], arr[mid]
	}

	pivot := arr[high]
	boundary := low // first index not yet known to hold a value <= pivot
	for j := low; j < high; j++ {
		if arr[j] <= pivot {
			arr[boundary], arr[j] = arr[j], arr[boundary]
			boundary++
		}
	}
	arr[boundary], arr[high] = arr[high], arr[boundary]
	return boundary
}
2026-01-01 01:33:59 +08:00
// isCircuitBreakerOpenInDistributed 检查分布式熔断状态
func isCircuitBreakerOpenInDistributed(ctx context.Context, resourceName string) bool {
2026-01-01 13:14:46 +08:00
key := "circuit_breaker:" + resourceName + ":state"
2026-01-01 14:07:14 +08:00
redisClient := g.Redis()
if redisClient == nil {
2026-01-01 10:37:01 +08:00
return false
}
2026-01-01 14:07:14 +08:00
value, err := redisClient.Get(ctx, key)
2026-01-01 01:33:59 +08:00
if err != nil || value.IsNil() {
return false
2025-12-31 23:38:33 +08:00
}
2026-01-01 13:14:46 +08:00
return value.String() == "open"
2026-01-01 10:48:47 +08:00
}
2026-01-01 13:14:46 +08:00
// syncCircuitBreakerStateToDistributed 同步熔断器状态到Redis
2026-01-01 10:37:01 +08:00
func syncCircuitBreakerStateToDistributed(ctx context.Context, resourceName, state string, ttl int) {
2026-01-01 13:39:51 +08:00
stateKey := "circuit_breaker:" + resourceName + ":state"
2026-01-01 13:14:46 +08:00
lockKey := "circuit_breaker:" + resourceName + ":lock"
2026-01-01 13:39:51 +08:00
redisClient := g.Redis()
if redisClient == nil {
2026-01-01 13:14:46 +08:00
g.Log().Warningf(ctx, "Redis未初始化无法同步分布式熔断状态: %s", resourceName)
2026-01-01 10:37:01 +08:00
return
}
2026-01-05 16:14:14 +08:00
// 使用更短的锁超时时间3秒避免死锁风险
// 同时添加重试机制,确保最终一致性
lockTimeout := int64(3)
maxRetries := 2
var lastErr error
for attempt := 0; attempt <= maxRetries; attempt++ {
if attempt > 0 {
// 短暂延迟后重试
time.Sleep(time.Duration(attempt*50) * time.Millisecond)
}
// 使用common/redis中的Lock方法获取分布式锁
success, err := redis.Lock(ctx, lockKey, lockTimeout, func(ctx context.Context) error {
// 设置熔断器状态
_, err := redisClient.Do(ctx, "SETEX", stateKey, ttl, state)
if err != nil {
g.Log().Errorf(ctx, "设置分布式熔断状态失败: %s=%s, error: %v", stateKey, state, err)
return err
}
2026-01-01 13:39:51 +08:00
g.Log().Debugf(ctx, "分布式熔断状态已同步: %s=%s (TTL: %d)", stateKey, state, ttl)
2026-01-05 16:14:14 +08:00
return nil
})
if err != nil {
lastErr = err
g.Log().Errorf(ctx, "获取分布式锁失败 (尝试 %d/%d): %s, error: %v", attempt+1, maxRetries+1, lockKey, err)
continue
2026-01-01 13:39:51 +08:00
}
2026-01-01 10:37:01 +08:00
2026-01-05 16:14:14 +08:00
if success {
// 成功获取锁并设置状态
return
}
2026-01-01 10:48:47 +08:00
}
2026-01-05 16:14:14 +08:00
// 所有尝试都失败
g.Log().Warningf(ctx, "分布式熔断状态同步失败,跳过: %s, 最后错误: %v", lockKey, lastErr)
2026-01-01 10:37:01 +08:00
}
2026-01-05 16:28:29 +08:00
// checkCircuitBreakerAuthToken 验证熔断器管理接口的认证Token
// 统一的认证逻辑,用于健康检查、重置等管理接口
// 返回值:
// - true: 认证通过
// - false: 认证失败已发送401响应
func checkCircuitBreakerAuthToken(r *ghttp.Request) bool {
2026-01-05 16:14:14 +08:00
// 从Header中获取认证信息
authToken := r.Header.Get("Authorization")
2026-01-05 16:28:29 +08:00
// 如果Header中没有尝试从查询参数获取仅用于开发/测试环境)
// 生产环境应禁用此方式仅支持Header认证
2026-01-05 16:14:14 +08:00
if authToken == "" {
authToken = r.Get("authToken").String()
}
2026-01-05 16:28:29 +08:00
// 检查Token是否为空
2026-01-05 16:14:14 +08:00
if authToken == "" {
2026-01-05 16:28:29 +08:00
g.Log().Warningf(r.GetCtx(), "熔断器管理接口访问被拒绝缺少认证Token, IP=%s, Path=%s",
r.GetClientIp(), r.URL.Path)
2026-01-05 16:14:14 +08:00
r.Response.WriteStatusExit(401, "Unauthorized: Missing authentication token")
2026-01-05 16:28:29 +08:00
return false
2026-01-05 16:14:14 +08:00
}
2026-01-05 16:28:29 +08:00
// 支持Bearer Token格式
if strings.HasPrefix(authToken, "Bearer ") {
authToken = strings.TrimPrefix(authToken, "Bearer ")
}
// TODO: 实现完整的Token验证逻辑
// 建议使用JWT或其他安全机制
//
// 1. 使用gogf/gf/v2/os/gjwt进行JWT验证
// token, err := gjwt.ParseAndVerify(authToken, []byte(secret))
// if err != nil {
// return false
// }
//
// 2. 或使用其他JWT库如github.com/golang-jwt/jwt/v5
// claims := &MyClaims{}
// token, err := jwt.ParseWithClaims(authToken, claims, ...)
//
// 3. 验证Token的
// - 签名有效性
// - 过期时间exp
// - 签发者iss
// - 权限范围scope/roles
//
// 4. 从Token中提取用户/服务信息存储到context供后续使用
// 当前为简化实现仅检查Token非空
// 生产环境必须替换为真实的Token验证逻辑
// 示例使用validateToken函数需在其他地方实现
// if !validateToken(authToken) {
// g.Log().Warningf(r.GetCtx(), "熔断器管理接口访问被拒绝无效的认证Token, IP=%s", r.GetClientIp())
2026-01-05 16:14:14 +08:00
// r.Response.WriteStatusExit(401, "Unauthorized: Invalid token")
2026-01-05 16:28:29 +08:00
// return false
2026-01-05 16:14:14 +08:00
// }
2026-01-05 16:28:29 +08:00
return true
}
// CircuitBreakerHealthCheckHandler 健康检查接口
func CircuitBreakerHealthCheckHandler(r *ghttp.Request) {
// 认证检查使用Token验证
if !checkCircuitBreakerAuthToken(r) {
return
}
2026-01-01 12:05:12 +08:00
page := r.Get("page").Int()
size := r.Get("size").Int()
if page < 0 {
page = 0
}
if size <= 0 || size > 100 {
2026-01-01 13:14:46 +08:00
size = 20
2026-01-01 12:05:12 +08:00
}
2026-01-05 15:59:51 +08:00
// 获取所有熔断器资源
var resources []string
circuitBreakers.Range(func(key, value interface{}) bool {
resources = append(resources, key.(string))
return true
})
2026-01-01 13:14:46 +08:00
2026-01-05 15:59:51 +08:00
total := len(resources)
2026-01-01 12:05:12 +08:00
start := page * size
2026-01-01 13:14:46 +08:00
if start >= total {
r.Response.WriteJsonExit(ghttp.DefaultHandlerResponse{Code: 200, Message: "熔断器状态",
Data: map[string]interface{}{
"summary": map[string]interface{}{"totalServices": 0, "openServices": 0, "closedServices": 0, "halfOpenServices": 0},
"services": map[string]interface{}{}, "page": page, "size": size, "total": total}})
return
}
2026-01-01 12:05:12 +08:00
end := start + size
if end > total {
end = total
}
2026-01-01 13:14:46 +08:00
status := make(map[string]interface{})
totalServices := 0
openServices := 0
halfOpenServices := 0
for i := start; i < end; i++ {
2026-01-05 15:59:51 +08:00
resourceName := resources[i]
cbInfoVal, ok := circuitBreakers.Load(resourceName)
2026-01-01 13:14:46 +08:00
if !ok {
continue
2026-01-01 12:05:12 +08:00
}
2026-01-01 13:14:46 +08:00
cbInfo, ok := cbInfoVal.(*CircuitBreakerInfo)
if !ok {
continue
2026-01-01 12:05:12 +08:00
}
2026-01-01 07:38:00 +08:00
totalServices++
2026-01-01 13:14:46 +08:00
state := cbInfo.getState()
if state == StateOpen {
2026-01-01 07:38:00 +08:00
openServices++
2026-01-01 13:14:46 +08:00
} else if state == StateHalfOpen {
halfOpenServices++
2026-01-01 01:33:59 +08:00
}
2026-01-01 13:54:34 +08:00
// 格式化时间字符串
lastResetTimeStr := formatUnixTime(cbInfo.Metrics.LastResetTime.Load())
lastOpenTimeStr := formatUnixTime(cbInfo.Metrics.LastOpenTime.Load())
nextRetryTimeStr := formatUnixTime(cbInfo.Metrics.NextRetryTime.Load())
2026-01-01 12:05:12 +08:00
2026-01-05 15:59:51 +08:00
status[resourceName] = map[string]interface{}{
2026-01-01 13:14:46 +08:00
"resource": cbInfo.ResourceName,
"state": string(state),
"lastOpenTime": lastOpenTimeStr,
"nextRetryTime": nextRetryTimeStr,
"totalRequests": cbInfo.Metrics.TotalRequests.Load(),
"passRequests": cbInfo.Metrics.PassRequests.Load(),
"blockRequests": cbInfo.Metrics.BlockRequests.Load(),
"failureRequests": cbInfo.Metrics.FailureRequests.Load(),
"slowRequests": cbInfo.Metrics.SlowRequests.Load(),
"openCount": cbInfo.Metrics.OpenCount.Load(),
"lastResetTime": lastResetTimeStr,
"halfOpenRequests": cbInfo.Metrics.HalfOpenRequests.Load(),
"halfOpenPassed": cbInfo.Metrics.HalfOpenPassed.Load(),
2026-01-01 01:33:59 +08:00
}
2026-01-01 07:38:00 +08:00
}
2026-01-01 13:14:46 +08:00
r.Response.WriteJsonExit(ghttp.DefaultHandlerResponse{Code: 200, Message: "熔断器状态",
2026-01-01 07:38:00 +08:00
Data: map[string]interface{}{
2026-01-01 13:14:46 +08:00
"summary": map[string]interface{}{"totalServices": totalServices, "openServices": openServices, "closedServices": totalServices - openServices - halfOpenServices, "halfOpenServices": halfOpenServices},
"services": status, "page": page, "size": size, "total": total}})
2025-12-31 23:38:33 +08:00
}
2026-01-05 15:59:51 +08:00
// batchProcessResources 批量处理资源
func batchProcessResources(r *ghttp.Request, processFunc func(resourceName string) error) (int, int, map[string]string) {
2026-01-01 13:39:51 +08:00
successCount := 0
failCount := 0
failures := make(map[string]string)
2026-01-05 15:59:51 +08:00
circuitBreakers.Range(func(key, value interface{}) bool {
resourceName := key.(string)
if err := processFunc(resourceName); err != nil {
g.Log().Errorf(r.GetCtx(), "资源 %s 处理失败: %v", resourceName, err)
2026-01-01 13:39:51 +08:00
failCount++
2026-01-05 15:59:51 +08:00
failures[resourceName] = err.Error()
2026-01-01 13:39:51 +08:00
} else {
successCount++
}
2026-01-05 15:59:51 +08:00
return true
})
2026-01-01 13:39:51 +08:00
return successCount, failCount, failures
}
2026-01-01 13:14:46 +08:00
// CircuitBreakerResetHandler 重置熔断器
2025-12-31 23:38:33 +08:00
func CircuitBreakerResetHandler(r *ghttp.Request) {
2026-01-05 16:28:29 +08:00
// 认证检查
if !checkCircuitBreakerAuthToken(r) {
2026-01-05 16:14:14 +08:00
return
}
2026-01-05 15:59:51 +08:00
resourceName := r.Get("resource").String()
2026-01-01 13:14:46 +08:00
2026-01-05 15:59:51 +08:00
if resourceName == "" || resourceName == "*" {
successCount, failCount, failures := batchProcessResources(r, func(name string) error {
return resetSingleResource(r, name)
2026-01-01 13:39:51 +08:00
})
2026-01-01 13:14:46 +08:00
g.Log().Infof(r.GetCtx(), "批量重置熔断器完成: 成功 %d, 失败 %d", successCount, failCount)
r.Response.WriteJsonExit(ghttp.DefaultHandlerResponse{Code: 200, Message: fmt.Sprintf("批量重置完成: 成功 %d, 失败 %d", successCount, failCount),
2026-01-01 13:39:51 +08:00
Data: map[string]interface{}{"success": successCount, "failed": failCount, "failures": failures}})
2026-01-01 11:08:55 +08:00
return
}
2026-01-05 15:59:51 +08:00
if err := resetSingleResource(r, resourceName); err != nil {
2026-01-01 13:14:46 +08:00
r.Response.WriteJsonExit(ghttp.DefaultHandlerResponse{Code: 500, Message: fmt.Sprintf("重置熔断器失败: %v", err)})
return
}
2025-12-31 23:38:33 +08:00
2026-01-05 15:59:51 +08:00
r.Response.WriteJsonExit(ghttp.DefaultHandlerResponse{Code: 200, Message: fmt.Sprintf("资源 '%s' 的熔断器已重置", resourceName)})
2026-01-01 13:14:46 +08:00
}
2026-01-01 07:38:00 +08:00
2026-01-05 15:59:51 +08:00
// resetSingleResource 重置单个资源
func resetSingleResource(r *ghttp.Request, resourceName string) error {
2026-01-01 13:14:46 +08:00
if rules := circuitbreaker.GetRulesOfResource(resourceName); len(rules) > 0 {
if _, err := circuitbreaker.LoadRulesOfResource(resourceName, []*circuitbreaker.Rule{}); err != nil {
return err
2026-01-01 07:38:00 +08:00
}
2025-12-31 23:38:33 +08:00
}
2026-01-05 15:59:51 +08:00
if cbInfoVal, ok := circuitBreakers.Load(resourceName); ok {
2026-01-01 13:14:46 +08:00
cbInfo := cbInfoVal.(*CircuitBreakerInfo)
2026-01-05 15:59:51 +08:00
config := cbInfo.Config
2026-01-01 12:05:12 +08:00
cbInfo.State.Store(stateClosed)
2026-01-01 13:39:51 +08:00
// 重置指标
cbInfo.Metrics.reset()
2026-01-05 16:14:14 +08:00
warmupEndTime := time.Now().Add(config.WarmupDurationParsed).Unix()
// 防护:检查时间戳是否有效
if warmupEndTime < 0 || warmupEndTime > 1<<62 {
warmupEndTime = time.Now().Unix() + int64(config.WarmupDurationParsed.Seconds())
}
cbInfo.WarmupEndTime = warmupEndTime
2026-01-01 11:08:55 +08:00
cbInfo.Metrics.LastResetTime.Store(time.Now().Unix())
2026-01-01 01:33:59 +08:00
2026-01-05 15:59:51 +08:00
// 清除分布式状态
if config.DistributedTTL > 0 {
2026-01-01 14:07:14 +08:00
redisClient := g.Redis()
if redisClient != nil {
2026-01-01 15:13:36 +08:00
lockKey := "circuit_breaker:" + resourceName + ":lock"
2026-01-05 16:14:14 +08:00
// 使用较短的锁超时时间
success, err := redis.Lock(r.GetCtx(), lockKey, int64(3), func(ctx context.Context) error {
2026-01-01 15:13:36 +08:00
_, err := redisClient.Del(ctx, "circuit_breaker:"+resourceName+":state")
if err != nil {
g.Log().Warningf(ctx, "清除分布式熔断状态失败: %s, error: %v", resourceName, err)
}
return nil
})
if err != nil {
g.Log().Errorf(r.GetCtx(), "获取分布式锁失败: %s, error: %v", lockKey, err)
}
if !success {
g.Log().Debugf(r.GetCtx(), "未获取到分布式锁,跳过状态清除: %s", lockKey)
2026-01-01 13:14:46 +08:00
}
2026-01-01 10:37:01 +08:00
}
}
2026-01-01 01:33:59 +08:00
}
g.Log().Infof(r.GetCtx(), "熔断器已手动重置: %s", resourceName)
2026-01-01 13:14:46 +08:00
return nil
2025-12-31 23:38:33 +08:00
}
2026-01-01 07:38:00 +08:00
2026-01-01 13:14:46 +08:00
// CircuitBreakerReloadHandler 配置重载接口
2026-01-01 07:38:00 +08:00
func CircuitBreakerReloadHandler(r *ghttp.Request) {
2026-01-05 15:59:51 +08:00
r.Response.WriteJsonExit(ghttp.DefaultHandlerResponse{Code: 501, Message: "基于接口的熔断器暂不支持配置重载"})
2026-01-01 13:14:46 +08:00
}
// StateChangeListener is the callback type invoked when a circuit breaker
// changes state. It receives the service (resource) name, the state being
// left and the state being entered.
type StateChangeListener func(serviceName string, fromState, toState CircuitBreakerState)
// RegisterStateChangeListener registers listener under name, replacing any
// listener previously stored under the same name.
//
// A nil listener is ignored: storing it would make the next state-change
// notification call a nil function and panic.
func RegisterStateChangeListener(name string, listener StateChangeListener) {
	if listener == nil {
		return
	}
	stateChangeListeners.Store(name, listener)
}
// notifyStateChange calls every registered state-change listener with the
// given transition. Entries in stateChangeListeners that are not of type
// StateChangeListener are skipped.
func notifyStateChange(serviceName string, fromState, toState CircuitBreakerState) {
	stateChangeListeners.Range(func(_, entry interface{}) bool {
		if callback, isListener := entry.(StateChangeListener); isListener {
			callback(serviceName, fromState, toState)
		}
		// Always continue with the next listener.
		return true
	})
}
// registerStateChangeListeners 注册默认监听器
func registerStateChangeListeners() {
if _, exists := stateChangeListenersRegistered.LoadOrStore("default", true); exists {
2026-01-01 07:38:00 +08:00
return
}
2026-01-01 13:14:46 +08:00
RegisterStateChangeListener("default", func(serviceName string, fromState, toState CircuitBreakerState) {
level := "Info"
if toState == StateOpen {
level = "Warning"
}
g.Log().Print(context.Background(), level, fmt.Sprintf("熔断器状态变化: service=%s, %s -> %s", serviceName, fromState, toState))
2026-01-01 07:38:00 +08:00
})
}