refactor(service): 重构服务模块结构并优化模型配置

This commit is contained in:
2026-05-29 17:54:19 +08:00
parent e487b4bb5e
commit d409b84b58
24 changed files with 943 additions and 1158 deletions

201
service/queue/auto_tune.go Normal file
View File

@@ -0,0 +1,201 @@
package queue
import (
"context"
"errors"
"fmt"
"math"
"model-gateway/model/dto"
"model-gateway/consts/public"
"model-gateway/model/entity"
"gitea.com/red-future/common/db/gfdb"
"github.com/gogf/gf/v2/frame/g"
)
// AutoTuneResult is the outcome of one auto-tune pass for a single model_name.
type AutoTuneResult struct {
ModelName string `json:"modelName"` // Model name (asynch_models.model_name).
Samples int `json:"samples"` // Sample count: tasks in the window with state 2/3 and non-null started_at/finished_at.
P90Exec float64 `json:"p90ExecSeconds"` // P90 of execution time in seconds, measured as finished_at - started_at.
CapMaxConcurrency int `json:"capMaxConcurrency"` // Configured cap (asynch_models.max_concurrency); never overwritten by tuning.
OldMaxConcurrency int `json:"oldMaxConcurrency"` // Runtime value before tuning (from Redis); equals the cap when absent.
NewMaxConcurrency int `json:"newMaxConcurrency"` // Runtime value computed by this pass (written to Redis; bounded by +/-50% and the cap).
CapQueueLimit int `json:"capQueueLimit"` // Configured cap (asynch_models.queue_limit); never overwritten by tuning.
OldQueueLimit int `json:"oldQueueLimit"` // Runtime value before tuning (from Redis); equals the cap when absent.
NewQueueLimit int `json:"newQueueLimit"` // Runtime value computed by this pass (written to Redis; bounded by +/-50% and the cap).
ExpectedSeconds int `json:"expectedSeconds"` // Expected execution time (asynch_models.expected_seconds); drives the queue_limit computation.
}
// AutoTune recomputes the runtime max_concurrency and queue_limit of every
// enabled model from recent task statistics and stores them in Redis.
//
// For each model it:
//   - takes the P90 of execution time (finished_at - started_at) over tasks
//     that finished in the last req.WindowSeconds seconds with state 2 or 3;
//   - estimates max_concurrency as ceil(arrivalRate * P90 / utilization),
//     where arrivalRate is the number of finished tasks per second of window;
//   - derives queue_limit from an allowed waiting time of
//     expected_seconds * 2 plus the new concurrency;
//   - limits each value to +/-50% of the previous runtime value and to the
//     configured cap, then writes it to Redis with a TTL.
//
// Models without usable samples are reported unchanged and nothing is written
// for them.
//
// A nil req yields an error. A non-positive req.WindowSeconds is replaced, in
// place, by 3600. Database and result-decoding errors are returned to the
// caller; Redis write failures are not reported by this function.
func AutoTune(ctx context.Context, req *dto.AutoTuneReq) (res *dto.AutoTuneRes, err error) {
	if req == nil {
		return nil, errors.New("request cannot be nil")
	}
	if req.WindowSeconds <= 0 {
		req.WindowSeconds = 3600 // default window: 1 hour
	}

	// 1) Load the configured caps, de-duplicated by model_name. When several
	// rows share a name, the larger value of each field wins.
	var modelRows []*entity.AsynchModel
	if err := gfdb.DB(ctx).Model(ctx, public.TableNameModel).
		Where("deleted_at IS NULL").
		Where(entity.AsynchModelCol.Enabled, 1).
		Scan(&modelRows); err != nil {
		return nil, err
	}
	modelMap := make(map[string]*entity.AsynchModel)
	for _, m := range modelRows {
		if m == nil || m.ModelName == "" {
			continue
		}
		cur := modelMap[m.ModelName]
		if cur == nil {
			modelMap[m.ModelName] = m
			continue
		}
		if m.MaxConcurrency > cur.MaxConcurrency {
			cur.MaxConcurrency = m.MaxConcurrency
		}
		if m.QueueLimit > cur.QueueLimit {
			cur.QueueLimit = m.QueueLimit
		}
		if m.ExpectedSeconds > cur.ExpectedSeconds {
			cur.ExpectedSeconds = m.ExpectedSeconds
		}
	}
	if len(modelMap) == 0 {
		return nil, errors.New("no models found")
	}

	// 2) Per-model sample count and P90 execution time over the window.
	type statRow struct {
		ModelName string
		Cnt       int
		P90Exec   float64
	}
	var stats []statRow
	query := fmt.Sprintf(`
SELECT model_name,
COUNT(1) AS cnt,
COALESCE(percentile_cont(0.9) WITHIN GROUP (ORDER BY EXTRACT(EPOCH FROM (finished_at - started_at))), 0) AS p90_exec
FROM %s
WHERE deleted_at IS NULL
AND state IN (2,3)
AND started_at IS NOT NULL
AND finished_at IS NOT NULL
AND finished_at >= (NOW() - (? || ' seconds')::interval)
GROUP BY model_name`, public.TableNameTask)
	r, err := gfdb.DB(ctx).GetAll(ctx, query, req.WindowSeconds)
	if err != nil {
		return nil, err
	}
	// A decode failure must not be mistaken for "no samples for any model".
	if err := r.Structs(&stats); err != nil {
		return nil, fmt.Errorf("auto tune: decoding task stats: %w", err)
	}
	statMap := make(map[string]statRow, len(stats))
	for _, s := range stats {
		statMap[s.ModelName] = s
	}

	// 3) Tuning.
	const utilization = 0.8
	const maxChangeRatio = 0.5 // at most +/-50% per pass
	const queueFactor = 2.0    // allowed waiting time = expected_seconds * 2

	// The statistics cover req.WindowSeconds seconds, so the rate must be
	// normalised by that same window rather than by a fixed hour.
	windowSeconds := float64(req.WindowSeconds)

	out := make([]AutoTuneResult, 0, len(modelMap))
	for modelName, m := range modelMap {
		s := statMap[modelName]
		capMax := m.MaxConcurrency
		capQueue := m.QueueLimit
		oldMax := GetRuntimeMaxConcurrency(ctx, modelName, capMax)
		oldQueue := GetRuntimeQueueLimit(ctx, modelName, capQueue)

		// Start from "unchanged"; the New* fields are overwritten below when
		// there is enough data to tune.
		result := AutoTuneResult{
			ModelName:         modelName,
			Samples:           s.Cnt,
			P90Exec:           s.P90Exec,
			CapMaxConcurrency: capMax,
			OldMaxConcurrency: oldMax,
			NewMaxConcurrency: oldMax,
			CapQueueLimit:     capQueue,
			OldQueueLimit:     oldQueue,
			NewQueueLimit:     oldQueue,
			ExpectedSeconds:   m.ExpectedSeconds,
		}

		// No samples (or a zero P90): leave the runtime values alone.
		if s.Cnt <= 0 || s.P90Exec <= 0 {
			out = append(out, result)
			continue
		}

		// arrivalRate ~= finished tasks per second over the window.
		arrivalRate := float64(s.Cnt) / windowSeconds

		// desiredMax = ceil(arrivalRate * p90 / utilization), at least 1.
		desiredMax := int(math.Ceil(arrivalRate * s.P90Exec / utilization))
		if desiredMax < 1 {
			desiredMax = 1
		}

		// Limit the change per pass relative to the previous runtime value.
		minMax := int(math.Floor(float64(oldMax) * (1 - maxChangeRatio)))
		maxMax := int(math.Ceil(float64(oldMax) * (1 + maxChangeRatio)))
		if minMax < 1 {
			minMax = 1
		}
		newMax := clampInt(desiredMax, minMax, maxMax)
		if capMax > 0 {
			newMax = clampInt(newMax, 1, capMax)
		}
		// setRuntimeInt ignores non-positive values, which is what newMax
		// ends up as when the cap itself is non-positive.
		setRuntimeInt(ctx, runtimeMaxConcurrencyKey(modelName), newMax)

		// queue_limit: tasks arriving during the allowed waiting time plus
		// the ones that can run concurrently.
		exp := m.ExpectedSeconds
		if exp <= 0 {
			exp = 60
		}
		wTarget := float64(exp) * queueFactor
		desiredQueue := int(math.Ceil(arrivalRate*wTarget)) + newMax
		if desiredQueue < newMax {
			desiredQueue = newMax
		}

		newQueue := oldQueue
		if capQueue > 0 {
			minQ := int(math.Floor(float64(oldQueue) * (1 - maxChangeRatio)))
			maxQ := int(math.Ceil(float64(oldQueue) * (1 + maxChangeRatio)))
			// The queue must be able to hold at least the running tasks.
			if minQ < newMax {
				minQ = newMax
			}
			if maxQ < minQ {
				maxQ = minQ
			}
			newQueue = clampInt(desiredQueue, minQ, maxQ)
			newQueue = clampInt(newQueue, newMax, capQueue)
			setRuntimeInt(ctx, runtimeQueueLimitKey(modelName), newQueue)
		}

		result.NewMaxConcurrency = newMax
		result.NewQueueLimit = newQueue
		out = append(out, result)
	}

	g.Log().Infof(ctx, "[auto_tune] done models=%d windowSeconds=%d", len(out), req.WindowSeconds)
	return &dto.AutoTuneRes{
		List: out,
	}, nil
}

107
service/queue/queue_gate.go Normal file
View File

@@ -0,0 +1,107 @@
package queue
import (
"context"
"fmt"
"math"
"time"
"github.com/gogf/gf/v2/frame/g"
"github.com/gogf/gf/v2/util/gconv"
)
// ===== Strict queue_limit: atomic Redis gate =====
//
// Background (per the original author's notes): queue_limit used to be
// enforced approximately with "Count + Insert", which could briefly exceed the
// limit under distributed concurrent task creation.
// Goal: use a Redis Lua script to check the limit and reserve a slot atomically,
// so the limit is never exceeded.
//
// The counting rule matches the previous logic: only state 0/1 (queued/running).
// - CreateTask occupies one slot after the task row is stored.
// - A task that succeeds or fails (state -> 2/3) releases its slot.
// - Retrying a failed task (state 3 -> 0) must occupy a slot again; if that
//   fails the task stays in state 3 and the cleaner tries again later.
//
// To keep a leaked reservation from filling the gate forever, slots live in a
// ZSET scored by expiry time and are reclaimed automatically. The gate stays
// strict as long as a task's real lifetime is well below the slot TTL computed
// by calcGateTTLSeconds.
const (
queueGateKeyPrefix = "asynch:qgate:" // asynch:qgate:{modelName}
)
// queueGateAcquireLua is the Lua script that reserves a queue slot.
// KEYS[1] is the gate ZSET; ARGV is now, limit, expireAt, member, keyTTL.
// It first removes every member whose score is <= now, then returns 0 without
// changes when the remaining cardinality is >= limit. Otherwise it adds member
// with score expireAt, sets the key's expiry to keyTTL seconds and returns 1.
var queueGateAcquireLua = `
local key = KEYS[1]
local now = tonumber(ARGV[1])
local limit = tonumber(ARGV[2])
local expireAt = tonumber(ARGV[3])
local member = ARGV[4]
local keyTTL = tonumber(ARGV[5])
-- 先清理过期的占位
redis.call("ZREMRANGEBYSCORE", key, "-inf", now)
local current = tonumber(redis.call("ZCARD", key) or "0")
if current >= limit then
return 0
end
redis.call("ZADD", key, expireAt, member)
redis.call("EXPIRE", key, keyTTL)
return 1
`
// queueGateReleaseLua is the Lua script that releases a queue slot.
// It removes ARGV[1] from the ZSET at KEYS[1] and always returns 1, so
// releasing a member that is not present is harmless.
var queueGateReleaseLua = `
local key = KEYS[1]
local member = ARGV[1]
redis.call("ZREM", key, member)
return 1
`
func queueGateKey(modelName string) string {
return fmt.Sprintf("%s%s", queueGateKeyPrefix, modelName)
}
// calcGateTTLSeconds returns the auto-reclaim TTL, in seconds, of a queue-gate
// slot: ten times expectedSeconds, bounded to [1 hour, 24 hours].
//
// A non-positive expectedSeconds yields the 1 hour minimum. The bound is
// applied before converting back to int, so an extremely large expectedSeconds
// yields the 24 hour maximum instead of an out-of-range conversion.
func calcGateTTLSeconds(expectedSeconds int) int {
	const (
		minTTL    = 3600      // never reclaim earlier than 1 hour
		maxTTL    = 24 * 3600 // never hold a slot longer than 24 hours
		ttlFactor = 10        // safety multiple of the expected run time
	)
	if expectedSeconds <= 0 {
		return minTTL
	}
	scaled := math.Ceil(float64(expectedSeconds) * ttlFactor)
	switch {
	case scaled <= minTTL:
		return minTTL
	case scaled >= maxTTL:
		return maxTTL
	}
	// scaled is strictly inside (minTTL, maxTTL), so the conversion is exact.
	return int(scaled)
}
// AcquireQueueSlot 严格入队:原子占位(成功返回 true
func AcquireQueueSlot(ctx context.Context, modelName, taskId string, limit int, expectedSeconds int) (bool, error) {
if limit <= 0 {
return true, nil
}
key := queueGateKey(modelName)
now := time.Now().Unix()
ttl := calcGateTTLSeconds(expectedSeconds)
expireAt := now + int64(ttl)
// keyTTL 要略大于 member TTL避免 key 先过期导致计数丢失
keyTTL := ttl + 60
r, err := g.Redis().Do(ctx, "EVAL", queueGateAcquireLua, 1, key, now, limit, expireAt, taskId, keyTTL)
if err != nil {
return false, fmt.Errorf("queue gate acquire failed: %w", err)
}
return gconv.Int(r) == 1, nil
}
// ReleaseQueueSlot removes taskId's reservation from the queue gate of
// modelName. It does nothing when either argument is empty, and releasing a
// slot that is not held is harmless.
//
// The release is best-effort: a Redis failure is logged as a warning and not
// returned, because the slot is reclaimed by its expiry score anyway.
func ReleaseQueueSlot(ctx context.Context, modelName, taskId string) {
	if taskId == "" || modelName == "" {
		return
	}
	key := queueGateKey(modelName)
	if _, err := g.Redis().Do(ctx, "EVAL", queueGateReleaseLua, 1, key, taskId); err != nil {
		g.Log().Warningf(ctx, "[queue_gate] release slot failed key=%s taskId=%s err=%v", key, taskId, err)
	}
}

View File

@@ -0,0 +1,82 @@
package queue
import (
"context"
"github.com/gogf/gf/v2/frame/g"
"github.com/gogf/gf/v2/util/gconv"
)
// Runtime tuning values live in Redis; the caps stored in asynch_models are
// never modified.
// Per the original note, an upstream job calls /model/autoTune hourly to write
// the runtime values, and Worker/CreateTask read them to apply the limits.
const (
runtimeMaxCKeyPrefix = "asynch:runtime:max_concurrency:" // + model_name
runtimeQueueKeyPrefix = "asynch:runtime:queue_limit:" // + model_name
runtimeTTLSeconds = 2 * 3600 // 2 hours, so one failed tuning pass does not immediately fall back to the cap
)
// runtimeMaxConcurrencyKey returns the Redis key holding the runtime
// max_concurrency of modelName (runtimeMaxCKeyPrefix followed by the name).
func runtimeMaxConcurrencyKey(modelName string) string {
return runtimeMaxCKeyPrefix + modelName
}
// runtimeQueueLimitKey returns the Redis key holding the runtime queue_limit
// of modelName (runtimeQueueKeyPrefix followed by the name).
func runtimeQueueLimitKey(modelName string) string {
return runtimeQueueKeyPrefix + modelName
}
// getRuntimeInt reads key from Redis and converts the reply to an int.
// The boolean is true only when the read succeeded and the converted value is
// positive; on a Redis error, a nil reply or a non-positive value it returns
// (0, false).
func getRuntimeInt(ctx context.Context, key string) (int, bool) {
	reply, err := g.Redis().Do(ctx, "GET", key)
	if err != nil {
		return 0, false
	}
	if reply == nil {
		return 0, false
	}
	if n := gconv.Int(reply); n > 0 {
		return n, true
	}
	return 0, false
}
// setRuntimeInt stores val under key in Redis with a TTL of runtimeTTLSeconds.
// Non-positive values are ignored and nothing is written.
//
// The write is best-effort: a Redis failure is logged as a warning and not
// returned to the caller.
func setRuntimeInt(ctx context.Context, key string, val int) {
	if val <= 0 {
		return
	}
	// SETEX key ttl val
	if _, err := g.Redis().Do(ctx, "SETEX", key, runtimeTTLSeconds, val); err != nil {
		g.Log().Warningf(ctx, "[runtime_tune] set runtime value failed key=%s val=%d err=%v", key, val, err)
	}
}
// GetRuntimeMaxConcurrency returns the effective concurrency limit of
// modelName: the runtime value stored in Redis when one is available and does
// not exceed cap, otherwise cap. A non-positive cap is returned unchanged
// without consulting Redis.
func GetRuntimeMaxConcurrency(ctx context.Context, modelName string, cap int) int {
	if cap <= 0 {
		return cap
	}
	runtimeVal, found := getRuntimeInt(ctx, runtimeMaxConcurrencyKey(modelName))
	if !found || runtimeVal > cap {
		return cap
	}
	return runtimeVal
}
// GetRuntimeQueueLimit returns the effective queue limit of modelName: the
// runtime value stored in Redis when one is available and does not exceed cap,
// otherwise cap. A non-positive cap is returned unchanged without consulting
// Redis.
func GetRuntimeQueueLimit(ctx context.Context, modelName string, cap int) int {
	if cap <= 0 {
		return cap
	}
	runtimeVal, found := getRuntimeInt(ctx, runtimeQueueLimitKey(modelName))
	if !found || runtimeVal > cap {
		return cap
	}
	return runtimeVal
}
// clampInt limits v to the range [minV, maxV]. The lower bound is checked
// first: any v below minV yields minV, and any other v above maxV yields maxV,
// which also fixes the result when the bounds are inverted.
func clampInt(v, minV, maxV int) int {
	switch {
	case v < minV:
		return minV
	case v > maxV:
		return maxV
	default:
		return v
	}
}

View File

@@ -0,0 +1,57 @@
package queue
import (
"context"
"fmt"
"github.com/gogf/gf/v2/frame/g"
"github.com/gogf/gf/v2/util/gconv"
)
// acquireLua is the Lua script that takes one token from a counter semaphore.
// KEYS[1] is the counter key, ARGV[1] the maximum and ARGV[2] a TTL in seconds.
// It returns 0 when the counter is already >= max; otherwise it increments the
// counter and returns 1. The expiry is set only when the increment brings the
// counter to 1.
// NOTE(review): because the expiry is not refreshed on later acquires, the
// counter can expire while tokens are still held — confirm this is the
// intended leak-recovery behavior.
var acquireLua = `
local current = tonumber(redis.call("GET", KEYS[1]) or "0")
local max = tonumber(ARGV[1])
local ttl = tonumber(ARGV[2])
if current >= max then
return 0
end
current = redis.call("INCR", KEYS[1])
if current == 1 then
redis.call("EXPIRE", KEYS[1], ttl)
end
if current > max then
redis.call("DECR", KEYS[1])
return 0
end
return 1
`
// releaseLua is the Lua script that returns one token to a counter semaphore.
// It decrements the counter at KEYS[1], deletes the key once the counter is
// <= 0, and always returns 1.
var releaseLua = `
local current = tonumber(redis.call("DECR", KEYS[1]) or "0")
if current <= 0 then
redis.call("DEL", KEYS[1])
end
return 1
`
// AcquireSemaphore tries to take one concurrency token from the counter stored
// at key and reports whether a token was granted.
//
// A non-positive max means "unlimited": it returns true without contacting
// Redis. A non-positive ttlSeconds is replaced by 3600. A Redis failure is
// returned as a wrapped error together with false.
func AcquireSemaphore(ctx context.Context, key string, max int, ttlSeconds int64) (bool, error) {
	if max <= 0 {
		return true, nil
	}

	ttl := ttlSeconds
	if ttl <= 0 {
		ttl = 3600
	}

	reply, err := g.Redis().Do(ctx, "EVAL", acquireLua, 1, key, max, ttl)
	if err != nil {
		return false, fmt.Errorf("获取并发令牌失败: %w", err)
	}
	granted := gconv.Int(reply) == 1
	return granted, nil
}
// ReleaseSemaphore gives one concurrency token back to the counter stored at
// key by evaluating the release script. It returns the Redis error, if any.
func ReleaseSemaphore(ctx context.Context, key string) error {
	if _, err := g.Redis().Do(ctx, "EVAL", releaseLua, 1, key); err != nil {
		return err
	}
	return nil
}