refactor(service): 重构服务模块结构并优化模型配置
This commit is contained in:
201
service/queue/auto_tune.go
Normal file
201
service/queue/auto_tune.go
Normal file
@@ -0,0 +1,201 @@
|
||||
package queue
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"model-gateway/model/dto"
|
||||
|
||||
"model-gateway/consts/public"
|
||||
"model-gateway/model/entity"
|
||||
|
||||
"gitea.com/red-future/common/db/gfdb"
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
)
|
||||
|
||||
// AutoTuneResult describes the outcome of one tuning pass for a single model
// (one entry per model_name).
type AutoTuneResult struct {
	// ModelName is the model name (asynch_models.model_name).
	ModelName string `json:"modelName"`

	// Samples is the number of tasks in the window that were counted:
	// state 2 or 3 with both started_at and finished_at set.
	Samples int `json:"samples"`

	// P90Exec is the 90th percentile of execution time in seconds,
	// measured as finished_at - started_at.
	P90Exec float64 `json:"p90ExecSeconds"`

	// CapMaxConcurrency is the configured upper bound
	// (asynch_models.max_concurrency); tuning never overwrites it.
	CapMaxConcurrency int `json:"capMaxConcurrency"`

	// OldMaxConcurrency is the runtime value before this pass; it equals the
	// cap when no runtime value is stored.
	OldMaxConcurrency int `json:"oldMaxConcurrency"`

	// NewMaxConcurrency is the runtime value computed by this pass, limited to
	// ±50% of the old value and to the cap.
	NewMaxConcurrency int `json:"newMaxConcurrency"`

	// CapQueueLimit is the configured upper bound
	// (asynch_models.queue_limit); tuning never overwrites it.
	CapQueueLimit int `json:"capQueueLimit"`

	// OldQueueLimit is the runtime value before this pass; it equals the cap
	// when no runtime value is stored.
	OldQueueLimit int `json:"oldQueueLimit"`

	// NewQueueLimit is the runtime value computed by this pass, limited to
	// ±50% of the old value and to the cap.
	NewQueueLimit int `json:"newQueueLimit"`

	// ExpectedSeconds is the model's expected execution time in seconds
	// (asynch_models.expected_seconds); the queue_limit estimate is derived
	// from it.
	ExpectedSeconds int `json:"expectedSeconds"`
}
|
||||
|
||||
// AutoTune 由上层定时任务通过接口触发:
|
||||
// - 统计指定时间窗口内该模型任务的执行耗时(finished_at - started_at,取 P90)
|
||||
// - 基于吞吐与 P90 执行耗时估算 max_concurrency 的运行时值(不超过 cap)
|
||||
// - queue_limit 与 expected_seconds 绑定(允许排队时间 = expected_seconds * 2),生成运行时值(不超过 cap)
|
||||
// - 单次调整幅度限制 ±50%,写入 Redis(带 TTL)
|
||||
func AutoTune(ctx context.Context, req *dto.AutoTuneReq) (res *dto.AutoTuneRes, err error) {
|
||||
if req == nil {
|
||||
return nil, errors.New("request cannot be nil")
|
||||
}
|
||||
if req.WindowSeconds <= 0 {
|
||||
req.WindowSeconds = 3600 // 默认1小时
|
||||
}
|
||||
// 1) 读取模型配置(cap),按 model_name 聚合去重(如果表里有多租户重复数据,取较大上限)
|
||||
var modelRows []*entity.AsynchModel
|
||||
if err := gfdb.DB(ctx).Model(ctx, public.TableNameModel).
|
||||
Where("deleted_at IS NULL").
|
||||
Where(entity.AsynchModelCol.Enabled, 1).
|
||||
Scan(&modelRows); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
modelMap := make(map[string]*entity.AsynchModel)
|
||||
for _, m := range modelRows {
|
||||
if m == nil || m.ModelName == "" {
|
||||
continue
|
||||
}
|
||||
cur := modelMap[m.ModelName]
|
||||
if cur == nil {
|
||||
modelMap[m.ModelName] = m
|
||||
continue
|
||||
}
|
||||
// 取更大的 cap
|
||||
if m.MaxConcurrency > cur.MaxConcurrency {
|
||||
cur.MaxConcurrency = m.MaxConcurrency
|
||||
}
|
||||
if m.QueueLimit > cur.QueueLimit {
|
||||
cur.QueueLimit = m.QueueLimit
|
||||
}
|
||||
if m.ExpectedSeconds > cur.ExpectedSeconds {
|
||||
cur.ExpectedSeconds = m.ExpectedSeconds
|
||||
}
|
||||
}
|
||||
if len(modelMap) == 0 {
|
||||
return nil, errors.New("no models found")
|
||||
}
|
||||
|
||||
// 2) 统计指定窗口:按 model_name 计算 cnt 和 P90 执行耗时
|
||||
type statRow struct {
|
||||
ModelName string
|
||||
Cnt int
|
||||
P90Exec float64
|
||||
}
|
||||
var stats []statRow
|
||||
sql := fmt.Sprintf(`
|
||||
SELECT model_name,
|
||||
COUNT(1) AS cnt,
|
||||
COALESCE(percentile_cont(0.9) WITHIN GROUP (ORDER BY EXTRACT(EPOCH FROM (finished_at - started_at))), 0) AS p90_exec
|
||||
FROM %s
|
||||
WHERE deleted_at IS NULL
|
||||
AND state IN (2,3)
|
||||
AND started_at IS NOT NULL
|
||||
AND finished_at IS NOT NULL
|
||||
AND finished_at >= (NOW() - (? || ' seconds')::interval)
|
||||
GROUP BY model_name`, public.TableNameTask)
|
||||
r, err := gfdb.DB(ctx).GetAll(ctx, sql, req.WindowSeconds)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
_ = r.Structs(&stats)
|
||||
statMap := make(map[string]statRow, len(stats))
|
||||
for _, s := range stats {
|
||||
statMap[s.ModelName] = s
|
||||
}
|
||||
|
||||
// 3) 调参计算
|
||||
const utilization = 0.8
|
||||
const maxChangeRatio = 0.5 // ±50%
|
||||
const queueFactor = 2.0 // 与 expected_seconds 绑定:W_target = expected_seconds * 2
|
||||
|
||||
out := make([]AutoTuneResult, 0, len(modelMap))
|
||||
for modelName, m := range modelMap {
|
||||
s := statMap[modelName]
|
||||
capMax := m.MaxConcurrency
|
||||
capQueue := m.QueueLimit
|
||||
oldMax := GetRuntimeMaxConcurrency(ctx, modelName, capMax)
|
||||
oldQueue := GetRuntimeQueueLimit(ctx, modelName, capQueue)
|
||||
|
||||
// 默认:无样本则不调整
|
||||
if s.Cnt <= 0 || s.P90Exec <= 0 {
|
||||
out = append(out, AutoTuneResult{
|
||||
ModelName: modelName,
|
||||
Samples: s.Cnt,
|
||||
P90Exec: s.P90Exec,
|
||||
CapMaxConcurrency: capMax,
|
||||
OldMaxConcurrency: oldMax,
|
||||
NewMaxConcurrency: oldMax,
|
||||
CapQueueLimit: capQueue,
|
||||
OldQueueLimit: oldQueue,
|
||||
NewQueueLimit: oldQueue,
|
||||
ExpectedSeconds: m.ExpectedSeconds,
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
// arrival_rate ≈ 完成数/3600
|
||||
arrivalRate := float64(s.Cnt) / 3600.0
|
||||
|
||||
// desiredMax = ceil(arrivalRate * p90 / utilization)
|
||||
desiredMax := int(math.Ceil(arrivalRate * s.P90Exec / utilization))
|
||||
if desiredMax < 1 {
|
||||
desiredMax = 1
|
||||
}
|
||||
// 单次变化幅度限制
|
||||
minMax := int(math.Floor(float64(oldMax) * (1 - maxChangeRatio)))
|
||||
maxMax := int(math.Ceil(float64(oldMax) * (1 + maxChangeRatio)))
|
||||
if minMax < 1 {
|
||||
minMax = 1
|
||||
}
|
||||
newMax := clampInt(desiredMax, minMax, maxMax)
|
||||
if capMax > 0 {
|
||||
newMax = clampInt(newMax, 1, capMax)
|
||||
}
|
||||
setRuntimeInt(ctx, runtimeMaxConcurrencyKey(modelName), newMax)
|
||||
|
||||
// queue_limit:W_target = expected_seconds * queueFactor
|
||||
exp := m.ExpectedSeconds
|
||||
if exp <= 0 {
|
||||
exp = 60
|
||||
}
|
||||
wTarget := float64(exp) * queueFactor
|
||||
desiredQueue := int(math.Ceil(arrivalRate*wTarget)) + newMax
|
||||
if desiredQueue < newMax {
|
||||
desiredQueue = newMax
|
||||
}
|
||||
|
||||
newQueue := oldQueue
|
||||
if capQueue > 0 {
|
||||
minQ := int(math.Floor(float64(oldQueue) * (1 - maxChangeRatio)))
|
||||
maxQ := int(math.Ceil(float64(oldQueue) * (1 + maxChangeRatio)))
|
||||
if minQ < newMax {
|
||||
minQ = newMax
|
||||
}
|
||||
if maxQ < minQ {
|
||||
maxQ = minQ
|
||||
}
|
||||
newQueue = clampInt(desiredQueue, minQ, maxQ)
|
||||
newQueue = clampInt(newQueue, newMax, capQueue)
|
||||
setRuntimeInt(ctx, runtimeQueueLimitKey(modelName), newQueue)
|
||||
}
|
||||
|
||||
out = append(out, AutoTuneResult{
|
||||
ModelName: modelName,
|
||||
Samples: s.Cnt,
|
||||
P90Exec: s.P90Exec,
|
||||
CapMaxConcurrency: capMax,
|
||||
OldMaxConcurrency: oldMax,
|
||||
NewMaxConcurrency: newMax,
|
||||
CapQueueLimit: capQueue,
|
||||
OldQueueLimit: oldQueue,
|
||||
NewQueueLimit: newQueue,
|
||||
ExpectedSeconds: m.ExpectedSeconds,
|
||||
})
|
||||
}
|
||||
|
||||
g.Log().Infof(ctx, "[auto_tune] done models=%d windowSeconds=%d", len(out), req.WindowSeconds)
|
||||
return &dto.AutoTuneRes{
|
||||
List: out,
|
||||
}, nil
|
||||
}
|
||||
Reference in New Issue
Block a user