refactor(asynch): 重构异步模型配置和队列管理

This commit is contained in:
2026-06-02 20:26:45 +08:00
parent c7e9eb889b
commit 52124385a1
18 changed files with 726 additions and 1006 deletions

View File

@@ -28,7 +28,6 @@ type AutoTuneResult struct {
OldQueueLimit int `json:"oldQueueLimit"` // 调参前运行时值Redis若无则等于 cap
NewQueueLimit int `json:"newQueueLimit"` // 本次计算出的运行时值(将写入 Redis受 ±50% 约束且不超过 cap
ExpectedSeconds int `json:"expectedSeconds"` // 模型预计执行时间asynch_models.expected_seconds用于 queue_limit 计算绑定)
}
// AutoTune 由上层定时任务通过接口触发:
@@ -65,11 +64,11 @@ func AutoTune(ctx context.Context, req *dto.AutoTuneReq) (res *dto.AutoTuneRes,
if m.MaxConcurrency > cur.MaxConcurrency {
cur.MaxConcurrency = m.MaxConcurrency
}
if m.QueueLimit > cur.QueueLimit {
cur.QueueLimit = m.QueueLimit
if m.MaxConcurrency*2 > cur.MaxConcurrency*2 {
cur.MaxConcurrency = m.MaxConcurrency
}
if m.ExpectedSeconds > cur.ExpectedSeconds {
cur.ExpectedSeconds = m.ExpectedSeconds
if m.TimeoutSeconds > cur.TimeoutSeconds {
cur.TimeoutSeconds = m.TimeoutSeconds
}
}
if len(modelMap) == 0 {
@@ -113,7 +112,7 @@ SELECT model_name,
for modelName, m := range modelMap {
s := statMap[modelName]
capMax := m.MaxConcurrency
capQueue := m.QueueLimit
capQueue := m.MaxConcurrency * 2
oldMax := GetRuntimeMaxConcurrency(ctx, modelName, capMax)
oldQueue := GetRuntimeQueueLimit(ctx, modelName, capQueue)
@@ -129,7 +128,6 @@ SELECT model_name,
CapQueueLimit: capQueue,
OldQueueLimit: oldQueue,
NewQueueLimit: oldQueue,
ExpectedSeconds: m.ExpectedSeconds,
})
continue
}
@@ -155,7 +153,7 @@ SELECT model_name,
setRuntimeInt(ctx, runtimeMaxConcurrencyKey(modelName), newMax)
// queue_limitW_target = expected_seconds * queueFactor
exp := m.ExpectedSeconds
exp := m.TimeoutSeconds
if exp <= 0 {
exp = 60
}
@@ -190,7 +188,6 @@ SELECT model_name,
CapQueueLimit: capQueue,
OldQueueLimit: oldQueue,
NewQueueLimit: newQueue,
ExpectedSeconds: m.ExpectedSeconds,
})
}