perf: Optimize Antigravity MODEL_CAPACITY_EXHAUSTED retry strategy
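- Reduce max retry attempts from 60 to 10 (exponential backoff prevents pile-up)
- Replace fixed 1s delays with exponential backoff: 1s, 2s, 4s, 8s, 16s, 32s
- Add random jitter to prevent thundering herd effect (intended as ±10%; note that the code draws the jitter from currentWaitDuration / 5, which allows up to ±20%)
- Cap max wait at 32 seconds per attempt to avoid excessive delays
- Reduces the number of requests sent while the API is temporarily unavailable

Before: ~60s worst case (60 * 1s fixed delays)
After: ~191s nominal worst case if all 10 attempts fail (1 + 2 + 4 + 8 + 16 + 5 * 32 seconds, before jitter); the first five retries still complete within ~31s

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>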
This commit is contained in:
parent
721dc939a7
commit
b01a44cd39
@ -44,9 +44,10 @@ const (
|
||||
|
||||
// MODEL_CAPACITY_EXHAUSTED 专用重试参数
|
||||
// 模型容量不足时,所有账号共享同一容量池,切换账号无意义
|
||||
// 使用固定 1s 间隔重试,最多重试 60 次
|
||||
antigravityModelCapacityRetryMaxAttempts = 60
|
||||
// 使用指数退避策略重试,最多重试 10 次(而非 60 次)
|
||||
antigravityModelCapacityRetryMaxAttempts = 10
|
||||
antigravityModelCapacityRetryWait = 1 * time.Second
|
||||
antigravityModelCapacityRetryMaxWait = 32 * time.Second // 指数退避上限
|
||||
|
||||
// Google RPC 状态和类型常量
|
||||
googleRPCStatusResourceExhausted = "RESOURCE_EXHAUSTED"
|
||||
@ -307,7 +308,7 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
||||
var lastRetryResp *http.Response
|
||||
var lastRetryBody []byte
|
||||
|
||||
// MODEL_CAPACITY_EXHAUSTED 使用独立的重试参数(60 次,固定 1s 间隔)
|
||||
// MODEL_CAPACITY_EXHAUSTED 使用独立的重试参数(10 次,指数退避)
|
||||
maxAttempts := antigravitySmartRetryMaxAttempts
|
||||
if isModelCapacityExhausted {
|
||||
maxAttempts = antigravityModelCapacityRetryMaxAttempts
|
||||
@ -334,10 +335,29 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
||||
}
|
||||
|
||||
for attempt := 1; attempt <= maxAttempts; attempt++ {
|
||||
log.Printf("%s status=%d oauth_smart_retry attempt=%d/%d delay=%v model=%s account=%d",
|
||||
p.prefix, resp.StatusCode, attempt, maxAttempts, waitDuration, modelName, p.account.ID)
|
||||
// 计算本次重试的等待时间
|
||||
var currentWaitDuration time.Duration
|
||||
if isModelCapacityExhausted {
|
||||
// 使用指数退避:1s, 2s, 4s, 8s, 16s, 32s, ...
|
||||
currentWaitDuration = waitDuration * time.Duration(1<<(attempt-1))
|
||||
if currentWaitDuration > antigravityModelCapacityRetryMaxWait {
|
||||
currentWaitDuration = antigravityModelCapacityRetryMaxWait
|
||||
}
|
||||
// 添加随机抖动(±10%)避免羊群效应
|
||||
jitter := time.Duration(mathrand.Int63n(int64(currentWaitDuration / 5)))
|
||||
if mathrand.Intn(2) == 0 {
|
||||
currentWaitDuration += jitter
|
||||
} else {
|
||||
currentWaitDuration -= jitter
|
||||
}
|
||||
} else {
|
||||
currentWaitDuration = waitDuration
|
||||
}
|
||||
|
||||
timer := time.NewTimer(waitDuration)
|
||||
log.Printf("%s status=%d oauth_smart_retry attempt=%d/%d delay=%v model=%s account=%d",
|
||||
p.prefix, resp.StatusCode, attempt, maxAttempts, currentWaitDuration, modelName, p.account.ID)
|
||||
|
||||
timer := time.NewTimer(currentWaitDuration)
|
||||
select {
|
||||
case <-p.ctx.Done():
|
||||
timer.Stop()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user