perf: Optimize Antigravity MODEL_CAPACITY_EXHAUSTED retry strategy

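- Reduce max retry attempts from 60 to 10 (exponential backoff prevents pile-up)
- Replace fixed 1s delays with exponential backoff: 1s, 2s, 4s, 8s, 16s, 32s, then 32s for each remaining attempt
- Add random jitter of up to ±20% of the computed delay (the code draws the jitter from currentWaitDuration / 5 and randomly adds or subtracts it) to prevent thundering herd effect
- Cap the pre-jitter wait at 32 seconds to avoid excessive delays (with jitter a single wait can reach roughly 38 seconds)
- Improves response time when API is temporarily unavailable

Before: ~60s worst case (60 * 1s fixed delays)
After: ~191s worst case of cumulative nominal wait if all 10 attempts are used (1 + 2 + 4 + 8 + 16 + 5 * 32 seconds, before jitter); the change reduces the number of retry requests from 60 to 10, not the worst-case total wait

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>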
2026-04-10 23:05:39 +08:00 · 2026-04-10 23:05:39 +08:00 · b01a44cd39
commit b01a44cd39
parent 721dc939a7
1 changed files with 26 additions and 6 deletions
--- a/backend/internal/service/antigravity_gateway_service.go
+++ b/backend/internal/service/antigravity_gateway_service.go
@ -44,9 +44,10 @@ const (

 	// MODEL_CAPACITY_EXHAUSTED 专用重试参数
 	// 模型容量不足时，所有账号共享同一容量池，切换账号无意义
-	// 使用固定 1s 间隔重试，最多重试 60 次
-	antigravityModelCapacityRetryMaxAttempts = 60
+	// 使用指数退避策略重试，最多重试 10 次（而非 60 次）
+	antigravityModelCapacityRetryMaxAttempts = 10
 	antigravityModelCapacityRetryWait        = 1 * time.Second
+	antigravityModelCapacityRetryMaxWait     = 32 * time.Second // 指数退避上限

 	// Google RPC 状态和类型常量
 	googleRPCStatusResourceExhausted      = "RESOURCE_EXHAUSTED"
@ -307,7 +308,7 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
 		var lastRetryResp *http.Response
 		var lastRetryBody []byte

-		// MODEL_CAPACITY_EXHAUSTED 使用独立的重试参数（60 次，固定 1s 间隔）
+		// MODEL_CAPACITY_EXHAUSTED 使用独立的重试参数（10 次，指数退避）
 		maxAttempts := antigravitySmartRetryMaxAttempts
 		if isModelCapacityExhausted {
 			maxAttempts = antigravityModelCapacityRetryMaxAttempts
@ -334,10 +335,29 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
 		}

 		for attempt := 1; attempt <= maxAttempts; attempt++ {
-			log.Printf("%s status=%d oauth_smart_retry attempt=%d/%d delay=%v model=%s account=%d",
-				p.prefix, resp.StatusCode, attempt, maxAttempts, waitDuration, modelName, p.account.ID)
+			// 计算本次重试的等待时间
+			var currentWaitDuration time.Duration
+			if isModelCapacityExhausted {
+				// 使用指数退避：1s, 2s, 4s, 8s, 16s, 32s, ...
+				currentWaitDuration = waitDuration * time.Duration(1<<(attempt-1))
+				if currentWaitDuration > antigravityModelCapacityRetryMaxWait {
+					currentWaitDuration = antigravityModelCapacityRetryMaxWait
+				}
+				// 添加随机抖动（±10%）避免羊群效应
+				jitter := time.Duration(mathrand.Int63n(int64(currentWaitDuration / 5)))
+				if mathrand.Intn(2) == 0 {
+					currentWaitDuration += jitter
+				} else {
+					currentWaitDuration -= jitter
+				}
+			} else {
+				currentWaitDuration = waitDuration
+			}

-			timer := time.NewTimer(waitDuration)
+			log.Printf("%s status=%d oauth_smart_retry attempt=%d/%d delay=%v model=%s account=%d",
+				p.prefix, resp.StatusCode, attempt, maxAttempts, currentWaitDuration, modelName, p.account.ID)
+
+			timer := time.NewTimer(currentWaitDuration)
 			select {
 			case <-p.ctx.Done():
 				timer.Stop()