win b856586412
Some checks failed
CI / test (push) Failing after 16m30s
CI / golangci-lint (push) Failing after 4s
Security Scan / backend-security (push) Failing after 1m35s
Security Scan / frontend-security (push) Failing after 1m31s
修复h1
2026-04-01 01:35:49 +08:00

1187 lines
31 KiB
Go

// Package lspool manages a pool of AntiGravity Language Server instances.
//
// Each Google account gets its own LS instance. The LS binary is Google's own
// compiled Go binary, so all upstream TLS fingerprints, session behavior,
// and protocol patterns are 100% authentic — indistinguishable from real IDE.
//
// Architecture:
//
// sub2API Gateway → LS Pool → LS Instance (per account) → cloudcode-pa
//
// Communication protocol (from JS source analysis):
//
// sub2API → LS: ConnectRPC over HTTPS/2, binary proto, x-codeium-csrf-token header
// LS → ExtServer: ConnectRPC over HTTP/1.1, binary proto, x-codeium-csrf-token header
//
// Unary calls: Content-Type: application/proto (no envelope framing)
// Stream calls: Content-Type: application/connect+proto (envelope-framed)
// Envelope = 1 byte flags + 4 byte BE length + payload
// flags=0x02 means end-of-stream trailer
package lspool
import (
"bufio"
"bytes"
"context"
crand "crypto/rand"
"crypto/sha256"
"crypto/tls"
"crypto/x509"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"log/slog"
"net/http"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"golang.org/x/net/http2"
"github.com/Wei-Shaw/sub2api/internal/util/logredact"
)
// ============================================================
// Configuration
// ============================================================
// Config holds the tunable settings for an LS Pool.
type Config struct {
// AppRoot is the path to the AntiGravity.app resources directory,
// e.g., "/Applications/AntiGravity.app/Contents/Resources/app".
// The LS binary and its TLS certificate are looked up beneath it.
AppRoot string
// CloudCodeEndpoint overrides the default Cloud Code endpoint. When it is
// non-empty it is passed to the LS via --cloud_code_endpoint.
CloudCodeEndpoint string
// MaxIdleTime is how long an LS instance may go unused before the periodic
// health check stops it.
MaxIdleTime time.Duration
// HealthCheckInterval is the period between health-check passes.
HealthCheckInterval time.Duration
// ReplicasPerAccount controls how many LS processes a single account can use.
// Values below 1 are treated as 1 by the pool.
ReplicasPerAccount int
}
// DefaultConfig returns the production defaults: an auto-detected app root,
// the public Cloud Code endpoint, a 30 minute idle limit, a 30 second health
// check period, and a replica count taken from the environment.
func DefaultConfig() Config {
	var cfg Config
	cfg.AppRoot = findAppRoot()
	cfg.CloudCodeEndpoint = "https://cloudcode-pa.googleapis.com"
	cfg.MaxIdleTime = 30 * time.Minute
	cfg.HealthCheckInterval = 30 * time.Second
	cfg.ReplicasPerAccount = parseLSReplicaCount()
	return cfg
}
// parseLSReplicaCount reads ANTIGRAVITY_LS_REPLICAS_PER_ACCOUNT and returns
// it as a positive integer. An unset, blank, non-numeric, or non-positive
// value yields the default of 5.
func parseLSReplicaCount() int {
	const fallback = 5

	text := strings.TrimSpace(os.Getenv("ANTIGRAVITY_LS_REPLICAS_PER_ACCOUNT"))
	// An empty string fails Atoi as well, so one check covers every bad input.
	if n, err := strconv.Atoi(text); err == nil && n >= 1 {
		return n
	}
	return fallback
}
// findAppRoot returns the first known install location that contains an
// extensions/antigravity/bin directory. When none of them exists it falls
// back to the first candidate so later errors mention a sensible path.
func findAppRoot() string {
	roots := [...]string{
		"/Applications/AntiGravity.app/Contents/Resources/app",
		"/Applications/Antigravity.app/Contents/Resources/app",
		filepath.Join(os.Getenv("HOME"), ".local/share/antigravity/app"),
	}
	for idx := range roots {
		binDir := filepath.Join(roots[idx], "extensions", "antigravity", "bin")
		if _, statErr := os.Stat(binDir); statErr != nil {
			continue
		}
		return roots[idx]
	}
	return roots[0]
}
// ============================================================
// LS Instance
// ============================================================
// maxConcurrencyPerInstance limits how many concurrent Cascade calls a single
// LS instance handles. LS is designed for a single IDE user; beyond this
// threshold requests are rejected so the caller can fall back to direct HTTP.
//
// The lsStartup* values bound the readiness probing performed right after an
// LS process has reported its HTTPS port.
const (
maxConcurrencyPerInstance = 5
// lsStartupReadyTimeout is the overall budget for the readiness probe loop.
lsStartupReadyTimeout = 6 * time.Second
// lsStartupProbeInterval is the pause between consecutive readiness probes.
lsStartupProbeInterval = 100 * time.Millisecond
// lsStartupHeartbeatTimeout caps each individual readiness heartbeat call.
lsStartupHeartbeatTimeout = 1 * time.Second
)
// Instance represents a single Language Server process bound to one Google account.
type Instance struct {
AccountID string // account this instance serves
Email string
CSRF string // sent as the x-codeium-csrf-token header on RPCs to this LS
Replica int // replica slot index within the account's slot list
Address string // e.g., "127.0.0.1:52444"
cmd *exec.Cmd // the LS child process; stopInstance nil-checks it before use
cleanup func() // optional teardown hook from the launch plan, run by stopInstance
client *http.Client // HTTPS client used for local (non-remote) RPCs
mu sync.RWMutex // guards healthy, lastUsed and modelMapError
healthy bool // set after a successful readiness/heartbeat probe, cleared on failure or stop
lastUsed time.Time // refreshed by Touch; compared against Config.MaxIdleTime
startedAt time.Time
inflight int64 // atomic: current number of concurrent cascade calls
modelMapReady int32 // atomic flag (0/1): model config loaded successfully
modelMapHard int32 // atomic flag (0/1): model config loading is known to be unavailable
remote bool // when true, RPCs are routed through callWorkerUnary/callWorkerStream
workerToken string // NOTE(review): not referenced in this part of the file; presumably used by the remote-worker path
routingKey string // NOTE(review): not referenced in this part of the file; confirm its use
modelMapError string // last recorded reason for modelMapHard; guarded by mu
}
// AcquireConcurrency tries to reserve one in-flight slot on the instance.
// It reports false, without changing the counter, when the instance already
// carries maxConcurrencyPerInstance calls. A lost compare-and-swap race is
// simply retried.
func (i *Instance) AcquireConcurrency() bool {
	const limit = int64(maxConcurrencyPerInstance)
	for {
		seen := atomic.LoadInt64(&i.inflight)
		switch {
		case seen >= limit:
			return false
		case atomic.CompareAndSwapInt64(&i.inflight, seen, seen+1):
			return true
		}
	}
}
// ReleaseConcurrency gives back one in-flight slot. It does not check that a
// slot was previously acquired.
func (i *Instance) ReleaseConcurrency() {
	const delta int64 = -1
	atomic.AddInt64(&i.inflight, delta)
}
// ConcurrentCount reports how many cascade calls are in flight right now.
func (i *Instance) ConcurrentCount() int64 {
	current := atomic.LoadInt64(&i.inflight)
	return current
}
// SetModelMappingReady stores whether this LS instance has loaded its model
// config from the upstream service.
func (i *Instance) SetModelMappingReady(ready bool) {
	var flag int32
	if ready {
		flag = 1
	}
	atomic.StoreInt32(&i.modelMapReady, flag)
}
// SetModelMappingUnavailable flags the instance as unable to load model
// config with the current token/client combination and remembers the
// whitespace-trimmed reason.
func (i *Instance) SetModelMappingUnavailable(reason string) {
	trimmed := strings.TrimSpace(reason)
	atomic.StoreInt32(&i.modelMapHard, 1)

	i.mu.Lock()
	defer i.mu.Unlock()
	i.modelMapError = trimmed
}
// ClearModelMappingUnavailable drops any previously recorded permanent model
// mapping failure: both the flag and the stored reason are reset.
func (i *Instance) ClearModelMappingUnavailable() {
	atomic.StoreInt32(&i.modelMapHard, 0)

	i.mu.Lock()
	defer i.mu.Unlock()
	i.modelMapError = ""
}
// HasModelMappingUnavailable reports whether model config loading has been
// marked as incompatible with the account/token.
func (i *Instance) HasModelMappingUnavailable() bool {
	flag := atomic.LoadInt32(&i.modelMapHard)
	return flag == 1
}
// ModelMappingUnavailableReason returns the last recorded permanent failure
// reason, trimmed of surrounding whitespace, or "" when none is stored.
func (i *Instance) ModelMappingUnavailableReason() string {
	i.mu.RLock()
	reason := i.modelMapError
	i.mu.RUnlock()
	return strings.TrimSpace(reason)
}
// HasModelMappingReady reports whether this LS instance has been marked as
// having loaded its model config successfully.
func (i *Instance) HasModelMappingReady() bool {
	flag := atomic.LoadInt32(&i.modelMapReady)
	return flag == 1
}
// IsHealthy reports the instance's current health flag.
func (i *Instance) IsHealthy() bool {
	i.mu.RLock()
	ok := i.healthy
	i.mu.RUnlock()
	return ok
}
// Touch records the current time as the instance's last use, which resets
// its idle clock.
func (i *Instance) Touch() {
	i.mu.Lock()
	defer i.mu.Unlock()
	i.lastUsed = time.Now()
}
// ============================================================
// RPC Methods — uses ConnectRPC binary proto
// ============================================================
const (
// LSService is the fully-qualified ConnectRPC service name used as the
// first path segment of every Language Server RPC URL.
LSService = "exa.language_server_pb.LanguageServerService"
)
// CallUnaryJSON performs a ConnectRPC unary call with a JSON body (meant for
// convenience/debugging).
//
// A nil input is sent as "{}". Remote instances are served through the worker
// path instead of a direct HTTPS request. On a non-200 status the raw response
// body is returned together with an error that quotes at most 200 bytes of it.
func (i *Instance) CallUnaryJSON(ctx context.Context, service, method string, input any) ([]byte, error) {
	i.Touch()

	if i.remote {
		payload, err := marshalWorkerJSONBody(input)
		if err != nil {
			return nil, fmt.Errorf("marshal input: %w", err)
		}
		return i.callWorkerUnary(ctx, service, method, "json", payload)
	}

	payload := []byte("{}")
	if input != nil {
		encoded, err := json.Marshal(input)
		if err != nil {
			return nil, fmt.Errorf("marshal input: %w", err)
		}
		payload = encoded
	}

	endpoint := fmt.Sprintf("https://%s/%s/%s", i.Address, service, method)
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return nil, err
	}
	hdr := req.Header
	hdr.Set("Content-Type", "application/json")
	hdr.Set("Connect-Protocol-Version", "1")
	hdr.Set("x-codeium-csrf-token", i.CSRF)

	resp, err := i.client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("rpc %s/%s: %w", service, method, err)
	}
	defer resp.Body.Close()

	raw, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read response: %w", err)
	}
	if resp.StatusCode == http.StatusOK {
		return raw, nil
	}
	return raw, fmt.Errorf("rpc %s/%s HTTP %d: %s",
		service, method, resp.StatusCode, truncate(string(raw), 200))
}
// CallRPC performs a ConnectRPC unary call with a binary proto body, using
// Content-Type: application/proto (no envelope framing).
//
// A nil protoBody is sent as an empty message. Remote instances are served
// through the worker path. On a non-200 status the raw response body is
// returned together with an error that quotes at most 200 bytes of it.
func (i *Instance) CallRPC(ctx context.Context, service, method string, protoBody []byte) ([]byte, error) {
	i.Touch()

	if i.remote {
		return i.callWorkerUnary(ctx, service, method, "proto", protoBody)
	}

	payload := protoBody
	if payload == nil {
		payload = []byte{}
	}

	endpoint := fmt.Sprintf("https://%s/%s/%s", i.Address, service, method)
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return nil, err
	}
	hdr := req.Header
	hdr.Set("Content-Type", "application/proto")
	hdr.Set("Connect-Protocol-Version", "1")
	hdr.Set("x-codeium-csrf-token", i.CSRF)

	resp, err := i.client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("rpc %s/%s: %w", service, method, err)
	}
	defer resp.Body.Close()

	raw, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read response: %w", err)
	}
	if resp.StatusCode == http.StatusOK {
		return raw, nil
	}
	return raw, fmt.Errorf("rpc %s/%s HTTP %d: %s",
		service, method, resp.StatusCode, truncate(string(raw), 200))
}
// StreamRPC starts a server-streaming ConnectRPC call with a binary proto
// request and hands back the raw HTTP response.
//
// The request uses Content-Type: application/connect+proto and is wrapped in
// a single Connect envelope. The status code is not inspected here, and the
// response body is returned unread and unclosed.
func (i *Instance) StreamRPC(ctx context.Context, service, method string, protoBody []byte) (*http.Response, error) {
	i.Touch()

	if i.remote {
		return i.callWorkerStream(ctx, service, method, "proto", protoBody)
	}

	payload := protoBody
	if payload == nil {
		payload = []byte{}
	}
	// Streaming requests carry the message inside a Connect envelope.
	envelope := frameConnectMessage(payload)

	endpoint := fmt.Sprintf("https://%s/%s/%s", i.Address, service, method)
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(envelope))
	if err != nil {
		return nil, err
	}
	hdr := req.Header
	hdr.Set("Content-Type", "application/connect+proto")
	hdr.Set("Connect-Protocol-Version", "1")
	hdr.Set("Connect-Content-Encoding", "identity")
	hdr.Set("x-codeium-csrf-token", i.CSRF)

	return i.client.Do(req)
}
// StreamRPCJSON starts a server-streaming ConnectRPC call with a JSON request
// (meant for debugging) and hands back the raw HTTP response.
//
// A nil input is sent as "{}". The encoded message is wrapped in a single
// Connect envelope. The status code is not inspected here, and the response
// body is returned unread and unclosed.
func (i *Instance) StreamRPCJSON(ctx context.Context, service, method string, input any) (*http.Response, error) {
	i.Touch()

	if i.remote {
		payload, err := marshalWorkerJSONBody(input)
		if err != nil {
			return nil, fmt.Errorf("marshal: %w", err)
		}
		return i.callWorkerStream(ctx, service, method, "json", payload)
	}

	payload := []byte("{}")
	if input != nil {
		encoded, err := json.Marshal(input)
		if err != nil {
			return nil, fmt.Errorf("marshal: %w", err)
		}
		payload = encoded
	}
	// Streaming requests carry the message inside a Connect envelope.
	envelope := frameConnectMessage(payload)

	endpoint := fmt.Sprintf("https://%s/%s/%s", i.Address, service, method)
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(envelope))
	if err != nil {
		return nil, err
	}
	hdr := req.Header
	hdr.Set("Content-Type", "application/connect+json")
	hdr.Set("Connect-Protocol-Version", "1")
	hdr.Set("Connect-Content-Encoding", "identity")
	hdr.Set("x-codeium-csrf-token", i.CSRF)

	return i.client.Do(req)
}
// frameConnectMessage wraps payload in a Connect streaming envelope: one
// flags byte (always 0 here), a 4-byte big-endian payload length, then the
// payload itself. The result is a fresh slice; payload is not modified.
func frameConnectMessage(payload []byte) []byte {
	const headerLen = 5 // 1 flags byte + 4 length bytes

	size := len(payload)
	envelope := make([]byte, headerLen, headerLen+size)
	binary.BigEndian.PutUint32(envelope[1:], uint32(size))
	return append(envelope, payload...)
}
// Heartbeat pings the LS using the default 15 second deadline.
func (i *Instance) Heartbeat(ctx context.Context) error {
	const defaultDeadline = 15 * time.Second
	return i.HeartbeatWithTimeout(ctx, defaultDeadline)
}
// HeartbeatWithTimeout pings the LS via the Heartbeat RPC, bounded by
// timeout. A non-positive timeout is replaced by 15 seconds. The response
// body is discarded; only the error is returned.
func (i *Instance) HeartbeatWithTimeout(ctx context.Context, timeout time.Duration) error {
	deadline := timeout
	if deadline <= 0 {
		deadline = 15 * time.Second
	}

	callCtx, stop := context.WithTimeout(ctx, deadline)
	defer stop()

	if _, err := i.CallRPC(callCtx, LSService, "Heartbeat", nil); err != nil {
		return err
	}
	return nil
}
// waitForInstanceReady calls heartbeat repeatedly until it succeeds or ctx
// is done.
//
// The first probe fires immediately; later probes are spaced probeInterval
// apart (lsStartupProbeInterval when probeInterval is non-positive). It
// returns the number of probes made. On failure the error is the last probe
// error, or ctx.Err() if no probe ever ran or failed.
func waitForInstanceReady(
	ctx context.Context,
	probeInterval time.Duration,
	heartbeat func(context.Context) error,
) (int, error) {
	if heartbeat == nil {
		return 0, fmt.Errorf("heartbeat func is nil")
	}
	if probeInterval <= 0 {
		probeInterval = lsStartupProbeInterval
	}

	var (
		tries    int
		probeErr error
	)
	// A zero-duration timer makes the first probe fire straight away.
	tick := time.NewTimer(0)
	defer tick.Stop()

	for {
		select {
		case <-tick.C:
		case <-ctx.Done():
			if probeErr != nil {
				return tries, probeErr
			}
			return tries, ctx.Err()
		}

		tries++
		probeErr = heartbeat(ctx)
		if probeErr == nil {
			return tries, nil
		}
		tick.Reset(probeInterval)
	}
}
// ============================================================
// Pool Manager
// ============================================================
// Pool manages multiple LS instances, with sticky session routing per account.
type Pool struct {
config Config // settings captured at construction
instances map[string][]*Instance // accountID -> replica slot -> instance; nil entries are empty slots
extServer *MockExtensionServer // shared mock extension server; may be nil, so every use is nil-checked
mu sync.RWMutex // guards instances
ctx context.Context // cancelled by Close; ends lifecycleLoop
cancel context.CancelFunc // cancels ctx
logger *slog.Logger // component-scoped logger; filled in lazily by ensureLogger when nil
}
// NewPool builds a Pool, starts the shared mock extension server, and
// launches the background lifecycle loop.
//
// A failure to start the extension server is logged but not fatal: the pool
// is still returned and holds whatever server value the constructor produced.
func NewPool(config Config) *Pool {
	ctx, cancel := context.WithCancel(context.Background())

	// One shared CSRF token secures the LS <-> extension server channel.
	sharedCSRF := generateUUID()
	server, err := NewMockExtensionServer(sharedCSRF)
	if err != nil {
		slog.Error("failed to start mock extension server", "err", err)
	}

	pool := new(Pool)
	pool.config = config
	pool.instances = make(map[string][]*Instance)
	pool.extServer = server
	pool.ctx = ctx
	pool.cancel = cancel
	pool.logger = slog.Default().With("component", "lspool")

	go pool.lifecycleLoop()
	return pool
}
// replicaCount returns the configured replicas per account, never less than 1.
func (p *Pool) replicaCount() int {
	return max(p.config.ReplicasPerAccount, 1)
}
// ensureLogger installs the default component logger when the pool has none,
// so a Pool built without NewPool can still log.
func (p *Pool) ensureLogger() {
	if p.logger != nil {
		return
	}
	p.logger = slog.Default().With("component", "lspool")
}
// replicaSlotIndex maps a routing key onto a replica slot in
// [0, replicaCount). The same key always lands on the same slot: the index is
// the first four bytes of the key's SHA-256 digest, read big-endian, modulo
// replicaCount. A blank key or a replica count of 1 or less yields slot 0.
func replicaSlotIndex(routingKey string, replicaCount int) int {
	if replicaCount <= 1 {
		return 0
	}
	if strings.TrimSpace(routingKey) == "" {
		return 0
	}
	digest := sha256.Sum256([]byte(routingKey))
	head := binary.BigEndian.Uint32(digest[:4])
	return int(head % uint32(replicaCount))
}
// chooseLeastBusyHealthy returns the healthy instance with the fewest
// in-flight calls, preferring the earliest slot on ties. Nil and unhealthy
// entries are skipped; nil is returned when nothing qualifies.
func chooseLeastBusyHealthy(instances []*Instance) *Instance {
	var pick *Instance
	for _, cand := range instances {
		switch {
		case cand == nil, !cand.IsHealthy():
			continue
		case pick == nil, cand.ConcurrentCount() < pick.ConcurrentCount():
			pick = cand
		}
	}
	return pick
}
// ensureReplicaSlotsLocked returns the account's slot list, growing it to
// the configured replica count (and storing the grown list) when it is
// shorter. It reads and writes p.instances without locking, so the caller is
// expected to hold p.mu for writing.
func (p *Pool) ensureReplicaSlotsLocked(accountID string) []*Instance {
	current := p.instances[accountID]
	if want := p.replicaCount(); len(current) < want {
		grown := make([]*Instance, want)
		copy(grown, current)
		p.instances[accountID] = grown
		return grown
	}
	return current
}
// Get returns an existing healthy LS instance for the account, or nil.
//
// With a non-blank routingKey only the slot that key hashes to is considered.
// With a blank key the least busy healthy instance is chosen. The returned
// instance is marked as recently used.
func (p *Pool) Get(accountID, routingKey string) *Instance {
	p.mu.RLock()
	defer p.mu.RUnlock()

	slots := p.instances[accountID]
	if len(slots) == 0 {
		return nil
	}

	var picked *Instance
	if strings.TrimSpace(routingKey) == "" {
		picked = chooseLeastBusyHealthy(slots)
	} else if idx := replicaSlotIndex(routingKey, p.replicaCount()); idx < len(slots) {
		if cand := slots[idx]; cand != nil && cand.IsHealthy() {
			picked = cand
		}
	}

	if picked == nil {
		return nil
	}
	picked.Touch()
	return picked
}
// GetOrCreate returns an existing healthy LS instance or starts a new one.
//
// The first proxyURL value, if any, is forwarded to the new LS process for
// its Google API connectivity. With a blank routingKey the least busy healthy
// replica is reused; otherwise the first empty slot (or slot 0) is filled.
// With a routing key the slot that key hashes to is used. An unhealthy
// instance occupying the target slot is stopped and replaced. The pool's
// write lock is held for the whole call, including process startup.
func (p *Pool) GetOrCreate(accountID, routingKey string, proxyURL ...string) (*Instance, error) {
	p.ensureLogger()

	// Fast path under the read lock.
	if existing := p.Get(accountID, routingKey); existing != nil {
		return existing, nil
	}

	p.mu.Lock()
	defer p.mu.Unlock()

	slots := p.ensureReplicaSlotsLocked(accountID)
	target := replicaSlotIndex(routingKey, p.replicaCount())

	if strings.TrimSpace(routingKey) == "" {
		// Another caller may have created an instance while we waited for the lock.
		if reuse := chooseLeastBusyHealthy(slots); reuse != nil {
			reuse.Touch()
			return reuse, nil
		}
		for idx := range slots {
			if slots[idx] == nil {
				target = idx
				break
			}
		}
	}

	if target < len(slots) {
		if current := slots[target]; current != nil {
			if current.IsHealthy() {
				current.Touch()
				return current, nil
			}
			// Unhealthy occupant: tear it down before starting a replacement.
			p.stopInstance(current)
			slots[target] = nil
		}
	}

	var proxy string
	if len(proxyURL) != 0 {
		proxy = proxyURL[0]
	}

	created, err := p.startInstance(accountID, proxy, target)
	if err != nil {
		return nil, err
	}
	slots[target] = created

	p.logger.Info("LS instance created",
		"account", shortAccountID(accountID),
		"replica", target,
		"address", created.Address,
		"pid", created.cmd.Process.Pid)
	return created, nil
}
// Remove stops every LS instance of the account and forgets the account.
// Unknown accounts are ignored.
func (p *Pool) Remove(accountID string) {
	p.mu.Lock()
	defer p.mu.Unlock()

	slots, known := p.instances[accountID]
	if !known {
		return
	}
	for _, inst := range slots {
		if inst != nil {
			p.stopInstance(inst)
		}
	}
	delete(p.instances, accountID)
}
// SetAccountToken stores the account's OAuth token in the mock extension
// server (when there is one) and resets the model-mapping state of all of
// the account's instances: the ready flag and any recorded permanent failure
// are both cleared.
func (p *Pool) SetAccountToken(accountID, accessToken, refreshToken string, expiresAt time.Time) {
	if srv := p.extServer; srv != nil {
		token := &TokenInfo{
			AccessToken:  accessToken,
			RefreshToken: refreshToken,
			ExpiresAt:    expiresAt,
		}
		srv.SetToken(accountID, token)
	}

	// Snapshot the slots so the per-instance updates run outside the pool lock.
	p.mu.RLock()
	live := p.instances[accountID]
	snapshot := make([]*Instance, len(live))
	copy(snapshot, live)
	p.mu.RUnlock()

	for _, inst := range snapshot {
		if inst != nil {
			inst.SetModelMappingReady(false)
			inst.ClearModelMappingUnavailable()
		}
	}
}
// SetAccountModelCredits updates the JS-parity uss-modelCredits state for an
// account in the mock extension server. It is a no-op when the pool has no
// extension server.
func (p *Pool) SetAccountModelCredits(accountID string, useAICredits bool, availableCredits, minimumCreditAmountForUsage *int32) {
	srv := p.extServer
	if srv == nil {
		return
	}
	credits := &ModelCreditsInfo{
		UseAICredits:                useAICredits,
		AvailableCredits:            availableCredits,
		MinimumCreditAmountForUsage: minimumCreditAmountForUsage,
	}
	srv.SetModelCredits(accountID, credits)
}
// Stats returns a snapshot of the pool: "accounts" is the number of tracked
// accounts, "total" the number of instances, and "active" the number of
// those that are currently healthy.
func (p *Pool) Stats() map[string]any {
	p.mu.RLock()
	defer p.mu.RUnlock()

	var healthy, running int
	for _, slots := range p.instances {
		for idx := range slots {
			inst := slots[idx]
			if inst == nil {
				continue
			}
			running++
			if inst.IsHealthy() {
				healthy++
			}
		}
	}

	snapshot := make(map[string]any, 3)
	snapshot["accounts"] = len(p.instances)
	snapshot["total"] = running
	snapshot["active"] = healthy
	return snapshot
}
// Close cancels the pool context (ending the lifecycle loop), stops every LS
// instance, empties the instance table, and closes the extension server if
// there is one.
func (p *Pool) Close() {
	p.ensureLogger()
	p.cancel()

	p.mu.Lock()
	defer p.mu.Unlock()

	for accountID, slots := range p.instances {
		for idx := range slots {
			inst := slots[idx]
			if inst == nil {
				continue
			}
			p.logger.Info("shutting down LS", "account", shortAccountID(accountID), "replica", inst.Replica)
			p.stopInstance(inst)
		}
	}
	p.instances = make(map[string][]*Instance)

	if srv := p.extServer; srv != nil {
		srv.Close()
	}
}
// ============================================================
// Instance Lifecycle
// ============================================================
// portRe captures the HTTPS port number from the LS stderr line that contains
// "at <port> for HTTPS".
var portRe = regexp.MustCompile(`at (\d+) for HTTPS`)
// startInstance launches one LS process for accountID in the given replica
// slot and returns it once it answers a heartbeat.
//
// Steps: locate the LS binary, build its arguments and environment, start
// the process, write the metadata proto to its stdin, read the HTTPS port
// from its stderr, create the HTTPS client, and probe readiness. Model
// mapping is then loaded in the background so startup is not blocked.
//
// On every failure path the launch plan's cleanup hook is run, and a process
// that was already started is killed and reaped, so a failed start leaves no
// zombie process or launch-plan resources behind.
func (p *Pool) startInstance(accountID string, proxyURL string, replica int) (*Instance, error) {
	binPath := filepath.Join(p.config.AppRoot, "extensions", "antigravity", "bin", lsBinaryName())
	if _, err := os.Stat(binPath); err != nil {
		return nil, fmt.Errorf("LS binary not found: %s", binPath)
	}

	// Each LS instance gets its own CSRF token (like the real IDE).
	csrf := generateUUID()
	appDataDir := fmt.Sprintf("antigravity-pool-%s-r%d", shortAccountID(accountID), replica)
	args := []string{
		"--csrf_token", csrf,
		"--app_data_dir", appDataDir,
		"--https_server_port", "0",
	}
	if p.config.CloudCodeEndpoint != "" {
		args = append(args, "--cloud_code_endpoint", p.config.CloudCodeEndpoint)
	}
	// Connect LS to our mock extension server for token injection.
	// The extension server uses a shared CSRF token (set at server creation).
	if p.extServer != nil {
		args = append(args,
			"--extension_server_port", fmt.Sprintf("%d", p.extServer.Port()),
			"--extension_server_csrf_token", p.extServer.csrf,
		)
	}

	rawProxyURL := resolveLSProxy(proxyURL)
	launchPlan, err := prepareLSLaunchPlan(binPath, args, rawProxyURL)
	if err != nil {
		return nil, fmt.Errorf("prepare LS launch: %w", err)
	}
	cmd := launchPlan.cmd
	cmd.Env = buildLSEnv(os.Environ(), p.config.AppRoot, launchPlan.effectiveProxyURL)

	// releasePlan runs the launch plan's cleanup hook, if it has one.
	releasePlan := func() {
		if launchPlan.cleanup != nil {
			launchPlan.cleanup()
		}
	}
	// abort tears down a process that was started but cannot be used.
	abort := func() {
		_ = cmd.Process.Kill() // best effort: the process may already be gone
		_ = cmd.Wait()         // reap the child so it does not linger as a zombie
		releasePlan()
	}

	p.logger.Info("LS starting",
		"account", shortAccountID(accountID),
		"replica", replica,
		"proxy_source", logredact.RedactProxyURL(rawProxyURL),
		"proxy_mode", launchPlan.proxyMode,
		"effective_proxy", logredact.RedactProxyURL(launchPlan.effectiveProxyURL))

	stdin, err := cmd.StdinPipe()
	if err != nil {
		releasePlan()
		return nil, fmt.Errorf("stdin pipe: %w", err)
	}
	stderr, err := cmd.StderrPipe()
	if err != nil {
		releasePlan()
		return nil, fmt.Errorf("stderr pipe: %w", err)
	}
	if err := cmd.Start(); err != nil {
		releasePlan()
		return nil, fmt.Errorf("start LS: %w", err)
	}

	// Write metadata proto to stdin (with access token if available).
	accessToken := ""
	if p.extServer != nil {
		p.extServer.mu.RLock()
		if info := p.extServer.tokens[accountID]; info != nil {
			accessToken = info.AccessToken
		}
		p.extServer.mu.RUnlock()
	}
	metaBytes := buildMetadataBytes(accessToken)
	p.logger.Info("writing metadata to LS stdin",
		"account", shortAccountID(accountID),
		"replica", replica,
		"meta_len", len(metaBytes),
		"has_token", accessToken != "",
		"hex_prefix", fmt.Sprintf("%x", metaBytes[:min(40, len(metaBytes))]))
	// A failed write usually means the LS has already exited. That case is
	// caught below when stderr reaches EOF, so it is only logged here.
	if _, err := stdin.Write(metaBytes); err != nil {
		p.logger.Warn("writing metadata to LS stdin failed",
			"account", shortAccountID(accountID), "replica", replica, "err", err)
	}
	if err := stdin.Close(); err != nil {
		p.logger.Warn("closing LS stdin failed",
			"account", shortAccountID(accountID), "replica", replica, "err", err)
	}

	// Parse HTTPS port from stderr AND log all LS output.
	portCh := make(chan string, 1)
	stderrDone := make(chan struct{})
	go func() {
		defer close(stderrDone)
		scanner := bufio.NewScanner(stderr)
		for scanner.Scan() {
			line := scanner.Text()
			p.logger.Warn("LS stderr", "account", shortAccountID(accountID), "replica", replica, "line", line)
			if matches := portRe.FindStringSubmatch(line); len(matches) > 1 {
				// Only the first port matters. A blocking send on a later
				// match would stall this loop and stop draining stderr.
				select {
				case portCh <- matches[1]:
				default:
				}
			}
		}
		if err := scanner.Err(); err != nil {
			p.logger.Warn("LS stderr read failed",
				"account", shortAccountID(accountID), "replica", replica, "err", err)
			// Keep draining (e.g. after an over-long line) so the LS never
			// blocks on a full stderr pipe.
			_, _ = io.Copy(io.Discard, stderr)
		}
		p.logger.Warn("LS stderr EOF", "account", shortAccountID(accountID), "replica", replica)
	}()

	portTimer := time.NewTimer(15 * time.Second)
	defer portTimer.Stop()

	var address string
	select {
	case port := <-portCh:
		address = "127.0.0.1:" + port
	case <-stderrDone:
		// The reader is the only producer for portCh. Once it has finished,
		// either the port is already buffered or it will never arrive.
		select {
		case port := <-portCh:
			address = "127.0.0.1:" + port
		default:
			abort()
			return nil, fmt.Errorf("LS closed stderr before reporting HTTPS port")
		}
	case <-portTimer.C:
		abort()
		return nil, fmt.Errorf("timeout: LS did not report HTTPS port within 15s")
	}

	// Create HTTPS client with LS self-signed cert.
	httpClient, err := createHTTPClient(p.config.AppRoot, csrf)
	if err != nil {
		abort()
		return nil, fmt.Errorf("create http client: %w", err)
	}

	inst := &Instance{
		AccountID: accountID,
		CSRF:      csrf,
		Replica:   replica,
		Address:   address,
		cmd:       cmd,
		cleanup:   launchPlan.cleanup,
		client:    httpClient,
		healthy:   false,
		lastUsed:  time.Now(),
		startedAt: time.Now(),
	}

	// Real IDE waits for LanguageServerStarted callback from ExtServer (timeout 60s).
	// Our MockExtServer already received this callback during port detection
	// (LS calls LanguageServerStarted right after binding HTTPS port).
	// Probe readiness immediately and keep retrying with a short interval instead
	// of sleeping a fixed 3-6 seconds on every cold start.
	p.logger.Info("waiting for LS readiness", "account", shortAccountID(accountID), "replica", replica, "address", address)
	readyStartedAt := time.Now()
	readyCtx, cancel := context.WithTimeout(context.Background(), lsStartupReadyTimeout)
	attempts, err := waitForInstanceReady(readyCtx, lsStartupProbeInterval, func(callCtx context.Context) error {
		return inst.HeartbeatWithTimeout(callCtx, lsStartupHeartbeatTimeout)
	})
	cancel()
	if err != nil {
		abort()
		return nil, fmt.Errorf("LS not ready after startup: %w", err)
	}

	inst.mu.Lock()
	inst.healthy = true
	inst.mu.Unlock()
	p.logger.Info("LS ready",
		"account", shortAccountID(accountID),
		"replica", replica,
		"attempts", attempts,
		"waited", time.Since(readyStartedAt).Truncate(time.Millisecond))

	// Refresh model mapping from LS (async with retries — don't block startup).
	// A nil channel blocks forever, so a Pool without a context simply waits
	// out the timer.
	var poolDone <-chan struct{}
	if p.ctx != nil {
		poolDone = p.ctx.Done()
	}
	go func() {
		const maxAttempts = 5
		for attempt := 1; attempt <= maxAttempts; attempt++ {
			if RefreshModelMapping(inst) {
				p.logger.Info("model mapping loaded", "account", shortAccountID(accountID), "replica", replica, "attempt", attempt)
				return
			}
			if inst.HasModelMappingUnavailable() {
				p.logger.Warn("model mapping unavailable",
					"account", shortAccountID(accountID),
					"replica", replica,
					"attempt", attempt,
					"reason", truncate(inst.ModelMappingUnavailableReason(), 200))
				return
			}
			if attempt == maxAttempts {
				// No retry follows, so there is nothing to wait for.
				p.logger.Warn("model mapping not loaded, giving up", "account", shortAccountID(accountID), "replica", replica, "attempt", attempt)
				return
			}
			p.logger.Warn("model mapping not loaded, retrying", "account", shortAccountID(accountID), "replica", replica, "attempt", attempt)

			backoff := time.NewTimer(time.Duration(attempt*10) * time.Second)
			select {
			case <-poolDone:
				// The pool is shutting down; stop retrying.
				backoff.Stop()
				return
			case <-backoff.C:
			}
		}
	}()

	return inst, nil
}
// stopInstance kills and reaps the instance's process (when it has one),
// runs its cleanup hook (when it has one), and marks it unhealthy. Errors
// from Kill and Wait are deliberately ignored.
func (p *Pool) stopInstance(inst *Instance) {
	if child := inst.cmd; child != nil && child.Process != nil {
		child.Process.Kill()
		child.Wait()
	}
	if release := inst.cleanup; release != nil {
		release()
	}

	inst.mu.Lock()
	defer inst.mu.Unlock()
	inst.healthy = false
}
// lifecycleLoop runs a health-check pass every HealthCheckInterval until the
// pool context is cancelled.
//
// time.NewTicker panics on a non-positive duration, so an unset or invalid
// interval (for example a zero-value Config) falls back to 30 seconds, the
// same value DefaultConfig uses.
func (p *Pool) lifecycleLoop() {
	interval := p.config.HealthCheckInterval
	if interval <= 0 {
		interval = 30 * time.Second
	}

	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-p.ctx.Done():
			return
		case <-ticker.C:
			p.doHealthCheck()
		}
	}
}
// doHealthCheck runs one maintenance pass over every instance:
//
//   - an instance whose process has been reaped is torn down and its slot cleared;
//   - an instance idle for longer than MaxIdleTime is stopped and its slot cleared;
//   - every remaining instance gets a heartbeat, and its health flag is set
//     from the result.
//
// Accounts left with no instances are dropped from the table. The pool's
// write lock is held for the whole pass, heartbeats included.
func (p *Pool) doHealthCheck() {
	p.mu.Lock()
	defer p.mu.Unlock()

	for id, slots := range p.instances {
		for replica, inst := range slots {
			if inst == nil {
				continue
			}

			// Check process alive. cmd is nil-checked here just as in
			// stopInstance, so an instance without a local process cannot
			// cause a nil dereference.
			// NOTE(review): os/exec only fills ProcessState once Wait has
			// returned, and nothing in this part of the file waits on a live
			// instance — confirm this branch can actually fire.
			if inst.cmd != nil && inst.cmd.ProcessState != nil {
				p.logger.Warn("LS process exited", "account", shortAccountID(id), "replica", replica)
				// Go through stopInstance so the cleanup hook runs and anyone
				// still holding the instance sees it as unhealthy.
				p.stopInstance(inst)
				slots[replica] = nil
				continue
			}

			// Check idle timeout.
			inst.mu.RLock()
			idle := time.Since(inst.lastUsed)
			inst.mu.RUnlock()
			if idle > p.config.MaxIdleTime {
				p.logger.Info("LS idle timeout", "account", shortAccountID(id), "replica", replica, "idle", idle)
				p.stopInstance(inst)
				slots[replica] = nil
				continue
			}

			// Heartbeat check.
			err := inst.Heartbeat(p.ctx)
			if err != nil {
				p.logger.Warn("heartbeat failed", "account", shortAccountID(id), "replica", replica, "err", err)
			}
			inst.mu.Lock()
			inst.healthy = err == nil
			inst.mu.Unlock()
		}

		allNil := true
		for _, inst := range slots {
			if inst != nil {
				allNil = false
				break
			}
		}
		if allNil {
			// Deleting the current key while ranging over a map is allowed in Go.
			delete(p.instances, id)
		}
	}
}
// ============================================================
// Helpers
// ============================================================
var (
// defaultLSCertFileCandidates lists CA bundle files probed, in order, to
// populate SSL_CERT_FILE for the LS process when the inherited environment
// does not already define it.
defaultLSCertFileCandidates = []string{
"/etc/ssl/certs/ca-certificates.crt",
"/etc/pki/tls/certs/ca-bundle.crt",
"/etc/ssl/cert.pem",
}
// defaultLSCertDirCandidates lists CA directories probed, in order, to
// populate SSL_CERT_DIR for the LS process when the inherited environment
// does not already define it.
defaultLSCertDirCandidates = []string{
"/etc/ssl/certs",
"/etc/pki/tls/certs",
}
)
// buildLSEnv derives the environment for an LS process from baseEnv without
// modifying baseEnv.
//
// It sets ANTIGRAVITY_EDITOR_APP_ROOT and forces all six proxy variables
// (upper and lower case) to proxyURL. An empty proxyURL is written too, which
// clears any proxy inherited from the container. SSL_CERT_FILE and
// SSL_CERT_DIR are filled from the first existing default path, but only when
// the environment does not define them already.
func buildLSEnv(baseEnv []string, appRoot string, proxyURL string) []string {
	env := make([]string, len(baseEnv))
	copy(env, baseEnv)

	env = setEnvValue(env, "ANTIGRAVITY_EDITOR_APP_ROOT", appRoot)

	// The LS must reach Google APIs through the caller-resolved proxy, never
	// through an inherited one (which may be Anthropic-only).
	proxyKeys := [...]string{
		"HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
		"https_proxy", "http_proxy", "all_proxy",
	}
	for _, key := range proxyKeys {
		env = setEnvValue(env, key, proxyURL)
	}

	certDefaults := [...]struct {
		key        string
		candidates []string
	}{
		{"SSL_CERT_FILE", defaultLSCertFileCandidates},
		{"SSL_CERT_DIR", defaultLSCertDirCandidates},
	}
	for _, def := range certDefaults {
		if hasEnvKey(env, def.key) {
			continue
		}
		if found := firstExistingPath(def.candidates); found != "" {
			env = setEnvValue(env, def.key, found)
		}
	}
	return env
}
// resolveLSProxy picks the proxy URL for an LS process: the trimmed proxyURL
// when it is non-blank, otherwise the trimmed ANTIGRAVITY_LS_PROXY
// environment variable (which may be empty).
func resolveLSProxy(proxyURL string) string {
	if explicit := strings.TrimSpace(proxyURL); explicit != "" {
		return explicit
	}
	fromEnv := os.Getenv("ANTIGRAVITY_LS_PROXY")
	return strings.TrimSpace(fromEnv)
}
// firstExistingPath returns the first non-empty candidate that os.Stat can
// access, or "" when there is none.
func firstExistingPath(candidates []string) string {
	for idx := range candidates {
		path := candidates[idx]
		if path != "" {
			if _, statErr := os.Stat(path); statErr == nil {
				return path
			}
		}
	}
	return ""
}
// hasEnvKey reports whether env holds a "key=..." entry. The match is an
// exact, case-sensitive comparison of the whole key.
func hasEnvKey(env []string, key string) bool {
	needle := key + "="
	for idx := 0; idx < len(env); idx++ {
		if strings.HasPrefix(env[idx], needle) {
			return true
		}
	}
	return false
}
// setEnvValue sets key to value in env and returns the resulting slice. The
// first existing "key=..." entry is overwritten in place; when there is none
// a new entry is appended.
func setEnvValue(env []string, key, value string) []string {
	marker := key + "="
	assignment := marker + value
	for idx := range env {
		if !strings.HasPrefix(env[idx], marker) {
			continue
		}
		env[idx] = assignment
		return env
	}
	return append(env, assignment)
}
func createHTTPClient(appRoot, csrf string) (*http.Client, error) {
certPath := filepath.Join(appRoot, "extensions", "antigravity", "dist", "languageServer", "cert.pem")
caCert, err := os.ReadFile(certPath)
if err != nil {
return nil, fmt.Errorf("read cert %s: %w", certPath, err)
}
certPool := x509.NewCertPool()
if !certPool.AppendCertsFromPEM(caCert) {
return nil, fmt.Errorf("failed to parse cert")
}
tlsCfg := &tls.Config{
RootCAs: certPool,
InsecureSkipVerify: true, // LS uses self-signed cert; trust it unconditionally
}
return &http.Client{
Transport: &csrfTransport{
base: &http2.Transport{
TLSClientConfig: tlsCfg,
ReadIdleTimeout: 30 * time.Second,
},
csrf: csrf,
},
Timeout: 5 * time.Minute,
}, nil
}
// csrfTransport is an http.RoundTripper that stamps every outgoing request
// with the LS CSRF token before delegating to base.
type csrfTransport struct {
	base http.RoundTripper // underlying transport that performs the request
	csrf string            // value sent in the x-codeium-csrf-token header
}

// RoundTrip sends req through the base transport with the
// x-codeium-csrf-token header set to the transport's token.
//
// The http.RoundTripper contract forbids modifying the caller's request, so
// the header is set on a clone and req itself is left untouched.
func (t *csrfTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	outgoing := req.Clone(req.Context())
	outgoing.Header.Set("x-codeium-csrf-token", t.csrf)
	return t.base.RoundTrip(outgoing)
}
// buildMetadataBytes encodes the Metadata proto that is written to the LS
// stdin at startup.
// This matches what the real IDE does (extension.js line 42520-42522):
//
//	toBinary(MetadataSchema, create(MetadataSchema, {
//	  ideName, ideVersion, extensionName, extensionPath,
//	  locale, deviceFingerprint, apiKey, disableTelemetry, userTierId
//	}))
//
// Only ide_name, ide_version, extension_name, locale, the optional api_key
// (set to accessToken when it is non-empty), and disable_telemetry=true are
// emitted here.
func buildMetadataBytes(accessToken string) []byte {
	var out bytes.Buffer

	fixed := [...]struct {
		num int
		val string
	}{
		{1, "antigravity"},  // ide_name
		{7, "1.107.0"},      // ide_version
		{12, "antigravity"}, // extension_name
		{4, "en"},           // locale
	}
	for _, field := range fixed {
		writeProtoStringField(&out, field.num, field.val)
	}

	if accessToken != "" {
		writeProtoStringField(&out, 3, accessToken) // api_key = access_token
	}

	// disable_telemetry = true (field 6, varint, value 1)
	out.Write([]byte{0x30, 0x01})
	return out.Bytes()
}
// writeProtoStringField appends a length-delimited protobuf field to buf:
// the tag (field number with wire type 2), the byte length of val, and then
// val itself.
func writeProtoStringField(buf *bytes.Buffer, fieldNum int, val string) {
	const wireTypeLengthDelimited = 2

	tag := uint64(fieldNum<<3 | wireTypeLengthDelimited)
	size := uint64(len(val))

	writeVarInt(buf, tag)
	writeVarInt(buf, size)
	buf.WriteString(val)
}
// writeVarInt appends v to buf in unsigned varint encoding.
func writeVarInt(buf *bytes.Buffer, v uint64) {
	var scratch [binary.MaxVarintLen64]byte
	used := binary.PutUvarint(scratch[:], v)
	buf.Write(scratch[:used])
}
// lsBinaryName returns the file name of the LS binary for the running
// OS/architecture. Unknown platforms fall back to the Linux x64 binary.
func lsBinaryName() string {
	switch platform := runtime.GOOS + "/" + runtime.GOARCH; platform {
	case "darwin/arm64":
		return "language_server_macos_arm"
	case "darwin/amd64":
		return "language_server_macos_x64"
	case "linux/amd64":
		return "language_server_linux_x64"
	case "linux/arm64":
		return "language_server_linux_arm"
	case "windows/amd64":
		return "language_server_windows_x64.exe"
	default:
		return "language_server_linux_x64" // fallback
	}
}
// generateUUID returns a random version-4 UUID in its canonical lowercase
// 8-4-4-4-12 form.
//
// The value is used as a CSRF token, so it must be unpredictable. If the
// system random source fails the function panics: carrying on would produce
// a token built from zero bytes that anyone could guess.
func generateUUID() string {
	var b [16]byte
	if _, err := crand.Read(b[:]); err != nil {
		panic(fmt.Sprintf("lspool: reading random bytes for UUID: %v", err))
	}
	b[6] = (b[6] & 0x0f) | 0x40 // version 4
	b[8] = (b[8] & 0x3f) | 0x80 // RFC 4122 variant
	return fmt.Sprintf("%08x-%04x-%04x-%04x-%012x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])
}
// truncate shortens s to at most max bytes and appends "..." when anything
// was cut off. Strings of max bytes or fewer are returned unchanged.
//
// The cut never lands inside a multi-byte UTF-8 character: it backs up to
// the start of that character, so valid input stays valid in logs and error
// messages. A negative max is treated as 0 instead of panicking.
func truncate(s string, max int) string {
	if max < 0 {
		max = 0
	}
	if len(s) <= max {
		return s
	}

	// isContinuation reports whether b is a UTF-8 continuation byte (10xxxxxx).
	isContinuation := func(b byte) bool { return b&0xC0 == 0x80 }

	// A UTF-8 character has at most 3 continuation bytes, so back up at most
	// 3 positions looking for a character boundary.
	cut := max
	for back := 0; back < 3 && cut > 0 && isContinuation(s[cut]); back++ {
		cut--
	}
	if isContinuation(s[cut]) {
		// Not valid UTF-8 around the limit (e.g. a binary body): keep the
		// plain byte cut.
		cut = max
	}
	return s[:cut] + "..."
}
// shortAccountID returns at most the first 8 bytes of accountID, for use in
// log fields.
func shortAccountID(accountID string) string {
	const keep = 8
	if len(accountID) > keep {
		return accountID[:keep]
	}
	return accountID
}