1187 lines
31 KiB
Go
1187 lines
31 KiB
Go
// Package lspool manages a pool of AntiGravity Language Server (LS) instances.
//
// Each Google account gets its own set of LS instances (up to
// Config.ReplicasPerAccount replicas). The LS binary is Google's own
// compiled Go binary, so all upstream TLS fingerprints, session behavior,
// and protocol patterns are 100% authentic — indistinguishable from the real IDE.
//
// Architecture:
//
//	sub2API Gateway → LS Pool → LS Instance (per account) → cloudcode-pa
//
// Communication protocol (from JS source analysis):
//
//	sub2API → LS:   ConnectRPC over HTTPS/2, binary proto, x-codeium-csrf-token header
//	LS → ExtServer: ConnectRPC over HTTP/1.1, binary proto, x-codeium-csrf-token header
//
//	Unary calls:  Content-Type: application/proto (no envelope framing)
//	Stream calls: Content-Type: application/connect+proto (envelope-framed)
//	  Envelope = 1 byte flags + 4 byte BE length + payload
//	  flags=0x02 means end-of-stream trailer
|
|
package lspool
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"context"
|
|
crand "crypto/rand"
|
|
"crypto/sha256"
|
|
"crypto/tls"
|
|
"crypto/x509"
|
|
"encoding/binary"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"net/http"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"regexp"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"golang.org/x/net/http2"
|
|
|
|
"github.com/Wei-Shaw/sub2api/internal/util/logredact"
|
|
)
|
|
|
|
// ============================================================
|
|
// Configuration
|
|
// ============================================================
|
|
|
|
// Config holds the tunable settings of an LS pool.
type Config struct {
	// AppRoot is the path to the AntiGravity.app resources directory,
	// e.g. "/Applications/AntiGravity.app/Contents/Resources/app".
	AppRoot string

	// CloudCodeEndpoint overrides the default Cloud Code endpoint.
	CloudCodeEndpoint string

	// MaxIdleTime is how long an LS instance may stay idle before it is shut down.
	MaxIdleTime time.Duration

	// HealthCheckInterval is the period between two health-check passes.
	HealthCheckInterval time.Duration

	// ReplicasPerAccount controls how many LS processes a single account can use.
	ReplicasPerAccount int
}
|
|
|
|
// DefaultConfig returns production defaults
|
|
func DefaultConfig() Config {
|
|
return Config{
|
|
AppRoot: findAppRoot(),
|
|
CloudCodeEndpoint: "https://cloudcode-pa.googleapis.com",
|
|
MaxIdleTime: 30 * time.Minute,
|
|
HealthCheckInterval: 30 * time.Second,
|
|
ReplicasPerAccount: parseLSReplicaCount(),
|
|
}
|
|
}
|
|
|
|
// parseLSReplicaCount reads ANTIGRAVITY_LS_REPLICAS_PER_ACCOUNT from the
// environment. It returns 5 when the variable is unset, blank, not an
// integer, or smaller than 1.
func parseLSReplicaCount() int {
	const fallback = 5

	raw := strings.TrimSpace(os.Getenv("ANTIGRAVITY_LS_REPLICAS_PER_ACCOUNT"))
	if raw == "" {
		return fallback
	}
	if n, err := strconv.Atoi(raw); err == nil && n >= 1 {
		return n
	}
	return fallback
}
|
|
|
|
// findAppRoot returns the first known install location that contains an
// extensions/antigravity/bin directory. When none of them exists it falls
// back to the first candidate.
func findAppRoot() string {
	roots := [...]string{
		"/Applications/AntiGravity.app/Contents/Resources/app",
		"/Applications/Antigravity.app/Contents/Resources/app",
		filepath.Join(os.Getenv("HOME"), ".local/share/antigravity/app"),
	}
	for idx := range roots {
		binDir := filepath.Join(roots[idx], "extensions", "antigravity", "bin")
		if _, err := os.Stat(binDir); err != nil {
			continue
		}
		return roots[idx]
	}
	return roots[0]
}
|
|
|
|
// ============================================================
|
|
// LS Instance
|
|
// ============================================================
|
|
|
|
// Per-instance concurrency and startup-probe tuning.
const (
	// maxConcurrencyPerInstance limits how many concurrent Cascade calls a
	// single LS instance handles. LS is designed for a single IDE user; beyond
	// this threshold requests are rejected so the caller can fall back to
	// direct HTTP.
	maxConcurrencyPerInstance = 5

	// lsStartupReadyTimeout bounds the whole readiness wait after launch.
	lsStartupReadyTimeout = 6 * time.Second

	// lsStartupProbeInterval is the pause between two readiness probes.
	lsStartupProbeInterval = 100 * time.Millisecond

	// lsStartupHeartbeatTimeout is the deadline of a single startup heartbeat.
	lsStartupHeartbeatTimeout = 1 * time.Second
)
|
|
|
|
// Instance represents a single Language Server process bound to one Google
// account.
type Instance struct {
	AccountID string
	Email     string
	CSRF      string // sent as the x-codeium-csrf-token header on every call
	Replica   int    // replica slot index within the account
	Address   string // e.g., "127.0.0.1:52444"

	cmd     *exec.Cmd    // LS process handle
	cleanup func()       // optional teardown hook, run when the instance is stopped
	client  *http.Client // HTTPS client used to reach the LS

	mu        sync.RWMutex // guards healthy, lastUsed and modelMapError
	healthy   bool
	lastUsed  time.Time
	startedAt time.Time

	inflight      int64 // atomic: current number of concurrent cascade calls
	modelMapReady int32 // atomic flag: 1 once the model config has been loaded
	modelMapHard  int32 // atomic flag: 1 when model config loading is known to be impossible

	remote      bool   // when true, calls are routed through the callWorker* helpers
	workerToken string // NOTE(review): presumably the credential for remote worker calls — not used in this part of the file
	routingKey  string // NOTE(review): presumably the sticky routing key — not used in this part of the file

	modelMapError string // last reason recorded by SetModelMappingUnavailable
}
|
|
|
|
// AcquireConcurrency atomically increments the inflight counter.
|
|
// Returns false if the instance is already at max capacity.
|
|
func (i *Instance) AcquireConcurrency() bool {
|
|
for {
|
|
cur := atomic.LoadInt64(&i.inflight)
|
|
if cur >= int64(maxConcurrencyPerInstance) {
|
|
return false
|
|
}
|
|
if atomic.CompareAndSwapInt64(&i.inflight, cur, cur+1) {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
|
|
// ReleaseConcurrency decrements the inflight counter.
|
|
func (i *Instance) ReleaseConcurrency() {
|
|
atomic.AddInt64(&i.inflight, -1)
|
|
}
|
|
|
|
// ConcurrentCount returns the current number of in-flight cascade calls.
|
|
func (i *Instance) ConcurrentCount() int64 {
|
|
return atomic.LoadInt64(&i.inflight)
|
|
}
|
|
|
|
// SetModelMappingReady records whether this LS instance has successfully loaded
|
|
// its model config from the upstream service.
|
|
func (i *Instance) SetModelMappingReady(ready bool) {
|
|
if ready {
|
|
atomic.StoreInt32(&i.modelMapReady, 1)
|
|
return
|
|
}
|
|
atomic.StoreInt32(&i.modelMapReady, 0)
|
|
}
|
|
|
|
// SetModelMappingUnavailable marks the instance as unable to load model config
|
|
// with the current token/client combination.
|
|
func (i *Instance) SetModelMappingUnavailable(reason string) {
|
|
atomic.StoreInt32(&i.modelMapHard, 1)
|
|
i.mu.Lock()
|
|
i.modelMapError = strings.TrimSpace(reason)
|
|
i.mu.Unlock()
|
|
}
|
|
|
|
// ClearModelMappingUnavailable resets any previously recorded permanent model
|
|
// mapping failure state.
|
|
func (i *Instance) ClearModelMappingUnavailable() {
|
|
atomic.StoreInt32(&i.modelMapHard, 0)
|
|
i.mu.Lock()
|
|
i.modelMapError = ""
|
|
i.mu.Unlock()
|
|
}
|
|
|
|
// HasModelMappingUnavailable reports whether model config loading is currently
|
|
// known to be incompatible with the account/token.
|
|
func (i *Instance) HasModelMappingUnavailable() bool {
|
|
return atomic.LoadInt32(&i.modelMapHard) == 1
|
|
}
|
|
|
|
// ModelMappingUnavailableReason returns the last recorded permanent failure
|
|
// reason, if any.
|
|
func (i *Instance) ModelMappingUnavailableReason() string {
|
|
i.mu.RLock()
|
|
defer i.mu.RUnlock()
|
|
return strings.TrimSpace(i.modelMapError)
|
|
}
|
|
|
|
// HasModelMappingReady reports whether this LS instance has completed model
|
|
// config loading successfully.
|
|
func (i *Instance) HasModelMappingReady() bool {
|
|
return atomic.LoadInt32(&i.modelMapReady) == 1
|
|
}
|
|
|
|
// IsHealthy returns whether the instance is healthy
|
|
func (i *Instance) IsHealthy() bool {
|
|
i.mu.RLock()
|
|
defer i.mu.RUnlock()
|
|
return i.healthy
|
|
}
|
|
|
|
// Touch marks the instance as recently used
|
|
func (i *Instance) Touch() {
|
|
i.mu.Lock()
|
|
i.lastUsed = time.Now()
|
|
i.mu.Unlock()
|
|
}
|
|
|
|
// ============================================================
|
|
// RPC Methods — uses ConnectRPC binary proto
|
|
// ============================================================
|
|
|
|
// LSService is the fully qualified ConnectRPC service name of the Language
// Server.
const LSService = "exa.language_server_pb.LanguageServerService"
|
|
|
|
// CallUnaryJSON makes a ConnectRPC JSON unary call (for convenience/debugging).
|
|
func (i *Instance) CallUnaryJSON(ctx context.Context, service, method string, input any) ([]byte, error) {
|
|
i.Touch()
|
|
|
|
if i.remote {
|
|
body, err := marshalWorkerJSONBody(input)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("marshal input: %w", err)
|
|
}
|
|
return i.callWorkerUnary(ctx, service, method, "json", body)
|
|
}
|
|
|
|
url := fmt.Sprintf("https://%s/%s/%s", i.Address, service, method)
|
|
|
|
var body []byte
|
|
if input != nil {
|
|
var err error
|
|
body, err = json.Marshal(input)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("marshal input: %w", err)
|
|
}
|
|
} else {
|
|
body = []byte("{}")
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
req.Header.Set("Content-Type", "application/json")
|
|
req.Header.Set("Connect-Protocol-Version", "1")
|
|
req.Header.Set("x-codeium-csrf-token", i.CSRF)
|
|
|
|
resp, err := i.client.Do(req)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("rpc %s/%s: %w", service, method, err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
respBody, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("read response: %w", err)
|
|
}
|
|
|
|
if resp.StatusCode != 200 {
|
|
return respBody, fmt.Errorf("rpc %s/%s HTTP %d: %s",
|
|
service, method, resp.StatusCode, truncate(string(respBody), 200))
|
|
}
|
|
|
|
return respBody, nil
|
|
}
|
|
|
|
// CallRPC makes a ConnectRPC binary proto unary call to the LS.
|
|
// Uses Content-Type: application/proto (Connect protocol unary).
|
|
func (i *Instance) CallRPC(ctx context.Context, service, method string, protoBody []byte) ([]byte, error) {
|
|
i.Touch()
|
|
|
|
if i.remote {
|
|
return i.callWorkerUnary(ctx, service, method, "proto", protoBody)
|
|
}
|
|
|
|
url := fmt.Sprintf("https://%s/%s/%s", i.Address, service, method)
|
|
|
|
if protoBody == nil {
|
|
protoBody = []byte{}
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(protoBody))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
req.Header.Set("Content-Type", "application/proto")
|
|
req.Header.Set("Connect-Protocol-Version", "1")
|
|
req.Header.Set("x-codeium-csrf-token", i.CSRF)
|
|
|
|
resp, err := i.client.Do(req)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("rpc %s/%s: %w", service, method, err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
respBody, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("read response: %w", err)
|
|
}
|
|
|
|
if resp.StatusCode != 200 {
|
|
return respBody, fmt.Errorf("rpc %s/%s HTTP %d: %s",
|
|
service, method, resp.StatusCode, truncate(string(respBody), 200))
|
|
}
|
|
|
|
return respBody, nil
|
|
}
|
|
|
|
// StreamRPC makes a server-streaming ConnectRPC call, returning the raw response.
|
|
// Uses Content-Type: application/connect+proto with envelope framing.
|
|
func (i *Instance) StreamRPC(ctx context.Context, service, method string, protoBody []byte) (*http.Response, error) {
|
|
i.Touch()
|
|
|
|
if i.remote {
|
|
return i.callWorkerStream(ctx, service, method, "proto", protoBody)
|
|
}
|
|
|
|
url := fmt.Sprintf("https://%s/%s/%s", i.Address, service, method)
|
|
|
|
if protoBody == nil {
|
|
protoBody = []byte{}
|
|
}
|
|
|
|
// Wrap in Connect envelope for streaming request
|
|
framedBody := frameConnectMessage(protoBody)
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(framedBody))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
req.Header.Set("Content-Type", "application/connect+proto")
|
|
req.Header.Set("Connect-Protocol-Version", "1")
|
|
req.Header.Set("Connect-Content-Encoding", "identity")
|
|
req.Header.Set("x-codeium-csrf-token", i.CSRF)
|
|
|
|
return i.client.Do(req)
|
|
}
|
|
|
|
// StreamRPCJSON makes a server-streaming ConnectRPC JSON call (for debugging).
|
|
func (i *Instance) StreamRPCJSON(ctx context.Context, service, method string, input any) (*http.Response, error) {
|
|
i.Touch()
|
|
|
|
if i.remote {
|
|
body, err := marshalWorkerJSONBody(input)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("marshal: %w", err)
|
|
}
|
|
return i.callWorkerStream(ctx, service, method, "json", body)
|
|
}
|
|
|
|
url := fmt.Sprintf("https://%s/%s/%s", i.Address, service, method)
|
|
|
|
var body []byte
|
|
if input != nil {
|
|
var err error
|
|
body, err = json.Marshal(input)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("marshal: %w", err)
|
|
}
|
|
} else {
|
|
body = []byte("{}")
|
|
}
|
|
|
|
// Wrap in Connect envelope for streaming request
|
|
body = frameConnectMessage(body)
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
req.Header.Set("Content-Type", "application/connect+json")
|
|
req.Header.Set("Connect-Protocol-Version", "1")
|
|
req.Header.Set("Connect-Content-Encoding", "identity")
|
|
req.Header.Set("x-codeium-csrf-token", i.CSRF)
|
|
|
|
return i.client.Do(req)
|
|
}
|
|
|
|
// frameConnectMessage wraps payload in a Connect envelope: one flags byte
// (left at zero), a 4-byte big-endian payload length, then the payload.
func frameConnectMessage(payload []byte) []byte {
	var header [5]byte // header[0] is the flags byte and stays 0
	binary.BigEndian.PutUint32(header[1:], uint32(len(payload)))

	framed := make([]byte, 0, len(header)+len(payload))
	framed = append(framed, header[:]...)
	return append(framed, payload...)
}
|
|
|
|
// Heartbeat sends a heartbeat to the LS
|
|
func (i *Instance) Heartbeat(ctx context.Context) error {
|
|
return i.HeartbeatWithTimeout(ctx, 15*time.Second)
|
|
}
|
|
|
|
// HeartbeatWithTimeout sends a heartbeat to the LS with an explicit deadline.
|
|
func (i *Instance) HeartbeatWithTimeout(ctx context.Context, timeout time.Duration) error {
|
|
if timeout <= 0 {
|
|
timeout = 15 * time.Second
|
|
}
|
|
ctx, cancel := context.WithTimeout(ctx, timeout)
|
|
defer cancel()
|
|
_, err := i.CallRPC(ctx, LSService, "Heartbeat", nil)
|
|
return err
|
|
}
|
|
|
|
func waitForInstanceReady(
|
|
ctx context.Context,
|
|
probeInterval time.Duration,
|
|
heartbeat func(context.Context) error,
|
|
) (int, error) {
|
|
if probeInterval <= 0 {
|
|
probeInterval = lsStartupProbeInterval
|
|
}
|
|
if heartbeat == nil {
|
|
return 0, fmt.Errorf("heartbeat func is nil")
|
|
}
|
|
|
|
timer := time.NewTimer(0)
|
|
defer timer.Stop()
|
|
|
|
attempts := 0
|
|
var lastErr error
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
if lastErr == nil {
|
|
lastErr = ctx.Err()
|
|
}
|
|
return attempts, lastErr
|
|
case <-timer.C:
|
|
}
|
|
|
|
attempts++
|
|
if err := heartbeat(ctx); err == nil {
|
|
return attempts, nil
|
|
} else {
|
|
lastErr = err
|
|
}
|
|
|
|
timer.Reset(probeInterval)
|
|
}
|
|
}
|
|
|
|
// ============================================================
|
|
// Pool Manager
|
|
// ============================================================
|
|
|
|
// Pool manages multiple LS instances, with sticky session routing per account.
|
|
type Pool struct {
|
|
config Config
|
|
instances map[string][]*Instance // accountID -> replica slot -> instance
|
|
extServer *MockExtensionServer // shared mock extension server
|
|
mu sync.RWMutex
|
|
ctx context.Context
|
|
cancel context.CancelFunc
|
|
logger *slog.Logger
|
|
}
|
|
|
|
// NewPool creates a new LS pool with lifecycle management
|
|
func NewPool(config Config) *Pool {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
// Generate a shared CSRF token for communication between LS and ext server
|
|
csrf := generateUUID()
|
|
|
|
// Start the mock extension server
|
|
extServer, err := NewMockExtensionServer(csrf)
|
|
if err != nil {
|
|
slog.Error("failed to start mock extension server", "err", err)
|
|
}
|
|
|
|
p := &Pool{
|
|
config: config,
|
|
instances: make(map[string][]*Instance),
|
|
extServer: extServer,
|
|
ctx: ctx,
|
|
cancel: cancel,
|
|
logger: slog.Default().With("component", "lspool"),
|
|
}
|
|
go p.lifecycleLoop()
|
|
return p
|
|
}
|
|
|
|
func (p *Pool) replicaCount() int {
|
|
if p.config.ReplicasPerAccount < 1 {
|
|
return 1
|
|
}
|
|
return p.config.ReplicasPerAccount
|
|
}
|
|
|
|
func (p *Pool) ensureLogger() {
|
|
if p.logger == nil {
|
|
p.logger = slog.Default().With("component", "lspool")
|
|
}
|
|
}
|
|
|
|
// replicaSlotIndex maps routingKey onto a replica slot in [0, replicaCount).
// The slot is the first four bytes of the key's SHA-256 digest, read as a
// big-endian integer, modulo replicaCount. A blank key or a replica count of
// at most one always yields slot 0.
func replicaSlotIndex(routingKey string, replicaCount int) int {
	if replicaCount <= 1 {
		return 0
	}
	if strings.TrimSpace(routingKey) == "" {
		return 0
	}

	digest := sha256.Sum256([]byte(routingKey))
	bucket := binary.BigEndian.Uint32(digest[:4]) % uint32(replicaCount)
	return int(bucket)
}
|
|
|
|
func chooseLeastBusyHealthy(instances []*Instance) *Instance {
|
|
var best *Instance
|
|
for _, inst := range instances {
|
|
if inst == nil || !inst.IsHealthy() {
|
|
continue
|
|
}
|
|
if best == nil || inst.ConcurrentCount() < best.ConcurrentCount() {
|
|
best = inst
|
|
}
|
|
}
|
|
return best
|
|
}
|
|
|
|
func (p *Pool) ensureReplicaSlotsLocked(accountID string) []*Instance {
|
|
slots := p.instances[accountID]
|
|
required := p.replicaCount()
|
|
if len(slots) < required {
|
|
expanded := make([]*Instance, required)
|
|
copy(expanded, slots)
|
|
slots = expanded
|
|
p.instances[accountID] = slots
|
|
}
|
|
return slots
|
|
}
|
|
|
|
// Get returns an existing healthy LS instance for the account and routing key, or nil.
|
|
func (p *Pool) Get(accountID, routingKey string) *Instance {
|
|
p.mu.RLock()
|
|
defer p.mu.RUnlock()
|
|
|
|
instances := p.instances[accountID]
|
|
if len(instances) == 0 {
|
|
return nil
|
|
}
|
|
if strings.TrimSpace(routingKey) != "" {
|
|
slot := replicaSlotIndex(routingKey, p.replicaCount())
|
|
if slot < len(instances) {
|
|
inst := instances[slot]
|
|
if inst != nil && inst.IsHealthy() {
|
|
inst.Touch()
|
|
return inst
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
if inst := chooseLeastBusyHealthy(instances); inst != nil {
|
|
inst.Touch()
|
|
return inst
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// GetOrCreate returns an existing LS or starts a new one.
|
|
// proxyURL is passed to the LS process as HTTPS_PROXY for Google API connectivity.
|
|
func (p *Pool) GetOrCreate(accountID, routingKey string, proxyURL ...string) (*Instance, error) {
|
|
p.ensureLogger()
|
|
if inst := p.Get(accountID, routingKey); inst != nil {
|
|
return inst, nil
|
|
}
|
|
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
|
|
slots := p.ensureReplicaSlotsLocked(accountID)
|
|
slot := replicaSlotIndex(routingKey, p.replicaCount())
|
|
|
|
if strings.TrimSpace(routingKey) == "" {
|
|
if inst := chooseLeastBusyHealthy(slots); inst != nil {
|
|
inst.Touch()
|
|
return inst, nil
|
|
}
|
|
for idx, inst := range slots {
|
|
if inst == nil {
|
|
slot = idx
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
if slot < len(slots) {
|
|
if inst := slots[slot]; inst != nil && inst.IsHealthy() {
|
|
inst.Touch()
|
|
return inst, nil
|
|
}
|
|
}
|
|
|
|
proxy := ""
|
|
if len(proxyURL) > 0 {
|
|
proxy = proxyURL[0]
|
|
}
|
|
|
|
if slot < len(slots) && slots[slot] != nil {
|
|
p.stopInstance(slots[slot])
|
|
slots[slot] = nil
|
|
}
|
|
|
|
inst, err := p.startInstance(accountID, proxy, slot)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
slots[slot] = inst
|
|
p.logger.Info("LS instance created",
|
|
"account", shortAccountID(accountID),
|
|
"replica", slot,
|
|
"address", inst.Address,
|
|
"pid", inst.cmd.Process.Pid)
|
|
return inst, nil
|
|
}
|
|
|
|
// Remove stops and removes all LS instances for an account.
|
|
func (p *Pool) Remove(accountID string) {
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
|
|
if slots, ok := p.instances[accountID]; ok {
|
|
for _, inst := range slots {
|
|
if inst == nil {
|
|
continue
|
|
}
|
|
p.stopInstance(inst)
|
|
}
|
|
delete(p.instances, accountID)
|
|
}
|
|
}
|
|
|
|
// SetAccountToken updates the OAuth token for an account in the mock extension server
|
|
func (p *Pool) SetAccountToken(accountID, accessToken, refreshToken string, expiresAt time.Time) {
|
|
if p.extServer != nil {
|
|
p.extServer.SetToken(accountID, &TokenInfo{
|
|
AccessToken: accessToken,
|
|
RefreshToken: refreshToken,
|
|
ExpiresAt: expiresAt,
|
|
})
|
|
}
|
|
p.mu.RLock()
|
|
slots := append([]*Instance(nil), p.instances[accountID]...)
|
|
p.mu.RUnlock()
|
|
for _, inst := range slots {
|
|
if inst == nil {
|
|
continue
|
|
}
|
|
inst.SetModelMappingReady(false)
|
|
inst.ClearModelMappingUnavailable()
|
|
}
|
|
}
|
|
|
|
// SetAccountModelCredits updates the JS-parity uss-modelCredits state for an account.
|
|
func (p *Pool) SetAccountModelCredits(accountID string, useAICredits bool, availableCredits, minimumCreditAmountForUsage *int32) {
|
|
if p.extServer != nil {
|
|
p.extServer.SetModelCredits(accountID, &ModelCreditsInfo{
|
|
UseAICredits: useAICredits,
|
|
AvailableCredits: availableCredits,
|
|
MinimumCreditAmountForUsage: minimumCreditAmountForUsage,
|
|
})
|
|
}
|
|
}
|
|
|
|
// Stats returns pool statistics
|
|
func (p *Pool) Stats() map[string]any {
|
|
p.mu.RLock()
|
|
defer p.mu.RUnlock()
|
|
|
|
active := 0
|
|
total := 0
|
|
for _, slots := range p.instances {
|
|
for _, inst := range slots {
|
|
if inst == nil {
|
|
continue
|
|
}
|
|
total++
|
|
if inst.IsHealthy() {
|
|
active++
|
|
}
|
|
}
|
|
}
|
|
return map[string]any{
|
|
"accounts": len(p.instances),
|
|
"total": total,
|
|
"active": active,
|
|
}
|
|
}
|
|
|
|
// Close shuts down all instances and the extension server
|
|
func (p *Pool) Close() {
|
|
p.ensureLogger()
|
|
p.cancel()
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
|
|
for id, slots := range p.instances {
|
|
for _, inst := range slots {
|
|
if inst == nil {
|
|
continue
|
|
}
|
|
p.logger.Info("shutting down LS", "account", shortAccountID(id), "replica", inst.Replica)
|
|
p.stopInstance(inst)
|
|
}
|
|
}
|
|
p.instances = make(map[string][]*Instance)
|
|
|
|
if p.extServer != nil {
|
|
p.extServer.Close()
|
|
}
|
|
}
|
|
|
|
// ============================================================
|
|
// Instance Lifecycle
|
|
// ============================================================
|
|
|
|
// portRe extracts the HTTPS port from an LS stderr line containing
// "at <port> for HTTPS"; the port digits are capture group 1.
var portRe = regexp.MustCompile(`at (\d+) for HTTPS`)
|
|
|
|
func (p *Pool) startInstance(accountID string, proxyURL string, replica int) (*Instance, error) {
|
|
binPath := filepath.Join(p.config.AppRoot, "extensions", "antigravity", "bin", lsBinaryName())
|
|
if _, err := os.Stat(binPath); err != nil {
|
|
return nil, fmt.Errorf("LS binary not found: %s", binPath)
|
|
}
|
|
|
|
// Each LS instance gets its own CSRF token (like the real IDE)
|
|
csrf := generateUUID()
|
|
appDataDir := fmt.Sprintf("antigravity-pool-%s-r%d", shortAccountID(accountID), replica)
|
|
|
|
args := []string{
|
|
"--csrf_token", csrf,
|
|
"--app_data_dir", appDataDir,
|
|
"--https_server_port", "0",
|
|
}
|
|
if p.config.CloudCodeEndpoint != "" {
|
|
args = append(args, "--cloud_code_endpoint", p.config.CloudCodeEndpoint)
|
|
}
|
|
// Connect LS to our mock extension server for token injection.
|
|
// The extension server uses a shared CSRF token (set at server creation).
|
|
if p.extServer != nil {
|
|
args = append(args,
|
|
"--extension_server_port", fmt.Sprintf("%d", p.extServer.Port()),
|
|
"--extension_server_csrf_token", p.extServer.csrf,
|
|
)
|
|
}
|
|
|
|
rawProxyURL := resolveLSProxy(proxyURL)
|
|
launchPlan, err := prepareLSLaunchPlan(binPath, args, rawProxyURL)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("prepare LS launch: %w", err)
|
|
}
|
|
|
|
cmd := launchPlan.cmd
|
|
cmd.Env = buildLSEnv(os.Environ(), p.config.AppRoot, launchPlan.effectiveProxyURL)
|
|
p.logger.Info("LS starting",
|
|
"account", shortAccountID(accountID),
|
|
"replica", replica,
|
|
"proxy_source", logredact.RedactProxyURL(rawProxyURL),
|
|
"proxy_mode", launchPlan.proxyMode,
|
|
"effective_proxy", logredact.RedactProxyURL(launchPlan.effectiveProxyURL))
|
|
|
|
stdin, err := cmd.StdinPipe()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("stdin pipe: %w", err)
|
|
}
|
|
stderr, err := cmd.StderrPipe()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("stderr pipe: %w", err)
|
|
}
|
|
|
|
if err := cmd.Start(); err != nil {
|
|
if launchPlan.cleanup != nil {
|
|
launchPlan.cleanup()
|
|
}
|
|
return nil, fmt.Errorf("start LS: %w", err)
|
|
}
|
|
|
|
// Write metadata proto to stdin (with access token if available)
|
|
accessToken := ""
|
|
if p.extServer != nil {
|
|
p.extServer.mu.RLock()
|
|
if info := p.extServer.tokens[accountID]; info != nil {
|
|
accessToken = info.AccessToken
|
|
}
|
|
p.extServer.mu.RUnlock()
|
|
}
|
|
metaBytes := buildMetadataBytes(accessToken)
|
|
p.logger.Info("writing metadata to LS stdin",
|
|
"account", shortAccountID(accountID),
|
|
"replica", replica,
|
|
"meta_len", len(metaBytes),
|
|
"has_token", accessToken != "",
|
|
"hex_prefix", fmt.Sprintf("%x", metaBytes[:min(40, len(metaBytes))]))
|
|
stdin.Write(metaBytes)
|
|
stdin.Close()
|
|
|
|
// Parse HTTPS port from stderr AND log all LS output
|
|
portCh := make(chan string, 1)
|
|
go func() {
|
|
scanner := bufio.NewScanner(stderr)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
p.logger.Warn("LS stderr", "account", shortAccountID(accountID), "replica", replica, "line", line)
|
|
if matches := portRe.FindStringSubmatch(line); len(matches) > 1 {
|
|
portCh <- matches[1]
|
|
}
|
|
}
|
|
p.logger.Warn("LS stderr EOF", "account", shortAccountID(accountID), "replica", replica)
|
|
}()
|
|
|
|
var address string
|
|
select {
|
|
case port := <-portCh:
|
|
address = "127.0.0.1:" + port
|
|
case <-time.After(15 * time.Second):
|
|
cmd.Process.Kill()
|
|
return nil, fmt.Errorf("timeout: LS did not report HTTPS port within 15s")
|
|
}
|
|
|
|
// Create HTTPS client with LS self-signed cert
|
|
httpClient, err := createHTTPClient(p.config.AppRoot, csrf)
|
|
if err != nil {
|
|
cmd.Process.Kill()
|
|
return nil, fmt.Errorf("create http client: %w", err)
|
|
}
|
|
|
|
inst := &Instance{
|
|
AccountID: accountID,
|
|
CSRF: csrf,
|
|
Replica: replica,
|
|
Address: address,
|
|
cmd: cmd,
|
|
cleanup: launchPlan.cleanup,
|
|
client: httpClient,
|
|
healthy: false,
|
|
lastUsed: time.Now(),
|
|
startedAt: time.Now(),
|
|
}
|
|
|
|
// Real IDE waits for LanguageServerStarted callback from ExtServer (timeout 60s).
|
|
// Our MockExtServer already received this callback during port detection
|
|
// (LS calls LanguageServerStarted right after binding HTTPS port).
|
|
// Probe readiness immediately and keep retrying with a short interval instead
|
|
// of sleeping a fixed 3-6 seconds on every cold start.
|
|
p.logger.Info("waiting for LS readiness", "account", shortAccountID(accountID), "replica", replica, "address", address)
|
|
readyStartedAt := time.Now()
|
|
readyCtx, cancel := context.WithTimeout(context.Background(), lsStartupReadyTimeout)
|
|
attempts, err := waitForInstanceReady(readyCtx, lsStartupProbeInterval, func(callCtx context.Context) error {
|
|
return inst.HeartbeatWithTimeout(callCtx, lsStartupHeartbeatTimeout)
|
|
})
|
|
cancel()
|
|
if err != nil {
|
|
cmd.Process.Kill()
|
|
return nil, fmt.Errorf("LS not ready after startup: %w", err)
|
|
}
|
|
inst.mu.Lock()
|
|
inst.healthy = true
|
|
inst.mu.Unlock()
|
|
p.logger.Info("LS ready",
|
|
"account", shortAccountID(accountID),
|
|
"replica", replica,
|
|
"attempts", attempts,
|
|
"waited", time.Since(readyStartedAt).Truncate(time.Millisecond))
|
|
|
|
// Refresh model mapping from LS (async with retries — don't block startup)
|
|
go func() {
|
|
for attempt := 1; attempt <= 5; attempt++ {
|
|
if RefreshModelMapping(inst) {
|
|
p.logger.Info("model mapping loaded", "account", shortAccountID(accountID), "replica", replica, "attempt", attempt)
|
|
return
|
|
}
|
|
if inst.HasModelMappingUnavailable() {
|
|
p.logger.Warn("model mapping unavailable",
|
|
"account", shortAccountID(accountID),
|
|
"replica", replica,
|
|
"attempt", attempt,
|
|
"reason", truncate(inst.ModelMappingUnavailableReason(), 200))
|
|
return
|
|
}
|
|
p.logger.Warn("model mapping not loaded, retrying", "account", shortAccountID(accountID), "replica", replica, "attempt", attempt)
|
|
time.Sleep(time.Duration(attempt*10) * time.Second)
|
|
}
|
|
}()
|
|
|
|
return inst, nil
|
|
}
|
|
|
|
func (p *Pool) stopInstance(inst *Instance) {
|
|
if inst.cmd != nil && inst.cmd.Process != nil {
|
|
inst.cmd.Process.Kill()
|
|
inst.cmd.Wait()
|
|
}
|
|
if inst.cleanup != nil {
|
|
inst.cleanup()
|
|
}
|
|
inst.mu.Lock()
|
|
inst.healthy = false
|
|
inst.mu.Unlock()
|
|
}
|
|
|
|
func (p *Pool) lifecycleLoop() {
|
|
ticker := time.NewTicker(p.config.HealthCheckInterval)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-p.ctx.Done():
|
|
return
|
|
case <-ticker.C:
|
|
p.doHealthCheck()
|
|
}
|
|
}
|
|
}
|
|
|
|
func (p *Pool) doHealthCheck() {
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
|
|
for id, slots := range p.instances {
|
|
for replica, inst := range slots {
|
|
if inst == nil {
|
|
continue
|
|
}
|
|
|
|
// Check process alive
|
|
if inst.cmd.ProcessState != nil {
|
|
p.logger.Warn("LS process exited", "account", shortAccountID(id), "replica", replica)
|
|
slots[replica] = nil
|
|
continue
|
|
}
|
|
|
|
// Check idle timeout
|
|
inst.mu.RLock()
|
|
idle := time.Since(inst.lastUsed)
|
|
inst.mu.RUnlock()
|
|
|
|
if idle > p.config.MaxIdleTime {
|
|
p.logger.Info("LS idle timeout", "account", shortAccountID(id), "replica", replica, "idle", idle)
|
|
p.stopInstance(inst)
|
|
slots[replica] = nil
|
|
continue
|
|
}
|
|
|
|
// Heartbeat check
|
|
if err := inst.Heartbeat(p.ctx); err != nil {
|
|
p.logger.Warn("heartbeat failed", "account", shortAccountID(id), "replica", replica, "err", err)
|
|
inst.mu.Lock()
|
|
inst.healthy = false
|
|
inst.mu.Unlock()
|
|
} else {
|
|
inst.mu.Lock()
|
|
inst.healthy = true
|
|
inst.mu.Unlock()
|
|
}
|
|
}
|
|
|
|
allNil := true
|
|
for _, inst := range slots {
|
|
if inst != nil {
|
|
allNil = false
|
|
break
|
|
}
|
|
}
|
|
if allNil {
|
|
delete(p.instances, id)
|
|
}
|
|
}
|
|
}
|
|
|
|
// ============================================================
|
|
// Helpers
|
|
// ============================================================
|
|
|
|
// defaultLSCertFileCandidates lists CA bundle files probed, in order, when
// SSL_CERT_FILE is not already set for the LS process.
var defaultLSCertFileCandidates = []string{
	"/etc/ssl/certs/ca-certificates.crt",
	"/etc/pki/tls/certs/ca-bundle.crt",
	"/etc/ssl/cert.pem",
}

// defaultLSCertDirCandidates lists CA directories probed, in order, when
// SSL_CERT_DIR is not already set for the LS process.
var defaultLSCertDirCandidates = []string{
	"/etc/ssl/certs",
	"/etc/pki/tls/certs",
}
|
|
|
|
func buildLSEnv(baseEnv []string, appRoot string, proxyURL string) []string {
|
|
env := append([]string(nil), baseEnv...)
|
|
env = setEnvValue(env, "ANTIGRAVITY_EDITOR_APP_ROOT", appRoot)
|
|
|
|
// Set proxy for LS to reach Google APIs.
|
|
// MUST always override inherited container proxy (which may be Anthropic-only).
|
|
// proxyURL is already fully resolved by the caller.
|
|
// Always set — even empty string clears inherited container values
|
|
env = setEnvValue(env, "HTTPS_PROXY", proxyURL)
|
|
env = setEnvValue(env, "HTTP_PROXY", proxyURL)
|
|
env = setEnvValue(env, "ALL_PROXY", proxyURL)
|
|
env = setEnvValue(env, "https_proxy", proxyURL)
|
|
env = setEnvValue(env, "http_proxy", proxyURL)
|
|
env = setEnvValue(env, "all_proxy", proxyURL)
|
|
|
|
if !hasEnvKey(env, "SSL_CERT_FILE") {
|
|
if certFile := firstExistingPath(defaultLSCertFileCandidates); certFile != "" {
|
|
env = setEnvValue(env, "SSL_CERT_FILE", certFile)
|
|
}
|
|
}
|
|
if !hasEnvKey(env, "SSL_CERT_DIR") {
|
|
if certDir := firstExistingPath(defaultLSCertDirCandidates); certDir != "" {
|
|
env = setEnvValue(env, "SSL_CERT_DIR", certDir)
|
|
}
|
|
}
|
|
|
|
return env
|
|
}
|
|
|
|
// resolveLSProxy returns the trimmed proxyURL when it is non-blank, otherwise
// the trimmed value of the ANTIGRAVITY_LS_PROXY environment variable.
func resolveLSProxy(proxyURL string) string {
	if explicit := strings.TrimSpace(proxyURL); explicit != "" {
		return explicit
	}
	return strings.TrimSpace(os.Getenv("ANTIGRAVITY_LS_PROXY"))
}
|
|
|
|
// firstExistingPath returns the first non-empty candidate that os.Stat can
// resolve, or "" when there is none.
func firstExistingPath(candidates []string) string {
	for idx := range candidates {
		path := candidates[idx]
		if path == "" {
			continue
		}
		if _, err := os.Stat(path); err != nil {
			continue
		}
		return path
	}
	return ""
}
|
|
|
|
// hasEnvKey reports whether env contains an entry of the form "key=...".
func hasEnvKey(env []string, key string) bool {
	for idx := range env {
		entry := env[idx]
		if strings.HasPrefix(entry, key) && strings.HasPrefix(entry[len(key):], "=") {
			return true
		}
	}
	return false
}
|
|
|
|
// setEnvValue sets key to value in env and returns the resulting slice.
//
// Every existing "key=..." entry is rewritten in place; when there is none, a
// new entry is appended. All duplicates are rewritten because os/exec uses
// the last value of a duplicated key, so touching only the first one could
// leave the old value in effect.
//
// The input slice is modified in place; callers that must keep the original
// should pass a copy.
func setEnvValue(env []string, key, value string) []string {
	prefix := key + "="
	assignment := prefix + value

	found := false
	for i, entry := range env {
		if strings.HasPrefix(entry, prefix) {
			env[i] = assignment
			found = true
		}
	}
	if found {
		return env
	}
	return append(env, assignment)
}
|
|
|
|
func createHTTPClient(appRoot, csrf string) (*http.Client, error) {
|
|
certPath := filepath.Join(appRoot, "extensions", "antigravity", "dist", "languageServer", "cert.pem")
|
|
caCert, err := os.ReadFile(certPath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("read cert %s: %w", certPath, err)
|
|
}
|
|
certPool := x509.NewCertPool()
|
|
if !certPool.AppendCertsFromPEM(caCert) {
|
|
return nil, fmt.Errorf("failed to parse cert")
|
|
}
|
|
|
|
tlsCfg := &tls.Config{
|
|
RootCAs: certPool,
|
|
InsecureSkipVerify: true, // LS uses self-signed cert; trust it unconditionally
|
|
}
|
|
|
|
return &http.Client{
|
|
Transport: &csrfTransport{
|
|
base: &http2.Transport{
|
|
TLSClientConfig: tlsCfg,
|
|
ReadIdleTimeout: 30 * time.Second,
|
|
},
|
|
csrf: csrf,
|
|
},
|
|
Timeout: 5 * time.Minute,
|
|
}, nil
|
|
}
|
|
|
|
// csrfTransport is an http.RoundTripper that adds the LS CSRF token header to
// every outgoing request before delegating to base.
type csrfTransport struct {
	base http.RoundTripper
	csrf string
}

// RoundTrip sends req through the base transport with the
// x-codeium-csrf-token header set to the transport's token.
//
// The header is set on a clone: the http.RoundTripper contract forbids
// modifying the caller's request.
func (t *csrfTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	out := req.Clone(req.Context())
	if out.Header == nil {
		// Clone keeps a nil header nil; Set needs a real map.
		out.Header = make(http.Header)
	}
	out.Header.Set("x-codeium-csrf-token", t.csrf)
	return t.base.RoundTrip(out)
}
|
|
|
|
// writeMetadata writes the Metadata proto to LS stdin.
|
|
// This matches what the real IDE does (extension.js line 42520-42522):
|
|
//
|
|
// toBinary(MetadataSchema, create(MetadataSchema, {
|
|
// ideName, ideVersion, extensionName, extensionPath,
|
|
// locale, deviceFingerprint, apiKey, disableTelemetry, userTierId
|
|
// }))
|
|
func buildMetadataBytes(accessToken string) []byte {
|
|
var buf bytes.Buffer
|
|
writeProtoStringField(&buf, 1, "antigravity") // ide_name
|
|
writeProtoStringField(&buf, 7, "1.107.0") // ide_version
|
|
writeProtoStringField(&buf, 12, "antigravity") // extension_name
|
|
writeProtoStringField(&buf, 4, "en") // locale
|
|
if accessToken != "" {
|
|
writeProtoStringField(&buf, 3, accessToken) // api_key = access_token
|
|
}
|
|
// disable_telemetry = true (field 6, varint, value 1)
|
|
buf.Write([]byte{0x30, 0x01})
|
|
return buf.Bytes()
|
|
}
|
|
|
|
func writeProtoStringField(buf *bytes.Buffer, fieldNum int, val string) {
|
|
writeVarInt(buf, uint64(fieldNum<<3|2))
|
|
writeVarInt(buf, uint64(len(val)))
|
|
buf.WriteString(val)
|
|
}
|
|
|
|
// writeVarInt appends v to buf in unsigned varint encoding.
func writeVarInt(buf *bytes.Buffer, v uint64) {
	var scratch [binary.MaxVarintLen64]byte
	n := binary.PutUvarint(scratch[:], v)
	buf.Write(scratch[:n])
}
|
|
|
|
// lsBinaryName returns the LS executable name for the current GOOS/GOARCH.
// Platforms without a dedicated binary fall back to the linux x64 name.
func lsBinaryName() string {
	switch runtime.GOOS + "/" + runtime.GOARCH {
	case "darwin/arm64":
		return "language_server_macos_arm"
	case "darwin/amd64":
		return "language_server_macos_x64"
	case "linux/amd64":
		return "language_server_linux_x64"
	case "linux/arm64":
		return "language_server_linux_arm"
	case "windows/amd64":
		return "language_server_windows_x64.exe"
	default:
		return "language_server_linux_x64" // fallback
	}
}
|
|
|
|
// generateUUID returns a random version-4 UUID in canonical
// 8-4-4-4-12 lowercase hex form.
//
// The result is used as a CSRF token, so a failure of the system random
// source must not go unnoticed and yield a predictable value: it panics.
// (Since Go 1.24 crypto/rand.Read never returns an error.)
func generateUUID() string {
	var b [16]byte
	if _, err := crand.Read(b[:]); err != nil {
		panic(fmt.Sprintf("lspool: reading random bytes for UUID: %v", err))
	}
	b[6] = (b[6] & 0x0f) | 0x40 // version 4
	b[8] = (b[8] & 0x3f) | 0x80 // RFC 4122 variant
	return fmt.Sprintf("%08x-%04x-%04x-%04x-%012x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])
}
|
|
|
|
// truncate shortens s to at most limit bytes and appends "..." when anything
// was cut off. Strings that already fit are returned unchanged.
//
// The cut never lands inside a multi-byte UTF-8 sequence: it is moved back to
// the start of the rune it would split, so valid input stays valid. A
// negative limit is treated as 0.
func truncate(s string, limit int) string {
	if limit < 0 {
		limit = 0
	}
	if len(s) <= limit {
		return s
	}

	cut := limit
	// 10xxxxxx marks a UTF-8 continuation byte; back up to the rune start.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "..."
}
|
|
|
|
// shortAccountID returns at most the first 8 bytes of accountID, for use in
// logs and directory names.
func shortAccountID(accountID string) string {
	const keep = 8
	if len(accountID) > keep {
		return accountID[:keep]
	}
	return accountID
}
|