Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 37 additions & 14 deletions commons/tenant-manager/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ package client

import (
"context"
"crypto/tls"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"net/url"
"sync"
Expand Down Expand Up @@ -667,23 +669,44 @@ func (c *Client) GetActiveTenantsByService(ctx context.Context, service string)
return tenants, nil
}

// newDefaultHTTPClient creates an HTTP client by cloning http.DefaultTransport
// so that Proxy, TLS, DialContext and other platform defaults are preserved.
// HTTP/2 is explicitly disabled to prevent a known Go stdlib panic in the
// HTTP/2 hpack encoder when multiple goroutines concurrently use the same
// HTTP/2 connection (e.g., middleware + async revalidation goroutines).
// newDefaultHTTPClient creates an HTTP client with a fully explicit *http.Transport.
//
// Historically this function cloned http.DefaultTransport to inherit platform
// defaults (Proxy, DialContext, TLS settings). That approach was fragile:
// OpenTelemetry (and other libraries) instrument http.DefaultTransport at boot
// by registering HTTP/2 handlers in the TLSNextProto map. Clone() copies that
// map, and setting ForceAttemptHTTP2 = false only prevents future auto-setup —
// it does NOT clear handlers that are already present. The inherited "h2"
// handler then triggers HTTP/2 negotiation via ALPN on HTTPS connections,
// which interacts poorly with the concurrent-goroutine usage pattern of this
// client (middleware + async revalidation) and has caused production incidents
// (malformed response errors, 503s, circuit breaker cascade).
//
// Building the transport from scratch decouples the client from the mutable
// global http.DefaultTransport entirely. HTTP/1.1 is the deliberate choice —
// a single connection is not shared across goroutines under HTTP/1.1 (each
// goroutine gets its own connection from the pool), avoiding a known stdlib
// HTTP/2 hpack encoder issue under concurrent writes.
//
// TLSNextProto is initialized to a non-nil empty map: this is the explicit
// opt-out signal the stdlib respects — a nil map instead triggers HTTP/2
// auto-setup on the next HTTPS handshake.
func newDefaultHTTPClient() *http.Client {
base, ok := http.DefaultTransport.(*http.Transport)
if !ok {
base = &http.Transport{}
t := &http.Transport{
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
}).DialContext,
ForceAttemptHTTP2: false,
TLSNextProto: map[string]func(string, *tls.Conn) http.RoundTripper{},
MaxIdleConns: 100,
MaxIdleConnsPerHost: 10,
IdleConnTimeout: 90 * time.Second,
TLSHandshakeTimeout: 10 * time.Second,
ExpectContinueTimeout: 1 * time.Second,
}

t := base.Clone()
t.ForceAttemptHTTP2 = false
t.MaxIdleConns = 100
t.MaxIdleConnsPerHost = 10
t.IdleConnTimeout = 90 * time.Second

return &http.Client{
Timeout: 30 * time.Second,
Transport: t,
Expand Down
86 changes: 86 additions & 0 deletions commons/tenant-manager/client/client_transport_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package client

import (
"crypto/tls"
"net/http"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestNewDefaultHTTPClient_UsesHTTP1Only is a regression test for the
// 2026-04-10 production incident where an inherited "h2" handler in
// http.DefaultTransport.TLSNextProto caused HTTP/2 negotiation and malformed
// responses under concurrent load.
//
// The invariant enforced here: newDefaultHTTPClient must produce a transport
// that will never negotiate HTTP/2 via ALPN, regardless of what any other
// package (OpenTelemetry, middleware, etc.) did to http.DefaultTransport.
func TestNewDefaultHTTPClient_UsesHTTP1Only(t *testing.T) {
// Simulate OTel boot-time instrumentation that registers an "h2" handler.
// Save and restore so we don't affect other tests.
originalTransport := http.DefaultTransport
defer func() { http.DefaultTransport = originalTransport }()

contaminated := &http.Transport{
TLSNextProto: map[string]func(string, *tls.Conn) http.RoundTripper{
"h2": func(string, *tls.Conn) http.RoundTripper { return nil },
},
}
http.DefaultTransport = contaminated

client := newDefaultHTTPClient()
require.NotNil(t, client)
require.NotNil(t, client.Transport)

transport, ok := client.Transport.(*http.Transport)
require.True(t, ok, "transport must be *http.Transport")

// TLSNextProto must be non-nil (explicit opt-out) and must NOT contain h2.
// A nil map would trigger stdlib's HTTP/2 auto-setup; an "h2" entry would
// make ALPN negotiate HTTP/2.
require.NotNil(t, transport.TLSNextProto, "TLSNextProto must be non-nil to opt out of HTTP/2 auto-setup")
assert.Empty(t, transport.TLSNextProto, "TLSNextProto must be empty — no h2 or other handlers")
_, hasH2 := transport.TLSNextProto["h2"]
assert.False(t, hasH2, "contaminated http.DefaultTransport must not leak h2 handler into client")

assert.False(t, transport.ForceAttemptHTTP2, "ForceAttemptHTTP2 must be false")
}

// TestNewDefaultHTTPClient_TransportIsIsolated ensures the client's transport
// does not share memory with http.DefaultTransport. A future mutation of the
// global must not leak into an already-constructed client.
func TestNewDefaultHTTPClient_TransportIsIsolated(t *testing.T) {
client := newDefaultHTTPClient()
transport, ok := client.Transport.(*http.Transport)
require.True(t, ok)

defaultTransport, ok := http.DefaultTransport.(*http.Transport)
require.True(t, ok)

assert.NotSame(t, defaultTransport, transport,
"client transport must not share pointer identity with http.DefaultTransport")
}

// TestNewDefaultHTTPClient_ExpectedDefaults pins the transport configuration
// so that silent regressions on pool sizing, timeouts, or proxy support are
// caught by CI.
func TestNewDefaultHTTPClient_ExpectedDefaults(t *testing.T) {
client := newDefaultHTTPClient()
require.NotNil(t, client)

assert.Equal(t, 30*time.Second, client.Timeout, "Client.Timeout")

transport, ok := client.Transport.(*http.Transport)
require.True(t, ok)

assert.NotNil(t, transport.Proxy, "Proxy must be set (http.ProxyFromEnvironment)")
assert.NotNil(t, transport.DialContext, "DialContext must be set via net.Dialer")
assert.Equal(t, 100, transport.MaxIdleConns, "MaxIdleConns")
assert.Equal(t, 10, transport.MaxIdleConnsPerHost, "MaxIdleConnsPerHost")
assert.Equal(t, 90*time.Second, transport.IdleConnTimeout, "IdleConnTimeout")
assert.Equal(t, 10*time.Second, transport.TLSHandshakeTimeout, "TLSHandshakeTimeout")
assert.Equal(t, 1*time.Second, transport.ExpectContinueTimeout, "ExpectContinueTimeout")
}
Loading