Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion agent/agentserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,15 @@ import (
type Config struct {
	// Validity window of a successful readiness check. A zero value turns
	// caching of successful readiness results off.
	// NOTE(review): this field is unexported yet carries a yaml tag;
	// reflection-based YAML decoders typically skip unexported fields —
	// confirm how it is meant to be populated.
	readinessCacheTTL time.Duration `yaml:"readiness_cache_ttl"`

	// DownloadTimeout is the download timeout setting. A zero value is
	// replaced by a 15-minute default in applyDefaults.
	DownloadTimeout time.Duration `yaml:"download_timeout"`
}

// applyDefaults returns a copy of c in which an unset (zero) DownloadTimeout
// has been replaced by its 15-minute default. Every other field is returned
// untouched, and the receiver itself is never modified because it is passed
// by value.
func (c Config) applyDefaults() Config {
	// An explicitly configured timeout always wins.
	if c.DownloadTimeout != 0 {
		return c
	}
	c.DownloadTimeout = 15 * time.Minute
	return c
}

// Server defines the agent HTTP server.
Expand All @@ -65,7 +74,9 @@ func New(
sched scheduler.ReloadableScheduler,
tags tagclient.Client,
ac announceclient.Client,
containerRuntime containerruntime.Factory) *Server {
containerRuntime containerruntime.Factory,
) *Server {
config = config.applyDefaults()

stats = stats.Tagged(map[string]string{
"module": "agentserver",
Expand Down
13 changes: 7 additions & 6 deletions agent/cmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,13 +243,14 @@ func Run(flags *Flags, opts ...Option) {
go heartbeat(stats)

log.Fatal(nginx.Run(config.Nginx, map[string]interface{}{
"allowed_cidrs": config.AllowedCidrs,
"port": flags.AgentRegistryPort,
"registry_server": nginx.GetServer(
config.Registry.Docker.HTTP.Net, config.Registry.Docker.HTTP.Addr),
"allowed_cidrs": config.AllowedCidrs,
"port": flags.AgentRegistryPort,
"registry_server": nginx.GetServer(config.Registry.Docker.HTTP.Net, config.Registry.Docker.HTTP.Addr),
"agent_server": fmt.Sprintf("127.0.0.1:%d", flags.AgentServerPort),
"registry_backup": config.RegistryBackup},
nginx.WithTLS(config.TLS)))
"registry_backup": config.RegistryBackup,
// Pass timeout parameters from agent server config
"download_timeout": nginx.FormatDurationForNginx(config.AgentServer.DownloadTimeout),
}, nginx.WithTLS(config.TLS)))
}

// heartbeat periodically emits a counter metric which allows us to monitor the
Expand Down
4 changes: 4 additions & 0 deletions config/agent/base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ registry:

peer_id_factory: addr_hash

agentserver:
# Timeout configurations (also used by nginx, which applies each value plus a 30s buffer)
download_timeout: 15m # base for nginx proxy_read_timeout/proxy_send_timeout

# Allow agent to only serve localhost and Docker default bridge requests.
allowed_cidrs:
- 127.0.0.1
Expand Down
6 changes: 6 additions & 0 deletions config/origin/base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ blobserver:
net: unix
addr: /tmp/kraken-origin.sock

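# Timeout configurations (also used by nginx, which applies each value plus a 30s buffer)
download_timeout: 15m # base for the default nginx proxy_read_timeout
upload_timeout: 10m # base for the default nginx proxy_send_timeout, and for proxy_read_timeout/proxy_send_timeout on uploads
replication_timeout: 3m # base for nginx proxy_read_timeout/proxy_send_timeout on replication operations
backend_timeout: 2m # base for nginx proxy_connect_timeout (note: nginx documents that this timeout usually cannot exceed 75s)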

nginx:
name: kraken-origin
cache_dir: /var/cache/kraken/kraken-origin/nginx/
Expand Down
3 changes: 3 additions & 0 deletions config/tracker/base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ trackerserver:
net: unix
addr: /tmp/kraken-tracker.sock

# Timeout configurations (also used by nginx, which applies each value plus a 30s buffer)
metainfo_timeout: 2m # base for nginx proxy_read_timeout/proxy_send_timeout on metainfo requests to origins

nginx:
name: kraken-tracker
cache_dir: /var/cache/kraken/kraken-tracker/nginx/
Expand Down
13 changes: 13 additions & 0 deletions nginx/config/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,22 @@ server {

{{healthEndpoint "agent-server"}}

# Download operations
location ~ ^/namespace/.*/blobs/ {
proxy_pass http://agent-server;

# Use download timeout for blob operations
proxy_read_timeout {{.download_timeout}};
proxy_send_timeout {{.download_timeout}};
}

location / {
proxy_pass http://registry-backend;
proxy_next_upstream error timeout http_404 http_500;

# Standard timeouts for registry operations
proxy_read_timeout {{.download_timeout}};
proxy_send_timeout {{.download_timeout}};
}
}
`
57 changes: 57 additions & 0 deletions nginx/config/origin.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,65 @@ server {

{{healthEndpoint .server}}

# Timeout configurations from origin server config
proxy_connect_timeout {{.backend_timeout}};
proxy_send_timeout {{.upload_timeout}};
proxy_read_timeout {{.download_timeout}};

# Disable buffering for large blob transfers
#
# proxy_buffering off: Stream responses directly from upstream to client
# instead of buffering entire response in nginx memory/disk. Critical for
# large container image layers (multi-GB) to avoid memory exhaustion and
# provide immediate streaming to clients.
#
# proxy_request_buffering off: Stream request body directly to upstream
# instead of buffering entire request. Enables immediate upload streaming
# for large image pushes without requiring disk space for temporary files.
#
# Without these settings, nginx would buffer entire blobs before forwarding,
# causing high memory usage, storage requirements, and delayed transfers.
proxy_buffering off;
proxy_request_buffering off;

location / {
proxy_pass http://{{.server}};

# Pass original client info
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}

# Special handling for upload operations with longer timeout
location ~ ^/namespace/.*/blobs/.*/uploads {
proxy_pass http://{{.server}};

# Use upload timeout for these operations
proxy_read_timeout {{.upload_timeout}};
proxy_send_timeout {{.upload_timeout}};

# Pass original client info
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}

# Replication operations with their own timeout
location ~ ^/namespace/.*/blobs/.*/remote {
proxy_pass http://{{.server}};

# Use replication timeout for these operations
proxy_read_timeout {{.replication_timeout}};
proxy_send_timeout {{.replication_timeout}};

# Pass original client info
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
}
`
4 changes: 4 additions & 0 deletions nginx/config/tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ server {
proxy_cache_valid 200 5m;
proxy_cache_valid any 1s;
proxy_cache_lock on;

# Use metainfo timeout for these operations
proxy_read_timeout {{.metainfo_timeout}};
proxy_send_timeout {{.metainfo_timeout}};
}
}
`
31 changes: 31 additions & 0 deletions nginx/nginx.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"path"
"path/filepath"
"text/template"
"time"

"github.com/uber/kraken/nginx/config"
"github.com/uber/kraken/utils/httputil"
Expand Down Expand Up @@ -261,3 +262,33 @@ func GetServer(net, addr string) string {
}
return addr
}

// FormatDurationForNginx converts a Go time.Duration into an nginx timeout
// value expressed in whole seconds (for example "330s").
//
// A fixed 30-second buffer is added to d before formatting. The intent is
// that a Go server using d as its own timeout gives up before nginx does, so
// that the application can return a structured error with a proper HTTP
// status code instead of nginx answering with a generic 504 Gateway Timeout.
//
// Timeout strategy:
//   - Go server timeout: d (original duration)
//   - Nginx timeout:     d + 30s (buffered duration)
//
// Edge cases:
//   - Sub-second remainders are truncated, because the output has second
//     granularity.
//   - Negative durations are treated as zero. Without this clamp a negative
//     input could render as a negative ("-30s") or zero ("0s") timeout in the
//     generated nginx configuration.
//
// Examples:
//
//	FormatDurationForNginx(5 * time.Minute)        // "330s" (5m + 30s)
//	FormatDurationForNginx(2 * time.Minute)        // "150s" (2m + 30s)
//	FormatDurationForNginx(30 * time.Second)       // "60s"  (30s + 30s)
//	FormatDurationForNginx(500 * time.Millisecond) // "30s"  (30.5s truncated)
//	FormatDurationForNginx(-time.Minute)           // "30s"  (clamped to 0 + 30s)
func FormatDurationForNginx(d time.Duration) string {
	// Safety margin that keeps the nginx timeout above the Go-side timeout.
	const buffer = 30 * time.Second

	// A negative timeout is meaningless; fall back to the bare buffer.
	if d < 0 {
		d = 0
	}

	// Integer division truncates toward zero and avoids a float64 round-trip.
	return fmt.Sprintf("%ds", int64((d+buffer)/time.Second))
}
16 changes: 16 additions & 0 deletions origin/blobserver/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,27 @@ import (
type Config struct {
// Listener holds the network and address the blob server listens on.
Listener listener.Config `yaml:"listener"`
// DuplicateWriteBackStagger defaults to 30 minutes when left at zero.
// NOTE(review): presumably the stagger applied to duplicate write-back
// tasks — confirm against its users.
DuplicateWriteBackStagger time.Duration `yaml:"duplicate_write_back_stagger"`
// DownloadTimeout is the download timeout; defaults to 15 minutes when zero.
DownloadTimeout time.Duration `yaml:"download_timeout"`
// UploadTimeout is the upload timeout; defaults to 10 minutes when zero.
UploadTimeout time.Duration `yaml:"upload_timeout"`
// ReplicationTimeout is the replication timeout; defaults to 3 minutes when zero.
ReplicationTimeout time.Duration `yaml:"replication_timeout"`
// BackendTimeout is the backend timeout; defaults to 2 minutes when zero.
BackendTimeout time.Duration `yaml:"backend_timeout"`
}

// applyDefaults returns a copy of c in which every duration setting that was
// left at its zero value is replaced by the built-in default:
//
//	DuplicateWriteBackStagger  30m
//	DownloadTimeout            15m
//	UploadTimeout              10m
//	ReplicationTimeout          3m
//	BackendTimeout              2m
//
// Non-zero values are kept as configured. The receiver is passed by value, so
// the caller's Config is never modified.
func (c Config) applyDefaults() Config {
	// Each entry pairs a field of the local copy with its fallback value.
	fallbacks := []struct {
		field *time.Duration
		value time.Duration
	}{
		{&c.DuplicateWriteBackStagger, 30 * time.Minute},
		{&c.DownloadTimeout, 15 * time.Minute},
		{&c.UploadTimeout, 10 * time.Minute},
		{&c.ReplicationTimeout, 3 * time.Minute},
		{&c.BackendTimeout, 2 * time.Minute},
	}
	for _, fb := range fallbacks {
		if *fb.field == 0 {
			*fb.field = fb.value
		}
	}
	return c
}
8 changes: 6 additions & 2 deletions origin/cmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,12 @@ func Run(flags *Flags, opts ...Option) {
log.Fatal(nginx.Run(
config.Nginx,
map[string]interface{}{
"port": flags.BlobServerPort,
"server": nginx.GetServer(config.BlobServer.Listener.Net, config.BlobServer.Listener.Addr),
"port": flags.BlobServerPort,
"server": nginx.GetServer(config.BlobServer.Listener.Net, config.BlobServer.Listener.Addr),
"download_timeout": nginx.FormatDurationForNginx(config.BlobServer.DownloadTimeout),
"upload_timeout": nginx.FormatDurationForNginx(config.BlobServer.UploadTimeout),
"backend_timeout": nginx.FormatDurationForNginx(config.BlobServer.BackendTimeout),
"replication_timeout": nginx.FormatDurationForNginx(config.BlobServer.ReplicationTimeout),
},
nginx.WithTLS(config.TLS)))
}
Expand Down
11 changes: 5 additions & 6 deletions tracker/cmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,16 +148,15 @@ func Run(flags *Flags, opts ...Option) {
r := blobclient.NewClientResolver(blobclient.NewProvider(blobclient.WithTLS(tls)), origins)
originCluster := blobclient.NewClusterClient(r)

server := trackerserver.New(
config.TrackerServer, stats, policy, peerStore, originStore, originCluster)
server := trackerserver.New(config.TrackerServer, stats, policy, peerStore, originStore, originCluster)
go func() {
log.Fatal(server.ListenAndServe())
}()

log.Info("Starting nginx...")
log.Fatal(nginx.Run(config.Nginx, map[string]interface{}{
"port": flags.Port,
"server": nginx.GetServer(
config.TrackerServer.Listener.Net, config.TrackerServer.Listener.Addr)},
nginx.WithTLS(config.TLS)))
"port": flags.Port,
"server": nginx.GetServer(config.TrackerServer.Listener.Net, config.TrackerServer.Listener.Addr),
"metainfo_timeout": nginx.FormatDurationForNginx(config.TrackerServer.MetaInfoTimeout),
}, nginx.WithTLS(config.TLS)))
}
6 changes: 5 additions & 1 deletion tracker/trackerserver/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ type Config struct {

AnnounceInterval time.Duration `yaml:"announce_interval"`

Listener listener.Config `yaml:"listener"`
Listener listener.Config `yaml:"listener"`
MetaInfoTimeout time.Duration `yaml:"metainfo_timeout"` // Timeout for metainfo requests to origins
}

func (c Config) applyDefaults() Config {
Expand All @@ -42,5 +43,8 @@ func (c Config) applyDefaults() Config {
if c.AnnounceInterval == 0 {
c.AnnounceInterval = 3 * time.Second
}
if c.MetaInfoTimeout == 0 {
c.MetaInfoTimeout = 2 * time.Minute
}
return c
}
Loading