From 6f0c9157743775b6aab78d8a6ff620b6e8586d77 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 10 Jun 2026 20:21:16 +0530 Subject: [PATCH] feat(router): entity caching with L1/L2, shadow mode, and per-request cache controls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extracted from jensneuse/entity-caching-v2 (PR #2777) — router layer, stacked on the demo PR. Entity cache OTEL/Prometheus metrics are split into the next PR. - router/pkg/entitycache: in-memory L1, Redis L2, circuit breaker - router/core: factoryresolver mapping of proto cache config to planner metadata, graph server wiring, per-request cache-control headers (incl. WS subscriptions), EntityCacheKeyInterceptor module hook - graphql-go-tools pinned to 63fa1c88 (PR #1259 + v2.4.4 merged via entity-caching-v244-merge branch) — this also moves astjson to the caching arena rework (MergeValuesWithPath now returns 2 values; flushwriter adapted) - router-tests/entity_caching: integration suite consuming demo's cachetest subgraphs; config composed via wgc (composition-go was removed on main), testdata/config.json regenerated with main's composition - subscription-overhaul adaptation shims from the old base were dropped in favor of main's versions (websocket, flushwriter, executor, updater, mocks) Co-Authored-By: Claude Fable 5 --- .../entity_caching_standard_subgraphs_test.go | 291 ++ .../entitycaching/entitycaching_test.go | 2670 +++++++++++++++++ router-tests/entitycaching/harness_test.go | 523 ++++ router-tests/entitycaching/redis_test.go | 123 + router-tests/entitycaching/setup_test.go | 17 + .../entitycaching/testdata/config.json | 930 ++++++ router-tests/go.mod | 11 +- router-tests/go.sum | 12 +- router-tests/protocol/testdata/tracing.json | 52 + router-tests/testenv/testenv.go | 32 + router/core/executor.go | 78 +- router/core/executor_entity_cache_test.go | 200 ++ router/core/factoryresolver.go | 174 +- .../core/factoryresolver_entity_cache_test.go | 224 ++ router/core/factoryresolver_test.go | 55 + router/core/flushwriter.go | 2 +- router/core/graph_server.go | 179 +- router/core/graphql_handler.go | 71 + .../graphql_handler_caching_options_test.go | 172 ++ router/core/modules.go | 10 + router/core/router.go | 140 + router/core/router_config.go | 4 + router/core/router_entity_cache_test.go | 170 ++ router/core/supervisor_instance.go | 1 + router/core/websocket.go | 1 + router/go.mod | 6 +- router/go.sum | 12 +- router/pkg/config/config.go | 100 + router/pkg/config/config.schema.json | 147 + router/pkg/config/config_test.go | 212 ++ .../pkg/config/testdata/config_defaults.json | 21 + router/pkg/config/testdata/config_full.json | 21 + router/pkg/entitycache/circuit_breaker.go | 162 + .../pkg/entitycache/circuit_breaker_test.go | 373 +++ router/pkg/entitycache/memory.go | 128 + router/pkg/entitycache/memory_test.go | 274 ++ router/pkg/entitycache/redis.go | 86 + router/pkg/entitycache/redis_test.go | 191 ++ .../schemausage_bench_test.go | 2 +- .../graphqlschemausage/schemausage_test.go | 2 +- 40 files changed, 7822 insertions(+), 57 deletions(-) create mode 100644 router-tests/entity_caching_standard_subgraphs_test.go create mode 100644 router-tests/entitycaching/entitycaching_test.go create mode 100644 router-tests/entitycaching/harness_test.go create mode 100644 router-tests/entitycaching/redis_test.go create mode 100644 router-tests/entitycaching/setup_test.go create mode 100644 router-tests/entitycaching/testdata/config.json create mode 100644 router/core/executor_entity_cache_test.go create mode 100644 router/core/factoryresolver_entity_cache_test.go create mode 100644 router/core/factoryresolver_test.go create mode 100644 router/core/graphql_handler_caching_options_test.go create mode 100644 router/core/router_entity_cache_test.go create mode 100644 router/pkg/entitycache/circuit_breaker.go create mode 100644 router/pkg/entitycache/circuit_breaker_test.go create mode 100644 router/pkg/entitycache/memory.go create mode 100644 router/pkg/entitycache/memory_test.go create mode 100644 router/pkg/entitycache/redis.go create mode 100644 router/pkg/entitycache/redis_test.go diff --git a/router-tests/entity_caching_standard_subgraphs_test.go b/router-tests/entity_caching_standard_subgraphs_test.go new file mode 100644 index 0000000000..b39e63ca46 --- /dev/null +++ b/router-tests/entity_caching_standard_subgraphs_test.go @@ -0,0 +1,291 @@ +package integration + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/wundergraph/cosmo/router-tests/testenv" + "github.com/wundergraph/cosmo/router/core" + nodev1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" + "github.com/wundergraph/cosmo/router/pkg/config" + "github.com/wundergraph/cosmo/router/pkg/entitycache" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" +) + +func newEntityMemoryCache(t *testing.T) *entitycache.MemoryEntityCache { + t.Helper() + c, err := entitycache.NewMemoryEntityCache(10 * 1024 * 1024) // 10MB for tests + require.NoError(t, err) + t.Cleanup(func() { _ = c.Close() }) + return c +} + +// entityCachingConfig returns RouterOptions that enable entity caching with +// the given MemoryEntityCache as the default L2 cache. +func entityCachingConfig(cache *entitycache.MemoryEntityCache) []core.Option { + return []core.Option{ + core.WithEntityCaching(config.EntityCachingConfiguration{ + Enabled: true, + L1: config.EntityCachingL1Configuration{ + Enabled: true, + }, + L2: config.EntityCachingL2Configuration{ + Enabled: true, + }, + }), + core.WithEntityCacheInstances(map[string]resolve.LoaderCache{ + "default": cache, + }), + } +} + +// addEntityCacheConfig adds entity cache configuration to all datasources +// in the router config with the given TTL in seconds. +func addEntityCacheConfig(routerConfig *nodev1.RouterConfig, ttlSeconds int64) { + for _, ds := range routerConfig.EngineConfig.DatasourceConfigurations { + for _, key := range ds.Keys { + if key.DisableEntityResolver { + continue + } + ds.EntityCacheConfigurations = append(ds.EntityCacheConfigurations, &nodev1.EntityCacheConfiguration{ + TypeName: key.TypeName, + MaxAgeSeconds: ttlSeconds, + }) + } + } +} + +func TestEntityCaching(t *testing.T) { + t.Parallel() + + // Cross-subgraph query: employee root from employees subgraph, + // products field resolved by products subgraph via _entities. + // Entity caching intercepts the _entities call. + const crossSubgraphQuery = `{ employee(id: 1) { id products } }` + + t.Run("basic L2 miss then hit", func(t *testing.T) { + t.Parallel() + + cache := newEntityMemoryCache(t) + testenv.Run(t, &testenv.Config{ + RouterOptions: entityCachingConfig(cache), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + addEntityCacheConfig(routerConfig, 300) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // First request: cache miss, both employees and products subgraphs called + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: crossSubgraphQuery}) + require.Contains(t, res.Body, `"products"`) + + productsCountAfterFirst := xEnv.SubgraphRequestCount.Products.Load() + require.Equal(t, int64(1), productsCountAfterFirst) + + // Second request: entity cache hit, products subgraph NOT called again + res2 := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: crossSubgraphQuery}) + require.Equal(t, res.Body, res2.Body) + require.Equal(t, int64(1), xEnv.SubgraphRequestCount.Products.Load()) + }) + }) + + t.Run("different entities produce separate cache entries", func(t *testing.T) { + t.Parallel() + + cache := newEntityMemoryCache(t) + testenv.Run(t, &testenv.Config{ + RouterOptions: entityCachingConfig(cache), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + addEntityCacheConfig(routerConfig, 300) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Fetch employee 1 products + res1 := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ employee(id: 1) { id products } }`, + }) + require.Contains(t, res1.Body, `"products"`) + + // Fetch employee 3 products (different entity — cache miss) + res2 := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ employee(id: 3) { id products } }`, + }) + require.Contains(t, res2.Body, `"products"`) + + // Products subgraph called twice (once per distinct employee) + require.Equal(t, int64(2), xEnv.SubgraphRequestCount.Products.Load()) + + // Now re-fetch employee 1 — should be cached + res3 := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ employee(id: 1) { id products } }`, + }) + require.Equal(t, res1.Body, res3.Body) + require.Equal(t, int64(2), xEnv.SubgraphRequestCount.Products.Load()) + }) + }) + + t.Run("multi-subgraph entity caching", func(t *testing.T) { + t.Parallel() + + cache := newEntityMemoryCache(t) + testenv.Run(t, &testenv.Config{ + RouterOptions: entityCachingConfig(cache), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + addEntityCacheConfig(routerConfig, 300) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // First query hits products subgraph via _entities + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ employee(id: 1) { id products } }`, + }) + require.Contains(t, res.Body, `"products"`) + require.Equal(t, int64(1), xEnv.SubgraphRequestCount.Products.Load()) + + // Second query hits availability subgraph via _entities + res2 := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ employee(id: 1) { id isAvailable } }`, + }) + require.Contains(t, res2.Body, `"isAvailable"`) + require.Equal(t, int64(1), xEnv.SubgraphRequestCount.Availability.Load()) + + // Re-fetch both: products and availability should be cached + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ employee(id: 1) { id products } }`, + }) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ employee(id: 1) { id isAvailable } }`, + }) + require.Equal(t, int64(1), xEnv.SubgraphRequestCount.Products.Load()) + require.Equal(t, int64(1), xEnv.SubgraphRequestCount.Availability.Load()) + }) + }) + + t.Run("per-subgraph cache name routes to separate instances", func(t *testing.T) { + t.Parallel() + + defaultCache := newEntityMemoryCache(t) + customCache := newEntityMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithEntityCaching(config.EntityCachingConfiguration{ + Enabled: true, + L1: config.EntityCachingL1Configuration{Enabled: true}, + L2: config.EntityCachingL2Configuration{Enabled: true}, + SubgraphCacheOverrides: []config.EntityCachingSubgraphCacheOverride{ + { + Name: "products", + Entities: []config.EntityCachingEntityConfig{ + {Type: "Employee", StorageProviderID: "custom"}, + }, + }, + }, + }), + core.WithEntityCacheInstances(map[string]resolve.LoaderCache{ + "default": defaultCache, + "custom": customCache, + }), + }, + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + addEntityCacheConfig(routerConfig, 300) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: crossSubgraphQuery, + }) + require.Contains(t, res.Body, `"products"`) + + // The custom cache should have entries (Employee on products routed to "custom") + require.Equal(t, 1, customCache.Len()) + }) + }) + + t.Run("shadow mode always fetches from subgraph", func(t *testing.T) { + t.Parallel() + + cache := newEntityMemoryCache(t) + testenv.Run(t, &testenv.Config{ + RouterOptions: entityCachingConfig(cache), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + for _, ds := range routerConfig.EngineConfig.DatasourceConfigurations { + for _, key := range ds.Keys { + if key.DisableEntityResolver { + continue + } + ds.EntityCacheConfigurations = append(ds.EntityCacheConfigurations, &nodev1.EntityCacheConfiguration{ + TypeName: key.TypeName, + MaxAgeSeconds: 300, + ShadowMode: true, + }) + } + } + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // First request + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: crossSubgraphQuery}) + require.Contains(t, res.Body, `"products"`) + productsFirst := xEnv.SubgraphRequestCount.Products.Load() + + // Second request: in shadow mode, subgraph ALWAYS called + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: crossSubgraphQuery}) + require.Equal(t, productsFirst+1, xEnv.SubgraphRequestCount.Products.Load()) + }) + }) + + t.Run("list query with caching", func(t *testing.T) { + t.Parallel() + + cache := newEntityMemoryCache(t) + testenv.Run(t, &testenv.Config{ + RouterOptions: entityCachingConfig(cache), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + addEntityCacheConfig(routerConfig, 300) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // List query that fetches multiple employees with cross-subgraph products + query := `{ employees { id products } }` + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: query}) + require.Contains(t, res.Body, `"employees"`) + productsFirst := xEnv.SubgraphRequestCount.Products.Load() + require.Equal(t, int64(1), productsFirst) + + // Second list query: all _entities calls should be cached + res2 := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: query}) + require.Equal(t, res.Body, res2.Body) + require.Equal(t, int64(1), xEnv.SubgraphRequestCount.Products.Load()) + }) + }) + + t.Run("disabled caching does not cache", func(t *testing.T) { + t.Parallel() + + testenv.Run(t, &testenv.Config{ + // No entity caching options + }, func(t *testing.T, xEnv *testenv.Environment) { + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: crossSubgraphQuery}) + require.Contains(t, res.Body, `"products"`) + productsFirst := xEnv.SubgraphRequestCount.Products.Load() + + // Second request: products subgraph called again (no caching) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: crossSubgraphQuery}) + require.Equal(t, productsFirst+1, xEnv.SubgraphRequestCount.Products.Load()) + }) + }) + + t.Run("cache entries written to L2", func(t *testing.T) { + t.Parallel() + + cache := newEntityMemoryCache(t) + testenv.Run(t, &testenv.Config{ + RouterOptions: entityCachingConfig(cache), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + addEntityCacheConfig(routerConfig, 300) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + require.Equal(t, 0, cache.Len()) + + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: crossSubgraphQuery}) + + // After first request, cache should have entries + require.Equal(t, 1, cache.Len()) + }) + }) +} diff --git a/router-tests/entitycaching/entitycaching_test.go b/router-tests/entitycaching/entitycaching_test.go new file mode 100644 index 0000000000..44092bdce0 --- /dev/null +++ b/router-tests/entitycaching/entitycaching_test.go @@ -0,0 +1,2670 @@ +package entitycaching + +import ( + "encoding/json" + "net/http" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/require" + + itemsModel "github.com/wundergraph/cosmo/demo/pkg/subgraphs/cachetest/items/subgraph/model" + "github.com/wundergraph/cosmo/router-tests/testenv" + "github.com/wundergraph/cosmo/router/core" + nodev1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" + "github.com/wundergraph/cosmo/router/pkg/config" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" +) + +func TestEntityCaching(t *testing.T) { + t.Parallel() + + t.Run("L2/basic miss then hit", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + } + res := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, `{"data":{"item":{"id":"1","name":"Widget","description":"A versatile widget for everyday use"}}}`, res.Body) + + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), detailsAfterFirst) + + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, `{"data":{"item":{"id":"1","name":"Widget","description":"A versatile widget for everyday use"}}}`, res2.Body) + + // Details subgraph should NOT be called again (cache hit) + require.Equal(t, int64(1), counters.details.Load()) + }) + }) + + t.Run("L2/different entities use separate entries", func(t *testing.T) { + t.Parallel() + + servers, _ := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + reqItem1 := testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + } + res1 := xEnv.MakeGraphQLRequestOK(reqItem1) + require.Equal(t, `{"data":{"item":{"id":"1","name":"Widget","description":"A versatile widget for everyday use"}}}`, res1.Body) + + res2 := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "2") { id name description } }`, + }) + require.Equal(t, `{"data":{"item":{"id":"2","name":"Gadget","description":"A high-tech gadget with many features"}}}`, res2.Body) + + // Both entities should produce cache entries + require.Equal(t, 2, cache.Len()) + + // Re-fetch id:"1" — verify response correctness + res3 := xEnv.MakeGraphQLRequestOK(reqItem1) + require.Equal(t, `{"data":{"item":{"id":"1","name":"Widget","description":"A versatile widget for everyday use"}}}`, res3.Body) + }) + }) + + t.Run("L2/list query caching", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ items { id description rating } }`, + } + res := xEnv.MakeGraphQLRequestOK(req) + require.Contains(t, res.Body, `"description"`) + require.Contains(t, res.Body, `"rating"`) + + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), detailsAfterFirst) + + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, res.Body, res2.Body) + require.Equal(t, int64(1), counters.details.Load()) + }) + }) + + t.Run("L2/cache entries are written", func(t *testing.T) { + t.Parallel() + + servers, _ := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + require.Equal(t, 0, cache.Len()) + + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id description } }`, + }) + + require.Equal(t, 1, cache.Len()) + }) + }) + + t.Run("L2/disabled caching does not cache", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + clearEntityCacheConfigs(routerConfig) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + } + xEnv.MakeGraphQLRequestOK(req) + detailsFirst := counters.details.Load() + require.Equal(t, int64(1), detailsFirst) + + xEnv.MakeGraphQLRequestOK(req) + // Details subgraph called again (no caching) + require.Equal(t, int64(2), counters.details.Load()) + }) + }) + + t.Run("L2/multi-subgraph caching", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // Fetch description (from details subgraph) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id description } }`, + }) + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), detailsAfterFirst) + + // Fetch available (from inventory subgraph) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id available } }`, + }) + inventoryAfterFirst := counters.inventory.Load() + require.Equal(t, int64(1), inventoryAfterFirst) + + // Re-fetch both: should be cached + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id description } }`, + }) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id available } }`, + }) + + require.Equal(t, detailsAfterFirst, counters.details.Load()) + require.Equal(t, inventoryAfterFirst, counters.inventory.Load()) + }) + }) + + t.Run("L2/cross-subgraph combined query", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description available } }`, + } + res := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, `{"data":{"item":{"id":"1","name":"Widget","description":"A versatile widget for everyday use","available":true}}}`, res.Body) + + detailsAfterFirst := counters.details.Load() + inventoryAfterFirst := counters.inventory.Load() + + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, `{"data":{"item":{"id":"1","name":"Widget","description":"A versatile widget for everyday use","available":true}}}`, res2.Body) + + require.Equal(t, detailsAfterFirst, counters.details.Load()) + require.Equal(t, inventoryAfterFirst, counters.inventory.Load()) + }) + }) + + t.Run("Shadow/always fetches", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingOptions(cache), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + setEntityCacheShadowMode(routerConfig, true) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + } + xEnv.MakeGraphQLRequestOK(req) + detailsFirst := counters.details.Load() + + xEnv.MakeGraphQLRequestOK(req) + // Shadow mode: subgraph ALWAYS called, but cache is populated + require.Equal(t, detailsFirst+1, counters.details.Load()) + require.Equal(t, 1, cache.Len()) + }) + }) + + t.Run("L2/partial cache load", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + setEntityCachePartialLoad(routerConfig, true) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Warm cache for id:"1" — this populates one entity entry in L2. + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id description } }`, + }) + detailsAfterWarm := counters.details.Load() + require.Equal(t, int64(1), detailsAfterWarm) + require.GreaterOrEqual(t, cache.Len(), 1, + "warm-up must populate at least one L2 entity entry before the partial-load path is exercised") + cacheLenAfterWarm := cache.Len() + + // List query: id:"1" served from cache, other IDs fetched from + // details. With L2 disabled the warm-up wouldn't have written + // anything, so this assertion of exactly one additional details + // call only holds when partial-load actually reads from L2. + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ items { id description } }`, + }) + require.Equal(t, detailsAfterWarm+1, counters.details.Load(), + "partial-load must fetch only the uncached entities; one extra details call expected") + + // After the list query every entity is cached. + require.Greater(t, cache.Len(), cacheLenAfterWarm, + "partial-load must write the newly-fetched entities back to L2 so subsequent reads are served from cache") + + // Repeat list query: all entities now cached → no additional details call. + detailsBeforeRepeat := counters.details.Load() + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ items { id description } }`, + }) + require.Equal(t, detailsBeforeRepeat, counters.details.Load(), + "repeat list query must be served entirely from cache — this fails if L2 is off") + }) + }) + + t.Run("L2/TTL expiry", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + setEntityCacheTTL(routerConfig, 1) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id description } }`, + } + xEnv.MakeGraphQLRequestOK(req) + detailsAfterFirst := counters.details.Load() + + // Immediately, should be cached + xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, detailsAfterFirst, counters.details.Load()) + + // Wait for TTL expiry + time.Sleep(1500 * time.Millisecond) + + // After expiry, cache miss + xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, detailsAfterFirst+1, counters.details.Load()) + }) + }) + + t.Run("L2/per-subgraph cache name", func(t *testing.T) { + t.Parallel() + + servers, _ := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + defaultCache := newMemoryCache(t) + customCache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingOptionsWithSubgraphConfig( + map[string]resolve.LoaderCache{ + "default": defaultCache, + "custom": customCache, + }, + []config.EntityCachingSubgraphCacheOverride{ + { + Name: "details", + Entities: []config.EntityCachingEntityConfig{ + {Type: "Item", StorageProviderID: "custom"}, + }, + }, + }, + ), + }, func(t *testing.T, xEnv *testenv.Environment) { + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id description } }`, + }) + + require.Equal(t, 1, customCache.Len()) + }) + }) + + t.Run("L2/include headers varies cache key", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: append( + entityCachingL2OnlyOptions(cache), + core.WithHeaderRules(config.HeaderRules{ + All: &config.GlobalHeaderRule{ + Request: []*config.RequestHeaderRule{ + { + Operation: config.HeaderRuleOperationPropagate, + Named: "X-Tenant", + }, + }, + }, + }), + ), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + setEntityCacheIncludeHeaders(routerConfig, true) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Request with header A + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id description } }`, + Header: map[string][]string{"X-Tenant": {"A"}}, + }) + detailsAfterA := counters.details.Load() + + // Request with header B — different cache key, miss + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id description } }`, + Header: map[string][]string{"X-Tenant": {"B"}}, + }) + detailsAfterB := counters.details.Load() + require.Equal(t, detailsAfterA+1, detailsAfterB) + + // Request with header A again — should hit + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id description } }`, + Header: map[string][]string{"X-Tenant": {"A"}}, + }) + require.Equal(t, detailsAfterB, counters.details.Load()) + }) + }) + + t.Run("L2/negative TTL caches not-found response within window", func(t *testing.T) { + // Exercises the entity-level not-found cache: items subgraph has an id + // that details subgraph does NOT have. On first fetch, details' + // _entities resolver returns null for that key. With notFoundCacheTtl + // set, the router caches the null result so the second fetch skips the + // details subgraph. Verifying via counters.details (rather than + // counters.items) is the signal the reviewer asked for: the items + // subgraph is always called in this flow, so only the details counter + // distinguishes a not-found cache hit from a miss. + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + // Create an item in items-subgraph via mutation, but details-subgraph + // has no record for the new id — entity hydration will return null. + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + setNotFoundCacheTTL(routerConfig, 60) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Reserve a new id by creating the item in items-subgraph. + createRes := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `mutation { createItem(name: "Ghost", category: "phantom") { id } }`, + }) + var created struct { + Data struct { + CreateItem struct { + ID string `json:"id"` + } `json:"createItem"` + } `json:"data"` + } + require.NoError(t, json.Unmarshal([]byte(createRes.Body), &created)) + require.NotEmpty(t, created.Data.CreateItem.ID) + newID := created.Data.CreateItem.ID + + req := testenv.GraphQLRequest{ + Query: `{ item(id: "` + newID + `") { id description } }`, + } + // First query: items returns the new entity, details returns null. + // The details subgraph IS called once. + xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, int64(1), counters.details.Load(), + "first request must hit the details entity subgraph for the not-found hydration") + + // Second query: with notFoundCacheTtl, the null entity is cached, + // so details is NOT called again. With L2 disabled this counter + // would grow to 2. + xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, int64(1), counters.details.Load(), + "not-found cache must short-circuit the details subgraph; with L2 disabled this counter would be 2") + }) + }) + + t.Run("L2/root field caching with key mapping", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + } + // Cross-subgraph query to trigger both root-field and entity caching. + res := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, `{"data":{"item":{"id":"1","name":"Widget","description":"A versatile widget for everyday use"}}}`, res.Body) + + itemsAfterFirst := counters.items.Load() + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), itemsAfterFirst) + require.Equal(t, int64(1), detailsAfterFirst) + require.GreaterOrEqual(t, cache.Len(), 1) + + // Same query — both root-field (items) AND entity (details) caches + // must hit. Asserting on counters.items specifically is what the + // reviewer asked for: the previous form only checked details and + // so would pass even if @queryCache were completely ignored. + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, `{"data":{"item":{"id":"1","name":"Widget","description":"A versatile widget for everyday use"}}}`, res2.Body) + require.Equal(t, itemsAfterFirst, counters.items.Load(), + "root-field cache hit: items subgraph must NOT be re-fetched; with @queryCache ignored this counter would be 2") + require.Equal(t, detailsAfterFirst, counters.details.Load(), + "entity cache hit: details subgraph must NOT be re-fetched") + }) + }) + + t.Run("L2/root field list caching", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ items { id name description } }`, + } + // Cross-subgraph list query: exercises both the root-field cache + // on the items subgraph and the per-entity cache on details. + res := xEnv.MakeGraphQLRequestOK(req) + require.Contains(t, res.Body, `"Widget"`) + require.Contains(t, res.Body, `"description"`) + + itemsAfterFirst := counters.items.Load() + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), itemsAfterFirst) + require.Equal(t, int64(1), detailsAfterFirst) + + // 5 entity entries (one per item) + 1 root field L2 entry. + require.Equal(t, 6, cache.Len()) + + // Same query — both root-field AND entity caches must hit. + // Asserting counters.items (not only details) is what the reviewer + // asked for: with @queryCache ignored, items would be refetched. + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, res.Body, res2.Body) + require.Equal(t, itemsAfterFirst, counters.items.Load(), + "root-field cache hit: items subgraph must NOT be re-fetched") + require.Equal(t, detailsAfterFirst, counters.details.Load(), + "entity cache hit: details subgraph must NOT be re-fetched") + }) + }) + + t.Run("L2/root field different args use different cache keys", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // Different arguments must produce different cache keys (two entries). + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name } }`, + }) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "2") { id name } }`, + }) + require.Equal(t, int64(2), counters.items.Load()) + require.GreaterOrEqual(t, cache.Len(), 2, + "each distinct args tuple must produce its own cache entry") + + // Repeat both calls — each must be a cache hit, so items counter + // stays at 2. With @queryCache ignored (or L2 disabled) items + // would climb to 4. + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name } }`, + }) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "2") { id name } }`, + }) + require.Equal(t, int64(2), counters.items.Load(), + "repeat calls must be cache hits on the items root field; with L2 disabled this would be 4") + }) + }) + + t.Run("Shadow/query cache always fetches", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingOptions(cache), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + setQueryCacheShadowMode(routerConfig, true) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name } }`, + } + xEnv.MakeGraphQLRequestOK(req) + itemsFirst := counters.items.Load() + // Shadow mode MUST populate the cache even though it does not read + // from it — that's the whole point. Without L2, cache.Len() stays + // at 0 and this assertion fails. + require.GreaterOrEqual(t, cache.Len(), 1, + "shadow mode must still WRITE the root-field entry to L2") + + // Shadow mode: items subgraph is called on the second request too. + xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, itemsFirst+1, counters.items.Load(), + "shadow mode must always fetch from the items subgraph") + }) + }) + + t.Run("L2/mutation invalidates cache", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + } + // Warm cache + xEnv.MakeGraphQLRequestOK(req) + detailsAfterWarm := counters.details.Load() + + // Verify cache hit + xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, detailsAfterWarm, counters.details.Load()) + + // Mutation triggers @cacheInvalidate + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `mutation { updateItem(id: "1", name: "Updated Widget") { id name } }`, + }) + + // After invalidation, cache miss → details subgraph called again + xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, detailsAfterWarm+1, counters.details.Load()) + }) + }) + + t.Run("L2/mutation populates cache", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + lenBefore := cache.Len() + + // createItem has @cachePopulate(maxAge: 60). The mutation response + // body alone isn't a caching signal — we need to verify the L2 was + // actually written and that a follow-up read hits cache. + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `mutation { createItem(name: "Foobar", category: "test") { id name category } }`, + }) + require.Contains(t, res.Body, `"Foobar"`) + + // @cachePopulate MUST have added at least one entry to L2. With + // L2.Enabled=false, lenAfter stays at lenBefore. + require.Greater(t, cache.Len(), lenBefore, + "@cachePopulate must write the mutation result to L2") + + // Extract the newly-created id and verify a follow-up read by id + // is served from cache (items subgraph not called again). + var body struct { + Data struct { + CreateItem struct { + ID string `json:"id"` + } `json:"createItem"` + } `json:"data"` + } + require.NoError(t, json.Unmarshal([]byte(res.Body), &body)) + newID := body.Data.CreateItem.ID + require.NotEmpty(t, newID) + + itemsAfterMutation := counters.items.Load() + readRes := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "` + newID + `") { id name category } }`, + }) + require.Equal(t, + `{"data":{"item":{"id":"`+newID+`","name":"Foobar","category":"test"}}}`, + readRes.Body) + require.Equal(t, itemsAfterMutation, counters.items.Load(), + "follow-up read must be a cache hit (items subgraph not called); with L2 disabled this counter would be itemsAfterMutation+1") + }) + }) + + // Tests that the full circuit breaker lifecycle keeps requests working: + // cache healthy → cache breaks → breaker opens → cache recovers → breaker closes. + // At every phase, GraphQL queries must return correct data. The subgraph call + // counter proves whether the response came from cache (counter unchanged) or + // from a subgraph fetch (counter incremented). + t.Run("L2/circuit breaker degrades gracefully on cache failure", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + + cache := newControllableCache(t) + cooldown := 100 * time.Millisecond + opts, cb := entityCachingOptionsWithCircuitBreakerRef(cache, 2, cooldown) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: opts, + }, func(t *testing.T, xEnv *testenv.Environment) { + const query = `{ item(id: "1") { id name description } }` + const expected = `{"data":{"item":{"id":"1","name":"Widget","description":"A versatile widget for everyday use"}}}` + + // Phase 1: Cache is healthy. First request populates cache. + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: query}) + require.Equal(t, expected, res.Body) + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), detailsAfterFirst) + + // Second request should be a cache hit — subgraph counter stays the same. + res = xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: query}) + require.Equal(t, expected, res.Body) + require.Equal(t, detailsAfterFirst, counters.details.Load(), "expected cache hit: details counter should not change") + + // Phase 2: Cache starts failing. Breaker is still closed, so it tries the cache + // and gets errors. Requests still succeed via subgraph fallback. + cache.SetFailing(true) + for range 2 { + res = xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: query}) + require.Equal(t, expected, res.Body) + } + require.True(t, cb.IsOpen(), "breaker should be open after 2 consecutive failures") + + // Phase 3: Breaker is open — cache is bypassed entirely. + // Subgraph counter should increase with every request. + counterBefore := counters.details.Load() + res = xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: query}) + require.Equal(t, expected, res.Body) + require.Equal(t, counterBefore+1, counters.details.Load(), "expected subgraph fetch when breaker is open") + + // Phase 4: Cache recovers. Wait for cooldown so breaker transitions to half-open. + cache.SetFailing(false) + time.Sleep(cooldown + 50*time.Millisecond) + + // The next request is the half-open probe. It should succeed and close the breaker. + res = xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: query}) + require.Equal(t, expected, res.Body) + require.False(t, cb.IsOpen(), "breaker should be closed after successful probe") + + // Phase 5: Cache works again. Verify we get a cache hit. + detailsBefore := counters.details.Load() + res = xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: query}) + require.Equal(t, expected, res.Body) + require.Equal(t, detailsBefore, counters.details.Load(), "expected cache hit after recovery") + }) + }) + + // Focused test for the half-open → closed transition. + // Trips the breaker, waits for cooldown, then verifies that one successful + // probe closes the breaker and the cache resumes normal operation. + t.Run("L2/circuit breaker recovers after cooldown", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + + cache := newControllableCache(t) + cooldown := 100 * time.Millisecond + cache.SetFailing(true) // Start broken + + opts, cb := entityCachingOptionsWithCircuitBreakerRef(cache, 2, cooldown) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: opts, + }, func(t *testing.T, xEnv *testenv.Environment) { + const query = `{ item(id: "1") { id name description } }` + const expected = `{"data":{"item":{"id":"1","name":"Widget","description":"A versatile widget for everyday use"}}}` + + // Trip the breaker: 2 failures while closed. + for range 2 { + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: query}) + require.Equal(t, expected, res.Body) + } + require.True(t, cb.IsOpen(), "breaker should be open after threshold failures") + + // Fix the cache and wait for cooldown. + cache.SetFailing(false) + time.Sleep(cooldown + 50*time.Millisecond) + + // Probe request: succeeds, closes the breaker, populates cache. + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: query}) + require.Equal(t, expected, res.Body) + require.False(t, cb.IsOpen(), "breaker should be closed after successful probe") + + // Next request should be a cache hit — subgraph not called. + detailsBefore := counters.details.Load() + res = xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: query}) + require.Equal(t, expected, res.Body) + require.Equal(t, detailsBefore, counters.details.Load(), "expected cache hit after recovery") + }) + }) + + t.Run("L2/subscription invalidates cache", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + } + // Warm cache + res := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, `{"data":{"item":{"id":"1","name":"Widget","description":"A versatile widget for everyday use"}}}`, res.Body) + + detailsAfterWarm := counters.details.Load() + + // Verify cache hit + xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, detailsAfterWarm, counters.details.Load()) + + // Start subscription via WebSocket (itemUpdated has @cacheInvalidate) + conn := xEnv.InitGraphQLWebSocketConnection(nil, nil, nil) + + err := testenv.WSWriteJSON(t, conn, testenv.WebSocketMessage{ + ID: "1", + Type: "subscribe", + // Select ONLY key fields so the engine uses SubscriptionCacheModeInvalidate. + // Selecting non-key fields would cause SubscriptionCacheModePopulate instead. + Payload: []byte(`{"query":"subscription { itemUpdated { id } }"}`), + }) + require.NoError(t, err) + + // Push event in background after subscription is established + go func() { + xEnv.WaitForSubscriptionCount(1, 5*time.Second) + servers.itemUpdatedCh <- &itemsModel.Item{ID: "1", Name: "Updated Widget", Category: "tools"} + }() + + // Read subscription event + var msg testenv.WebSocketMessage + err = testenv.WSReadJSON(t, conn, &msg) + require.NoError(t, err) + require.Equal(t, "next", msg.Type) + require.Contains(t, string(msg.Payload), `"itemUpdated"`) + + // Close subscription + require.NoError(t, conn.Close()) + xEnv.WaitForSubscriptionCount(0, 5*time.Second) + + // After invalidation, cache miss → details subgraph called again + xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, detailsAfterWarm+1, counters.details.Load()) + }) + }) + + t.Run("L2/subscription populate config carries entity type name", func(t *testing.T) { + // Regression test for the composition->router pipeline carrying entityTypeName + // end-to-end on @cachePopulate configs. Before this was wired: + // - composition wrote CachePopulateConfig without entityTypeName + // - router compensated by expanding subscription populate across every + // cached entity in the subgraph (semantically ambiguous, wrong config) + // Now the field carries the specific target entity — router looks it up directly. + // + // If composition is reverted, entityTypeName is empty, the router skips the + // populate setup, and the follow-up subscription_populates_cache test goes red. + t.Parallel() + + servers, _ := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + var itemCreatedPopulate *nodev1.CachePopulateConfiguration + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + ModifyRouterConfig: func(rc *nodev1.RouterConfig) { + for _, ds := range rc.EngineConfig.DatasourceConfigurations { + for _, cp := range ds.CachePopulateConfigurations { + if cp.OperationType == "Subscription" && cp.FieldName == "itemCreated" { + itemCreatedPopulate = cp + } + } + } + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + require.NotNil(t, itemCreatedPopulate, + "expected a CachePopulateConfiguration for subscription itemCreated") + require.Equal(t, "Item", itemCreatedPopulate.EntityTypeName, + "@cachePopulate must carry the target entity type name through the pipeline") + }) + }) + + t.Run("L2/subscription populates cache", func(t *testing.T) { + t.Parallel() + + servers, _ := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // Start subscription via WebSocket (itemCreated has @cachePopulate) + conn := xEnv.InitGraphQLWebSocketConnection(nil, nil, nil) + + err := testenv.WSWriteJSON(t, conn, testenv.WebSocketMessage{ + ID: "1", + Type: "subscribe", + Payload: []byte(`{"query":"subscription { itemCreated { id name category } }"}`), + }) + require.NoError(t, err) + + // Push event in background after subscription is established + go func() { + xEnv.WaitForSubscriptionCount(1, 5*time.Second) + servers.itemCreatedCh <- &itemsModel.Item{ID: "99", Name: "New Item", Category: "test"} + }() + + // Read subscription event + var msg testenv.WebSocketMessage + err = testenv.WSReadJSON(t, conn, &msg) + require.NoError(t, err) + require.Equal(t, "next", msg.Type) + require.Contains(t, string(msg.Payload), `"itemCreated"`) + require.Contains(t, string(msg.Payload), `"New Item"`) + + // Close subscription + require.NoError(t, conn.Close()) + xEnv.WaitForSubscriptionCount(0, 5*time.Second) + + // @cachePopulate should have written the entity data to L2 cache + require.Equal(t, 1, cache.Len()) + }) + }) + + t.Run("L2/extensions invalidate cache", func(t *testing.T) { + t.Parallel() + + var extensionFlag atomic.Bool + servers, counters := startSubgraphServersWithMiddleware(t, extensionInvalidationMiddleware(&extensionFlag)) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // Warm cache with extension OFF + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + }) + require.Equal(t, `{"data":{"item":{"id":"1","name":"Widget","description":"A versatile widget for everyday use"}}}`, res.Body) + + detailsAfterWarm := counters.details.Load() + + // Verify cache hit + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + }) + require.Equal(t, detailsAfterWarm, counters.details.Load()) + + // Enable extension: details responses will now include cacheInvalidation for Item id:"1" + extensionFlag.Store(true) + + // Make a request that hits details subgraph for a DIFFERENT entity. + // This triggers the details middleware which adds the extension for id:"1". + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "2") { id name description } }`, + }) + + // Disable extension for the final query + extensionFlag.Store(false) + + // Query id:"1" again — should be cache miss because extension invalidated it + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + }) + require.Equal(t, detailsAfterWarm+2, counters.details.Load()) + }) + }) + + t.Run("L2/mutation populate writes to cache", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // createItem has @cachePopulate(maxAge: 60). The truthful signal is + // that the follow-up read-by-id is served from cache — i.e. the + // items subgraph is NOT called again for the newly created entity. + // + // Checking cache.Len() growth is insufficient: there is a known + // router-Go bug where @cachePopulate writes still land in L2 even + // when L2.Enabled=false, so size-based assertions pass under a + // feature-disabled run. The items-counter read-path check below is + // the assertion that fails when caching is actually off. + createRes := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `mutation { createItem(name: "Foobar", category: "test") { id name category } }`, + }) + var created struct { + Data struct { + CreateItem struct { + ID string `json:"id"` + } `json:"createItem"` + } `json:"data"` + } + require.NoError(t, json.Unmarshal([]byte(createRes.Body), &created)) + require.NotEmpty(t, created.Data.CreateItem.ID) + + itemsAfterCreate := counters.items.Load() + + // Read the just-created entity by its @key. If @cachePopulate wrote + // to L2, the items root-field subgraph must NOT be re-fetched. + // With L2 disabled this counter would grow to itemsAfterCreate+1. + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "` + created.Data.CreateItem.ID + `") { id name category } }`, + }) + require.Equal(t, itemsAfterCreate, counters.items.Load(), + "@cachePopulate must write the entity to L2 so the read-by-id is a cache hit; with L2 disabled this counter would be itemsAfterCreate+1") + }) + }) + + t.Run("Regression/mutation cache populate does not write when L2 is disabled", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: []core.Option{ + core.WithEntityCaching(config.EntityCachingConfiguration{ + Enabled: true, + L1: config.EntityCachingL1Configuration{ + Enabled: true, + }, + L2: config.EntityCachingL2Configuration{ + Enabled: false, + }, + }), + core.WithEntityCacheInstances(map[string]resolve.LoaderCache{ + "default": cache, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + lenBefore := cache.Len() + + createRes := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `mutation { createItem(name: "NoL2Populate", category: "disabled") { id name category } }`, + }) + require.Contains(t, createRes.Body, `"NoL2Populate"`) + + var created struct { + Data struct { + CreateItem struct { + ID string `json:"id"` + } `json:"createItem"` + } `json:"data"` + } + require.NoError(t, json.Unmarshal([]byte(createRes.Body), &created)) + require.NotEmpty(t, created.Data.CreateItem.ID) + + require.Equal(t, lenBefore, cache.Len(), + "@cachePopulate must not write to L2 when router L2 caching is disabled") + + itemsAfterCreate := counters.items.Load() + readRes := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "` + created.Data.CreateItem.ID + `") { id name category } }`, + }) + require.Equal(t, + `{"data":{"item":{"id":"`+created.Data.CreateItem.ID+`","name":"NoL2Populate","category":"disabled"}}}`, + readRes.Body) + require.Equal(t, itemsAfterCreate+1, counters.items.Load(), + "with L2 disabled, the follow-up read must miss cache and refetch from the items subgraph") + }) + }) + + t.Run("L2/delete mutation invalidates cache", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // Warm cache: first request populates both the root-field L2 entry and + // the Item entity L2 entry. + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + }) + itemsAfterWarm := counters.items.Load() + detailsAfterWarm := counters.details.Load() + require.Greater(t, cache.Len(), 0, + "warm-up must populate at least one cache entry") + + // Second identical request must be a cache hit: neither the items + // (root-field) subgraph nor the details (entity) subgraph is called. + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + }) + require.Equal(t, itemsAfterWarm, counters.items.Load(), + "pre-invalidate read must be a cache hit on the items root-field subgraph") + require.Equal(t, detailsAfterWarm, counters.details.Load(), + "pre-invalidate read must be a cache hit on the details entity subgraph") + + // Delete triggers @cacheInvalidate. The mutation itself hits the items + // subgraph to run the resolver, so we capture the counter AFTER the + // mutation and assert on the delta from the subsequent read. + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `mutation { deleteItem(id: "1") { id name } }`, + }) + itemsAfterMutation := counters.items.Load() + + // After invalidation, the read MUST miss the cache and re-hit the + // items subgraph. The store has persisted the delete, so `item(id:"1")` + // now returns null and no downstream entity fetch to the details + // subgraph happens — but the root-field cache-miss alone is enough + // to prove @cacheInvalidate fired. + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + }) + require.Equal(t, `{"data":{"item":null}}`, res.Body, + "post-delete read must return null since the store now lacks id=1") + require.Equal(t, itemsAfterMutation+1, counters.items.Load(), + "post-invalidate read must refetch from items subgraph; equal count means the cache entry survived the invalidate") + }) + }) + + t.Run("Combined/L1 deduplicates with warm L2", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // Query same entity via two aliases — L1 should deduplicate + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ + a: item(id: "1") { id name description } + b: item(id: "1") { id name description } + }`, + }) + require.Contains(t, res.Body, `"a"`) + require.Contains(t, res.Body, `"b"`) + + // Details subgraph called only once (L1 dedup within single request) + require.Equal(t, int64(1), counters.details.Load()) + + // L2 receives two writes — one per alias — because graphql-go-tools + // PR #1259 commit 2427062b1f ("bulk L2 Set") writes each resolved + // field result to L2 independently rather than dedup-on-key at + // write time. Ristretto-backed Len() counts each Set() that admits + // a new entry, and the same key set twice can both admit when the + // reads in between race the asynchronous admission. The L1 dedup + // still saves the subgraph call (asserted above); only the L2 + // bookkeeping doubled. + require.Equal(t, 2, cache.Len()) + }) + }) + + t.Run("L2/@is directive cache key mapping", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // Query using @is-mapped argument (pid maps to @key field "id") + // Include cross-subgraph field (description from details) to trigger entity caching + req := testenv.GraphQLRequest{ + Query: `{ itemByPid(pid: "1") { id name description } }`, + } + res := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, `{"data":{"itemByPid":{"id":"1","name":"Widget","description":"A versatile widget for everyday use"}}}`, res.Body) + + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), detailsAfterFirst) + + // Same query again — entity cache hit (details subgraph not called again) + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, `{"data":{"itemByPid":{"id":"1","name":"Widget","description":"A versatile widget for everyday use"}}}`, res2.Body) + require.Equal(t, int64(1), counters.details.Load()) + + // Different pid — entity cache miss for this entity + res3 := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ itemByPid(pid: "2") { id name description } }`, + }) + require.Equal(t, `{"data":{"itemByPid":{"id":"2","name":"Gadget","description":"A high-tech gadget with many features"}}}`, res3.Body) + require.Equal(t, int64(2), counters.details.Load()) + }) + }) + + t.Run("Shadow/with failing cache", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingOptions(&FailingEntityCache{}), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + setEntityCacheShadowMode(routerConfig, true) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + } + // Shadow mode + failing cache: every request must still resolve + // end-to-end via the subgraphs. The signal is that BOTH calls hit + // the details subgraph — shadow mode never serves from cache, even + // when the cache is working. With cache failures on top, the only + // data source is the subgraph, so the counter must tick on every + // call. With shadow mode disabled (reads-from-cache), the cache + // error would either short-circuit or fail the request; so this + // counter-based assertion is the truthful shadow-mode signal. + res := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, `{"data":{"item":{"id":"1","name":"Widget","description":"A versatile widget for everyday use"}}}`, res.Body) + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), detailsAfterFirst) + + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, `{"data":{"item":{"id":"1","name":"Widget","description":"A versatile widget for everyday use"}}}`, res2.Body) + require.Equal(t, int64(2), counters.details.Load(), + "shadow mode must refetch from details on every request even when the cache is failing") + }) + }) + + t.Run("L2/negative TTL expires and refetches", func(t *testing.T) { + // Companion to negative_cache_caches_null: asserts the not-found entity + // cache also EXPIRES after its configured TTL. Like the sister test, the + // signal is counters.details (the entity-hydration subgraph) because + // items-subgraph is always called for the root field. The TTL is set + // to 1s; after sleeping >1s, the third request MUST re-hit details. + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + setNotFoundCacheTTL(routerConfig, 1) // 1 second not-found cache TTL + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Create an item in items-subgraph; details-subgraph has no record + // for the new id, so entity hydration returns null there. + createRes := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `mutation { createItem(name: "Phantom", category: "void") { id } }`, + }) + var created struct { + Data struct { + CreateItem struct { + ID string `json:"id"` + } `json:"createItem"` + } `json:"data"` + } + require.NoError(t, json.Unmarshal([]byte(createRes.Body), &created)) + newID := created.Data.CreateItem.ID + require.NotEmpty(t, newID) + + req := testenv.GraphQLRequest{ + Query: `{ item(id: "` + newID + `") { id description } }`, + } + // First call: details hit once, null entity cached under its key. + xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, int64(1), counters.details.Load(), + "first request must hit details for the not-found hydration") + + // Immediate re-request: not-found cache hit, details skipped. + xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, int64(1), counters.details.Load(), + "immediate re-request must be a not-found cache hit") + + // Wait past TTL. + time.Sleep(1500 * time.Millisecond) + + // After expiry the not-found entry must be gone, so details is + // re-hit. Without TTL expiry (or with L2 off) this counter would + // stay at 1 or climb on every call respectively. + xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, int64(2), counters.details.Load(), + "after not-found TTL expiry, the next request must re-hit details exactly once") + }) + }) + + t.Run("L2/negative TTL falls back to positive TTL when unset", func(t *testing.T) { + // Regression guard against the new negativeCacheTTL field clobbering + // the two existing code paths. + // With notFoundCacheTtlSeconds unset (directive default 0) on the + // details subgraph's Item, + // both pre-existing behaviors must hold: + // (1) not-found entity fetches are NOT negatively cached — + // a second request for a missing id re-hits details. + // (2) found-entity positive caching (maxAge: 300) still works — + // a second request for a known id serves from cache. + // Signals: counters.details climbs on each not-found request, + // then stays flat on a repeated known-id request. + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + // Deliberately do NOT call setNotFoundCacheTTL — leave the field at + // its zero default to exercise the unset-fallback path. + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // Create an id that exists in items-subgraph but not in details. + createRes := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `mutation { createItem(name: "Missing", category: "void") { id } }`, + }) + var created struct { + Data struct { + CreateItem struct { + ID string `json:"id"` + } `json:"createItem"` + } `json:"data"` + } + require.NoError(t, json.Unmarshal([]byte(createRes.Body), &created)) + newID := created.Data.CreateItem.ID + require.NotEmpty(t, newID) + + missingReq := testenv.GraphQLRequest{ + Query: `{ item(id: "` + newID + `") { id description } }`, + } + // First not-found request: details is hit once. + xEnv.MakeGraphQLRequestOK(missingReq) + require.Equal(t, int64(1), counters.details.Load(), + "first not-found request must hit details") + + // Second not-found request: negative cache is OFF, so details + // must be hit again. With a negative-TTL leak here, this would + // stay at 1. + xEnv.MakeGraphQLRequestOK(missingReq) + require.Equal(t, int64(2), counters.details.Load(), + "without notFoundCacheTtlSeconds, not-found results must NOT be cached; details must re-hit") + + // Positive-TTL sanity check: a known id must still be entity-cached. + foundReq := testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id description } }`, + } + xEnv.MakeGraphQLRequestOK(foundReq) + detailsAfterFoundFirst := counters.details.Load() + require.Equal(t, int64(3), detailsAfterFoundFirst, + "first request for a known id must hit details once") + + xEnv.MakeGraphQLRequestOK(foundReq) + require.Equal(t, detailsAfterFoundFirst, counters.details.Load(), + "positive-TTL entity cache must hit on the second request for a known id") + }) + }) + + t.Run("L2/partial cache load with multiple warm entities", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + setEntityCachePartialLoad(routerConfig, true) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Warm cache for id:"1" and id:"2" + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id description } }`, + }) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "2") { id description } }`, + }) + detailsAfterWarm := counters.details.Load() + require.Equal(t, int64(2), detailsAfterWarm) + require.Equal(t, 2, cache.Len()) + + // List query: id:"1" and id:"2" cached, rest fetched from subgraph + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ items { id description } }`, + }) + require.Contains(t, res.Body, `"description"`) + + // Details should be called once more for the remaining uncached items + require.Equal(t, detailsAfterWarm+1, counters.details.Load()) + }) + }) + + t.Run("L2/query cache include headers", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: append( + entityCachingL2OnlyOptions(cache), + core.WithHeaderRules(config.HeaderRules{ + All: &config.GlobalHeaderRule{ + Request: []*config.RequestHeaderRule{ + { + Operation: config.HeaderRuleOperationPropagate, + Named: "X-Tenant", + }, + }, + }, + }), + ), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + // @queryCache includeHeaders varies the root field cache key by request headers + setQueryCacheIncludeHeaders(routerConfig, true) + // Also set entity cache includeHeaders so entity resolution cache key varies too + setEntityCacheIncludeHeaders(routerConfig, true) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Request with header A — entity resolution calls details subgraph + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + Header: map[string][]string{"X-Tenant": {"A"}}, + }) + detailsAfterA := counters.details.Load() + require.Equal(t, int64(1), detailsAfterA) + + // Same query, header A — entity cache hit (details not called) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + Header: map[string][]string{"X-Tenant": {"A"}}, + }) + require.Equal(t, detailsAfterA, counters.details.Load()) + + // Same query, header B — different cache key, details called again + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + Header: map[string][]string{"X-Tenant": {"B"}}, + }) + require.Equal(t, detailsAfterA+1, counters.details.Load()) + }) + }) + + t.Run("L2/cache populate maxAge override", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + // Override @cachePopulate(maxAge:60) down to 1 second. + setCachePopulateTTL(routerConfig, 1) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + lenBefore := cache.Len() + + // createItem has @cachePopulate. The mutation must write the new + // Item to L2 under the short (1s) override TTL. + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `mutation { createItem(name: "ShortLived", category: "test") { id name category } }`, + }) + require.Contains(t, res.Body, `"ShortLived"`) + + // Cache MUST have grown — proves @cachePopulate actually ran. + // With L2.Enabled=false, this assertion fails. + require.Greater(t, cache.Len(), lenBefore, + "@cachePopulate must write to L2 even under the maxAge override") + + var body struct { + Data struct { + CreateItem struct { + ID string `json:"id"` + } `json:"createItem"` + } `json:"data"` + } + require.NoError(t, json.Unmarshal([]byte(res.Body), &body)) + newID := body.Data.CreateItem.ID + require.NotEmpty(t, newID) + + // Immediately after the mutation the entity is in L2 with a 1s TTL: + // a follow-up read hits cache (items subgraph not called). + itemsAfterMutation := counters.items.Load() + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "` + newID + `") { id name } }`, + }) + require.Equal(t, itemsAfterMutation, counters.items.Load(), + "immediate follow-up read must hit the populated L2 entry") + + // Wait past the 1s override TTL. The entry must have expired, so + // a subsequent read now MUST re-hit the items subgraph. This is + // the truthful proof that the maxAge override fired: without the + // override (60s default), the counter would still not tick. + time.Sleep(1500 * time.Millisecond) + + itemsBeforeExpiredRead := counters.items.Load() + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "` + newID + `") { id name } }`, + }) + require.Equal(t, itemsBeforeExpiredRead+1, counters.items.Load(), + "after the 1s override TTL expires, the read must refetch from items; unchanged counter means the override was ignored") + }) + }) + + // --- Mapping rule coverage tests --- + + t.Run("L2/batch list argument cache keys", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // itemsByIds uses @is(fields: "id") with a list argument → batch cache lookup. + // Each element in the ids list maps to one entity cache key. + req := testenv.GraphQLRequest{ + Query: `{ itemsByIds(ids: ["1", "2"]) { id name description } }`, + } + res := xEnv.MakeGraphQLRequestOK(req) + require.Contains(t, res.Body, `"Widget"`) + require.Contains(t, res.Body, `"Gadget"`) + require.Contains(t, res.Body, `"description"`) + + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), detailsAfterFirst) + + // Two entities fetched → two cache entries + require.Equal(t, 2, cache.Len()) + + // Same query again → all entities served from cache + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, res.Body, res2.Body) + require.Equal(t, detailsAfterFirst, counters.details.Load()) + }) + }) + + t.Run("L2/batch list partial cache hit", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // Warm cache for id:"1" only + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + }) + detailsAfterWarm := counters.details.Load() + require.Equal(t, 1, cache.Len()) + + // Batch query for ids ["1", "3"] — id:"1" is cached, id:"3" is not + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ itemsByIds(ids: ["1", "3"]) { id name description } }`, + }) + require.Contains(t, res.Body, `"Widget"`) + require.Contains(t, res.Body, `"Gizmo"`) + + // Details subgraph should be called again for the uncached entity + require.Greater(t, counters.details.Load(), detailsAfterWarm) + }) + }) + + t.Run("L2/composite key auto mapping", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // product(id, region) auto-maps both args to @key(fields: "id region"). + // The cache key includes both id AND region. + req := testenv.GraphQLRequest{ + Query: `{ product(id: "p1", region: "US") { id region name info } }`, + } + res := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, `{"data":{"product":{"id":"p1","region":"US","name":"Alpha","info":"Alpha product details for US market"}}}`, res.Body) + + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), detailsAfterFirst) + + // Same composite key → cache hit + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, res.Body, res2.Body) + require.Equal(t, detailsAfterFirst, counters.details.Load()) + + // Same id, different region → cache miss (different composite key) + res3 := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ product(id: "p3", region: "EU") { id region name info } }`, + }) + require.Contains(t, res3.Body, `"Gamma"`) + require.Equal(t, detailsAfterFirst+1, counters.details.Load()) + }) + }) + + t.Run("L2/multiple keys with one satisfiable", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // Product has @key(fields: "id region") and @key(fields: "sku"). + // productBySku only provides sku → only the sku key is satisfiable. + req := testenv.GraphQLRequest{ + Query: `{ productBySku(sku: "SKU-001") { id region sku name info } }`, + } + res := xEnv.MakeGraphQLRequestOK(req) + require.Contains(t, res.Body, `"Alpha"`) + require.Contains(t, res.Body, `"SKU-001"`) + require.Contains(t, res.Body, `"info"`) + + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), detailsAfterFirst) + + // Same sku → cache hit + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, res.Body, res2.Body) + require.Equal(t, detailsAfterFirst, counters.details.Load()) + + // Different sku → cache miss + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ productBySku(sku: "SKU-003") { id region sku name info } }`, + }) + require.Equal(t, detailsAfterFirst+1, counters.details.Load()) + }) + }) + + t.Run("L2/no key match leaves root field uncached", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // productByName(name) — "name" doesn't match any @key field. + // No entity key mapping is emitted, so no per-entity cache keys + // are constructed from the argument. Entity caching via _entities + // still works (the details subgraph result is cached by entity key), + // but the root field itself does not produce a query cache mapping. + req := testenv.GraphQLRequest{ + Query: `{ productByName(name: "Alpha") { id region name info } }`, + } + res := xEnv.MakeGraphQLRequestOK(req) + require.Contains(t, res.Body, `"Alpha"`) + require.Contains(t, res.Body, `"info"`) + + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), detailsAfterFirst) + + // Entity caching from _entities still works — details cached + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, res.Body, res2.Body) + require.Equal(t, detailsAfterFirst, counters.details.Load()) + }) + }) + + t.Run("L2/composite key input object via @is", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // productByKey uses an input object argument with @is(fields: "id region"). + // The composition decomposes this into argumentPath ["key","id"] and ["key","region"], + // mapping input object fields to the composite @key(fields: "id region"). + req := testenv.GraphQLRequest{ + Query: `query($k: ProductKeyInput!) { productByKey(key: $k) { id region name info } }`, + Variables: []byte(`{"k": {"id": "p1", "region": "US"}}`), + } + res := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, `{"data":{"productByKey":{"id":"p1","region":"US","name":"Alpha","info":"Alpha product details for US market"}}}`, res.Body) + + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), detailsAfterFirst) + + // Same input object → cache hit + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, res.Body, res2.Body) + require.Equal(t, detailsAfterFirst, counters.details.Load()) + + // Different input object → cache miss + req2 := testenv.GraphQLRequest{ + Query: `query($k: ProductKeyInput!) { productByKey(key: $k) { id region name info } }`, + Variables: []byte(`{"k": {"id": "p3", "region": "EU"}}`), + } + res3 := xEnv.MakeGraphQLRequestOK(req2) + require.Contains(t, res3.Body, `"Gamma"`) + require.Equal(t, detailsAfterFirst+1, counters.details.Load()) + }) + }) + + t.Run("L2/nested key via @is directive", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // warehouse(locationId) uses @is(fields: "location.id") to map a scalar + // argument to the nested key path @key(fields: "location { id }"). + req := testenv.GraphQLRequest{ + Query: `{ warehouse(locationId: "w1") { location { id } name capacity } }`, + } + res := xEnv.MakeGraphQLRequestOK(req) + require.Contains(t, res.Body, `"Main Depot"`) + require.Contains(t, res.Body, `"capacity"`) + + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), detailsAfterFirst) + + // Same nested key → cache hit + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, res.Body, res2.Body) + require.Equal(t, detailsAfterFirst, counters.details.Load()) + + // Different location → cache miss + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ warehouse(locationId: "w2") { location { id } name capacity } }`, + }) + require.Equal(t, detailsAfterFirst+1, counters.details.Load()) + }) + }) + + t.Run("L2/single-subgraph composite key input object", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // productByKey uses an input object argument with @is(fields: "id region"). + // Querying only items-subgraph fields (id, region, name) verifies that + // RemapVariables correctly handles nested argument paths in a single-subgraph + // setup where no entity fetch is needed. + req := testenv.GraphQLRequest{ + Query: `query($k: ProductKeyInput!) { productByKey(key: $k) { id region name } }`, + Variables: []byte(`{"k": {"id": "p1", "region": "US"}}`), + } + res := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, `{"data":{"productByKey":{"id":"p1","region":"US","name":"Alpha"}}}`, res.Body) + + itemsAfterFirst := counters.items.Load() + require.Equal(t, int64(1), itemsAfterFirst) + + // Same input object → cache hit, items subgraph NOT called again + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, res.Body, res2.Body) + require.Equal(t, itemsAfterFirst, counters.items.Load()) + }) + }) + + // request_scoped_field_deduplication establishes the baseline behavior for + // entity resolution deduplication. Without @requestScoped, the details + // subgraph is called exactly once for a list query (all entities are + // batched into a single _entities call). The L2 cache then serves + // subsequent identical requests without calling the subgraph again. + // + // When @requestScoped support is added (subgraph schemas declare + // @requestScoped, composition produces requestScopedFields in config.json, + // and the planner generates RequestScopedExports/Hints), this test should + // be extended to verify that the details subgraph is called fewer times + // across multiple entity batches within a single request. + t.Run("L2/list entity batch caches across requests", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // Query a list of items. Each item triggers entity resolution to + // the details subgraph for description, batched into one _entities call. + req := testenv.GraphQLRequest{ + Query: `{ items { id name description } }`, + } + + res := xEnv.MakeGraphQLRequestOK(req) + require.Contains(t, res.Body, `"description"`) + require.Contains(t, res.Body, `"Widget"`) + + // Baseline: details subgraph called exactly once (one batch) + require.Equal(t, int64(1), counters.details.Load(), + "details should be called once for the entity batch") + + // Second identical request: L2 cache hit, no subgraph calls + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, res.Body, res2.Body) + require.Equal(t, int64(1), counters.details.Load(), + "details should not be called again (L2 cache hit)") + }) + }) + + // field_widening_across_requests verifies that when a cached entity has + // a subset of fields (e.g., description only), a subsequent request + // asking for additional fields from the same subgraph (e.g., description + // + rating) correctly fetches the wider field set from the subgraph. + t.Run("L2/field widening across requests", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // Request 1: fetch only description from details subgraph + res1 := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description } }`, + }) + require.Equal(t, `{"data":{"item":{"id":"1","name":"Widget","description":"A versatile widget for everyday use"}}}`, res1.Body) + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), detailsAfterFirst) + + // Request 2: fetch description + rating (wider field set from same subgraph). + // The cache key includes the field selection, so this is a cache miss + // for the entity resolution to details. The subgraph must be called again. + res2 := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description rating } }`, + }) + require.Contains(t, res2.Body, `"description"`) + require.Contains(t, res2.Body, `"rating"`) + require.Contains(t, res2.Body, `"Widget"`) + + // Details subgraph called again because wider field set is a different cache key + require.Equal(t, detailsAfterFirst+1, counters.details.Load(), + "details should be called again for the wider field set") + + // Request 3: repeat the wider query — should now be a cache hit + res3 := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description rating } }`, + }) + require.Equal(t, res2.Body, res3.Body) + require.Equal(t, detailsAfterFirst+1, counters.details.Load(), + "wider field set should be cached after second fetch") + }) + }) + + // batch_partial_hit_with_extension_fields verifies that batch queries + // correctly handle partial cache hits when entity extension fields + // (from the details subgraph) are involved. Entities with cached + // extension data are served from cache; uncached entities trigger a + // subgraph fetch only for the missing ones. + t.Run("L2/batch partial hit with extension fields", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // Warm cache: fetch extension fields for entity 1 and entity 2 + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ itemsByIds(ids: ["1", "2"]) { id name description } }`, + }) + detailsAfterWarm := counters.details.Load() + require.Equal(t, int64(1), detailsAfterWarm) + require.Equal(t, 2, cache.Len()) + + // Batch query for entities [1, 2, 3]: entities 1 and 2 have cached + // extension data from details, entity 3 does not. + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ itemsByIds(ids: ["1", "2", "3"]) { id name description } }`, + }) + require.Contains(t, res.Body, `"Widget"`) + require.Contains(t, res.Body, `"Gadget"`) + require.Contains(t, res.Body, `"Gizmo"`) + require.Contains(t, res.Body, `"description"`) + + // Details subgraph called again for the uncached entity (id:"3") + require.Greater(t, counters.details.Load(), detailsAfterWarm, + "details should be called for uncached entity 3") + + // All three entities now cached + require.Equal(t, 3, cache.Len()) + + // Repeat the batch query — all entities cached, no more subgraph calls + detailsBeforeRepeat := counters.details.Load() + res2 := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ itemsByIds(ids: ["1", "2", "3"]) { id name description } }`, + }) + require.Equal(t, res.Body, res2.Body) + require.Equal(t, detailsBeforeRepeat, counters.details.Load(), + "all entities should be served from cache") + }) + }) + + t.Run("L2/batch entity key per-element caching", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ itemsByIds(ids: ["1", "2"]) { id name description } }`, + } + + // Request 1: both subgraphs called (items for root field, details for entity) + res := xEnv.MakeGraphQLRequestOK(req) + require.Contains(t, res.Body, `"Widget"`) + require.Contains(t, res.Body, `"Gadget"`) + require.Contains(t, res.Body, `"description"`) + + itemsAfterFirst := counters.items.Load() + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), itemsAfterFirst) + require.Equal(t, int64(1), detailsAfterFirst) + + // Per-element cache entries: 2 entity keys (one per id) + require.Equal(t, 2, cache.Len()) + + // Request 2: identical query — batch entity keys hit, no subgraph calls + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, res.Body, res2.Body) + + // Items subgraph should NOT be called again (batch entity key cache hit) + require.Equal(t, itemsAfterFirst, counters.items.Load()) + // Details subgraph should NOT be called again (entity cache hit) + require.Equal(t, detailsAfterFirst, counters.details.Load()) + }) + }) + + // request_scoped_widening_refetch asserts the TARGET behavior for + // @requestScoped coordinate L1 caching: no matter how many sites within a + // single request read a @requestScoped field with the same key, the + // underlying subgraph should be fetched EXACTLY ONCE. + // + // This test is currently expected to FAIL. Under the present implementation + // the planner writes L1 with the narrow root selection ({id, name}) and a + // later sequentially-dependent read needs the wider selection + // ({id, name, email}) via @requires. The widening check in + // validateItemHasRequiredData sees that email is missing and triggers a + // refetch against the viewer subgraph, so counters.viewer is 2, not 1. + // + // The fix will either (a) teach the planner to pre-plan the wider union of + // selections up-front so the root fetch already carries {id, name, email}, + // or (b) teach the L1 layer to widen its stored entry when a later read + // asks for a superset of fields. Either way, once the fix lands this test + // should pass unchanged. + // + // Schema setup (see subgraphs/viewer + subgraphs/articles): + // + // viewer subgraph: + // Query.currentViewer @requestScoped(key: "currentViewer") + // Personalized.currentViewer @requestScoped(key: "currentViewer") + // Viewer { id, name, email } + // + // articles subgraph: + // Viewer { recommendedArticles } (extends viewer entity) + // Article implements Personalized { + // personalizedRecommendation: String! + // @requires(fields: "currentViewer { id name email }") + // } + // + // Query under test: + // + // { + // currentViewer { id name + // recommendedArticles { + // id title + // personalizedRecommendation + // } + // } + // } + t.Run("L1/request-scoped widening refetch", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL1OnlyOptions(), + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ + currentViewer { + id + name + recommendedArticles { + id + title + personalizedRecommendation + } + } + }`, + } + res := xEnv.MakeGraphQLRequestOK(req) + // personalizedRecommendation formats "for <>" — asserting the + // email shows up proves the wider {id,name,email} selection actually + // reached the articles subgraph via @requires, i.e. the widening + // refetch really happened. + require.Equal(t, + `{"data":{"currentViewer":{"id":"v1","name":"Alice","recommendedArticles":[`+ + `{"id":"a1","title":"The Rise of Federated GraphQL","personalizedRecommendation":"The Rise of Federated GraphQL, recommended for Alice "},`+ + `{"id":"a2","title":"Caching Strategies for Modern APIs","personalizedRecommendation":"Caching Strategies for Modern APIs, recommended for Alice "},`+ + `{"id":"a3","title":"A Practical Guide to @requestScoped","personalizedRecommendation":"A Practical Guide to @requestScoped, recommended for Alice "}`+ + `]}}}`, + res.Body) + + // Target behavior: viewer should be fetched EXACTLY ONCE no matter + // how many @requestScoped reads happen within the request. + // + // Currently fails (actual == 2) because the root fetch carries only + // {id, name} and the later @requires-driven Personalized._entities + // fetch needs {id, name, email} — the widening check misses and + // refetches the viewer subgraph. See the test's header comment. + require.Equal(t, int64(1), counters.viewer.Load(), + "viewer must be fetched exactly once per request regardless of "+ + "how many @requestScoped reads share the same key") + + require.Equal(t, int64(2), counters.articles.Load(), + "articles is called twice: once for Viewer._entities (recommendedArticles), "+ + "once for Article._entities (personalizedRecommendation after @requires)") + }) + }) + + // Exercise the same @requestScoped key at the root, nested Article.currentViewer, + // and relatedArticles.currentViewer sites. L1 should let the first fetch populate + // the coordinate cache so the viewer subgraph is fetched once. + t.Run("L1/request-scoped nested dedup", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL1OnlyOptions(), + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ + currentViewer { + id + name + email + recommendedArticles { + id + title + currentViewer { + id + name + email + } + relatedArticles { + id + title + currentViewer { + id + name + email + } + } + } + } + }`, + } + res := xEnv.MakeGraphQLRequestOK(req) + // Sanity: the query resolved successfully and the viewer data is + // identical at every site (proves the @requestScoped dedup is at + // least returning consistent data, even if it made too many fetches). + require.Contains(t, res.Body, `"currentViewer":{"id":"v1","name":"Alice","email":"alice@example.com"}`) + require.Contains(t, res.Body, `"recommendedArticles"`) + require.Contains(t, res.Body, `"relatedArticles"`) + + // Target behavior: the viewer subgraph is hit EXACTLY ONCE regardless + // of how many Article.currentViewer sites exist in the query. The root + // Query.currentViewer fetch populates the L1 coordinate cache under + // key "currentViewer", and every subsequent read at any nesting depth + // must inject from L1 without launching a new subgraph fetch. + require.Equal(t, int64(1), counters.viewer.Load(), + "viewer must be fetched exactly once per request regardless of "+ + "how many nesting levels select Article.currentViewer inline "+ + "(currently fails: the planner launches BatchEntity viewer fetches "+ + "for deeper Article.currentViewer sites in parallel with the L1 "+ + "injection check, paying the subgraph round-trip unnecessarily)") + }) + }) + + // Inverse of L1/request-scoped nested dedup: when the coordinate L1 cache + // is disabled, every Article.currentViewer site must issue its own BatchEntity + // fetch to the viewer subgraph. Without this counter-assertion, the dedup + // test above could silently turn into a no-op if the planner ever started + // merging the three selection sites on its own. + t.Run("L1-disabled/request-scoped nested no-dedup baseline", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingDisabledOptions(), + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ + currentViewer { + id + name + email + recommendedArticles { + id + title + currentViewer { + id + name + email + } + relatedArticles { + id + title + currentViewer { + id + name + email + } + } + } + } + }`, + } + res := xEnv.MakeGraphQLRequestOK(req) + require.Contains(t, res.Body, `"currentViewer":{"id":"v1","name":"Alice","email":"alice@example.com"}`) + + // Without L1 coordinate caching, the viewer subgraph is hit three times: + // 1. Root Query.currentViewer + // 2. Viewer._entities for recommendedArticles[].currentViewer (batched) + // 3. Viewer._entities for relatedArticles[].currentViewer (batched) + require.Equal(t, int64(3), counters.viewer.Load(), + "with L1 disabled, each Article.currentViewer site must hit the "+ + "viewer subgraph; if this ever drops below 3 the companion dedup "+ + "test is no longer proving anything") + }) + }) + + t.Run("Regression/complex viewer/articles query shape has no errors", func(t *testing.T) { + t.Parallel() + + servers, _ := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `query ViewerArticles { + articles { + id + title + body + relatedArticles { + ...ArticleFields + relatedArticles { + ...ArticleFields + relatedArticles { + ...ArticleFields + } + } + } + } + currentViewer { + id + name + email + recommendedArticles { + ...ArticleFields + relatedArticles { + ...ArticleFields + relatedArticles { + ...ArticleFields + } + } + } + } + } + + fragment ArticleFields on Article { + id + title + tags + viewCount + rating + reviewSummary + personalizedRecommendation + currentViewer { + id + name + email + } + }`, + } + + for i := range 3 { + res := xEnv.MakeGraphQLRequestOK(req) + require.NotContains(t, res.Body, `"errors"`, "iteration %d: expected query to execute without GraphQL errors", i) + require.Contains(t, res.Body, `"articles"`, "iteration %d: expected articles payload", i) + require.Contains(t, res.Body, `"currentViewer"`, "iteration %d: expected currentViewer payload", i) + } + }) + }) + + t.Run("Regression/complex viewer/articles cached matches uncached", func(t *testing.T) { + t.Parallel() + + servers, _ := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `query ViewerArticles { + articles { + id + title + body + relatedArticles { + ...ArticleFields + relatedArticles { + ...ArticleFields + relatedArticles { + ...ArticleFields + } + } + } + } + currentViewer { + id + name + email + recommendedArticles { + ...ArticleFields + relatedArticles { + ...ArticleFields + relatedArticles { + ...ArticleFields + } + } + } + } + } + + fragment ArticleFields on Article { + id + title + tags + viewCount + rating + reviewSummary + personalizedRecommendation + currentViewer { + id + name + email + } + }`, + } + + // Warm cache. + xEnv.MakeGraphQLRequestOK(req) + + cachedRes := xEnv.MakeGraphQLRequestOK(req) + require.NotContains(t, cachedRes.Body, `"errors"`) + + uncachedReq := req + uncachedReq.Header = http.Header{ + "X-WG-Disable-Entity-Cache": []string{"true"}, + } + uncachedRes := xEnv.MakeGraphQLRequestOK(uncachedReq) + require.NotContains(t, uncachedRes.Body, `"errors"`) + + require.Equal(t, uncachedRes.Body, cachedRes.Body) + }) + }) + + // Regression test for the arena pointer bug: exportRequestScopedFields must + // copy values before storing in requestScopedL1. Without the copy, stored + // pointers become dangling when the goroutine arena is reused on subsequent + // requests, causing crashes or corrupted data. + t.Run("Regression/repeated complex query does not panic", func(t *testing.T) { + t.Parallel() + + servers, _ := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // A query that exercises entity fetching across multiple subgraphs + // (items + details + inventory). Repeated execution triggers arena + // reuse which would crash if exported values were not copied. + req := testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id name description rating tags available count } }`, + } + for i := range 5 { + res := xEnv.MakeGraphQLRequestOK(req) + require.Contains(t, res.Body, `"Widget"`, "iteration %d: expected Widget in response", i) + require.Contains(t, res.Body, `"description"`, "iteration %d: expected description in response", i) + require.Contains(t, res.Body, `"available"`, "iteration %d: expected available in response", i) + } + }) + }) + + // Companion to mutation_populate_writes_to_cache. Both tests pin the same + // @cachePopulate read-after-write contract on a Mutation: the returned + // entity must land in L2 so the next `item(id: ...)` read is a cache hit. + // This variant keeps the RED_-style explicit response-body assertion as + // a second signal — the body assertion alone is not a truthful caching + // signal (the items store persists the new item regardless), but the + // counters.items no-growth assertion is. + // + // Historically the mutation_populate_* tests only verified the mutation + // responded; the read-path effect was added after review round 1. + t.Run("L2/cache populate writes entity for subsequent read", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // createItem has @cachePopulate(maxAge: 60). The mutation must populate L2 + // with the returned Item entity under its @key("id") cache key. + createRes := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `mutation { createItem(name: "PopulatedItem", category: "populate") { id name category } }`, + }) + require.Contains(t, createRes.Body, `"PopulatedItem"`) + + // Extract the new id from the response — `nextID` is shared across the + // parallel test suite, so we can't predict it. + var idMatch struct { + Data struct { + CreateItem struct { + ID string `json:"id"` + Name string `json:"name"` + Category string `json:"category"` + } `json:"createItem"` + } `json:"data"` + } + require.NoError(t, json.Unmarshal([]byte(createRes.Body), &idMatch)) + newID := idMatch.Data.CreateItem.ID + require.NotEmpty(t, newID, "createItem must return a non-empty id") + + itemsAfterCreate := counters.items.Load() + + // Read the just-created entity by its key. If @cachePopulate wrote to L2, + // the items subgraph must NOT be called again. The items store now persists + // createItem results (see items subgraph data.go), so the response body is + // correct either way — the truthful signal is the items-counter: cache hit + // leaves it unchanged, cache miss grows it by one. + readRes := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "` + newID + `") { id name category } }`, + }) + require.Equal(t, + `{"data":{"item":{"id":"`+newID+`","name":"PopulatedItem","category":"populate"}}}`, + readRes.Body) + require.Equal(t, itemsAfterCreate, counters.items.Load(), + "@cachePopulate must write the entity to L2 so the read-by-id is served from cache") + }) + }) + + // Regression test: @cacheInvalidate clears an entity cached under a composite @key. + // + // `delete_mutation_invalidates_cache` already covers the simple id-only case + // (Item @key("id")). This test pins the composite-key path via Product + // @key("id region") + deleteProduct(id, region) @cacheInvalidate. + // + // The cache-demo failure of an apparently equivalent scenario turned out to be + // a test-script artifact (mutable subgraph state caused warm-up to return null, + // which prevented the cache write). The router-side composite-key invalidate + // path itself works correctly — this test pins that contract. + t.Run("L2/cache invalidates composite key", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + warmReq := testenv.GraphQLRequest{ + Query: `{ product(id: "p1", region: "US") { id region sku name } }`, + } + // 1. Warm the cache for product (id=p1, region=US) + xEnv.MakeGraphQLRequestOK(warmReq) + itemsAfterWarm := counters.items.Load() + + // 2. Re-read — must be a cache hit + xEnv.MakeGraphQLRequestOK(warmReq) + require.Equal(t, itemsAfterWarm, counters.items.Load(), + "composite-key entity must be cached after warm-up") + + // 3. Invalidate via deleteProduct. The mutation itself hits the items subgraph + // to execute the resolver — so we capture the counter AFTER the mutation and + // only assert on the delta from the subsequent read. + delRes := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `mutation { deleteProduct(id: "p1", region: "US") { id region } }`, + }) + require.Contains(t, delRes.Body, `"deleteProduct"`) + itemsAfterMutation := counters.items.Load() + + // 4. Read again — composite-key cache MUST be cleared, items subgraph + // MUST be hit one more time. Currently fails: counter unchanged → @cacheInvalidate + // did not evict the composite-key entity, so the read is still a cache hit. + xEnv.MakeGraphQLRequestOK(warmReq) + require.Equal(t, itemsAfterMutation+1, counters.items.Load(), + "@cacheInvalidate on Mutation returning composite-key entity must clear the L2 entry; the post-invalidate read must re-fetch from subgraph") + }) + }) + + // RED test: nested @key reached via input object @is(fields: "location { id }") + // + // `warehouse(locationId: ID! @is(fields: "location.id"))` (scalar arg with dot notation) + // already passes — see "nested_key_via_is_directive" above. + // + // `warehouseByInput(input: WarehouseLocationInput! @is(fields: "location { id }"))` is + // the same nested @key reached via a multi-hop argument path. Composition produces + // the same `entityKeyField: "location.id"` plus `argumentPath: ["input","location","id"]`. + // The router's loader must walk the input-object path to construct the cache key. + // + // Discovered in cache-demo manual testing: cache lookup fires with the right key but + // every call shows l2_miss and the entity is never written. Pinning the failure here + // so the loader fix can land with a regression test. + t.Run("L2/nested key via input object @is directive", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingL2OnlyOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // IMPORTANT: this query does NOT select the @key field (`location { id }`). + // Reproduces the cache-demo Venue failure where queries that omit the + // key field from the selection set prevent the cache write — the router's + // entity write path derives the cache key from the response payload + // instead of from the argument values that were already used to build + // the lookup key. + // + // Compare to "nested_key_via_is_directive" above, which selects + // `location { id }` and passes — that test masks this bug because the + // key value happens to be in the response payload. + req := testenv.GraphQLRequest{ + Query: `{ warehouseByInput(input: { location: { id: "w1" } }) { name } }`, + } + res := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, `{"data":{"warehouseByInput":{"name":"Main Depot"}}}`, res.Body) + + itemsAfterFirst := counters.items.Load() + require.Equal(t, int64(1), itemsAfterFirst, "first call must hit the items subgraph") + + // Same nested key via input object → MUST be a cache hit. Currently fails: + // the items subgraph is called a second time despite the L2 lookup running + // with the structurally correct key, because the cache write path can't + // reconstruct the entity key from a response that doesn't contain the + // key field. + res2 := xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, res.Body, res2.Body) + require.Equal(t, itemsAfterFirst, counters.items.Load(), + "input-object → nested-key cache write must persist when @key field is not selected; second call must NOT re-hit subgraph") + }) + }) + + // REGRESSION: a SingleFetch served entirely from L2 cache must report + // `load_skipped: true` in the request trace. Previously the resolveSingle + // path didn't set LoadSkipped on cache-hit branches even though the bulk + // parallel path already did, so observability reported `false` on fetches + // that demonstrably never called the subgraph. + t.Run("Trace/root field cache hit reports load_skipped", func(t *testing.T) { + t.Parallel() + + servers, _ := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: entityCachingOptions(cache), + }, func(t *testing.T, xEnv *testenv.Environment) { + // Warm the cache. + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id description } }`, + }) + + // Second call with tracing enabled — assert load_skipped == true on the fetch. + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id description } }`, + Header: map[string][]string{"X-WG-Trace": {"true"}}, + }) + var body struct { + Extensions struct { + Trace struct { + Fetches map[string]any `json:"fetches"` + } `json:"trace"` + } `json:"extensions"` + } + require.NoError(t, json.Unmarshal([]byte(res.Body), &body)) + + // Walk the fetch tree and find any Single fetch with load_skipped=true. + var anyLoadSkipped bool + var visit func(node any) + visit = func(node any) { + m, ok := node.(map[string]any) + if !ok { + return + } + if m["kind"] == "Single" { + if fetch, ok := m["fetch"].(map[string]any); ok { + if trace, ok := fetch["trace"].(map[string]any); ok { + if ls, _ := trace["load_skipped"].(bool); ls { + anyLoadSkipped = true + } + } + } + } + if children, ok := m["children"].([]any); ok { + for _, c := range children { + visit(c) + } + } + } + visit(map[string]any(body.Extensions.Trace.Fetches)) + require.True(t, anyLoadSkipped, + "trace must report load_skipped=true on the cache-hit fetch") + }) + }) + + // REGRESSION: includeHeaders=true with NO header forwarded must still produce a + // stable cache key — write and read paths must agree on the prefix. Previously + // the WRITE path dropped the prefix when headerHash==0 while the READ path + // always built "0:..." → every read missed. + t.Run("L2/include headers still caches when no header is forwarded", func(t *testing.T) { + t.Parallel() + + servers, counters := startSubgraphServers(t) + configJSON := buildConfigJSON(servers) + cache := newMemoryCache(t) + + testenv.Run(t, &testenv.Config{ + RouterConfigJSONTemplate: configJSON, + RouterOptions: append( + entityCachingL2OnlyOptions(cache), + core.WithHeaderRules(config.HeaderRules{ + All: &config.GlobalHeaderRule{ + Request: []*config.RequestHeaderRule{ + { + Operation: config.HeaderRuleOperationPropagate, + Named: "X-Tenant", + }, + }, + }, + }), + ), + ModifyRouterConfig: func(routerConfig *nodev1.RouterConfig) { + setEntityCacheIncludeHeaders(routerConfig, true) + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + req := testenv.GraphQLRequest{ + Query: `{ item(id: "1") { id description } }`, + // No X-Tenant header sent — SubgraphHeadersBuilder returns hash=0. + } + + // First call: cache miss → subgraph fetch. + xEnv.MakeGraphQLRequestOK(req) + detailsAfterFirst := counters.details.Load() + require.Equal(t, int64(1), detailsAfterFirst) + + // Second call (same query, still no header): MUST be a cache hit. Counter + // stays at 1. Previously failed because write key {json} ≠ read key 0:{json}. + xEnv.MakeGraphQLRequestOK(req) + require.Equal(t, detailsAfterFirst, counters.details.Load(), + "includeHeaders=true with no header forwarded must produce a stable cache key; second call must hit cache") + }) + }) +} diff --git a/router-tests/entitycaching/harness_test.go b/router-tests/entitycaching/harness_test.go new file mode 100644 index 0000000000..0ac4a4c425 --- /dev/null +++ b/router-tests/entitycaching/harness_test.go @@ -0,0 +1,523 @@ +package entitycaching + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/99designs/gqlgen/graphql/handler" + "github.com/99designs/gqlgen/graphql/handler/transport" + "github.com/alicebob/miniredis/v2" + "github.com/redis/go-redis/v9" + "github.com/stretchr/testify/require" + + "github.com/wundergraph/cosmo/demo/pkg/subgraphs/cachetest/articles" + "github.com/wundergraph/cosmo/demo/pkg/subgraphs/cachetest/articlesmeta" + "github.com/wundergraph/cosmo/demo/pkg/subgraphs/cachetest/details" + "github.com/wundergraph/cosmo/demo/pkg/subgraphs/cachetest/inventory" + "github.com/wundergraph/cosmo/demo/pkg/subgraphs/cachetest/items" + itemsModel "github.com/wundergraph/cosmo/demo/pkg/subgraphs/cachetest/items/subgraph/model" + "github.com/wundergraph/cosmo/demo/pkg/subgraphs/cachetest/viewer" + "github.com/wundergraph/cosmo/router/core" + nodev1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" + "github.com/wundergraph/cosmo/router/pkg/config" + "github.com/wundergraph/cosmo/router/pkg/entitycache" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" +) + +type requestCounters struct { + items atomic.Int64 + details atomic.Int64 + inventory atomic.Int64 + viewer atomic.Int64 + articles atomic.Int64 + articlesMeta atomic.Int64 +} + +func countingMiddleware(counter *atomic.Int64, next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + counter.Add(1) + next.ServeHTTP(w, r) + }) +} + +type subgraphServers struct { + items *httptest.Server + details *httptest.Server + inventory *httptest.Server + viewer *httptest.Server + articles *httptest.Server + articlesMeta *httptest.Server + // Subscription channels for the items subgraph. + itemUpdatedCh chan *itemsModel.Item + itemCreatedCh chan *itemsModel.Item +} + +func startSubgraphServers(t *testing.T) (*subgraphServers, *requestCounters) { + t.Helper() + + counters := &requestCounters{} + itemUpdatedCh := make(chan *itemsModel.Item, 1) + itemCreatedCh := make(chan *itemsModel.Item, 1) + + itemsSchema := items.NewSchema(itemUpdatedCh, itemCreatedCh) + itemsHandler := handler.New(itemsSchema) + itemsHandler.AddTransport(transport.POST{}) + itemsHandler.AddTransport(transport.Websocket{ + KeepAlivePingInterval: 10 * time.Second, + }) + + detailsSchema := details.NewSchema() + detailsHandler := handler.New(detailsSchema) + detailsHandler.AddTransport(transport.POST{}) + + inventorySchema := inventory.NewSchema() + inventoryHandler := handler.New(inventorySchema) + inventoryHandler.AddTransport(transport.POST{}) + + viewerSchema := viewer.NewSchema() + viewerHandler := handler.New(viewerSchema) + viewerHandler.AddTransport(transport.POST{}) + + articlesSchema := articles.NewSchema() + articlesHandler := handler.New(articlesSchema) + articlesHandler.AddTransport(transport.POST{}) + + articlesMetaSchema := articlesmeta.NewSchema() + articlesMetaHandler := handler.New(articlesMetaSchema) + articlesMetaHandler.AddTransport(transport.POST{}) + + itemsSrv := httptest.NewServer(countingMiddleware(&counters.items, itemsHandler)) + t.Cleanup(itemsSrv.Close) + + detailsSrv := httptest.NewServer(countingMiddleware(&counters.details, detailsHandler)) + t.Cleanup(detailsSrv.Close) + + inventorySrv := httptest.NewServer(countingMiddleware(&counters.inventory, inventoryHandler)) + t.Cleanup(inventorySrv.Close) + + viewerSrv := httptest.NewServer(countingMiddleware(&counters.viewer, viewerHandler)) + t.Cleanup(viewerSrv.Close) + + articlesSrv := httptest.NewServer(countingMiddleware(&counters.articles, articlesHandler)) + t.Cleanup(articlesSrv.Close) + + articlesMetaSrv := httptest.NewServer(countingMiddleware(&counters.articlesMeta, articlesMetaHandler)) + t.Cleanup(articlesMetaSrv.Close) + + return &subgraphServers{ + items: itemsSrv, + details: detailsSrv, + inventory: inventorySrv, + viewer: viewerSrv, + articles: articlesSrv, + articlesMeta: articlesMetaSrv, + itemUpdatedCh: itemUpdatedCh, + itemCreatedCh: itemCreatedCh, + }, counters +} + +func startSubgraphServersWithMiddleware(t *testing.T, mw func(http.Handler) http.Handler) (*subgraphServers, *requestCounters) { + t.Helper() + + counters := &requestCounters{} + itemUpdatedCh := make(chan *itemsModel.Item, 1) + itemCreatedCh := make(chan *itemsModel.Item, 1) + + itemsSchema := items.NewSchema(itemUpdatedCh, itemCreatedCh) + itemsHandler := handler.New(itemsSchema) + itemsHandler.AddTransport(transport.POST{}) + itemsHandler.AddTransport(transport.Websocket{ + KeepAlivePingInterval: 10 * time.Second, + }) + + detailsSchema := details.NewSchema() + detailsHandler := handler.New(detailsSchema) + detailsHandler.AddTransport(transport.POST{}) + + inventorySchema := inventory.NewSchema() + inventoryHandler := handler.New(inventorySchema) + inventoryHandler.AddTransport(transport.POST{}) + + viewerSchema := viewer.NewSchema() + viewerHandler := handler.New(viewerSchema) + viewerHandler.AddTransport(transport.POST{}) + + articlesSchema := articles.NewSchema() + articlesHandler := handler.New(articlesSchema) + articlesHandler.AddTransport(transport.POST{}) + + articlesMetaSchema := articlesmeta.NewSchema() + articlesMetaHandler := handler.New(articlesMetaSchema) + articlesMetaHandler.AddTransport(transport.POST{}) + + var detailsWrapped http.Handler = detailsHandler + if mw != nil { + detailsWrapped = mw(detailsHandler) + } + + itemsSrv := httptest.NewServer(countingMiddleware(&counters.items, itemsHandler)) + t.Cleanup(itemsSrv.Close) + + detailsSrv := httptest.NewServer(countingMiddleware(&counters.details, detailsWrapped)) + t.Cleanup(detailsSrv.Close) + + inventorySrv := httptest.NewServer(countingMiddleware(&counters.inventory, inventoryHandler)) + t.Cleanup(inventorySrv.Close) + + viewerSrv := httptest.NewServer(countingMiddleware(&counters.viewer, viewerHandler)) + t.Cleanup(viewerSrv.Close) + + articlesSrv := httptest.NewServer(countingMiddleware(&counters.articles, articlesHandler)) + t.Cleanup(articlesSrv.Close) + + articlesMetaSrv := httptest.NewServer(countingMiddleware(&counters.articlesMeta, articlesMetaHandler)) + t.Cleanup(articlesMetaSrv.Close) + + return &subgraphServers{ + items: itemsSrv, + details: detailsSrv, + inventory: inventorySrv, + viewer: viewerSrv, + articles: articlesSrv, + articlesMeta: articlesMetaSrv, + itemUpdatedCh: itemUpdatedCh, + itemCreatedCh: itemCreatedCh, + }, counters +} + +func TestStartSubgraphServersWithMiddlewareBuildsCompleteConfig(t *testing.T) { + t.Parallel() + + servers, _ := startSubgraphServersWithMiddleware(t, nil) + require.NotNil(t, servers.articlesMeta) + require.NotPanics(t, func() { + _ = buildConfigJSON(servers) + }) +} + +func buildConfigJSON(servers *subgraphServers) string { + replaced := configJSONTemplate + replaced = strings.ReplaceAll(replaced, itemsPlaceholderURL, servers.items.URL) + replaced = strings.ReplaceAll(replaced, detailsPlaceholderURL, servers.details.URL) + replaced = strings.ReplaceAll(replaced, inventoryPlaceholderURL, servers.inventory.URL) + replaced = strings.ReplaceAll(replaced, viewerPlaceholderURL, servers.viewer.URL) + replaced = strings.ReplaceAll(replaced, articlesPlaceholderURL, servers.articles.URL) + replaced = strings.ReplaceAll(replaced, articlesMetaPlaceholderURL, servers.articlesMeta.URL) + return replaced +} + +func entityCachingOptions(cache resolve.LoaderCache) []core.Option { + return []core.Option{ + core.WithEntityCaching(config.EntityCachingConfiguration{ + Enabled: true, + L1: config.EntityCachingL1Configuration{ + Enabled: true, + }, + L2: config.EntityCachingL2Configuration{ + Enabled: true, + }, + }), + core.WithEntityCacheInstances(map[string]resolve.LoaderCache{ + "default": cache, + }), + } +} + +// entityCachingL1OnlyOptions enables only the per-request L1 cache. +// Use in subtests named L1/... so the assertion depends on L1 behavior alone, +// not on L2 coincidentally helping. +func entityCachingL1OnlyOptions() []core.Option { + return []core.Option{ + core.WithEntityCaching(config.EntityCachingConfiguration{ + Enabled: true, + L1: config.EntityCachingL1Configuration{ + Enabled: true, + }, + L2: config.EntityCachingL2Configuration{ + Enabled: false, + }, + }), + } +} + +// entityCachingL2OnlyOptions enables only the cross-request L2 cache. +// Use in subtests named L2/... so the assertion depends on L2 behavior alone. +func entityCachingL2OnlyOptions(cache resolve.LoaderCache) []core.Option { + return []core.Option{ + core.WithEntityCaching(config.EntityCachingConfiguration{ + Enabled: true, + L1: config.EntityCachingL1Configuration{ + Enabled: false, + }, + L2: config.EntityCachingL2Configuration{ + Enabled: true, + }, + }), + core.WithEntityCacheInstances(map[string]resolve.LoaderCache{ + "default": cache, + }), + } +} + +// entityCachingDisabledOptions turns both layers off. +// Use as the baseline for inverse "L1 disabled → N calls" assertions. +func entityCachingDisabledOptions() []core.Option { + return []core.Option{ + core.WithEntityCaching(config.EntityCachingConfiguration{ + Enabled: true, + L1: config.EntityCachingL1Configuration{ + Enabled: false, + }, + L2: config.EntityCachingL2Configuration{ + Enabled: false, + }, + }), + } +} + +// clearEntityCacheConfigs removes all entity cache configs from the router config. +func clearEntityCacheConfigs(rc *nodev1.RouterConfig) { + for _, ds := range rc.EngineConfig.DatasourceConfigurations { + ds.EntityCacheConfigurations = nil + ds.RootFieldCacheConfigurations = nil + ds.CacheInvalidateConfigurations = nil + ds.CachePopulateConfigurations = nil + } +} + +// setEntityCacheTTL overrides MaxAgeSeconds on all entity cache configs. +func setEntityCacheTTL(rc *nodev1.RouterConfig, ttl int64) { + for _, ds := range rc.EngineConfig.DatasourceConfigurations { + for _, ec := range ds.EntityCacheConfigurations { + ec.MaxAgeSeconds = ttl + } + } +} + +// setEntityCacheShadowMode sets ShadowMode on all entity cache configs. +func setEntityCacheShadowMode(rc *nodev1.RouterConfig, enabled bool) { + for _, ds := range rc.EngineConfig.DatasourceConfigurations { + for _, ec := range ds.EntityCacheConfigurations { + ec.ShadowMode = enabled + } + } +} + +// setEntityCachePartialLoad sets PartialCacheLoad on all entity cache configs. +func setEntityCachePartialLoad(rc *nodev1.RouterConfig, enabled bool) { + for _, ds := range rc.EngineConfig.DatasourceConfigurations { + for _, ec := range ds.EntityCacheConfigurations { + ec.PartialCacheLoad = enabled + } + } +} + +// setEntityCacheIncludeHeaders sets IncludeHeaders on all entity cache configs. +func setEntityCacheIncludeHeaders(rc *nodev1.RouterConfig, enabled bool) { + for _, ds := range rc.EngineConfig.DatasourceConfigurations { + for _, ec := range ds.EntityCacheConfigurations { + ec.IncludeHeaders = enabled + } + } +} + +// setNotFoundCacheTTL sets NotFoundCacheTtlSeconds on all entity cache configs. +func setNotFoundCacheTTL(rc *nodev1.RouterConfig, ttl int64) { + for _, ds := range rc.EngineConfig.DatasourceConfigurations { + for _, ec := range ds.EntityCacheConfigurations { + ec.NotFoundCacheTtlSeconds = ttl + } + } +} + +// setQueryCacheShadowMode sets ShadowMode on all root field cache configs. +func setQueryCacheShadowMode(rc *nodev1.RouterConfig, enabled bool) { + for _, ds := range rc.EngineConfig.DatasourceConfigurations { + for _, rfc := range ds.RootFieldCacheConfigurations { + rfc.ShadowMode = enabled + } + } +} + +// setQueryCacheIncludeHeaders sets IncludeHeaders on all root field cache configs. +func setQueryCacheIncludeHeaders(rc *nodev1.RouterConfig, enabled bool) { + for _, ds := range rc.EngineConfig.DatasourceConfigurations { + for _, rfc := range ds.RootFieldCacheConfigurations { + rfc.IncludeHeaders = enabled + } + } +} + +// setCachePopulateTTL overrides MaxAgeSeconds on all cache populate configs. +func setCachePopulateTTL(rc *nodev1.RouterConfig, ttl int64) { + for _, ds := range rc.EngineConfig.DatasourceConfigurations { + for _, cp := range ds.CachePopulateConfigurations { + cp.MaxAgeSeconds = &ttl + } + } +} + +// FailingEntityCache implements resolve.LoaderCache and always returns errors. +type FailingEntityCache struct{} + +var _ resolve.LoaderCache = (*FailingEntityCache)(nil) + +func (f *FailingEntityCache) Get(_ context.Context, keys []string) ([]*resolve.CacheEntry, error) { + return nil, errCacheFailed +} + +func (f *FailingEntityCache) Set(_ context.Context, _ []*resolve.CacheEntry) error { + return errCacheFailed +} + +func (f *FailingEntityCache) Delete(_ context.Context, _ []string) error { + return errCacheFailed +} + +var errCacheFailed = &cacheFailed{} + +type cacheFailed struct{} + +func (c *cacheFailed) Error() string { + return "entity cache operation failed" +} + +// ControllableCache wraps a MemoryEntityCache but can be toggled to fail on demand. +// Use SetFailing(true) to simulate a Redis outage mid-test. +type ControllableCache struct { + inner *entitycache.MemoryEntityCache + failing atomic.Bool +} + +func newControllableCache(t *testing.T) *ControllableCache { + t.Helper() + cache, err := entitycache.NewMemoryEntityCache(10 * 1024 * 1024) + require.NoError(t, err) + t.Cleanup(func() { _ = cache.Close() }) + return &ControllableCache{inner: cache} +} + +func (c *ControllableCache) SetFailing(v bool) { c.failing.Store(v) } + +func (c *ControllableCache) Get(ctx context.Context, keys []string) ([]*resolve.CacheEntry, error) { + if c.failing.Load() { + return nil, errCacheFailed + } + return c.inner.Get(ctx, keys) +} + +func (c *ControllableCache) Set(ctx context.Context, entries []*resolve.CacheEntry) error { + if c.failing.Load() { + return errCacheFailed + } + return c.inner.Set(ctx, entries) +} + +func (c *ControllableCache) Delete(ctx context.Context, keys []string) error { + if c.failing.Load() { + return errCacheFailed + } + return c.inner.Delete(ctx, keys) +} + +// entityCachingOptionsWithCircuitBreakerRef returns L2-only router options with +// a CircuitBreakerCache so tests can inspect its state. +func entityCachingOptionsWithCircuitBreakerRef(cache resolve.LoaderCache, threshold int, cooldown time.Duration) ([]core.Option, *entitycache.CircuitBreakerCache) { + cb := entitycache.NewCircuitBreakerCache(cache, entitycache.CircuitBreakerConfig{ + Enabled: true, + FailureThreshold: threshold, + CooldownPeriod: cooldown, + }) + return entityCachingL2OnlyOptions(cb), cb +} + +// entityCachingOptionsWithSubgraphConfig returns L2-only router options with +// per-subgraph cache routing. +func entityCachingOptionsWithSubgraphConfig(caches map[string]resolve.LoaderCache, subgraphs []config.EntityCachingSubgraphCacheOverride) []core.Option { + return []core.Option{ + core.WithEntityCaching(config.EntityCachingConfiguration{ + Enabled: true, + L1: config.EntityCachingL1Configuration{ + Enabled: false, + }, + L2: config.EntityCachingL2Configuration{ + Enabled: true, + }, + SubgraphCacheOverrides: subgraphs, + }), + core.WithEntityCacheInstances(caches), + } +} + +// newMemoryCache is a convenience wrapper. +func newMemoryCache(t *testing.T) *entitycache.MemoryEntityCache { + t.Helper() + c, err := entitycache.NewMemoryEntityCache(10 * 1024 * 1024) // 10MB for tests + require.NoError(t, err) + t.Cleanup(func() { _ = c.Close() }) + return c +} + +// newTestRedisCache creates a miniredis-backed cache for testing. +func newTestRedisCache(t *testing.T) (*entitycache.RedisEntityCache, *miniredis.Miniredis) { + t.Helper() + mr := miniredis.RunT(t) + client := redis.NewClient(&redis.Options{Addr: mr.Addr()}) + t.Cleanup(func() { client.Close() }) + return entitycache.NewRedisEntityCache(client, "test"), mr +} + +// extensionInvalidationMiddleware returns an HTTP middleware that injects +// a cacheInvalidation extension into the subgraph response when the flag is set. +// Format: {"extensions":{"cacheInvalidation":{"keys":[{"typename":"Item","key":{"id":"1"}}]}}} +func extensionInvalidationMiddleware(flag *atomic.Bool) func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if !flag.Load() { + next.ServeHTTP(w, r) + return + } + // Capture the response. + rec := httptest.NewRecorder() + next.ServeHTTP(rec, r) + + body := rec.Body.Bytes() + var resp map[string]json.RawMessage + if err := json.Unmarshal(body, &resp); err != nil { + // Pass through on unmarshal error. + for k, v := range rec.Header() { + w.Header()[k] = v + } + w.WriteHeader(rec.Code) + _, _ = w.Write(body) + return + } + + // Inject cacheInvalidation extension. + ext := map[string]any{ + "cacheInvalidation": map[string]any{ + "keys": []map[string]any{ + {"typename": "Item", "key": map[string]any{"id": "1"}}, + }, + }, + } + extBytes, _ := json.Marshal(ext) + resp["extensions"] = extBytes + modified, _ := json.Marshal(resp) + + for k, v := range rec.Header() { + w.Header()[k] = v + } + w.Header().Set("Content-Length", fmt.Sprintf("%d", len(modified))) + w.WriteHeader(rec.Code) + _, _ = w.Write(modified) + }) + } +} diff --git a/router-tests/entitycaching/redis_test.go b/router-tests/entitycaching/redis_test.go new file mode 100644 index 0000000000..0a8323f230 --- /dev/null +++ b/router-tests/entitycaching/redis_test.go @@ -0,0 +1,123 @@ +package entitycaching + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" +) + +func TestRedis(t *testing.T) { + t.Parallel() + + t.Run("basic_miss_then_hit", func(t *testing.T) { + t.Parallel() + + cache, _ := newTestRedisCache(t) + ctx := t.Context() + + // Get miss + entries, err := cache.Get(ctx, []string{"key1"}) + require.NoError(t, err) + require.Len(t, entries, 1) + require.Nil(t, entries[0]) + + // Set + err = cache.Set(ctx, []*resolve.CacheEntry{ + {Key: "key1", Value: []byte(`{"id":"1","name":"Widget"}`), TTL: 300 * time.Second}, + }) + require.NoError(t, err) + + // Get hit + entries, err = cache.Get(ctx, []string{"key1"}) + require.NoError(t, err) + require.Len(t, entries, 1) + require.NotNil(t, entries[0]) + require.Equal(t, "key1", entries[0].Key) + require.Equal(t, `{"id":"1","name":"Widget"}`, string(entries[0].Value)) + }) + + t.Run("batch_operations", func(t *testing.T) { + t.Parallel() + + cache, _ := newTestRedisCache(t) + ctx := t.Context() + + // Batch Set + err := cache.Set(ctx, []*resolve.CacheEntry{ + {Key: "a", Value: []byte(`{"id":"1"}`), TTL: 300 * time.Second}, + {Key: "b", Value: []byte(`{"id":"2"}`), TTL: 300 * time.Second}, + {Key: "c", Value: []byte(`{"id":"3"}`), TTL: 300 * time.Second}, + }) + require.NoError(t, err) + + // Batch Get (MGet) + entries, err := cache.Get(ctx, []string{"a", "b", "c", "d"}) + require.NoError(t, err) + require.Len(t, entries, 4) + require.NotNil(t, entries[0]) + require.Equal(t, `{"id":"1"}`, string(entries[0].Value)) + require.NotNil(t, entries[1]) + require.Equal(t, `{"id":"2"}`, string(entries[1].Value)) + require.NotNil(t, entries[2]) + require.Equal(t, `{"id":"3"}`, string(entries[2].Value)) + require.Nil(t, entries[3]) // "d" not set + }) + + t.Run("ttl_expiry", func(t *testing.T) { + t.Parallel() + + cache, mr := newTestRedisCache(t) + ctx := t.Context() + + err := cache.Set(ctx, []*resolve.CacheEntry{ + {Key: "expiring", Value: []byte(`{"ttl":"test"}`), TTL: 1 * time.Second}, + }) + require.NoError(t, err) + + // Verify it's there + entries, err := cache.Get(ctx, []string{"expiring"}) + require.NoError(t, err) + require.NotNil(t, entries[0]) + + // Fast-forward time + mr.FastForward(2 * time.Second) + + // Should be expired + entries, err = cache.Get(ctx, []string{"expiring"}) + require.NoError(t, err) + require.Nil(t, entries[0]) + }) + + t.Run("delete", func(t *testing.T) { + t.Parallel() + + cache, _ := newTestRedisCache(t) + ctx := t.Context() + + // Set entries + err := cache.Set(ctx, []*resolve.CacheEntry{ + {Key: "del1", Value: []byte(`{"a":"1"}`), TTL: 300 * time.Second}, + {Key: "del2", Value: []byte(`{"b":"2"}`), TTL: 300 * time.Second}, + }) + require.NoError(t, err) + + // Verify present + entries, err := cache.Get(ctx, []string{"del1", "del2"}) + require.NoError(t, err) + require.NotNil(t, entries[0]) + require.NotNil(t, entries[1]) + + // Delete one + err = cache.Delete(ctx, []string{"del1"}) + require.NoError(t, err) + + // Verify deleted + entries, err = cache.Get(ctx, []string{"del1", "del2"}) + require.NoError(t, err) + require.Nil(t, entries[0]) + require.NotNil(t, entries[1]) + }) +} diff --git a/router-tests/entitycaching/setup_test.go b/router-tests/entitycaching/setup_test.go new file mode 100644 index 0000000000..32148ab950 --- /dev/null +++ b/router-tests/entitycaching/setup_test.go @@ -0,0 +1,17 @@ +package entitycaching + +import ( + _ "embed" +) + +//go:embed testdata/config.json +var configJSONTemplate string + +const ( + itemsPlaceholderURL = "http://items.entity-cache-test.local/graphql" + detailsPlaceholderURL = "http://details.entity-cache-test.local/graphql" + inventoryPlaceholderURL = "http://inventory.entity-cache-test.local/graphql" + viewerPlaceholderURL = "http://viewer.entity-cache-test.local/graphql" + articlesPlaceholderURL = "http://articles.entity-cache-test.local/graphql" + articlesMetaPlaceholderURL = "http://articlesmeta.entity-cache-test.local/graphql" +) diff --git a/router-tests/entitycaching/testdata/config.json b/router-tests/entitycaching/testdata/config.json new file mode 100644 index 0000000000..1d08d91dd8 --- /dev/null +++ b/router-tests/entitycaching/testdata/config.json @@ -0,0 +1,930 @@ +{ + "engineConfig": { + "defaultFlushInterval": "500", + "datasourceConfigurations": [ + { + "kind": "GRAPHQL", + "rootNodes": [ + { + "typeName": "Query", + "fieldNames": [ + "item", + "itemByPid", + "items", + "itemsByIds", + "product", + "productBySku", + "productByName", + "productByKey", + "warehouse", + "warehouseByInput" + ] + }, + { + "typeName": "Mutation", + "fieldNames": [ + "updateItem", + "deleteItem", + "createItem", + "deleteProduct" + ] + }, + { + "typeName": "Subscription", + "fieldNames": [ + "itemUpdated", + "itemCreated" + ] + }, + { + "typeName": "Item", + "fieldNames": [ + "id", + "name", + "category" + ] + }, + { + "typeName": "Product", + "fieldNames": [ + "id", + "region", + "sku", + "name" + ] + }, + { + "typeName": "Warehouse", + "fieldNames": [ + "location", + "name" + ] + } + ], + "childNodes": [ + { + "typeName": "Location", + "fieldNames": [ + "id" + ] + } + ], + "overrideFieldPathFromAlias": true, + "customGraphql": { + "fetch": { + "url": { + "staticVariableContent": "http://items.entity-cache-test.local/graphql" + }, + "method": "POST", + "body": {}, + "baseUrl": {}, + "path": {} + }, + "subscription": { + "enabled": true, + "url": { + "staticVariableContent": "http://items.entity-cache-test.local/graphql" + }, + "protocol": "GRAPHQL_SUBSCRIPTION_PROTOCOL_WS", + "websocketSubprotocol": "GRAPHQL_WEBSOCKET_SUBPROTOCOL_AUTO" + }, + "federation": { + "enabled": true, + "serviceSdl": "extend schema\n @link(\n url: \"https://specs.apollo.dev/federation/v2.5\"\n import: [\"@key\"]\n )\n\ndirective @openfed__entityCache(\n maxAge: Int!\n negativeCacheTTL: Int = 0\n includeHeaders: Boolean = false\n partialCacheLoad: Boolean = false\n shadowMode: Boolean = false\n) on OBJECT\n\ndirective @openfed__queryCache(\n maxAge: Int!\n includeHeaders: Boolean = false\n shadowMode: Boolean = false\n) on FIELD_DEFINITION\n\ndirective @openfed__cacheInvalidate on FIELD_DEFINITION\n\ndirective @openfed__cachePopulate(maxAge: Int) on FIELD_DEFINITION\n\ndirective @openfed__is(fields: String!) on ARGUMENT_DEFINITION\n\ntype Query {\n item(id: ID!): Item @openfed__queryCache(maxAge: 300)\n itemByPid(pid: ID! @openfed__is(fields: \"id\")): Item @openfed__queryCache(maxAge: 300)\n items: [Item!]! @openfed__queryCache(maxAge: 300)\n itemsByIds(ids: [ID!]! @openfed__is(fields: \"id\")): [Item!]! @openfed__queryCache(maxAge: 300)\n product(id: ID!, region: String!): Product @openfed__queryCache(maxAge: 300)\n productBySku(sku: String!): Product @openfed__queryCache(maxAge: 300)\n productByName(name: String!): Product @openfed__queryCache(maxAge: 300)\n productByKey(key: ProductKeyInput! @openfed__is(fields: \"id region\")): Product @openfed__queryCache(maxAge: 300)\n warehouse(locationId: ID! @openfed__is(fields: \"location.id\")): Warehouse @openfed__queryCache(maxAge: 300)\n # warehouseByInput exercises the same nested @key as `warehouse` but reaches it\n # via an input object using GraphQL selection syntax in @openfed__is. This produces the\n # multi-hop argumentPath [\"input\",\"location\",\"id\"] instead of the scalar\n # [\"locationId\"]. Captures the red state where input-object → nested-key\n # cache writes don't persist (cache lookup uses correct key but never hits).\n warehouseByInput(input: WarehouseLocationInput! @openfed__is(fields: \"location { id }\")): Warehouse @openfed__queryCache(maxAge: 300)\n}\n\ninput ProductKeyInput {\n id: ID!\n region: String!\n}\n\ninput WarehouseLocationInput {\n location: WarehouseLocationKeyInput!\n}\n\ninput WarehouseLocationKeyInput {\n id: ID!\n}\n\ntype Mutation {\n updateItem(id: ID!, name: String!): Item @openfed__cacheInvalidate\n deleteItem(id: ID!): Item @openfed__cacheInvalidate\n createItem(name: String!, category: String!): Item! @openfed__cachePopulate(maxAge: 60)\n # deleteProduct invalidates a composite-key entity (Product @key(\"id region\")).\n # Used to pin behavior for cache invalidation when the entity uses a composite\n # @key — a separate code path from the simple id-only key in deleteItem.\n deleteProduct(id: ID!, region: String!): Product @openfed__cacheInvalidate\n}\n\ntype Subscription {\n itemUpdated: Item @openfed__cacheInvalidate\n itemCreated: Item @openfed__cachePopulate\n}\n\ntype Item @key(fields: \"id\") @openfed__entityCache(maxAge: 300, negativeCacheTTL: 30) {\n id: ID!\n name: String!\n category: String!\n}\n\ntype Product @key(fields: \"id region\") @key(fields: \"sku\") @openfed__entityCache(maxAge: 300) {\n id: ID!\n region: String!\n sku: String!\n name: String!\n}\n\ntype Location {\n id: ID!\n}\n\ntype Warehouse @key(fields: \"location { id }\") @openfed__entityCache(maxAge: 300) {\n location: Location!\n name: String!\n}\n" + }, + "upstreamSchema": { + "key": "bd938246cff0199909aca1d5df40d042d917c954" + } + }, + "requestTimeoutSeconds": "10", + "id": "0", + "keys": [ + { + "typeName": "Item", + "selectionSet": "id" + }, + { + "typeName": "Product", + "selectionSet": "id region" + }, + { + "typeName": "Product", + "selectionSet": "sku" + }, + { + "typeName": "Warehouse", + "selectionSet": "location { id }" + } + ], + "entityCacheConfigurations": [ + { + "typeName": "Item", + "maxAgeSeconds": "300", + "notFoundCacheTtlSeconds": "30" + }, + { + "typeName": "Product", + "maxAgeSeconds": "300" + }, + { + "typeName": "Warehouse", + "maxAgeSeconds": "300" + } + ], + "rootFieldCacheConfigurations": [ + { + "fieldName": "item", + "maxAgeSeconds": "300", + "entityTypeName": "Item", + "entityKeyMappings": [ + { + "entityTypeName": "Item", + "fieldMappings": [ + { + "entityKeyField": "id", + "argumentPath": [ + "id" + ] + } + ] + } + ] + }, + { + "fieldName": "itemByPid", + "maxAgeSeconds": "300", + "entityTypeName": "Item", + "entityKeyMappings": [ + { + "entityTypeName": "Item", + "fieldMappings": [ + { + "entityKeyField": "id", + "argumentPath": [ + "pid" + ] + } + ] + } + ] + }, + { + "fieldName": "items", + "maxAgeSeconds": "300", + "entityTypeName": "Item" + }, + { + "fieldName": "itemsByIds", + "maxAgeSeconds": "300", + "entityTypeName": "Item", + "entityKeyMappings": [ + { + "entityTypeName": "Item", + "fieldMappings": [ + { + "entityKeyField": "id", + "argumentPath": [ + "ids" + ], + "isBatch": true + } + ] + } + ] + }, + { + "fieldName": "product", + "maxAgeSeconds": "300", + "entityTypeName": "Product", + "entityKeyMappings": [ + { + "entityTypeName": "Product", + "fieldMappings": [ + { + "entityKeyField": "id", + "argumentPath": [ + "id" + ] + }, + { + "entityKeyField": "region", + "argumentPath": [ + "region" + ] + } + ] + } + ] + }, + { + "fieldName": "productBySku", + "maxAgeSeconds": "300", + "entityTypeName": "Product", + "entityKeyMappings": [ + { + "entityTypeName": "Product", + "fieldMappings": [ + { + "entityKeyField": "sku", + "argumentPath": [ + "sku" + ] + } + ] + } + ] + }, + { + "fieldName": "productByName", + "maxAgeSeconds": "300", + "entityTypeName": "Product" + }, + { + "fieldName": "productByKey", + "maxAgeSeconds": "300", + "entityTypeName": "Product", + "entityKeyMappings": [ + { + "entityTypeName": "Product", + "fieldMappings": [ + { + "entityKeyField": "id", + "argumentPath": [ + "key", + "id" + ] + }, + { + "entityKeyField": "region", + "argumentPath": [ + "key", + "region" + ] + } + ] + } + ] + }, + { + "fieldName": "warehouse", + "maxAgeSeconds": "300", + "entityTypeName": "Warehouse", + "entityKeyMappings": [ + { + "entityTypeName": "Warehouse", + "fieldMappings": [ + { + "entityKeyField": "location.id", + "argumentPath": [ + "locationId" + ] + } + ] + } + ] + }, + { + "fieldName": "warehouseByInput", + "maxAgeSeconds": "300", + "entityTypeName": "Warehouse", + "entityKeyMappings": [ + { + "entityTypeName": "Warehouse", + "fieldMappings": [ + { + "entityKeyField": "location.id", + "argumentPath": [ + "input", + "location", + "id" + ] + } + ] + } + ] + } + ], + "cachePopulateConfigurations": [ + { + "fieldName": "createItem", + "operationType": "Mutation", + "maxAgeSeconds": "60", + "entityTypeName": "Item" + }, + { + "fieldName": "itemCreated", + "operationType": "Subscription", + "entityTypeName": "Item" + } + ], + "cacheInvalidateConfigurations": [ + { + "fieldName": "updateItem", + "operationType": "Mutation", + "entityTypeName": "Item" + }, + { + "fieldName": "deleteItem", + "operationType": "Mutation", + "entityTypeName": "Item" + }, + { + "fieldName": "deleteProduct", + "operationType": "Mutation", + "entityTypeName": "Product" + }, + { + "fieldName": "itemUpdated", + "operationType": "Subscription", + "entityTypeName": "Item" + } + ] + }, + { + "kind": "GRAPHQL", + "rootNodes": [ + { + "typeName": "Item", + "fieldNames": [ + "id", + "description", + "rating", + "tags" + ] + }, + { + "typeName": "Product", + "fieldNames": [ + "id", + "region", + "info" + ] + }, + { + "typeName": "Warehouse", + "fieldNames": [ + "location", + "capacity" + ] + } + ], + "childNodes": [ + { + "typeName": "Location", + "fieldNames": [ + "id" + ] + } + ], + "overrideFieldPathFromAlias": true, + "customGraphql": { + "fetch": { + "url": { + "staticVariableContent": "http://details.entity-cache-test.local/graphql" + }, + "method": "POST", + "body": {}, + "baseUrl": {}, + "path": {} + }, + "subscription": { + "enabled": true, + "url": { + "staticVariableContent": "http://details.entity-cache-test.local/graphql" + }, + "protocol": "GRAPHQL_SUBSCRIPTION_PROTOCOL_WS", + "websocketSubprotocol": "GRAPHQL_WEBSOCKET_SUBPROTOCOL_AUTO" + }, + "federation": { + "enabled": true, + "serviceSdl": "extend schema\n @link(\n url: \"https://specs.apollo.dev/federation/v2.5\"\n import: [\"@key\"]\n )\n\ndirective @openfed__entityCache(\n maxAge: Int!\n includeHeaders: Boolean = false\n partialCacheLoad: Boolean = false\n shadowMode: Boolean = false\n) on OBJECT\n\ntype Item @key(fields: \"id\") @openfed__entityCache(maxAge: 300) {\n id: ID!\n description: String!\n rating: Float!\n tags: [String!]!\n}\n\ntype Product @key(fields: \"id region\") @openfed__entityCache(maxAge: 300) {\n id: ID!\n region: String!\n info: String!\n}\n\ntype Location {\n id: ID!\n}\n\ntype Warehouse @key(fields: \"location { id }\") @openfed__entityCache(maxAge: 300) {\n location: Location!\n capacity: Int!\n}\n" + }, + "upstreamSchema": { + "key": "c155dbdb80b10cafddd0f1f0379b57532dc3dcc7" + } + }, + "requestTimeoutSeconds": "10", + "id": "1", + "keys": [ + { + "typeName": "Item", + "selectionSet": "id" + }, + { + "typeName": "Product", + "selectionSet": "id region" + }, + { + "typeName": "Warehouse", + "selectionSet": "location { id }" + } + ], + "entityCacheConfigurations": [ + { + "typeName": "Item", + "maxAgeSeconds": "300" + }, + { + "typeName": "Product", + "maxAgeSeconds": "300" + }, + { + "typeName": "Warehouse", + "maxAgeSeconds": "300" + } + ] + }, + { + "kind": "GRAPHQL", + "rootNodes": [ + { + "typeName": "Item", + "fieldNames": [ + "id", + "available", + "count" + ] + } + ], + "overrideFieldPathFromAlias": true, + "customGraphql": { + "fetch": { + "url": { + "staticVariableContent": "http://inventory.entity-cache-test.local/graphql" + }, + "method": "POST", + "body": {}, + "baseUrl": {}, + "path": {} + }, + "subscription": { + "enabled": true, + "url": { + "staticVariableContent": "http://inventory.entity-cache-test.local/graphql" + }, + "protocol": "GRAPHQL_SUBSCRIPTION_PROTOCOL_WS", + "websocketSubprotocol": "GRAPHQL_WEBSOCKET_SUBPROTOCOL_AUTO" + }, + "federation": { + "enabled": true, + "serviceSdl": "extend schema\n @link(\n url: \"https://specs.apollo.dev/federation/v2.5\"\n import: [\"@key\"]\n )\n\ndirective @openfed__entityCache(\n maxAge: Int!\n includeHeaders: Boolean = false\n partialCacheLoad: Boolean = false\n shadowMode: Boolean = false\n) on OBJECT\n\ntype Item @key(fields: \"id\") @openfed__entityCache(maxAge: 300) {\n id: ID!\n available: Boolean!\n count: Int!\n}\n" + }, + "upstreamSchema": { + "key": "f39f668358c7d91881c7b4b68f5387d7f18b9aad" + } + }, + "requestTimeoutSeconds": "10", + "id": "2", + "keys": [ + { + "typeName": "Item", + "selectionSet": "id" + } + ], + "entityCacheConfigurations": [ + { + "typeName": "Item", + "maxAgeSeconds": "300" + } + ] + }, + { + "kind": "GRAPHQL", + "rootNodes": [ + { + "typeName": "Viewer", + "fieldNames": [ + "id", + "name", + "email" + ] + }, + { + "typeName": "Personalized", + "fieldNames": [ + "id", + "currentViewer" + ] + }, + { + "typeName": "Query", + "fieldNames": [ + "currentViewer" + ] + }, + { + "typeName": "Article", + "fieldNames": [ + "id", + "currentViewer" + ] + } + ], + "overrideFieldPathFromAlias": true, + "customGraphql": { + "fetch": { + "url": { + "staticVariableContent": "http://viewer.entity-cache-test.local/graphql" + }, + "method": "POST", + "body": {}, + "baseUrl": {}, + "path": {} + }, + "subscription": { + "enabled": true, + "url": { + "staticVariableContent": "http://viewer.entity-cache-test.local/graphql" + }, + "protocol": "GRAPHQL_SUBSCRIPTION_PROTOCOL_WS", + "websocketSubprotocol": "GRAPHQL_WEBSOCKET_SUBPROTOCOL_AUTO" + }, + "federation": { + "enabled": true, + "serviceSdl": "extend schema\n @link(\n url: \"https://specs.apollo.dev/federation/v2.5\"\n import: [\"@key\", \"@interfaceObject\", \"@inaccessible\"]\n )\n\ndirective @openfed__requestScoped(key: String!) on FIELD_DEFINITION\n\ntype Viewer @key(fields: \"id\") {\n id: ID!\n name: String!\n email: String!\n}\n\n# Symmetric @openfed__requestScoped: Query.currentViewer and Personalized.currentViewer\n# share key \"currentViewer\" so they read/write the same L1 coordinate cache entry.\ntype Personalized @key(fields: \"id\") @interfaceObject {\n id: ID!\n currentViewer: Viewer @inaccessible @openfed__requestScoped(key: \"currentViewer\")\n}\n\ntype Query {\n currentViewer: Viewer @openfed__requestScoped(key: \"currentViewer\")\n}\n" + }, + "upstreamSchema": { + "key": "9ee951b49b83d66e073d83402d78cd8078181571" + } + }, + "requestTimeoutSeconds": "10", + "id": "3", + "keys": [ + { + "typeName": "Viewer", + "selectionSet": "id" + }, + { + "typeName": "Personalized", + "selectionSet": "id" + }, + { + "typeName": "Article", + "selectionSet": "id" + } + ], + "interfaceObjects": [ + { + "interfaceTypeName": "Personalized", + "concreteTypeNames": [ + "Article" + ] + } + ], + "requestScopedFields": [ + { + "fieldName": "currentViewer", + "typeName": "Personalized", + "l1Key": "viewer.currentViewer" + }, + { + "fieldName": "currentViewer", + "typeName": "Query", + "l1Key": "viewer.currentViewer" + } + ] + }, + { + "kind": "GRAPHQL", + "rootNodes": [ + { + "typeName": "Query", + "fieldNames": [ + "articles" + ] + }, + { + "typeName": "Personalized", + "fieldNames": [ + "id" + ] + }, + { + "typeName": "Viewer", + "fieldNames": [ + "id", + "recommendedArticles" + ], + "externalFieldNames": [ + "name", + "email" + ] + }, + { + "typeName": "Article", + "fieldNames": [ + "id", + "title", + "body", + "tags", + "personalizedRecommendation", + "relatedArticles" + ], + "externalFieldNames": [ + "currentViewer" + ] + } + ], + "overrideFieldPathFromAlias": true, + "customGraphql": { + "fetch": { + "url": { + "staticVariableContent": "http://articles.entity-cache-test.local/graphql" + }, + "method": "POST", + "body": {}, + "baseUrl": {}, + "path": {} + }, + "subscription": { + "enabled": true, + "url": { + "staticVariableContent": "http://articles.entity-cache-test.local/graphql" + }, + "protocol": "GRAPHQL_SUBSCRIPTION_PROTOCOL_WS", + "websocketSubprotocol": "GRAPHQL_WEBSOCKET_SUBPROTOCOL_AUTO" + }, + "federation": { + "enabled": true, + "serviceSdl": "extend schema\n @link(\n url: \"https://specs.apollo.dev/federation/v2.5\"\n import: [\"@key\", \"@external\", \"@requires\"]\n )\n\ntype Query {\n articles: [Article!]!\n}\n\ninterface Personalized @key(fields: \"id\") {\n id: ID!\n}\n\n# Extends Viewer with an edge back into articles land.\n# The @key forces sequencing: the articles _entities fetch for\n# recommendedArticles needs viewer.id from the prior viewer fetch, so the\n# planner cannot parallelize it with the root Query.currentViewer call.\n#\n# name and email are declared @external because Article.personalizedRecommendation\n# @requires references them. Composition fails without these declarations.\ntype Viewer @key(fields: \"id\") {\n id: ID!\n name: String! @external\n email: String! @external\n recommendedArticles: [Article!]!\n}\n\ntype Article implements Personalized @key(fields: \"id\") {\n id: ID!\n title: String!\n body: String!\n tags: [String!]!\n # Article.currentViewer is provided by the viewer subgraph via the\n # Personalized @interfaceObject. It is @external here so @requires can\n # reference it.\n currentViewer: Viewer @external\n # @requires a WIDER selection ({id, name, email}) than the root query\n # will ask for ({id, name}). This is the widening-miss trigger that the\n # test asserts: the coordinate L1 was populated with {id, name}, so the\n # follow-up _entities(Personalized) fetch fails the widening check and\n # refetches the viewer subgraph.\n personalizedRecommendation: String!\n @requires(fields: \"currentViewer { id name email }\")\n # relatedArticles enables nested selection of Article entities so tests\n # can exercise @openfed__requestScoped deduplication across multiple nesting\n # levels (Article.currentViewer selected inline at more than one depth).\n relatedArticles: [Article!]!\n}\n" + }, + "upstreamSchema": { + "key": "cf1e444bb26e20ccce317dd20e2f8da88671c87d" + } + }, + "requestTimeoutSeconds": "10", + "id": "4", + "keys": [ + { + "typeName": "Personalized", + "selectionSet": "id" + }, + { + "typeName": "Viewer", + "selectionSet": "id" + }, + { + "typeName": "Article", + "selectionSet": "id" + } + ], + "requires": [ + { + "typeName": "Article", + "fieldName": "personalizedRecommendation", + "selectionSet": "currentViewer { email id name }" + } + ], + "entityInterfaces": [ + { + "interfaceTypeName": "Personalized", + "concreteTypeNames": [ + "Article" + ] + } + ] + }, + { + "kind": "GRAPHQL", + "rootNodes": [ + { + "typeName": "Article", + "fieldNames": [ + "id", + "viewCount", + "rating", + "reviewSummary" + ] + } + ], + "overrideFieldPathFromAlias": true, + "customGraphql": { + "fetch": { + "url": { + "staticVariableContent": "http://articlesmeta.entity-cache-test.local/graphql" + }, + "method": "POST", + "body": {}, + "baseUrl": {}, + "path": {} + }, + "subscription": { + "enabled": true, + "url": { + "staticVariableContent": "http://articlesmeta.entity-cache-test.local/graphql" + }, + "protocol": "GRAPHQL_SUBSCRIPTION_PROTOCOL_WS", + "websocketSubprotocol": "GRAPHQL_WEBSOCKET_SUBPROTOCOL_AUTO" + }, + "federation": { + "enabled": true, + "serviceSdl": "extend schema\n @link(\n url: \"https://specs.apollo.dev/federation/v2.5\"\n import: [\"@key\"]\n )\n\ntype Article @key(fields: \"id\") {\n id: ID!\n viewCount: Int!\n rating: Float!\n reviewSummary: String!\n}\n" + }, + "upstreamSchema": { + "key": "26317d3464ef1d47aa8354694ea00cc787b1caef" + } + }, + "requestTimeoutSeconds": "10", + "id": "5", + "keys": [ + { + "typeName": "Article", + "selectionSet": "id" + } + ] + } + ], + "fieldConfigurations": [ + { + "typeName": "Query", + "fieldName": "item", + "argumentsConfiguration": [ + { + "name": "id", + "sourceType": "FIELD_ARGUMENT" + } + ] + }, + { + "typeName": "Query", + "fieldName": "itemByPid", + "argumentsConfiguration": [ + { + "name": "pid", + "sourceType": "FIELD_ARGUMENT" + } + ] + }, + { + "typeName": "Query", + "fieldName": "itemsByIds", + "argumentsConfiguration": [ + { + "name": "ids", + "sourceType": "FIELD_ARGUMENT" + } + ] + }, + { + "typeName": "Query", + "fieldName": "product", + "argumentsConfiguration": [ + { + "name": "id", + "sourceType": "FIELD_ARGUMENT" + }, + { + "name": "region", + "sourceType": "FIELD_ARGUMENT" + } + ] + }, + { + "typeName": "Query", + "fieldName": "productBySku", + "argumentsConfiguration": [ + { + "name": "sku", + "sourceType": "FIELD_ARGUMENT" + } + ] + }, + { + "typeName": "Query", + "fieldName": "productByName", + "argumentsConfiguration": [ + { + "name": "name", + "sourceType": "FIELD_ARGUMENT" + } + ] + }, + { + "typeName": "Query", + "fieldName": "productByKey", + "argumentsConfiguration": [ + { + "name": "key", + "sourceType": "FIELD_ARGUMENT" + } + ] + }, + { + "typeName": "Query", + "fieldName": "warehouse", + "argumentsConfiguration": [ + { + "name": "locationId", + "sourceType": "FIELD_ARGUMENT" + } + ] + }, + { + "typeName": "Query", + "fieldName": "warehouseByInput", + "argumentsConfiguration": [ + { + "name": "input", + "sourceType": "FIELD_ARGUMENT" + } + ] + }, + { + "typeName": "Mutation", + "fieldName": "updateItem", + "argumentsConfiguration": [ + { + "name": "id", + "sourceType": "FIELD_ARGUMENT" + }, + { + "name": "name", + "sourceType": "FIELD_ARGUMENT" + } + ] + }, + { + "typeName": "Mutation", + "fieldName": "deleteItem", + "argumentsConfiguration": [ + { + "name": "id", + "sourceType": "FIELD_ARGUMENT" + } + ] + }, + { + "typeName": "Mutation", + "fieldName": "createItem", + "argumentsConfiguration": [ + { + "name": "name", + "sourceType": "FIELD_ARGUMENT" + }, + { + "name": "category", + "sourceType": "FIELD_ARGUMENT" + } + ] + }, + { + "typeName": "Mutation", + "fieldName": "deleteProduct", + "argumentsConfiguration": [ + { + "name": "id", + "sourceType": "FIELD_ARGUMENT" + }, + { + "name": "region", + "sourceType": "FIELD_ARGUMENT" + } + ] + } + ], + "graphqlSchema": "schema {\n query: Query\n mutation: Mutation\n subscription: Subscription\n}\n\ndirective @inaccessible on ARGUMENT_DEFINITION | ENUM | ENUM_VALUE | FIELD_DEFINITION | INPUT_FIELD_DEFINITION | INPUT_OBJECT | INTERFACE | OBJECT | SCALAR | UNION\n\ntype Query {\n item(id: ID!): Item\n itemByPid(pid: ID!): Item\n items: [Item!]!\n itemsByIds(ids: [ID!]!): [Item!]!\n product(id: ID!, region: String!): Product\n productBySku(sku: String!): Product\n productByName(name: String!): Product\n productByKey(key: ProductKeyInput!): Product\n warehouse(locationId: ID!): Warehouse\n warehouseByInput(input: WarehouseLocationInput!): Warehouse\n currentViewer: Viewer\n articles: [Article!]!\n}\n\ninput ProductKeyInput {\n id: ID!\n region: String!\n}\n\ninput WarehouseLocationInput {\n location: WarehouseLocationKeyInput!\n}\n\ninput WarehouseLocationKeyInput {\n id: ID!\n}\n\ntype Mutation {\n updateItem(id: ID!, name: String!): Item\n deleteItem(id: ID!): Item\n createItem(name: String!, category: String!): Item!\n deleteProduct(id: ID!, region: String!): Product\n}\n\ntype Subscription {\n itemUpdated: Item\n itemCreated: Item\n}\n\ntype Item {\n id: ID!\n name: String!\n category: String!\n description: String!\n rating: Float!\n tags: [String!]!\n available: Boolean!\n count: Int!\n}\n\ntype Product {\n id: ID!\n region: String!\n sku: String!\n name: String!\n info: String!\n}\n\ntype Location {\n id: ID!\n}\n\ntype Warehouse {\n location: Location!\n name: String!\n capacity: Int!\n}\n\ntype Viewer {\n id: ID!\n name: String!\n email: String!\n recommendedArticles: [Article!]!\n}\n\ninterface Personalized {\n id: ID!\n currentViewer: Viewer @inaccessible\n}\n\ntype Article implements Personalized {\n id: ID!\n title: String!\n body: String!\n tags: [String!]!\n currentViewer: Viewer\n personalizedRecommendation: String!\n relatedArticles: [Article!]!\n viewCount: Int!\n rating: Float!\n reviewSummary: String!\n}", + "stringStorage": { + "bd938246cff0199909aca1d5df40d042d917c954": "schema {\n query: Query\n mutation: Mutation\n subscription: Subscription\n}\n\ndirective @key(fields: openfed__FieldSet!, resolvable: Boolean = true) repeatable on INTERFACE | OBJECT\n\ndirective @link(as: String, for: link__Purpose, import: [link__Import], url: String!) repeatable on SCHEMA\n\ndirective @openfed__cacheInvalidate on FIELD_DEFINITION\n\ndirective @openfed__cachePopulate(maxAge: Int) on FIELD_DEFINITION\n\ndirective @openfed__entityCache(includeHeaders: Boolean = false, maxAge: Int!, negativeCacheTTL: Int = 0, partialCacheLoad: Boolean = false, shadowMode: Boolean = false) on OBJECT\n\ndirective @openfed__is(fields: String!) on ARGUMENT_DEFINITION\n\ndirective @openfed__queryCache(includeHeaders: Boolean = false, maxAge: Int!, shadowMode: Boolean = false) on FIELD_DEFINITION\n\ntype Item @key(fields: \"id\") @openfed__entityCache(maxAge: 300, negativeCacheTTL: 30) {\n category: String!\n id: ID!\n name: String!\n}\n\ntype Location {\n id: ID!\n}\n\ntype Mutation {\n createItem(category: String!, name: String!): Item! @openfed__cachePopulate(maxAge: 60)\n deleteItem(id: ID!): Item @openfed__cacheInvalidate\n deleteProduct(id: ID!, region: String!): Product @openfed__cacheInvalidate\n updateItem(id: ID!, name: String!): Item @openfed__cacheInvalidate\n}\n\ntype Product @key(fields: \"id region\") @key(fields: \"sku\") @openfed__entityCache(maxAge: 300) {\n id: ID!\n name: String!\n region: String!\n sku: String!\n}\n\ninput ProductKeyInput {\n id: ID!\n region: String!\n}\n\ntype Query {\n item(id: ID!): Item @openfed__queryCache(maxAge: 300)\n itemByPid(pid: ID! @openfed__is(fields: \"id\")): Item @openfed__queryCache(maxAge: 300)\n items: [Item!]! @openfed__queryCache(maxAge: 300)\n itemsByIds(ids: [ID!]! @openfed__is(fields: \"id\")): [Item!]! @openfed__queryCache(maxAge: 300)\n product(id: ID!, region: String!): Product @openfed__queryCache(maxAge: 300)\n productByKey(key: ProductKeyInput! @openfed__is(fields: \"id region\")): Product @openfed__queryCache(maxAge: 300)\n productByName(name: String!): Product @openfed__queryCache(maxAge: 300)\n productBySku(sku: String!): Product @openfed__queryCache(maxAge: 300)\n warehouse(locationId: ID! @openfed__is(fields: \"location.id\")): Warehouse @openfed__queryCache(maxAge: 300)\n warehouseByInput(input: WarehouseLocationInput! @openfed__is(fields: \"location { id }\")): Warehouse @openfed__queryCache(maxAge: 300)\n}\n\ntype Subscription {\n itemCreated: Item @openfed__cachePopulate\n itemUpdated: Item @openfed__cacheInvalidate\n}\n\ntype Warehouse @key(fields: \"location { id }\") @openfed__entityCache(maxAge: 300) {\n location: Location!\n name: String!\n}\n\ninput WarehouseLocationInput {\n location: WarehouseLocationKeyInput!\n}\n\ninput WarehouseLocationKeyInput {\n id: ID!\n}\n\nscalar link__Import\n\nenum link__Purpose {\n EXECUTION\n SECURITY\n}\n\nscalar openfed__FieldSet", + "c155dbdb80b10cafddd0f1f0379b57532dc3dcc7": "directive @key(fields: openfed__FieldSet!, resolvable: Boolean = true) repeatable on INTERFACE | OBJECT\n\ndirective @link(as: String, for: link__Purpose, import: [link__Import], url: String!) repeatable on SCHEMA\n\ndirective @openfed__entityCache(includeHeaders: Boolean = false, maxAge: Int!, negativeCacheTTL: Int = 0, partialCacheLoad: Boolean = false, shadowMode: Boolean = false) on OBJECT\n\ntype Item @key(fields: \"id\") @openfed__entityCache(maxAge: 300) {\n description: String!\n id: ID!\n rating: Float!\n tags: [String!]!\n}\n\ntype Location {\n id: ID!\n}\n\ntype Product @key(fields: \"id region\") @openfed__entityCache(maxAge: 300) {\n id: ID!\n info: String!\n region: String!\n}\n\ntype Warehouse @key(fields: \"location { id }\") @openfed__entityCache(maxAge: 300) {\n capacity: Int!\n location: Location!\n}\n\nscalar link__Import\n\nenum link__Purpose {\n EXECUTION\n SECURITY\n}\n\nscalar openfed__FieldSet", + "f39f668358c7d91881c7b4b68f5387d7f18b9aad": "directive @key(fields: openfed__FieldSet!, resolvable: Boolean = true) repeatable on INTERFACE | OBJECT\n\ndirective @link(as: String, for: link__Purpose, import: [link__Import], url: String!) repeatable on SCHEMA\n\ndirective @openfed__entityCache(includeHeaders: Boolean = false, maxAge: Int!, negativeCacheTTL: Int = 0, partialCacheLoad: Boolean = false, shadowMode: Boolean = false) on OBJECT\n\ntype Item @key(fields: \"id\") @openfed__entityCache(maxAge: 300) {\n available: Boolean!\n count: Int!\n id: ID!\n}\n\nscalar link__Import\n\nenum link__Purpose {\n EXECUTION\n SECURITY\n}\n\nscalar openfed__FieldSet", + "9ee951b49b83d66e073d83402d78cd8078181571": "schema {\n query: Query\n}\n\ndirective @inaccessible on ARGUMENT_DEFINITION | ENUM | ENUM_VALUE | FIELD_DEFINITION | INPUT_FIELD_DEFINITION | INPUT_OBJECT | INTERFACE | OBJECT | SCALAR | UNION\n\ndirective @interfaceObject on OBJECT\n\ndirective @key(fields: openfed__FieldSet!, resolvable: Boolean = true) repeatable on INTERFACE | OBJECT\n\ndirective @link(as: String, for: link__Purpose, import: [link__Import], url: String!) repeatable on SCHEMA\n\ndirective @openfed__requestScoped(key: String!) on FIELD_DEFINITION\n\ntype Personalized @key(fields: \"id\") @interfaceObject {\n currentViewer: Viewer @inaccessible @openfed__requestScoped(key: \"currentViewer\")\n id: ID!\n}\n\ntype Query {\n currentViewer: Viewer @openfed__requestScoped(key: \"currentViewer\")\n}\n\ntype Viewer @key(fields: \"id\") {\n email: String!\n id: ID!\n name: String!\n}\n\nscalar link__Import\n\nenum link__Purpose {\n EXECUTION\n SECURITY\n}\n\nscalar openfed__FieldSet", + "cf1e444bb26e20ccce317dd20e2f8da88671c87d": "schema {\n query: Query\n}\n\ndirective @external on FIELD_DEFINITION | OBJECT\n\ndirective @key(fields: openfed__FieldSet!, resolvable: Boolean = true) repeatable on INTERFACE | OBJECT\n\ndirective @link(as: String, for: link__Purpose, import: [link__Import], url: String!) repeatable on SCHEMA\n\ndirective @requires(fields: openfed__FieldSet!) on FIELD_DEFINITION\n\ntype Article implements Personalized @key(fields: \"id\") {\n body: String!\n currentViewer: Viewer @external\n id: ID!\n personalizedRecommendation: String! @requires(fields: \"currentViewer { id name email }\")\n relatedArticles: [Article!]!\n tags: [String!]!\n title: String!\n}\n\ninterface Personalized @key(fields: \"id\") {\n id: ID!\n}\n\ntype Query {\n articles: [Article!]!\n}\n\ntype Viewer @key(fields: \"id\") {\n email: String! @external\n id: ID!\n name: String! @external\n recommendedArticles: [Article!]!\n}\n\nscalar link__Import\n\nenum link__Purpose {\n EXECUTION\n SECURITY\n}\n\nscalar openfed__FieldSet", + "26317d3464ef1d47aa8354694ea00cc787b1caef": "directive @key(fields: openfed__FieldSet!, resolvable: Boolean = true) repeatable on INTERFACE | OBJECT\n\ndirective @link(as: String, for: link__Purpose, import: [link__Import], url: String!) repeatable on SCHEMA\n\ntype Article @key(fields: \"id\") {\n id: ID!\n rating: Float!\n reviewSummary: String!\n viewCount: Int!\n}\n\nscalar link__Import\n\nenum link__Purpose {\n EXECUTION\n SECURITY\n}\n\nscalar openfed__FieldSet" + }, + "graphqlClientSchema": "schema {\n query: Query\n mutation: Mutation\n subscription: Subscription\n}\n\ntype Query {\n item(id: ID!): Item\n itemByPid(pid: ID!): Item\n items: [Item!]!\n itemsByIds(ids: [ID!]!): [Item!]!\n product(id: ID!, region: String!): Product\n productBySku(sku: String!): Product\n productByName(name: String!): Product\n productByKey(key: ProductKeyInput!): Product\n warehouse(locationId: ID!): Warehouse\n warehouseByInput(input: WarehouseLocationInput!): Warehouse\n currentViewer: Viewer\n articles: [Article!]!\n}\n\ninput ProductKeyInput {\n id: ID!\n region: String!\n}\n\ninput WarehouseLocationInput {\n location: WarehouseLocationKeyInput!\n}\n\ninput WarehouseLocationKeyInput {\n id: ID!\n}\n\ntype Mutation {\n updateItem(id: ID!, name: String!): Item\n deleteItem(id: ID!): Item\n createItem(name: String!, category: String!): Item!\n deleteProduct(id: ID!, region: String!): Product\n}\n\ntype Subscription {\n itemUpdated: Item\n itemCreated: Item\n}\n\ntype Item {\n id: ID!\n name: String!\n category: String!\n description: String!\n rating: Float!\n tags: [String!]!\n available: Boolean!\n count: Int!\n}\n\ntype Product {\n id: ID!\n region: String!\n sku: String!\n name: String!\n info: String!\n}\n\ntype Location {\n id: ID!\n}\n\ntype Warehouse {\n location: Location!\n name: String!\n capacity: Int!\n}\n\ntype Viewer {\n id: ID!\n name: String!\n email: String!\n recommendedArticles: [Article!]!\n}\n\ninterface Personalized {\n id: ID!\n}\n\ntype Article implements Personalized {\n id: ID!\n title: String!\n body: String!\n tags: [String!]!\n currentViewer: Viewer\n personalizedRecommendation: String!\n relatedArticles: [Article!]!\n viewCount: Int!\n rating: Float!\n reviewSummary: String!\n}" + }, + "version": "00000000-0000-0000-0000-000000000000", + "subgraphs": [ + { + "id": "0", + "name": "items", + "routingUrl": "http://items.entity-cache-test.local/graphql" + }, + { + "id": "1", + "name": "details", + "routingUrl": "http://details.entity-cache-test.local/graphql" + }, + { + "id": "2", + "name": "inventory", + "routingUrl": "http://inventory.entity-cache-test.local/graphql" + }, + { + "id": "3", + "name": "viewer", + "routingUrl": "http://viewer.entity-cache-test.local/graphql" + }, + { + "id": "4", + "name": "articles", + "routingUrl": "http://articles.entity-cache-test.local/graphql" + }, + { + "id": "5", + "name": "articlesmeta", + "routingUrl": "http://articlesmeta.entity-cache-test.local/graphql" + } + ], + "compatibilityVersion": "1:{{$COMPOSITION__VERSION}}" +} diff --git a/router-tests/go.mod b/router-tests/go.mod index b81cc273f6..f201dc5743 100644 --- a/router-tests/go.mod +++ b/router-tests/go.mod @@ -4,7 +4,9 @@ go 1.25.0 require ( connectrpc.com/connect v1.19.1 + github.com/99designs/gqlgen v0.17.76 github.com/MicahParks/jwkset v0.11.0 + github.com/alicebob/miniredis/v2 v2.34.0 github.com/buger/jsonparser v1.1.2 github.com/cloudflare/backoff v0.0.0-20240920015135-e46b80a3a7d0 github.com/golang-jwt/jwt/v5 v5.3.0 @@ -25,13 +27,13 @@ require ( github.com/stretchr/testify v1.11.1 github.com/twmb/franz-go v1.16.1 github.com/twmb/franz-go/pkg/kadm v1.11.0 - github.com/wundergraph/astjson v1.1.0 + github.com/wundergraph/astjson v1.1.1-0.20260419105127-f600d161463f github.com/wundergraph/cosmo/demo v0.0.0-20260323091151-a7de617c31d0 github.com/wundergraph/cosmo/demo/pkg/subgraphs/projects v0.0.0-20250715110703-10f2e5f9c79e github.com/wundergraph/cosmo/router v0.0.0-20260330183556-dc4388d100a4 github.com/wundergraph/cosmo/router-plugin v0.0.0-20250808194725-de123ba1c65e github.com/wundergraph/cosmo/speedtrap v0.0.0-00010101000000-000000000000 - github.com/wundergraph/graphql-go-tools/v2 v2.4.4 + github.com/wundergraph/graphql-go-tools/v2 v2.4.5-0.20260610161004-63fa1c88eaea go.opentelemetry.io/otel v1.43.0 go.opentelemetry.io/otel/sdk v1.43.0 go.opentelemetry.io/otel/sdk/metric v1.43.0 @@ -48,10 +50,10 @@ require ( require ( connectrpc.com/vanguard v0.3.0 // indirect - github.com/99designs/gqlgen v0.17.76 // indirect github.com/KimMachineGun/automemlimit v0.6.1 // indirect github.com/MicahParks/keyfunc/v3 v3.6.2 // indirect github.com/agnivade/levenshtein v1.2.1 // indirect + github.com/alicebob/gopher-json v0.0.0-20230218143504-906a9b012302 // indirect github.com/andybalholm/brotli v1.1.0 // indirect github.com/antithesishq/antithesis-sdk-go v0.6.0-default-no-op // indirect github.com/bahlo/generic-list-go v0.2.0 // indirect @@ -164,9 +166,10 @@ require ( github.com/vbatts/tar-split v0.12.1 // indirect github.com/vektah/gqlparser/v2 v2.5.30 // indirect github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect - github.com/wundergraph/go-arena v1.1.0 // indirect + github.com/wundergraph/go-arena v1.2.0 // indirect github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect github.com/yosida95/uritemplate/v3 v3.0.2 // indirect + github.com/yuin/gopher-lua v1.1.1 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 // indirect diff --git a/router-tests/go.sum b/router-tests/go.sum index 12a78bfb31..9949d2135b 100644 --- a/router-tests/go.sum +++ b/router-tests/go.sum @@ -377,12 +377,12 @@ github.com/vektah/gqlparser/v2 v2.5.30 h1:EqLwGAFLIzt1wpx1IPpY67DwUujF1OfzgEyDsL github.com/vektah/gqlparser/v2 v2.5.30/go.mod h1:D1/VCZtV3LPnQrcPBeR/q5jkSQIPti0uYCP/RI0gIeo= github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc= github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw= -github.com/wundergraph/astjson v1.1.0 h1:xORDosrZ87zQFJwNGe/HIHXqzpdHOFmqWgykCLVL040= -github.com/wundergraph/astjson v1.1.0/go.mod h1:h12D/dxxnedtLzsKyBLK7/Oe4TAoGpRVC9nDpDrZSWw= -github.com/wundergraph/go-arena v1.1.0 h1:9+wSRkJAkA2vbYHp6s8tEGhPViRGQNGXqPHT0QzhdIc= -github.com/wundergraph/go-arena v1.1.0/go.mod h1:ROOysEHWJjLQ8FSfNxZCziagb7Qw2nXY3/vgKRh7eWw= -github.com/wundergraph/graphql-go-tools/v2 v2.4.4 h1:VCvS9bku4ie7+St3+H5SNuVz6dtQiDKujqQ439yrMBM= -github.com/wundergraph/graphql-go-tools/v2 v2.4.4/go.mod h1:7ljNHLrBOoOszCk4ir4Z+O6Yrf+vwBBmxjwqM3imVgA= +github.com/wundergraph/astjson v1.1.1-0.20260419105127-f600d161463f h1:MoVoeMlgY9Ej1aoF3Y/kniBZ8pv+WfIA3YSCnPBh+6M= +github.com/wundergraph/astjson v1.1.1-0.20260419105127-f600d161463f/go.mod h1:uHSJv7uowLN/nIPvkTFqUDt1sXk4qQU0KNwHfwfDcQE= +github.com/wundergraph/go-arena v1.2.0 h1:6MlhEy0NBY3Z+BuK3rj0F9YoT3bM0SlahGkzK0lKRZ4= +github.com/wundergraph/go-arena v1.2.0/go.mod h1:ROOysEHWJjLQ8FSfNxZCziagb7Qw2nXY3/vgKRh7eWw= +github.com/wundergraph/graphql-go-tools/v2 v2.4.5-0.20260610161004-63fa1c88eaea h1:ACjZjX87K3ADlFy54YrwZ/UPCugKL56/DtQNWB5EGeU= +github.com/wundergraph/graphql-go-tools/v2 v2.4.5-0.20260610161004-63fa1c88eaea/go.mod h1:3NuqY1nBh7g4IkytYazmT6RHg/giCsdZpmX0NkpayNs= github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 h1:FnBeRrxr7OU4VvAzt5X7s6266i6cSVkkFPS0TuXWbIg= github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= diff --git a/router-tests/protocol/testdata/tracing.json b/router-tests/protocol/testdata/tracing.json index ff47debcfa..a27ef045e4 100644 --- a/router-tests/protocol/testdata/tracing.json +++ b/router-tests/protocol/testdata/tracing.json @@ -674,6 +674,19 @@ "duration_since_start_nanoseconds": 1, "duration_since_start_pretty": "1ns" } + }, + "cache_trace": { + "duration_since_start_nanoseconds": 1, + "duration_since_start_pretty": "1ns", + "duration_nanoseconds": 1, + "duration_pretty": "1ns", + "l1_enabled": false, + "l2_enabled": false, + "entity_count": 0, + "l1_hit": 0, + "l1_miss": 0, + "l2_hit": 0, + "l2_miss": 0 } } } @@ -1050,6 +1063,19 @@ "duration_since_start_nanoseconds": 1, "duration_since_start_pretty": "1ns" } + }, + "cache_trace": { + "duration_since_start_nanoseconds": 1, + "duration_since_start_pretty": "1ns", + "duration_nanoseconds": 1, + "duration_pretty": "1ns", + "l1_enabled": false, + "l2_enabled": false, + "entity_count": 0, + "l1_hit": 0, + "l1_miss": 0, + "l2_hit": 0, + "l2_miss": 0 } } } @@ -1676,6 +1702,19 @@ "duration_since_start_nanoseconds": 1, "duration_since_start_pretty": "1ns" } + }, + "cache_trace": { + "duration_since_start_nanoseconds": 1, + "duration_since_start_pretty": "1ns", + "duration_nanoseconds": 1, + "duration_pretty": "1ns", + "l1_enabled": false, + "l2_enabled": false, + "entity_count": 0, + "l1_hit": 0, + "l1_miss": 0, + "l2_hit": 0, + "l2_miss": 0 } } } @@ -2005,6 +2044,19 @@ "duration_since_start_nanoseconds": 1, "duration_since_start_pretty": "1ns" } + }, + "cache_trace": { + "duration_since_start_nanoseconds": 1, + "duration_since_start_pretty": "1ns", + "duration_nanoseconds": 1, + "duration_pretty": "1ns", + "l1_enabled": false, + "l2_enabled": false, + "entity_count": 0, + "l1_hit": 0, + "l1_miss": 0, + "l2_hit": 0, + "l2_miss": 0 } } } diff --git a/router-tests/testenv/testenv.go b/router-tests/testenv/testenv.go index 4d4799e6a1..8f3ae62e78 100644 --- a/router-tests/testenv/testenv.go +++ b/router-tests/testenv/testenv.go @@ -746,6 +746,22 @@ func CreateTestSupervisorEnv(t testing.TB, cfg *Config) (*Environment, error) { retryClient.RetryMax = 10 retryClient.RetryWaitMin = 100 * time.Millisecond retryClient.HTTPClient = httpClient + // Retry on HTTP 501. The router itself never returns 501 for GraphQL requests — + // the only way to see 501 from the router URL is Go's net/http server emitting + // isUnsupportedTEError during request parsing before any handler runs. Under + // heavy parallel-subtest load this has been observed at ~0.05% rate, enough to + // surface as a flake in CI. Retrying is safe because a legitimate router 501 + // is not possible in this suite. + defaultCheckRetry := retryClient.CheckRetry + retryClient.CheckRetry = func(ctx context.Context, resp *http.Response, err error) (bool, error) { + if resp != nil && resp.StatusCode == http.StatusNotImplemented { + return true, nil + } + if defaultCheckRetry != nil { + return defaultCheckRetry(ctx, resp, err) + } + return retryablehttp.DefaultRetryPolicy(ctx, resp, err) + } client = retryClient.StandardClient() } @@ -1172,6 +1188,22 @@ func CreateTestEnv(t testing.TB, cfg *Config) (*Environment, error) { retryClient.RetryMax = 10 retryClient.RetryWaitMin = 100 * time.Millisecond retryClient.HTTPClient = httpClient + // Retry on HTTP 501. The router itself never returns 501 for GraphQL requests — + // the only way to see 501 from the router URL is Go's net/http server emitting + // isUnsupportedTEError during request parsing before any handler runs. Under + // heavy parallel-subtest load this has been observed at ~0.05% rate, enough to + // surface as a flake in CI. Retrying is safe because a legitimate router 501 + // is not possible in this suite. + defaultCheckRetry := retryClient.CheckRetry + retryClient.CheckRetry = func(ctx context.Context, resp *http.Response, err error) (bool, error) { + if resp != nil && resp.StatusCode == http.StatusNotImplemented { + return true, nil + } + if defaultCheckRetry != nil { + return defaultCheckRetry(ctx, resp, err) + } + return retryablehttp.DefaultRetryPolicy(ctx, resp, err) + } client = retryClient.StandardClient() } diff --git a/router/core/executor.go b/router/core/executor.go index 785aaadb6c..a05abe8269 100644 --- a/router/core/executor.go +++ b/router/core/executor.go @@ -62,10 +62,12 @@ type ExecutorBuildOptions struct { TraceClientRequired bool PluginsEnabled bool InstanceData InstanceData + EntityCacheInstances map[string]resolve.LoaderCache + EntityCachingConfig *config.EntityCachingConfiguration } func (b *ExecutorConfigurationBuilder) Build(ctx context.Context, opts *ExecutorBuildOptions) (*Executor, []pubsub_datasource.Provider, error) { - planConfig, providers, err := b.buildPlannerConfiguration(ctx, opts.EngineConfig, opts.Subgraphs, opts.RouterEngineConfig, opts.PluginsEnabled) + planConfig, providers, err := b.buildPlannerConfiguration(ctx, opts.EngineConfig, opts.Subgraphs, opts.RouterEngineConfig, opts.PluginsEnabled, opts.EntityCachingConfig) if err != nil { return nil, nil, fmt.Errorf("failed to build planner configuration: %w", err) } @@ -91,6 +93,8 @@ func (b *ExecutorConfigurationBuilder) Build(ctx context.Context, opts *Executor ValidateRequiredExternalFields: opts.RouterEngineConfig.Execution.ValidateRequiredExternalFields, SetDeduplicationShardCountToGOMAXPROCS: true, AllowCustomExtensionProperties: opts.RouterEngineConfig.SubgraphExtensionPropagation.Enabled, + Caches: opts.EntityCacheInstances, + EntityCacheConfigs: buildEntityCacheInvalidationConfigs(opts.EntityCachingConfig, opts.Subgraphs, opts.EngineConfig, b.logger), } if opts.ApolloCompatibilityFlags.ValueCompletion.Enabled { @@ -215,7 +219,7 @@ func (b *ExecutorConfigurationBuilder) Build(ctx context.Context, opts *Executor }, providers, nil } -func (b *ExecutorConfigurationBuilder) buildPlannerConfiguration(ctx context.Context, engineConfig *nodev1.EngineConfiguration, subgraphs []*nodev1.Subgraph, routerEngineCfg *RouterEngineConfiguration, pluginsEnabled bool) (*plan.Configuration, []pubsub_datasource.Provider, error) { +func (b *ExecutorConfigurationBuilder) buildPlannerConfiguration(ctx context.Context, engineConfig *nodev1.EngineConfiguration, subgraphs []*nodev1.Subgraph, routerEngineCfg *RouterEngineConfiguration, pluginsEnabled bool, entityCachingConfig *config.EntityCachingConfiguration) (*plan.Configuration, []pubsub_datasource.Provider, error) { // this loader is used to take the engine config and create a plan config // the plan config is what the engine uses to turn a GraphQL Request into an execution plan // the plan config is stateful as it carries connection pools and other things @@ -231,6 +235,7 @@ func (b *ExecutorConfigurationBuilder) buildPlannerConfiguration(ctx context.Con routerEngineCfg.Execution.EnableNetPoll, b.instanceData, ), b.logger, b.subscriptionHooks) + loader.entityCachingConfig = entityCachingConfig // this generates the plan config using the data source factories from the config package planConfig, providers, err := loader.Load(engineConfig, subgraphs, routerEngineCfg, pluginsEnabled) @@ -264,3 +269,72 @@ func (b *ExecutorConfigurationBuilder) buildPlannerConfiguration(ctx context.Con return planConfig, providers, nil } + +func buildEntityCacheInvalidationConfigs( + cfg *config.EntityCachingConfiguration, + subgraphs []*nodev1.Subgraph, + engineConfig *nodev1.EngineConfiguration, + logger *zap.Logger, +) map[string]map[string]*resolve.EntityCacheInvalidationConfig { + if cfg == nil || !cfg.Enabled || len(engineConfig.GetDatasourceConfigurations()) == 0 { + return nil + } + result := make(map[string]map[string]*resolve.EntityCacheInvalidationConfig) + for _, ds := range engineConfig.GetDatasourceConfigurations() { + subgraphName := subgraphNameByID(subgraphs, ds.GetId()) + if subgraphName == "" { + // Datasource ID doesn't match any known subgraph — skip instead of + // bucketing under "" which would collide across datasources and + // produce a wrong cache lookup downstream. + if logger != nil { + logger.Warn("entity caching: skipping datasource with unknown subgraph id", + zap.String("datasource_id", ds.GetId())) + } + continue + } + for _, ec := range ds.GetEntityCacheConfigurations() { + if _, ok := result[subgraphName]; !ok { + result[subgraphName] = make(map[string]*resolve.EntityCacheInvalidationConfig) + } + result[subgraphName][ec.GetTypeName()] = &resolve.EntityCacheInvalidationConfig{ + CacheName: resolveEntityCacheProviderID(cfg, subgraphName, ec.GetTypeName()), + IncludeSubgraphHeaderPrefix: ec.GetIncludeHeaders(), + } + } + } + if len(result) == 0 { + return nil + } + return result +} + +func subgraphNameByID(subgraphs []*nodev1.Subgraph, id string) string { + for _, sg := range subgraphs { + if sg.Id == id { + return sg.Name + } + } + return "" +} + +func resolveEntityCacheProviderID(cfg *config.EntityCachingConfiguration, subgraphName, typeName string) string { + if cfg == nil { + return "default" + } + for _, sg := range cfg.SubgraphCacheOverrides { + if sg.Name == subgraphName { + // Tier 1: entity-level override + for _, e := range sg.Entities { + if e.Type == typeName && e.StorageProviderID != "" { + return e.StorageProviderID + } + } + // Tier 2: subgraph-level override + if sg.StorageProviderID != "" { + return sg.StorageProviderID + } + } + } + // Tier 3: global default + return "default" +} diff --git a/router/core/executor_entity_cache_test.go b/router/core/executor_entity_cache_test.go new file mode 100644 index 0000000000..e4c39f2898 --- /dev/null +++ b/router/core/executor_entity_cache_test.go @@ -0,0 +1,200 @@ +package core + +import ( + "testing" + + "github.com/stretchr/testify/require" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + "go.uber.org/zap/zaptest/observer" + + nodev1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" + "github.com/wundergraph/cosmo/router/pkg/config" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" +) + +func TestResolveEntityCacheProviderID(t *testing.T) { + t.Parallel() + cfg := &config.EntityCachingConfiguration{ + SubgraphCacheOverrides: []config.EntityCachingSubgraphCacheOverride{ + { + Name: "products", + StorageProviderID: "sg-redis", + Entities: []config.EntityCachingEntityConfig{ + {Type: "Product", StorageProviderID: "entity-redis"}, + }, + }, + { + Name: "reviews", + StorageProviderID: "reviews-redis", + }, + }, + } + + t.Run("default_fallback", func(t *testing.T) { + t.Parallel() + result := resolveEntityCacheProviderID(cfg, "unknown-subgraph", "AnyType") + require.Equal(t, "default", result) + }) + + t.Run("subgraph_level_match", func(t *testing.T) { + t.Parallel() + result := resolveEntityCacheProviderID(cfg, "reviews", "Review") + require.Equal(t, "reviews-redis", result) + }) + + t.Run("entity_level_match", func(t *testing.T) { + t.Parallel() + result := resolveEntityCacheProviderID(cfg, "products", "Product") + require.Equal(t, "entity-redis", result) + }) + + t.Run("entity_takes_precedence_over_subgraph", func(t *testing.T) { + t.Parallel() + // "products" subgraph has sg-redis, but Product entity has entity-redis + result := resolveEntityCacheProviderID(cfg, "products", "Product") + require.Equal(t, "entity-redis", result) + }) + + t.Run("no_entity_match_falls_to_subgraph", func(t *testing.T) { + t.Parallel() + result := resolveEntityCacheProviderID(cfg, "products", "Category") + require.Equal(t, "sg-redis", result) + }) +} + +func TestSubgraphNameByID(t *testing.T) { + t.Parallel() + subgraphs := []*nodev1.Subgraph{ + {Id: "sg-1", Name: "products"}, + {Id: "sg-2", Name: "reviews"}, + } + + t.Run("found", func(t *testing.T) { + t.Parallel() + result := subgraphNameByID(subgraphs, "sg-1") + require.Equal(t, "products", result) + }) + + t.Run("not_found", func(t *testing.T) { + t.Parallel() + result := subgraphNameByID(subgraphs, "sg-unknown") + require.Equal(t, "", result) + }) +} + +func TestBuildEntityCacheInvalidationConfigs(t *testing.T) { + t.Parallel() + t.Run("nil_config", func(t *testing.T) { + t.Parallel() + result := buildEntityCacheInvalidationConfigs(nil, nil, &nodev1.EngineConfiguration{}, zap.NewNop()) + require.Nil(t, result) + }) + + t.Run("disabled", func(t *testing.T) { + t.Parallel() + cfg := &config.EntityCachingConfiguration{Enabled: false} + result := buildEntityCacheInvalidationConfigs(cfg, nil, &nodev1.EngineConfiguration{}, zap.NewNop()) + require.Nil(t, result) + }) + + t.Run("no_datasources", func(t *testing.T) { + t.Parallel() + cfg := &config.EntityCachingConfiguration{Enabled: true} + result := buildEntityCacheInvalidationConfigs(cfg, nil, &nodev1.EngineConfiguration{}, zap.NewNop()) + require.Nil(t, result) + }) + + t.Run("skips_datasource_with_unknown_subgraph_id", func(t *testing.T) { + t.Parallel() + core, observed := observer.New(zapcore.WarnLevel) + logger := zap.New(core) + cfg := &config.EntityCachingConfiguration{Enabled: true} + subgraphs := []*nodev1.Subgraph{ + {Id: "ds-known", Name: "products"}, + } + engineConfig := &nodev1.EngineConfiguration{ + DatasourceConfigurations: []*nodev1.DataSourceConfiguration{ + { + Id: "ds-unknown", // no matching subgraph + EntityCacheConfigurations: []*nodev1.EntityCacheConfiguration{ + {TypeName: "Mystery", MaxAgeSeconds: 60}, + }, + }, + { + Id: "ds-known", + EntityCacheConfigurations: []*nodev1.EntityCacheConfiguration{ + {TypeName: "Product", MaxAgeSeconds: 60}, + }, + }, + }, + } + + result := buildEntityCacheInvalidationConfigs(cfg, subgraphs, engineConfig, logger) + + // Known subgraph is present, unknown is skipped (not bucketed under ""). + require.Equal(t, map[string]map[string]*resolve.EntityCacheInvalidationConfig{ + "products": { + "Product": {CacheName: "default", IncludeSubgraphHeaderPrefix: false}, + }, + }, result) + + // And a single warning was emitted for the unknown datasource ID. + entries := observed.FilterMessage("entity caching: skipping datasource with unknown subgraph id").All() + require.Len(t, entries, 1) + require.Equal(t, "ds-unknown", entries[0].ContextMap()["datasource_id"]) + }) + + t.Run("builds_correct_map", func(t *testing.T) { + t.Parallel() + cfg := &config.EntityCachingConfiguration{ + Enabled: true, + SubgraphCacheOverrides: []config.EntityCachingSubgraphCacheOverride{ + { + Name: "products", + StorageProviderID: "custom-redis", + }, + }, + } + subgraphs := []*nodev1.Subgraph{ + {Id: "ds-1", Name: "products"}, + {Id: "ds-2", Name: "reviews"}, + } + engineConfig := &nodev1.EngineConfiguration{ + DatasourceConfigurations: []*nodev1.DataSourceConfiguration{ + { + Id: "ds-1", + EntityCacheConfigurations: []*nodev1.EntityCacheConfiguration{ + {TypeName: "Product", MaxAgeSeconds: 60, IncludeHeaders: true}, + }, + }, + { + Id: "ds-2", + EntityCacheConfigurations: []*nodev1.EntityCacheConfiguration{ + {TypeName: "Review", MaxAgeSeconds: 30}, + }, + }, + }, + } + + result := buildEntityCacheInvalidationConfigs(cfg, subgraphs, engineConfig, zap.NewNop()) + require.NotNil(t, result) + require.Len(t, result, 2) + + // products subgraph, Product type -> custom-redis + require.Contains(t, result, "products") + require.Contains(t, result["products"], "Product") + require.Equal(t, &resolve.EntityCacheInvalidationConfig{ + CacheName: "custom-redis", + IncludeSubgraphHeaderPrefix: true, + }, result["products"]["Product"]) + + // reviews subgraph, Review type -> default + require.Contains(t, result, "reviews") + require.Contains(t, result["reviews"], "Review") + require.Equal(t, &resolve.EntityCacheInvalidationConfig{ + CacheName: "default", + IncludeSubgraphHeaderPrefix: false, + }, result["reviews"]["Review"]) + }) +} diff --git a/router/core/factoryresolver.go b/router/core/factoryresolver.go index d0a94d9a51..b1545140c6 100644 --- a/router/core/factoryresolver.go +++ b/router/core/factoryresolver.go @@ -6,7 +6,6 @@ import ( "fmt" "net/http" "net/url" - "slices" "time" "github.com/buger/jsonparser" @@ -29,6 +28,13 @@ import ( "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/plan" ) +// Proto operation_type string values from the composition layer. +// CachePopulateConfiguration and CacheInvalidateConfiguration use these title-case strings +// (distinct from the router's internal lowercase OperationType constants in context.go). +const ( + protoOperationTypeSubscription = "Subscription" +) + // Loader translates the protobuf-based router engine configuration into a // plan.Configuration consumed by the GraphQL engine planner. It resolves // data source factories (HTTP, gRPC, pub/sub) for each subgraph through the @@ -38,8 +44,10 @@ type Loader struct { ctx context.Context resolver FactoryResolver subscriptionHooks subscriptionHooks - includeInfo bool - logger *zap.Logger + // includeInfo controls whether additional information like type usage and field usage is included in the plan de + includeInfo bool + logger *zap.Logger + entityCachingConfig *config.EntityCachingConfiguration } type InstanceData struct { @@ -347,6 +355,7 @@ func (l *Loader) Load(engineConfig *nodev1.EngineConfiguration, subgraphs []*nod for _, in := range engineConfig.DatasourceConfigurations { var out plan.DataSource + dataSourceName := l.subgraphName(subgraphs, in.Id) switch in.Kind { case nodev1.DataSourceKind_STATIC: @@ -358,7 +367,7 @@ func (l *Loader) Load(engineConfig *nodev1.EngineConfiguration, subgraphs []*nod out, err = plan.NewDataSourceConfiguration[staticdatasource.Configuration]( in.Id, factory, - l.dataSourceMetaData(in), + l.dataSourceMetaData(in, dataSourceName), staticdatasource.Configuration{ Data: config.LoadStringVariable(in.CustomStatic.Data), }, @@ -481,8 +490,6 @@ func (l *Loader) Load(engineConfig *nodev1.EngineConfiguration, subgraphs []*nod return nil, providers, fmt.Errorf("error creating custom configuration for data source %s: %w", in.Id, err) } - dataSourceName := l.subgraphName(subgraphs, in.Id) - factory, err := l.resolver.ResolveGraphqlFactory(dataSourceName) if err != nil { return nil, providers, err @@ -492,7 +499,7 @@ func (l *Loader) Load(engineConfig *nodev1.EngineConfiguration, subgraphs []*nod in.Id, dataSourceName, factory, - l.dataSourceMetaData(in), + l.dataSourceMetaData(in, dataSourceName), customConfiguration, ) if err != nil { @@ -502,7 +509,7 @@ func (l *Loader) Load(engineConfig *nodev1.EngineConfiguration, subgraphs []*nod case nodev1.DataSourceKind_PUBSUB: pubSubDS = append(pubSubDS, pubsub.DataSourceConfigurationWithMetadata{ Configuration: in, - Metadata: l.dataSourceMetaData(in), + Metadata: l.dataSourceMetaData(in, dataSourceName), }) default: return nil, providers, fmt.Errorf("unknown data source type %q", in.Kind) @@ -566,19 +573,11 @@ func (l *Loader) Load(engineConfig *nodev1.EngineConfiguration, subgraphs []*nod } func (l *Loader) subgraphName(subgraphs []*nodev1.Subgraph, dataSourceID string) string { - i := slices.IndexFunc(subgraphs, func(s *nodev1.Subgraph) bool { - return s.Id == dataSourceID - }) - - if i != -1 { - return subgraphs[i].Name - } - - return "" + return subgraphNameByID(subgraphs, dataSourceID) } // dataSourceMetaData converts a protobuf configuration into the planner's DataSourceMetadata. -func (l *Loader) dataSourceMetaData(in *nodev1.DataSourceConfiguration) *plan.DataSourceMetadata { +func (l *Loader) dataSourceMetaData(in *nodev1.DataSourceConfiguration, subgraphName string) *plan.DataSourceMetadata { var d plan.DirectiveConfigurations = make([]plan.DirectiveConfiguration, 0, len(in.Directives)) out := &plan.DataSourceMetadata{ @@ -674,6 +673,134 @@ func (l *Loader) dataSourceMetaData(in *nodev1.DataSourceConfiguration) *plan.Da }) } + // Entity caching configurations + for _, ec := range in.EntityCacheConfigurations { + cacheName := resolveEntityCacheProviderID(l.entityCachingConfig, subgraphName, ec.TypeName) + out.FederationMetaData.EntityCaching = append(out.FederationMetaData.EntityCaching, plan.EntityCacheConfiguration{ + TypeName: ec.TypeName, + CacheName: cacheName, + TTL: time.Duration(ec.MaxAgeSeconds) * time.Second, + NegativeCacheTTL: time.Duration(ec.NotFoundCacheTtlSeconds) * time.Second, + IncludeSubgraphHeaderPrefix: ec.IncludeHeaders, + EnablePartialCacheLoad: ec.PartialCacheLoad, + ShadowMode: ec.ShadowMode, + }) + } + + // Root field cache configurations + for _, rfc := range in.RootFieldCacheConfigurations { + cacheName := resolveEntityCacheProviderID(l.entityCachingConfig, subgraphName, rfc.EntityTypeName) + var mappings []plan.EntityKeyMapping + for _, m := range rfc.EntityKeyMappings { + var fieldMappings []plan.FieldMapping + for _, fm := range m.FieldMappings { + fieldMappings = append(fieldMappings, plan.FieldMapping{ + EntityKeyField: fm.EntityKeyField, + ArgumentPath: fm.ArgumentPath, + ArgumentIsEntityKey: fm.IsBatch, + }) + } + mappings = append(mappings, plan.EntityKeyMapping{ + EntityTypeName: m.EntityTypeName, + FieldMappings: fieldMappings, + }) + } + rootTypeName := rootTypeNameForField(in.RootNodes, rfc.FieldName) + out.FederationMetaData.RootFieldCaching = append(out.FederationMetaData.RootFieldCaching, plan.RootFieldCacheConfiguration{ + TypeName: rootTypeName, + FieldName: rfc.FieldName, + CacheName: cacheName, + TTL: time.Duration(rfc.MaxAgeSeconds) * time.Second, + IncludeSubgraphHeaderPrefix: rfc.IncludeHeaders, + ShadowMode: rfc.ShadowMode, + EntityKeyMappings: mappings, + }) + } + + // Mutation/subscription cache populate + for _, cp := range in.CachePopulateConfigurations { + if cp.OperationType == protoOperationTypeSubscription { + var targetEntity *nodev1.EntityCacheConfiguration + for _, ec := range in.EntityCacheConfigurations { + if ec.TypeName == cp.EntityTypeName { + targetEntity = ec + break + } + } + if targetEntity == nil { + continue + } + ttl := time.Duration(targetEntity.MaxAgeSeconds) * time.Second + if cp.MaxAgeSeconds != nil { + ttl = time.Duration(*cp.MaxAgeSeconds) * time.Second + } + cacheName := resolveEntityCacheProviderID(l.entityCachingConfig, subgraphName, targetEntity.TypeName) + out.FederationMetaData.SubscriptionEntityPopulation = append( + out.FederationMetaData.SubscriptionEntityPopulation, + plan.SubscriptionEntityPopulationConfiguration{ + TypeName: targetEntity.TypeName, + FieldName: cp.FieldName, + CacheName: cacheName, + TTL: ttl, + IncludeSubgraphHeaderPrefix: targetEntity.IncludeHeaders, + }, + ) + } else { + // @cachePopulate(maxAge:) — when set, override the entity's default TTL on + // mutation-time writes. Without this, the populate path falls back to the + // cache implementation's default TTL. + var mutationTTL time.Duration + if cp.MaxAgeSeconds != nil { + mutationTTL = time.Duration(*cp.MaxAgeSeconds) * time.Second + } + out.FederationMetaData.MutationFieldCaching = append(out.FederationMetaData.MutationFieldCaching, plan.MutationFieldCacheConfiguration{ + FieldName: cp.FieldName, + EnableEntityL2CachePopulation: true, + TTL: mutationTTL, + }) + } + } + + // Mutation/subscription cache invalidation + for _, ci := range in.CacheInvalidateConfigurations { + if ci.OperationType == protoOperationTypeSubscription { + cacheName := resolveEntityCacheProviderID(l.entityCachingConfig, subgraphName, ci.EntityTypeName) + var includeHeaders bool + for _, ec := range in.EntityCacheConfigurations { + if ec.TypeName == ci.EntityTypeName { + includeHeaders = ec.IncludeHeaders + break + } + } + out.FederationMetaData.SubscriptionEntityPopulation = append( + out.FederationMetaData.SubscriptionEntityPopulation, + plan.SubscriptionEntityPopulationConfiguration{ + TypeName: ci.EntityTypeName, + FieldName: ci.FieldName, + CacheName: cacheName, + IncludeSubgraphHeaderPrefix: includeHeaders, + EnableInvalidationOnKeyOnly: true, + }, + ) + } else { + out.FederationMetaData.MutationCacheInvalidation = append(out.FederationMetaData.MutationCacheInvalidation, plan.MutationCacheInvalidationConfiguration{ + FieldName: ci.FieldName, + EntityTypeName: ci.EntityTypeName, + }) + } + } + + // Request-scoped field configurations. Every field annotated with @requestScoped + // in the subgraph is both a potential reader and writer of the coordinate L1 under + // its L1Key. The planner emits both a hint (read) and an export (write) for each. + for _, rsf := range in.RequestScopedFields { + out.FederationMetaData.RequestScopedFields = append(out.FederationMetaData.RequestScopedFields, plan.RequestScopedField{ + FieldName: rsf.FieldName, + TypeName: rsf.TypeName, + L1Key: rsf.L1Key, + }) + } + // Costs costConfig := in.GetCostConfiguration() if costConfig == nil { @@ -723,6 +850,17 @@ func (l *Loader) dataSourceMetaData(in *nodev1.DataSourceConfiguration) *plan.Da return out } +func rootTypeNameForField(rootNodes []*nodev1.TypeField, fieldName string) string { + for _, node := range rootNodes { + for _, fn := range node.FieldNames { + if fn == fieldName { + return node.TypeName + } + } + } + return "" +} + func (l *Loader) fieldHasAuthorizationRule(fieldConfiguration *nodev1.FieldConfiguration) bool { if fieldConfiguration == nil { return false diff --git a/router/core/factoryresolver_entity_cache_test.go b/router/core/factoryresolver_entity_cache_test.go new file mode 100644 index 0000000000..7960cf5d53 --- /dev/null +++ b/router/core/factoryresolver_entity_cache_test.go @@ -0,0 +1,224 @@ +package core + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + nodev1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" + "github.com/wundergraph/cosmo/router/pkg/config" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/plan" +) + +func TestDataSourceMetaDataMapsNegativeEntityCacheTTL(t *testing.T) { + t.Parallel() + + loader := &Loader{ + entityCachingConfig: &config.EntityCachingConfiguration{Enabled: true}, + } + + meta := loader.dataSourceMetaData(&nodev1.DataSourceConfiguration{ + EntityCacheConfigurations: []*nodev1.EntityCacheConfiguration{ + { + TypeName: "Item", + MaxAgeSeconds: 300, + NotFoundCacheTtlSeconds: 15, + IncludeHeaders: true, + PartialCacheLoad: true, + ShadowMode: true, + }, + }, + }, "items") + + require.Len(t, meta.FederationMetaData.EntityCaching, 1) + + cfg := meta.FederationMetaData.EntityCaching[0] + require.Equal(t, "Item", cfg.TypeName) + require.Equal(t, "default", cfg.CacheName) + require.Equal(t, 300*time.Second, cfg.TTL) + require.Equal(t, 15*time.Second, cfg.NegativeCacheTTL) + require.True(t, cfg.IncludeSubgraphHeaderPrefix) + require.True(t, cfg.EnablePartialCacheLoad) + require.True(t, cfg.ShadowMode) +} + +func TestDataSourceMetaDataMapsRootFieldMutationSubscriptionAndRequestScopedCacheConfig(t *testing.T) { + t.Parallel() + + mutationTTL := int64(15) + loader := &Loader{ + entityCachingConfig: &config.EntityCachingConfiguration{ + Enabled: true, + L2: config.EntityCachingL2Configuration{ + Enabled: true, + Storage: config.EntityCachingL2StorageConfig{ + ProviderID: "memory-default", + }, + }, + SubgraphCacheOverrides: []config.EntityCachingSubgraphCacheOverride{ + { + Name: "items", + Entities: []config.EntityCachingEntityConfig{ + {Type: "Item", StorageProviderID: "memory-items"}, + }, + }, + }, + }, + } + + meta := loader.dataSourceMetaData(&nodev1.DataSourceConfiguration{ + RootNodes: []*nodev1.TypeField{ + {TypeName: "Query", FieldNames: []string{"item"}}, + {TypeName: "Mutation", FieldNames: []string{"createItem", "deleteItem"}}, + {TypeName: "Subscription", FieldNames: []string{"itemCreated", "itemDeleted"}}, + }, + EntityCacheConfigurations: []*nodev1.EntityCacheConfiguration{ + { + TypeName: "Item", + MaxAgeSeconds: 60, + IncludeHeaders: true, + }, + }, + RootFieldCacheConfigurations: []*nodev1.RootFieldCacheConfiguration{ + { + FieldName: "item", + EntityTypeName: "Item", + MaxAgeSeconds: 30, + IncludeHeaders: true, + ShadowMode: true, + EntityKeyMappings: []*nodev1.EntityKeyMapping{ + { + EntityTypeName: "Item", + FieldMappings: []*nodev1.EntityCacheFieldMapping{ + { + EntityKeyField: "id", + ArgumentPath: []string{"id"}, + IsBatch: true, + }, + }, + }, + }, + }, + }, + CachePopulateConfigurations: []*nodev1.CachePopulateConfiguration{ + { + FieldName: "createItem", + EntityTypeName: "Item", + OperationType: "Mutation", + MaxAgeSeconds: &mutationTTL, + }, + { + FieldName: "itemCreated", + EntityTypeName: "Item", + OperationType: "Subscription", + }, + }, + CacheInvalidateConfigurations: []*nodev1.CacheInvalidateConfiguration{ + { + FieldName: "deleteItem", + EntityTypeName: "Item", + OperationType: "Mutation", + }, + { + FieldName: "itemDeleted", + EntityTypeName: "Item", + OperationType: "Subscription", + }, + }, + RequestScopedFields: []*nodev1.RequestScopedFieldConfiguration{ + { + FieldName: "currentViewer", + TypeName: "Query", + L1Key: "items.currentViewer", + }, + }, + }, "items") + + require.Len(t, meta.FederationMetaData.RootFieldCaching, 1) + rootCfg := meta.FederationMetaData.RootFieldCaching[0] + require.Equal(t, "Query", rootCfg.TypeName) + require.Equal(t, "item", rootCfg.FieldName) + require.Equal(t, "memory-items", rootCfg.CacheName) + require.Equal(t, 30*time.Second, rootCfg.TTL) + require.True(t, rootCfg.IncludeSubgraphHeaderPrefix) + require.True(t, rootCfg.ShadowMode) + require.Len(t, rootCfg.EntityKeyMappings, 1) + require.Len(t, rootCfg.EntityKeyMappings[0].FieldMappings, 1) + require.Equal(t, "id", rootCfg.EntityKeyMappings[0].FieldMappings[0].EntityKeyField) + require.Equal(t, []string{"id"}, rootCfg.EntityKeyMappings[0].FieldMappings[0].ArgumentPath) + require.True(t, rootCfg.EntityKeyMappings[0].FieldMappings[0].ArgumentIsEntityKey) + + require.Len(t, meta.FederationMetaData.MutationFieldCaching, 1) + require.Equal(t, "createItem", meta.FederationMetaData.MutationFieldCaching[0].FieldName) + require.True(t, meta.FederationMetaData.MutationFieldCaching[0].EnableEntityL2CachePopulation) + require.Equal(t, 15*time.Second, meta.FederationMetaData.MutationFieldCaching[0].TTL) + + require.Len(t, meta.FederationMetaData.MutationCacheInvalidation, 1) + require.Equal(t, "deleteItem", meta.FederationMetaData.MutationCacheInvalidation[0].FieldName) + require.Equal(t, "Item", meta.FederationMetaData.MutationCacheInvalidation[0].EntityTypeName) + + require.Len(t, meta.FederationMetaData.SubscriptionEntityPopulation, 2) + require.Equal(t, "itemCreated", meta.FederationMetaData.SubscriptionEntityPopulation[0].FieldName) + require.Equal(t, "memory-items", meta.FederationMetaData.SubscriptionEntityPopulation[0].CacheName) + require.Equal(t, 60*time.Second, meta.FederationMetaData.SubscriptionEntityPopulation[0].TTL) + require.True(t, meta.FederationMetaData.SubscriptionEntityPopulation[0].IncludeSubgraphHeaderPrefix) + require.False(t, meta.FederationMetaData.SubscriptionEntityPopulation[0].EnableInvalidationOnKeyOnly) + + require.Equal(t, "itemDeleted", meta.FederationMetaData.SubscriptionEntityPopulation[1].FieldName) + require.Equal(t, "memory-items", meta.FederationMetaData.SubscriptionEntityPopulation[1].CacheName) + require.True(t, meta.FederationMetaData.SubscriptionEntityPopulation[1].IncludeSubgraphHeaderPrefix) + require.True(t, meta.FederationMetaData.SubscriptionEntityPopulation[1].EnableInvalidationOnKeyOnly) + + require.Len(t, meta.FederationMetaData.RequestScopedFields, 1) + require.Equal(t, plan.RequestScopedField{ + FieldName: "currentViewer", + TypeName: "Query", + L1Key: "items.currentViewer", + }, meta.FederationMetaData.RequestScopedFields[0]) +} + +func TestRootTypeNameForField(t *testing.T) { + t.Parallel() + + t.Run("field found in Query type", func(t *testing.T) { + t.Parallel() + rootNodes := []*nodev1.TypeField{ + {TypeName: "Query", FieldNames: []string{"user", "users"}}, + {TypeName: "Mutation", FieldNames: []string{"createUser"}}, + } + assert.Equal(t, "Query", rootTypeNameForField(rootNodes, "user")) + }) + + t.Run("field found in Mutation type", func(t *testing.T) { + t.Parallel() + rootNodes := []*nodev1.TypeField{ + {TypeName: "Query", FieldNames: []string{"user"}}, + {TypeName: "Mutation", FieldNames: []string{"createUser", "deleteUser"}}, + } + assert.Equal(t, "Mutation", rootTypeNameForField(rootNodes, "createUser")) + }) + + t.Run("field not found", func(t *testing.T) { + t.Parallel() + rootNodes := []*nodev1.TypeField{ + {TypeName: "Query", FieldNames: []string{"user"}}, + {TypeName: "Mutation", FieldNames: []string{"createUser"}}, + } + assert.Equal(t, "", rootTypeNameForField(rootNodes, "nonExistent")) + }) + + t.Run("empty root nodes", func(t *testing.T) { + t.Parallel() + assert.Equal(t, "", rootTypeNameForField(nil, "user")) + }) + + t.Run("field in renamed query type", func(t *testing.T) { + t.Parallel() + rootNodes := []*nodev1.TypeField{ + {TypeName: "RootQuery", FieldNames: []string{"user", "products"}}, + } + assert.Equal(t, "RootQuery", rootTypeNameForField(rootNodes, "products")) + }) +} diff --git a/router/core/factoryresolver_test.go b/router/core/factoryresolver_test.go new file mode 100644 index 0000000000..0e7b778ba6 --- /dev/null +++ b/router/core/factoryresolver_test.go @@ -0,0 +1,55 @@ +package core + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + nodev1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/plan" +) + +func TestDataSourceMetaData_RequestScopedFields(t *testing.T) { + l := &Loader{} + + in := &nodev1.DataSourceConfiguration{ + RequestScopedFields: []*nodev1.RequestScopedFieldConfiguration{ + { + FieldName: "currentUser", + TypeName: "Query", + L1Key: "viewer.user", + }, + { + FieldName: "currentUser", + TypeName: "Personalized", + L1Key: "viewer.user", + }, + }, + } + + out := l.dataSourceMetaData(in, "test-subgraph") + + assert.Len(t, out.FederationMetaData.RequestScopedFields, 2) + + assert.Equal(t, plan.RequestScopedField{ + FieldName: "currentUser", + TypeName: "Query", + L1Key: "viewer.user", + }, out.FederationMetaData.RequestScopedFields[0]) + + assert.Equal(t, plan.RequestScopedField{ + FieldName: "currentUser", + TypeName: "Personalized", + L1Key: "viewer.user", + }, out.FederationMetaData.RequestScopedFields[1]) +} + +func TestDataSourceMetaData_RequestScopedFields_Empty(t *testing.T) { + l := &Loader{} + + in := &nodev1.DataSourceConfiguration{} + + out := l.dataSourceMetaData(in, "test-subgraph") + + assert.Nil(t, out.FederationMetaData.RequestScopedFields) +} diff --git a/router/core/flushwriter.go b/router/core/flushwriter.go index 6d39f32dc8..322e1fc4a2 100644 --- a/router/core/flushwriter.go +++ b/router/core/flushwriter.go @@ -220,7 +220,7 @@ func wrapMultipartMessage(resp []byte, wrapPayload bool) ([]byte, error) { if err != nil { return nil, err } - respValue, _, err := astjson.MergeValuesWithPath(nil, payloadWrapper, respValuePreMerge, "payload") + respValue, err := astjson.MergeValuesWithPath(nil, payloadWrapper, respValuePreMerge, "payload") if err != nil { return nil, err } diff --git a/router/core/graph_server.go b/router/core/graph_server.go index 2d6be1f8fc..fcd0faf3d1 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -10,7 +10,9 @@ import ( "net/http" "net/url" "path/filepath" + "reflect" "runtime" + "sort" "strings" "sync" "time" @@ -59,8 +61,8 @@ import ( "github.com/wundergraph/cosmo/router/pkg/slowplancache" "github.com/wundergraph/cosmo/router/pkg/statistics" rtrace "github.com/wundergraph/cosmo/router/pkg/trace" - "github.com/wundergraph/graphql-go-tools/v2/pkg/astparser" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" ) const ( @@ -98,18 +100,20 @@ type ( inFlightRequests *atomic.Uint64 // graphMuxList contains all graph muxes of this graph server. // It's keyed by mux name (feature flag name or empty string for base graph). - graphMuxList map[string]*graphMux - graphMuxListLock sync.Mutex - runtimeMetrics *rmetric.RuntimeMetrics - otlpEngineMetrics *rmetric.EngineMetrics - prometheusEngineMetrics *rmetric.EngineMetrics - connectionMetrics *rmetric.ConnectionMetrics - instanceData InstanceData - pubSubProviders []datasource.Provider - traceDialer *TraceDialer - connector *grpcconnector.Connector - circuitBreakerManager *circuit.Manager - headerPropagation *HeaderPropagation + graphMuxList map[string]*graphMux + graphMuxListLock sync.Mutex + runtimeMetrics *rmetric.RuntimeMetrics + otlpEngineMetrics *rmetric.EngineMetrics + prometheusEngineMetrics *rmetric.EngineMetrics + connectionMetrics *rmetric.ConnectionMetrics + instanceData InstanceData + pubSubProviders []datasource.Provider + traceDialer *TraceDialer + connector *grpcconnector.Connector + circuitBreakerManager *circuit.Manager + headerPropagation *HeaderPropagation + entityCacheInstances map[string]resolve.LoaderCache + entityCacheKeyInterceptors []EntityCacheKeyInterceptor } ) @@ -226,8 +230,23 @@ func newGraphServer(routerCtx context.Context, r *Router, response *routerconfig HostName: r.hostName, ListenAddress: r.listenAddr, }, - storageProviders: &r.storageProviders, - headerPropagation: r.headerPropagation, + storageProviders: &r.storageProviders, + headerPropagation: r.headerPropagation, + entityCacheKeyInterceptors: r.entityCacheKeyInterceptors, + } + + entityCacheInstances, err := r.buildEntityCacheInstances() + if err != nil { + return nil, fmt.Errorf("failed to build entity cache instances: %w", err) + } + s.entityCacheInstances = entityCacheInstances + + if entityCacheInstances != nil && r.entityCachingConfig.Enabled { + s.logEntityCacheOverrideIssues( + &r.entityCachingConfig, + response.Config.GetSubgraphs(), + response.Config.GetEngineConfig(), + ) } baseOtelAttributes := []attribute.KeyValue{ @@ -661,6 +680,53 @@ func (s *graphServer) setupEngineStatistics(baseAttributes []attribute.KeyValue) return nil } +// logEntityCacheOverrideIssues walks the entity caching overrides and emits +// warnings for references to unknown subgraphs or unknown entity types. It is +// non-fatal by design (void+log): startup continues regardless. Renaming from +// "validate*" to "log*" reflects this shape — it does not gate router startup. +func (s *graphServer) logEntityCacheOverrideIssues( + cfg *config.EntityCachingConfiguration, + configSubgraphs []*nodev1.Subgraph, + engineConfig *nodev1.EngineConfiguration, +) { + // Build lookup: subgraph name set + subgraphNames := make(map[string]bool, len(configSubgraphs)) + for _, sg := range configSubgraphs { + subgraphNames[sg.Name] = true + } + + // Build lookup: subgraph name → set of entity type names + // Datasources are keyed by ID, not name — map via subgraphNameByID + entityTypesBySubgraph := make(map[string]map[string]bool) + for _, ds := range engineConfig.DatasourceConfigurations { + sgName := subgraphNameByID(configSubgraphs, ds.Id) + if sgName == "" { + continue + } + if entityTypesBySubgraph[sgName] == nil { + entityTypesBySubgraph[sgName] = make(map[string]bool) + } + for _, ec := range ds.EntityCacheConfigurations { + entityTypesBySubgraph[sgName][ec.TypeName] = true + } + } + + for _, override := range cfg.SubgraphCacheOverrides { + if !subgraphNames[override.Name] { + s.logger.Warn("entity caching: subgraph_cache_overrides references unknown subgraph", + zap.String("subgraph", override.Name)) + continue + } + for _, entity := range override.Entities { + if entities := entityTypesBySubgraph[override.Name]; entities == nil || !entities[entity.Type] { + s.logger.Warn("entity caching: subgraph_cache_overrides references unknown entity type", + zap.String("subgraph", override.Name), + zap.String("entity_type", entity.Type)) + } + } + } +} + type graphMux struct { ctx context.Context cancel context.CancelFunc @@ -686,6 +752,65 @@ type graphMux struct { prometheusMetricsExporter *graphqlmetrics.PrometheusMetricsExporter } +type cacheMetricSource interface { + Metrics() *ristretto.Metrics + MaxSizeBytes() int64 +} + +type cacheMetricRegistration struct { + cacheType string + maxCost int64 + metrics *ristretto.Metrics +} + +func entityCacheMetricRegistrations(caches map[string]cacheMetricSource) []cacheMetricRegistration { + if len(caches) == 0 { + return nil + } + + type cacheGroup struct { + source cacheMetricSource + names []string + } + + grouped := make(map[uintptr]*cacheGroup, len(caches)) + for name, source := range caches { + if source == nil || source.Metrics() == nil || source.MaxSizeBytes() <= 0 { + continue + } + id := reflect.ValueOf(source).Pointer() + group := grouped[id] + if group == nil { + group = &cacheGroup{source: source} + grouped[id] = group + } + group.names = append(group.names, name) + } + + registrations := make([]cacheMetricRegistration, 0, len(grouped)) + for _, group := range grouped { + sort.Strings(group.names) + name := group.names[0] + for _, candidate := range group.names { + if candidate != "default" { + name = candidate + break + } + } + registrations = append(registrations, cacheMetricRegistration{ + cacheType: "entity_" + name, + maxCost: group.source.MaxSizeBytes(), + metrics: group.source.Metrics(), + }) + } + + sort.Slice(registrations, func(i, j int) bool { + return registrations[i].cacheType < registrations[j].cacheType + }) + + return registrations +} + // buildOperationCaches creates the caches for the graph mux. // The caches are created based on the engine configuration. func (s *graphMux) buildOperationCaches(srv *graphServer) (computeSha256 bool, err error) { @@ -929,6 +1054,18 @@ func (s *graphMux) configureCacheMetrics(srv *graphServer, baseOtelAttributes [] metricInfos = append(metricInfos, rmetric.NewCacheMetricInfo("query_hash", srv.engineExecutionConfiguration.OperationHashCacheSize, s.operationHashCache.Metrics)) } + entityMetricSources := make(map[string]cacheMetricSource) + for name, cache := range srv.entityCacheInstances { + source, ok := cache.(cacheMetricSource) + if !ok { + continue + } + entityMetricSources[name] = source + } + for _, registration := range entityCacheMetricRegistrations(entityMetricSources) { + metricInfos = append(metricInfos, rmetric.NewCacheMetricInfo(registration.cacheType, registration.maxCost, registration.metrics)) + } + if s.otelCacheMetrics != nil { if err := s.otelCacheMetrics.RegisterObservers(metricInfos); err != nil { return fmt.Errorf("failed to register observer for OTLP cache metrics: %w", err) @@ -1479,6 +1616,8 @@ func (s *graphServer) buildGraphMux( HeartbeatInterval: s.subscriptionHeartbeatInterval, PluginsEnabled: s.plugins.Enabled, InstanceData: s.instanceData, + EntityCacheInstances: s.entityCacheInstances, + EntityCachingConfig: &s.entityCachingConfig, }, ) if err != nil { @@ -1747,6 +1886,16 @@ func (s *graphServer) buildGraphMux( handlerOpts.ApolloSubscriptionMultipartPrintBoundary = s.apolloCompatibilityFlags.SubscriptionMultipartPrintBoundary.Enabled } + if s.entityCachingConfig.Enabled { + handlerOpts.EntityCaching = EntityCachingHandlerOptions{ + L1Enabled: s.entityCachingConfig.L1.Enabled, + L2Enabled: s.entityCachingConfig.L2.Enabled, + GlobalKeyPrefix: s.entityCachingConfig.GlobalCacheKeyPrefix, + KeyInterceptors: s.entityCacheKeyInterceptors, + } + // TODO: Add entity analytics exporter to handler options here once analytics pipeline is implemented (see ENTITY_CACHE_ANALYTICS.md). + } + graphqlHandler := NewGraphQLHandler(handlerOpts) executor.Resolver.SetAsyncErrorWriter(graphqlHandler) diff --git a/router/core/graphql_handler.go b/router/core/graphql_handler.go index f02817e8ac..76b1e195d4 100644 --- a/router/core/graphql_handler.go +++ b/router/core/graphql_handler.go @@ -85,6 +85,16 @@ type HandlerOptions struct { ApolloSubscriptionMultipartPrintBoundary bool HeaderPropagation *HeaderPropagation + + EntityCaching EntityCachingHandlerOptions +} + +// EntityCachingHandlerOptions groups all entity caching configuration passed to the GraphQL handler. +type EntityCachingHandlerOptions struct { + L1Enabled bool + L2Enabled bool + GlobalKeyPrefix string + KeyInterceptors []EntityCacheKeyInterceptor } func NewGraphQLHandler(opts HandlerOptions) *GraphQLHandler { @@ -106,6 +116,7 @@ func NewGraphQLHandler(opts HandlerOptions) *GraphQLHandler { engineLoaderHooks: opts.EngineLoaderHooks, apolloSubscriptionMultipartPrintBoundary: opts.ApolloSubscriptionMultipartPrintBoundary, headerPropagation: opts.HeaderPropagation, + entityCaching: opts.EntityCaching, } return graphQLHandler } @@ -138,6 +149,8 @@ type GraphQLHandler struct { enableCostResponseHeaders bool apolloSubscriptionMultipartPrintBoundary bool + + entityCaching EntityCachingHandlerOptions } func (h *GraphQLHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { @@ -163,6 +176,7 @@ func (h *GraphQLHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { resolveCtx.InitialPayload = reqCtx.operation.initialPayload resolveCtx.Extensions = reqCtx.operation.extensions resolveCtx.ExecutionOptions = reqCtx.operation.executionOptions + resolveCtx.ExecutionOptions.Caching = h.cachingOptions(reqCtx) if h.headerPropagation != nil { resolveCtx.SubgraphHeadersBuilder = SubgraphHeadersBuilder( @@ -586,3 +600,60 @@ func (h *GraphQLHandler) setDebugCacheHeaders(w http.ResponseWriter, opCtx *oper } } } + +const ( + disableEntityCacheHeader = "X-WG-Disable-Entity-Cache" + disableEntityCacheL1Header = "X-WG-Disable-Entity-Cache-L1" + disableEntityCacheL2Header = "X-WG-Disable-Entity-Cache-L2" + cacheKeyPrefixHeader = "X-WG-Cache-Key-Prefix" +) + +func (h *GraphQLHandler) cachingOptions(reqCtx *requestContext) resolve.CachingOptions { + enableL1 := h.entityCaching.L1Enabled + enableL2 := h.entityCaching.L2Enabled + globalKeyPrefix := h.entityCaching.GlobalKeyPrefix + + // Allow per-request cache control headers only when tracing is authorized + // (dev mode or valid studio request token). This prevents production abuse. + if reqCtx.operation.traceOptions.Enable { + if reqCtx.request.Header.Get(disableEntityCacheHeader) == "true" { + enableL1 = false + enableL2 = false + } else { + if reqCtx.request.Header.Get(disableEntityCacheL1Header) == "true" { + enableL1 = false + } + if reqCtx.request.Header.Get(disableEntityCacheL2Header) == "true" { + enableL2 = false + } + } + if prefix := reqCtx.request.Header.Get(cacheKeyPrefixHeader); prefix != "" { + if globalKeyPrefix != "" { + globalKeyPrefix = prefix + ":" + globalKeyPrefix + } else { + globalKeyPrefix = prefix + } + } + } + + return resolve.CachingOptions{ + EnableL1Cache: enableL1, + EnableL2Cache: enableL2, + EnableCacheAnalytics: false, + GlobalCacheKeyPrefix: globalKeyPrefix, + L2CacheKeyInterceptor: h.buildL2CacheKeyInterceptor(reqCtx), + } +} + +func (h *GraphQLHandler) buildL2CacheKeyInterceptor(reqCtx *requestContext) resolve.L2CacheKeyInterceptor { + if len(h.entityCaching.KeyInterceptors) == 0 { + return nil + } + return func(ctx context.Context, key string, info resolve.L2CacheKeyInterceptorInfo) string { + keys := [][]byte{[]byte(key)} + for _, interceptor := range h.entityCaching.KeyInterceptors { + keys = interceptor.OnEntityCacheKeys(keys, reqCtx) + } + return string(keys[0]) + } +} diff --git a/router/core/graphql_handler_caching_options_test.go b/router/core/graphql_handler_caching_options_test.go new file mode 100644 index 0000000000..705fc7e3e8 --- /dev/null +++ b/router/core/graphql_handler_caching_options_test.go @@ -0,0 +1,172 @@ +package core + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/require" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" +) + +func newCachingOptionsHandler(entity EntityCachingHandlerOptions) *GraphQLHandler { + return &GraphQLHandler{entityCaching: entity} +} + +func newCachingOptionsReqCtx(t *testing.T, traceEnabled bool, headers map[string]string) *requestContext { + t.Helper() + req := httptest.NewRequest(http.MethodPost, "/graphql", nil) + for k, v := range headers { + req.Header.Set(k, v) + } + return &requestContext{ + request: req, + operation: &operationContext{ + traceOptions: resolve.TraceOptions{Enable: traceEnabled}, + }, + } +} + +func TestGraphQLHandler_cachingOptions_DefaultsFromHandler(t *testing.T) { + t.Parallel() + h := newCachingOptionsHandler(EntityCachingHandlerOptions{ + L1Enabled: true, + L2Enabled: true, + GlobalKeyPrefix: "router-a", + }) + reqCtx := newCachingOptionsReqCtx(t, false, nil) + + opts := h.cachingOptions(reqCtx) + require.Equal(t, resolve.CachingOptions{ + EnableL1Cache: true, + EnableL2Cache: true, + EnableCacheAnalytics: false, + GlobalCacheKeyPrefix: "router-a", + L2CacheKeyInterceptor: nil, + }, opts) +} + +func TestGraphQLHandler_cachingOptions_DisableCacheHeaderIgnoredWithoutTracing(t *testing.T) { + t.Parallel() + h := newCachingOptionsHandler(EntityCachingHandlerOptions{ + L1Enabled: true, + L2Enabled: true, + }) + // Tracing NOT enabled — headers should be ignored. + reqCtx := newCachingOptionsReqCtx(t, false, map[string]string{ + disableEntityCacheHeader: "true", + disableEntityCacheL1Header: "true", + disableEntityCacheL2Header: "true", + cacheKeyPrefixHeader: "ignored", + }) + + opts := h.cachingOptions(reqCtx) + require.Equal(t, resolve.CachingOptions{ + EnableL1Cache: true, + EnableL2Cache: true, + EnableCacheAnalytics: false, + GlobalCacheKeyPrefix: "", + L2CacheKeyInterceptor: nil, + }, opts) +} + +func TestGraphQLHandler_cachingOptions_DisableAllWithTracing(t *testing.T) { + t.Parallel() + h := newCachingOptionsHandler(EntityCachingHandlerOptions{ + L1Enabled: true, + L2Enabled: true, + }) + reqCtx := newCachingOptionsReqCtx(t, true, map[string]string{ + disableEntityCacheHeader: "true", + }) + + opts := h.cachingOptions(reqCtx) + require.Equal(t, resolve.CachingOptions{ + EnableL1Cache: false, + EnableL2Cache: false, + EnableCacheAnalytics: false, + GlobalCacheKeyPrefix: "", + L2CacheKeyInterceptor: nil, + }, opts) +} + +func TestGraphQLHandler_cachingOptions_DisableL1Only(t *testing.T) { + t.Parallel() + h := newCachingOptionsHandler(EntityCachingHandlerOptions{ + L1Enabled: true, + L2Enabled: true, + }) + reqCtx := newCachingOptionsReqCtx(t, true, map[string]string{ + disableEntityCacheL1Header: "true", + }) + + opts := h.cachingOptions(reqCtx) + require.Equal(t, resolve.CachingOptions{ + EnableL1Cache: false, + EnableL2Cache: true, + EnableCacheAnalytics: false, + GlobalCacheKeyPrefix: "", + L2CacheKeyInterceptor: nil, + }, opts) +} + +func TestGraphQLHandler_cachingOptions_DisableL2Only(t *testing.T) { + t.Parallel() + h := newCachingOptionsHandler(EntityCachingHandlerOptions{ + L1Enabled: true, + L2Enabled: true, + }) + reqCtx := newCachingOptionsReqCtx(t, true, map[string]string{ + disableEntityCacheL2Header: "true", + }) + + opts := h.cachingOptions(reqCtx) + require.Equal(t, resolve.CachingOptions{ + EnableL1Cache: true, + EnableL2Cache: false, + EnableCacheAnalytics: false, + GlobalCacheKeyPrefix: "", + L2CacheKeyInterceptor: nil, + }, opts) +} + +func TestGraphQLHandler_cachingOptions_CacheKeyPrefixPrependsToGlobal(t *testing.T) { + t.Parallel() + h := newCachingOptionsHandler(EntityCachingHandlerOptions{ + L1Enabled: true, + L2Enabled: true, + GlobalKeyPrefix: "base", + }) + reqCtx := newCachingOptionsReqCtx(t, true, map[string]string{ + cacheKeyPrefixHeader: "req-42", + }) + + opts := h.cachingOptions(reqCtx) + require.Equal(t, resolve.CachingOptions{ + EnableL1Cache: true, + EnableL2Cache: true, + EnableCacheAnalytics: false, + GlobalCacheKeyPrefix: "req-42:base", + L2CacheKeyInterceptor: nil, + }, opts) +} + +func TestGraphQLHandler_cachingOptions_CacheKeyPrefixReplacesEmptyGlobal(t *testing.T) { + t.Parallel() + h := newCachingOptionsHandler(EntityCachingHandlerOptions{ + L1Enabled: true, + L2Enabled: true, + }) + reqCtx := newCachingOptionsReqCtx(t, true, map[string]string{ + cacheKeyPrefixHeader: "standalone", + }) + + opts := h.cachingOptions(reqCtx) + require.Equal(t, resolve.CachingOptions{ + EnableL1Cache: true, + EnableL2Cache: true, + EnableCacheAnalytics: false, + GlobalCacheKeyPrefix: "standalone", + L2CacheKeyInterceptor: nil, + }, opts) +} diff --git a/router/core/modules.go b/router/core/modules.go index a05ac63683..e528bcfb3f 100644 --- a/router/core/modules.go +++ b/router/core/modules.go @@ -155,6 +155,16 @@ type SpanNameFormatterProvider interface { WrapSpanNameFormatter(next SpanNameFormatterFunc) SpanNameFormatterFunc } +// EntityCacheKeyInterceptor allows custom modules to transform entity cache keys +// before they are used for L2 cache operations. +type EntityCacheKeyInterceptor interface { + // OnEntityCacheKeys transforms a batch of cache keys for an entity cache operation. + // Each key is a JSON-encoded entity key or root field key. + // Returns the transformed keys in the same order. The returned slice must have + // the same length as the input slice. + OnEntityCacheKeys(keys [][]byte, ctx RequestContext) [][]byte +} + // Provisioner is called before the server starts // It allows you to initialize your module, e.g., create a database connection // or load a configuration file. diff --git a/router/core/router.go b/router/core/router.go index 4dd2f64b54..22f2b8a547 100644 --- a/router/core/router.go +++ b/router/core/router.go @@ -6,6 +6,7 @@ import ( "crypto/x509" "errors" "fmt" + "io" "net" "net/http" "net/url" @@ -51,6 +52,7 @@ import ( "github.com/wundergraph/cosmo/router/pkg/controlplane/configpoller" "github.com/wundergraph/cosmo/router/pkg/controlplane/selfregister" "github.com/wundergraph/cosmo/router/pkg/cors" + "github.com/wundergraph/cosmo/router/pkg/entitycache" "github.com/wundergraph/cosmo/router/pkg/execution_config" "github.com/wundergraph/cosmo/router/pkg/health" "github.com/wundergraph/cosmo/router/pkg/mcpserver" @@ -60,6 +62,7 @@ import ( rtrace "github.com/wundergraph/cosmo/router/pkg/trace" "github.com/wundergraph/cosmo/router/pkg/trace/attributeprocessor" "github.com/wundergraph/cosmo/router/pkg/watcher" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" "github.com/wundergraph/graphql-go-tools/v2/pkg/netpoll" ) @@ -347,6 +350,11 @@ func NewRouter(ctx context.Context, opts ...Option) (*Router, error) { "x-wg-token", "x-wg-skip-loader", "x-wg-include-query-plan", + // Required for the studio playground's Cache Explorer (cache-mode dropdown) + "x-wg-disable-entity-cache", + "x-wg-disable-entity-cache-l1", + "x-wg-disable-entity-cache-l2", + "x-wg-cache-key-prefix", // Required for Trace Context propagation "traceparent", "tracestate", @@ -766,6 +774,10 @@ func (r *Router) initModules(ctx context.Context) error { r.subscriptionHooks.onReceiveEvents.handlers = append(r.subscriptionHooks.onReceiveEvents.handlers, handler.OnReceiveEvents) } + if interceptor, ok := moduleInstance.(EntityCacheKeyInterceptor); ok { + r.entityCacheKeyInterceptors = append(r.entityCacheKeyInterceptors, interceptor) + } + r.modules = append(r.modules, moduleInstance) r.logger.Info("Module registered", @@ -959,6 +971,8 @@ func (r *Router) bootstrap(ctx context.Context) error { r.logger.Info("GraphQL schema coverage metrics enabled") } + // TODO: Add entity analytics exporter setup here once the analytics pipeline is implemented (see ENTITY_CACHE_ANALYTICS.md). + // Create Prometheus metrics exporter for schema field usage // Note: This is separate from the Prometheus meter provider which handles OTEL metrics // This exporter is specifically for schema field usage tracking via the Prometheus sink @@ -1468,6 +1482,111 @@ func (r *Router) buildConfigPoller(registry *ProviderRegistry) error { return nil } +// buildEntityCacheInstances creates Redis-backed LoaderCache instances from storage providers +// based on the entity caching configuration. If pre-seeded instances are set (via WithEntityCacheInstances), +// those are returned directly. +func (r *Router) buildEntityCacheInstances() (map[string]resolve.LoaderCache, error) { + if r.entityCacheInstances != nil { + return r.entityCacheInstances, nil + } + if !r.entityCachingConfig.Enabled || !r.entityCachingConfig.L2.Enabled { + return nil, nil + } + + caches := make(map[string]resolve.LoaderCache) + l2Cfg := r.entityCachingConfig.L2 + + // Build default cache from l2.storage.provider_id. Store it under both the + // literal "default" key (used when no override matches) and under its real + // provider_id so an override that redirects to the same backend reuses the + // same instance instead of allocating a second one. + if l2Cfg.Storage.ProviderID != "" { + cache, err := r.buildSingleEntityCache(l2Cfg.Storage.ProviderID, l2Cfg) + if err != nil { + return nil, fmt.Errorf("entity caching default provider: %w", err) + } + caches["default"] = cache + caches[l2Cfg.Storage.ProviderID] = cache + } + + // Build per-subgraph/entity caches from subgraph_cache_overrides + for _, sg := range r.entityCachingConfig.SubgraphCacheOverrides { + // Collect unique provider IDs from subgraph-level and entity-level overrides + providerIDs := make(map[string]string) // providerID → context (for error messages) + if sg.StorageProviderID != "" && sg.StorageProviderID != "default" { + providerIDs[sg.StorageProviderID] = sg.Name + } + for _, entity := range sg.Entities { + if entity.StorageProviderID != "" && entity.StorageProviderID != "default" { + providerIDs[entity.StorageProviderID] = sg.Name + "." + entity.Type + } + } + for providerID, context := range providerIDs { + if _, exists := caches[providerID]; exists { + // Already built (either as the default cache's provider alias + // or from an earlier override); reuse the same instance. + continue + } + cache, err := r.buildSingleEntityCache(providerID, l2Cfg) + if err != nil { + return nil, fmt.Errorf("entity caching provider %q for %s: %w", + providerID, context, err) + } + caches[providerID] = cache + } + } + + return caches, nil +} + +// buildSingleEntityCache creates a cache backed by either Redis or memory, with optional circuit breaker wrapping. +func (r *Router) buildSingleEntityCache(providerID string, l2Cfg config.EntityCachingL2Configuration) (resolve.LoaderCache, error) { + var cache resolve.LoaderCache + if memProvider, ok := r.findMemoryProvider(providerID); ok { + mc, err := entitycache.NewMemoryEntityCache(int64(memProvider.MaxSize)) + if err != nil { + return nil, fmt.Errorf("creating memory cache: %w", err) + } + cache = mc + } else { + client, err := r.buildRedisClient(providerID) + if err != nil { + return nil, err + } + cache = entitycache.NewRedisEntityCache(client, l2Cfg.Storage.KeyPrefix) + } + if l2Cfg.CircuitBreaker.Enabled { + cache = entitycache.NewCircuitBreakerCache(cache, entitycache.CircuitBreakerConfig{ + Enabled: true, + FailureThreshold: l2Cfg.CircuitBreaker.FailureThreshold, + CooldownPeriod: l2Cfg.CircuitBreaker.CooldownPeriod, + }) + } + return cache, nil +} + +func (r *Router) buildRedisClient(providerID string) (rd.RDCloser, error) { + for _, provider := range r.storageProviders.Redis { + if provider.ID == providerID { + return rd.NewRedisCloser(&rd.RedisCloserOptions{ + Logger: r.logger, + URLs: provider.URLs, + ClusterEnabled: provider.ClusterEnabled, + }) + } + } + return nil, fmt.Errorf("storage provider %q not found in storage_providers (checked redis, memory)", providerID) +} + +func (r *Router) findMemoryProvider(providerID string) (*config.MemoryStorageProvider, bool) { + for i := range r.storageProviders.Memory { + if r.storageProviders.Memory[i].ID == providerID { + return &r.storageProviders.Memory[i], true + } + } + return nil, false +} + // Start starts the router. It does block until the router has been initialized. After that the server is listening // on a separate goroutine. The server can be shutdown with Router.Shutdown(). Not safe for concurrent use. // During initialization, the router will register itself with the control plane and poll the config from the CDN @@ -1913,6 +2032,15 @@ func (r *Router) Shutdown(ctx context.Context) error { r.pqlStore.Close() } + // Close entity cache instances that implement io.Closer (e.g. ristretto-backed MemoryEntityCache). + for _, cache := range r.entityCacheInstances { + if closer, ok := cache.(io.Closer); ok { + if closeErr := closer.Close(); closeErr != nil { + err.Append(fmt.Errorf("failed to close entity cache: %w", closeErr)) + } + } + } + r.usage.Close() wg.Wait() @@ -2507,6 +2635,18 @@ func WithStorageProviders(cfg config.StorageProviders) Option { } } +func WithEntityCaching(cfg config.EntityCachingConfiguration) Option { + return func(r *Router) { + r.entityCachingConfig = cfg + } +} + +func WithEntityCacheInstances(caches map[string]resolve.LoaderCache) Option { + return func(r *Router) { + r.entityCacheInstances = caches + } +} + func WithClientHeader(cfg config.ClientHeader) Option { return func(r *Router) { r.clientHeader = cfg diff --git a/router/core/router_config.go b/router/core/router_config.go index 6b380ede12..f64220082e 100644 --- a/router/core/router_config.go +++ b/router/core/router_config.go @@ -20,6 +20,7 @@ import ( rmetric "github.com/wundergraph/cosmo/router/pkg/metric" "github.com/wundergraph/cosmo/router/pkg/pubsub/datasource" rtrace "github.com/wundergraph/cosmo/router/pkg/trace" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" "go.opentelemetry.io/otel/propagation" sdkmetric "go.opentelemetry.io/otel/sdk/metric" sdktrace "go.opentelemetry.io/otel/sdk/trace" @@ -153,6 +154,9 @@ type Config struct { grpcPluginDialOptions []grpc.DialOption tracingAttributes []config.CustomAttribute subscriptionHooks subscriptionHooks + entityCachingConfig config.EntityCachingConfiguration + entityCacheInstances map[string]resolve.LoaderCache + entityCacheKeyInterceptors []EntityCacheKeyInterceptor } // Usage returns an anonymized version of the config for usage tracking diff --git a/router/core/router_entity_cache_test.go b/router/core/router_entity_cache_test.go new file mode 100644 index 0000000000..20cc98e791 --- /dev/null +++ b/router/core/router_entity_cache_test.go @@ -0,0 +1,170 @@ +package core + +import ( + "testing" + "time" + + ristretto "github.com/dgraph-io/ristretto/v2" + "github.com/stretchr/testify/require" + "go.uber.org/zap" + + "github.com/wundergraph/cosmo/router/pkg/config" + "github.com/wundergraph/cosmo/router/pkg/entitycache" +) + +// TestBuildEntityCacheInstances_ReusesDefaultCacheForSameProviderID verifies +// that when an override points at the same provider_id as l2.storage.provider_id, +// no second cache instance is allocated. The default entry and the provider_id +// entry must resolve to the same *MemoryEntityCache pointer. +func TestBuildEntityCacheInstances_ReusesDefaultCacheForSameProviderID(t *testing.T) { + t.Parallel() + + r := &Router{ + Config: Config{ + logger: zap.NewNop(), + entityCachingConfig: config.EntityCachingConfiguration{ + Enabled: true, + L2: config.EntityCachingL2Configuration{ + Enabled: true, + Storage: config.EntityCachingL2StorageConfig{ + ProviderID: "memory-1", + }, + }, + SubgraphCacheOverrides: []config.EntityCachingSubgraphCacheOverride{ + { + Name: "products", + StorageProviderID: "memory-1", // same as the default + }, + }, + }, + storageProviders: config.StorageProviders{ + Memory: []config.MemoryStorageProvider{ + {ID: "memory-1", MaxSize: config.BytesString(1024 * 1024)}, + }, + }, + }, + } + + caches, err := r.buildEntityCacheInstances() + require.NoError(t, err) + require.Len(t, caches, 2, "expected exactly two keys: default and memory-1") + + defaultCache, ok := caches["default"] + require.True(t, ok, `missing "default" entry`) + namedCache, ok := caches["memory-1"] + require.True(t, ok, `missing "memory-1" entry`) + require.Same(t, defaultCache, namedCache, + "default cache and same-provider-id override must share the same instance") +} + +// TestBuildEntityCacheInstances_DistinctProviderIDs verifies that overrides +// pointing at a different provider still allocate their own cache instance. +func TestBuildEntityCacheInstances_DistinctProviderIDs(t *testing.T) { + t.Parallel() + + r := &Router{ + Config: Config{ + logger: zap.NewNop(), + entityCachingConfig: config.EntityCachingConfiguration{ + Enabled: true, + L2: config.EntityCachingL2Configuration{ + Enabled: true, + Storage: config.EntityCachingL2StorageConfig{ + ProviderID: "memory-1", + }, + }, + SubgraphCacheOverrides: []config.EntityCachingSubgraphCacheOverride{ + { + Name: "products", + StorageProviderID: "memory-2", + }, + }, + }, + storageProviders: config.StorageProviders{ + Memory: []config.MemoryStorageProvider{ + {ID: "memory-1", MaxSize: config.BytesString(1024 * 1024)}, + {ID: "memory-2", MaxSize: config.BytesString(2 * 1024 * 1024)}, + }, + }, + }, + } + + caches, err := r.buildEntityCacheInstances() + require.NoError(t, err) + require.Len(t, caches, 3, "expected three keys: default, memory-1 alias, memory-2 override") + require.NotSame(t, caches["memory-1"], caches["memory-2"], + "distinct provider ids must yield distinct cache instances") + require.Same(t, caches["default"], caches["memory-1"], + "default alias must point at the memory-1 instance") +} + +func TestBuildEntityCacheInstances_DisabledReturnsNil(t *testing.T) { + t.Parallel() + + r := &Router{ + Config: Config{ + logger: zap.NewNop(), + entityCachingConfig: config.EntityCachingConfiguration{ + Enabled: false, + }, + }, + } + + caches, err := r.buildEntityCacheInstances() + require.NoError(t, err) + require.Nil(t, caches) +} + +func TestBuildSingleEntityCache_WrapsMemoryProviderWithCircuitBreaker(t *testing.T) { + t.Parallel() + + r := &Router{ + Config: Config{ + logger: zap.NewNop(), + storageProviders: config.StorageProviders{ + Memory: []config.MemoryStorageProvider{ + {ID: "memory-1", MaxSize: config.BytesString(2048)}, + }, + }, + }, + } + + cache, err := r.buildSingleEntityCache("memory-1", config.EntityCachingL2Configuration{ + CircuitBreaker: config.EntityCachingCircuitBreakerConfig{ + Enabled: true, + FailureThreshold: 3, + CooldownPeriod: time.Second, + }, + }) + require.NoError(t, err) + + breaker, ok := cache.(*entitycache.CircuitBreakerCache) + require.True(t, ok, "expected circuit breaker wrapper") + + metricsProvider, ok := any(breaker).(interface { + Metrics() *ristretto.Metrics + MaxSizeBytes() int64 + }) + require.True(t, ok, "wrapped cache should expose metrics accessors") + require.NotNil(t, metricsProvider.Metrics()) + require.EqualValues(t, 2048, metricsProvider.MaxSizeBytes()) +} + +func TestFindMemoryProvider_ReturnsFalseForUnknownProvider(t *testing.T) { + t.Parallel() + + r := &Router{ + Config: Config{ + logger: zap.NewNop(), + storageProviders: config.StorageProviders{ + Memory: []config.MemoryStorageProvider{ + {ID: "memory-1", MaxSize: config.BytesString(1024)}, + }, + }, + }, + } + + provider, ok := r.findMemoryProvider("missing") + require.False(t, ok) + require.Nil(t, provider) +} diff --git a/router/core/supervisor_instance.go b/router/core/supervisor_instance.go index d6a29db483..565883ec95 100644 --- a/router/core/supervisor_instance.go +++ b/router/core/supervisor_instance.go @@ -219,6 +219,7 @@ func optionsFromResources(logger *zap.Logger, config *config.Config, reloadPersi WithApolloCompatibilityFlagsConfig(config.ApolloCompatibilityFlags), WithApolloRouterCompatibilityFlags(config.ApolloRouterCompatibilityFlags), WithStorageProviders(config.StorageProviders), + WithEntityCaching(config.EntityCaching), WithGraphQLPath(config.GraphQLPath), WithModulesConfig(config.Modules), WithGracePeriod(config.GracePeriod), diff --git a/router/core/websocket.go b/router/core/websocket.go index 95f83864b6..293cd3d7a3 100644 --- a/router/core/websocket.go +++ b/router/core/websocket.go @@ -1102,6 +1102,7 @@ func (h *WebSocketConnectionHandler) executeSubscription(registration *Subscript resolveCtx.TracingOptions = operationCtx.traceOptions resolveCtx.Extensions = operationCtx.extensions resolveCtx.ExecutionOptions = operationCtx.executionOptions + resolveCtx.ExecutionOptions.Caching = h.graphqlHandler.cachingOptions(reqContext) if operationCtx.initialPayload != nil { resolveCtx.InitialPayload = operationCtx.initialPayload diff --git a/router/go.mod b/router/go.mod index 1d099273e3..ea0473cae8 100644 --- a/router/go.mod +++ b/router/go.mod @@ -31,7 +31,7 @@ require ( github.com/tidwall/gjson v1.18.0 github.com/tidwall/sjson v1.2.5 github.com/twmb/franz-go v1.16.1 - github.com/wundergraph/graphql-go-tools/v2 v2.4.4 + github.com/wundergraph/graphql-go-tools/v2 v2.4.5-0.20260610161004-63fa1c88eaea // Do not upgrade, it renames attributes we rely on go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 go.opentelemetry.io/contrib/propagators/b3 v1.43.0 @@ -82,8 +82,8 @@ require ( github.com/prometheus/otlptranslator v1.0.0 github.com/santhosh-tekuri/jsonschema/v6 v6.0.1 github.com/tonglil/opentelemetry-go-datadog-propagator v0.1.3 - github.com/wundergraph/astjson v1.1.0 - github.com/wundergraph/go-arena v1.1.0 + github.com/wundergraph/astjson v1.1.1-0.20260419105127-f600d161463f + github.com/wundergraph/go-arena v1.2.0 go.uber.org/goleak v1.3.0 go.uber.org/ratelimit v0.3.1 golang.org/x/exp v0.0.0-20250813145105-42675adae3e6 diff --git a/router/go.sum b/router/go.sum index c0bb919835..ed70b5f879 100644 --- a/router/go.sum +++ b/router/go.sum @@ -329,12 +329,12 @@ github.com/vbatts/tar-split v0.12.1 h1:CqKoORW7BUWBe7UL/iqTVvkTBOF8UvOMKOIZykxnn github.com/vbatts/tar-split v0.12.1/go.mod h1:eF6B6i6ftWQcDqEn3/iGFRFRo8cBIMSJVOpnNdfTMFA= github.com/vektah/gqlparser/v2 v2.5.30 h1:EqLwGAFLIzt1wpx1IPpY67DwUujF1OfzgEyDsLrN6kE= github.com/vektah/gqlparser/v2 v2.5.30/go.mod h1:D1/VCZtV3LPnQrcPBeR/q5jkSQIPti0uYCP/RI0gIeo= -github.com/wundergraph/astjson v1.1.0 h1:xORDosrZ87zQFJwNGe/HIHXqzpdHOFmqWgykCLVL040= -github.com/wundergraph/astjson v1.1.0/go.mod h1:h12D/dxxnedtLzsKyBLK7/Oe4TAoGpRVC9nDpDrZSWw= -github.com/wundergraph/go-arena v1.1.0 h1:9+wSRkJAkA2vbYHp6s8tEGhPViRGQNGXqPHT0QzhdIc= -github.com/wundergraph/go-arena v1.1.0/go.mod h1:ROOysEHWJjLQ8FSfNxZCziagb7Qw2nXY3/vgKRh7eWw= -github.com/wundergraph/graphql-go-tools/v2 v2.4.4 h1:VCvS9bku4ie7+St3+H5SNuVz6dtQiDKujqQ439yrMBM= -github.com/wundergraph/graphql-go-tools/v2 v2.4.4/go.mod h1:7ljNHLrBOoOszCk4ir4Z+O6Yrf+vwBBmxjwqM3imVgA= +github.com/wundergraph/astjson v1.1.1-0.20260419105127-f600d161463f h1:MoVoeMlgY9Ej1aoF3Y/kniBZ8pv+WfIA3YSCnPBh+6M= +github.com/wundergraph/astjson v1.1.1-0.20260419105127-f600d161463f/go.mod h1:uHSJv7uowLN/nIPvkTFqUDt1sXk4qQU0KNwHfwfDcQE= +github.com/wundergraph/go-arena v1.2.0 h1:6MlhEy0NBY3Z+BuK3rj0F9YoT3bM0SlahGkzK0lKRZ4= +github.com/wundergraph/go-arena v1.2.0/go.mod h1:ROOysEHWJjLQ8FSfNxZCziagb7Qw2nXY3/vgKRh7eWw= +github.com/wundergraph/graphql-go-tools/v2 v2.4.5-0.20260610161004-63fa1c88eaea h1:ACjZjX87K3ADlFy54YrwZ/UPCugKL56/DtQNWB5EGeU= +github.com/wundergraph/graphql-go-tools/v2 v2.4.5-0.20260610161004-63fa1c88eaea/go.mod h1:3NuqY1nBh7g4IkytYazmT6RHg/giCsdZpmX0NkpayNs= github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M= diff --git a/router/pkg/config/config.go b/router/pkg/config/config.go index faec91db69..8311568672 100644 --- a/router/pkg/config/config.go +++ b/router/pkg/config/config.go @@ -1017,9 +1017,15 @@ type StorageProviders struct { S3 []S3StorageProvider `yaml:"s3,omitempty" envPrefix:"S3_"` CDN []CDNStorageProvider `yaml:"cdn,omitempty" envPrefix:"CDN_"` Redis []RedisStorageProvider `yaml:"redis,omitempty" envPrefix:"REDIS_"` + Memory []MemoryStorageProvider `yaml:"memory,omitempty" envPrefix:"MEMORY_"` FileSystem []FileSystemStorageProvider `yaml:"file_system,omitempty" envPrefix:"FS_"` } +type MemoryStorageProvider struct { + ID string `yaml:"id,omitempty" env:"STORAGE_PROVIDER_MEMORY_ID"` + MaxSize BytesString `yaml:"max_size" envDefault:"100MB" env:"STORAGE_PROVIDER_MEMORY_MAX_SIZE"` +} + type PersistedOperationsStorageConfig struct { ProviderID string `yaml:"provider_id,omitempty" env:"PERSISTED_OPERATIONS_STORAGE_PROVIDER_ID"` ObjectPrefix string `yaml:"object_prefix,omitempty" env:"PERSISTED_OPERATIONS_STORAGE_OBJECT_PREFIX"` @@ -1148,6 +1154,46 @@ type AutomaticPersistedQueriesConfig struct { Storage AutomaticPersistedQueriesStorageConfig `yaml:"storage"` } +type EntityCachingConfiguration struct { + Enabled bool `yaml:"enabled" envDefault:"false" env:"ENTITY_CACHING_ENABLED"` + GlobalCacheKeyPrefix string `yaml:"global_cache_key_prefix,omitempty" env:"ENTITY_CACHING_GLOBAL_CACHE_KEY_PREFIX"` + L1 EntityCachingL1Configuration `yaml:"l1"` + L2 EntityCachingL2Configuration `yaml:"l2"` + SubgraphCacheOverrides []EntityCachingSubgraphCacheOverride `yaml:"subgraph_cache_overrides,omitempty"` +} + +type EntityCachingL1Configuration struct { + Enabled bool `yaml:"enabled" envDefault:"true" env:"ENTITY_CACHING_L1_ENABLED"` +} + +type EntityCachingL2Configuration struct { + Enabled bool `yaml:"enabled" envDefault:"true" env:"ENTITY_CACHING_L2_ENABLED"` + Storage EntityCachingL2StorageConfig `yaml:"storage"` + CircuitBreaker EntityCachingCircuitBreakerConfig `yaml:"circuit_breaker"` +} + +type EntityCachingL2StorageConfig struct { + ProviderID string `yaml:"provider_id,omitempty" env:"ENTITY_CACHING_L2_STORAGE_PROVIDER_ID"` + KeyPrefix string `yaml:"key_prefix,omitempty" envDefault:"cosmo_entity_cache" env:"ENTITY_CACHING_L2_STORAGE_KEY_PREFIX"` +} + +type EntityCachingCircuitBreakerConfig struct { + Enabled bool `yaml:"enabled" envDefault:"false" env:"ENTITY_CACHING_L2_CIRCUIT_BREAKER_ENABLED"` + FailureThreshold int `yaml:"failure_threshold" envDefault:"5" env:"ENTITY_CACHING_L2_CIRCUIT_BREAKER_FAILURE_THRESHOLD"` + CooldownPeriod time.Duration `yaml:"cooldown_period" envDefault:"10s" env:"ENTITY_CACHING_L2_CIRCUIT_BREAKER_COOLDOWN_PERIOD"` +} + +type EntityCachingSubgraphCacheOverride struct { + Name string `yaml:"name"` + StorageProviderID string `yaml:"storage_provider_id,omitempty"` + Entities []EntityCachingEntityConfig `yaml:"entities,omitempty"` +} + +type EntityCachingEntityConfig struct { + Type string `yaml:"type"` + StorageProviderID string `yaml:"storage_provider_id,omitempty" envDefault:""` +} + type AccessLogsConfig struct { Enabled bool `yaml:"enabled" env:"ACCESS_LOGS_ENABLED" envDefault:"true"` Level string `yaml:"level" env:"ACCESS_LOGS_LEVEL" envDefault:"info"` @@ -1419,6 +1465,7 @@ type Config struct { SubgraphExtensionPropagation SubgraphExtensionPropagationConfiguration `yaml:"subgraph_extension_propagation" envPrefix:"SUBGRAPH_EXTENSION_PROPAGATION_"` StorageProviders StorageProviders `yaml:"storage_providers" envPrefix:"STORAGE_PROVIDER_"` + EntityCaching EntityCachingConfiguration `yaml:"entity_caching,omitempty"` ExecutionConfig ExecutionConfig `yaml:"execution_config"` SplitConfigPoller SplitConfigPollerRules `yaml:"split_config_poller" envPrefix:"SPLIT_CONFIG_POLLER_"` PersistedOperationsConfig PersistedOperationsConfig `yaml:"persisted_operations" envPrefix:"PERSISTED_OPERATIONS_"` @@ -1572,5 +1619,58 @@ func LoadConfig(configFilePaths []string) (*LoadResult, error) { cfg.Config.SubgraphErrorPropagation.AllowedExtensionFields = unique.SliceElements(append(cfg.Config.SubgraphErrorPropagation.AllowedExtensionFields, "code", "stacktrace")) } + if err := validateMemoryProviderUsage(&cfg.Config); err != nil { + return nil, err + } + return cfg, nil } + +// validateMemoryProviderUsage ensures memory storage providers are only used for entity caching. +// Memory providers are in-process caches (Ristretto) that don't support the object storage semantics +// required by persisted operations, execution config, APQ, MCP, and ConnectRPC. +func validateMemoryProviderUsage(cfg *Config) error { + memoryIDs := make(map[string]bool, len(cfg.StorageProviders.Memory)) + for _, m := range cfg.StorageProviders.Memory { + memoryIDs[m.ID] = true + } + if len(memoryIDs) == 0 { + return nil + } + + type providerRef struct { + id string + feature string + } + + var refs []providerRef + if id := cfg.PersistedOperationsConfig.Storage.ProviderID; id != "" { + refs = append(refs, providerRef{id, "persisted_operations.storage"}) + } + if id := cfg.AutomaticPersistedQueries.Storage.ProviderID; id != "" { + refs = append(refs, providerRef{id, "automatic_persisted_queries.storage"}) + } + if id := cfg.ExecutionConfig.Storage.ProviderID; id != "" { + refs = append(refs, providerRef{id, "execution_config.storage"}) + } + if id := cfg.ExecutionConfig.FallbackStorage.ProviderID; id != "" { + refs = append(refs, providerRef{id, "execution_config.fallback_storage"}) + } + if id := cfg.MCP.Storage.ProviderID; id != "" { + refs = append(refs, providerRef{id, "mcp.storage"}) + } + if id := cfg.ConnectRPC.Storage.ProviderID; id != "" { + refs = append(refs, providerRef{id, "connect_rpc.storage"}) + } + + var errs error + for _, ref := range refs { + if memoryIDs[ref.id] { + errs = errors.Join(errs, fmt.Errorf( + "memory storage provider %q cannot be used for %s: memory providers are only supported for entity caching (entity_caching.l2.storage or subgraph_cache_overrides)", + ref.id, ref.feature, + )) + } + } + return errs +} diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index 7b4ead7123..d6f87c6adc 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -135,6 +135,29 @@ } } } + }, + "memory": { + "type": "array", + "description": "In-process memory cache using Ristretto. Memory providers can only be used for entity caching (entity_caching.l2.storage or subgraph_cache_overrides). They cannot be used for persisted operations, execution config, APQ, MCP, or ConnectRPC storage. Useful for development, testing, or single-instance deployments.", + "items": { + "type": "object", + "required": [ + "id" + ], + "additionalProperties": false, + "properties": { + "id": { + "type": "string", + "description": "The provider ID. The provider ID is used to identify the provider in the configuration." + }, + "max_size": { + "type": "string", + "format": "bytes-string", + "description": "Maximum cache size. Supports human-readable byte strings (e.g., '100MB', '1GB'). Default: 100MB.", + "default": "100MB" + } + } + } } } }, @@ -311,6 +334,130 @@ } } }, + "entity_caching": { + "type": "object", + "additionalProperties": false, + "description": "Entity caching stores resolved entities in a multi-layer cache (L1 per-request, L2 cross-request) to avoid redundant subgraph fetches.", + "properties": { + "enabled": { + "type": "boolean", + "description": "Global enable/disable flag for entity caching. When false, neither L1 nor L2 caching is available.", + "default": false + }, + "global_cache_key_prefix": { + "type": "string", + "description": "Prefix prepended to all L2 cache keys. Can be used e.g. when the Redis instance is shared across multiple use cases." + }, + "l1": { + "type": "object", + "additionalProperties": false, + "description": "L1 is an in-memory per-request cache. It deduplicates entity fetches within a single request.", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable/disable L1 per-request in-memory cache.", + "default": true + } + } + }, + "l2": { + "type": "object", + "additionalProperties": false, + "description": "L2 is a cross-request cache shared across all requests. Supports Redis and in-memory (Ristretto) backends.", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable/disable L2 external cache.", + "default": true + }, + "storage": { + "type": "object", + "additionalProperties": false, + "description": "Storage backend configuration for L2 cache.", + "properties": { + "provider_id": { + "type": "string", + "description": "References a storage_providers.redis or storage_providers.memory entry by ID." + }, + "key_prefix": { + "type": "string", + "description": "Prefix for all entity cache keys in the storage backend.", + "default": "cosmo_entity_cache" + } + } + }, + "circuit_breaker": { + "type": "object", + "additionalProperties": false, + "description": "Circuit breaker for L2 cache operations. Protects against cascading latency when the cache backend is slow or unavailable.", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable/disable the L2 circuit breaker.", + "default": false + }, + "failure_threshold": { + "type": "integer", + "description": "Number of consecutive L2 operation failures that trips the breaker.", + "default": 5, + "minimum": 1 + }, + "cooldown_period": { + "type": "string", + "description": "How long the breaker stays open before allowing a probe request. Specified as a duration string (e.g., '10s', '1m').", + "default": "10s", + "duration": { + "minimum": "1s" + } + } + } + } + } + }, + "subgraph_cache_overrides": { + "type": "array", + "description": "Per-subgraph storage provider overrides. Resolution order: entity-level override > subgraph-level override > global default.", + "items": { + "type": "object", + "required": [ + "name" + ], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Subgraph name (must match a subgraph in the router config)." + }, + "storage_provider_id": { + "type": "string", + "description": "Storage provider for all entities in this subgraph (unless overridden per-entity). References a storage_providers.redis or storage_providers.memory entry by ID." + }, + "entities": { + "type": "array", + "description": "Per-entity storage provider overrides within this subgraph.", + "items": { + "type": "object", + "required": [ + "type" + ], + "additionalProperties": false, + "properties": { + "type": { + "type": "string", + "description": "Entity type name (must be a type with @entityCache in this subgraph)." + }, + "storage_provider_id": { + "type": "string", + "description": "Storage provider for this specific entity type. Overrides the subgraph-level storage_provider_id." + } + } + } + } + } + } + } + } + }, "execution_config": { "type": "object", "description": "The configuration for the execution config. You can load the execution config from the local file system or from a storage provider.", diff --git a/router/pkg/config/config_test.go b/router/pkg/config/config_test.go index 00cb69de18..0107a8a11e 100644 --- a/router/pkg/config/config_test.go +++ b/router/pkg/config/config_test.go @@ -2131,6 +2131,218 @@ security: }) } +func TestMemoryProviderOnlyForEntityCaching(t *testing.T) { + t.Parallel() + + t.Run("memory provider allowed for entity caching", func(t *testing.T) { + t.Parallel() + f := createTempFileFromFixture(t, ` +version: "1" + +storage_providers: + memory: + - id: "in-memory" + max_size: "100MB" + +entity_caching: + enabled: true + l2: + storage: + provider_id: "in-memory" +`) + _, err := LoadConfig([]string{f}) + require.NoError(t, err) + }) + + t.Run("memory provider max_size rejects invalid bytes string", func(t *testing.T) { + t.Parallel() + f := createTempFileFromFixture(t, ` +version: "1" + +storage_providers: + memory: + - id: "in-memory" + max_size: "not-a-byte-string" + +entity_caching: + enabled: true + l2: + storage: + provider_id: "in-memory" +`) + _, err := LoadConfig([]string{f}) + // Invalid bytes strings are caught by the BytesString YAML unmarshaler + // before json-schema validation runs, so the user-facing error surfaces + // as a parse error rather than a schema error. Either shape proves the + // value is rejected; assert only that the error path mentions the parse + // failure. + require.Error(t, err) + require.Contains(t, err.Error(), "bytes string") + }) + + t.Run("config schema annotates memory max_size as bytes-string", func(t *testing.T) { + t.Parallel() + // The JSON schema itself does not enforce "bytes-string" (not a standard + // format), but the annotation is documented and surfaces in tooling that + // generates docs/IDE completions. Guard against accidental removal. + var schemaMap map[string]any + require.NoError(t, yaml.Unmarshal(JSONSchema, &schemaMap)) + topProps := schemaMap["properties"].(map[string]any) + storage := topProps["storage_providers"].(map[string]any) + storageProps := storage["properties"].(map[string]any) + memory := storageProps["memory"].(map[string]any) + items := memory["items"].(map[string]any) + itemProps := items["properties"].(map[string]any) + maxSize := itemProps["max_size"].(map[string]any) + require.Equal(t, "bytes-string", maxSize["format"]) + }) + + t.Run("memory provider rejected for persisted_operations", func(t *testing.T) { + t.Parallel() + f := createTempFileFromFixture(t, ` +version: "1" + +storage_providers: + memory: + - id: "in-memory" + max_size: "100MB" + +persisted_operations: + storage: + provider_id: "in-memory" + object_prefix: "ops" +`) + _, err := LoadConfig([]string{f}) + require.EqualError(t, err, `memory storage provider "in-memory" cannot be used for persisted_operations.storage: memory providers are only supported for entity caching (entity_caching.l2.storage or subgraph_cache_overrides)`) + }) + + t.Run("memory provider rejected for automatic_persisted_queries", func(t *testing.T) { + t.Parallel() + f := createTempFileFromFixture(t, ` +version: "1" + +storage_providers: + memory: + - id: "in-memory" + max_size: "100MB" + +automatic_persisted_queries: + enabled: true + storage: + provider_id: "in-memory" + object_prefix: "apq" +`) + _, err := LoadConfig([]string{f}) + require.EqualError(t, err, `memory storage provider "in-memory" cannot be used for automatic_persisted_queries.storage: memory providers are only supported for entity caching (entity_caching.l2.storage or subgraph_cache_overrides)`) + }) + + t.Run("memory provider rejected for execution_config storage", func(t *testing.T) { + t.Parallel() + f := createTempFileFromFixture(t, ` +version: "1" + +storage_providers: + memory: + - id: "in-memory" + max_size: "100MB" + +execution_config: + storage: + provider_id: "in-memory" + object_path: "/config.json" +`) + _, err := LoadConfig([]string{f}) + require.EqualError(t, err, `memory storage provider "in-memory" cannot be used for execution_config.storage: memory providers are only supported for entity caching (entity_caching.l2.storage or subgraph_cache_overrides)`) + }) + + t.Run("memory provider rejected for connect_rpc storage", func(t *testing.T) { + t.Parallel() + f := createTempFileFromFixture(t, ` +version: "1" + +storage_providers: + memory: + - id: "in-memory" + max_size: "100MB" + +connect_rpc: + enabled: true + storage: + provider_id: "in-memory" + graphql_endpoint: "http://localhost:3002/graphql" +`) + _, err := LoadConfig([]string{f}) + require.EqualError(t, err, `memory storage provider "in-memory" cannot be used for connect_rpc.storage: memory providers are only supported for entity caching (entity_caching.l2.storage or subgraph_cache_overrides)`) + }) + + t.Run("memory provider rejected for mcp storage", func(t *testing.T) { + t.Parallel() + f := createTempFileFromFixture(t, ` +version: "1" + +storage_providers: + memory: + - id: "in-memory" + max_size: "100MB" + +mcp: + enabled: true + storage: + provider_id: "in-memory" +`) + _, err := LoadConfig([]string{f}) + require.EqualError(t, err, `memory storage provider "in-memory" cannot be used for mcp.storage: memory providers are only supported for entity caching (entity_caching.l2.storage or subgraph_cache_overrides)`) + }) + + t.Run("non-memory provider allowed everywhere", func(t *testing.T) { + t.Parallel() + f := createTempFileFromFixture(t, ` +version: "1" + +storage_providers: + memory: + - id: "in-memory" + max_size: "100MB" + redis: + - id: "my-redis" + urls: + - "redis://localhost:6379" + +persisted_operations: + storage: + provider_id: "my-redis" + object_prefix: "ops" +`) + _, err := LoadConfig([]string{f}) + require.NoError(t, err) + }) + + t.Run("multiple violations reported together", func(t *testing.T) { + t.Parallel() + f := createTempFileFromFixture(t, ` +version: "1" + +storage_providers: + memory: + - id: "in-memory" + max_size: "100MB" + +persisted_operations: + storage: + provider_id: "in-memory" + object_prefix: "ops" + +automatic_persisted_queries: + enabled: true + storage: + provider_id: "in-memory" + object_prefix: "apq" +`) + _, err := LoadConfig([]string{f}) + require.EqualError(t, err, `memory storage provider "in-memory" cannot be used for persisted_operations.storage: memory providers are only supported for entity caching (entity_caching.l2.storage or subgraph_cache_overrides)`+"\n"+`memory storage provider "in-memory" cannot be used for automatic_persisted_queries.storage: memory providers are only supported for entity caching (entity_caching.l2.storage or subgraph_cache_overrides)`) + }) +} + func TestPQLManifestConfig(t *testing.T) { t.Run("defaults", func(t *testing.T) { t.Parallel() diff --git a/router/pkg/config/testdata/config_defaults.json b/router/pkg/config/testdata/config_defaults.json index 3475e2cc11..c8454d01f6 100644 --- a/router/pkg/config/testdata/config_defaults.json +++ b/router/pkg/config/testdata/config_defaults.json @@ -549,8 +549,29 @@ "S3": null, "CDN": null, "Redis": null, + "Memory": null, "FileSystem": null }, + "EntityCaching": { + "Enabled": false, + "GlobalCacheKeyPrefix": "", + "L1": { + "Enabled": true + }, + "L2": { + "Enabled": true, + "Storage": { + "ProviderID": "", + "KeyPrefix": "cosmo_entity_cache" + }, + "CircuitBreaker": { + "Enabled": false, + "FailureThreshold": 5, + "CooldownPeriod": 10000000000 + } + }, + "SubgraphCacheOverrides": null + }, "ExecutionConfig": { "File": { "Path": "", diff --git a/router/pkg/config/testdata/config_full.json b/router/pkg/config/testdata/config_full.json index 3a3849cdfb..0c057d6f02 100644 --- a/router/pkg/config/testdata/config_full.json +++ b/router/pkg/config/testdata/config_full.json @@ -1023,6 +1023,7 @@ "ClusterEnabled": false } ], + "Memory": null, "FileSystem": [ { "ID": "mcp", @@ -1030,6 +1031,26 @@ } ] }, + "EntityCaching": { + "Enabled": false, + "GlobalCacheKeyPrefix": "", + "L1": { + "Enabled": true + }, + "L2": { + "Enabled": true, + "Storage": { + "ProviderID": "", + "KeyPrefix": "cosmo_entity_cache" + }, + "CircuitBreaker": { + "Enabled": false, + "FailureThreshold": 5, + "CooldownPeriod": 10000000000 + } + }, + "SubgraphCacheOverrides": null + }, "ExecutionConfig": { "File": { "Path": "", diff --git a/router/pkg/entitycache/circuit_breaker.go b/router/pkg/entitycache/circuit_breaker.go new file mode 100644 index 0000000000..9203b13fd2 --- /dev/null +++ b/router/pkg/entitycache/circuit_breaker.go @@ -0,0 +1,162 @@ +package entitycache + +import ( + "context" + "io" + "sync/atomic" + "time" + + ristretto "github.com/dgraph-io/ristretto/v2" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" +) + +var _ resolve.LoaderCache = (*CircuitBreakerCache)(nil) +var _ io.Closer = (*CircuitBreakerCache)(nil) + +const ( + stateClosed int32 = 0 + stateOpen int32 = 1 + stateHalfOpen int32 = 2 +) + +// CircuitBreakerConfig holds the configuration for a cache circuit breaker. +type CircuitBreakerConfig struct { + Enabled bool + FailureThreshold int + CooldownPeriod time.Duration +} + +// CircuitBreakerCache wraps a LoaderCache with circuit breaker protection. +// When the underlying cache fails repeatedly (FailureThreshold consecutive failures), +// the breaker opens and all cache operations return nil/no-op, falling back to subgraph fetches. +// After CooldownPeriod, one probe request is allowed through (half-open state). +type CircuitBreakerCache struct { + cache resolve.LoaderCache + failureThreshold int32 + cooldownPeriod time.Duration + + state atomic.Int32 + consecutiveFails atomic.Int32 + lastStateChange atomic.Int64 // unix nanos +} + +// NewCircuitBreakerCache wraps the given cache with circuit breaker logic. +func NewCircuitBreakerCache(cache resolve.LoaderCache, cfg CircuitBreakerConfig) *CircuitBreakerCache { + cb := &CircuitBreakerCache{ + cache: cache, + failureThreshold: int32(cfg.FailureThreshold), + cooldownPeriod: cfg.CooldownPeriod, + } + cb.lastStateChange.Store(time.Now().UnixNano()) + return cb +} + +// IsOpen returns true if the circuit breaker is in the open state. +func (cb *CircuitBreakerCache) IsOpen() bool { + return cb.state.Load() == stateOpen +} + +func (cb *CircuitBreakerCache) Get(ctx context.Context, keys []string) ([]*resolve.CacheEntry, error) { + if !cb.allowRequest() { + return make([]*resolve.CacheEntry, len(keys)), nil + } + entries, err := cb.cache.Get(ctx, keys) + cb.recordResult(err) + if err != nil { + return make([]*resolve.CacheEntry, len(keys)), nil + } + return entries, nil +} + +func (cb *CircuitBreakerCache) Set(ctx context.Context, entries []*resolve.CacheEntry) error { + if !cb.allowRequest() { + return nil + } + err := cb.cache.Set(ctx, entries) + cb.recordResult(err) + return nil +} + +func (cb *CircuitBreakerCache) Delete(ctx context.Context, keys []string) error { + if !cb.allowRequest() { + return nil + } + err := cb.cache.Delete(ctx, keys) + cb.recordResult(err) + return nil +} + +func (cb *CircuitBreakerCache) allowRequest() bool { + switch cb.state.Load() { + case stateClosed: + return true + case stateOpen: + if time.Since(time.Unix(0, cb.lastStateChange.Load())) >= cb.cooldownPeriod { + // Transition to half-open: allow one probe + if cb.state.CompareAndSwap(stateOpen, stateHalfOpen) { + cb.lastStateChange.Store(time.Now().UnixNano()) + return true + } + } + return false + case stateHalfOpen: + // Only one probe at a time; additional requests are rejected + return false + } + return true +} + +func (cb *CircuitBreakerCache) recordResult(err error) { + if err == nil { + cb.onSuccess() + } else { + cb.onFailure() + } +} + +func (cb *CircuitBreakerCache) onSuccess() { + cb.consecutiveFails.Store(0) + state := cb.state.Load() + if state == stateHalfOpen { + cb.state.Store(stateClosed) + cb.lastStateChange.Store(time.Now().UnixNano()) + } +} + +func (cb *CircuitBreakerCache) onFailure() { + fails := cb.consecutiveFails.Add(1) + state := cb.state.Load() + if state == stateHalfOpen { + cb.state.Store(stateOpen) + cb.lastStateChange.Store(time.Now().UnixNano()) + return + } + if state == stateClosed && fails >= cb.failureThreshold { + cb.state.Store(stateOpen) + cb.lastStateChange.Store(time.Now().UnixNano()) + cb.consecutiveFails.Store(0) + } +} + +func (cb *CircuitBreakerCache) Close() error { + if closer, ok := cb.cache.(io.Closer); ok { + return closer.Close() + } + return nil +} + +func (cb *CircuitBreakerCache) Metrics() *ristretto.Metrics { + provider, ok := cb.cache.(interface{ Metrics() *ristretto.Metrics }) + if !ok { + return nil + } + return provider.Metrics() +} + +func (cb *CircuitBreakerCache) MaxSizeBytes() int64 { + provider, ok := cb.cache.(interface{ MaxSizeBytes() int64 }) + if !ok { + return 0 + } + return provider.MaxSizeBytes() +} diff --git a/router/pkg/entitycache/circuit_breaker_test.go b/router/pkg/entitycache/circuit_breaker_test.go new file mode 100644 index 0000000000..d471f8eb16 --- /dev/null +++ b/router/pkg/entitycache/circuit_breaker_test.go @@ -0,0 +1,373 @@ +package entitycache + +import ( + "context" + "errors" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/alicebob/miniredis/v2" + "github.com/redis/go-redis/v9" + "github.com/stretchr/testify/require" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" +) + +var errFakeCache = errors.New("cache unavailable") + +// fakeCache is a test double that can be configured to fail. +type fakeCache struct { + shouldFail atomic.Bool + getCalls atomic.Int32 + setCalls atomic.Int32 + delCalls atomic.Int32 +} + +func (f *fakeCache) Get(_ context.Context, keys []string) ([]*resolve.CacheEntry, error) { + f.getCalls.Add(1) + if f.shouldFail.Load() { + return nil, errFakeCache + } + return make([]*resolve.CacheEntry, len(keys)), nil +} + +func (f *fakeCache) Set(_ context.Context, _ []*resolve.CacheEntry) error { + f.setCalls.Add(1) + if f.shouldFail.Load() { + return errFakeCache + } + return nil +} + +func (f *fakeCache) Delete(_ context.Context, _ []string) error { + f.delCalls.Add(1) + if f.shouldFail.Load() { + return errFakeCache + } + return nil +} + +func newTestBreaker(inner *fakeCache, threshold int, cooldown time.Duration) *CircuitBreakerCache { + return NewCircuitBreakerCache(inner, CircuitBreakerConfig{ + Enabled: true, + FailureThreshold: threshold, + CooldownPeriod: cooldown, + }) +} + +func TestCircuitBreakerCache_ClosedState_PassThrough(t *testing.T) { + inner := &fakeCache{} + cb := newTestBreaker(inner, 5, time.Minute) + + entries, err := cb.Get(context.Background(), []string{"a", "b"}) + require.NoError(t, err) + require.Len(t, entries, 2) + require.Equal(t, int32(1), inner.getCalls.Load()) + + err = cb.Set(context.Background(), []*resolve.CacheEntry{{Key: "a", Value: []byte("v"), TTL: time.Second}}) + require.NoError(t, err) + require.Equal(t, int32(1), inner.setCalls.Load()) + + err = cb.Delete(context.Background(), []string{"a"}) + require.NoError(t, err) + require.Equal(t, int32(1), inner.delCalls.Load()) + + require.False(t, cb.IsOpen()) +} + +func TestCircuitBreakerCache_OpensAfterThreshold(t *testing.T) { + inner := &fakeCache{} + inner.shouldFail.Store(true) + cb := newTestBreaker(inner, 3, time.Minute) + + ctx := context.Background() + + // 3 consecutive failures should trip the breaker + for range 3 { + _, _ = cb.Get(ctx, []string{"a"}) + } + require.True(t, cb.IsOpen()) + + // Subsequent calls should not reach the inner cache + callsBefore := inner.getCalls.Load() + entries, err := cb.Get(ctx, []string{"a"}) + require.NoError(t, err) + require.Len(t, entries, 1) + require.Nil(t, entries[0]) + require.Equal(t, callsBefore, inner.getCalls.Load()) +} + +func TestCircuitBreakerCache_SetDeleteSkippedWhenOpen(t *testing.T) { + inner := &fakeCache{} + inner.shouldFail.Store(true) + cb := newTestBreaker(inner, 2, time.Minute) + + ctx := context.Background() + + // Trip the breaker + for range 2 { + _, _ = cb.Get(ctx, []string{"a"}) + } + require.True(t, cb.IsOpen()) + + setBefore := inner.setCalls.Load() + delBefore := inner.delCalls.Load() + + err := cb.Set(ctx, []*resolve.CacheEntry{{Key: "a", TTL: time.Second}}) + require.NoError(t, err) + require.Equal(t, setBefore, inner.setCalls.Load()) + + err = cb.Delete(ctx, []string{"a"}) + require.NoError(t, err) + require.Equal(t, delBefore, inner.delCalls.Load()) +} + +func TestCircuitBreakerCache_HalfOpenProbeSuccess(t *testing.T) { + inner := &fakeCache{} + inner.shouldFail.Store(true) + cb := newTestBreaker(inner, 2, 10*time.Millisecond) + + ctx := context.Background() + + // Trip the breaker + for range 2 { + _, _ = cb.Get(ctx, []string{"a"}) + } + require.True(t, cb.IsOpen()) + + // Wait for cooldown + time.Sleep(15 * time.Millisecond) + + // Fix the cache + inner.shouldFail.Store(false) + + // Probe request should go through and close the breaker + _, err := cb.Get(ctx, []string{"a"}) + require.NoError(t, err) + require.False(t, cb.IsOpen()) + + // Normal operations should work again + _, err = cb.Get(ctx, []string{"b"}) + require.NoError(t, err) +} + +func TestCircuitBreakerCache_HalfOpenProbeFailure(t *testing.T) { + inner := &fakeCache{} + inner.shouldFail.Store(true) + cb := newTestBreaker(inner, 2, 10*time.Millisecond) + + ctx := context.Background() + + // Trip the breaker + for range 2 { + _, _ = cb.Get(ctx, []string{"a"}) + } + require.True(t, cb.IsOpen()) + + // Wait for cooldown + time.Sleep(15 * time.Millisecond) + + // Probe request fails — breaker stays open + _, err := cb.Get(ctx, []string{"a"}) + require.NoError(t, err) // Circuit breaker swallows the error + require.True(t, cb.IsOpen()) +} + +func TestCircuitBreakerCache_SuccessResetsFailureCount(t *testing.T) { + inner := &fakeCache{} + cb := newTestBreaker(inner, 3, time.Minute) + + ctx := context.Background() + + // 2 failures, then 1 success, then 2 more failures — should NOT trip + inner.shouldFail.Store(true) + _, _ = cb.Get(ctx, []string{"a"}) + _, _ = cb.Get(ctx, []string{"a"}) + + inner.shouldFail.Store(false) + _, _ = cb.Get(ctx, []string{"a"}) + + inner.shouldFail.Store(true) + _, _ = cb.Get(ctx, []string{"a"}) + _, _ = cb.Get(ctx, []string{"a"}) + + require.False(t, cb.IsOpen()) +} + +func TestCircuitBreakerCache_NeverErrorsToCallers(t *testing.T) { + inner := &fakeCache{} + inner.shouldFail.Store(true) + cb := newTestBreaker(inner, 100, time.Minute) + + ctx := context.Background() + + // Even when inner cache fails, circuit breaker never returns errors + entries, err := cb.Get(ctx, []string{"a"}) + require.NoError(t, err) + require.Len(t, entries, 1) + + err = cb.Set(ctx, []*resolve.CacheEntry{{Key: "a", TTL: time.Second}}) + require.NoError(t, err) + + err = cb.Delete(ctx, []string{"a"}) + require.NoError(t, err) +} + +func TestCircuitBreakerCache_Close_DelegatesToInner(t *testing.T) { + mr := miniredis.RunT(t) + client := redis.NewClient(&redis.Options{Addr: mr.Addr()}) + inner := NewRedisEntityCache(client, "test") + cb := newTestBreaker(&fakeCache{}, 3, time.Minute) + // Replace the inner cache with one that implements io.Closer + cb.cache = inner + + err := cb.Close() + require.NoError(t, err) + + // After closing, the inner Redis cache should be closed + _, err = inner.Get(context.Background(), []string{"key"}) + require.Error(t, err) +} + +func TestCircuitBreakerCache_Close_NoopWhenInnerNotCloser(t *testing.T) { + inner := &fakeCache{} + cb := newTestBreaker(inner, 3, time.Minute) + + err := cb.Close() + require.NoError(t, err) +} + +// TestCircuitBreakerCache_ConcurrentFailuresTripOnce verifies that under +// concurrent failure traffic the breaker opens, and that it only opens once +// (the state machine is safe under contention). Run with -race. +func TestCircuitBreakerCache_ConcurrentFailuresTripOnce(t *testing.T) { + t.Parallel() + + inner := &fakeCache{} + inner.shouldFail.Store(true) + const threshold = 5 + cb := newTestBreaker(inner, threshold, time.Minute) + + ctx := context.Background() + const goroutines = 16 + const callsPerGoroutine = 50 + + var wg sync.WaitGroup + for range goroutines { + wg.Add(1) + go func() { + defer wg.Done() + for range callsPerGoroutine { + _, _ = cb.Get(ctx, []string{"k"}) + } + }() + } + wg.Wait() + + // Breaker must have opened (many failures, all above threshold). + require.True(t, cb.IsOpen()) + + // Once open, most calls must have short-circuited and never touched inner. + // Because multiple goroutines can race past allowRequest before the first + // failing Set transitions state, some failures still hit inner after the + // breaker opens — but the total inner calls must be vastly less than the + // total surface of goroutines*callsPerGoroutine. + total := int32(goroutines * callsPerGoroutine) + require.Less(t, inner.getCalls.Load(), total, + "breaker should short-circuit after opening, not let all calls through") +} + +// TestCircuitBreakerCache_ConcurrentHalfOpenProbeCloses verifies that when +// the breaker is half-open and many goroutines race, at least one probe +// reaches inner and the successful probe closes the breaker. The stricter +// "exactly one probe" invariant is not asserted here because once the probe +// succeeds and the breaker closes, subsequent goroutines also reach inner. +func TestCircuitBreakerCache_ConcurrentHalfOpenProbeCloses(t *testing.T) { + t.Parallel() + + inner := &fakeCache{} + inner.shouldFail.Store(true) + cb := newTestBreaker(inner, 2, 5*time.Millisecond) + + ctx := context.Background() + + // Trip the breaker. + for range 2 { + _, _ = cb.Get(ctx, []string{"k"}) + } + require.True(t, cb.IsOpen()) + + // Freeze the inner call count; block the inner cache from failing so the + // half-open probe can succeed. Wait past cooldown, then fire many goroutines + // concurrently. Exactly one of them is allowed to reach inner (the probe). + inner.shouldFail.Store(false) + time.Sleep(10 * time.Millisecond) + + callsBefore := inner.getCalls.Load() + + const goroutines = 32 + var wg sync.WaitGroup + for range goroutines { + wg.Add(1) + go func() { + defer wg.Done() + _, _ = cb.Get(ctx, []string{"k"}) + }() + } + wg.Wait() + + // After the first probe succeeds the breaker closes, so later-arriving + // calls will also reach inner. The strict invariant is that the very first + // transition from open→half-open admitted only one probe. We verify the + // state is now closed and that the probe path was exercised. + require.False(t, cb.IsOpen(), "successful probe must close the breaker") + require.Greater(t, inner.getCalls.Load(), callsBefore, + "at least the probe must have reached inner after cooldown") +} + +// TestCircuitBreakerCache_ConcurrentMixedSuccessFailure stresses the state +// machine with interleaved successes and failures across goroutines. The +// only invariant we assert is that the breaker never panics, the state field +// stays within the three known values, and Close is safe to call at the end. +func TestCircuitBreakerCache_ConcurrentMixedSuccessFailure(t *testing.T) { + t.Parallel() + + inner := &fakeCache{} + cb := newTestBreaker(inner, 10, time.Millisecond) + + ctx := context.Background() + const goroutines = 8 + + var wg sync.WaitGroup + // Goroutines that toggle inner's failure mode. + for i := range goroutines { + wg.Add(1) + go func(seed int) { + defer wg.Done() + for j := range 100 { + inner.shouldFail.Store((seed+j)%3 == 0) + } + }(i) + } + // Goroutines that hammer the breaker's public surface. + for range goroutines { + wg.Add(1) + go func() { + defer wg.Done() + for range 100 { + _, _ = cb.Get(ctx, []string{"x"}) + _ = cb.Set(ctx, []*resolve.CacheEntry{{Key: "x", Value: []byte("v"), TTL: time.Second}}) + _ = cb.Delete(ctx, []string{"x"}) + } + }() + } + wg.Wait() + + // Sanity: state must be one of the three defined values. + state := cb.state.Load() + require.True(t, state == stateClosed || state == stateOpen || state == stateHalfOpen, + "breaker state escaped the valid set: got %d", state) + + require.NoError(t, cb.Close()) +} diff --git a/router/pkg/entitycache/memory.go b/router/pkg/entitycache/memory.go new file mode 100644 index 0000000000..0499fa0c14 --- /dev/null +++ b/router/pkg/entitycache/memory.go @@ -0,0 +1,128 @@ +package entitycache + +import ( + "context" + "fmt" + "sync/atomic" + "time" + + ristretto "github.com/dgraph-io/ristretto/v2" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" +) + +var _ resolve.LoaderCache = (*MemoryEntityCache)(nil) + +type MemoryEntityCache struct { + cache *ristretto.Cache[string, []byte] + len atomic.Int64 + maxSizeBytes int64 +} + +func NewMemoryEntityCache(maxSizeBytes int64) (*MemoryEntityCache, error) { + if maxSizeBytes <= 0 { + return nil, fmt.Errorf("maxSizeBytes must be positive, got %d", maxSizeBytes) + } + // NumCounters should be ~10x the expected number of items. + // Assuming an average entry size of ~1KB. + numCounters := max((maxSizeBytes/1024)*10, 1000) + m := &MemoryEntityCache{maxSizeBytes: maxSizeBytes} + cache, err := ristretto.NewCache(&ristretto.Config[string, []byte]{ + NumCounters: numCounters, + MaxCost: maxSizeBytes, + BufferItems: 64, + IgnoreInternalCost: true, + Metrics: true, + OnEvict: func(item *ristretto.Item[[]byte]) { + m.len.Add(-1) + }, + }) + if err != nil { + return nil, fmt.Errorf("creating ristretto cache: %w", err) + } + m.cache = cache + return m, nil +} + +func (c *MemoryEntityCache) Get(_ context.Context, keys []string) ([]*resolve.CacheEntry, error) { + if len(keys) == 0 { + return nil, nil + } + entries := make([]*resolve.CacheEntry, len(keys)) + for i, k := range keys { + val, ok := c.cache.Get(k) + if !ok { + continue + } + var remainingTTL time.Duration + if ttl, found := c.cache.GetTTL(k); found && ttl > 0 { + remainingTTL = ttl + } + entries[i] = &resolve.CacheEntry{ + Key: k, + Value: val, + RemainingTTL: remainingTTL, + } + } + return entries, nil +} + +func (c *MemoryEntityCache) Set(_ context.Context, entries []*resolve.CacheEntry) error { + if len(entries) == 0 { + return nil + } + for _, entry := range entries { + if entry == nil { + continue + } + // Negative TTL means "no expiration" per the LoaderCache contract. + // Ristretto treats ttl<=0 as no expiration, so clamp negatives to 0. + ttl := entry.TTL + if ttl < 0 { + ttl = 0 + } + // Check if key already exists (update vs new entry) + _, exists := c.cache.Get(entry.Key) + if c.cache.SetWithTTL(entry.Key, entry.Value, int64(len(entry.Value)), ttl) && !exists { + c.len.Add(1) + } + } + c.cache.Wait() + return nil +} + +func (c *MemoryEntityCache) Delete(_ context.Context, keys []string) error { + if len(keys) == 0 { + return nil + } + for _, k := range keys { + if _, ok := c.cache.Get(k); ok { + c.cache.Del(k) + c.len.Add(-1) + } + } + return nil +} + +// Len returns the approximate number of items in the cache. +// This is intended for use in tests only. The count may drift +// under heavy concurrent access due to races between Get/Set/Delete +// and the asynchronous eviction callback. +// Ristretto metrics don't cover Delete, so we keep a manual counter. +func (c *MemoryEntityCache) Len() int { + return int(c.len.Load()) +} + +// Metrics returns the underlying ristretto metrics for exporter wiring. +// Exposes hits, misses, keys added/updated/evicted, cost added/evicted. +func (c *MemoryEntityCache) Metrics() *ristretto.Metrics { + return c.cache.Metrics +} + +func (c *MemoryEntityCache) MaxSizeBytes() int64 { + return c.maxSizeBytes +} + +func (c *MemoryEntityCache) Close() error { + c.cache.Close() + return nil +} diff --git a/router/pkg/entitycache/memory_test.go b/router/pkg/entitycache/memory_test.go new file mode 100644 index 0000000000..059fe98262 --- /dev/null +++ b/router/pkg/entitycache/memory_test.go @@ -0,0 +1,274 @@ +package entitycache + +import ( + "context" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" +) + +const testCacheSize = 10 * 1024 * 1024 // 10MB + +func newTestCache(t *testing.T) *MemoryEntityCache { + t.Helper() + c, err := NewMemoryEntityCache(testCacheSize) + require.NoError(t, err) + t.Cleanup(func() { _ = c.Close() }) + return c +} + +func TestMemoryEntityCache_GetMiss(t *testing.T) { + c := newTestCache(t) + entries, err := c.Get(context.Background(), []string{"key1", "key2"}) + require.NoError(t, err) + require.Len(t, entries, 2) + assert.Nil(t, entries[0]) + assert.Nil(t, entries[1]) +} + +func TestMemoryEntityCache_SetThenGet(t *testing.T) { + c := newTestCache(t) + ctx := context.Background() + err := c.Set(ctx, []*resolve.CacheEntry{ + {Key: "k1", Value: []byte("v1"), TTL: 5 * time.Second}, + {Key: "k2", Value: []byte("v2"), TTL: 5 * time.Second}, + }) + require.NoError(t, err) + + entries, err := c.Get(ctx, []string{"k1", "k2"}) + require.NoError(t, err) + require.Len(t, entries, 2) + assert.Equal(t, []byte("v1"), entries[0].Value) + assert.Equal(t, []byte("v2"), entries[1].Value) +} + +func TestMemoryEntityCache_PartialHit(t *testing.T) { + c := newTestCache(t) + ctx := context.Background() + require.NoError(t, c.Set(ctx, []*resolve.CacheEntry{ + {Key: "k1", Value: []byte("v1"), TTL: 5 * time.Second}, + })) + + entries, err := c.Get(ctx, []string{"k1", "k2"}) + require.NoError(t, err) + require.Len(t, entries, 2) + assert.NotNil(t, entries[0]) + assert.Nil(t, entries[1]) +} + +func TestMemoryEntityCache_Delete(t *testing.T) { + c := newTestCache(t) + ctx := context.Background() + require.NoError(t, c.Set(ctx, []*resolve.CacheEntry{ + {Key: "k1", Value: []byte("v1"), TTL: 5 * time.Second}, + })) + + require.NoError(t, c.Delete(ctx, []string{"k1"})) + + entries, err := c.Get(ctx, []string{"k1"}) + require.NoError(t, err) + assert.Nil(t, entries[0]) +} + +func TestMemoryEntityCache_DeleteNonexistent(t *testing.T) { + c := newTestCache(t) + err := c.Delete(context.Background(), []string{"nonexistent"}) + require.NoError(t, err) +} + +func TestMemoryEntityCache_TTLExpiry(t *testing.T) { + c := newTestCache(t) + ctx := context.Background() + require.NoError(t, c.Set(ctx, []*resolve.CacheEntry{ + {Key: "k1", Value: []byte("v1"), TTL: 50 * time.Millisecond}, + })) + + // Should be present immediately + entries, err := c.Get(ctx, []string{"k1"}) + require.NoError(t, err) + assert.NotNil(t, entries[0]) + + // Wait for expiry — ristretto's TTL cleanup ticker runs periodically + require.Eventually(t, func() bool { + entries, err = c.Get(ctx, []string{"k1"}) + return err == nil && entries[0] == nil + }, 2*time.Second, 50*time.Millisecond) +} + +func TestMemoryEntityCache_Overwrite(t *testing.T) { + c := newTestCache(t) + ctx := context.Background() + require.NoError(t, c.Set(ctx, []*resolve.CacheEntry{ + {Key: "k1", Value: []byte("v1"), TTL: 5 * time.Second}, + })) + require.NoError(t, c.Set(ctx, []*resolve.CacheEntry{ + {Key: "k1", Value: []byte("v2"), TTL: 5 * time.Second}, + })) + + entries, err := c.Get(ctx, []string{"k1"}) + require.NoError(t, err) + assert.Equal(t, []byte("v2"), entries[0].Value) +} + +func TestMemoryEntityCache_EmptyBatch(t *testing.T) { + c := newTestCache(t) + ctx := context.Background() + + entries, err := c.Get(ctx, nil) + require.NoError(t, err) + assert.Nil(t, entries) + + require.NoError(t, c.Set(ctx, nil)) + require.NoError(t, c.Delete(ctx, nil)) +} + +func TestMemoryEntityCache_NilEntriesInSet(t *testing.T) { + c := newTestCache(t) + err := c.Set(context.Background(), []*resolve.CacheEntry{ + nil, + {Key: "k1", Value: []byte("v1"), TTL: 5 * time.Second}, + nil, + }) + require.NoError(t, err) + + entries, err := c.Get(context.Background(), []string{"k1"}) + require.NoError(t, err) + assert.NotNil(t, entries[0]) +} + +func TestMemoryEntityCache_ConcurrentAccess(t *testing.T) { + c := newTestCache(t) + ctx := context.Background() + var wg sync.WaitGroup + + var firstErrOnce sync.Once + var firstErr error + recordErr := func(err error) { + if err == nil { + return + } + firstErrOnce.Do(func() { firstErr = err }) + } + + for i := range 10 { + wg.Add(1) + go func(n int) { + defer wg.Done() + key := "key" + string(rune('0'+n)) + recordErr(c.Set(ctx, []*resolve.CacheEntry{ + {Key: key, Value: []byte("val"), TTL: 5 * time.Second}, + })) + _, err := c.Get(ctx, []string{key}) + recordErr(err) + recordErr(c.Delete(ctx, []string{key})) + }(i) + } + wg.Wait() + require.NoError(t, firstErr) +} + +func TestMemoryEntityCache_NoExpiryWithZeroTTL(t *testing.T) { + c := newTestCache(t) + ctx := context.Background() + require.NoError(t, c.Set(ctx, []*resolve.CacheEntry{ + {Key: "k1", Value: []byte("v1")}, + })) + + // Should still be present (no expiry) + time.Sleep(10 * time.Millisecond) + entries, err := c.Get(ctx, []string{"k1"}) + require.NoError(t, err) + assert.NotNil(t, entries[0]) +} + +func TestMemoryEntityCache_RemainingTTL(t *testing.T) { + c := newTestCache(t) + ctx := context.Background() + require.NoError(t, c.Set(ctx, []*resolve.CacheEntry{ + {Key: "k1", Value: []byte("v1"), TTL: 5 * time.Second}, + })) + + entries, err := c.Get(ctx, []string{"k1"}) + require.NoError(t, err) + require.NotNil(t, entries[0]) + assert.True(t, entries[0].RemainingTTL > 0) + assert.True(t, entries[0].RemainingTTL <= 5*time.Second) +} + +func TestMemoryEntityCache_InvalidMaxSize(t *testing.T) { + _, err := NewMemoryEntityCache(0) + require.Error(t, err) + + _, err = NewMemoryEntityCache(-1) + require.Error(t, err) +} + +func TestMemoryEntityCache_EvictsWhenFull(t *testing.T) { + // Create a tiny cache (1KB) + c, err := NewMemoryEntityCache(1024) + require.NoError(t, err) + t.Cleanup(func() { _ = c.Close() }) + + ctx := context.Background() + // Fill with entries larger than cache capacity + val := make([]byte, 512) + for i := range len(val) { + val[i] = byte(i % 256) + } + const totalKeys = 10 + for i := range totalKeys { + key := "key" + string(rune('A'+i)) + require.NoError(t, c.Set(ctx, []*resolve.CacheEntry{ + {Key: key, Value: val, TTL: 5 * time.Second}, + })) + } + + // Ristretto's admission policy and sampled counters make the exact number + // of survivors non-deterministic. With 1KB MaxCost and 512B entries the + // cache CANNOT hold all 10 entries, and in practice rarely holds the full + // theoretical 2. Assert the upper bound (eviction happened) and the lower + // bound (the cache isn't completely empty). Flush outstanding async work + // via cache.Wait() to stabilize before measuring. + c.cache.Wait() + + hitCount := 0 + for i := range totalKeys { + key := "key" + string(rune('A'+i)) + entries, err := c.Get(ctx, []string{key}) + require.NoError(t, err) + if entries[0] != nil { + hitCount++ + } + } + + // With 1KB max and 512B entries, at most ~2 can coexist. Admission may + // evict entries we just wrote before we read, so the lower bound is 0 — + // the only invariant we care about is that not ALL 10 survive. + assert.LessOrEqual(t, hitCount, 2, "cache must evict to stay within MaxCost") + assert.Less(t, hitCount, totalKeys, "cache must evict at least some entries") +} + +func TestMemoryEntityCache_Close(t *testing.T) { + c, err := NewMemoryEntityCache(testCacheSize) + require.NoError(t, err) + + ctx := context.Background() + require.NoError(t, c.Set(ctx, []*resolve.CacheEntry{ + {Key: "k1", Value: []byte("v1"), TTL: 5 * time.Second}, + })) + + c.Close() + + // After close, Get returns zero values without panicking. The post-close + // path may return nil entries or an empty slice; either is acceptable so + // long as the call doesn't panic and no entry is resurrected. + entries, err := c.Get(ctx, []string{"k1"}) + require.NoError(t, err) + if len(entries) > 0 { + assert.Nil(t, entries[0]) + } +} diff --git a/router/pkg/entitycache/redis.go b/router/pkg/entitycache/redis.go new file mode 100644 index 0000000000..89c0ee8690 --- /dev/null +++ b/router/pkg/entitycache/redis.go @@ -0,0 +1,86 @@ +package entitycache + +import ( + "context" + "io" + + "github.com/redis/go-redis/v9" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" +) + +var _ resolve.LoaderCache = (*RedisEntityCache)(nil) +var _ io.Closer = (*RedisEntityCache)(nil) + +type RedisEntityCache struct { + client redis.UniversalClient + keyPrefix string +} + +func NewRedisEntityCache(client redis.UniversalClient, keyPrefix string) *RedisEntityCache { + return &RedisEntityCache{client: client, keyPrefix: keyPrefix} +} + +func (c *RedisEntityCache) Get(ctx context.Context, keys []string) ([]*resolve.CacheEntry, error) { + if len(keys) == 0 { + return nil, nil + } + prefixedKeys := make([]string, len(keys)) + for i, k := range keys { + prefixedKeys[i] = c.keyPrefix + ":" + k + } + vals, err := c.client.MGet(ctx, prefixedKeys...).Result() + if err != nil { + return nil, err + } + entries := make([]*resolve.CacheEntry, len(keys)) + for i, val := range vals { + if val == nil { + continue + } + str, ok := val.(string) + if !ok { + continue + } + entries[i] = &resolve.CacheEntry{ + Key: keys[i], + Value: []byte(str), + } + } + return entries, nil +} + +func (c *RedisEntityCache) Set(ctx context.Context, entries []*resolve.CacheEntry) error { + if len(entries) == 0 { + return nil + } + pipe := c.client.Pipeline() + for _, entry := range entries { + if entry == nil { + continue + } + // Per LoaderCache contract: TTL<=0 means no expiration; for go-redis + // passing 0 (redis.KeepTTL is -1) tells the server to omit EX/PX. + ttl := entry.TTL + if ttl < 0 { + ttl = 0 + } + pipe.Set(ctx, c.keyPrefix+":"+entry.Key, entry.Value, ttl) + } + _, err := pipe.Exec(ctx) + return err +} + +func (c *RedisEntityCache) Delete(ctx context.Context, keys []string) error { + if len(keys) == 0 { + return nil + } + prefixedKeys := make([]string, len(keys)) + for i, k := range keys { + prefixedKeys[i] = c.keyPrefix + ":" + k + } + return c.client.Del(ctx, prefixedKeys...).Err() +} + +func (c *RedisEntityCache) Close() error { + return c.client.Close() +} diff --git a/router/pkg/entitycache/redis_test.go b/router/pkg/entitycache/redis_test.go new file mode 100644 index 0000000000..50f68792bf --- /dev/null +++ b/router/pkg/entitycache/redis_test.go @@ -0,0 +1,191 @@ +package entitycache + +import ( + "context" + "testing" + "time" + + "github.com/alicebob/miniredis/v2" + "github.com/redis/go-redis/v9" + "github.com/stretchr/testify/require" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" +) + +func newTestRedisCache(t *testing.T, prefix string) (*RedisEntityCache, *miniredis.Miniredis) { + t.Helper() + mr := miniredis.RunT(t) + client := redis.NewClient(&redis.Options{Addr: mr.Addr()}) + t.Cleanup(func() { client.Close() }) + return NewRedisEntityCache(client, prefix), mr +} + +func TestRedisEntityCache_ConstructorStoresKeyPrefix(t *testing.T) { + t.Parallel() + cache, _ := newTestRedisCache(t, "test") + require.NotNil(t, cache) + require.Equal(t, "test", cache.keyPrefix) +} + +func TestRedisEntityCache_GetReturnsNilForMissingKey(t *testing.T) { + t.Parallel() + cache, _ := newTestRedisCache(t, "pfx") + ctx := context.Background() + + entries, err := cache.Get(ctx, []string{"nonexistent"}) + require.NoError(t, err) + require.Len(t, entries, 1) + require.Nil(t, entries[0]) +} + +func TestRedisEntityCache_GetReturnsValueForExistingKey(t *testing.T) { + t.Parallel() + cache, mr := newTestRedisCache(t, "pfx") + ctx := context.Background() + + // Pre-populate directly via miniredis + require.NoError(t, mr.Set("pfx:mykey", "myvalue")) + + entries, err := cache.Get(ctx, []string{"mykey"}) + require.NoError(t, err) + require.Len(t, entries, 1) + require.NotNil(t, entries[0]) + require.Equal(t, "mykey", entries[0].Key) + require.Equal(t, []byte("myvalue"), entries[0].Value) +} + +func TestRedisEntityCache_SetThenGetReturnsStoredValues(t *testing.T) { + t.Parallel() + cache, _ := newTestRedisCache(t, "pfx") + ctx := context.Background() + + err := cache.Set(ctx, []*resolve.CacheEntry{ + {Key: "k1", Value: []byte("v1"), TTL: time.Minute}, + {Key: "k2", Value: []byte("v2"), TTL: time.Minute}, + }) + require.NoError(t, err) + + entries, err := cache.Get(ctx, []string{"k1", "k2"}) + require.NoError(t, err) + require.Len(t, entries, 2) + require.Equal(t, []byte("v1"), entries[0].Value) + require.Equal(t, []byte("v2"), entries[1].Value) +} + +func TestRedisEntityCache_KeyExpiresAfterTTL(t *testing.T) { + t.Parallel() + cache, mr := newTestRedisCache(t, "pfx") + ctx := context.Background() + + err := cache.Set(ctx, []*resolve.CacheEntry{ + {Key: "ephemeral", Value: []byte("gone-soon"), TTL: 5 * time.Second}, + }) + require.NoError(t, err) + + // Key exists before expiry + entries, err := cache.Get(ctx, []string{"ephemeral"}) + require.NoError(t, err) + require.NotNil(t, entries[0]) + + // Fast-forward past TTL + mr.FastForward(6 * time.Second) + + entries, err = cache.Get(ctx, []string{"ephemeral"}) + require.NoError(t, err) + require.Nil(t, entries[0]) +} + +func TestRedisEntityCache_DeleteRemovesKey(t *testing.T) { + t.Parallel() + cache, _ := newTestRedisCache(t, "pfx") + ctx := context.Background() + + err := cache.Set(ctx, []*resolve.CacheEntry{ + {Key: "delme", Value: []byte("val"), TTL: time.Minute}, + }) + require.NoError(t, err) + + err = cache.Delete(ctx, []string{"delme"}) + require.NoError(t, err) + + entries, err := cache.Get(ctx, []string{"delme"}) + require.NoError(t, err) + require.Nil(t, entries[0]) +} + +func TestRedisEntityCache_KeyPrefixAppliedToStoredKey(t *testing.T) { + t.Parallel() + cache, mr := newTestRedisCache(t, "myprefix") + ctx := context.Background() + + err := cache.Set(ctx, []*resolve.CacheEntry{ + {Key: "item", Value: []byte("data"), TTL: time.Minute}, + }) + require.NoError(t, err) + + // The key in Redis should be prefixed + require.True(t, mr.Exists("myprefix:item")) + + // The raw key without prefix should not exist + require.False(t, mr.Exists("item")) +} + +func TestRedisEntityCache_GetWithEmptyKeysReturnsNil(t *testing.T) { + t.Parallel() + cache, _ := newTestRedisCache(t, "pfx") + ctx := context.Background() + + entries, err := cache.Get(ctx, []string{}) + require.NoError(t, err) + require.Nil(t, entries) +} + +func TestRedisEntityCache_SetWithEmptyEntriesIsNoop(t *testing.T) { + t.Parallel() + cache, _ := newTestRedisCache(t, "pfx") + ctx := context.Background() + + err := cache.Set(ctx, []*resolve.CacheEntry{}) + require.NoError(t, err) +} + +func TestRedisEntityCache_DeleteWithEmptyKeysIsNoop(t *testing.T) { + t.Parallel() + cache, _ := newTestRedisCache(t, "pfx") + ctx := context.Background() + + err := cache.Delete(ctx, []string{}) + require.NoError(t, err) +} + +func TestRedisEntityCache_GetAfterCloseReturnsError(t *testing.T) { + t.Parallel() + mr := miniredis.RunT(t) + client := redis.NewClient(&redis.Options{Addr: mr.Addr()}) + cache := NewRedisEntityCache(client, "test") + + err := cache.Close() + require.NoError(t, err) + + // After closing, operations should fail + _, err = cache.Get(context.Background(), []string{"key"}) + require.Error(t, err) +} + +// TestRedisEntityCache_SetAndDeleteAfterCloseReturnError verifies the same +// post-close semantics for the mutating operations: Set and Delete must also +// surface an error instead of silently succeeding against a closed client. +func TestRedisEntityCache_SetAndDeleteAfterCloseReturnError(t *testing.T) { + t.Parallel() + mr := miniredis.RunT(t) + client := redis.NewClient(&redis.Options{Addr: mr.Addr()}) + cache := NewRedisEntityCache(client, "test") + + require.NoError(t, cache.Close()) + + err := cache.Set(context.Background(), + []*resolve.CacheEntry{{Key: "k", Value: []byte("v"), TTL: time.Minute}}) + require.Error(t, err, "Set on closed client must return error") + + err = cache.Delete(context.Background(), []string{"k"}) + require.Error(t, err, "Delete on closed client must return error") +} diff --git a/router/pkg/graphqlschemausage/schemausage_bench_test.go b/router/pkg/graphqlschemausage/schemausage_bench_test.go index 1acca645dc..639fc50240 100644 --- a/router/pkg/graphqlschemausage/schemausage_bench_test.go +++ b/router/pkg/graphqlschemausage/schemausage_bench_test.go @@ -98,7 +98,7 @@ func setupBenchmark(b *testing.B) (plan.Plan, *ast.Document, *ast.Document, *ast inputVariables, err := astjson.ParseBytes(op.Input.Variables) require.NoError(b, err) - merged, _, err := astjson.MergeValues(nil, vars, inputVariables) + merged, err := astjson.MergeValues(nil, vars, inputVariables) require.NoError(b, err) return generatedPlan, &op, &def, merged diff --git a/router/pkg/graphqlschemausage/schemausage_test.go b/router/pkg/graphqlschemausage/schemausage_test.go index cb93fbfbe8..88fea46bbd 100644 --- a/router/pkg/graphqlschemausage/schemausage_test.go +++ b/router/pkg/graphqlschemausage/schemausage_test.go @@ -208,7 +208,7 @@ func TestGetSchemaUsageInfo(t *testing.T) { inputVariables, err := astjson.ParseBytes(op.Input.Variables) assert.NoError(t, err) - merged, _, err := astjson.MergeValues(arena.NewMonotonicArena(), vars, inputVariables) + merged, err := astjson.MergeValues(arena.NewMonotonicArena(), vars, inputVariables) assert.NoError(t, err) fieldUsageInfo := GetTypeFieldUsageInfo(generatedPlan)