From 3bd2fd169e5fd9214bd7daec505a852d6fc19893 Mon Sep 17 00:00:00 2001 From: Yury Smolski <140245+ysmolski@users.noreply.github.com> Date: Mon, 8 Jun 2026 17:10:47 +0300 Subject: [PATCH 1/4] chore: add non cached benchmark to find the price of costs control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have two benchmarks that use the router with caching enabled. That does not make a good indicator of the delta when we enable the cost control feature. This PR adds two benchmarks where caching is disabled so we can see the real price of Cost Control: $ go test -bench=BenchmarkSequentialBigNotCached -benchmem -count=20 -run=^$ . > new.txt $ benchstat -col .name new.txt goos: darwin goarch: arm64 pkg: github.com/wundergraph/cosmo/router-tests/protocol cpu: Apple M4 Max │ SequentialBigNotCached │ SequentialBigNotCachedCostControl │ │ sec/op │ sec/op vs base │ *-14 1.092m ± 1% 1.125m ± 0% +3.05% (p=0.003 n=20) │ SequentialBigNotCached │ SequentialBigNotCachedCostControl │ │ B/s │ B/s vs base │ *-14 2.952Mi ± 1% 2.871Mi ± 0% -2.75% (p=0.003 n=20) │ SequentialBigNotCached │ SequentialBigNotCachedCostControl │ │ B/op │ B/op vs base │ *-14 998.0Ki ± 0% 986.5Ki ± 0% -1.15% (p=0.000 n=20) │ SequentialBigNotCached │ SequentialBigNotCachedCostControl │ │ allocs/op │ allocs/op vs base │ *-14 11.15k ± 0% 11.22k ± 0% +0.66% (p=0.000 n=20) It shows that cost control adds ~80 allocations and ~3% of CPU time when caching is disabled. 
--- router-tests/protocol/integration_test.go | 67 ++++++++++++++++++++++- router-tests/testenv/testenv.go | 2 + 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/router-tests/protocol/integration_test.go b/router-tests/protocol/integration_test.go index f69be840db..26b5e69a09 100644 --- a/router-tests/protocol/integration_test.go +++ b/router-tests/protocol/integration_test.go @@ -1593,7 +1593,66 @@ func BenchmarkSequentialBigCostControl(b *testing.B) { cfg.CostControl = &config.CostControl{ Enabled: true, Mode: config.CostControlModeMeasure, - EstimatedListSize: 15, + EstimatedListSize: 5, + ExposeHeaders: true, + } + }, + }, func(b *testing.B, xEnv *testenv.Environment) { + b.SetBytes(int64(len(bigEmployeesResponse))) + b.ReportAllocs() + for b.Loop() { + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: bigEmployeesQuery, + }) + if len(res.Body) < 3000 { + b.Errorf("unexpected result %q, expecting \n\n%q", res.Body, bigEmployeesResponse) + } + estimated := res.Response.Header.Get(core.CostEstimatedHeader) + require.Equal(b, "4650", estimated) + actual := res.Response.Header.Get(core.CostActualHeader) + require.Equal(b, "189", actual) + } + }) +} + +func BenchmarkSequentialBigNotCached(b *testing.B) { + testenv.Bench(b, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.EnableNormalizationCache = false + cfg.EnableValidationCache = false + cfg.EnablePersistedOperationsCache = false + cfg.ExecutionPlanCacheSize = 0 + cfg.OperationHashCacheSize = 0 + }, + }, func(b *testing.B, xEnv *testenv.Environment) { + b.SetBytes(int64(len(bigEmployeesResponse))) + b.ReportAllocs() + for b.Loop() { + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: bigEmployeesQuery, + }) + if len(res.Body) < 3000 { + b.Errorf("unexpected result %q, expecting \n\n%q", res.Body, bigEmployeesResponse) + } + } + }) +} + +func BenchmarkSequentialBigNotCachedCostControl(b *testing.B) { + 
testenv.Bench(b, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.EnableNormalizationCache = false + cfg.EnableValidationCache = false + cfg.EnablePersistedOperationsCache = false + cfg.ExecutionPlanCacheSize = 0 + cfg.OperationHashCacheSize = 0 + }, + ModifySecurityConfiguration: func(cfg *config.SecurityConfiguration) { + cfg.CostControl = &config.CostControl{ + Enabled: true, + Mode: config.CostControlModeMeasure, + EstimatedListSize: 5, + ExposeHeaders: true, } }, }, func(b *testing.B, xEnv *testenv.Environment) { @@ -1603,9 +1662,15 @@ func BenchmarkSequentialBigCostControl(b *testing.B) { res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ Query: bigEmployeesQuery, }) + b.StopTimer() if len(res.Body) < 3000 { b.Errorf("unexpected result %q, expecting \n\n%q", res.Body, bigEmployeesResponse) } + estimated := res.Response.Header.Get(core.CostEstimatedHeader) + require.Equal(b, "4650", estimated) + actual := res.Response.Header.Get(core.CostActualHeader) + require.Equal(b, "189", actual) + b.StartTimer() } }) } diff --git a/router-tests/testenv/testenv.go b/router-tests/testenv/testenv.go index 71e11e48fe..a79c30b7f7 100644 --- a/router-tests/testenv/testenv.go +++ b/router-tests/testenv/testenv.go @@ -173,6 +173,7 @@ func Bench(b *testing.B, cfg *Config, f func(b *testing.B, xEnv *Environment)) { } b.StartTimer() f(b, env) + b.StopTimer() if cfg.AssertCacheMetrics != nil { assertCacheMetrics(b, env, cfg.AssertCacheMetrics.BaseGraphAssertions, "") @@ -180,6 +181,7 @@ func Bench(b *testing.B, cfg *Config, f func(b *testing.B, xEnv *Environment)) { assertCacheMetrics(b, env, v, ff) } } + b.StartTimer() } const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" From de4c309ab00404e6650cfbb4a675f8898742f13e Mon Sep 17 00:00:00 2001 From: Yury Smolski <140245+ysmolski@users.noreply.github.com> Date: Mon, 8 Jun 2026 17:41:51 +0300 Subject: [PATCH 2/4] stop timer --- 
router-tests/protocol/integration_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/router-tests/protocol/integration_test.go b/router-tests/protocol/integration_test.go index 26b5e69a09..bc3ca91b4e 100644 --- a/router-tests/protocol/integration_test.go +++ b/router-tests/protocol/integration_test.go @@ -1580,6 +1580,7 @@ func BenchmarkSequentialBig(b *testing.B) { res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ Query: bigEmployeesQuery, }) + // Not worth stopping the timer. if len(res.Body) < 3000 { b.Errorf("unexpected result %q, expecting \n\n%q", res.Body, bigEmployeesResponse) } @@ -1604,6 +1605,7 @@ func BenchmarkSequentialBigCostControl(b *testing.B) { res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ Query: bigEmployeesQuery, }) + b.StopTimer() if len(res.Body) < 3000 { b.Errorf("unexpected result %q, expecting \n\n%q", res.Body, bigEmployeesResponse) } @@ -1611,6 +1613,7 @@ func BenchmarkSequentialBigCostControl(b *testing.B) { require.Equal(b, "4650", estimated) actual := res.Response.Header.Get(core.CostActualHeader) require.Equal(b, "189", actual) + b.StartTimer() } }) } @@ -1631,6 +1634,7 @@ func BenchmarkSequentialBigNotCached(b *testing.B) { res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ Query: bigEmployeesQuery, }) + // Not worth stopping the timer. 
if len(res.Body) < 3000 { b.Errorf("unexpected result %q, expecting \n\n%q", res.Body, bigEmployeesResponse) } From 617603815294fd01de6f80e10064c84a93b0cfb5 Mon Sep 17 00:00:00 2001 From: Yury Smolski <140245+ysmolski@users.noreply.github.com> Date: Thu, 11 Jun 2026 15:04:07 +0300 Subject: [PATCH 3/4] add big-response --- router/__schemas/graph.yaml | 2 + router/bench-big-response.js | 79 +++++++++++++++++++++++++++++++++++ router/docs/Profiling.md | 80 +++++++++++++++++++++--------------- 3 files changed, 127 insertions(+), 34 deletions(-) create mode 100644 router/bench-big-response.js diff --git a/router/__schemas/graph.yaml b/router/__schemas/graph.yaml index 87d83e2809..d389cda497 100644 --- a/router/__schemas/graph.yaml +++ b/router/__schemas/graph.yaml @@ -18,3 +18,5 @@ subgraphs: - name: employeeUpdates schema: file: ../../demo/pkg/subgraphs/employeeupdated/subgraph/schema.graphqls + - name: test1 + routing_url: http://localhost:4006/graphql diff --git a/router/bench-big-response.js b/router/bench-big-response.js new file mode 100644 index 0000000000..f991916682 --- /dev/null +++ b/router/bench-big-response.js @@ -0,0 +1,79 @@ +import http from 'k6/http'; +import { check } from 'k6'; + +// Load test for LARGE responses through the router. +// +// PREREQUISITES: +// 1. test1 subgraph running on 4006 port: `cd ../demo && go run cmd/all/main.go` +// 2. test1 present in the router's execution config +// +// GOTCHA: the leaf object is a shared singleton of identical lorem-ipsum text, +// so gzip compresses the payload to almost nothing. To genuinely push 20-60 MB +// over the wire we request `identity` (uncompressed) by default. +// Set ENCODING=gzip to measure the compressed path instead. 
+ +const URL = __ENV.URL || 'http://localhost:3002/graphql'; +const BIG_OBJECTS = __ENV.BIG_OBJECTS || '200'; +const NESTED_OBJECTS = __ENV.NESTED_OBJECTS || '72'; +const DEEPLY_NESTED = __ENV.DEEPLY_NESTED || '10'; +const ENCODING = __ENV.ENCODING || 'identity'; // 'identity' = full bytes on the wire; 'gzip' = compressed +const DISCARD = (__ENV.DISCARD || 'false') === 'true'; + +export const options = { + discardResponseBodies: DISCARD, + scenarios: { + big: { + executor: 'constant-vus', + vus: parseInt(__ENV.VUS || '1', 10), + duration: __ENV.DURATION || '60s', + }, + }, + thresholds: { + http_req_failed: ['rate<0.01'], + }, +}; + +const query = `query Big($big: Int!, $nested: Int!, $deep: Int!) { + bigResponse(bigObjects: $big, nestedObjects: $nested, deeplyNestedObjects: $deep) { + nestedObjects { + deeplyNestedObjects { + aFieldOnDeeplyNestedObject bFieldOnDeeplyNestedObject cFieldOnDeeplyNestedObject + dFieldOnDeeplyNestedObject eFieldOnDeeplyNestedObject fFieldOnDeeplyNestedObject + gFieldOnDeeplyNestedObject hFieldOnDeeplyNestedObject iFieldOnDeeplyNestedObject + jFieldOnDeeplyNestedObject kFieldOnDeeplyNestedObject lFieldOnDeeplyNestedObject + mFieldOnDeeplyNestedObject nFieldOnDeeplyNestedObject oFieldOnDeeplyNestedObject + pFieldOnDeeplyNestedObject qFieldOnDeeplyNestedObject rFieldOnDeeplyNestedObject + sFieldOnDeeplyNestedObject tFieldOnDeeplyNestedObject uFieldOnDeeplyNestedObject + vFieldOnDeeplyNestedObject wFieldOnDeeplyNestedObject xFieldOnDeeplyNestedObject + yFieldOnDeeplyNestedObject zFieldOnDeeplyNestedObject + } + } + } +}`; + +const body = JSON.stringify({ + query, + operationName: 'Big', + variables: { + big: parseInt(BIG_OBJECTS, 10), + nested: parseInt(NESTED_OBJECTS, 10), + deep: parseInt(DEEPLY_NESTED, 10), + }, +}); + +const headers = { + 'Content-Type': 'application/json', + 'Accept-Encoding': ENCODING, + 'GraphQL-Client-Name': 'k6-bigresponse', + 'GraphQL-Client-Version': '0.0.1', +}; + +export default function () { + const res = 
http.post(URL, body, { headers }); + // When DISCARD=true, r.body is null — only assert status. When DISCARD=false, + // also assert the payload contains no GraphQL errors. + check(res, { + 'status is 200': (r) => r.status === 200, + 'no graphql errors': (r) => DISCARD || (r.status === 200 && r.body.includes('errors') === false) + }); +} diff --git a/router/docs/Profiling.md b/router/docs/Profiling.md index 4e7ebe2b0d..7e1dbbeb60 100644 --- a/router/docs/Profiling.md +++ b/router/docs/Profiling.md @@ -1,68 +1,78 @@ # Profiling -The router is set up with pprof, so users can run the router with `pprof` running, and get a thorough understanding of the active performance. +The router is set up with pprof, so users can run the router with `pprof` running, and get a +thorough understanding of the active performance. -> **We recommend that before adding a new feature, users should profile it to make sure that there aren't any surprise resource drains from the feature.** +> **We recommend that before adding a new feature, users should profile it to make sure that there +aren't any surprise resource drains from the feature.** ## Running the router with pprof -To run the router with pprof, follow the steps in [Contributing.md](../../CONTRIBUTING.md) (Local Development) to set up a local development environment, aside from running `make start-router`. + +To run the router with pprof, follow the steps in [Contributing.md](../../CONTRIBUTING.md) (Local +Development) to set up a local development environment, aside from running `make start-router`. In the `router` directory, run the following command: + ```shell go run cmd/router/main.go -pprof-addr=":6060" ``` This will start the router with pprof running on port 6060. -To run a solid workflow and get a sense of the routers performance, you can use [k6](https://grafana.com/docs/k6/latest/). +To run a solid workflow and get a sense of the router's performance, you can +use [k6](https://grafana.com/docs/k6/latest/). 
To do so, run: + ```shell brew install k6 k6 run bench.js ``` This will run a benchmark on the router, and you can see the results in the terminal. + ``` - ✓ is status 200 - - checks.........................: 100.00% 40978 out of 40978 - data_received..................: 294 MB 5.8 MB/s - data_sent......................: 114 MB 2.3 MB/s - http_req_blocked...............: avg=7.01µs min=2µs med=5µs max=2.19ms p(90)=6µs p(95)=7µs - http_req_connecting............: avg=1.57µs min=0s med=0s max=1.06ms p(90)=0s p(95)=0s - http_req_duration..............: avg=52.54ms min=8.68ms med=48.33ms max=235.82ms p(90)=93.36ms p(95)=106.39ms - { expected_response:true }...: avg=52.54ms min=8.68ms med=48.33ms max=235.82ms p(90)=93.36ms p(95)=106.39ms - http_req_failed................: 0.00% 0 out of 40978 - http_req_receiving.............: avg=1.88ms min=32µs med=318µs max=153.15ms p(90)=4.24ms p(95)=6.37ms - http_req_sending...............: avg=24.66µs min=9µs med=19µs max=7.72ms p(90)=29µs p(95)=34µs - http_req_tls_handshaking.......: avg=0s min=0s med=0s max=0s p(90)=0s p(95)=0s - http_req_waiting...............: avg=50.62ms min=8.42ms med=46.39ms max=230.97ms p(90)=90.72ms p(95)=103.53ms - http_reqs......................: 40978 816.55677/s - iteration_duration.............: avg=52.83ms min=8.95ms med=48.62ms max=236.13ms p(90)=93.65ms p(95)=106.71ms - iterations.....................: 40978 816.55677/s - vus............................: 99 min=2 max=99 - vus_max........................: 100 min=100 max=100 - - -running (0m50.2s), 000/100 VUs, 40978 complete and 0 interrupted iterations -default ✓ [======================================] 000/100 VUs 50s + ✓ is status 200 + + HTTP + http_req_duration..............: avg=2.04ms min=192µs med=1.82ms max=27.02ms p(90)=3.5ms p(95)=4.05ms + { expected_response:true }...: avg=2.04ms min=192µs med=1.82ms max=27.02ms p(90)=3.5ms p(95)=4.05ms + http_req_failed................: 0.00% 0 out of 1014298 + http_reqs......................: 1014298 
20284.858532/s + + EXECUTION + iteration_duration.............: avg=2.12ms min=240.62µs med=1.89ms max=27.13ms p(90)=3.58ms p(95)=4.13ms + iterations.....................: 1014298 20284.858532/s + vus............................: 99 min=2 max=99 + vus_max........................: 100 min=100 max=100 + + NETWORK + data_received..................: 7.3 GB 145 MB/s + data_sent......................: 2.8 GB 56 MB/s ``` -This can show you, for example, how many requests were sent, how long it took, and it can help us diagnose the proper configuration of the router for users. +This can show you, for example, how many requests were sent, how long it took, and it can help us +diagnose the proper configuration of the router for users. ## Profiling the router -There are many different things you can use pprof for, and we recommend reading the [pprof documentation](https://pkg.go.dev/net/http/pprof) to get a better understanding of what you can do with it. + +There are many different things you can use pprof for, and we recommend reading +the [pprof documentation](https://pkg.go.dev/net/http/pprof) to get a better understanding of what +you can do with it. As an example, to look at heap and memory usage, you can run the following commands: In a terminal, as the router is running, run: + ```shell go tool pprof http://localhost:6060/debug/pprof/heap # or go tool pprof -http 127.0.0.1:6060 heap.out ``` -That will open a `pprof` shell, and in it, you can explore commands. Some useful ones are: `web` (which depends on graphviz, `brew install graphviz`), `top`, and `pdf`, which will give you different ways to look at the heap allocations. +That will open a `pprof` shell, and in it, you can explore commands. Some useful ones are: `web` ( +which depends on graphviz, `brew install graphviz`), `top`, and `pdf`, which will give you different +ways to look at the heap allocations. 
For example, `top20` will return: + ``` (pprof) top20 Showing nodes accounting for 13458.23kB, 100% of 13458.23kB total @@ -91,11 +101,11 @@ Showing top 20 nodes out of 69 (pprof) ``` -Then, you can also do `list ` to get a better understanding of where the memory is being allocated, line by line, in a function. - -In addition, we can also see how allocs work by running +Then, you can also do `list ` to get a better understanding of where the memory is being +allocated, line by line, in a function. You can also run a profile for an amount of time (for example, 5 seconds), by running: + ``` go tool pprof ‘http://localhost:6060/debug/pprof/profile?seconds=5’ ``` @@ -105,6 +115,8 @@ go tool pprof ‘http://localhost:6060/debug/pprof/profile?seconds=5’ To use Pyroscope for continuous profiling of the router: 1. Raise the infra-debug stack with `make infra-debug-up` -2. Run the router with either `PYROSCOPE_ADDR=http://localhost:4040` or `-pyroscope-addr http://localhost:4040` to enable sending continuous profiling data to Pyroscope. You can view this data in Grafana. +2. Run the router with either `PYROSCOPE_ADDR=http://localhost:4040` or + `-pyroscope-addr http://localhost:4040` to enable sending continuous profiling data to Pyroscope. + You can view this data in Grafana. 3. Visit the drilldown profiles section in Grafana at `http://localhost:9300` 4. Select the router from the service dropdown From a79f9e44922e68229750767ed0c328bc48507be8 Mon Sep 17 00:00:00 2001 From: Yury Smolski <140245+ysmolski@users.noreply.github.com> Date: Thu, 11 Jun 2026 15:39:33 +0300 Subject: [PATCH 4/4] replace quotes --- router/docs/Profiling.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/router/docs/Profiling.md b/router/docs/Profiling.md index 7e1dbbeb60..93ca9ec034 100644 --- a/router/docs/Profiling.md +++ b/router/docs/Profiling.md @@ -107,7 +107,7 @@ allocated, line by line, in a function. 
You can also run a profile for an amount of time (for example, 5 seconds), by running: ``` -go tool pprof ‘http://localhost:6060/debug/pprof/profile?seconds=5’ +go tool pprof 'http://localhost:6060/debug/pprof/profile?seconds=5' ``` ## Pyroscope