Skip to content
71 changes: 70 additions & 1 deletion router-tests/protocol/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1580,6 +1580,7 @@ func BenchmarkSequentialBig(b *testing.B) {
res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{
Query: bigEmployeesQuery,
})
// Not worth stopping the timer.
if len(res.Body) < 3000 {
b.Errorf("unexpected result %q, expecting \n\n%q", res.Body, bigEmployeesResponse)
}
Expand All @@ -1593,7 +1594,69 @@ func BenchmarkSequentialBigCostControl(b *testing.B) {
cfg.CostControl = &config.CostControl{
Enabled: true,
Mode: config.CostControlModeMeasure,
EstimatedListSize: 15,
EstimatedListSize: 5,
ExposeHeaders: true,
}
},
}, func(b *testing.B, xEnv *testenv.Environment) {
b.SetBytes(int64(len(bigEmployeesResponse)))
b.ReportAllocs()
for b.Loop() {
res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{
Query: bigEmployeesQuery,
})
b.StopTimer()
if len(res.Body) < 3000 {
b.Errorf("unexpected result %q, expecting \n\n%q", res.Body, bigEmployeesResponse)
}
estimated := res.Response.Header.Get(core.CostEstimatedHeader)
require.Equal(b, "4650", estimated)
actual := res.Response.Header.Get(core.CostActualHeader)
require.Equal(b, "189", actual)
b.StartTimer()
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
})
}

func BenchmarkSequentialBigNotCached(b *testing.B) {
testenv.Bench(b, &testenv.Config{
ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) {
cfg.EnableNormalizationCache = false
cfg.EnableValidationCache = false
cfg.EnablePersistedOperationsCache = false
cfg.ExecutionPlanCacheSize = 0
cfg.OperationHashCacheSize = 0
},
}, func(b *testing.B, xEnv *testenv.Environment) {
b.SetBytes(int64(len(bigEmployeesResponse)))
b.ReportAllocs()
for b.Loop() {
res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{
Query: bigEmployeesQuery,
})
// Not worth stopping the timer.
if len(res.Body) < 3000 {
b.Errorf("unexpected result %q, expecting \n\n%q", res.Body, bigEmployeesResponse)
}
}
})
}

func BenchmarkSequentialBigNotCachedCostControl(b *testing.B) {
testenv.Bench(b, &testenv.Config{
ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) {
cfg.EnableNormalizationCache = false
cfg.EnableValidationCache = false
cfg.EnablePersistedOperationsCache = false
cfg.ExecutionPlanCacheSize = 0
cfg.OperationHashCacheSize = 0
},
ModifySecurityConfiguration: func(cfg *config.SecurityConfiguration) {
cfg.CostControl = &config.CostControl{
Enabled: true,
Mode: config.CostControlModeMeasure,
EstimatedListSize: 5,
ExposeHeaders: true,
}
},
}, func(b *testing.B, xEnv *testenv.Environment) {
Expand All @@ -1603,9 +1666,15 @@ func BenchmarkSequentialBigCostControl(b *testing.B) {
res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{
Query: bigEmployeesQuery,
})
b.StopTimer()
if len(res.Body) < 3000 {
b.Errorf("unexpected result %q, expecting \n\n%q", res.Body, bigEmployeesResponse)
}
estimated := res.Response.Header.Get(core.CostEstimatedHeader)
require.Equal(b, "4650", estimated)
actual := res.Response.Header.Get(core.CostActualHeader)
require.Equal(b, "189", actual)
b.StartTimer()
}
})
}
Expand Down
2 changes: 2 additions & 0 deletions router-tests/testenv/testenv.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,15 @@ func Bench(b *testing.B, cfg *Config, f func(b *testing.B, xEnv *Environment)) {
}
b.StartTimer()
f(b, env)
b.StopTimer()
if cfg.AssertCacheMetrics != nil {
assertCacheMetrics(b, env, cfg.AssertCacheMetrics.BaseGraphAssertions, "")

for ff, v := range cfg.AssertCacheMetrics.FeatureFlagAssertions {
assertCacheMetrics(b, env, v, ff)
}
}
b.StartTimer()
}

const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
Expand Down
2 changes: 2 additions & 0 deletions router/__schemas/graph.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ subgraphs:
- name: employeeUpdates
schema:
file: ../../demo/pkg/subgraphs/employeeupdated/subgraph/schema.graphqls
- name: test1
routing_url: http://localhost:4006/graphql
79 changes: 79 additions & 0 deletions router/bench-big-response.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import http from 'k6/http';
import { check } from 'k6';

// Load test for LARGE responses through the router.
//
// PREREQUISITES:
// 1. test1 subgraph running on port 4006: `cd ../demo && go run cmd/all/main.go`
// 2. test1 present in the router's execution config
//
// GOTCHA: the leaf object is a shared singleton of identical lorem-ipsum text,
// so gzip compresses the payload to almost nothing. To genuinely push 20-60 MB
// over the wire we request `identity` (uncompressed) by default.
// Set ENCODING=gzip to measure the compressed path instead.

// Reads a k6 environment variable, falling back when it is unset or empty.
const envOr = (name, fallback) => __ENV[name] || fallback;

// Router GraphQL endpoint under test.
const URL = envOr('URL', 'http://localhost:3002/graphql');
// Values for the query variables; kept as strings here and parsed where the
// request body is built.
const BIG_OBJECTS = envOr('BIG_OBJECTS', '200');
const NESTED_OBJECTS = envOr('NESTED_OBJECTS', '72');
const DEEPLY_NESTED = envOr('DEEPLY_NESTED', '10');
// 'identity' = full bytes on the wire; 'gzip' = compressed
const ENCODING = envOr('ENCODING', 'identity');
// Only the exact string 'true' enables discarding of response bodies.
const DISCARD = envOr('DISCARD', 'false') === 'true';

// Number of virtual users (VUS, default 1) and run length (DURATION, default 60s).
const vuCount = parseInt(__ENV.VUS || '1', 10);
const runDuration = __ENV.DURATION || '60s';

// k6 options: a single constant-VU scenario; the run fails its threshold when
// 1% or more of the HTTP requests fail.
export const options = {
  discardResponseBodies: DISCARD,
  scenarios: {
    big: { executor: 'constant-vus', vus: vuCount, duration: runDuration },
  },
  thresholds: { http_req_failed: ['rate<0.01'] },
};

// GraphQL operation sent on every iteration. It selects all 26 a-z fields of
// each deeplyNestedObject under bigResponse.nestedObjects; the three Int
// arguments of bigResponse are supplied as the $big, $nested and $deep variables.
const query = `query Big($big: Int!, $nested: Int!, $deep: Int!) {
bigResponse(bigObjects: $big, nestedObjects: $nested, deeplyNestedObjects: $deep) {
nestedObjects {
deeplyNestedObjects {
aFieldOnDeeplyNestedObject bFieldOnDeeplyNestedObject cFieldOnDeeplyNestedObject
dFieldOnDeeplyNestedObject eFieldOnDeeplyNestedObject fFieldOnDeeplyNestedObject
gFieldOnDeeplyNestedObject hFieldOnDeeplyNestedObject iFieldOnDeeplyNestedObject
jFieldOnDeeplyNestedObject kFieldOnDeeplyNestedObject lFieldOnDeeplyNestedObject
mFieldOnDeeplyNestedObject nFieldOnDeeplyNestedObject oFieldOnDeeplyNestedObject
pFieldOnDeeplyNestedObject qFieldOnDeeplyNestedObject rFieldOnDeeplyNestedObject
sFieldOnDeeplyNestedObject tFieldOnDeeplyNestedObject uFieldOnDeeplyNestedObject
vFieldOnDeeplyNestedObject wFieldOnDeeplyNestedObject xFieldOnDeeplyNestedObject
yFieldOnDeeplyNestedObject zFieldOnDeeplyNestedObject
}
}
}
}`;

// Query variables, parsed from the string-valued environment settings.
const variables = {
  big: parseInt(BIG_OBJECTS, 10),
  nested: parseInt(NESTED_OBJECTS, 10),
  deep: parseInt(DEEPLY_NESTED, 10),
};

// Request payload, serialized once and reused for every iteration.
const body = JSON.stringify({ query, operationName: 'Big', variables });

// Request headers; Accept-Encoding carries the ENCODING setting.
const headers = {
  'Content-Type': 'application/json',
  'Accept-Encoding': ENCODING,
  'GraphQL-Client-Name': 'k6-bigresponse',
  'GraphQL-Client-Version': '0.0.1',
};

// k6 entry point, run once per VU iteration: POSTs the prepared query to the
// router and records pass/fail checks on the response.
export default function () {
  const res = http.post(URL, body, { headers });
  check(res, {
    'status is 200': (r) => r.status === 200,
    // When DISCARD=true, r.body is null, so only the status can be asserted.
    // Otherwise the body must not contain an "errors" key. Matching the quoted
    // key (rather than the bare word) avoids false failures when "errors"
    // merely appears inside returned text, and the typeof guard turns a
    // missing body into a failed check instead of a thrown exception.
    'no graphql errors': (r) =>
      DISCARD ||
      (r.status === 200 && typeof r.body === 'string' && !r.body.includes('"errors"')),
  });
}
Comment thread
ysmolski marked this conversation as resolved.
82 changes: 47 additions & 35 deletions router/docs/Profiling.md
Original file line number Diff line number Diff line change
@@ -1,68 +1,78 @@
# Profiling

The router is set up with pprof, so users can run the router with `pprof` running, and get a thorough understanding of the active performance.
The router is set up with pprof, so users can run the router with `pprof` running, and get a
thorough understanding of the active performance.

> **We recommend that before adding a new feature, users should profile it to make sure that there aren't any surprise resource drains from the feature.**
> **We recommend that before adding a new feature, users should profile it to make sure that there
aren't any surprise resource drains from the feature.**

## Running the router with pprof
To run the router with pprof, follow the steps in [Contributing.md](../../CONTRIBUTING.md) (Local Development) to set up a local development environment, aside from running `make start-router`.

To run the router with pprof, follow the Local Development steps in
[CONTRIBUTING.md](../../CONTRIBUTING.md) to set up a local development environment, but skip the
`make start-router` step.

In the `router` directory, run the following command:

```shell
go run cmd/router/main.go -pprof-addr=":6060"
```

This will start the router with pprof running on port 6060.

To run a solid workflow and get a sense of the router's performance, you can use [k6](https://grafana.com/docs/k6/latest/).
To run a solid workflow and get a sense of the router's performance, you can
use [k6](https://grafana.com/docs/k6/latest/).
To do so, run:

```shell
brew install k6
k6 run bench.js
```

This will run a benchmark on the router, and you can see the results in the terminal.

```
✓ is status 200

checks.........................: 100.00% 40978 out of 40978
data_received..................: 294 MB 5.8 MB/s
data_sent......................: 114 MB 2.3 MB/s
http_req_blocked...............: avg=7.01µs min=2µs med=5µs max=2.19ms p(90)=6µs p(95)=7µs
http_req_connecting............: avg=1.57µs min=0s med=0s max=1.06ms p(90)=0s p(95)=0s
http_req_duration..............: avg=52.54ms min=8.68ms med=48.33ms max=235.82ms p(90)=93.36ms p(95)=106.39ms
{ expected_response:true }...: avg=52.54ms min=8.68ms med=48.33ms max=235.82ms p(90)=93.36ms p(95)=106.39ms
http_req_failed................: 0.00% 0 out of 40978
http_req_receiving.............: avg=1.88ms min=32µs med=318µs max=153.15ms p(90)=4.24ms p(95)=6.37ms
http_req_sending...............: avg=24.66µs min=9µs med=19µs max=7.72ms p(90)=29µs p(95)=34µs
http_req_tls_handshaking.......: avg=0s min=0s med=0s max=0s p(90)=0s p(95)=0s
http_req_waiting...............: avg=50.62ms min=8.42ms med=46.39ms max=230.97ms p(90)=90.72ms p(95)=103.53ms
http_reqs......................: 40978 816.55677/s
iteration_duration.............: avg=52.83ms min=8.95ms med=48.62ms max=236.13ms p(90)=93.65ms p(95)=106.71ms
iterations.....................: 40978 816.55677/s
vus............................: 99 min=2 max=99
vus_max........................: 100 min=100 max=100


running (0m50.2s), 000/100 VUs, 40978 complete and 0 interrupted iterations
default ✓ [======================================] 000/100 VUs 50s
✓ is status 200

HTTP
http_req_duration..............: avg=2.04ms min=192µs med=1.82ms max=27.02ms p(90)=3.5ms p(95)=4.05ms
{ expected_response:true }...: avg=2.04ms min=192µs med=1.82ms max=27.02ms p(90)=3.5ms p(95)=4.05ms
http_req_failed................: 0.00% 0 out of 1014298
http_reqs......................: 1014298 20284.858532/s

EXECUTION
iteration_duration.............: avg=2.12ms min=240.62µs med=1.89ms max=27.13ms p(90)=3.58ms p(95)=4.13ms
iterations.....................: 1014298 20284.858532/s
vus............................: 99 min=2 max=99
vus_max........................: 100 min=100 max=100

NETWORK
data_received..................: 7.3 GB 145 MB/s
data_sent......................: 2.8 GB 56 MB/s
```
Comment thread
ysmolski marked this conversation as resolved.

This can show you, for example, how many requests were sent, how long it took, and it can help us diagnose the proper configuration of the router for users.
This can show you, for example, how many requests were sent, how long it took, and it can help us
diagnose the proper configuration of the router for users.

## Profiling the router
There are many different things you can use pprof for, and we recommend reading the [pprof documentation](https://pkg.go.dev/net/http/pprof) to get a better understanding of what you can do with it.

There are many different things you can use pprof for, and we recommend reading
the [pprof documentation](https://pkg.go.dev/net/http/pprof) to get a better understanding of what
you can do with it.

As an example, to look at heap and memory usage, run the following in a terminal while the router
is running:

```shell
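go tool pprof http://localhost:6060/debug/pprof/heap
# or, to browse a previously saved heap profile (heap.out) in the web UI;
# use a port other than 6060, which the router's pprof endpoint already occupies
go tool pprof -http 127.0.0.1:8081 heap.out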
```

That will open a `pprof` shell, and in it, you can explore commands. Some useful ones are: `web` (which depends on graphviz, `brew install graphviz`), `top`, and `pdf`, which will give you different ways to look at the heap allocations.
That will open a `pprof` shell, and in it, you can explore commands. Some useful ones are: `web`
(which depends on graphviz, `brew install graphviz`), `top`, and `pdf`, which will give you different
ways to look at the heap allocations.
For example, `top20` will return:

```
(pprof) top20
Showing nodes accounting for 13458.23kB, 100% of 13458.23kB total
Expand Down Expand Up @@ -91,20 +101,22 @@ Showing top 20 nodes out of 69
(pprof)
```

Then, you can also do `list <function>` to get a better understanding of where the memory is being allocated, line by line, in a function.

In addition, we can also see how allocs work by running
Then, you can also do `list <function>` to get a better understanding of where the memory is being
allocated, line by line, in a function.

You can also run a profile for an amount of time (for example, 5 seconds), by running:

```
go tool pprof http://localhost:6060/debug/pprof/profile?seconds=5
go tool pprof 'http://localhost:6060/debug/pprof/profile?seconds=5'
```

## Pyroscope

To use Pyroscope for continuous profiling of the router:

1. Raise the infra-debug stack with `make infra-debug-up`
2. Run the router with either `PYROSCOPE_ADDR=http://localhost:4040` or `-pyroscope-addr http://localhost:4040` to enable sending continuous profiling data to Pyroscope. You can view this data in Grafana.
2. Run the router with either `PYROSCOPE_ADDR=http://localhost:4040` or
`-pyroscope-addr http://localhost:4040` to enable sending continuous profiling data to Pyroscope.
You can view this data in Grafana.
3. Visit the drilldown profiles section in Grafana at `http://localhost:9300`
4. Select the router from the service dropdown
Loading