From 0301486c679bd3668dbf8c5ad8c192753aec2d2d Mon Sep 17 00:00:00 2001 From: Urgau Date: Mon, 27 Apr 2026 22:01:48 +0200 Subject: [PATCH] Limit the size of responses from GitHub --- Cargo.lock | 27 +++++++++-------- Cargo.toml | 2 ++ src/gha_logs.rs | 13 ++++++++ src/github/client.rs | 70 ++++++++++++++++++++++++++++++++++++-------- src/github/repos.rs | 4 ++- 5 files changed, 90 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0e9c7ffe6..a9588d82c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1192,7 +1192,7 @@ dependencies = [ "indexmap", "slab", "tokio", - "tokio-util 0.7.1", + "tokio-util 0.7.18", "tracing", ] @@ -1260,12 +1260,11 @@ dependencies = [ [[package]] name = "http" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] @@ -3253,16 +3252,15 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.1" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0edfdeb067411dba2044da6d1cb2df793dd35add7888d73c16e3381ded401764" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ "bytes", "futures-core", "futures-sink", "pin-project-lite", "tokio", - "tracing", ] [[package]] @@ -3310,7 +3308,7 @@ dependencies = [ "pin-project-lite", "sync_wrapper", "tokio", - "tokio-util 0.7.1", + "tokio-util 0.7.18", "tower-layer", "tower-service", "tracing", @@ -3333,7 +3331,7 @@ dependencies = [ "iri-string", "pin-project-lite", "tokio", - "tokio-util 0.7.1", + "tokio-util 0.7.18", "tower", "tower-layer", "tower-service", @@ -3453,6 +3451,8 @@ dependencies = [ "globset", "hex", "hmac", + "http", + "http-body-util", "hyper", "ignore", "imara-diff", @@ -3750,12 +3750,13 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.30" +version = "0.4.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f741de44b75e14c35df886aff5f1eb73aa114fa5d4d00dcd37b5e01259bf3b2" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" dependencies = [ "cfg-if", "js-sys", + "once_cell", "wasm-bindgen", "web-sys", ] @@ -3794,9 +3795,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.57" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b17e741662c70c8bd24ac5c5b18de314a2c26c32bf8346ee1e6f53de919c283" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index 20ef23173..5b8deaedb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,6 +54,8 @@ unicode-segmentation = "1.12.0" secrecy = { version = "0.10", features = ["serde"] } globset = { version = "0.4.18", default-features = false } tower_governor = { version = "0.8.0", default-features = false, features = ["axum", "tracing"] } +http-body-util = "0.1.3" +http = "1.4.0" [dependencies.serde] version = "1" diff --git a/src/gha_logs.rs b/src/gha_logs.rs index 1ab8baeb5..ec9bbfa4a 100644 --- a/src/gha_logs.rs +++ b/src/gha_logs.rs @@ -143,6 +143,19 @@ pub async fn gha_logs( let logs = match logs { Ok(logs) => logs, + Err(err) + if err + .downcast_ref::() + .is_some() => + { + // Return a friendly error message for no logs too big. + tracing::info!("gha_logs: raw logs too big (over 50 mib) for {log_uuid}"); + return Ok(( + StatusCode::BAD_REQUEST, + HeaderMap::new(), + "The requested logs are too large (over 50 Mib).\n\nTry download the raw logs from GitHub instead.".to_string(), + )); + } Err(err) if matches!(err.downcast_ref::(), Some(err) if err.status() == Some(StatusCode::GONE)) => { // Return a friendly error message for no longer available logs. diff --git a/src/github/client.rs b/src/github/client.rs index dc0bb8dee..db0da6e99 100644 --- a/src/github/client.rs +++ b/src/github/client.rs @@ -1,7 +1,10 @@ use anyhow::Context; use async_trait::async_trait; use futures::{FutureExt, future::BoxFuture}; +use http_body_util::BodyExt; +use http_body_util::Limited; use itertools::Itertools; +use reqwest::Body; use reqwest::header::{AUTHORIZATION, USER_AGENT}; use reqwest::{Client, Request, RequestBuilder, Response, StatusCode}; use secrecy::{ExposeSecret, SecretString}; @@ -98,36 +101,79 @@ impl GithubClient { } pub async fn send_req(&self, req: RequestBuilder) -> anyhow::Result<(Bytes, String)> { + const MAX_DEFAULT_RESPONSE_SIZE: usize = 1 * 1024 * 1024; // 1 Mib + + self.send_req_with_limit(req, MAX_DEFAULT_RESPONSE_SIZE) + .await + } + + pub async fn send_req_with_limit( + &self, + req: RequestBuilder, + max_response_size: usize, + ) -> anyhow::Result<(Bytes, String)> { const MAX_ATTEMPTS: u32 = 2; + log::debug!("send_req with {:?}", req); + let req_dbg = format!("{req:?}"); + let req = req .build() .with_context(|| format!("building reqwest {req_dbg}"))?; + let req_url = req.url().to_string(); + let mut resp = self.client.execute(req.try_clone().unwrap()).await?; if self.retry_rate_limit && let Some(sleep) = Self::needs_retry(&resp).await { resp = self.retry(req, sleep, MAX_ATTEMPTS).await?; } + let maybe_err = resp.error_for_status_ref().err(); let github_request_id = resp.headers().get("x-github-request-id").cloned(); - let body = resp - .bytes() - .await - .with_context(|| format!("failed to read response body {req_dbg}"))?; + + let resp: http::Response = resp.into(); + let limited = Limited::new(resp, max_response_size); + + let body = match limited.collect().await { + Ok(body) => body.to_bytes(), + Err(e) => match e.downcast::() { + Ok(e) => { + return Err(anyhow::Error::new(*e)).with_context(|| { + format!( + "req={req_url} (x-github-request-id: {}): lenght exceeded (over {max_response_size} bytes)", + github_request_id + .as_ref() + .and_then(|v| v.to_str().ok()) + .unwrap_or("unknown") + ) + }); + } + Err(e) => { + return Err(anyhow::Error::from_boxed(e)).with_context(|| { + format!( + "req={req_url} (x-github-request-id: {}): unable to complete the request", + github_request_id + .as_ref() + .and_then(|v| v.to_str().ok()) + .unwrap_or("unknown") + ) + }); + } + }, + }; + if let Some(e) = maybe_err { return Err(anyhow::Error::new(e)).with_context(|| { format!( - "response (x-github-request-id: {}): {}", - String::from_utf8_lossy( - github_request_id - .as_ref() - .map(|id| id.as_bytes()) - .unwrap_or_default() - ), - String::from_utf8_lossy(&body) + "req={req_url} (x-github-request-id: {}): {:.500}", + github_request_id + .as_ref() + .and_then(|v| v.to_str().ok()) + .unwrap_or("unknown"), + String::from_utf8_lossy(&body), ) }); } diff --git a/src/github/repos.rs b/src/github/repos.rs index c07d9f331..81deb0540 100644 --- a/src/github/repos.rs +++ b/src/github/repos.rs @@ -186,9 +186,11 @@ impl GithubClient { repo: &IssueRepository, job_id: u128, ) -> anyhow::Result { + const MAX_LOG_SIZE_IN_MB: usize = 50 * 1024 * 1024; // 50 Mib + let url = format!("{}/actions/jobs/{job_id}/logs", repo.url(self)); let (body, _req_dbg) = self - .send_req(self.get(&url)) + .send_req_with_limit(self.get(&url), MAX_LOG_SIZE_IN_MB) .await .context("failed to retrieve job logs")?; Ok(String::from_utf8_lossy(&body).to_string())