diff --git a/Cargo.lock b/Cargo.lock index 4b27306fcf7..4621aa1d209 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -577,6 +577,7 @@ dependencies = [ "bus", "bytemuck", "either", + "form_urlencoded", "futures", "futures-lite", "http", diff --git a/Cargo.toml b/Cargo.toml index 4645a317722..491a1f864a2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -138,6 +138,7 @@ test-case = "3.3.1" rstest = "0.26.1" url = "2.5.8" tokio = { version = "1.51.1", default-features = false } +form_urlencoded = "1.2" futures-concurrency = "7.7.1" dynify = "0.1.2" futures-channel = "0.3.32" diff --git a/core/runtime/Cargo.toml b/core/runtime/Cargo.toml index a2f45b78f04..8f9bf4b38b6 100644 --- a/core/runtime/Cargo.toml +++ b/core/runtime/Cargo.toml @@ -17,6 +17,7 @@ boa_gc.workspace = true bus = { workspace = true, optional = true } bytemuck.workspace = true either = { workspace = true, optional = true } +form_urlencoded = { workspace = true, optional = true } futures = "0.3.32" futures-lite.workspace = true http = { workspace = true, optional = true } @@ -52,6 +53,7 @@ all = ["default", "reqwest-blocking"] url = ["dep:url"] fetch = [ "dep:either", + "dep:form_urlencoded", "dep:http", "dep:serde_json", "boa_engine/either", diff --git a/core/runtime/src/fetch/request.rs b/core/runtime/src/fetch/request.rs index 2991dfdd9a2..10a2a6fe430 100644 --- a/core/runtime/src/fetch/request.rs +++ b/core/runtime/src/fetch/request.rs @@ -5,12 +5,34 @@ //! [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/Request use super::HttpRequest; use super::headers::JsHeaders; +use boa_engine::object::builtins::JsPromise; use boa_engine::value::{Convert, TryFromJs}; use boa_engine::{ - Finalize, JsData, JsObject, JsResult, JsString, JsValue, Trace, boa_class, js_error, + Context, Finalize, JsData, JsNativeError, JsObject, JsResult, JsString, JsValue, Trace, + boa_class, js_error, }; use either::Either; +use std::cell::RefCell; +use std::future::Future; use std::mem; +use std::pin::Pin; +use std::rc::Rc; + +/// The body of a [`JsRequest`], which may be either already-read bytes or a +/// pending async future that will produce the bytes on first access. +/// +/// Stored behind an `Rc>` so that: +/// - multiple clones of the same [`JsRequest`] share one read (the first +/// awaiter stores `Ready`; subsequent callers reuse it), and +/// - body-consuming JS methods (`text`, `json`, `formData`) can be called from +/// async closures that capture the `Rc` by value. +enum BodyState { + /// Body bytes are already available (constructed synchronously from JS or + /// produced by a previous call to a body-consuming method). + Ready(Vec), + /// Body bytes have not been read yet; awaiting the future produces them. + Pending(Pin> + 'static>>), +} /// A [RequestInit][mdn] object. This is a JavaScript object (not a /// class) that can be used as options for creating a [`JsRequest`]. @@ -26,6 +48,12 @@ pub struct RequestInit { } impl RequestInit { + /// Returns `true` if a `body` field was explicitly provided in the init object. + #[must_use] + pub fn has_body(&self) -> bool { + self.body.is_some() + } + /// Takes the abort signal from the options, if present. pub fn take_signal(&mut self) -> Option { self.signal.take() @@ -110,31 +138,76 @@ impl RequestInit { /// The `Request` interface of the [Fetch API][mdn] represents a resource request. /// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API -#[derive(Clone, Debug, JsData, Trace, Finalize)] +#[derive(Clone, JsData, Trace, Finalize)] pub struct JsRequest { + /// Request metadata (method, URI, headers). The body field inside is always + /// empty (`Vec::new()`); the actual body is stored in `body` below. #[unsafe_ignore_trace] inner: HttpRequest>, signal: Option, + /// The body, which may be lazily awaited on first access. + /// + /// Shared via `Rc` so that [`JsRequest::clone_request`] and all + /// body-consuming methods can access the same underlying data without + /// duplicating or double-reading it. + #[unsafe_ignore_trace] + body: Rc>, +} + +impl std::fmt::Debug for JsRequest { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("JsRequest") + .field("method", &self.inner.method()) + .field("uri", &self.inner.uri()) + .finish_non_exhaustive() + } } impl JsRequest { - /// Get the inner `http::Request` object. This drops the body (if any). - pub fn into_inner(mut self) -> HttpRequest> { - mem::replace(&mut self.inner, HttpRequest::new(Vec::new())) + /// Get the inner `http::Request` object. + /// + /// If the body is `Ready`, it is included in the returned request. + /// If the body is still `Pending` (not yet awaited), the returned request + /// has an empty body — the pending future is dropped. + pub fn into_inner(self) -> HttpRequest> { + let body_bytes = match &*self.body.borrow() { + BodyState::Ready(b) => b.clone(), + BodyState::Pending(_) => Vec::new(), + }; + let (parts, _) = self.inner.clone().into_parts(); + HttpRequest::from_parts(parts, body_bytes) } - /// Split this request into its HTTP request and abort signal. - fn into_parts(mut self) -> (HttpRequest>, Option) { + /// Split this request into its HTTP head, abort signal, and body state. + fn into_parts( + mut self, + ) -> ( + HttpRequest>, + Option, + Rc>, + ) { let request = mem::replace(&mut self.inner, HttpRequest::new(Vec::new())); let signal = self.signal.take(); - (request, signal) + let body = Rc::clone(&self.body); + (request, signal, body) } /// Get a reference to the inner `http::Request` object. + /// Note: the body in the returned request is always empty; use + /// [`Self::body_bytes`] to access the body. pub fn inner(&self) -> &HttpRequest> { &self.inner } + /// Returns the body bytes when the body is already `Ready`, or `None` if + /// the body is still `Pending` (not yet resolved from a lazy future). + pub fn body_bytes(&self) -> Option> { + match &*self.body.borrow() { + BodyState::Ready(b) => Some(b.clone()), + BodyState::Pending(_) => None, + } + } + /// Get the abort signal associated with this request, if any. pub(crate) fn signal(&self) -> Option { self.signal.clone() @@ -154,7 +227,9 @@ impl JsRequest { input: Either, options: Option, ) -> JsResult { - let (request, signal) = match input { + // `source_body` carries the body state from an input JsRequest, so that + // `new Request(existingReq)` preserves a Pending body rather than losing it. + let (request, signal, source_body) = match input { Either::Left(uri) => { let uri = http::Uri::try_from( uri.to_std_string() @@ -165,33 +240,108 @@ impl JsRequest { .uri(uri) .body(Vec::::new()) .map_err(|_| js_error!(Error: "Cannot construct request"))?; - (request, None) + (request, None, None) + } + Either::Right(r) => { + let (request, signal, body) = r.into_parts(); + (request, signal, Some(body)) } - Either::Right(r) => r.into_parts(), }; if let Some(mut options) = options { let signal = options.take_signal().or(signal); - let inner = options.into_request_builder(Some(request))?; - Ok(Self { inner, signal }) - } else { - Ok(Self { - inner: request, + // If options explicitly provides a body, use it; otherwise inherit source_body. + let has_body = options.has_body(); + let mut inner = options.into_request_builder(Some(request))?; + let body = if has_body { + let bytes = mem::take(inner.body_mut()); + Rc::new(RefCell::new(BodyState::Ready(bytes))) + } else { + source_body.unwrap_or_else(|| Rc::new(RefCell::new(BodyState::Ready(Vec::new())))) + }; + return Ok(Self { + inner, signal, - }) + body, + }); + } + + // No options: propagate source body or default to empty Ready. + let body = + source_body.unwrap_or_else(|| Rc::new(RefCell::new(BodyState::Ready(Vec::new())))); + Ok(Self { + inner: request, + signal, + body, + }) + } + + /// Create a [`JsRequest`] whose body is resolved lazily by awaiting + /// `body_future` on first access. + /// + /// Use this when the body is available as an async stream (e.g. an + /// incoming HTTP request in a WASI component) and you want to avoid + /// blocking until the body is actually needed by the JS handler. + pub fn with_lazy_body( + head: HttpRequest>, + body_future: impl Future> + 'static, + ) -> Self { + Self { + inner: head, + signal: None, + body: Rc::new(RefCell::new(BodyState::Pending(Box::pin(body_future)))), } } } impl From>> for JsRequest { - fn from(inner: HttpRequest>) -> Self { + fn from(mut inner: HttpRequest>) -> Self { + // Split the body out of inner so that body bytes live only in `body`. + let bytes = mem::take(inner.body_mut()); Self { inner, signal: None, + body: Rc::new(RefCell::new(BodyState::Ready(bytes))), } } } +/// Helper: resolve the `Rc>` to bytes, awaiting the pending +/// future if needed, and caching the result in the `RefCell`. +/// +/// This is the shared async core used by `text()`, `json()`, and `formData()`. +async fn resolve_body(body_cell: Rc>) -> Vec { + // Fast path: body is already ready. + { + let guard = body_cell.borrow(); + if let BodyState::Ready(ref bytes) = *guard { + return bytes.clone(); + } + } + + // Slow path: take the pending future, await it outside the borrow, then + // store the result back as `Ready` so subsequent calls are cheap. + let fut = { + let mut guard = body_cell.borrow_mut(); + match mem::replace(&mut *guard, BodyState::Ready(Vec::new())) { + BodyState::Pending(f) => f, + BodyState::Ready(_) => { + // Another concurrent caller already resolved it; we just + // stored an empty Ready above — restore it properly. + // This branch should be unreachable in practice (single-threaded + // Boa event loop), but is handled defensively. + return Vec::new(); + } + } + }; + + let bytes = fut.await; + *body_cell.borrow_mut() = BodyState::Ready(bytes.clone()); + bytes +} + +// ------ Boa class implementation ------ + #[boa_class(rename = "Request")] #[boa(rename_all = "camelCase")] impl JsRequest { @@ -218,8 +368,127 @@ impl JsRequest { JsRequest::create_from_js(input, options) } + /// Clones the request. + /// + /// The body state is shared via `Rc`: if the body future has not yet been + /// awaited, the first of the two to call `text()` / `json()` / `formData()` + /// will await it and cache the result for the other. #[boa(rename = "clone")] fn clone_request(&self) -> Self { self.clone() } + + /// Returns the HTTP method of the request. + /// + /// See + #[boa(getter)] + fn method(&self) -> JsString { + JsString::from(self.inner.method().as_str()) + } + + /// Returns the URL of the request. + /// + /// See + #[boa(getter)] + fn url(&self) -> JsString { + JsString::from(self.inner.uri().to_string().as_str()) + } + + /// Returns the headers associated with the request. + /// + /// See + #[boa(getter)] + fn headers(&self) -> JsHeaders { + JsHeaders::from_http(self.inner.headers().clone()) + } + + /// Reads the request body as a UTF-8 string. + /// + /// Returns a `Promise` that resolves to a string. If the body has not yet + /// been received from the network it is awaited transparently. + /// + /// See + fn text(&self, context: &mut Context) -> JsPromise { + let body_cell = Rc::clone(&self.body); + JsPromise::from_async_fn( + async move |_| { + let bytes = resolve_body(body_cell).await; + let text = String::from_utf8_lossy(&bytes); + Ok(JsString::from(text.as_ref()).into()) + }, + context, + ) + } + + /// Reads the request body and parses it as JSON. + /// + /// Returns a `Promise` that resolves to the parsed JavaScript value. + /// + /// See + fn json(&self, context: &mut Context) -> JsPromise { + let body_cell = Rc::clone(&self.body); + JsPromise::from_async_fn( + async move |context| { + let bytes = resolve_body(body_cell).await; + let json_str = String::from_utf8_lossy(&bytes); + let json = serde_json::from_str::(&json_str) + .map_err(|e| JsNativeError::syntax().with_message(e.to_string()))?; + JsValue::from_json(&json, &mut context.borrow_mut()) + }, + context, + ) + } + + /// Reads the request body and parses it as `application/x-www-form-urlencoded`. + /// + /// Returns a `Promise` that resolves to a plain JS object with the form + /// fields. When the same key appears multiple times the last value wins. + /// + /// Only `application/x-www-form-urlencoded` is supported. Multipart form + /// data is not supported and will cause the promise to reject with a + /// `TypeError`. + /// + /// See + fn form_data(&self, context: &mut Context) -> JsPromise { + let body_cell = Rc::clone(&self.body); + let content_type = self + .inner + .headers() + .get("content-type") + .and_then(|v| v.to_str().ok()) + .map(str::to_string); + + JsPromise::from_async_fn( + async move |context| { + let is_url_encoded = content_type + .as_deref() + .is_none_or(|ct| ct.starts_with("application/x-www-form-urlencoded")); + + if !is_url_encoded { + return Err(JsNativeError::typ() + .with_message( + "formData() only supports application/x-www-form-urlencoded bodies", + ) + .into()); + } + + let bytes = resolve_body(body_cell).await; + + let ctx = &mut context.borrow_mut(); + let form_obj = JsObject::default(ctx.intrinsics()); + + for (key, value) in form_urlencoded::parse(&bytes) { + form_obj.set( + JsString::from(key.as_ref()), + JsString::from(value.as_ref()), + false, + ctx, + )?; + } + + Ok(form_obj.into()) + }, + context, + ) + } } diff --git a/core/runtime/src/fetch/tests/request.rs b/core/runtime/src/fetch/tests/request.rs index b14a7c84859..04397b1d510 100644 --- a/core/runtime/src/fetch/tests/request.rs +++ b/core/runtime/src/fetch/tests/request.rs @@ -93,7 +93,7 @@ fn request_clone_preserves_body_without_override() { let request = ctx.global_object().get(js_str!("cloned"), ctx).unwrap(); let request_obj = request.as_object().unwrap(); let request = request_obj.downcast_ref::().unwrap(); - assert_eq!(request.inner().body().as_slice(), b"payload"); + assert_eq!(request.body_bytes().unwrap(), b"payload"); }), ]); } @@ -147,7 +147,7 @@ fn request_clone_body_override() { let request = ctx.global_object().get(js_str!("cloned"), ctx).unwrap(); let request_obj = request.as_object().unwrap(); let request = request_obj.downcast_ref::().unwrap(); - assert_eq!(request.inner().body().as_slice(), b"override"); + assert_eq!(request.body_bytes().unwrap(), b"override"); }), ]); } @@ -196,7 +196,7 @@ fn request_clone_method_preserves_body() { let cloned = ctx.global_object().get(js_str!("cloned"), ctx).unwrap(); let cloned_obj = cloned.as_object().unwrap(); let cloned_req = cloned_obj.downcast_ref::().unwrap(); - assert_eq!(cloned_req.inner().body().as_slice(), b"payload"); + assert_eq!(cloned_req.body_bytes().unwrap(), b"payload"); }), ]); } @@ -227,14 +227,14 @@ fn request_clone_method_is_independent() { let cloned_obj = cloned.as_object().unwrap(); let cloned_req = cloned_obj.downcast_ref::().unwrap(); - assert_eq!(original_req.inner().body().as_slice(), b"original-body"); - assert_eq!(cloned_req.inner().body().as_slice(), b"original-body"); + let orig_body = original_req.body_bytes().unwrap(); + let cloned_body = cloned_req.body_bytes().unwrap(); - // Verify they are distinct objects (different pointers). - assert!(!std::ptr::eq( - original_req.inner().body().as_ptr(), - cloned_req.inner().body().as_ptr() - )); + assert_eq!(orig_body, b"original-body"); + assert_eq!(cloned_body, b"original-body"); + + // Verify the two allocations are independent (different heap pointers). + assert!(!std::ptr::eq(orig_body.as_ptr(), cloned_body.as_ptr())); }), ]); } diff --git a/flake.nix b/flake.nix index 48d7e0fcf97..aab01084e84 100644 --- a/flake.nix +++ b/flake.nix @@ -40,6 +40,7 @@ nativeBuildInputs = [ rust-toolchain pkg-config + cargo-make ]; # Required for jemalloc, see https://github.com/NixOS/nixpkgs/issues/370494 . CFLAGS = "-DJEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE";