-
Notifications
You must be signed in to change notification settings - Fork 384
Harden Accept-Encoding parsing to prevent NPE #10307
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -39,6 +39,7 @@ | |||||
| import java.util.Date; | ||||||
| import java.util.HashMap; | ||||||
| import java.util.List; | ||||||
| import java.util.Locale; | ||||||
| import java.util.Map; | ||||||
| import java.util.concurrent.ExecutionException; | ||||||
| import java.util.regex.Matcher; | ||||||
|
|
@@ -84,6 +85,10 @@ public class WebServer { | |||||
|
|
||||||
| private static final Pattern CACHE_JS_FILE = Pattern.compile("/(" + STRONG_NAME + ").cache.js$"); | ||||||
|
|
||||||
| private static final Pattern ACCEPT_ENCODING_SPEC = Pattern.compile( | ||||||
| "^\\s*([!#$%&'*+.^_`|~0-9A-Za-z-]+|\\*)\\s*(?:;\\s*q\\s*=\\s*" | ||||||
| + "(0(?:\\.\\d{0,3})?|1(?:\\.0{0,3})?))?\\s*$"); | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. technically you could use two capture groups instead of one containing two non-capture groups. then, you can use their presence/absence to decide "is it zero" or "is it anything valid other than zero", and skip the Double.parseDouble entirely, leaving the work to the state machine. |
||||||
|
|
||||||
| private static final MimeTypes MIME_TYPES = new MimeTypes(); | ||||||
|
|
||||||
| private static final String TIME_IN_THE_PAST = "Mon, 01 Jan 1990 00:00:00 GMT"; | ||||||
|
|
@@ -372,9 +377,11 @@ public void send(HttpServletRequest request, HttpServletResponse response, TreeL | |||||
| } | ||||||
|
|
||||||
| if (contentEncoding != null) { | ||||||
| if (!request.getHeader("Accept-Encoding").contains("gzip")) { | ||||||
| response.sendError(HttpServletResponse.SC_NOT_IMPLEMENTED); | ||||||
| logger.log(TreeLogger.WARN, "client doesn't accept gzip; bailing"); | ||||||
| if (!acceptsGzipEncoding(request.getHeader("Accept-Encoding"))) { | ||||||
| response.setHeader("Accept-Encoding", "gzip"); | ||||||
| response.sendError(HttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE); | ||||||
| logger.log(TreeLogger.WARN, | ||||||
| "client doesn't accept gzip and no uncompressed representation exists; bailing"); | ||||||
| return; | ||||||
| } | ||||||
| response.setHeader("Content-Encoding", "gzip"); | ||||||
|
|
@@ -543,6 +550,43 @@ static String guessMimeType(String filename) { | |||||
| return mimeType != null ? mimeType : ""; | ||||||
| } | ||||||
|
|
||||||
| /* visible for testing */ | ||||||
| static boolean acceptsGzipEncoding(String acceptEncodingHeader) { | ||||||
| if (acceptEncodingHeader == null) { | ||||||
| // RFC 9110: if Accept-Encoding is absent, any content coding is acceptable. | ||||||
| return true; | ||||||
| } | ||||||
| if (acceptEncodingHeader.trim().isEmpty()) { | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
that said, if i'm right about simplifying, this can be included in the final return false, no need for this "present but nothing in it" check. |
||||||
| return false; | ||||||
| } | ||||||
|
|
||||||
| Double gzipQValue = null; | ||||||
| Double wildcardQValue = null; | ||||||
|
|
||||||
| for (String encodingSpec : acceptEncodingHeader.split(",")) { | ||||||
| Matcher matcher = ACCEPT_ENCODING_SPEC.matcher(encodingSpec); | ||||||
| if (!matcher.matches()) { | ||||||
| continue; | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. contents don't adhere to the spec, can we continue parsing at all? should this return false instead?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this current implementation takes a more tolerant approach: it ignores malformed entries and continues parsing valid ones so a single bad token doesn’t cause the entire Accept-Encoding header to be rejected. i will align with whichever behavior you prefer here:
Let me know which direction you’d like |
||||||
| } | ||||||
|
|
||||||
| String encoding = matcher.group(1).toLowerCase(Locale.ROOT); | ||||||
| String qValueText = matcher.group(2); | ||||||
| double qValue = qValueText == null ? 1.0 : Double.parseDouble(qValueText); | ||||||
|
|
||||||
| if (encoding.equals("gzip")) { | ||||||
| gzipQValue = qValue; | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can't we return early here? do we need to track the q values at all after we check a given entry? if the encoding is valid but isn't * or gzip, we ignore it. if it is either * or gzip and has a q value greater than zero, we return true. if no * or gzip had a value greater than zero, we return false. what am I missing that we can't substantially trim this down? |
||||||
| } else if (encoding.equals("*")) { | ||||||
| wildcardQValue = qValue; | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| if (gzipQValue != null) { | ||||||
| return gzipQValue > 0.0; | ||||||
| } | ||||||
|
|
||||||
| return wildcardQValue != null && wildcardQValue > 0.0; | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Returns the binding properties from the web page where dev mode is being used. (As passed in | ||||||
| * by dev_mode_on.js in a JSONP request to "/recompile".) | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,70 @@ | ||||||
| /* | ||||||
| * Copyright 2026 The GWT Project Authors. | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| * | ||||||
| * Licensed under the Apache License, Version 2.0 (the "License"); you may not | ||||||
| * use this file except in compliance with the License. You may obtain a copy of | ||||||
| * the License at | ||||||
| * | ||||||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||||||
| * | ||||||
| * Unless required by applicable law or agreed to in writing, software | ||||||
| * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||||||
| * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | ||||||
| * License for the specific language governing permissions and limitations under | ||||||
| * the License. | ||||||
| */ | ||||||
| package com.google.gwt.dev.codeserver; | ||||||
|
|
||||||
| import junit.framework.TestCase; | ||||||
|
|
||||||
| /** | ||||||
| * Tests request Accept-Encoding parsing for serving gzip-compressed responses. | ||||||
| */ | ||||||
| public class WebServerAcceptEncodingTest extends TestCase { | ||||||
|
|
||||||
| public void testAcceptsGzipEncodingWhenHeaderIsAbsent() { | ||||||
| assertTrue(WebServer.acceptsGzipEncoding(null)); | ||||||
| } | ||||||
|
|
||||||
| public void testAcceptsGzipEncodingRejectsEmptyHeaderValue() { | ||||||
| assertFalse(WebServer.acceptsGzipEncoding("")); | ||||||
| assertFalse(WebServer.acceptsGzipEncoding(" ")); | ||||||
| } | ||||||
|
|
||||||
| public void testAcceptsGzipEncodingAcceptsSimpleGzip() { | ||||||
| assertTrue(WebServer.acceptsGzipEncoding("gzip")); | ||||||
| assertTrue(WebServer.acceptsGzipEncoding("deflate, gzip")); | ||||||
| assertTrue(WebServer.acceptsGzipEncoding("GZIP")); | ||||||
| } | ||||||
|
|
||||||
| public void testAcceptsGzipEncodingRejectsSubstringMatches() { | ||||||
| assertFalse(WebServer.acceptsGzipEncoding("xgzip")); | ||||||
| assertFalse(WebServer.acceptsGzipEncoding("gzip-alt")); | ||||||
| } | ||||||
|
|
||||||
| public void testAcceptsGzipEncodingRejectsExplicitGzipZeroQValue() { | ||||||
| assertFalse(WebServer.acceptsGzipEncoding("gzip;q=0")); | ||||||
| assertFalse(WebServer.acceptsGzipEncoding("deflate, gzip; q=0.0")); | ||||||
| } | ||||||
|
|
||||||
| public void testAcceptsGzipEncodingHonorsWildcardWhenGzipAbsent() { | ||||||
| assertTrue(WebServer.acceptsGzipEncoding("*")); | ||||||
| assertTrue(WebServer.acceptsGzipEncoding("br;q=0.2, *;q=0.7")); | ||||||
| assertFalse(WebServer.acceptsGzipEncoding("*;q=0")); | ||||||
| } | ||||||
|
|
||||||
| public void testAcceptsGzipEncodingPrefersExplicitGzipOverWildcard() { | ||||||
| assertFalse(WebServer.acceptsGzipEncoding("gzip;q=0, *;q=1")); | ||||||
| assertTrue(WebServer.acceptsGzipEncoding("gzip;q=1, *;q=0")); | ||||||
| } | ||||||
|
|
||||||
| public void testAcceptsGzipEncodingRejectsInvalidQualityValues() { | ||||||
| assertFalse(WebServer.acceptsGzipEncoding("gzip;q=not-a-number")); | ||||||
| assertFalse(WebServer.acceptsGzipEncoding("gzip;q=1.1")); | ||||||
| assertFalse(WebServer.acceptsGzipEncoding("gzip;q=0.1234")); | ||||||
| } | ||||||
|
|
||||||
| public void testAcceptsGzipEncodingRejectsUnsupportedParameters() { | ||||||
| assertFalse(WebServer.acceptsGzipEncoding("gzip;level=9")); | ||||||
| } | ||||||
| } | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As per your note about structured parsing, this should include a test for something with gzip as a substring of its name - or as an alternative "metadata" for the named value?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please also test the supported variations in blank spaces, other valid characters |
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OWS doesn't appear to be allowed at any point after
qin the weight (and "BWS" is apparently only supported in specific cases around auth and transfer-encoding, TIL). Note also that all forms of "whitespace" in the spec are required to be limited to SP and TAB, while\sincludes others including CR, LF, NBSP.https://httpwg.org/specs/rfc9110.html#quality.values
https://httpwg.org/specs/rfc9110.html#whitespace
Or is that a concern that other servers deal with in the wild?
This regex is not easy to read - can you break it down into smaller segments to assemble it to be easier to validate?