-
Notifications
You must be signed in to change notification settings - Fork 13
Implement URL normalization function and enhance SSRF detection #252
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 14 commits
de1fa00
d42f497
7bb57a4
6d5e7f4
d3f66d0
53fb8c7
976677b
f0bb7cc
a7eb170
ed1eed5
e13b062
95cfe5f
70d5d3d
0ef59f9
9e0a174
04c34e2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| package helpers | ||
|
|
||
| import ( | ||
| "net/url" | ||
| "strings" | ||
| ) | ||
|
|
||
| // remove all control characters (< 32) and 0x7f(DEL) + whitespace | ||
| func removeCTLByte(urlStr string) string { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function parameter 'urlStr' is reassigned/modified inside removeCTLByte, harming clarity and risking logic errors when mutating while iterating. Details✨ AI Reasoning 🔧 How do I fix it? More info - Comment |
||
| for i := 0; i < len(urlStr); i++ { | ||
| if urlStr[i] <= ' ' || urlStr[i] == 0x7f { | ||
| urlStr = urlStr[:i] + urlStr[i+1:] | ||
| } | ||
| } | ||
| return urlStr | ||
| } | ||
|
|
||
| func removeUserInfo(raw string) string { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function parameter 'raw' is reused/reassigned within removeUserInfo instead of using distinct local variables, reducing clarity. Details✨ AI Reasoning 🔧 How do I fix it? More info - Comment |
||
| schemeEnd := strings.Index(raw, "://") | ||
| if schemeEnd == -1 { | ||
| // No scheme, can't safely identify authority | ||
| return raw | ||
| } | ||
|
|
||
| scheme := raw[:schemeEnd+3] | ||
| rest := raw[schemeEnd+3:] | ||
|
|
||
| // Authority is up to first '/', '?', or '#' (https://datatracker.ietf.org/doc/html/rfc3986#section-3.2) | ||
| authorityEnd := len(rest) | ||
| for _, sep := range []string{"/", "?", "#"} { | ||
| if idx := strings.Index(rest, sep); idx != -1 && idx < authorityEnd { | ||
| authorityEnd = idx | ||
| } | ||
| } | ||
|
|
||
| authority := rest[:authorityEnd] | ||
| path := rest[authorityEnd:] | ||
|
|
||
| // Remove userinfo if present | ||
| if at := strings.LastIndex(authority, "@"); at != -1 { | ||
| authority = authority[at+1:] | ||
| } | ||
|
|
||
| return scheme + authority + path | ||
| } | ||
|
|
||
| func UnescapeUrl(urlStr string) string { | ||
| unescapedUrl, err := url.QueryUnescape(urlStr) | ||
| if err != nil { | ||
| return urlStr | ||
| } | ||
| return unescapedUrl | ||
| } | ||
|
|
||
| func NormalizeRawUrl(urlStr string) string { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function parameter 'urlStr' is repeatedly reassigned in NormalizeRawUrl rather than using a new local variable for the normalized value. Details✨ AI Reasoning 🔧 How do I fix it? More info - Comment |
||
| urlStr = UnescapeUrl(urlStr) | ||
| urlStr = removeCTLByte(urlStr) | ||
| urlStr = FixURL(urlStr) | ||
| urlStr = removeUserInfo(urlStr) | ||
| return urlStr | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| package helpers | ||
|
|
||
| import "testing" | ||
|
|
||
| func TestNormalizeRequestUrl(t *testing.T) { | ||
| tests := []struct { | ||
| input string | ||
| expected string | ||
| }{ | ||
| {"http://localhost:4000", "http://localhost:4000"}, | ||
| {"http://localhost:4000 ", "http://localhost:4000"}, | ||
| {"http://localhost:4000" + "\x00", "http://localhost:4000"}, | ||
| {"http://\\@localhost:4000", "http://localhost:4000"}, | ||
| {"http://127.1.1.1:4000\\\\\\@127.0.0.1:80/", "http://127.0.0.1:80/"}, | ||
| {"https:/localhost:4000", "https://localhost:4000"}, | ||
| {"http://127%2E0%2E0%2E1:4000", "http://127.0.0.1:4000"}, | ||
| } | ||
| for _, test := range tests { | ||
| result := NormalizeRawUrl(test.input) | ||
| if result != test.expected { | ||
| t.Errorf("For input '%s', expected %v but got %v", test.input, test.expected, result) | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,14 +1,16 @@ | ||
| package helpers | ||
|
|
||
| import ( | ||
| "net" | ||
| "net/url" | ||
| "strconv" | ||
|
|
||
| "golang.org/x/net/idna" | ||
| ) | ||
|
|
||
| func TryParseURL(input string) *url.URL { | ||
| parsedURL, err := url.ParseRequestURI(input) | ||
| if err != nil { | ||
| parsedURL, err := url.Parse(input) | ||
| if err != nil || parsedURL.Host == "" { | ||
| return nil | ||
| } | ||
|
|
||
|
|
@@ -17,5 +19,26 @@ func TryParseURL(input string) *url.URL { | |
| if err == nil { | ||
| parsedURL.Host = parsedHost | ||
| } | ||
| // If the port is not present, we need to add it based on the scheme | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Comment states the mechanical action (adding a port) rather than explaining why default ports are added; replace with a goal/rationale comment. Details✨ AI Reasoning 🔧 How do I fix it? More info - Comment |
||
| if parsedURL.Port() == "" { | ||
| port := 0 | ||
| switch parsedURL.Scheme { | ||
| case "http": | ||
| port = 80 | ||
| case "https": | ||
| port = 443 | ||
| } | ||
| parsedURL.Host = parsedURL.Host + ":" + strconv.Itoa(int(port)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Defaulting to port 0 and appending ":0" for unknown schemes results in invalid host values (e.g., example.com:0). Details✨ AI Reasoning 🔧 How do I fix it? More info - Comment |
||
| } | ||
| host, port, err := net.SplitHostPort(parsedURL.Host) | ||
| if err == nil { | ||
| ip := net.ParseIP(host) | ||
| if ip != nil { | ||
| parsedURL.Host = ip.String() + ":" + port | ||
| } else { | ||
| parsedURL.Host = host + ":" + port | ||
| } | ||
| } | ||
|
|
||
| return parsedURL | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,28 +2,50 @@ package ssrf | |
|
|
||
| import ( | ||
| "main/helpers" | ||
| "main/log" | ||
| "net/url" | ||
| "strconv" | ||
| "strings" | ||
| ) | ||
|
|
||
| func findHostnameInUserInput(userInput string, hostname string, port uint32) bool { | ||
| func getVariants(userInput string) []string { | ||
| variants := []string{userInput, "http://" + userInput, "https://" + userInput} | ||
| decodedUserInput, err := url.QueryUnescape(userInput) | ||
| if err == nil && decodedUserInput != userInput { | ||
| variants = append(variants, decodedUserInput, "http://"+decodedUserInput, "https://"+decodedUserInput) | ||
| } | ||
| return variants | ||
| } | ||
|
|
||
| func findHostnameInUserInput(userInput string, hostname string, port uint32) bool { | ||
| log.Debugf("findHostnameInUserInput: userInput: %s, hostname: %s, port: %d", userInput, hostname, port) | ||
| if len(userInput) <= 1 { | ||
| return false | ||
| } | ||
| // if hostname contains : we need to add the [ and ] to the hostname (ipv6) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function parameter 'hostname' is reassigned (modified) in-place; avoid reassigning function arguments. Details✨ AI Reasoning 🔧 How do I fix it? More info - Comment |
||
| if strings.Contains(hostname, ":") { | ||
| hostname = "[" + hostname + "]" | ||
| } | ||
|
|
||
| hostnameURL := helpers.TryParseURL("http://" + hostname) | ||
| hostnameURL := helpers.TryParseURL("http://" + hostname + ":" + strconv.Itoa(int(port))) | ||
| if hostnameURL == nil { | ||
| return false | ||
| } | ||
|
|
||
| userInput = helpers.ExtractResourceOrOriginal(userInput) | ||
| variants := []string{userInput, "http://" + userInput, "https://" + userInput} | ||
| userInput = helpers.NormalizeRawUrl(userInput) | ||
|
|
||
| variants := getVariants(userInput) | ||
|
|
||
| for _, variant := range variants { | ||
| userInputURL := helpers.TryParseURL(variant) | ||
| if userInputURL == nil { | ||
| continue | ||
| } | ||
|
|
||
| // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2 | ||
| // "The host subcomponent is case-insensitive." | ||
| if userInputURL != nil && strings.EqualFold(userInputURL.Hostname(), hostnameURL.Hostname()) { | ||
| if strings.EqualFold(userInputURL.Hostname(), hostnameURL.Hostname()) { | ||
| userPort := helpers.GetPortFromURL(userInputURL) | ||
|
|
||
| if port == 0 { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| --TEST-- | ||
| Test ssrf - obfuscated host | ||
|
|
||
| --ENV-- | ||
| AIKIDO_LOG_LEVEL=INFO | ||
| AIKIDO_BLOCK=1 | ||
|
|
||
| --POST-- | ||
| test=http://127.1.1.1:4000\@127.0.0.1:4000 | ||
|
|
||
| --FILE-- | ||
| <?php | ||
|
|
||
| $host = '127.0.0.1'; | ||
| $port = 4000; | ||
| $pid = null; | ||
|
|
||
| $descriptorspec = [ | ||
| 0 => ['pipe', 'r'], | ||
| 1 => ['file', '/dev/null', 'a'], | ||
| 2 => ['file', '/dev/null', 'a'] | ||
| ]; | ||
|
|
||
| try { | ||
| // Start PHP server | ||
| $process = proc_open("php -S $host:$port", $descriptorspec, $pipes); | ||
| if (!is_resource($process)) { | ||
| throw new RuntimeException("Failed to start PHP server."); | ||
| } | ||
|
|
||
| $status = proc_get_status($process); | ||
| $pid = $status['pid']; | ||
|
|
||
| // Wait a moment to ensure server starts | ||
| sleep(1); | ||
|
|
||
| // Perform the cURL request | ||
| $ch1 = curl_init("http://127.0.0.1:4000"); | ||
| curl_setopt($ch1, CURLOPT_RETURNTRANSFER, true); | ||
| curl_setopt($ch1, CURLOPT_FOLLOWLOCATION, true); | ||
| $response = curl_exec($ch1); | ||
| curl_close($ch1); | ||
|
|
||
| echo "Response:\n$response\n"; | ||
|
|
||
| } catch (Throwable $e) { | ||
| echo "Error: " . $e->getMessage() . "\n"; | ||
| } finally { | ||
| // Ensure the server is killed if started | ||
| if ($pid) { | ||
| exec("kill -9 $pid"); | ||
| } | ||
| if (isset($process) && is_resource($process)) { | ||
| proc_close($process); | ||
| } | ||
| } | ||
|
|
||
|
|
||
|
|
||
|
|
||
| --EXPECTREGEX-- | ||
| .*Aikido firewall has blocked a server-side request forgery.* |
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Comment describes what the code does (removing control chars and whitespace) instead of explaining why URL normalization is required or what invariants it preserves.
Details
✨ AI Reasoning
1) The added comment on line 52-53 describes the exact string transformations performed (removing control characters, replacing @, removing whitespace) rather than explaining why these transformations are necessary or any constraints around them.
2) This harms maintainability because such 'what' comments can become outdated when the helper implementation changes and do not convey the rationale behind normalizing the URL before parsing.
3) The violation is introduced by this PR because the comment was added in the changed lines and is not present before.
🔧 How do I fix it?
Write comments that explain the purpose, reasoning, or business logic behind the code using words like 'because', 'so that', or 'in order to'.
More info - Comment
@AikidoSec feedback: [FEEDBACK]to get better review comments in the future.