diff --git a/analysis_artifacts/pr3489.patch b/analysis_artifacts/pr3489.patch new file mode 100644 index 000000000..19bc0b4c0 --- /dev/null +++ b/analysis_artifacts/pr3489.patch @@ -0,0 +1,819 @@ +From 80bcbbf022880c5177f8130118778ab22914b0bf Mon Sep 17 00:00:00 2001 +From: Praneet Tenkila <67573804+praneet390@users.noreply.github.com> +Date: Fri, 6 Feb 2026 23:18:43 +0530 +Subject: [PATCH 1/7] Hardening note: avoid shell-based popen in InspectFile + operator + +--- + src/operators/inspect_file.cc | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/src/operators/inspect_file.cc b/src/operators/inspect_file.cc +index 28c9c072ad..063e6bd14e 100644 +--- a/src/operators/inspect_file.cc ++++ b/src/operators/inspect_file.cc +@@ -63,6 +63,12 @@ bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + std::string res; + std::string openstr; + ++ // SECURITY HARDENING NOTE: ++ // popen() executes via shell with concatenated arguments. ++ // Current inputs are engine-controlled, but replacing this ++ // with argv-based exec/spawn would remove shell parsing risk. ++ ++ + openstr.append(m_param); + openstr.append(" "); + openstr.append(str); + +From ffc4b6722b5babf20f1354932187000c45d91598 Mon Sep 17 00:00:00 2001 +From: Praneet Tenkila <67573804+praneet390@users.noreply.github.com> +Date: Mon, 23 Feb 2026 18:28:57 +0530 +Subject: [PATCH 2/7] Hardening: replace shell-based popen() with execvp() in + InspectFile operator (v3) + +--- + src/operators/inspect_file.cc | 130 +++++++++++++++++++++++++--------- + 1 file changed, 95 insertions(+), 35 deletions(-) + +diff --git a/src/operators/inspect_file.cc b/src/operators/inspect_file.cc +index 063e6bd14e..192a53618b 100644 +--- a/src/operators/inspect_file.cc ++++ b/src/operators/inspect_file.cc +@@ -16,15 +16,20 @@ + #include "src/operators/inspect_file.h" + + #include +- + #include + #include ++#include + + #include "src/operators/operator.h" + #include "src/utils/system.h" + + #ifdef WIN32 + #include "src/compat/msvc.h" ++#else ++#include ++#include ++#include ++#include + #endif + + namespace modsecurity { +@@ -52,46 +57,101 @@ bool InspectFile::init(const std::string ¶m2, std::string *error) { + return true; + } + +- + bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + if (m_isScript) { + return m_lua.run(transaction, str); +- } else { +- FILE *in; +- char buff[512]; +- std::stringstream s; +- std::string res; +- std::string openstr; +- +- // SECURITY HARDENING NOTE: +- // popen() executes via shell with concatenated arguments. +- // Current inputs are engine-controlled, but replacing this +- // with argv-based exec/spawn would remove shell parsing risk. +- +- +- openstr.append(m_param); +- openstr.append(" "); +- openstr.append(str); +- if (!(in = popen(openstr.c_str(), "r"))) { +- return false; +- } +- +- while (fgets(buff, sizeof(buff), in) != NULL) { +- s << buff; +- } +- +- pclose(in); +- +- res.append(s.str()); +- if (res.size() > 1 && res[0] != '1') { +- return true; /* match */ +- } +- +- /* no match */ ++ } ++ ++#ifndef WIN32 ++ /* ++ * SECURITY HARDENING: ++ * Replace shell-based popen() execution with fork()+execvp() ++ * to avoid shell interpretation while preserving behavior. ++ */ ++ ++ int pipefd[2]; ++ if (pipe(pipefd) == -1) { + return false; + } +-} + ++ pid_t pid = fork(); ++ if (pid == -1) { ++ close(pipefd[0]); ++ close(pipefd[1]); ++ return false; ++ } ++ ++ if (pid == 0) { ++ // Child process ++ close(pipefd[0]); // Close read end ++ dup2(pipefd[1], STDOUT_FILENO); // Redirect stdout ++ close(pipefd[1]); ++ ++ std::vector argv; ++ argv.push_back(const_cast(m_param.c_str())); ++ argv.push_back(const_cast(str.c_str())); ++ argv.push_back(nullptr); ++ ++ execvp(argv[0], argv.data()); ++ ++ // execvp failed ++ _exit(1); ++ } ++ ++ // Parent process ++ close(pipefd[1]); // Close write end ++ ++ char buff[512]; ++ std::stringstream s; ++ ssize_t count; ++ ++ while ((count = read(pipefd[0], buff, sizeof(buff))) > 0) { ++ s.write(buff, count); ++ } ++ ++ close(pipefd[0]); ++ waitpid(pid, nullptr, 0); ++ ++ std::string res = s.str(); ++ ++ if (res.size() > 1 && res[0] != '1') { ++ return true; /* match */ ++ } ++ ++ return false; ++ ++#else ++ /* ++ * Windows fallback: preserve existing behavior ++ */ ++ FILE *in; ++ char buff[512]; ++ std::stringstream s; ++ std::string res; ++ std::string openstr; ++ ++ openstr.append(m_param); ++ openstr.append(" "); ++ openstr.append(str); ++ ++ if (!(in = popen(openstr.c_str(), "r"))) { ++ return false; ++ } ++ ++ while (fgets(buff, sizeof(buff), in) != NULL) { ++ s << buff; ++ } ++ ++ pclose(in); ++ ++ res.append(s.str()); ++ if (res.size() > 1 && res[0] != '1') { ++ return true; /* match */ ++ } ++ ++ return false; ++#endif ++} + + } // namespace operators + } // namespace modsecurity + +From 0c1209d60bc0be3956035a4696cc34b8e30f16d0 Mon Sep 17 00:00:00 2001 +From: Praneet Tenkila <67573804+praneet390@users.noreply.github.com> +Date: Mon, 23 Feb 2026 19:02:13 +0530 +Subject: [PATCH 3/7] Refactor inspect_file to use std::array and std::vector + +--- + src/operators/inspect_file.cc | 45 ++++++++++++++++++++++------------- + 1 file changed, 28 insertions(+), 17 deletions(-) + +diff --git a/src/operators/inspect_file.cc b/src/operators/inspect_file.cc +index 192a53618b..d53c0c8efe 100644 +--- a/src/operators/inspect_file.cc ++++ b/src/operators/inspect_file.cc +@@ -13,12 +13,21 @@ + * + */ + ++/* ++ * ModSecurity, http://www.modsecurity.org/ ++ * Copyright (c) 2015 - 2021 Trustwave Holdings, Inc. ++ * ++ * Licensed under the Apache License, Version 2.0 ++ */ ++ + #include "src/operators/inspect_file.h" + + #include + #include + #include + #include ++#include ++#include + + #include "src/operators/operator.h" + #include "src/utils/system.h" +@@ -29,7 +38,6 @@ + #include + #include + #include +-#include + #endif + + namespace modsecurity { +@@ -69,8 +77,8 @@ bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + * to avoid shell interpretation while preserving behavior. + */ + +- int pipefd[2]; +- if (pipe(pipefd) == -1) { ++ std::array pipefd{}; ++ if (pipe(pipefd.data()) == -1) { + return false; + } + +@@ -83,36 +91,39 @@ bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + + if (pid == 0) { + // Child process +- close(pipefd[0]); // Close read end +- dup2(pipefd[1], STDOUT_FILENO); // Redirect stdout ++ close(pipefd[0]); ++ dup2(pipefd[1], STDOUT_FILENO); + close(pipefd[1]); + ++ // Create mutable copies (avoid const_cast) ++ std::string param_copy = m_param; ++ std::string str_copy = str; ++ + std::vector argv; +- argv.push_back(const_cast(m_param.c_str())); +- argv.push_back(const_cast(str.c_str())); ++ argv.push_back(param_copy.data()); ++ argv.push_back(str_copy.data()); + argv.push_back(nullptr); + + execvp(argv[0], argv.data()); + +- // execvp failed +- _exit(1); ++ _exit(1); // exec failed + } + + // Parent process +- close(pipefd[1]); // Close write end ++ close(pipefd[1]); + +- char buff[512]; + std::stringstream s; ++ std::array buff{}; + ssize_t count; + +- while ((count = read(pipefd[0], buff, sizeof(buff))) > 0) { +- s.write(buff, count); ++ while ((count = read(pipefd[0], buff.data(), buff.size())) > 0) { ++ s.write(buff.data(), count); + } + + close(pipefd[0]); + waitpid(pid, nullptr, 0); + +- std::string res = s.str(); ++ const std::string res = s.str(); + + if (res.size() > 1 && res[0] != '1') { + return true; /* match */ +@@ -125,7 +136,7 @@ bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + * Windows fallback: preserve existing behavior + */ + FILE *in; +- char buff[512]; ++ std::array buff{}; + std::stringstream s; + std::string res; + std::string openstr; +@@ -138,8 +149,8 @@ bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + return false; + } + +- while (fgets(buff, sizeof(buff), in) != NULL) { +- s << buff; ++ while (fgets(buff.data(), buff.size(), in) != NULL) { ++ s << buff.data(); + } + + pclose(in); + +From 35ad0d04a9b95d81ae2c53aef7d90c36b7b1ee3e Mon Sep 17 00:00:00 2001 +From: Praneet Tenkila <67573804+praneet390@users.noreply.github.com> +Date: Mon, 23 Feb 2026 19:10:00 +0530 +Subject: [PATCH 4/7] Refactor inspect_file.cc to use inline variable + declaration + +--- + src/operators/inspect_file.cc | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +diff --git a/src/operators/inspect_file.cc b/src/operators/inspect_file.cc +index d53c0c8efe..f47deec017 100644 +--- a/src/operators/inspect_file.cc ++++ b/src/operators/inspect_file.cc +@@ -123,13 +123,12 @@ bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + close(pipefd[0]); + waitpid(pid, nullptr, 0); + +- const std::string res = s.str(); +- +- if (res.size() > 1 && res[0] != '1') { +- return true; /* match */ +- } ++ if (const std::string res = s.str(); ++ res.size() > 1 && res[0] != '1') { ++ return true; ++} + +- return false; ++return false; + + #else + /* +@@ -155,10 +154,10 @@ bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + + pclose(in); + +- res.append(s.str()); +- if (res.size() > 1 && res[0] != '1') { +- return true; /* match */ +- } ++ if (const std::string res = s.str(); ++ res.size() > 1 && res[0] != '1') { ++ return true; ++} + + return false; + #endif + +From f3b96472a7701a01574209b685680769a30d1a70 Mon Sep 17 00:00:00 2001 +From: Praneet Tenkila <67573804+praneet390@users.noreply.github.com> +Date: Fri, 13 Mar 2026 11:57:28 +0530 +Subject: [PATCH 5/7] Refactor InspectFile for security and clarity + +--- + src/operators/inspect_file.cc | 99 ++++++++++++++++++++--------------- + 1 file changed, 57 insertions(+), 42 deletions(-) + +diff --git a/src/operators/inspect_file.cc b/src/operators/inspect_file.cc +index f47deec017..5f5d4456b5 100644 +--- a/src/operators/inspect_file.cc ++++ b/src/operators/inspect_file.cc +@@ -13,21 +13,12 @@ + * + */ + +-/* +- * ModSecurity, http://www.modsecurity.org/ +- * Copyright (c) 2015 - 2021 Trustwave Holdings, Inc. +- * +- * Licensed under the Apache License, Version 2.0 +- */ +- + #include "src/operators/inspect_file.h" + + #include + #include + #include + #include +-#include +-#include + + #include "src/operators/operator.h" + #include "src/utils/system.h" +@@ -38,11 +29,14 @@ + #include + #include + #include ++#include ++#include + #endif + + namespace modsecurity { + namespace operators { + ++ + bool InspectFile::init(const std::string ¶m2, std::string *error) { + std::ifstream *iss; + std::string err; +@@ -50,7 +44,6 @@ bool InspectFile::init(const std::string ¶m2, std::string *error) { + + m_file = utils::find_resource(m_param, param2, &err); + iss = new std::ifstream(m_file, std::ios::in); +- + if (iss->is_open() == false) { + error->assign("Failed to open file: " + m_param + ". " + err); + delete iss; +@@ -65,6 +58,7 @@ bool InspectFile::init(const std::string ¶m2, std::string *error) { + return true; + } + ++ + bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + if (m_isScript) { + return m_lua.run(transaction, str); +@@ -72,13 +66,11 @@ bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + + #ifndef WIN32 + /* +- * SECURITY HARDENING: +- * Replace shell-based popen() execution with fork()+execvp() +- * to avoid shell interpretation while preserving behavior. ++ * Use fork()+execv() to avoid shell interpretation and PATH ambiguity. ++ * Execute the resolved m_file path directly instead of m_param. + */ +- +- std::array pipefd{}; +- if (pipe(pipefd.data()) == -1) { ++ int pipefd[2]; ++ if (pipe(pipefd) == -1) { + return false; + } + +@@ -91,55 +83,77 @@ bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + + if (pid == 0) { + // Child process +- close(pipefd[0]); +- dup2(pipefd[1], STDOUT_FILENO); ++ close(pipefd[0]); // Close read end ++ dup2(pipefd[1], STDOUT_FILENO); // Redirect stdout to pipe + close(pipefd[1]); + +- // Create mutable copies (avoid const_cast) +- std::string param_copy = m_param; ++ // Create mutable copies for execv() argument array ++ std::string file_copy = m_file; + std::string str_copy = str; + +- std::vector argv; +- argv.push_back(param_copy.data()); ++ std::vector argv; ++ argv.push_back(file_copy.data()); + argv.push_back(str_copy.data()); + argv.push_back(nullptr); + +- execvp(argv[0], argv.data()); ++ // Use execv() with the resolved path — avoids PATH lookup ambiguity ++ execv(file_copy.data(), argv.data()); + +- _exit(1); // exec failed ++ // execv() failed: exit child immediately ++ _exit(1); + } + + // Parent process +- close(pipefd[1]); ++ close(pipefd[1]); // Close write end + +- std::stringstream s; + std::array buff{}; ++ std::stringstream s; + ssize_t count; + +- while ((count = read(pipefd[0], buff.data(), buff.size())) > 0) { +- s.write(buff.data(), count); ++ // Retry on EINTR so a signal does not silently truncate output ++ while (true) { ++ count = read(pipefd[0], buff.data(), buff.size()); ++ if (count > 0) { ++ s.write(buff.data(), count); ++ } else if (count == 0) { ++ // EOF ++ break; ++ } else { ++ if (errno == EINTR) { ++ continue; ++ } ++ // Unrecoverable read error ++ break; ++ } + } + + close(pipefd[0]); +- waitpid(pid, nullptr, 0); + +- if (const std::string res = s.str(); +- res.size() > 1 && res[0] != '1') { +- return true; +-} ++ // Check child exit status; treat non-zero exit as no match ++ int wstatus = 0; ++ if (waitpid(pid, &wstatus, 0) == -1) { ++ return false; ++ } ++ if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0) { ++ return false; ++ } + +-return false; ++ const std::string res = s.str(); ++ if (res.size() > 1 && res[0] != '1') { ++ return true; ++ } ++ ++ return false; + + #else + /* +- * Windows fallback: preserve existing behavior ++ * Windows fallback: use popen() to invoke the script. + */ + FILE *in; + std::array buff{}; + std::stringstream s; +- std::string res; +- std::string openstr; + ++ std::string openstr; + openstr.append(m_param); + openstr.append(" "); + openstr.append(str); +@@ -148,20 +162,21 @@ return false; + return false; + } + +- while (fgets(buff.data(), buff.size(), in) != NULL) { ++ while (fgets(buff.data(), static_cast(buff.size()), in) != NULL) { + s << buff.data(); + } + + pclose(in); + +- if (const std::string res = s.str(); +- res.size() > 1 && res[0] != '1') { +- return true; +-} ++ const std::string res = s.str(); ++ if (res.size() > 1 && res[0] != '1') { ++ return true; ++ } + + return false; + #endif + } + ++ + } // namespace operators + } // namespace modsecurity + +From 69b774e89d900dc13e68d28ad836d5b7bfbd8ff9 Mon Sep 17 00:00:00 2001 +From: Praneet Tenkila <67573804+praneet390@users.noreply.github.com> +Date: Fri, 13 Mar 2026 12:05:17 +0530 +Subject: [PATCH 6/7] Refactor inspect_file.cc for better pipe management + +--- + src/operators/inspect_file.cc | 27 +++++++++------------------ + 1 file changed, 9 insertions(+), 18 deletions(-) + +diff --git a/src/operators/inspect_file.cc b/src/operators/inspect_file.cc +index 5f5d4456b5..3f44f85d13 100644 +--- a/src/operators/inspect_file.cc ++++ b/src/operators/inspect_file.cc +@@ -69,8 +69,8 @@ bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + * Use fork()+execv() to avoid shell interpretation and PATH ambiguity. + * Execute the resolved m_file path directly instead of m_param. + */ +- int pipefd[2]; +- if (pipe(pipefd) == -1) { ++ std::array pipefd{}; ++ if (pipe(pipefd.data()) == -1) { + return false; + } + +@@ -108,24 +108,17 @@ bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + + std::array buff{}; + std::stringstream s; +- ssize_t count; + + // Retry on EINTR so a signal does not silently truncate output +- while (true) { ++ ssize_t count = 0; ++ do { + count = read(pipefd[0], buff.data(), buff.size()); + if (count > 0) { + s.write(buff.data(), count); +- } else if (count == 0) { +- // EOF +- break; +- } else { +- if (errno == EINTR) { +- continue; +- } +- // Unrecoverable read error +- break; ++ } else if (count < 0 && errno == EINTR) { ++ count = 1; // sentinel: keep looping + } +- } ++ } while (count > 0); + + close(pipefd[0]); + +@@ -138,8 +131,7 @@ bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + return false; + } + +- const std::string res = s.str(); +- if (res.size() > 1 && res[0] != '1') { ++ if (const std::string res = s.str(); res.size() > 1 && res[0] != '1') { + return true; + } + +@@ -168,8 +160,7 @@ bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + + pclose(in); + +- const std::string res = s.str(); +- if (res.size() > 1 && res[0] != '1') { ++ if (const std::string res = s.str(); res.size() > 1 && res[0] != '1') { + return true; + } + + +From 72c9c3096c4bf7b93f2cbb83fbec98ac8a7124c6 Mon Sep 17 00:00:00 2001 +From: Praneet Tenkila <67573804+praneet390@users.noreply.github.com> +Date: Wed, 8 Apr 2026 15:33:47 +0530 +Subject: [PATCH 7/7] Refactor process handling in inspect_file.cc + +--- + src/operators/inspect_file.cc | 67 ++++++++++++++++++----------------- + 1 file changed, 34 insertions(+), 33 deletions(-) + +diff --git a/src/operators/inspect_file.cc b/src/operators/inspect_file.cc +index 3f44f85d13..7e5b3f9cf7 100644 +--- a/src/operators/inspect_file.cc ++++ b/src/operators/inspect_file.cc +@@ -15,10 +15,12 @@ + + #include "src/operators/inspect_file.h" + +-#include +-#include ++#include + #include + #include ++#include ++#include ++#include + + #include "src/operators/operator.h" + #include "src/utils/system.h" +@@ -26,11 +28,10 @@ + #ifdef WIN32 + #include "src/compat/msvc.h" + #else +-#include ++#include + #include + #include +-#include +-#include ++#include + #endif + + namespace modsecurity { +@@ -66,8 +67,8 @@ bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + + #ifndef WIN32 + /* +- * Use fork()+execv() to avoid shell interpretation and PATH ambiguity. +- * Execute the resolved m_file path directly instead of m_param. ++ * Use fork()+execv() with the resolved m_file path to avoid shell ++ * interpretation and PATH-lookup ambiguity. + */ + std::array pipefd{}; + if (pipe(pipefd.data()) == -1) { +@@ -82,52 +83,53 @@ bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + } + + if (pid == 0) { +- // Child process +- close(pipefd[0]); // Close read end +- dup2(pipefd[1], STDOUT_FILENO); // Redirect stdout to pipe ++ // Child process: wire stdout to the pipe then exec the script. ++ close(pipefd[0]); // Close unused read end ++ dup2(pipefd[1], STDOUT_FILENO); // Redirect stdout to pipe write end + close(pipefd[1]); + +- // Create mutable copies for execv() argument array ++ // Mutable copies required by execv()'s char* const argv[] signature. + std::string file_copy = m_file; +- std::string str_copy = str; ++ std::string str_copy = str; + + std::vector argv; + argv.push_back(file_copy.data()); + argv.push_back(str_copy.data()); + argv.push_back(nullptr); + +- // Use execv() with the resolved path — avoids PATH lookup ambiguity ++ // execv() uses an exact path — no PATH lookup, no shell. + execv(file_copy.data(), argv.data()); + +- // execv() failed: exit child immediately ++ // Only reached if execv() fails. + _exit(1); + } + +- // Parent process +- close(pipefd[1]); // Close write end ++ // Parent process: read all child output, retrying on EINTR. ++ close(pipefd[1]); // Close unused write end + + std::array buff{}; + std::stringstream s; +- +- // Retry on EINTR so a signal does not silently truncate output + ssize_t count = 0; ++ + do { + count = read(pipefd[0], buff.data(), buff.size()); + if (count > 0) { + s.write(buff.data(), count); + } else if (count < 0 && errno == EINTR) { +- count = 1; // sentinel: keep looping ++ count = 1; // Signal interrupted — keep looping. + } + } while (count > 0); + + close(pipefd[0]); + +- // Check child exit status; treat non-zero exit as no match ++ // Reap child and treat abnormal exit or exec failure as no-match. + int wstatus = 0; +- if (waitpid(pid, &wstatus, 0) == -1) { +- return false; +- } +- if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0) { ++ pid_t waited = 0; ++ do { ++ waited = waitpid(pid, &wstatus, 0); ++ } while (waited == -1 && errno == EINTR); ++ ++ if (waited == -1 || !WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0) { + return false; + } + +@@ -137,24 +139,23 @@ bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + + return false; + +-#else ++#else // WIN32 + /* +- * Windows fallback: use popen() to invoke the script. ++ * Windows: no fork()/execv(); use _popen() via the popen() alias ++ * provided by src/compat/msvc.h. Command injection risk here is ++ * accepted as a pre-existing platform limitation on Windows. + */ +- FILE *in; + std::array buff{}; + std::stringstream s; + +- std::string openstr; +- openstr.append(m_param); +- openstr.append(" "); +- openstr.append(str); ++ const std::string openstr = m_param + " " + str; + +- if (!(in = popen(openstr.c_str(), "r"))) { ++ FILE *in = popen(openstr.c_str(), "r"); ++ if (in == nullptr) { + return false; + } + +- while (fgets(buff.data(), static_cast(buff.size()), in) != NULL) { ++ while (fgets(buff.data(), static_cast(buff.size()), in) != nullptr) { + s << buff.data(); + } + diff --git a/analysis_artifacts/pr3489_inspect_file_final.patch b/analysis_artifacts/pr3489_inspect_file_final.patch new file mode 100644 index 000000000..c6a49e40b --- /dev/null +++ b/analysis_artifacts/pr3489_inspect_file_final.patch @@ -0,0 +1,172 @@ +@@ -15,29 +15,36 @@ + + #include "src/operators/inspect_file.h" + ++#include ++#include ++#include + #include +- + #include +-#include ++#include + + #include "src/operators/operator.h" + #include "src/utils/system.h" + + #ifdef WIN32 + #include "src/compat/msvc.h" ++#else ++#include ++#include ++#include ++#include + #endif + + namespace modsecurity { + namespace operators { + ++ + bool InspectFile::init(const std::string ¶m2, std::string *error) { + std::ifstream *iss; + std::string err; + std::string err_lua; + + m_file = utils::find_resource(m_param, param2, &err); + iss = new std::ifstream(m_file, std::ios::in); +- + if (iss->is_open() == false) { + error->assign("Failed to open file: " + m_param + ". " + err); + delete iss; +@@ -56,34 +63,110 @@ bool InspectFile::init(const std::string ¶m2, std::string *error) { + bool InspectFile::evaluate(Transaction *transaction, const std::string &str) { + if (m_isScript) { + return m_lua.run(transaction, str); +- } else { +- FILE *in; +- char buff[512]; +- std::stringstream s; +- std::string res; +- std::string openstr; +- +- openstr.append(m_param); +- openstr.append(" "); +- openstr.append(str); +- if (!(in = popen(openstr.c_str(), "r"))) { +- return false; +- } ++ } + +- while (fgets(buff, sizeof(buff), in) != NULL) { +- s << buff; +- } ++#ifndef WIN32 ++ /* ++ * Use fork()+execv() with the resolved m_file path to avoid shell ++ * interpretation and PATH-lookup ambiguity. ++ */ ++ std::array pipefd{}; ++ if (pipe(pipefd.data()) == -1) { ++ return false; ++ } ++ ++ pid_t pid = fork(); ++ if (pid == -1) { ++ close(pipefd[0]); ++ close(pipefd[1]); ++ return false; ++ } ++ ++ if (pid == 0) { ++ // Child process: wire stdout to the pipe then exec the script. ++ close(pipefd[0]); // Close unused read end ++ dup2(pipefd[1], STDOUT_FILENO); // Redirect stdout to pipe write end ++ close(pipefd[1]); ++ ++ // Mutable copies required by execv()'s char* const argv[] signature. ++ std::string file_copy = m_file; ++ std::string str_copy = str; ++ ++ std::vector argv; ++ argv.push_back(file_copy.data()); ++ argv.push_back(str_copy.data()); ++ argv.push_back(nullptr); ++ ++ // execv() uses an exact path — no PATH lookup, no shell. ++ execv(file_copy.data(), argv.data()); ++ ++ // Only reached if execv() fails. ++ _exit(1); ++ } + +- pclose(in); ++ // Parent process: read all child output, retrying on EINTR. ++ close(pipefd[1]); // Close unused write end + +- res.append(s.str()); +- if (res.size() > 1 && res[0] != '1') { +- return true; /* match */ ++ std::array buff{}; ++ std::stringstream s; ++ ssize_t count = 0; ++ ++ do { ++ count = read(pipefd[0], buff.data(), buff.size()); ++ if (count > 0) { ++ s.write(buff.data(), count); ++ } else if (count < 0 && errno == EINTR) { ++ count = 1; // Signal interrupted — keep looping. + } ++ } while (count > 0); ++ ++ close(pipefd[0]); ++ ++ // Reap child and treat abnormal exit or exec failure as no-match. ++ int wstatus = 0; ++ pid_t waited = 0; ++ do { ++ waited = waitpid(pid, &wstatus, 0); ++ } while (waited == -1 && errno == EINTR); ++ ++ if (waited == -1 || !WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0) { ++ return false; ++ } ++ ++ if (const std::string res = s.str(); res.size() > 1 && res[0] != '1') { ++ return true; ++ } ++ ++ return false; + +- /* no match */ ++#else // WIN32 ++ /* ++ * Windows: no fork()/execv(); use _popen() via the popen() alias ++ * provided by src/compat/msvc.h. Command injection risk here is ++ * accepted as a pre-existing platform limitation on Windows. ++ */ ++ std::array buff{}; ++ std::stringstream s; ++ ++ const std::string openstr = m_param + " " + str; ++ ++ FILE *in = popen(openstr.c_str(), "r"); ++ if (in == nullptr) { + return false; + } ++ ++ while (fgets(buff.data(), static_cast(buff.size()), in) != nullptr) { ++ s << buff.data(); ++ } ++ ++ pclose(in); ++ ++ if (const std::string res = s.str(); res.size() > 1 && res[0] != '1') { ++ return true; ++ } ++ ++ return false; ++#endif + } + + diff --git a/analysis_artifacts/pr3489_metadata.json b/analysis_artifacts/pr3489_metadata.json new file mode 100644 index 000000000..f1cded026 --- /dev/null +++ b/analysis_artifacts/pr3489_metadata.json @@ -0,0 +1,14 @@ +{ + "number": 3489, + "title": "Hardening: Avoid shell-based popen usage in InspectFile operator", + "state": "open", + "created_at": "2026-02-06T17:50:55Z", + "updated_at": "2026-04-09T14:09:02Z", + "base": "v3/master", + "head": "hardening-inspectfile-exec", + "commits": 8, + "changed_files": 1, + "additions": 107, + "deletions": 24, + "html_url": "https://github.com/owasp-modsecurity/ModSecurity/pull/3489" +} diff --git a/modsecurity_security_analysis.md b/modsecurity_security_analysis.md new file mode 100644 index 000000000..866469090 --- /dev/null +++ b/modsecurity_security_analysis.md @@ -0,0 +1,283 @@ +## Teil 1: Deutsch + +### 1. Kurzfazit + +- **Gesicherte Einordnung:** PR #3489 ersetzt unter Linux/Unix in `@inspectFile` die Shell-Ausführung via `popen(" ")` durch `fork() + execv()` mit Pipe und `waitpid()`. Dadurch wird Shell-Parsing im Nicht-Windows-Pfad entfernt. +- **Sicherheitswirkung:** Das reduziert klar das Risiko von Shell-Injection im betroffenen Codepfad. +- **Robustheitswirkung:** Die PR führt gleichzeitig neue Prozess-/Thread-Interaktionen ein (Fork in potenziell multithreaded Prozess, blockierendes Lesen/`waitpid` ohne Timeout). Das ist primär ein **DoS-/Stabilitätsrisiko**, weniger ein neuer klassischer RCE-Bug. +- **Gesamturteil:** **Hardening + teilweise Robustheitsverbesserung**, aber **nicht vollständig** hinsichtlich Laufzeitgrenzen/Worker-Starvation. + +### 2. Gesicherte Befunde + +1. `@inspectFile` ist ein Operator (`InspectFile`) und wird regulär im Rule-Evaluierungsfluss aufgerufen (`RuleWithOperator::evaluate` → `Operator::evaluateInternal` → `InspectFile::evaluate`). Damit läuft externer Prozessaufruf pro betroffener Variable/Rule-Match-Kontext. +2. Der aktuelle Repository-Stand nutzt für Nicht-Lua derzeit `popen()` mit String-Konkatenation (`m_param + " " + str`) und liest synchron bis EOF, danach `pclose()`. +3. PR #3489 ändert genau diese Stelle (eine Datei), laut GitHub-Metadaten: Titel „Hardening: Avoid shell-based popen usage in InspectFile operator“, 1 Datei geändert. +4. Im finalen PR-Diff wird auf Nicht-Windows `pipe()` + `fork()` + `execv(m_file, argv)` umgestellt; Parent liest Pipe (mit EINTR-Handling) und ruft `waitpid()` auf. +5. Der Windows-Pfad bleibt Shell-basiert (`popen()`), im PR explizit als bestehende Plattformlimitation kommentiert. + +### 3. Wahrscheinliche Risiken + +1. **Blockierendes Verhalten / Worker-Starvation (wahrscheinlich):** + - Parent liest Pipe bis EOF und wartet anschließend via `waitpid(..., 0)` ohne Timeout. + - Hängt das externe Programm (oder produziert sehr langsam), blockiert der Request-Worker. Unter Last kann das zu Thread/Worker-Erschöpfung führen. +2. **DoS-Potenzial durch teure externe Prozesse (wahrscheinlich):** + - Pro Auswertung wird ein Kindprozess gestartet. In hochparallelen Szenarien kann dies CPU/Context-Switch/Prozesslimit belasten. +3. **Fork in multithreaded Prozess (wahrscheinlich relevant):** + - Das ist generell ein heikler Bereich. Die PR vermeidet im Child nach `fork()` weitgehend komplexe Interaktion (zielt schnell auf `execv()`), aber verwendet dennoch C++-Objekte (`std::string`, `std::vector`) im Child vor `execv()`. +4. **Fehlende harte Laufzeitbegrenzung (wahrscheinlich):** + - Kein `alarm`, kein nonblocking + Poll + Timeout, kein Watchdog/kill-Pfad. + +### 4. Hypothesen (nicht bewiesen) + +1. **Async-signal-safety/Deadlock-Hypothese im Child:** + - In multithreaded Elternprozessen gilt nach `fork()` theoretisch erhöhte Vorsicht: nur async-signal-safe Operationen bis `exec*`. + - Die PR baut `std::string`/`std::vector` erst im Child auf; das kann je nach libc/Allocator intern Locks/Heap-Zustände berühren. + - **Nicht verifizierbar in dieser Analyse** (kein Laufzeittest/Instrumentierung durchgeführt). +2. **Pipe-/Output-induzierte Hänger bei Protokollen mit großem Output:** + - Parent liest zwar die Pipe; trotzdem kann bei bestimmten I/O-Mustern (z. B. Script schreibt auf stderr oder wartet auf etwas Externes) ein Hänger entstehen. + - **Nicht verifizierbar in dieser Analyse.** +3. **Sekundäre Race-Effekte bei Shared-State außerhalb dieses Files:** + - Falls der externe Scanprozess Seiteneffekte auf geteilte Dateien/Lockfiles hat, können unter Last Race Conditions entstehen. + - **Nicht verifizierbar in dieser Analyse.** + +### 5. Analyse der PR #3489 + +**Was wird konkret geändert?** +- Linux/Unix-Pfad: `popen()`-Aufruf wird ersetzt durch explizites `fork`/`execv`-Modell mit Pipe. +- Kindprozess: stdout via `dup2` in Pipe, dann `execv` auf aufgelösten Pfad `m_file`. +- Elternprozess: liest Pipe-Inhalt, `waitpid` mit EINTR-Retry, non-zero child exit => no-match. +- Windows: behält `popen` (plattformspezifisch). + +**Welche Probleme adressiert die PR?** +- Shell-Interpretation (und PATH-Ambiguität durch `execvp`) wird im Nicht-Windows-Pfad reduziert/entfernt, da final `execv` auf exakt aufgelösten Pfad verwendet wird. + +**Welche neuen Risiken entstehen ggf.?** +- Explizites Fork/Exec in einem potenziell multithreaded Serverprozess verstärkt Anforderungen an robustes Timeout-/Lifecycle-Management. +- Kein Timeout/Abbruchpfad beim Warten auf Kindprozess. +- Child-Code vor `execv` nutzt C++-Objekte (potenziell problematisch im strengen POSIX-Sinn). + +### 6. Relevante Codepfade + +1. **Regel-Ausführung → Operator-Evaluierung** + - `src/rule_with_operator.cc` (`RuleWithOperator::evaluate`) + - `src/operators/operator.cc` (`Operator::evaluateInternal`, Dispatch in `instantiate`) +2. **`@inspectFile` Implementierung** + - `src/operators/inspect_file.cc` (bestehender `popen`-Pfad) + - `src/operators/inspect_file.h` (Klasse/Zustand: `m_file`, `m_isScript`, `m_lua`) +3. **PR #3489 Zieländerung (externer Diff-Artefakt in dieser Analyse gespeichert)** + - `analysis_artifacts/pr3489_inspect_file_final.patch` +4. **Multithread-Bezug im Projekt** + - `examples/multithread/multithread.cc` (100 Threads, viele Transaktionen) + +### 7. Test-/Reproduktionsplan + +Da in dieser Analyse **keine belastbare End-to-End-Ausführung** des PR-Codes erfolgt ist, folgt ein reproduzierbarer Plan. + +#### 7.1 Ziel +- Verhalten von `@inspectFile` unter Parallelität und problematischen Child-Prozessen prüfen. + +#### 7.2 Setup +1. Build zwei Stände: + - Baseline (ohne PR #3489) + - PR-Stand #3489 +2. Regeldatei mit `@inspectFile` aktivieren (z. B. auf Upload-/Request-Variable). +3. Externes Testskript bereitstellen: + - `fast_ok.sh` (schnelle Ausgabe, exit 0) + - `slow.sh` (sleep 10) + - `hang.sh` (endlose Schleife) + - `spam.sh` (viel stdout) +4. Lastgenerator: parallele Requests (z. B. 50/100/200 gleichzeitig), ideal kombiniert mit dem `examples/multithread`-Ansatz. + +#### 7.3 Zu messende Metriken +- p50/p95/p99 Latenz +- Anzahl blockierter Worker/Threads +- Anzahl laufender/zombie Prozesse +- Fehlerquote/Timeoutquote +- CPU/RAM/FD-Verbrauch + +#### 7.4 Erwartete Beobachtungen +- PR reduziert Shell-bezogene Angriffsoberfläche im Unix-Pfad. +- Bei `hang.sh`/`slow.sh` weiterhin erhöhte Blockierwahrscheinlichkeit ohne Timeout-Mechanismus. + +#### 7.5 Verifikationsstatus +- **Nicht verifizierbar in dieser Analyse** (kein reproduzierter Laufbericht mit Messwerten). + +### 8. Empfehlungen + +1. **Timeout einführen (hoch priorisiert):** + - `waitpid` nicht unbegrenzt blockierend; mit Deadline + `kill(SIGKILL)`/Cleanup. +2. **Child-Minimalismus nach `fork()` (hoch priorisiert):** + - Argumentvektor bereits im Parent vorbereiten oder auf strikt async-signal-safe Pfad achten. +3. **Ressourcenlimits/Härtung:** + - Begrenzung paralleler `@inspectFile` Child-Prozesse pro Worker/gesamt. +4. **stderr-Handling und Exit-Code-Telemetrie:** + - Bessere Diagnose bei External-Tool-Fehlern. +5. **Dokumentation:** + - Klar dokumentieren, dass `@inspectFile` ein synchroner externer Aufruf ist und DoS-Risiko ohne Timeout erhöht. + +### 9. Offene Unsicherheiten + +1. Exakte Laufzeitwirkung unter realer Zielplattform/Connector (Apache, Nginx, standalone) wurde nicht gemessen. +2. Ob der Child-Pfad in spezifischer libc/Allocator-Kombination nach `fork()` seltene Deadlocks triggert, ist hier nicht experimentell geprüft. +3. Verhalten unter extremen FD-/Prozesslimit-Szenarien ist offen. + +### Klare Bewertung (gefordert) + +- **Ist das ein echter Security Bug?** + - Der ursprüngliche Shell-basierte Unix-Pfad ist ein **echtes Security-Risiko** (Injection/Interpretationsfläche), abhängig von Kontrollierbarkeit der Eingaben. +- **DoS-/Stabilitätsproblem?** + - Ja, **klar möglich** durch blockierende externe Prozessausführung ohne Timeout. +- **Hardening-Thema?** + - Ja, PR #3489 ist primär **Hardening** mit Sicherheitsgewinn im Unix-Pfad. +- **Dokumentiertes riskantes Verhalten?** + - Teilweise: Synchroner externer Prozessaufruf bleibt grundsätzlich riskant, wenn nicht stark begrenzt. + +- **Ist die PR sicher?** + - **Teilweise sicherer als vorher** (Shell-Risiko reduziert). +- **Unvollständig?** + - **Ja**, hinsichtlich Timeout/DoS-Resilienz und striktem post-fork-Minimalpfad. +- **Potenziell riskant?** + - **Ja**, primär als Verfügbarkeits-/Stabilitätsrisiko unter Last. + +--- + +## Section 2: English + +### 1. Executive Summary + +- **Confirmed classification:** PR #3489 replaces shell execution via `popen(" ")` in `@inspectFile` (Linux/Unix path) with `fork() + execv()` plus pipe and `waitpid()`. This removes shell parsing in the non-Windows path. +- **Security impact:** This clearly reduces shell-injection exposure in the affected code path. +- **Robustness impact:** At the same time, it introduces explicit process/thread interactions (fork in a potentially multithreaded process, blocking read/`waitpid` without timeout). This is primarily a **DoS/stability risk**, less a new classic RCE bug. +- **Overall judgment:** **Hardening + partial robustness improvement**, but **incomplete** regarding runtime limits/worker starvation. + +### 2. Confirmed Findings + +1. `@inspectFile` is an operator (`InspectFile`) and is invoked in the regular rule evaluation flow (`RuleWithOperator::evaluate` → `Operator::evaluateInternal` → `InspectFile::evaluate`). Therefore, external process execution can occur per affected variable/rule-evaluation context. +2. In the current repository state, the non-Lua path uses `popen()` with string concatenation (`m_param + " " + str`), synchronously reads until EOF, then calls `pclose()`. +3. PR #3489 changes exactly this location (one file), per GitHub metadata: title “Hardening: Avoid shell-based popen usage in InspectFile operator”, 1 changed file. +4. In the final PR diff, the non-Windows path switches to `pipe()` + `fork()` + `execv(m_file, argv)`; parent reads the pipe (with EINTR handling) and calls `waitpid()`. +5. The Windows path remains shell-based (`popen()`), explicitly documented in the PR as a pre-existing platform limitation. + +### 3. Probable Risks + +1. **Blocking behavior / worker starvation (probable):** + - Parent reads pipe until EOF and then waits via `waitpid(..., 0)` without timeout. + - If the external program hangs (or outputs very slowly), the request worker blocks. Under load this can exhaust worker threads/processes. +2. **DoS potential via expensive external processes (probable):** + - A child process is spawned per evaluation. In highly parallel scenarios this can stress CPU/context-switching/process limits. +3. **Fork in multithreaded process (probably relevant):** + - This is generally a sensitive area. The PR keeps child-side work relatively short (aiming to `execv()` quickly), but still uses C++ objects (`std::string`, `std::vector`) in the child before `execv()`. +4. **No hard runtime boundary (probable):** + - No `alarm`, no nonblocking + poll + timeout, no watchdog/kill path. + +### 4. Hypotheses (unproven) + +1. **Async-signal-safety/deadlock hypothesis in child:** + - In multithreaded parents, post-`fork()` behavior is theoretically sensitive: only async-signal-safe operations should occur until `exec*`. + - The PR constructs `std::string`/`std::vector` in the child; depending on libc/allocator internals, this may touch locks/heap state. + - **Nicht verifizierbar in dieser Analyse.** +2. **Pipe/output-induced hangs for certain tool behaviors:** + - Parent reads stdout, but some I/O patterns (e.g., tool writes to stderr or waits externally) may still hang the request path. + - **Nicht verifizierbar in dieser Analyse.** +3. **Secondary race effects outside this file:** + - If external scanner programs have side effects on shared files/lockfiles, races may appear under load. + - **Nicht verifizierbar in dieser Analyse.** + +### 5. Analysis of PR #3489 + +**What exactly changes?** +- Linux/Unix path: replaces `popen()` with explicit `fork`/`execv` and pipe. +- Child: redirects stdout to pipe via `dup2`, then `execv` on resolved path `m_file`. +- Parent: reads pipe output, retries `waitpid` on EINTR, treats non-zero child exit as no-match. +- Windows: keeps `popen` (platform-specific). + +**Which issues does the PR address?** +- Shell interpretation (and PATH ambiguity previously seen with `execvp`) is reduced/removed in non-Windows path because final code uses `execv` on an exact resolved path. + +**What new risks may be introduced?** +- Explicit fork/exec in a potentially multithreaded server process increases need for robust timeout/lifecycle controls. +- No timeout/cancellation path while waiting for child process. +- Child-side pre-`execv` code uses C++ objects (potentially problematic in strict POSIX interpretation). + +### 6. Relevant Code Paths + +1. **Rule execution → operator evaluation** + - `src/rule_with_operator.cc` (`RuleWithOperator::evaluate`) + - `src/operators/operator.cc` (`Operator::evaluateInternal`, dispatch in `instantiate`) +2. **`@inspectFile` implementation** + - `src/operators/inspect_file.cc` (current `popen` path) + - `src/operators/inspect_file.h` (class/state: `m_file`, `m_isScript`, `m_lua`) +3. **PR #3489 target change (saved external diff artifact in this analysis)** + - `analysis_artifacts/pr3489_inspect_file_final.patch` +4. **Project multithread context** + - `examples/multithread/multithread.cc` (100 threads, many transactions) + +### 7. Test / Reproduction Plan + +Because this analysis did **not** perform a validated end-to-end execution of PR code, a reproducible plan is provided. + +#### 7.1 Goal +- Validate `@inspectFile` behavior under parallel load and problematic child-process behavior. + +#### 7.2 Setup +1. Build two states: + - Baseline (without PR #3489) + - PR #3489 state +2. Enable a rule using `@inspectFile` (e.g., on upload/request variable). +3. Provide external test scripts: + - `fast_ok.sh` (quick output, exit 0) + - `slow.sh` (sleep 10) + - `hang.sh` (infinite loop) + - `spam.sh` (large stdout volume) +4. Load generator: parallel requests (e.g., 50/100/200 concurrent), ideally combined with `examples/multithread` pattern. + +#### 7.3 Metrics +- p50/p95/p99 latency +- Number of blocked workers/threads +- Number of running/zombie processes +- Error/timeout rate +- CPU/RAM/FD consumption + +#### 7.4 Expected observations +- PR should reduce shell-related attack surface in Unix path. +- With `hang.sh`/`slow.sh`, high blocking probability should remain without timeout mechanisms. + +#### 7.5 Verification status +- **Nicht verifizierbar in dieser Analyse.** + +### 8. Recommendations + +1. **Add timeout control (high priority):** + - Avoid unbounded `waitpid`; enforce deadline + `kill(SIGKILL)`/cleanup. +2. **Minimize child-side post-fork work (high priority):** + - Prepare argv in parent or ensure strictly async-signal-safe post-fork path. +3. **Resource limiting/hardening:** + - Cap concurrent `@inspectFile` child processes per worker/globally. +4. **stderr handling and exit-code telemetry:** + - Improve diagnostics for external tool failures. +5. **Documentation:** + - Clearly state that `@inspectFile` is a synchronous external call and can raise DoS risk without timeout. + +### 9. Open Uncertainties + +1. Exact runtime impact on target connector/platform (Apache, Nginx, standalone) was not measured. +2. Whether specific libc/allocator combinations trigger rare post-fork deadlocks was not experimentally validated here. +3. Behavior under extreme FD/process-limit scenarios remains open. + +### Clear Verdict (requested) + +- **Is this a real security bug?** + - The original shell-based Unix path is a **real security risk** (injection/interpretation surface), depending on input controllability. +- **A DoS/stability issue?** + - Yes, **clearly possible** due to blocking external process execution without timeout. +- **A hardening topic?** + - Yes, PR #3489 is primarily **hardening** with real security gain on Unix path. +- **Documented risky behavior?** + - Partly: synchronous external process execution remains inherently risky if not tightly bounded. + +- **Is the PR safe?** + - **Partially safer than before** (reduced shell risk). +- **Incomplete?** + - **Yes**, regarding timeout/DoS resilience and strict post-fork minimalism. +- **Potentially risky?** + - **Yes**, primarily as an availability/stability risk under load.