Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/docker/conda-python-emscripten.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ ARG arch
ARG python="3.12"
FROM ${repo}:${arch}-conda-python-${python}

ARG selenium_version="4.15.2"
ARG selenium_version="4.41.0"
ARG pyodide_version="0.26.0"
ARG chrome_version="latest"
ARG required_python_min="(3,12)"
Expand Down
21 changes: 14 additions & 7 deletions ci/scripts/install_chromedriver.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,22 @@ set -e

chrome_version=$1

if [ "$chrome_version" = "latest" ]; then
latest_release_path=LATEST_RELEASE_STABLE
else
latest_release_path=LATEST_RELEASE_${chrome_version}
# Look up the Chrome version from the apt repo's Packages file.
CHROME_DEB_VERSION=$(wget --no-verbose -O - \
"https://dl.google.com/linux/chrome/deb/dists/stable/main/binary-amd64/Packages.gz" \
| gunzip \
| awk '/^Package: google-chrome-stable$/{found=1} found && /^Version: /{print $2; exit}')
CHROME_VERSION_FULL=${CHROME_DEB_VERSION%-*}

# Validate that there have been no major version bumps since the last time we updated this script.
if [ "$chrome_version" != "latest" ] && [ "${CHROME_VERSION_FULL%%.*}" != "$chrome_version" ]; then
echo "Requested Chrome major ${chrome_version}, but apt repo currently publishes ${CHROME_VERSION_FULL}" >&2
exit 1
fi
CHROME_VERSION_FULL=$(wget -q --no-verbose -O - "https://googlechromelabs.github.io/chrome-for-testing/${latest_release_path}")
CHROME_DOWNLOAD_URL="https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_${CHROME_VERSION_FULL}-1_amd64.deb"

CHROME_DOWNLOAD_URL="https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_${CHROME_DEB_VERSION}_amd64.deb"
CHROMEDRIVER_DOWNLOAD_URL="https://storage.googleapis.com/chrome-for-testing-public/${CHROME_VERSION_FULL}/linux64/chromedriver-linux64.zip"
wget -q --no-verbose -O /tmp/google-chrome.deb "${CHROME_DOWNLOAD_URL}"
wget --no-verbose -O /tmp/google-chrome.deb "${CHROME_DOWNLOAD_URL}"
apt-get update
apt install -qqy /tmp/google-chrome.deb
rm -f /tmp/google-chrome.deb
Expand Down
4 changes: 2 additions & 2 deletions compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -951,8 +951,8 @@ services:
clang_tools: ${CLANG_TOOLS}
llvm: ${LLVM}
pyodide_version: "0.26.0"
chrome_version: "134"
selenium_version: "4.15.2"
chrome_version: "148"
selenium_version: "4.41.0"
required_python_min: "(3,12)"
python: ${PYTHON}
shm_size: *shm-size
Expand Down
108 changes: 105 additions & 3 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2059,22 +2059,29 @@ function(build_protobuf)

# Make protobuf_fc depend on the install completion marker
add_custom_target(protobuf_fc DEPENDS "${PROTOBUF_PREFIX}/.protobuf_installed")
set(ARROW_BUNDLED_STATIC_LIBS
${ARROW_BUNDLED_STATIC_LIBS} protobuf::libprotobuf
PARENT_SCOPE)
list(APPEND ARROW_BUNDLED_STATIC_LIBS protobuf::libprotobuf)

if(CMAKE_CROSSCOMPILING)
# If we are cross compiling, we need to build protoc for the host
# system also, as it is used when building Arrow
set(PROTOBUF_HOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep_host-install")
set(PROTOBUF_HOST_COMPILER "${PROTOBUF_HOST_PREFIX}/bin/protoc")

# cross-compiled (PyArrow on emscripten) needs utf8_range bundled explicitly.
list(APPEND ARROW_BUNDLED_STATIC_LIBS utf8_range)

set(PROTOBUF_HOST_CMAKE_ARGS
"-DCMAKE_CXX_FLAGS="
"-DCMAKE_C_FLAGS="
"-DCMAKE_INSTALL_PREFIX=${PROTOBUF_HOST_PREFIX}"
-Dprotobuf_BUILD_TESTS=OFF
-Dprotobuf_DEBUG_POSTFIX=)
if(ABSL_VENDORED)
# Force protobuf to reuse Arrow's already-extracted absl source
# so we don't re-download it and don't end up with multiple copies of abseil.
list(APPEND PROTOBUF_HOST_CMAKE_ARGS -Dprotobuf_FORCE_FETCH_DEPENDENCIES=ON
"-DFETCHCONTENT_SOURCE_DIR_ABSL=${absl_SOURCE_DIR}")
endif()

# We reuse the FetchContent downloaded source but build it with host compiler
externalproject_add(protobuf_ep_host
Expand All @@ -2089,7 +2096,102 @@ function(build_protobuf)
PROPERTIES IMPORTED_LOCATION "${PROTOBUF_HOST_COMPILER}")

add_dependencies(arrow::protobuf::host_protoc protobuf_ep_host)
# For cross-compilation along with ExternalProject we need to
# manually add the absl deps to the bundled static libs so that
# they are available for the generated code in protobuf v31.
list(APPEND
ARROW_BUNDLED_STATIC_LIBS
absl::bad_any_cast_impl
absl::bad_optional_access
absl::bad_variant_access
absl::base
absl::city
absl::civil_time
absl::cord
absl::cord_internal
absl::cordz_functions
absl::cordz_handle
absl::cordz_info
absl::cordz_sample_token
absl::crc32c
absl::crc_cord_state
absl::crc_cpu_detect
absl::crc_internal
absl::debugging_internal
absl::decode_rust_punycode
absl::demangle_internal
absl::demangle_rust
absl::die_if_null
absl::examine_stack
absl::exponential_biased
absl::failure_signal_handler
absl::flags_commandlineflag
absl::flags_commandlineflag_internal
absl::flags_config
absl::flags_internal
absl::flags_marshalling
absl::flags_parse
absl::flags_private_handle_accessor
absl::flags_program_name
absl::flags_reflection
absl::flags_usage
absl::flags_usage_internal
absl::graphcycles_internal
absl::hash
absl::hashtablez_sampler
absl::int128
absl::kernel_timeout_internal
absl::leak_check
absl::log_globals
absl::log_initialize
absl::log_internal_check_op
absl::log_internal_conditions
absl::log_internal_fnmatch
absl::log_internal_format
absl::log_internal_globals
absl::log_internal_log_sink_set
absl::log_internal_message
absl::log_internal_nullguard
absl::log_internal_proto
absl::log_severity
absl::log_sink
absl::low_level_hash
absl::malloc_internal
absl::periodic_sampler
absl::poison
absl::random_distributions
absl::random_internal_distribution_test_util
absl::random_internal_platform
absl::random_internal_pool_urbg
absl::random_internal_randen
absl::random_internal_randen_hwaes
absl::random_internal_randen_hwaes_impl
absl::random_internal_randen_slow
absl::random_internal_seed_material
absl::random_seed_gen_exception
absl::random_seed_sequences
absl::raw_hash_set
absl::raw_logging_internal
absl::scoped_set_env
absl::spinlock_wait
absl::stacktrace
absl::status
absl::statusor
absl::str_format_internal
absl::strerror
absl::strings
absl::strings_internal
absl::symbolize
absl::synchronization
absl::throw_delegate
absl::time
absl::time_zone
absl::utf8_for_code_point
absl::vlog_config_internal)
endif()
set(ARROW_BUNDLED_STATIC_LIBS
"${ARROW_BUNDLED_STATIC_LIBS}"
PARENT_SCOPE)
list(POP_BACK CMAKE_MESSAGE_INDENT)
endfunction()

Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5907,6 +5907,7 @@ def test_make_write_options_error():
pformat.make_write_options(43)


@pytest.mark.substrait
def test_scanner_from_substrait(dataset):
try:
import pyarrow.substrait as ps
Expand Down
15 changes: 11 additions & 4 deletions python/scripts/run_emscripten_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@

class TemplateOverrider(http.server.SimpleHTTPRequestHandler):
def log_request(self, code="-", size="-"):
# don't log successful requests
# don't log successful requests but log errors
if isinstance(code, int) and code >= 400:
sys.stderr.write(f"HTTP {code} for {self.path}\n")
return

def do_GET(self) -> bytes | None:
Expand Down Expand Up @@ -200,7 +202,8 @@ class BrowserDriver:
def __init__(self, hostname, port, driver):
self.driver = driver
self.driver.get(f"http://{hostname}:{port}/test.html")
self.driver.set_script_timeout(100)
# Chrome on CI takes longer than locally to compile.
self.driver.set_script_timeout(1200)

def load_pyodide(self, dist_dir):
pass
Expand Down Expand Up @@ -259,7 +262,9 @@ def __init__(self, hostname, port):
options = Options()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
super().__init__(hostname, port, webdriver.Chrome(options=options))
driver = webdriver.Chrome(options=options)
driver.command_executor._client_config.timeout = 1200
super().__init__(hostname, port, driver)


class FirefoxDriver(BrowserDriver):
Expand Down Expand Up @@ -336,7 +341,9 @@ def _load_pyarrow_in_runner(driver, wheel_name):
"""
import pyarrow,pathlib
pyarrow_dir = pathlib.Path(pyarrow.__file__).parent
pytest.main([pyarrow_dir, '-r', 's'])
# Substrait expression serialization crashes pyodide with a
# "Cannot convert a BigInt value to a number" error.
pytest.main([pyarrow_dir, '-r', 's', '-m', 'not substrait'])
""",
wait_for_terminate=False,
)
Expand Down
Loading