Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,9 @@ jobs:
!cancelled()
uses: ./.github/workflows/publish_pytest_data.yml
secrets: inherit
permissions: {contents: write}
permissions:
contents: write
pull-requests: write
with:
environment: ${{needs.common_config.outputs.publish_env}}

Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/build_steps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,9 @@ jobs:
if: matrix.os == 'linux'
uses: ./.github/actions/install_mongodb

- name: Install rustfs (S3 mock server)
run: INSTALL_DIR="$RUNNER_TEMP/rustfs-bin" bash build_tooling/install_rustfs.sh && echo "$RUNNER_TEMP/rustfs-bin" >> $GITHUB_PATH

- name: Install the wheel and dependencies
run: |
cmake -P cpp/CMake/CpuCount.cmake | sed 's/^-- //' | tee -a $GITHUB_ENV
Expand Down
13 changes: 13 additions & 0 deletions .github/workflows/build_with_conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,9 @@ jobs:
max_attempts: 3
command: npm install -g azurite

- name: Install rustfs (S3 mock server)
run: INSTALL_DIR="$RUNNER_TEMP/rustfs-bin" bash build_tooling/install_rustfs.sh && echo "$RUNNER_TEMP/rustfs-bin" >> $GITHUB_PATH

- name: Check no arcticdb file depend on tests package
run: |
build_tooling/checks.sh
Expand Down Expand Up @@ -733,6 +736,9 @@ jobs:
max_attempts: 3
command: npm install -g azurite

- name: Install rustfs (S3 mock server)
run: INSTALL_DIR="$RUNNER_TEMP/rustfs-bin" bash build_tooling/install_rustfs.sh && echo "$RUNNER_TEMP/rustfs-bin" >> $GITHUB_PATH

- name: Check no arcticdb file depend on tests package
run: |
build_tooling/checks.sh
Expand Down Expand Up @@ -877,6 +883,9 @@ jobs:
max_attempts: 3
command: npm install -g azurite

- name: Install rustfs (S3 mock server)
run: INSTALL_DIR="$RUNNER_TEMP/rustfs-bin" bash build_tooling/install_rustfs.sh && echo "$RUNNER_TEMP/rustfs-bin" >> $GITHUB_PATH

- name: Check no arcticdb file depend on tests package
run: |
build_tooling/checks.sh
Expand Down Expand Up @@ -1189,6 +1198,10 @@ jobs:
echo "PATH includes: $npm_bin_dir"
which azurite && echo "Azurite found: $(which azurite)" || echo "Warning: azurite not found in PATH"

- name: Install rustfs (S3 mock server)
shell: bash -elo pipefail {0}
run: INSTALL_DIR="$RUNNER_TEMP/rustfs-bin" bash build_tooling/install_rustfs.sh && echo "$RUNNER_TEMP/rustfs-bin" >> $GITHUB_PATH

- name: Check no arcticdb file depend on tests package
shell: bash -elo pipefail {0}
run: |
Expand Down
31 changes: 30 additions & 1 deletion .github/workflows/publish_pytest_data.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ on:
run-name: Publish ${{github.ref}} to arcticdb-pytest-data
permissions:
contents: read
pull-requests: write
jobs:
sync_pytest_xmls:
runs-on: ubuntu-22.04
Expand Down Expand Up @@ -45,7 +46,7 @@ jobs:
run: |
apt update
apt install -y git
python3 -m pip install arcticdb[Testing] click "pandas<3" pytz
python3 -m pip install arcticdb[Testing] click "pandas<3" pytz polars

- name: Setup softlink for SSL
shell: bash -el {0}
Expand All @@ -65,3 +66,31 @@ jobs:
run: |
ls -R ${{ env.ARTEFACT_PATH }}
python3 build_tooling/process_pytest_artifacts.py --download-dir ${{ env.ARTEFACT_PATH }} --use-github-actions

- name: Compare durations against master
  id: compare
  # Run regardless of the outcome of earlier steps.
  if: always()
  # compare_pytest_durations.py exits 1 when any test got slower; that must
  # not fail the job, the result is only reported.
  continue-on-error: true
  run: |
    cd build_tooling
    python3 compare_pytest_durations.py \
      --run-id ${{ github.run_id }} \
      --threshold 10 \
      --output /tmp/duration_report.md

- name: Post duration report to PR
  # Runs even when earlier steps failed, but only for pull_request events.
  if: always() && github.event_name == 'pull_request'
  # Commenting is best-effort: a failure here must not fail the job.
  continue-on-error: true
  env:
    GH_TOKEN: ${{ github.token }}
    # Context values are passed through the environment instead of being
    # interpolated into the script text.
    REPO: ${{ github.repository }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
  run: |
    if [ ! -f /tmp/duration_report.md ]; then
      echo "No duration report found - skipping PR comment."
      exit 0
    fi
    # Delete every previous duration report comment. --paginate is needed
    # because the listing is paged: without it only the first page of
    # comments is searched and an older report on a busy PR is never found.
    gh api "repos/${REPO}/issues/${PR_NUMBER}/comments" --paginate \
      --jq '.[] | select(.body | startswith("## Pytest Duration Report")) | .id' |
      while read -r COMMENT_ID; do
        gh api "repos/${REPO}/issues/comments/${COMMENT_ID}" -X DELETE
      done
    gh pr comment "${PR_NUMBER}" \
      --repo "${REPO}" \
      --body-file /tmp/duration_report.md
160 changes: 160 additions & 0 deletions build_tooling/compare_pytest_durations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
"""Compare pytest durations from the current run against the latest master run.

Both runs are read from ArcticDB. Reports tests whose duration changed by more
than a configurable threshold (default 10%).
"""

import sys
from pathlib import Path

import click
import polars as pl

from process_pytest_artifacts import get_results_lib


def find_symbol(lib, branch=None, run_id=None):
    """Resolve a results symbol from an exact run id or from a branch name.

    ``run_id`` takes precedence: the first symbol ending in ``|<run_id>`` is
    returned. Otherwise, when ``branch`` is given, the symbol with the greatest
    timestamp field among those starting with ``<branch>|`` is returned.
    Returns ``None`` when nothing matches or when neither selector is given.
    """
    symbols = lib.list_symbols()

    if run_id:
        wanted_suffix = f"|{run_id}"
        return next((name for name in symbols if name.endswith(wanted_suffix)), None)

    if not branch:
        return None

    wanted_prefix = f"{branch}|"
    candidates = [name for name in symbols if name.startswith(wanted_prefix)]
    if not candidates:
        return None

    # Symbol format: branch|commit|YYYY-MM-DD_HH-MM-SS|run_id
    # The third field is compared as a plain string to pick the newest run.
    return max(candidates, key=lambda name: name.split("|")[2])


def load_df(lib, symbol):
    """Read ``symbol`` from ``lib`` and return its ``.data`` payload as a polars DataFrame."""
    return pl.from_pandas(lib.read(symbol).data)


def build_full_comparison(current_df, master_df):
    """Pair up passed tests from both runs and compute their relative duration change.

    Rows are matched on test name, Python version, test type and cache type.
    The result carries ``time`` (current), ``time_master`` (baseline) and
    ``pct_change`` (percent difference relative to the baseline), sorted with
    the largest ``pct_change`` first.
    """
    identity_cols = ["test_name", "python_version", "test_type", "cache_type"]
    wanted_cols = identity_cols + ["time"]
    only_passed = pl.col("status") == "passed"

    current_passed = current_df.filter(only_passed).select(wanted_cols)
    master_passed = master_df.filter(only_passed).select(wanted_cols)

    joined = current_passed.join(master_passed, on=identity_cols, suffix="_master")
    # A non-positive baseline cannot be used as the divisor, so drop those rows.
    comparable = joined.filter(pl.col("time_master") > 0)

    delta = pl.col("time") - pl.col("time_master")
    pct_change = (delta / pl.col("time_master") * 100).alias("pct_change")
    return comparable.with_columns(pct_change).sort("pct_change", descending=True)


def filter_diverged(full_comparison, threshold_pct):
    """Keep the rows whose absolute ``pct_change`` is strictly greater than ``threshold_pct``."""
    exceeds_threshold = pl.col("pct_change").abs() > threshold_pct
    return full_comparison.filter(exceeds_threshold)


def _escape_table_cell(text):
    """Escape ``|`` so a value cannot terminate a Markdown table cell early."""
    return str(text).replace("|", "\\|")


def _format_table(heading, rows):
    """Return the Markdown lines for one titled table with one row per test."""
    table = [
        heading,
        "",
        "| Test | Config | Master | Current | Change |",
        "|------|--------|-------:|--------:|-------:|",
    ]
    for row in rows.iter_rows(named=True):
        config = f"{row['python_version']}/{row['test_type']}/{row['cache_type']}"
        table.append(
            f"| `{_escape_table_cell(row['test_name'])}` | {_escape_table_cell(config)} "
            f"| {row['time_master']:.3f}s | {row['time']:.3f}s | {row['pct_change']:+.1f}% |"
        )
    return table


def format_report(diverged, master_symbol, threshold_pct):
    """Format the comparison results as a Markdown string suitable for a PR comment.

    Args:
        diverged: polars DataFrame of tests beyond the threshold; must provide the
            columns ``test_name``, ``python_version``, ``test_type``, ``cache_type``,
            ``time``, ``time_master`` and ``pct_change``.
        master_symbol: name of the baseline symbol, shown in the report header.
        threshold_pct: threshold in percent, shown in the report header.

    Returns:
        The report text. It always starts with ``## Pytest Duration Report``;
        the PR-comment workflow step uses that prefix to find earlier reports.
    """
    lines = [
        "## Pytest Duration Report",
        "",
        f"Threshold: **{threshold_pct}%** | Baseline: `{master_symbol}`",
        "",
    ]

    if diverged.is_empty():
        lines.append("No tests exceeded the threshold - durations are stable. :white_check_mark:")
        return "\n".join(lines)

    slower = diverged.filter(pl.col("pct_change") > 0)
    faster = diverged.filter(pl.col("pct_change") < 0)

    if not slower.is_empty():
        lines.extend(_format_table(f"### :warning: Slower ({len(slower)} tests)", slower))
        lines.append("")

    if not faster.is_empty():
        lines.extend(_format_table(f"### :rocket: Faster ({len(faster)} tests)", faster))

    lines.append("")
    lines.append(f"**Total: {len(slower)} slower, {len(faster)} faster**")
    return "\n".join(lines)


@click.command()
@click.option("--run-id", type=str, required=True, help="Run ID of the current build")
@click.option("--threshold", type=float, default=10.0, help="Percentage change threshold (default: 10)")
@click.option("--output", type=str, default=None, help="Write report to this file (for PR comments)")
def main(run_id, threshold, output):
    # CLI entry point: compare the run identified by --run-id against the latest
    # master run, print the comparison and the Markdown report, optionally write
    # the report to --output, and exit with status 1 when any test got slower.
    results_lib = get_results_lib("pytest_results")

    current_symbol = find_symbol(results_lib, run_id=run_id)
    if current_symbol is None:
        print(f"Error: No data found for run_id={run_id}", file=sys.stderr)
        sys.exit(1)

    master_symbol = find_symbol(results_lib, branch="master")
    if master_symbol is None:
        print("No master runs found in ArcticDB - skipping comparison.")
        sys.exit(0)

    # Don't compare master against itself.
    if current_symbol == master_symbol:
        print("Current run is the latest master run - skipping self-comparison.")
        sys.exit(0)

    print(f"Current run : {current_symbol}")
    print(f"Master baseline: {master_symbol}")
    print()

    comparison = build_full_comparison(
        load_df(results_lib, current_symbol),
        load_df(results_lib, master_symbol),
    )

    rule = "=" * 80
    print(rule)
    print("Full duration comparison (all tests)")
    print(rule)
    # Lift polars' display limits so every row and column is printed.
    with pl.Config(tbl_rows=-1, tbl_cols=-1, fmt_str_lengths=80):
        print(comparison)
    print()

    diverged = filter_diverged(comparison, threshold)
    report = format_report(diverged, master_symbol, threshold)
    print(report)

    if output:
        Path(output).write_text(report)
        print(f"\nReport written to {output}")

    # A non-zero exit status signals that at least one test became slower.
    regressions = diverged.filter(pl.col("pct_change") > 0)
    if regressions.is_empty():
        return
    sys.exit(1)


if __name__ == "__main__":
    main()
67 changes: 67 additions & 0 deletions build_tooling/install_rustfs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/usr/bin/env bash
# Install the rustfs S3-compatible server (https://github.com/rustfs/rustfs).
# Downloads a prebuilt binary from GitHub releases.
#
# Usage:
#   ./build_tooling/install_rustfs.sh                        # installs to ~/bin/rustfs
#   ./build_tooling/install_rustfs.sh /usr/local/bin         # installs to /usr/local/bin/rustfs
#   INSTALL_DIR=/tmp/bin ./build_tooling/install_rustfs.sh   # via env var
#
# Environment:
#   INSTALL_DIR     target directory (the positional argument takes precedence)
#   RUSTFS_VERSION  release tag to install; "latest" (default) is resolved
#                   through the GitHub API
#
# Exits non-zero if the platform is unsupported, a download fails, or the
# archive does not contain the expected binary.

set -euo pipefail

INSTALL_DIR="${1:-${INSTALL_DIR:-$HOME/bin}}"
RUSTFS_VERSION="${RUSTFS_VERSION:-latest}"
WORK_DIR=$(mktemp -d)

cleanup() { rm -rf "$WORK_DIR"; }
trap cleanup EXIT

# Detect platform
case "$(uname -s)-$(uname -m)" in
  Linux-x86_64)  ASSET="rustfs-linux-x86_64-gnu" ;;
  Linux-aarch64) ASSET="rustfs-linux-aarch64-gnu" ;;
  Darwin-arm64)  ASSET="rustfs-macos-aarch64" ;;
  Darwin-x86_64) ASSET="rustfs-macos-x86_64" ;;
  MINGW*|MSYS*|CYGWIN*)
    ASSET="rustfs-windows-x86_64"
    ;;
  *)
    echo "Unsupported platform: $(uname -s)-$(uname -m)" >&2
    exit 1
    ;;
esac

if [[ "$ASSET" == *windows* ]]; then
  BIN_NAME="rustfs.exe"
else
  BIN_NAME="rustfs"
fi

if [[ "$RUSTFS_VERSION" == "latest" ]]; then
  # Resolve latest version tag from GitHub API (includes pre-releases)
  RUSTFS_VERSION=$(curl -fsSL --retry 3 "https://api.github.com/repos/rustfs/rustfs/releases?per_page=1" \
    | python3 -c "import json,sys; print(json.load(sys.stdin)[0]['tag_name'])")
  echo "==> Resolved latest version: ${RUSTFS_VERSION}"
fi
DOWNLOAD_URL="https://github.com/rustfs/rustfs/releases/download/${RUSTFS_VERSION}/${ASSET}-v${RUSTFS_VERSION}.zip"

# Work with explicit paths instead of cd-ing into WORK_DIR: a relative
# INSTALL_DIR must resolve against the caller's directory, not against the
# temp dir that the EXIT trap deletes.
ARCHIVE="$WORK_DIR/rustfs.zip"
EXTRACT_DIR="$WORK_DIR/extracted"

echo "==> Downloading rustfs (${ASSET})..."
curl -fSL --retry 3 -o "$ARCHIVE" "$DOWNLOAD_URL"

echo "==> Extracting..."
unzip -q "$ARCHIVE" -d "$EXTRACT_DIR"

# Locate the binary: prefer the archive root, otherwise take the first match
# anywhere in the extracted tree.
EXTRACTED_BINARY="$EXTRACT_DIR/$BIN_NAME"
if [[ ! -f "$EXTRACTED_BINARY" ]]; then
  EXTRACTED_BINARY=""
  while IFS= read -r candidate; do
    EXTRACTED_BINARY="$candidate"
    break
  done < <(find "$EXTRACT_DIR" -type f -name "$BIN_NAME")
fi
if [[ -z "$EXTRACTED_BINARY" ]]; then
  # Fail loudly rather than reporting success with nothing installed.
  echo "Error: ${BIN_NAME} not found in ${DOWNLOAD_URL}" >&2
  exit 1
fi

echo "==> Installing to ${INSTALL_DIR}..."
mkdir -p "$INSTALL_DIR"
# Normalise to an absolute path so the PATH check and the hint below are accurate.
INSTALL_DIR=$(cd "$INSTALL_DIR" && pwd)
BINARY="$INSTALL_DIR/$BIN_NAME"
cp "$EXTRACTED_BINARY" "$BINARY"
chmod +x "$BINARY"

echo "==> Installed: $BINARY ($(wc -c < "$BINARY" | tr -d ' ') bytes)"

# Check if install dir is in PATH. A literal match on the colon-delimited PATH
# avoids treating the directory as a regular expression.
case ":$PATH:" in
  *":$INSTALL_DIR:"*) ;;
  *)
    echo ""
    echo "NOTE: ${INSTALL_DIR} is not in your PATH. Add it with:"
    echo "  export PATH=\"${INSTALL_DIR}:\$PATH\""
    ;;
esac
Loading
Loading