diff --git a/pytest.ini b/pytest.ini index fa7a5bbf..ce72372d 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,2 +1,3 @@ [pytest] norecursedirs = pocs +addopts = --ignore=tests/test_run_benchmarks.py diff --git a/runner/README.md b/runner/README.md index 92fb1e63..5545f28f 100644 --- a/runner/README.md +++ b/runner/README.md @@ -85,24 +85,38 @@ python run_benchmarks.py ../results/metadata.yaml 2024 --run_id "debug-run-001" Use `run_solver.py` to test a single solver on a single benchmark problem. This is useful for debugging: ```bash -python run_solver.py +python runner/run_solver.py \ + --solver_name \ + --solver_version \ + --input_file \ + --highs_solver_variant \ + --hipo_block_size ``` **Arguments:** -- `solver_name` - Solver name (highs, scip, cbc, gurobi, glpk) -- `input_file` - Path to benchmark problem file (.lp or .mps) -- `solver_version` - Solver version string (e.g., 1.10.0) +- `solver_name` - Name of the solver to run (e.g., `highs`, `glpk`, `gurobi`, `scip`, `cbc`, `cplex`, `knitro`, `xpress`). +- `solver_version` - Version of the solver to use. +- `input_file` - Path to the input problem file (e.g., `.mps`, `.lp`). +- `highs_solver_variant` - Variant of HiGHS to run (`hipo`, `ipm`, `ipx`, `pdlp`, `simplex` - default: `simplex`). Only for HiGHS. +- `hipo_block_size` - Block size for HiPO variant of HiGHS (default: `128`). Only for HiGHS with `hipo` solver variant. 
**Examples:** ```bash -# Test HiGHS -conda activate benchmark-2024 -python run_solver.py highs ./benchmarks/pypsa-eur-elec-op-2-1h.lp 1.10.0 +# Test HiGHS (simplex variant) +conda activate benchmark-2025 +python runner/run_solver.py \ + --solver_name highs \ + --solver_version 1.13.2.dev1 \ + --input_file ./benchmarks/pypsa-eur-elec-op-2-1h.lp \ + --highs_solver_variant simplex # Test SCIP -conda activate benchmark-2024 -python run_solver.py scip ./benchmarks/pypsa-eur-elec-op-2-1h.lp 9.2.2 +conda activate benchmark-2025 +python runner/run_solver.py \ + --solver_name scip \ + --solver_version 9.2.2 \ + --input_file ./benchmarks/pypsa-eur-elec-op-2-1h.lp ``` **Output:** diff --git a/runner/benchmark_all.sh b/runner/benchmark_all.sh index f996657c..135eba98 100755 --- a/runner/benchmark_all.sh +++ b/runner/benchmark_all.sh @@ -75,7 +75,7 @@ for year in "${years[@]}"; do solver_args="--solvers ${solvers_override}" echo "Using solver override: ${solvers_override}" else - solver_args="--solvers gurobi highs-hipo highs-ipm highs scip cbc glpk" + solver_args="--solvers gurobi highs-hipo highs-ipm highs scip glpk" fi # Overwrite results for the first year, append thereafter diff --git a/runner/envs/benchmark-2026.yaml b/runner/envs/benchmark-2026.yaml new file mode 100644 index 00000000..b503bbfd --- /dev/null +++ b/runner/envs/benchmark-2026.yaml @@ -0,0 +1,19 @@ +name: benchmark-2026 +channels: +- conda-forge/label/dev +- conda-forge +- https://conda.anaconda.org/gurobi +- nodefaults +dependencies: +- python>=3.12 +- pip +- psutil>=5.9 +- requests>=2.32 +- linopy>=0.6.4 +- coin-or-cbc==2.10.12 +- scip==10.0.0 +- pyscipopt==5.7.1 +- gurobi==13.0.0 + +# Install highspy from conda-forge dev label +- highspy==1.13.2.dev1 diff --git a/runner/run_benchmarks.py b/runner/run_benchmarks.py index e04cee15..f03dc8bb 100644 --- a/runner/run_benchmarks.py +++ b/runner/run_benchmarks.py @@ -1,3 +1,51 @@ +""" +Benchmark Runner Script +======================= + +This script automates 
the benchmarking of multiple optimization solvers +(e.g., HiGHS, GLPK, Gurobi, SCIP, CBC) on a set of +benchmark problem instances defined in a YAML configuration file. It manages +downloading benchmark files, running solvers in isolated environments with +resource limits, collecting metrics (runtime, memory, status, objective, etc.), +and writing results to CSV files for further analysis. + +Features +-------- +- Supports running multiple solvers and benchmark instances in series. +- Handles solver-specific environment setup and version detection. +- Enforces memory and runtime limits for solver runs. +- Collects and records detailed metrics, including runtime, memory usage, + status, objective value, duality gap, and integrality violation. +- Outputs results and summary statistics to CSV files. + +Example Usage +------------- +Run the script from the command line: + + python runner/run_benchmarks.py [OPTIONS] + +Parameters +------------ +--benchmark_yaml_path : str + Path to the benchmark configuration YAML file (e.g., ../results/metadata.yaml). +--year : str + Solver release year (e.g., 2020-2025). +--solvers : list of str, optional + Space-separated list of solvers to run. Defaults to all supported solvers. +--append : bool, optional + Append to the results CSV file instead of overwriting. Default is False. +--ref_bench_interval : int, optional + Interval in seconds to run a reference benchmark with the HiGHS binary. +--run_id : str, optional + Unique identifier for this benchmark run. + +Returns +-------- +- Results for each solver/benchmark instance are written to `results/benchmark_results.csv`. +- Summary statistics (mean, stddev) are written to `results/benchmark_results_mean_stddev.csv`. +- Logs and solution files are saved in the `runner/logs/` and `runner/solutions/` directories. 
+""" + import argparse import csv import datetime @@ -9,6 +57,7 @@ import statistics import subprocess import time +import typing from collections import OrderedDict from pathlib import Path from socket import gethostname @@ -16,10 +65,29 @@ import psutil import requests import yaml -from run_solver import HighsVariant -def get_conda_package_versions(solvers, env_name=None): +def get_conda_package_versions(solvers: list[str], env_name=None) -> dict[str, str]: + """ + Get the installed version of specified solver packages in a conda environment. + + Parameters + ---------- + solvers : list of str + List of solver names to query for package versions. + env_name : str, optional + Name of the conda environment to query. If None, uses the current active environment. + + Returns + ------- + solver_versions : dict + Dictionary mapping each solver name to its installed version string in the specified conda environment. + + Raises + ------ + ValueError + If the conda command fails to execute. + """ try: # List packages in the conda environment cmd = "conda list" @@ -42,17 +110,17 @@ def get_conda_package_versions(solvers, env_name=None): installed_packages[parts[0]] = parts[1] # Map solver names to their conda package names - name_to_pkg = {"highs": "highspy", "cbc": "coin-or-cbc"} + name_to_pkg = { + "highs": "highspy", + "highs-hipo": "highspy", + "highs-ipm": "highspy", + "cbc": "coin-or-cbc", + "scip": "pyscipopt", + } solver_versions = {} for solver in solvers: - # Handle highs-hipo variants as special cases - not conda packages - if solver in [ - variant.value for variant in HighsVariant - ]: # For py3.10 compatibility - solver_versions[solver] = get_highs_hipo_version() - else: - package = name_to_pkg.get(solver, solver) - solver_versions[solver] = installed_packages.get(package, None) + package = name_to_pkg.get(solver, solver) + solver_versions[solver] = installed_packages.get(package, None) return solver_versions @@ -60,55 +128,148 @@ def 
get_conda_package_versions(solvers, env_name=None): raise ValueError(f"Error executing conda command: {e.stderr or str(e)}") -def download_benchmark_file(url, dest_path: Path): - """Download a file from url and save it locally in the specified folder if it doesn't already exist. +def _download_via_requests(url: str, dest: Path, chunk_size: int = 8192) -> None: + """ + Download a file over HTTP(S) requests. + + Parameters + ---------- + url : str + HTTP or HTTPS URL to download. + dest : pathlib.Path + Local destination path where the downloaded file will be written. + chunk_size : int, optional + Size in bytes of chunks to read from the response stream (default: 8192). + """ + tmp = dest.with_suffix(dest.suffix + ".tmp") + with requests.get(url, stream=True) as r: + r.raise_for_status() + with open(tmp, "wb") as f: + for chunk in r.iter_content(chunk_size=chunk_size): + if chunk: + f.write(chunk) + os.replace(tmp, dest) + print(f"Downloaded {url} to {dest} via requests") - If the URL is on GCS (starting gs://), then this uses `gsutil` to download the file (requires authentication). - If the file is gzipped (.gz), it will be unzipped after downloading. + +def _download_via_gsutil(url: str, dest: Path) -> None: + """ + Download a file from Google Cloud Storage using the gsutil command. + + Parameters + ---------- + url : str + GCS URL to download. Must start with ``gs://``. + dest : pathlib.Path + Local destination path where the downloaded file will be written. + + Raises + ------ + subprocess.CalledProcessError + If the `gsutil` command exits with a non-zero status. 
""" - # Ensure the destination folder exists - os.makedirs(dest_path.parent, exist_ok=True) + subprocess.run( + ["gsutil", "cp", url, str(dest)], check=True, capture_output=True, text=True + ) + print(f"Downloaded {url} to {dest} via gsutil") - # If dest_path ends with .gz, prepare for the uncompressed version - if dest_path.suffix == ".gz": - uncompressed_dest_path = dest_path.with_suffix("") - else: - uncompressed_dest_path = dest_path - if os.path.exists(uncompressed_dest_path): - print(f"File already exists at {uncompressed_dest_path}. Skipping download.") +def _unzip_gz(path: Path) -> Path: + """ + Decompress a gzip file and remove the original compressed file. + + Parameters + ---------- + path : Path + Path to the file to decompress. If the file does not have a `.gz` + extension, it is returned unchanged. + + Returns + ------- + Path + Path to the decompressed file. If the input file was not gzipped, + returns the input path unchanged. + + Notes + ----- + This function removes the original `.gz` file after successful + decompression and prints a message to stdout indicating the operation. + """ + if path.suffix != ".gz": + return path + uncompressed = path.with_suffix("") + with gzip.open(path, "rb") as gz_f, open(uncompressed, "wb") as out_f: + shutil.copyfileobj(gz_f, out_f) + os.remove(path) + print(f"Unzipped {path} -> {uncompressed}") + return uncompressed + + +def download_benchmark_file(url: str, dest_path: Path) -> None: + """ + Download a file from a URL and save it locally, unzipping if necessary. + + Parameters + ---------- + url : str + The URL of the file to download. If the URL starts with 'gs://', `gsutil` is used for downloading. + dest_path : pathlib.Path + The local path where the downloaded file will be saved. If the file is gzipped (.gz), it will be unzipped after download. + + Notes + ----- + - If the file already exists at the destination (uncompressed), the download is skipped. 
+ - For Google Cloud Storage URLs, requires `gsutil` and authentication. + - Automatically unzips `.gz` files after download and removes the compressed file. + - Creates the destination directory if it does not exist. + """ + dest_path = Path(dest_path) + dest_path.parent.mkdir(parents=True, exist_ok=True) + + # determine the final uncompressed path to check for existing file + final_uncompressed = ( + dest_path.with_suffix("") if dest_path.suffix == ".gz" else dest_path + ) + if final_uncompressed.exists(): + print(f"File already exists at {final_uncompressed}. Skipping download.") return + # download to dest_path (compressed or not) if url.startswith("gs://"): - # GCS file, so download using gsutil - print(f"Downloading {url} to {dest_path} using gsutil...", end="") - cmd = ["gsutil", "cp", url, dest_path] - _result = subprocess.run(cmd, capture_output=True, text=True, check=True) - print("done.") + _download_via_gsutil(url, dest_path) else: - # Perform the download with streaming to handle large files - print(f"Downloading {url} to {dest_path}...", end="") - with requests.get(url, stream=True) as response: - response.raise_for_status() - with open(dest_path, "wb") as f: - for chunk in response.iter_content(chunk_size=8192): - f.write(chunk) - print("done.") + _download_via_requests(url, dest_path) + # if compressed, unzip and remove the .gz if dest_path.suffix == ".gz": - print(f"Unzipping {dest_path}...") - with gzip.open(dest_path, "rb") as gz_file: - uncompressed_file_path = dest_path.with_suffix("") - with open(uncompressed_file_path, "wb") as uncompressed_file: - shutil.copyfileobj(gz_file, uncompressed_file) - os.remove(dest_path) - print(f"Unzipped to {uncompressed_file_path}.") - - -def parse_memory(output): - line = output.splitlines()[-1] - if "MaxResidentSetSizeKB=" in line: - parts = line.strip().split("=") + _unzip_gz(dest_path) + + +def parse_memory(output: str) -> float: + """ + Parse the maximum resident set size (memory usage) from subprocess 
output. + + Parameters + ---------- + output : str + The output string from a subprocess, expected to contain a line with 'MaxResidentSetSizeKB='. + + Returns + ------- + memory_mb : float + The maximum resident set size in megabytes (MB). + + Raises + ------ + ValueError + If the memory usage line is not found in the output. + + Notes + ----- + - Assumes the memory usage is reported in kilobytes (KB) and converts it to megabytes (MB). + """ + if "MaxResidentSetSizeKB=" in output: + parts = output.strip().split("=") max_resident_set_size = parts[-1] return float(max_resident_set_size) / 1000 # Convert to MB raise ValueError(f"Could not find memory usage in subprocess output:\n{output}") @@ -231,35 +392,276 @@ def write_csv_summary_row(mean_stddev_csv, benchmark_name, metrics, run_id, time ) -def benchmark_solver(input_file, solver_name, timeout, solver_version): - available_memory_bytes = psutil.virtual_memory().available - memory_limit_bytes = int(available_memory_bytes * 0.95) - memory_limit_mb = memory_limit_bytes / (1024 * 1024) - print(f"Setting memory limit to {memory_limit_mb:.2f} MB (95% of available memory)") +def get_solver_name_and_version(solver_name: str) -> tuple[str, str | None]: + """ + Split solver names into base solver and variant components. + + Parses solver names like 'highs-hipo', 'highs ipm', or 'highs' into + their base solver and variant parts. For non-highs solvers, returns + the original name with no variant. + + Parameters + ---------- + solver_name : str + The solver name to split. Can be a highs variant like 'highs-hipo', + 'highs ipm', 'highs', or any other solver name. + + Returns + ------- + tuple[str, str | None] + A tuple containing: + - base_solver : str + The base solver name ('highs' for highs variants, otherwise + the original solver_name). + - variant : str or None + The variant suffix if present (e.g., 'hipo', 'ipm'), or None + if no variant is found. 
+ + Examples + -------- + >>> get_solver_name_and_version("highs-hipo") + ('highs', 'hipo') + + >>> get_solver_name_and_version("highs") + ('highs', None) + + >>> get_solver_name_and_version("glpk") + ('glpk', None) + """ + m = re.match(r"^(highs)(?:[-\s](?P<variant>[\w-]+))?$", solver_name.lower()) + if m: + return m.group(1), m.group("variant") + return solver_name, None + + +def build_solver_command( + input_file: Path, + solver_name: str, + timeout: int, + solver_version: str, + memory_limit_bytes: int, + reference_benchmark: bool, +) -> list[str]: + """ + Build the shell command to run a solver with resource limits. + + Parameters + ---------- + input_file : Path + Path to the benchmark problem file to be solved. + solver_name : str + Name of the solver to run (e.g., "highs", "gurobi", "scip", "cbc", "glpk"). + timeout : int + Maximum allowed runtime for the solver in seconds. + solver_version : str + Version string of the solver, passed to the solver script. + memory_limit_bytes : int + Maximum memory the solver process is allowed to use, in bytes. + reference_benchmark : bool + If True, appends the ``--highs_solver_variant hipo`` flag to run + the HiGHS HiPO variant on a reference instance. + + Returns + ------- + command : list of str + The command as a list of strings, suitable for passing to + ``subprocess.run``. 
+ """ + base_solver, variant = get_solver_name_and_version(solver_name) command = ["systemd-run"] - if os.geteuid() != 0: command.append("--user") command.extend( [ "--scope", - f"--property=MemoryMax={memory_limit_bytes}", # Set resident memory limit - "--property=MemorySwapMax=0", # Disable swap to ensure only physical RAM is used + f"--property=MemoryMax={memory_limit_bytes}", + "--property=MemorySwapMax=0", "/usr/bin/time", "--format", "MaxResidentSetSizeKB=%M", "timeout", f"{timeout}s", "python", - f"{Path(__file__).parent / 'run_solver.py'}", - solver_name, - input_file, + str(Path(__file__).parent / "run_solver.py"), + "--solver_name", + base_solver, + "--input_file", + input_file.as_posix(), + "--solver_version", solver_version, ] ) + if variant: + command.extend(["--highs_solver_variant", variant]) + elif reference_benchmark and base_solver.lower() == "highs": + command.extend(["--highs_solver_variant", "hipo"]) + + return command + + +def return_failure_metrics( + status: str, condition: str, runtime: int | float | str +) -> dict[str, typing.Any]: + """ + Build a metrics dictionary for solver failure cases. + + Parameters + ---------- + status : str + Short status code for the run (e.g., ``"TO"``, ``"OOM"``, ``"ER"``). + condition : str + Human-readable termination condition (e.g., ``"Timeout"``, ``"Out of Memory"``, ``"Error"``). + runtime : int, float, or str + Runtime to record in seconds, or a sentinel (e.g., ``"N/A"``) when not applicable. + + Returns + ------- + metrics : dict + Dictionary with the following keys: + - ``status`` : str + The provided short status code. + - ``condition`` : str + The provided termination condition. + - ``objective`` : None + Always ``None`` for failure cases. + - ``runtime`` : int, float, or str + The provided runtime value. + - ``reported_runtime`` : float or None + The numeric runtime if ``runtime`` is an ``int`` or ``float``, otherwise ``None``. + - ``duality_gap`` : None + Always ``None`` for failure cases. 
+ - ``max_integrality_violation`` : None + Always ``None`` for failure cases. + """ + reported_runtime = runtime if isinstance(runtime, (int, float)) else None + return { + "status": status, + "condition": condition, + "objective": None, + "runtime": runtime, + "reported_runtime": reported_runtime, + "duality_gap": None, + "max_integrality_violation": None, + } + + +def parse_solver_result(result: subprocess.CompletedProcess, timeout: int) -> dict: + """ + Interpret a subprocess `CompletedProcess` from a solver run and produce a metrics dictionary. + + Parameters + ---------- + result : subprocess.CompletedProcess + The result returned by ``subprocess.run`` when executing the solver wrapper. + timeout : int + Timeout value (in seconds) that was enforced for the solver run. Used for timeout/error metrics. + + Returns + ------- + metrics : dict + A metrics dictionary describing the solver outcome. For successful runs this is the JSON-parsed + metrics object produced by the solver wrapper (parsed from the last line of ``result.stdout``). + For failure cases a dictionary produced by ``return_failure_metrics`` is returned with keys: + ``status``, ``condition``, ``objective`` (None), ``runtime``, ``reported_runtime``, + ``duality_gap`` (None), and ``max_integrality_violation`` (None). + + Raises + ------ + ValueError + Not raised by this function directly, but callers should be aware that JSON parsing may raise + exceptions if ``result.stdout`` does not contain valid JSON on the final line. 
+ """ + + if result.returncode == 0: + # Successful run; parse the JSON metrics from the last line of stdout + metrics = json.loads(result.stdout.splitlines()[-1]) + elif result.returncode == 124: + # 124 is the exit code used by the `timeout` command to indicate a timeout + print("TIMEOUT", flush=True) + metrics = return_failure_metrics("TO", "Timeout", timeout) + elif result.returncode in (137, 143, -9, -15): + # systemd-run uses sigkill (9) or sigterm (15) to terminate + # the process and returns 128 + signal exit code + # subprocess returns - for signals + # these things don't seem very portable + print("OUT OF MEMORY", flush=True) + metrics = return_failure_metrics("OOM", "Out of Memory", "N/A") + else: + print( + f"ERROR running solver. Return code: {result.returncode}\n" + f"Stdout:\n{result.stdout}\n" + f"Stderr:\n{result.stderr}\n", + flush=True, + ) + metrics = return_failure_metrics("ER", "Error", timeout) + return metrics + + +def benchmark_solver( + input_file: Path, + solver_name: str, + timeout: int, + solver_version: str, + reference_benchmark=False, +) -> dict[str, object]: + """ + Run a solver on a benchmark problem file with resource limits and collect metrics. + + Parameters + ---------- + input_file : Path + Path to the benchmark problem file. + solver_name : str + Name of the solver to run (e.g., "gurobi", "highs-hipo", "highs-ipm", "highs", "scip", "cbc" or "glpk"). + timeout : int + Maximum allowed runtime for the solver in seconds. + solver_version : str + Version of the solver to use. + reference_benchmark : bool, optional + Whether this is a reference benchmark run (default: False). If True, run the reference benchmark. + + Returns + ------- + metrics : dict + Dictionary containing benchmark metrics: + - status : str + Solver status ("ok", "TO", "ER", "OOM"). + - condition : str + Termination condition ("Optimal", "Timeout", "Error", "Out of Memory"). + - objective : float or None + Objective value if available. 
+ - runtime : float or str + Actual runtime in seconds or "N/A". + - reported_runtime : float or None + Runtime reported by the solver, if available. + - duality_gap : float or None + Duality gap for MILP problems, if available. + - max_integrality_violation : float or None + Maximum integrality violation for MILP problems, if available. + - memory : float or None + Maximum resident set size in MB. + - timeout : int + Timeout value in seconds. + """ + available_memory_bytes = psutil.virtual_memory().available + memory_limit_bytes = int(available_memory_bytes * 0.95) + memory_limit_mb = memory_limit_bytes / (1024 * 1024) + print( + f"Setting memory limit to {memory_limit_mb:.2f} MB (95% of available memory)." + ) + + command = build_solver_command( + input_file, + solver_name, + timeout, + solver_version, + memory_limit_bytes, + reference_benchmark, + ) + # Run the command and capture the output result = subprocess.run( command, @@ -275,12 +677,12 @@ def benchmark_solver(input_file, solver_name, timeout, solver_version): / "logs" / f"{Path(input_file).stem}-{solver_name}-{solver_version}.log" ) - if log_file.exists: - with open(log_file, "a") as f: - f.write("\nSTDERR:\n") - f.write(result.stderr) - else: - print(f"ERROR: couldn't find log file {log_file}") + if not log_file.exists(): + print(f"Creating missing log file {log_file}") + log_file.touch() + with open(log_file, "a") as f: + f.write("\nSTDERR:\n") + f.write(result.stderr) memory = None try: @@ -288,50 +690,7 @@ def benchmark_solver(input_file, solver_name, timeout, solver_version): except ValueError: print("Failed to parse memory usage from stderr") - if result.returncode == 124: - print("TIMEOUT") - metrics = { - "status": "TO", - "condition": "Timeout", - "objective": None, - "runtime": timeout, - "reported_runtime": timeout, - "duality_gap": None, - "max_integrality_violation": None, - } - # systemd-run uses sigkill (9) or sigterm (15) to terminate the process and returns 128 + signal exit code - # 
subprocess returns - for signals - # these things don't seem very portable - elif result.returncode in (137, 143, -9, -15): - print("OUT OF MEMORY") - metrics = { - "status": "OOM", - "condition": "Out of Memory", - "objective": None, - "runtime": "N/A", - "reported_runtime": None, - "duality_gap": None, - "max_integrality_violation": None, - } - elif result.returncode != 0: - print( - f"ERROR running solver. Return code: {result.returncode}\n", - f"Stdout:\n{result.stdout}\n", - f"Stderr:\n{result.stderr}\n", - ) - # Errors are also said to have run for `timeout`s, so that they appear - # along with timeouts in charts - metrics = { - "status": "ER", - "condition": "Error", - "objective": None, - "runtime": timeout, - "reported_runtime": timeout, - "duality_gap": None, - "max_integrality_violation": None, - } - else: - metrics = json.loads(result.stdout.splitlines()[-1]) + metrics = parse_solver_result(result, timeout) if metrics["status"] not in {"ok", "TO", "ER", "OOM"}: print(f"WARNING: unknown solver status: {metrics['status']}") @@ -339,109 +698,7 @@ def benchmark_solver(input_file, solver_name, timeout, solver_version): metrics["memory"] = memory metrics["timeout"] = timeout - return metrics - - -def get_highs_binary_version(): - """Get the version of the HiGHS binary from the --version command""" - highs_binary = "/opt/highs/bin/highs" - - try: - result = subprocess.run( - [highs_binary, "--version"], - capture_output=True, - text=True, - check=True, - encoding="utf-8", - ) - - version_match = re.search(r"HiGHS version (\d+\.\d+\.\d+)", result.stdout) - if version_match: - return version_match.group(1) - - return "unknown" - except Exception as e: - print(f"Error getting HiGHS binary version: {str(e)}") - return "unknown" - - -def get_highs_hipo_version(): - """Get the version of the HiGHS-HiPO binary from the --version command""" - if os.geteuid() != 0: - highs_hipo_binary = 
"/home/madhukar/oet/solver-benchmark/highs-installs/highs-hipo-workspace/HiGHS/build/bin/highs" - else: - highs_hipo_binary = "/opt/highs-hipo-workspace/HiGHS/build/bin/highs" - - try: - result = subprocess.run( - [highs_hipo_binary, "--version"], - capture_output=True, - text=True, - check=True, - encoding="utf-8", - ) - - version_match = re.search(r"HiGHS version (\d+\.\d+\.\d+)", result.stdout) - if version_match: - return version_match.group(1) + "-hipo" - - return "unknown-hipo" - except Exception as e: - print(f"Error getting HiGHS-HiPO binary version: {str(e)}") - return "unknown-hipo" - - -def benchmark_highs_binary(): - """ - Run a reference benchmark using the pre-installed HiGHS binary - """ - reference_model = "/benchmark-test-model.lp" - highs_binary = "/opt/highs/bin/highs" - - command = [ - highs_binary, - reference_model, - ] - - # Run the command and capture the output - start_time = time.perf_counter() - result = subprocess.run( - command, - capture_output=True, - text=True, - check=False, - encoding="utf-8", - ) - runtime = time.perf_counter() - start_time - if result.returncode != 0: - print(f"ERROR running solver. 
Return code:\n{result.returncode}") - metrics = { - "status": "ER", - "condition": "Error", - "objective": None, - "runtime": runtime, - "duality_gap": None, - "max_integrality_violation": None, - } - else: - # Parse HiGHS output to extract objective value - objective = None - for line in result.stdout.splitlines(): - if "Objective value" in line: - try: - objective = float(line.split(":")[-1].strip()) - except (ValueError, IndexError): - pass - - metrics = { - "status": "OK", - "condition": "Optimal", - "objective": objective, - "runtime": runtime, - "memory": "N/A", - "duality_gap": None, # Not available from command line output - "max_integrality_violation": None, # Not available from command line output - } + print("Finished benchmark_solver with metrics:", metrics, flush=True) return metrics @@ -525,8 +782,12 @@ def main( benchmarks_folder = Path(__file__).parent / "benchmarks/" os.makedirs(benchmarks_folder, exist_ok=True) + # Get solver versions from the conda environment to include in the results solvers_versions = get_conda_package_versions(solvers, f"benchmark-{year}") + # Get the path of the reference benchmark + reference_benchmark_path = Path(benchmarks_folder, "benchmark-test-model.lp") + # Preprocess the sizes and make a list of individual benchmark files to run on processed_benchmarks = [] for benchmark_name, benchmark_info in benchmarks_info.items(): @@ -577,10 +838,6 @@ def main( + ("" if size_categories is None else f" matching {size_categories}") ) - reference_solver_version = "" - if reference_interval > 0: - reference_solver_version = get_highs_binary_version() - for benchmark in processed_benchmarks: # Set timeout from YAML if provided, otherwise use size-category defaults (1h for S/M, 24h for L) timeout = benchmark.get("timeout_seconds") or ( @@ -597,14 +854,12 @@ def main( ) # Latest CBC release is in 2024 ): print( - f"WARNING: skipping {solver} in {year} because this benchmark instance is size L" + f"WARNING: skipping {solver} in {year} 
because this benchmark instance is size L." ) continue # Restrict highs-hipo variants to 2025 and LPs only - if solver in [ - variant.value for variant in HighsVariant - ] and ( # For py3.10 compatibility + if solver == "highs-hipo" and ( year != "2025" or benchmark["class"] != "LP" ): print( @@ -624,7 +879,7 @@ for i in range(iterations): print( - f"Running solver {solver} (version {solver_version}) on {benchmark['path']} ({i})...", + f"Running solver {solver} (version {solver_version}) on {benchmark['path']} ({i})...", flush=True, ) @@ -688,18 +943,15 @@ reference_interval ): print( - f"Running reference benchmark with HiGHS binary (interval: {reference_interval}s)...", - flush=True, + f"Running reference benchmark with HiGHS HiPO (interval: {reference_interval}s)..." + ) + reference_metrics = benchmark_solver( + reference_benchmark_path, + solver_name="highs-hipo", + timeout=24 * 60 * 60, + solver_version=solvers_versions.get("highs-hipo"), + reference_benchmark=True, ) - reference_metrics = benchmark_highs_binary() - - # Add required fields to reference metrics - reference_metrics["size"] = "reference" - reference_metrics["solver"] = "highs-binary" - reference_metrics["solver_version"] = reference_solver_version - reference_metrics["solver_release_year"] = "N/A" - reference_metrics["reported_runtime"] = None - reference_metrics["timeout"] = None # Record reference benchmark results reference_timestamp = datetime.datetime.now().strftime( diff --git a/runner/run_solver.py b/runner/run_solver.py index ef3281b5..fc30c451 100644 --- a/runner/run_solver.py +++ b/runner/run_solver.py @@ -1,52 +1,83 @@ -import collections.abc +""" +Solver Runner Script +==================== + +This script provides a unified interface to run various optimization solvers +(e.g., HiGHS, GLPK, Gurobi, SCIP, CBC, CPLEX, Knitro, Xpress) on a given input +problem file. 
It configures solver-specific options for reproducibility, +executes the solver, and collects key metrics such as runtime, duality gap, +and integrality violation. + +Features +-------- +- Supports multiple solvers with customizable options. +- Handles solver-specific seed and tolerance settings. +- Computes MILP metrics (duality gap, integrality violation) when applicable. +- Outputs results in JSON format. + +Example Usage +------------- +Run the script from the command line: + + python runner/run_solver.py \ + --solver_name highs \ + --solver_version 1.6.0 \ + --input_file path/to/problem.mps \ + --highs_solver_variant hipo \ + --hipo_block_size 128 + +Arguments +--------- +--solver_name Name of the solver to run (e.g., highs, glpk, gurobi, scip, cbc, cplex, knitro, xpress). +--solver_version Version of the solver to use. +--input_file Path to the input problem file (e.g., .mps, .lp). +--highs_solver_variant Variant of HiGHS to run (hipo, ipm, ipx, pdlp, simplex). Only for HiGHS. +--hipo_block_size Block size for HiPO variant of HiGHS (default: 128). + +See the function `parse_args()` for more details on arguments. +""" + +import argparse import json -import os -import subprocess -import sys -import time +import logging from enum import Enum from pathlib import Path from time import perf_counter from traceback import format_exc +from typing import Any +import linopy import pandas as pd -from linopy import solvers from linopy.solvers import SolverName +logger = logging.getLogger(__name__) -class HighsVariant(str, Enum): - HIPO = "highs-hipo" - HIPO_32 = "highs-hipo-32" - HIPO_64 = "highs-hipo-64" - HIPO_128 = "highs-hipo-128" - HIPO_IPM = "highs-ipm" - # cli args returns a list of command line arguments for the HiGHS binary. 
- def cli_args(self) -> collections.abc.Iterable[str]: - args = { - "solver": "hipo", - "run_crossover": "choose", - } - if self == HighsVariant.HIPO_IPM: - args["solver"] = "ipx" - - return [f"--{k}={v}" for k, v in args.items()] - - # options returns the contents for the HiGHS options file. - # passed to the HiGHS binary via --options_file= - def options(self) -> str: - options = {} - match self: - case HighsVariant.HIPO_32: - options["hipo_block_size"] = 32 - case HighsVariant.HIPO_64: - options["hipo_block_size"] = 64 - case HighsVariant.HIPO_128: - options["hipo_block_size"] = 128 - case HighsVariant.HIPO: - options["hipo_block_size"] = 64 - options["hipo_metis_no2hop"] = "true" - return "\n".join(f"{k} = {v}" for k, v in options.items()) +class HighsSolverVariants(str, Enum): + """ + Enumeration of supported HiGHS solver variants. + The solver variants are available at + https://ergo-code.github.io/HiGHS/stable/options/definitions/#option-solver. + + Attributes + ---------- + HIPO : str + HiPO variant of HiGHS. + IPM : str + IPM variant of HiGHS. + IPX : str + IPX variant of HiGHS. + PDLP : str + PDLP variant of HiGHS. + SIMPLEX : str + SIMPLEX variant of HiGHS. + """ + + HIPO = "hipo" + IPM = "ipm" + IPX = "ipx" + PDLP = "pdlp" + SIMPLEX = "simplex" # HiGHS is not available in the 2020 environment that we use to run GLPK @@ -55,13 +86,38 @@ def options(self) -> str: except ModuleNotFoundError: highspy = None +SUPPORTED_SOLVERS = [ + "highs", + "glpk", + "gurobi", + "scip", + "cbc", + "cplex", + "knitro", + "xpress", +] -def get_solver(solver_name): - solver_name = solver_name.lower() - solver_enum = SolverName(solver_name) - - solver_class = getattr(solvers, solver_enum.name) +def set_seed_options(solver_name: str) -> dict[str, int | float]: + """ + Sets solver-specific seed and tolerance options for reproducibility. 
+ + This function returns a dictionary of solver configuration parameters that + control random seed initialization and MIP (Mixed-Integer Programming) gap + tolerance. + + Parameters + ---------- + solver_name: str + Name of the optimization solver. Supported solvers include: "highs", + "glpk", "gurobi", "scip", "cbc", "cplex", "knitro", and "xpress". + + Returns + ------- + dict[str, int | float] + A dictionary mapping solver-specific parameter names to their values. + Returns an empty dictionary if the solver name is not recognized. + """ mip_gap = 1e-4 # Tolerance for the relative duality gap for MILPs seed_options = { "highs": {"random_seed": 0, "mip_rel_gap": mip_gap}, @@ -77,17 +133,127 @@ def get_solver(solver_name): "mip.tolerances.mipgap": mip_gap, }, "knitro": { - "KN_PARAM_MS_SEED": 1066, + "ms_seed": 1066, + }, + "xpress": { + "miprelgapnotify": mip_gap, + "randomseed": 0, }, - "xpress": {"miprelgapnotify": mip_gap, "randomseed": 0}, } + if solver_name in seed_options.keys(): + return seed_options[solver_name] + else: + logger.info( + "No seed options found for solver '%s'. Returning empty options.", + solver_name, + ) + return dict() - return solver_class(**seed_options.get(solver_name, {})) +def set_solver_options( + solver_name: str, highs_variant: str, hipo_block_size: int +) -> dict[str, int | str]: + """ + Sets solver-specific options for reproducibility. + + This function returns a dictionary of solver configuration parameters that + control specific solver behaviors, such as the block size for HiGHS variants. + + Parameters + ---------- + solver_name: str + Name of the optimization solver. Supported solvers include: "highs", + "glpk", "gurobi", "scip", "cbc", "cplex", "knitro", and "xpress". + highs_variant : str + Solver type, used to determine specific options for HiGHS variants. + hipo_block_size : int + Block size value for HiPO variant of HiGHS. + This parameter is only relevant if the solver is HiGHS + and the variant is a HiPO variant. 
+ + Returns + ------- + dict[str, int | str] + A dictionary mapping solver-specific parameter names to their values. + Returns an empty dictionary if the solver name is not recognized. + """ -def is_mip_problem(solver_model, solver_name): + if solver_name == "highs": + if highs_variant == HighsSolverVariants.HIPO: + return { + "hipo_block_size": hipo_block_size, + "solver": "hipo", + "run_crossover": "choose", + } + elif highs_variant in ( + HighsSolverVariants.IPM, + HighsSolverVariants.IPX, + HighsSolverVariants.PDLP, + HighsSolverVariants.SIMPLEX, + ): + return {"solver": highs_variant} + else: + logger.info( + "No specific options found for solver '%s'. Returning empty options.", + solver_name, + ) + return dict() + + +def get_solver( + solver_name: str, highs_variant: str, hipo_block_size: int +) -> linopy.solvers: + """ + Instantiate and configure a solver object based on the specified solver name and options. + + Parameters + ---------- + solver_name : str + Name of the optimization solver (e.g., "highs", "glpk", "gurobi"). + highs_variant : str + Variant of HiGHS to use. Only relevant if `name_solver` is "highs". + hipo_block_size : int + Block size for the HiPO variant of HiGHS. Only relevant if `variant_highs` is "hipo". + + Returns + ------- + Any + An instance of the solver class, configured with the appropriate options. + """ + solver_enum = SolverName(solver_name) + + solver_class = getattr(linopy.solvers, solver_enum.name) + + # Get seed options + seed_options = set_seed_options(solver_name) + + # Get other solver options if needed (e.g., for HiGHS variants) + solver_options = set_solver_options(solver_name, highs_variant, hipo_block_size) + + kwargs = {} + if seed_options: + kwargs.update(seed_options) + if solver_options: + kwargs.update(solver_options) + + return solver_class(**kwargs) + + +def is_mip_problem(solver_model: Any, solver_name: str) -> bool: """ - Determines if a given solver model is a Mixed Integer Programming (MIP) problem. 
+ Determine if the given solver model is a Mixed Integer Programming (MIP) problem. + + Parameters + ---------- + solver_model : Any + The solver's Python object or model instance. + solver_name : str + Name of the solver (e.g., "highs", "scip", "cbc", "gurobi", "cplex", "xpress", "glpk", "knitro"). + + Returns + ------- + bool + True if the problem is a MIP, False otherwise. """ if solver_name == "scip": if solver_model.getNIntVars() > 0 or solver_model.getNBinVars() > 0: @@ -117,7 +283,7 @@ def is_mip_problem(solver_model, solver_name): def calculate_integrality_violation( - integer_vars: pd.Series, primal_values: pd.Series + integer_vars: set, primal_values: pd.Series ) -> float: """Calculate the maximum integrality violation from primal values. We only care about Integer vars, not SemiContinuous or SemiInteger, following the code in @@ -129,8 +295,22 @@ def calculate_integrality_violation( return max((p - p.round()).abs()) -def get_duality_gap(solver_model, solver_name: str): - """Retrieve the duality gap for the given solver model, if available.""" +def get_duality_gap(solver_model: Any, solver_name: str) -> float | None: + """ + Retrieve the duality gap for the given solver model, if available. + + Parameters + ---------- + solver_model : Any + The solver's Python object or model instance. + solver_name : str + Name of the solver (e.g., "highs", "scip", "cbc", "gurobi", "cplex", "xpress", "glpk", "knitro"). + + Returns + ------- + float or None + The duality gap if available, otherwise None. 
+ """ if solver_name == "scip": return solver_model.getGap() elif solver_name == "gurobi": @@ -150,17 +330,35 @@ def get_duality_gap(solver_model, solver_name: str): # Knitro duality gap retrieval not implemented yet return None else: - raise NotImplementedError(f"The solver '{solver_name}' is not supported.") + logger.info(f"The solver '{solver_name}' is not supported.") + return None -def get_milp_metrics(input_file, solver_result): - """Uses HiGHS to read the problem file and compute max integrality violation and - duality gap. +def get_milp_metrics( + input_file: Path, solver_name: str, solver_result: Any +) -> tuple[float | None, float | None]: + """ + Compute MILP metrics (duality gap and max integrality violation) using HiGHS. + + Parameters + ---------- + input_file : Path + Path to the input problem file. + solver_name : str + Name of the solver (e.g., "highs", "scip", "cbc", "gurobi", "cplex", "xpress", "glpk", "knitro"). + solver_result : Any + The solver result object containing the solver model and solution. + + Returns + ------- + tuple[float or None, float or None] + A tuple containing the duality gap and the maximum integrality violation. + Returns (None, None) if metrics cannot be computed. 
""" try: if highspy is not None: h = highspy.Highs() - h.readModel(input_file) + h.readModel(input_file.as_posix()) integer_vars = { h.variableName(i) for i in range(h.numVariables) @@ -172,216 +370,81 @@ def get_milp_metrics(input_file, solver_result): integer_vars, solver_result.solution.primal ) return duality_gap, max_integrality_violation - except Exception: - print( + except ValueError: + raise ValueError( f"ERROR obtaining milp metrics for {input_file}: {format_exc()}", - file=sys.stderr, ) return None, None -def get_reported_runtime(solver_name, solver_model) -> float | None: - """Get the solving runtime as reported by the solver from the solver's Python object.""" - try: - match solver_name: - case "highs": - return solver_model.getRunTime() - case "scip": - return solver_model.getSolvingTime() - case "cbc": - return solver_model.runtime - case "gurobi": - return solver_model.Runtime - case "cplex": - return None - case "xpress": - return solver_model.getAttrib("time") - case "knitro": - return solver_model.reported_runtime - case _: - print(f"WARNING: cannot obtain reported runtime for {solver_name}") - return None - except Exception: - print(f"ERROR obtaining reported runtime: {format_exc()}", file=sys.stderr) - return None - - -def run_highs_hipo_solver(input_file, solver_version, highs_variant: HighsVariant): +def get_reported_runtime(solver_name: str, solver_model: Any) -> float | None: """ - Run the HiGHS-HiPO solver directly using the binary with variant-specific arguments + Get the solving runtime as reported by the solver from the solver's Python object. + + Parameters + ---------- + solver_name : str + Name of the solver (e.g., "highs", "scip", "cbc", "gurobi", "cplex", "xpress", "knitro"). + solver_model : Any + The linopy Model instance containing runtime information. + + Returns + ------- + float or None + The reported runtime in seconds, or None if not available. 
+ """ + match solver_name: + case "highs": + return solver_model.getRunTime() + case "scip": + return solver_model.getSolvingTime() + case "cbc": + return solver_model.runtime + case "gurobi": + return solver_model.Runtime + case "cplex": + return solver_model.get_time() + case "xpress": + return solver_model.getAttrib("time") + case "knitro": + return solver_model.reported_runtime + case _: + logger.info(f"WARNING: cannot obtain reported runtime for {solver_name}") + return None + + +def main( + solver_name: str, + input_file: str, + solver_version: str, + highs_solver_variant: str, + hipo_block_size: int, +) -> None: + """ + Run the specified solver on the given input file and collect results. + + Parameters + ---------- + solver_name: str + Name of the solver to run (e.g., "highs", "glpk", "gurobi"). + input_file : str + Name to the input problem file. + solver_version : str + Version of the solver to use. + highs_solver_variant : str + Variant of HiGHS to run (only applicable if solver_name_val is "highs"). + hipo_block_size : int + Block size for HiPO variant of HiGHS + (only applicable if solver_name_val is "highs" + and highs_solver_variant_val is "hipo"). 
+ + Returns + ------- + None """ - import tempfile - - # check if we are root - if os.getuid() == 0: - # VM path - highs_hipo_binary = "/opt/highs-hipo-workspace/HiGHS/build/bin/highs" - else: - highs_hipo_binary = f"{os.getenv('HOME')}/oet/solver-benchmark/highs-installs/highs-hipo-workspace/HiGHS/build/bin/highs" - - solution_dir = Path(__file__).parent / "solutions" - solution_dir.mkdir(parents=True, exist_ok=True) - - logs_dir = Path(__file__).parent / "logs" - logs_dir.mkdir(parents=True, exist_ok=True) - - output_filename = f"{Path(input_file).stem}-{solver_name}-{solver_version}" - solution_fn = solution_dir / f"{output_filename}.sol" - log_fn = logs_dir / f"{output_filename}.log" - - try: - with tempfile.NamedTemporaryFile( - mode="w", - prefix=highs_variant.value, - suffix=".options", - delete=False, - delete_on_close=False, - ) as options_file: - options_file.write(highs_variant.options()) - options_file.flush() - - solver_args = list(highs_variant.cli_args()) - solver_args.append(f"--options_file={options_file.name}") - - command = [ - highs_hipo_binary, - *solver_args, - str(Path(input_file).resolve()), - f"--solution_file={solution_fn}", - ] - - # Run the command and capture the output - try: - print(f"running command {command}") - with open(log_fn, "w") as f: - f.write(f"Command: {' '.join(command)}\n") - start_time = time.perf_counter() - result = subprocess.run( - command, - stdout=f, - stderr=subprocess.STDOUT, - text=True, - check=False, - encoding="utf-8", - ) - runtime = time.perf_counter() - start_time - - # Read back the log file for parsing - with open(log_fn, "r") as f: - output = f.read() - - if result.returncode != 0: - return { - "runtime": runtime, - "reported_runtime": runtime, - "status": "ER", - "condition": "Error", - "objective": None, - "duality_gap": None, - "max_integrality_violation": None, - } - else: - # Parse HiGHS output to extract objective value - objective = None - model_status = "ER" - for line in 
reversed(output.splitlines()): - if objective is None: - # Old format: - if "Objective value" in line and ":" in line: - try: - objective = float(line.split(":")[-1].strip()) - except (ValueError, IndexError): - pass - # New format: " - elif "(objective)" in line: - try: - objective = float(line.split("(objective)")[0].strip()) - except (ValueError, IndexError): - pass - - if model_status == "ER": - # Old format: - if "Model status" in line and ":" in line: - try: - model_status = line.split(":")[-1].strip() - except (ValueError, IndexError): - pass - # New format: - elif line.strip().startswith("Status") and ":" not in line: - try: - parts = line.split() - if len(parts) >= 2: - status_value = parts[-1] - if status_value in [ - "Optimal", - "Infeasible", - "Unbounded", - ]: - model_status = status_value - except (ValueError, IndexError): - pass - - # Break early once we've found both values - if objective is not None and model_status != "ER": - break - - if objective is not None and model_status in ["Optimal", "Infeasible"]: - status = "ok" - else: - status = "warning" - - return { - "runtime": runtime, - "reported_runtime": runtime, - "status": status, - # Model status : Optimal - "condition": model_status, - "objective": objective, - "duality_gap": None, # Not available from command line output - "max_integrality_violation": None, # Not available from command line output - } - except Exception as e: - runtime = time.perf_counter() - start_time - # Write error to log file - with open(log_fn, "w") as f: - f.write(f"Command: {' '.join(command)}\n") - f.write(f"Exception: {str(e)}\n") - - return { - "runtime": runtime, - "reported_runtime": runtime, - "status": "error", - "condition": "Error", - "objective": None, - "duality_gap": None, - "max_integrality_violation": None, - } - finally: - pass - # Clean up temporary options file - # if options_file is not None: - # try: - # os.unlink(options_file.name) - # except OSError: - # pass - - -def main(solver_name, 
input_file, solver_version): problem_file = Path(input_file) - # Handle highs-hipo solver variants separately - try: - highs_variant = HighsVariant(solver_name.lower()) - results = run_highs_hipo_solver(input_file, solver_version, highs_variant) - print(json.dumps(results)) - return - except ValueError as e: - # re-raise the error if it isn't expected. - # we want to continue only if the error is about invalid HighsVariant - if "is not a valid HighsVariant" not in str(e): - raise e - - solver = get_solver(solver_name) + solver = get_solver(solver_name, highs_solver_variant, hipo_block_size) solution_dir = Path(__file__).parent / "solutions" solution_dir.mkdir(parents=True, exist_ok=True) @@ -394,19 +457,17 @@ def main(solver_name, input_file, solver_version): solution_fn = solution_dir / f"{output_filename}.sol" log_fn = logs_dir / f"{output_filename}.log" + # We measure runtime here and not of this entire script because lines like + # `import linopy` take a long (and varying) amount of time try: - # We measure runtime here and not of this entire script because lines like - # `import linopy` take a long (and varying) amount of time start_time = perf_counter() solver_result = solver.solve_problem( problem_fn=problem_file, solution_fn=solution_fn, log_fn=log_fn ) runtime = perf_counter() - start_time - duality_gap, max_integrality_violation = get_milp_metrics( - input_file, solver_result + problem_file, solver_name, solver_result ) - results = { "runtime": runtime, "reported_runtime": get_reported_runtime( @@ -418,8 +479,8 @@ def main(solver_name, input_file, solver_version): "duality_gap": duality_gap, "max_integrality_violation": max_integrality_violation, } - except Exception: - print(f"ERROR running solver: {format_exc()}", file=sys.stderr) + except Exception as e: + logger.error(f"Error running solver: {e}") results = { "runtime": None, "reported_runtime": None, @@ -432,12 +493,63 @@ def main(solver_name, input_file, solver_version): print(json.dumps(results)) 
+def parse_args() -> argparse.Namespace: + """ + Parse command-line arguments for the solver runner script. + + Returns + ------- + argparse.Namespace + Namespace containing the parsed command-line arguments: + - solver_name (str): Name of the solver to run. + - solver_version (str): Version of the solver to run. + - input_file (str): Path to the input problem file. + - highs_solver_variant (str): Variant of HiGHS to run (only applicable if solver_name is 'highs'). + - hipo_block_size (int): Block size for HiPO variant of HiGHS + (only applicable if solver_name is 'highs' and + highs_solver_variant is 'hipo'). + """ + p = argparse.ArgumentParser() + p.add_argument( + "--solver_name", + type=str, + choices=SUPPORTED_SOLVERS, + required=True, + help="Name of the solver to run.", + ) + p.add_argument( + "--solver_version", + type=str, + required=True, + help="Version of the solver to run.", + ) + p.add_argument( + "--input_file", type=str, required=True, help="Path to the input problem file." 
+ ) + p.add_argument( + "--highs_solver_variant", + type=str, + choices=[v.value for v in HighsSolverVariants], + help="Variant of HiGHS to run (only applicable if solver_name is 'highs').", + required=False, + ) + p.add_argument( + "--hipo_block_size", + type=int, + help="Block size for HiPO variant of HiGHS " + "(only applicable if solver_name is 'highs' and " + "highs_solver_variant is 'hipo').", + required=False, + ) + return p.parse_args() + + if __name__ == "__main__": - if len(sys.argv) != 4: - print("Usage: python run_solver.py ") - sys.exit(1) - - solver_name = sys.argv[1] - input_file = sys.argv[2] - solver_version = sys.argv[3] - main(solver_name, input_file, solver_version) + args = parse_args() + main( + args.solver_name, + args.input_file, + args.solver_version, + args.highs_solver_variant, + args.hipo_block_size, + ) diff --git a/tests/test_run_benchmarks.py b/tests/test_run_benchmarks.py new file mode 100644 index 00000000..9f98a0e3 --- /dev/null +++ b/tests/test_run_benchmarks.py @@ -0,0 +1,315 @@ +"""Unit tests for the run_benchmarks module.""" + +import gzip +import os +import subprocess +from pathlib import Path +from unittest.mock import MagicMock, Mock, patch + +import pytest +import requests + +from runner import run_benchmarks +from runner.run_benchmarks import ( + build_solver_command, + download_benchmark_file, + get_conda_package_versions, + get_solver_name_and_version, +) + + +class TestRunBenchmarks: + def test_get_conda_package_versions(self) -> None: + """Test the get_conda_package_versions function.""" + solvers_list = ["highs", "highs-hipo", "highs-ipm", "cbc", "scip"] + env_name = "benchmark-env" + # Simulate conda list output + conda_list_output = """ + # packages in environment at /opt/conda/envs/fake-env: + # + highspy 1.13.2.dev1 py39_0 + coin-or-cbc 2.10.12 py39_0 + pyscipopt 5.7.1 py39_0 + otherpkg 0.1.0 py39_0 + """ + mock_result = MagicMock() + mock_result.stdout = conda_list_output + mock_result.returncode = 0 + + 
expected_dict = { + "highs": "1.13.2.dev1", + "highs-hipo": "1.13.2.dev1", + "highs-ipm": "1.13.2.dev1", + "cbc": "2.10.12", + "scip": "5.7.1", + } + + with patch("subprocess.run", return_value=mock_result): + versions = get_conda_package_versions(solvers_list, env_name) + assert versions == expected_dict + + def test_build_command_non_root_includes_user_and_reference_flag( + self, monkeypatch: MagicMock + ) -> None: + """Test that the command includes --user and --highs_solver_variant hipo when not running as root.""" + monkeypatch.setattr(os, "geteuid", lambda: 1000) # non-root + input_file = Path("/tmp/example_problem.lp") + solver_name = "highs" + timeout = 60 + solver_version = "1.2.3" + memory_limit_bytes = 12345678 + + cmd = build_solver_command( + input_file, solver_name, timeout, solver_version, memory_limit_bytes, True + ) + + assert cmd[0] == "systemd-run" + assert "--user" in cmd + assert f"--property=MemoryMax={memory_limit_bytes}" in cmd + assert "--property=MemorySwapMax=0" in cmd + assert "/usr/bin/time" in cmd + assert "--format" in cmd + assert "MaxResidentSetSizeKB=%M" in cmd + assert "timeout" in cmd + assert f"{timeout}s" in cmd + expected_wrapper = str(Path(run_benchmarks.__file__).parent / "run_solver.py") + assert expected_wrapper in cmd + assert "--solver_name" in cmd + assert cmd[cmd.index("--solver_name") + 1] == solver_name + assert "--input_file" in cmd + assert cmd[cmd.index("--input_file") + 1] == input_file.as_posix() + assert "--solver_version" in cmd + assert cmd[cmd.index("--solver_version") + 1] == solver_version + assert "--highs_solver_variant" in cmd + assert cmd[cmd.index("--highs_solver_variant") + 1] == "hipo" + + def test_build_command_as_root_no_user_and_no_reference( + self, monkeypatch: MagicMock + ) -> None: + """Test that the command does not include --user and --highs_solver_variant hipo""" + monkeypatch.setattr(os, "geteuid", lambda: 0) # root + input_file = Path("/tmp/another.lp") + solver_name = "cbc" + timeout = 
30 + solver_version = "2.0" + memory_limit_bytes = 99999 + + cmd = build_solver_command( + input_file, solver_name, timeout, solver_version, memory_limit_bytes, False + ) + assert "--user" not in cmd + assert f"--property=MemoryMax={memory_limit_bytes}" in cmd + assert f"{timeout}s" in cmd + assert "--solver_name" in cmd + assert cmd[cmd.index("--solver_name") + 1] == solver_name + assert "--input_file" in cmd + assert cmd[cmd.index("--input_file") + 1] == input_file.as_posix() + assert "--solver_version" in cmd + assert cmd[cmd.index("--solver_version") + 1] == solver_version + assert not any(el == "--highs_solver_variant hipo" for el in cmd) + + def test_download_regular_file_http(self, tmp_path: Path) -> None: + """Test downloading a regular file from HTTP URL.""" + dest_path = tmp_path / "data.txt" + test_content = b"test file content" + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.iter_content.return_value = [test_content] + mock_get.return_value.__enter__.return_value = mock_response + + download_benchmark_file("http://example.com/data.txt", dest_path) + + assert dest_path.exists() + assert dest_path.read_bytes() == test_content + mock_get.assert_called_once_with("http://example.com/data.txt", stream=True) + + def test_download_gzipped_file_http(self, tmp_path: Path) -> None: + """Test downloading and unzipping a .gz file from HTTP.""" + dest_path = tmp_path / "data.txt.gz" + uncompressed_path = tmp_path / "data.txt" + original_content = b"original file content" + + # Create gzipped content + gzipped_content = gzip.compress(original_content) + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.iter_content.return_value = [gzipped_content] + mock_get.return_value.__enter__.return_value = mock_response + + download_benchmark_file("http://example.com/data.txt.gz", dest_path) + + # Verify uncompressed file exists and compressed file is removed + assert uncompressed_path.exists() + 
assert not dest_path.exists() + assert uncompressed_path.read_bytes() == original_content + + def test_skip_download_if_file_exists(self, tmp_path: Path) -> None: + """Test that download is skipped if the file already exists.""" + dest_path = tmp_path / "data.txt" + dest_path.write_text("existing content") + + with patch("requests.get") as mock_get: + download_benchmark_file("http://example.com/data.txt", dest_path) + + # Verify no download occurred + mock_get.assert_not_called() + assert dest_path.read_text() == "existing content" + + def test_skip_download_if_uncompressed_file_exists_with_gz_url( + self, tmp_path: Path + ) -> None: + """Test that download is skipped if uncompressed file exists when .gz URL is provided.""" + gz_path = tmp_path / "data.txt.gz" + uncompressed_path = tmp_path / "data.txt" + uncompressed_path.write_text("existing content") + + with patch("requests.get") as mock_get: + download_benchmark_file("http://example.com/data.txt.gz", gz_path) + + # Verify no download occurred + mock_get.assert_not_called() + assert uncompressed_path.read_text() == "existing content" + assert not gz_path.exists() + + def test_create_destination_directory_if_not_exists(self, tmp_path: Path) -> None: + """Test that destination directory is created if it doesn't exist.""" + nested_path = tmp_path / "subdir" / "nested" / "data.txt" + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.iter_content.return_value = [b"content"] + mock_get.return_value.__enter__.return_value = mock_response + + download_benchmark_file("http://example.com/data.txt", nested_path) + + assert nested_path.parent.exists() + assert nested_path.exists() + + def test_http_download_raises_on_failed_response(self, tmp_path: Path) -> None: + """Test that HTTP errors are properly raised.""" + dest_path = tmp_path / "data.txt" + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.raise_for_status.side_effect = 
requests.HTTPError( + "404 Not Found" + ) + mock_get.return_value.__enter__.return_value = mock_response + + with pytest.raises(requests.HTTPError): + download_benchmark_file("http://example.com/missing.txt", dest_path) + + def test_gsutil_download_fails(self, tmp_path: Path) -> None: + """Test that gsutil command failures are properly raised.""" + dest_path = tmp_path / "data.txt" + + with patch("subprocess.run") as mock_run: + mock_run.side_effect = subprocess.CalledProcessError(1, "gsutil cp") + + with pytest.raises(subprocess.CalledProcessError): + download_benchmark_file("gs://bucket-name/data.txt", dest_path) + + def test_gzip_decompression_error(self, tmp_path: Path) -> None: + """Test handling of corrupted gzip files.""" + dest_path = tmp_path / "data.txt.gz" + # Write invalid gzip content + dest_path.write_bytes(b"not a valid gzip file") + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.iter_content.return_value = [b"not a valid gzip file"] + mock_get.return_value.__enter__.return_value = mock_response + + with pytest.raises(gzip.BadGzipFile): + download_benchmark_file("http://example.com/data.txt.gz", dest_path) + + def test_large_file_streaming(self, tmp_path: Path) -> None: + """Test that large files are downloaded in chunks.""" + dest_path = tmp_path / "large_file.bin" + # 10 chunks of 8KB each + chunks = [b"x" * 8192 for _ in range(10)] + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.iter_content.return_value = chunks + mock_get.return_value.__enter__.return_value = mock_response + + download_benchmark_file("http://example.com/large_file.bin", dest_path) + + assert dest_path.stat().st_size == 8192 * 10 + mock_response.iter_content.assert_called_once_with(chunk_size=8192) + + def test_full_workflow_gcs_gzip(self, tmp_path: Path) -> None: + """Test complete workflow: GCS download + gzip decompression.""" + dest_path = tmp_path / "benchmark.tar.gz" + uncompressed_path = 
tmp_path / "benchmark.tar" + original_content = b"tar archive content here" + + gzipped = gzip.compress(original_content) + + with patch("subprocess.run") as mock_run: + # Simulate gsutil writing the gzipped file + def write_file(*args, **kwargs): + dest_path.write_bytes(gzipped) + return Mock(returncode=0) + + mock_run.side_effect = write_file + + download_benchmark_file("gs://bucket-name/benchmark.tar.gz", dest_path) + + assert uncompressed_path.exists() + assert not dest_path.exists() + assert uncompressed_path.read_bytes() == original_content + + def test_file_with_multiple_dots_in_name(self, tmp_path: Path) -> None: + """Test handling files with multiple dots (e.g., data.backup.txt.gz).""" + dest_path = tmp_path / "data.backup.txt.gz" + uncompressed_path = tmp_path / "data.backup.txt" + content = b"backup data" + gzipped = gzip.compress(content) + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.iter_content.return_value = [gzipped] + mock_get.return_value.__enter__.return_value = mock_response + + download_benchmark_file("http://example.com/data.backup.txt.gz", dest_path) + + assert uncompressed_path.exists() + assert uncompressed_path.read_bytes() == content + + def test_empty_file_download(self, tmp_path: Path) -> None: + """Test downloading an empty file.""" + dest_path = tmp_path / "empty.txt" + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.iter_content.return_value = [] + mock_get.return_value.__enter__.return_value = mock_response + + download_benchmark_file("http://example.com/empty.txt", dest_path) + + assert dest_path.exists() + assert dest_path.stat().st_size == 0 + + @pytest.mark.parametrize( + "input_name, expected_base, expected_variant", + [ + ("highs", "highs", None), + ("highs-hipo", "highs", "hipo"), + ("highs-ipm", "highs", "ipm"), + ("Highs-IPX", "highs", "ipx"), + ("HIGHS-SIMPLEX", "highs", "simplex"), + ("cbc", "cbc", None), + ("scip", "scip", None), + ], + ) 
+ def test_split_highs_solver_name_variants_parametrized( + self, input_name: str, expected_base: str, expected_variant: str | None + ) -> None: + """Test the _split_highs_solver_name function with various input formats.""" + base, variant = get_solver_name_and_version(input_name) + assert base == expected_base + assert variant == expected_variant