diff --git a/pytest.ini b/pytest.ini index fa7a5bbf..ce72372d 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,2 +1,3 @@ [pytest] norecursedirs = pocs +addopts = --ignore=tests/test_run_benchmarks.py diff --git a/runner/README.md b/runner/README.md index 92fb1e63..5545f28f 100644 --- a/runner/README.md +++ b/runner/README.md @@ -85,24 +85,38 @@ python run_benchmarks.py ../results/metadata.yaml 2024 --run_id "debug-run-001" Use `run_solver.py` to test a single solver on a single benchmark problem. This is useful for debugging: ```bash -python run_solver.py +python runner/run_solver.py \ + --solver_name \ + --solver_version \ + --input_file \ + --highs_solver_variant \ + --hipo_block_size ``` **Arguments:** -- `solver_name` - Solver name (highs, scip, cbc, gurobi, glpk) -- `input_file` - Path to benchmark problem file (.lp or .mps) -- `solver_version` - Solver version string (e.g., 1.10.0) +- `solver_name` - Name of the solver to run (e.g., `highs`, `glpk`, `gurobi`, `scip`, `cbc`, `cplex`, `knitro`, `xpress`). +- `solver_version` - Version of the solver to use. +- `input_file` - Path to the input problem file (e.g., `.mps`, `.lp`). +- `highs_solver_variant` - Variant of HiGHS to run (`hipo`, `ipm`, `ipx`, `pdlp`, `simplex` - default: `simplex`). Only for HiGHS. +- `hipo_block_size` - Block size for HiPO variant of HiGHS (default: `128`). Only for HiGHS with `hipo` solver variant. 
**Examples:** ```bash -# Test HiGHS -conda activate benchmark-2024 -python run_solver.py highs ./benchmarks/pypsa-eur-elec-op-2-1h.lp 1.10.0 +# Test HiGHS (simplex variant) +conda activate benchmark-2025 +python runner/run_solver.py \ + --solver_name highs \ + --solver_version 1.13.2.dev1 \ + --input_file ./benchmarks/pypsa-eur-elec-op-2-1h.lp \ + --highs_solver_variant simplex # Test SCIP -conda activate benchmark-2024 -python run_solver.py scip ./benchmarks/pypsa-eur-elec-op-2-1h.lp 9.2.2 +conda activate benchmark-2025 +python runner/run_solver.py \ + --solver_name scip \ + --solver_version 9.2.2 \ + --input_file ./benchmarks/pypsa-eur-elec-op-2-1h.lp ``` **Output:** diff --git a/runner/benchmark_all.sh b/runner/benchmark_all.sh index f996657c..135eba98 100755 --- a/runner/benchmark_all.sh +++ b/runner/benchmark_all.sh @@ -75,7 +75,7 @@ for year in "${years[@]}"; do solver_args="--solvers ${solvers_override}" echo "Using solver override: ${solvers_override}" else - solver_args="--solvers gurobi highs-hipo highs-ipm highs scip cbc glpk" + solver_args="--solvers gurobi highs-hipo highs-ipm highs scip glpk" fi # Overwrite results for the first year, append thereafter diff --git a/runner/envs/benchmark-2026.yaml b/runner/envs/benchmark-2026.yaml new file mode 100644 index 00000000..b503bbfd --- /dev/null +++ b/runner/envs/benchmark-2026.yaml @@ -0,0 +1,19 @@ +name: benchmark-2026 +channels: +- conda-forge/label/dev +- conda-forge +- https://conda.anaconda.org/gurobi +- nodefaults +dependencies: +- python>=3.12 +- pip +- psutil>=5.9 +- requests>=2.32 +- linopy>=0.6.4 +- coin-or-cbc==2.10.12 +- scip==10.0.0 +- pyscipopt==5.7.1 +- gurobi==13.0.0 + +# Install highspy from conda-forge dev label +- highspy==1.13.2.dev1 diff --git a/runner/run_benchmarks.py b/runner/run_benchmarks.py index e04cee15..f03dc8bb 100644 --- a/runner/run_benchmarks.py +++ b/runner/run_benchmarks.py @@ -1,3 +1,51 @@ +""" +Benchmark Runner Script +======================= + +This script automates 
the benchmarking of multiple optimization solvers +(e.g., HiGHS, GLPK, Gurobi, SCIP, CBC) on a set of +benchmark problem instances defined in a YAML configuration file. It manages +downloading benchmark files, running solvers in isolated environments with +resource limits, collecting metrics (runtime, memory, status, objective, etc.), +and writing results to CSV files for further analysis. + +Features +-------- +- Supports running multiple solvers and benchmark instances in series. +- Handles solver-specific environment setup and version detection. +- Enforces memory and runtime limits for solver runs. +- Collects and records detailed metrics, including runtime, memory usage, + status, objective value, duality gap, and integrality violation. +- Outputs results and summary statistics to CSV files. + +Example Usage +------------- +Run the script from the command line: + + python runner/run_benchmarks.py [OPTIONS] + +Parameters +------------ +--benchmark_yaml_path : str + Path to the benchmark configuration YAML file (e.g., ../results/metadata.yaml). +--year : str + Solver release year (e.g., 2020-2025). +--solvers : list of str, optional + Space-separated list of solvers to run. Defaults to all supported solvers. +--append : bool, optional + Append to the results CSV file instead of overwriting. Default is False. +--ref_bench_interval : int, optional + Interval in seconds to run a reference benchmark with the HiGHS binary. +--run_id : str, optional + Unique identifier for this benchmark run. + +Returns +-------- +- Results for each solver/benchmark instance are written to `results/benchmark_results.csv`. +- Summary statistics (mean, stddev) are written to `results/benchmark_results_mean_stddev.csv`. +- Logs and solution files are saved in the `runner/logs/` and `runner/solutions/` directories. 
+""" + import argparse import csv import datetime @@ -9,6 +57,7 @@ import statistics import subprocess import time +import typing from collections import OrderedDict from pathlib import Path from socket import gethostname @@ -16,10 +65,29 @@ import psutil import requests import yaml -from run_solver import HighsVariant -def get_conda_package_versions(solvers, env_name=None): +def get_conda_package_versions(solvers: list[str], env_name=None) -> dict[str, str]: + """ + Get the installed version of specified solver packages in a conda environment. + + Parameters + ---------- + solvers : list of str + List of solver names to query for package versions. + env_name : str, optional + Name of the conda environment to query. If None, uses the current active environment. + + Returns + ------- + solver_versions : dict + Dictionary mapping each solver name to its installed version string in the specified conda environment. + + Raises + ------ + ValueError + If the conda command fails to execute. + """ try: # List packages in the conda environment cmd = "conda list" @@ -42,17 +110,17 @@ def get_conda_package_versions(solvers, env_name=None): installed_packages[parts[0]] = parts[1] # Map solver names to their conda package names - name_to_pkg = {"highs": "highspy", "cbc": "coin-or-cbc"} + name_to_pkg = { + "highs": "highspy", + "highs-hipo": "highspy", + "highs-ipm": "highspy", + "cbc": "coin-or-cbc", + "scip": "pyscipopt", + } solver_versions = {} for solver in solvers: - # Handle highs-hipo variants as special cases - not conda packages - if solver in [ - variant.value for variant in HighsVariant - ]: # For py3.10 compatibility - solver_versions[solver] = get_highs_hipo_version() - else: - package = name_to_pkg.get(solver, solver) - solver_versions[solver] = installed_packages.get(package, None) + package = name_to_pkg.get(solver, solver) + solver_versions[solver] = installed_packages.get(package, None) return solver_versions @@ -60,55 +128,148 @@ def 
get_conda_package_versions(solvers, env_name=None): raise ValueError(f"Error executing conda command: {e.stderr or str(e)}") -def download_benchmark_file(url, dest_path: Path): - """Download a file from url and save it locally in the specified folder if it doesn't already exist. +def _download_via_requests(url: str, dest: Path, chunk_size: int = 8192) -> None: + """ + Download a file over HTTP(S) requests. + + Parameters + ---------- + url : str + HTTP or HTTPS URL to download. + dest : pathlib.Path + Local destination path where the downloaded file will be written. + chunk_size : int, optional + Size in bytes of chunks to read from the response stream (default: 8192). + """ + tmp = dest.with_suffix(dest.suffix + ".tmp") + with requests.get(url, stream=True) as r: + r.raise_for_status() + with open(tmp, "wb") as f: + for chunk in r.iter_content(chunk_size=chunk_size): + if chunk: + f.write(chunk) + os.replace(tmp, dest) + print(f"Downloaded {url} to {dest} via requests") - If the URL is on GCS (starting gs://), then this uses `gsutil` to download the file (requires authentication). - If the file is gzipped (.gz), it will be unzipped after downloading. + +def _download_via_gsutil(url: str, dest: Path) -> None: + """ + Download a file from Google Cloud Storage using the gsutil command. + + Parameters + ---------- + url : str + GCS URL to download. Must start with ``gs://``. + dest : pathlib.Path + Local destination path where the downloaded file will be written. + + Raises + ------ + subprocess.CalledProcessError + If the `gsutil` command exits with a non-zero status. 
""" - # Ensure the destination folder exists - os.makedirs(dest_path.parent, exist_ok=True) + subprocess.run( + ["gsutil", "cp", url, str(dest)], check=True, capture_output=True, text=True + ) + print(f"Downloaded {url} to {dest} via gsutil") - # If dest_path ends with .gz, prepare for the uncompressed version - if dest_path.suffix == ".gz": - uncompressed_dest_path = dest_path.with_suffix("") - else: - uncompressed_dest_path = dest_path - if os.path.exists(uncompressed_dest_path): - print(f"File already exists at {uncompressed_dest_path}. Skipping download.") +def _unzip_gz(path: Path) -> Path: + """ + Decompress a gzip file and remove the original compressed file. + + Parameters + ---------- + path : Path + Path to the file to decompress. If the file does not have a `.gz` + extension, it is returned unchanged. + + Returns + ------- + Path + Path to the decompressed file. If the input file was not gzipped, + returns the input path unchanged. + + Notes + ----- + This function removes the original `.gz` file after successful + decompression and prints a message to stdout indicating the operation. + """ + if path.suffix != ".gz": + return path + uncompressed = path.with_suffix("") + with gzip.open(path, "rb") as gz_f, open(uncompressed, "wb") as out_f: + shutil.copyfileobj(gz_f, out_f) + os.remove(path) + print(f"Unzipped {path} -> {uncompressed}") + return uncompressed + + +def download_benchmark_file(url: str, dest_path: Path) -> None: + """ + Download a file from a URL and save it locally, unzipping if necessary. + + Parameters + ---------- + url : str + The URL of the file to download. If the URL starts with 'gs://', `gsutil` is used for downloading. + dest_path : pathlib.Path + The local path where the downloaded file will be saved. If the file is gzipped (.gz), it will be unzipped after download. + + Notes + ----- + - If the file already exists at the destination (uncompressed), the download is skipped. 
+ - For Google Cloud Storage URLs, requires `gsutil` and authentication. + - Automatically unzips `.gz` files after download and removes the compressed file. + - Creates the destination directory if it does not exist. + """ + dest_path = Path(dest_path) + dest_path.parent.mkdir(parents=True, exist_ok=True) + + # determine the final uncompressed path to check for existing file + final_uncompressed = ( + dest_path.with_suffix("") if dest_path.suffix == ".gz" else dest_path + ) + if final_uncompressed.exists(): + print(f"File already exists at {final_uncompressed}. Skipping download.") return + # download to dest_path (compressed or not) if url.startswith("gs://"): - # GCS file, so download using gsutil - print(f"Downloading {url} to {dest_path} using gsutil...", end="") - cmd = ["gsutil", "cp", url, dest_path] - _result = subprocess.run(cmd, capture_output=True, text=True, check=True) - print("done.") + _download_via_gsutil(url, dest_path) else: - # Perform the download with streaming to handle large files - print(f"Downloading {url} to {dest_path}...", end="") - with requests.get(url, stream=True) as response: - response.raise_for_status() - with open(dest_path, "wb") as f: - for chunk in response.iter_content(chunk_size=8192): - f.write(chunk) - print("done.") + _download_via_requests(url, dest_path) + # if compressed, unzip and remove the .gz if dest_path.suffix == ".gz": - print(f"Unzipping {dest_path}...") - with gzip.open(dest_path, "rb") as gz_file: - uncompressed_file_path = dest_path.with_suffix("") - with open(uncompressed_file_path, "wb") as uncompressed_file: - shutil.copyfileobj(gz_file, uncompressed_file) - os.remove(dest_path) - print(f"Unzipped to {uncompressed_file_path}.") - - -def parse_memory(output): - line = output.splitlines()[-1] - if "MaxResidentSetSizeKB=" in line: - parts = line.strip().split("=") + _unzip_gz(dest_path) + + +def parse_memory(output: str) -> float: + """ + Parse the maximum resident set size (memory usage) from subprocess 
output. + + Parameters + ---------- + output : str + The output string from a subprocess, expected to contain a line with 'MaxResidentSetSizeKB='. + + Returns + ------- + memory_mb : float + The maximum resident set size in megabytes (MB). + + Raises + ------ + ValueError + If the memory usage line is not found in the output. + + Notes + ----- + - Assumes the memory usage is reported in kilobytes (KB) and converts it to megabytes (MB). + """ + if "MaxResidentSetSizeKB=" in output: + parts = output.strip().split("=") max_resident_set_size = parts[-1] return float(max_resident_set_size) / 1000 # Convert to MB raise ValueError(f"Could not find memory usage in subprocess output:\n{output}") @@ -231,35 +392,276 @@ def write_csv_summary_row(mean_stddev_csv, benchmark_name, metrics, run_id, time ) -def benchmark_solver(input_file, solver_name, timeout, solver_version): - available_memory_bytes = psutil.virtual_memory().available - memory_limit_bytes = int(available_memory_bytes * 0.95) - memory_limit_mb = memory_limit_bytes / (1024 * 1024) - print(f"Setting memory limit to {memory_limit_mb:.2f} MB (95% of available memory)") +def get_solver_name_and_version(solver_name: str) -> tuple[str, str | None]: + """ + Split solver names into base solver and variant components. + + Parses solver names like 'highs-hipo', 'highs ipm', or 'highs' into + their base solver and variant parts. For non-highs solvers, returns + the original name with no variant. + + Parameters + ---------- + solver_name : str + The solver name to split. Can be a highs variant like 'highs-hipo', + 'highs ipm', 'highs', or any other solver name. + + Returns + ------- + tuple[str, str | None] + A tuple containing: + - base_solver : str + The base solver name ('highs' for highs variants, otherwise + the original solver_name). + - variant : str or None + The variant suffix if present (e.g., 'hipo', 'ipm'), or None + if no variant is found. 
+ + Examples + -------- + >>> get_solver_name_and_version("highs-hipo") + ('highs', 'hipo') + + >>> get_solver_name_and_version("highs") + ('highs', None) + + >>> get_solver_name_and_version("glpk") + ('glpk', None) + """ + m = re.match(r"^(highs)(?:[-\s](?P<variant>[\w-]+))?$", solver_name.lower()) + if m: + return m.group(1), m.group("variant") + return solver_name, None + + +def build_solver_command( + input_file: Path, + solver_name: str, + timeout: int, + solver_version: str, + memory_limit_bytes: int, + reference_benchmark: bool, +) -> list[str]: + """ + Build the shell command to run a solver with resource limits. + + Parameters + ---------- + input_file : Path + Path to the benchmark problem file to be solved. + solver_name : str + Name of the solver to run (e.g., "highs", "gurobi", "scip", "cbc", "glpk"). + timeout : int + Maximum allowed runtime for the solver in seconds. + solver_version : str + Version string of the solver, passed to the solver script. + memory_limit_bytes : int + Maximum memory the solver process is allowed to use, in bytes. + reference_benchmark : bool + If True, appends the ``--highs_solver_variant hipo`` flag to run + the HiGHS HiPO variant on a reference instance. + + Returns + ------- + command : list of str + The command as a list of strings, suitable for passing to + ``subprocess.run``. 
+ """ + base_solver, variant = get_solver_name_and_version(solver_name) command = ["systemd-run"] - if os.geteuid() != 0: command.append("--user") command.extend( [ "--scope", - f"--property=MemoryMax={memory_limit_bytes}", # Set resident memory limit - "--property=MemorySwapMax=0", # Disable swap to ensure only physical RAM is used + f"--property=MemoryMax={memory_limit_bytes}", + "--property=MemorySwapMax=0", "/usr/bin/time", "--format", "MaxResidentSetSizeKB=%M", "timeout", f"{timeout}s", "python", - f"{Path(__file__).parent / 'run_solver.py'}", - solver_name, - input_file, + str(Path(__file__).parent / "run_solver.py"), + "--solver_name", + base_solver, + "--input_file", + input_file.as_posix(), + "--solver_version", solver_version, ] ) + if variant: + command.extend(["--highs_solver_variant", variant]) + elif reference_benchmark and base_solver.lower() == "highs": + command.extend(["--highs_solver_variant", "hipo"]) + + return command + + +def return_failure_metrics( + status: str, condition: str, runtime: int | float | str +) -> dict[str, typing.Any]: + """ + Build a metrics dictionary for solver failure cases. + + Parameters + ---------- + status : str + Short status code for the run (e.g., ``"TO"``, ``"OOM"``, ``"ER"``). + condition : str + Human-readable termination condition (e.g., ``"Timeout"``, ``"Out of Memory"``, ``"Error"``). + runtime : int, float, or str + Runtime to record in seconds, or a sentinel (e.g., ``"N/A"``) when not applicable. + + Returns + ------- + metrics : dict + Dictionary with the following keys: + - ``status`` : str + The provided short status code. + - ``condition`` : str + The provided termination condition. + - ``objective`` : None + Always ``None`` for failure cases. + - ``runtime`` : int, float, or str + The provided runtime value. + - ``reported_runtime`` : float or None + The numeric runtime if ``runtime`` is an ``int`` or ``float``, otherwise ``None``. + - ``duality_gap`` : None + Always ``None`` for failure cases. 
+ - ``max_integrality_violation`` : None + Always ``None`` for failure cases. + """ + reported_runtime = runtime if isinstance(runtime, (int, float)) else None + return { + "status": status, + "condition": condition, + "objective": None, + "runtime": runtime, + "reported_runtime": reported_runtime, + "duality_gap": None, + "max_integrality_violation": None, + } + + +def parse_solver_result(result: subprocess.CompletedProcess, timeout: int) -> dict: + """ + Interpret a subprocess `CompletedProcess` from a solver run and produce a metrics dictionary. + + Parameters + ---------- + result : subprocess.CompletedProcess + The result returned by ``subprocess.run`` when executing the solver wrapper. + timeout : int + Timeout value (in seconds) that was enforced for the solver run. Used for timeout/error metrics. + + Returns + ------- + metrics : dict + A metrics dictionary describing the solver outcome. For successful runs this is the JSON-parsed + metrics object produced by the solver wrapper (parsed from the last line of ``result.stdout``). + For failure cases a dictionary produced by ``return_failure_metrics`` is returned with keys: + ``status``, ``condition``, ``objective`` (None), ``runtime``, ``reported_runtime``, + ``duality_gap`` (None), and ``max_integrality_violation`` (None). + + Raises + ------ + ValueError + Not raised by this function directly, but callers should be aware that JSON parsing may raise + exceptions if ``result.stdout`` does not contain valid JSON on the final line. 
+ """ + + if result.returncode == 0: + # Successful run; parse the JSON metrics from the last line of stdout + metrics = json.loads(result.stdout.splitlines()[-1]) + elif result.returncode == 124: + # 124 is the exit code used by the `timeout` command to indicate a timeout + print("TIMEOUT", flush=True) + metrics = return_failure_metrics("TO", "Timeout", timeout) + elif result.returncode in (137, 143, -9, -15): + # systemd-run uses sigkill (9) or sigterm (15) to terminate + # the process and returns 128 + signal exit code + # subprocess returns - for signals + # these things don't seem very portable + print("OUT OF MEMORY", flush=True) + metrics = return_failure_metrics("OOM", "Out of Memory", "N/A") + else: + print( + f"ERROR running solver. Return code: {result.returncode}\n" + f"Stdout:\n{result.stdout}\n" + f"Stderr:\n{result.stderr}\n", + flush=True, + ) + metrics = return_failure_metrics("ER", "Error", timeout) + return metrics + + +def benchmark_solver( + input_file: Path, + solver_name: str, + timeout: int, + solver_version: str, + reference_benchmark=False, +) -> dict[str, object]: + """ + Run a solver on a benchmark problem file with resource limits and collect metrics. + + Parameters + ---------- + input_file : Path + Path to the benchmark problem file. + solver_name : str + Name of the solver to run (e.g., "gurobi", "highs-hipo", "highs-ipm", "highs", "scip", "cbc" or "glpk"). + timeout : int + Maximum allowed runtime for the solver in seconds. + solver_version : str + Version of the solver to use. + reference_benchmark : bool, optional + Whether this is a reference benchmark run (default: False). If True, run the reference benchmark. + + Returns + ------- + metrics : dict + Dictionary containing benchmark metrics: + - status : str + Solver status ("ok", "TO", "ER", "OOM"). + - condition : str + Termination condition ("Optimal", "Timeout", "Error", "Out of Memory"). + - objective : float or None + Objective value if available. 
+ - runtime : float or str + Actual runtime in seconds or "N/A". + - reported_runtime : float or None + Runtime reported by the solver, if available. + - duality_gap : float or None + Duality gap for MILP problems, if available. + - max_integrality_violation : float or None + Maximum integrality violation for MILP problems, if available. + - memory : float or None + Maximum resident set size in MB. + - timeout : int + Timeout value in seconds. + """ + available_memory_bytes = psutil.virtual_memory().available + memory_limit_bytes = int(available_memory_bytes * 0.95) + memory_limit_mb = memory_limit_bytes / (1024 * 1024) + print( + f"Setting memory limit to {memory_limit_mb:.2f} MB (95% of available memory)." + ) + + command = build_solver_command( + input_file, + solver_name, + timeout, + solver_version, + memory_limit_bytes, + reference_benchmark, + ) + # Run the command and capture the output result = subprocess.run( command, @@ -275,12 +677,12 @@ def benchmark_solver(input_file, solver_name, timeout, solver_version): / "logs" / f"{Path(input_file).stem}-{solver_name}-{solver_version}.log" ) - if log_file.exists: - with open(log_file, "a") as f: - f.write("\nSTDERR:\n") - f.write(result.stderr) - else: - print(f"ERROR: couldn't find log file {log_file}") + if not log_file.exists(): + print(f"Creating missing log file {log_file}") + log_file.touch() + with open(log_file, "a") as f: + f.write("\nSTDERR:\n") + f.write(result.stderr) memory = None try: @@ -288,50 +690,7 @@ def benchmark_solver(input_file, solver_name, timeout, solver_version): except ValueError: print("Failed to parse memory usage from stderr") - if result.returncode == 124: - print("TIMEOUT") - metrics = { - "status": "TO", - "condition": "Timeout", - "objective": None, - "runtime": timeout, - "reported_runtime": timeout, - "duality_gap": None, - "max_integrality_violation": None, - } - # systemd-run uses sigkill (9) or sigterm (15) to terminate the process and returns 128 + signal exit code - # 
subprocess returns - for signals - # these things don't seem very portable - elif result.returncode in (137, 143, -9, -15): - print("OUT OF MEMORY") - metrics = { - "status": "OOM", - "condition": "Out of Memory", - "objective": None, - "runtime": "N/A", - "reported_runtime": None, - "duality_gap": None, - "max_integrality_violation": None, - } - elif result.returncode != 0: - print( - f"ERROR running solver. Return code: {result.returncode}\n", - f"Stdout:\n{result.stdout}\n", - f"Stderr:\n{result.stderr}\n", - ) - # Errors are also said to have run for `timeout`s, so that they appear - # along with timeouts in charts - metrics = { - "status": "ER", - "condition": "Error", - "objective": None, - "runtime": timeout, - "reported_runtime": timeout, - "duality_gap": None, - "max_integrality_violation": None, - } - else: - metrics = json.loads(result.stdout.splitlines()[-1]) + metrics = parse_solver_result(result, timeout) if metrics["status"] not in {"ok", "TO", "ER", "OOM"}: print(f"WARNING: unknown solver status: {metrics['status']}") @@ -339,109 +698,7 @@ def benchmark_solver(input_file, solver_name, timeout, solver_version): metrics["memory"] = memory metrics["timeout"] = timeout - return metrics - - -def get_highs_binary_version(): - """Get the version of the HiGHS binary from the --version command""" - highs_binary = "/opt/highs/bin/highs" - - try: - result = subprocess.run( - [highs_binary, "--version"], - capture_output=True, - text=True, - check=True, - encoding="utf-8", - ) - - version_match = re.search(r"HiGHS version (\d+\.\d+\.\d+)", result.stdout) - if version_match: - return version_match.group(1) - - return "unknown" - except Exception as e: - print(f"Error getting HiGHS binary version: {str(e)}") - return "unknown" - - -def get_highs_hipo_version(): - """Get the version of the HiGHS-HiPO binary from the --version command""" - if os.geteuid() != 0: - highs_hipo_binary = 
"/home/madhukar/oet/solver-benchmark/highs-installs/highs-hipo-workspace/HiGHS/build/bin/highs" - else: - highs_hipo_binary = "/opt/highs-hipo-workspace/HiGHS/build/bin/highs" - - try: - result = subprocess.run( - [highs_hipo_binary, "--version"], - capture_output=True, - text=True, - check=True, - encoding="utf-8", - ) - - version_match = re.search(r"HiGHS version (\d+\.\d+\.\d+)", result.stdout) - if version_match: - return version_match.group(1) + "-hipo" - - return "unknown-hipo" - except Exception as e: - print(f"Error getting HiGHS-HiPO binary version: {str(e)}") - return "unknown-hipo" - - -def benchmark_highs_binary(): - """ - Run a reference benchmark using the pre-installed HiGHS binary - """ - reference_model = "/benchmark-test-model.lp" - highs_binary = "/opt/highs/bin/highs" - - command = [ - highs_binary, - reference_model, - ] - - # Run the command and capture the output - start_time = time.perf_counter() - result = subprocess.run( - command, - capture_output=True, - text=True, - check=False, - encoding="utf-8", - ) - runtime = time.perf_counter() - start_time - if result.returncode != 0: - print(f"ERROR running solver. 
Return code:\n{result.returncode}") - metrics = { - "status": "ER", - "condition": "Error", - "objective": None, - "runtime": runtime, - "duality_gap": None, - "max_integrality_violation": None, - } - else: - # Parse HiGHS output to extract objective value - objective = None - for line in result.stdout.splitlines(): - if "Objective value" in line: - try: - objective = float(line.split(":")[-1].strip()) - except (ValueError, IndexError): - pass - - metrics = { - "status": "OK", - "condition": "Optimal", - "objective": objective, - "runtime": runtime, - "memory": "N/A", - "duality_gap": None, # Not available from command line output - "max_integrality_violation": None, # Not available from command line output - } + print("Finished benchmark_solver with metrics:", metrics, flush=True) return metrics @@ -525,8 +782,12 @@ def main( benchmarks_folder = Path(__file__).parent / "benchmarks/" os.makedirs(benchmarks_folder, exist_ok=True) + # Get solver versions from the conda environment to include in the results solvers_versions = get_conda_package_versions(solvers, f"benchmark-{year}") + # Get the path of the reference benchmark + reference_benchmark_path = Path(benchmarks_folder, "benchmark-test-model.lp") + # Preprocess the sizes and make a list of individual benchmark files to run on processed_benchmarks = [] for benchmark_name, benchmark_info in benchmarks_info.items(): @@ -577,10 +838,6 @@ def main( + ("" if size_categories is None else f" matching {size_categories}") ) - reference_solver_version = "" - if reference_interval > 0: - reference_solver_version = get_highs_binary_version() - for benchmark in processed_benchmarks: # Set timeout from YAML if provided, otherwise use size-category defaults (1h for S/M, 24h for L) timeout = benchmark.get("timeout_seconds") or ( @@ -597,14 +854,12 @@ def main( ) # Latest CBC release is in 2024 ): print( - f"WARNING: skipping {solver} in {year} because this benchmark instance is size L" + f"WARNING: skipping {solver} in {year} 
because this benchmark instance is size L." ) continue # Restrict highs-hipo variants to 2025 and LPs only - if solver in [ - variant.value for variant in HighsVariant - ] and ( # For py3.10 compatibility + if solver == "highs-hipo" and ( year != "2025" or benchmark["class"] != "LP" ): print( @@ -624,7 +879,7 @@ for i in range(iterations): print( - f"Running solver {solver} (version {solver_version}) on {benchmark['path']} ({i})...", + f"Running solver {solver} (version {solver_version}) on {benchmark['path']} ({i})...", flush=True, ) @@ -688,18 +943,15 @@ reference_interval ): print( - f"Running reference benchmark with HiGHS binary (interval: {reference_interval}s)...", - flush=True, + f"Running reference benchmark with HiGHS HiPO (interval: {reference_interval}s)..." + ) + reference_metrics = benchmark_solver( + reference_benchmark_path, + solver_name="highs-hipo", + timeout=24 * 60 * 60, + solver_version=solvers_versions.get("highs-hipo"), + reference_benchmark=True, ) - reference_metrics = benchmark_highs_binary() - - # Add required fields to reference metrics - reference_metrics["size"] = "reference" - reference_metrics["solver"] = "highs-binary" - reference_metrics["solver_version"] = reference_solver_version - reference_metrics["solver_release_year"] = "N/A" - reference_metrics["reported_runtime"] = None - reference_metrics["timeout"] = None # Record reference benchmark results reference_timestamp = datetime.datetime.now().strftime( diff --git a/runner/run_solver.py b/runner/run_solver.py index ef3281b5..fc30c451 100644 --- a/runner/run_solver.py +++ b/runner/run_solver.py @@ -1,52 +1,83 @@ -import collections.abc +""" +Solver Runner Script +==================== + +This script provides a unified interface to run various optimization solvers +(e.g., HiGHS, GLPK, Gurobi, SCIP, CBC, CPLEX, Knitro, Xpress) on a given input +problem file. 
It configures solver-specific options for reproducibility, +executes the solver, and collects key metrics such as runtime, duality gap, +and integrality violation. + +Features +-------- +- Supports multiple solvers with customizable options. +- Handles solver-specific seed and tolerance settings. +- Computes MILP metrics (duality gap, integrality violation) when applicable. +- Outputs results in JSON format. + +Example Usage +------------- +Run the script from the command line: + + python runner/run_solver.py \ + --solver_name highs \ + --solver_version 1.6.0 \ + --input_file path/to/problem.mps \ + --highs_solver_variant hipo \ + --hipo_block_size 128 + +Arguments +--------- +--solver_name Name of the solver to run (e.g., highs, glpk, gurobi, scip, cbc, cplex, knitro, xpress). +--solver_version Version of the solver to use. +--input_file Path to the input problem file (e.g., .mps, .lp). +--highs_solver_variant Variant of HiGHS to run (hipo, ipm, ipx, pdlp, simplex). Only for HiGHS. +--hipo_block_size Block size for HiPO variant of HiGHS (default: 128). + +See the function `parse_args()` for more details on arguments. +""" + +import argparse import json -import os -import subprocess -import sys -import time +import logging from enum import Enum from pathlib import Path from time import perf_counter from traceback import format_exc +from typing import Any +import linopy import pandas as pd -from linopy import solvers from linopy.solvers import SolverName +logger = logging.getLogger(__name__) -class HighsVariant(str, Enum): - HIPO = "highs-hipo" - HIPO_32 = "highs-hipo-32" - HIPO_64 = "highs-hipo-64" - HIPO_128 = "highs-hipo-128" - HIPO_IPM = "highs-ipm" - # cli args returns a list of command line arguments for the HiGHS binary. 
- def cli_args(self) -> collections.abc.Iterable[str]: - args = { - "solver": "hipo", - "run_crossover": "choose", - } - if self == HighsVariant.HIPO_IPM: - args["solver"] = "ipx" - - return [f"--{k}={v}" for k, v in args.items()] - - # options returns the contents for the HiGHS options file. - # passed to the HiGHS binary via --options_file= - def options(self) -> str: - options = {} - match self: - case HighsVariant.HIPO_32: - options["hipo_block_size"] = 32 - case HighsVariant.HIPO_64: - options["hipo_block_size"] = 64 - case HighsVariant.HIPO_128: - options["hipo_block_size"] = 128 - case HighsVariant.HIPO: - options["hipo_block_size"] = 64 - options["hipo_metis_no2hop"] = "true" - return "\n".join(f"{k} = {v}" for k, v in options.items()) +class HighsSolverVariants(str, Enum): + """ + Enumeration of supported HiGHS solver variants. + The solver variants are available at + https://ergo-code.github.io/HiGHS/stable/options/definitions/#option-solver. + + Attributes + ---------- + HIPO : str + HiPO variant of HiGHS. + IPM : str + IPM variant of HiGHS. + IPX : str + IPX variant of HiGHS. + PDLP : str + PDLP variant of HiGHS. + SIMPLEX : str + SIMPLEX variant of HiGHS. + """ + + HIPO = "hipo" + IPM = "ipm" + IPX = "ipx" + PDLP = "pdlp" + SIMPLEX = "simplex" # HiGHS is not available in the 2020 environment that we use to run GLPK @@ -55,13 +86,38 @@ def options(self) -> str: except ModuleNotFoundError: highspy = None +SUPPORTED_SOLVERS = [ + "highs", + "glpk", + "gurobi", + "scip", + "cbc", + "cplex", + "knitro", + "xpress", +] -def get_solver(solver_name): - solver_name = solver_name.lower() - solver_enum = SolverName(solver_name) - - solver_class = getattr(solvers, solver_enum.name) +def set_seed_options(solver_name: str) -> dict[str, int | float]: + """ + Sets solver-specific seed and tolerance options for reproducibility. 
+ + This function returns a dictionary of solver configuration parameters that + control random seed initialization and MIP (Mixed-Integer Programming) gap + tolerance. + + Parameters + ---------- + solver_name: str + Name of the optimization solver. Supported solvers include: "highs", + "glpk", "gurobi", "scip", "cbc", "cplex", "knitro", and "xpress". + + Returns + ------- + dict[str, int | float] + A dictionary mapping solver-specific parameter names to their values. + Returns an empty dictionary if the solver name is not recognized. + """ mip_gap = 1e-4 # Tolerance for the relative duality gap for MILPs seed_options = { "highs": {"random_seed": 0, "mip_rel_gap": mip_gap}, @@ -77,17 +133,127 @@ def get_solver(solver_name): "mip.tolerances.mipgap": mip_gap, }, "knitro": { - "KN_PARAM_MS_SEED": 1066, + "ms_seed": 1066, + }, + "xpress": { + "miprelgapnotify": mip_gap, + "randomseed": 0, }, - "xpress": {"miprelgapnotify": mip_gap, "randomseed": 0}, } + if solver_name in seed_options.keys(): + return seed_options[solver_name] + else: + logger.info( + "No seed options found for solver '%s'. Returning empty options.", + solver_name, + ) + return dict() - return solver_class(**seed_options.get(solver_name, {})) +def set_solver_options( + solver_name: str, highs_variant: str, hipo_block_size: int +) -> dict[str, int | str]: + """ + Sets solver-specific options for reproducibility. + + This function returns a dictionary of solver configuration parameters that + control specific solver behaviors, such as the block size for HiGHS variants. + + Parameters + ---------- + solver_name: str + Name of the optimization solver. Supported solvers include: "highs", + "glpk", "gurobi", "scip", "cbc", "cplex", "knitro", and "xpress". + highs_variant : str + Solver type, used to determine specific options for HiGHS variants. + hipo_block_size : int + Block size value for HiPO variant of HiGHS. + This parameter is only relevant if the solver is HiGHS + and the variant is a HiPO variant. 
+ + Returns + ------- + dict[str, int | str] + A dictionary mapping solver-specific parameter names to their values. + Returns an empty dictionary if the solver name is not recognized. + """ -def is_mip_problem(solver_model, solver_name): + if solver_name == "highs": + if highs_variant == HighsSolverVariants.HIPO: + return { + "hipo_block_size": hipo_block_size, + "solver": "hipo", + "run_crossover": "choose", + } + elif highs_variant in ( + HighsSolverVariants.IPM, + HighsSolverVariants.IPX, + HighsSolverVariants.PDLP, + HighsSolverVariants.SIMPLEX, + ): + return {"solver": highs_variant} + else: + logger.info( + "No specific options found for solver '%s'. Returning empty options.", + solver_name, + ) + return dict() + + +def get_solver( + solver_name: str, highs_variant: str, hipo_block_size: int +) -> linopy.solvers: + """ + Instantiate and configure a solver object based on the specified solver name and options. + + Parameters + ---------- + solver_name : str + Name of the optimization solver (e.g., "highs", "glpk", "gurobi"). + highs_variant : str + Variant of HiGHS to use. Only relevant if `name_solver` is "highs". + hipo_block_size : int + Block size for the HiPO variant of HiGHS. Only relevant if `variant_highs` is "hipo". + + Returns + ------- + Any + An instance of the solver class, configured with the appropriate options. + """ + solver_enum = SolverName(solver_name) + + solver_class = getattr(linopy.solvers, solver_enum.name) + + # Get seed options + seed_options = set_seed_options(solver_name) + + # Get other solver options if needed (e.g., for HiGHS variants) + solver_options = set_solver_options(solver_name, highs_variant, hipo_block_size) + + kwargs = {} + if seed_options: + kwargs.update(seed_options) + if solver_options: + kwargs.update(solver_options) + + return solver_class(**kwargs) + + +def is_mip_problem(solver_model: Any, solver_name: str) -> bool: """ - Determines if a given solver model is a Mixed Integer Programming (MIP) problem. 
+ Determine if the given solver model is a Mixed Integer Programming (MIP) problem. + + Parameters + ---------- + solver_model : Any + The solver's Python object or model instance. + solver_name : str + Name of the solver (e.g., "highs", "scip", "cbc", "gurobi", "cplex", "xpress", "glpk", "knitro"). + + Returns + ------- + bool + True if the problem is a MIP, False otherwise. """ if solver_name == "scip": if solver_model.getNIntVars() > 0 or solver_model.getNBinVars() > 0: @@ -117,7 +283,7 @@ def is_mip_problem(solver_model, solver_name): def calculate_integrality_violation( - integer_vars: pd.Series, primal_values: pd.Series + integer_vars: set, primal_values: pd.Series ) -> float: """Calculate the maximum integrality violation from primal values. We only care about Integer vars, not SemiContinuous or SemiInteger, following the code in @@ -129,8 +295,22 @@ def calculate_integrality_violation( return max((p - p.round()).abs()) -def get_duality_gap(solver_model, solver_name: str): - """Retrieve the duality gap for the given solver model, if available.""" +def get_duality_gap(solver_model: Any, solver_name: str) -> float | None: + """ + Retrieve the duality gap for the given solver model, if available. + + Parameters + ---------- + solver_model : Any + The solver's Python object or model instance. + solver_name : str + Name of the solver (e.g., "highs", "scip", "cbc", "gurobi", "cplex", "xpress", "glpk", "knitro"). + + Returns + ------- + float or None + The duality gap if available, otherwise None. 
+ """ if solver_name == "scip": return solver_model.getGap() elif solver_name == "gurobi": @@ -150,17 +330,35 @@ def get_duality_gap(solver_model, solver_name: str): # Knitro duality gap retrieval not implemented yet return None else: - raise NotImplementedError(f"The solver '{solver_name}' is not supported.") + logger.info(f"The solver '{solver_name}' is not supported.") + return None -def get_milp_metrics(input_file, solver_result): - """Uses HiGHS to read the problem file and compute max integrality violation and - duality gap. +def get_milp_metrics( + input_file: Path, solver_name: str, solver_result: Any +) -> tuple[float | None, float | None]: + """ + Compute MILP metrics (duality gap and max integrality violation) using HiGHS. + + Parameters + ---------- + input_file : Path + Path to the input problem file. + solver_name : str + Name of the solver (e.g., "highs", "scip", "cbc", "gurobi", "cplex", "xpress", "glpk", "knitro"). + solver_result : Any + The solver result object containing the solver model and solution. + + Returns + ------- + tuple[float or None, float or None] + A tuple containing the duality gap and the maximum integrality violation. + Returns (None, None) if metrics cannot be computed. 
""" try: if highspy is not None: h = highspy.Highs() - h.readModel(input_file) + h.readModel(input_file.as_posix()) integer_vars = { h.variableName(i) for i in range(h.numVariables) @@ -172,216 +370,81 @@ def get_milp_metrics(input_file, solver_result): integer_vars, solver_result.solution.primal ) return duality_gap, max_integrality_violation - except Exception: - print( + except ValueError: + raise ValueError( f"ERROR obtaining milp metrics for {input_file}: {format_exc()}", - file=sys.stderr, ) return None, None -def get_reported_runtime(solver_name, solver_model) -> float | None: - """Get the solving runtime as reported by the solver from the solver's Python object.""" - try: - match solver_name: - case "highs": - return solver_model.getRunTime() - case "scip": - return solver_model.getSolvingTime() - case "cbc": - return solver_model.runtime - case "gurobi": - return solver_model.Runtime - case "cplex": - return None - case "xpress": - return solver_model.getAttrib("time") - case "knitro": - return solver_model.reported_runtime - case _: - print(f"WARNING: cannot obtain reported runtime for {solver_name}") - return None - except Exception: - print(f"ERROR obtaining reported runtime: {format_exc()}", file=sys.stderr) - return None - - -def run_highs_hipo_solver(input_file, solver_version, highs_variant: HighsVariant): +def get_reported_runtime(solver_name: str, solver_model: Any) -> float | None: """ - Run the HiGHS-HiPO solver directly using the binary with variant-specific arguments + Get the solving runtime as reported by the solver from the solver's Python object. + + Parameters + ---------- + solver_name : str + Name of the solver (e.g., "highs", "scip", "cbc", "gurobi", "cplex", "xpress", "knitro"). + solver_model : Any + The linopy Model instance containing runtime information. + + Returns + ------- + float or None + The reported runtime in seconds, or None if not available. 
+ """ + match solver_name: + case "highs": + return solver_model.getRunTime() + case "scip": + return solver_model.getSolvingTime() + case "cbc": + return solver_model.runtime + case "gurobi": + return solver_model.Runtime + case "cplex": + return solver_model.get_time() + case "xpress": + return solver_model.getAttrib("time") + case "knitro": + return solver_model.reported_runtime + case _: + logger.info(f"WARNING: cannot obtain reported runtime for {solver_name}") + return None + + +def main( + solver_name: str, + input_file: str, + solver_version: str, + highs_solver_variant: str, + hipo_block_size: int, +) -> None: + """ + Run the specified solver on the given input file and collect results. + + Parameters + ---------- + solver_name: str + Name of the solver to run (e.g., "highs", "glpk", "gurobi"). + input_file : str + Name to the input problem file. + solver_version : str + Version of the solver to use. + highs_solver_variant : str + Variant of HiGHS to run (only applicable if solver_name_val is "highs"). + hipo_block_size : int + Block size for HiPO variant of HiGHS + (only applicable if solver_name_val is "highs" + and highs_solver_variant_val is "hipo"). 
+ + Returns + ------- + None """ - import tempfile - - # check if we are root - if os.getuid() == 0: - # VM path - highs_hipo_binary = "/opt/highs-hipo-workspace/HiGHS/build/bin/highs" - else: - highs_hipo_binary = f"{os.getenv('HOME')}/oet/solver-benchmark/highs-installs/highs-hipo-workspace/HiGHS/build/bin/highs" - - solution_dir = Path(__file__).parent / "solutions" - solution_dir.mkdir(parents=True, exist_ok=True) - - logs_dir = Path(__file__).parent / "logs" - logs_dir.mkdir(parents=True, exist_ok=True) - - output_filename = f"{Path(input_file).stem}-{solver_name}-{solver_version}" - solution_fn = solution_dir / f"{output_filename}.sol" - log_fn = logs_dir / f"{output_filename}.log" - - try: - with tempfile.NamedTemporaryFile( - mode="w", - prefix=highs_variant.value, - suffix=".options", - delete=False, - delete_on_close=False, - ) as options_file: - options_file.write(highs_variant.options()) - options_file.flush() - - solver_args = list(highs_variant.cli_args()) - solver_args.append(f"--options_file={options_file.name}") - - command = [ - highs_hipo_binary, - *solver_args, - str(Path(input_file).resolve()), - f"--solution_file={solution_fn}", - ] - - # Run the command and capture the output - try: - print(f"running command {command}") - with open(log_fn, "w") as f: - f.write(f"Command: {' '.join(command)}\n") - start_time = time.perf_counter() - result = subprocess.run( - command, - stdout=f, - stderr=subprocess.STDOUT, - text=True, - check=False, - encoding="utf-8", - ) - runtime = time.perf_counter() - start_time - - # Read back the log file for parsing - with open(log_fn, "r") as f: - output = f.read() - - if result.returncode != 0: - return { - "runtime": runtime, - "reported_runtime": runtime, - "status": "ER", - "condition": "Error", - "objective": None, - "duality_gap": None, - "max_integrality_violation": None, - } - else: - # Parse HiGHS output to extract objective value - objective = None - model_status = "ER" - for line in 
reversed(output.splitlines()): - if objective is None: - # Old format: - if "Objective value" in line and ":" in line: - try: - objective = float(line.split(":")[-1].strip()) - except (ValueError, IndexError): - pass - # New format: " - elif "(objective)" in line: - try: - objective = float(line.split("(objective)")[0].strip()) - except (ValueError, IndexError): - pass - - if model_status == "ER": - # Old format: - if "Model status" in line and ":" in line: - try: - model_status = line.split(":")[-1].strip() - except (ValueError, IndexError): - pass - # New format: - elif line.strip().startswith("Status") and ":" not in line: - try: - parts = line.split() - if len(parts) >= 2: - status_value = parts[-1] - if status_value in [ - "Optimal", - "Infeasible", - "Unbounded", - ]: - model_status = status_value - except (ValueError, IndexError): - pass - - # Break early once we've found both values - if objective is not None and model_status != "ER": - break - - if objective is not None and model_status in ["Optimal", "Infeasible"]: - status = "ok" - else: - status = "warning" - - return { - "runtime": runtime, - "reported_runtime": runtime, - "status": status, - # Model status : Optimal - "condition": model_status, - "objective": objective, - "duality_gap": None, # Not available from command line output - "max_integrality_violation": None, # Not available from command line output - } - except Exception as e: - runtime = time.perf_counter() - start_time - # Write error to log file - with open(log_fn, "w") as f: - f.write(f"Command: {' '.join(command)}\n") - f.write(f"Exception: {str(e)}\n") - - return { - "runtime": runtime, - "reported_runtime": runtime, - "status": "error", - "condition": "Error", - "objective": None, - "duality_gap": None, - "max_integrality_violation": None, - } - finally: - pass - # Clean up temporary options file - # if options_file is not None: - # try: - # os.unlink(options_file.name) - # except OSError: - # pass - - -def main(solver_name, 
input_file, solver_version): problem_file = Path(input_file) - # Handle highs-hipo solver variants separately - try: - highs_variant = HighsVariant(solver_name.lower()) - results = run_highs_hipo_solver(input_file, solver_version, highs_variant) - print(json.dumps(results)) - return - except ValueError as e: - # re-raise the error if it isn't expected. - # we want to continue only if the error is about invalid HighsVariant - if "is not a valid HighsVariant" not in str(e): - raise e - - solver = get_solver(solver_name) + solver = get_solver(solver_name, highs_solver_variant, hipo_block_size) solution_dir = Path(__file__).parent / "solutions" solution_dir.mkdir(parents=True, exist_ok=True) @@ -394,19 +457,17 @@ def main(solver_name, input_file, solver_version): solution_fn = solution_dir / f"{output_filename}.sol" log_fn = logs_dir / f"{output_filename}.log" + # We measure runtime here and not of this entire script because lines like + # `import linopy` take a long (and varying) amount of time try: - # We measure runtime here and not of this entire script because lines like - # `import linopy` take a long (and varying) amount of time start_time = perf_counter() solver_result = solver.solve_problem( problem_fn=problem_file, solution_fn=solution_fn, log_fn=log_fn ) runtime = perf_counter() - start_time - duality_gap, max_integrality_violation = get_milp_metrics( - input_file, solver_result + problem_file, solver_name, solver_result ) - results = { "runtime": runtime, "reported_runtime": get_reported_runtime( @@ -418,8 +479,8 @@ def main(solver_name, input_file, solver_version): "duality_gap": duality_gap, "max_integrality_violation": max_integrality_violation, } - except Exception: - print(f"ERROR running solver: {format_exc()}", file=sys.stderr) + except Exception as e: + logger.error(f"Error running solver: {e}") results = { "runtime": None, "reported_runtime": None, @@ -432,12 +493,63 @@ def main(solver_name, input_file, solver_version): print(json.dumps(results)) 
+def parse_args() -> argparse.Namespace: + """ + Parse command-line arguments for the solver runner script. + + Returns + ------- + argparse.Namespace + Namespace containing the parsed command-line arguments: + - solver_name (str): Name of the solver to run. + - solver_version (str): Version of the solver to run. + - input_file (str): Path to the input problem file. + - highs_solver_variant (str): Variant of HiGHS to run (only applicable if solver_name is 'highs'). + - hipo_block_size (int): Block size for HiPO variant of HiGHS + (only applicable if solver_name is 'highs' and + highs_solver_variant is 'hipo'). + """ + p = argparse.ArgumentParser() + p.add_argument( + "--solver_name", + type=str, + choices=SUPPORTED_SOLVERS, + required=True, + help="Name of the solver to run.", + ) + p.add_argument( + "--solver_version", + type=str, + required=True, + help="Version of the solver to run.", + ) + p.add_argument( + "--input_file", type=str, required=True, help="Path to the input problem file." 
+ ) + p.add_argument( + "--highs_solver_variant", + type=str, + choices=[v.value for v in HighsSolverVariants], + help="Variant of HiGHS to run (only applicable if solver_name is 'highs').", + required=False, + ) + p.add_argument( + "--hipo_block_size", + type=int, + help="Block size for HiPO variant of HiGHS " + "(only applicable if solver_name is 'highs' and " + "highs_solver_variant is 'hipo').", + required=False, + ) + return p.parse_args() + + if __name__ == "__main__": - if len(sys.argv) != 4: - print("Usage: python run_solver.py ") - sys.exit(1) - - solver_name = sys.argv[1] - input_file = sys.argv[2] - solver_version = sys.argv[3] - main(solver_name, input_file, solver_version) + args = parse_args() + main( + args.solver_name, + args.input_file, + args.solver_version, + args.highs_solver_variant, + args.hipo_block_size, + ) diff --git a/tests/test_run_benchmarks.py b/tests/test_run_benchmarks.py new file mode 100644 index 00000000..9f98a0e3 --- /dev/null +++ b/tests/test_run_benchmarks.py @@ -0,0 +1,315 @@ +"""Unit tests for the run_benchmarks module.""" + +import gzip +import os +import subprocess +from pathlib import Path +from unittest.mock import MagicMock, Mock, patch + +import pytest +import requests + +from runner import run_benchmarks +from runner.run_benchmarks import ( + build_solver_command, + download_benchmark_file, + get_conda_package_versions, + get_solver_name_and_version, +) + + +class TestRunBenchmarks: + def test_get_conda_package_versions(self) -> None: + """Test the get_conda_package_versions function.""" + solvers_list = ["highs", "highs-hipo", "highs-ipm", "cbc", "scip"] + env_name = "benchmark-env" + # Simulate conda list output + conda_list_output = """ + # packages in environment at /opt/conda/envs/fake-env: + # + highspy 1.13.2.dev1 py39_0 + coin-or-cbc 2.10.12 py39_0 + pyscipopt 5.7.1 py39_0 + otherpkg 0.1.0 py39_0 + """ + mock_result = MagicMock() + mock_result.stdout = conda_list_output + mock_result.returncode = 0 + + 
expected_dict = { + "highs": "1.13.2.dev1", + "highs-hipo": "1.13.2.dev1", + "highs-ipm": "1.13.2.dev1", + "cbc": "2.10.12", + "scip": "5.7.1", + } + + with patch("subprocess.run", return_value=mock_result): + versions = get_conda_package_versions(solvers_list, env_name) + assert versions == expected_dict + + def test_build_command_non_root_includes_user_and_reference_flag( + self, monkeypatch: MagicMock + ) -> None: + """Test that the command includes --user and --highs_solver_variant hipo when not running as root.""" + monkeypatch.setattr(os, "geteuid", lambda: 1000) # non-root + input_file = Path("/tmp/example_problem.lp") + solver_name = "highs" + timeout = 60 + solver_version = "1.2.3" + memory_limit_bytes = 12345678 + + cmd = build_solver_command( + input_file, solver_name, timeout, solver_version, memory_limit_bytes, True + ) + + assert cmd[0] == "systemd-run" + assert "--user" in cmd + assert f"--property=MemoryMax={memory_limit_bytes}" in cmd + assert "--property=MemorySwapMax=0" in cmd + assert "/usr/bin/time" in cmd + assert "--format" in cmd + assert "MaxResidentSetSizeKB=%M" in cmd + assert "timeout" in cmd + assert f"{timeout}s" in cmd + expected_wrapper = str(Path(run_benchmarks.__file__).parent / "run_solver.py") + assert expected_wrapper in cmd + assert "--solver_name" in cmd + assert cmd[cmd.index("--solver_name") + 1] == solver_name + assert "--input_file" in cmd + assert cmd[cmd.index("--input_file") + 1] == input_file.as_posix() + assert "--solver_version" in cmd + assert cmd[cmd.index("--solver_version") + 1] == solver_version + assert "--highs_solver_variant" in cmd + assert cmd[cmd.index("--highs_solver_variant") + 1] == "hipo" + + def test_build_command_as_root_no_user_and_no_reference( + self, monkeypatch: MagicMock + ) -> None: + """Test that the command does not include --user and --highs_solver_variant hipo""" + monkeypatch.setattr(os, "geteuid", lambda: 0) # root + input_file = Path("/tmp/another.lp") + solver_name = "cbc" + timeout = 
30 + solver_version = "2.0" + memory_limit_bytes = 99999 + + cmd = build_solver_command( + input_file, solver_name, timeout, solver_version, memory_limit_bytes, False + ) + assert "--user" not in cmd + assert f"--property=MemoryMax={memory_limit_bytes}" in cmd + assert f"{timeout}s" in cmd + assert "--solver_name" in cmd + assert cmd[cmd.index("--solver_name") + 1] == solver_name + assert "--input_file" in cmd + assert cmd[cmd.index("--input_file") + 1] == input_file.as_posix() + assert "--solver_version" in cmd + assert cmd[cmd.index("--solver_version") + 1] == solver_version + assert not any(el == "--highs_solver_variant hipo" for el in cmd) + + def test_download_regular_file_http(self, tmp_path: Path) -> None: + """Test downloading a regular file from HTTP URL.""" + dest_path = tmp_path / "data.txt" + test_content = b"test file content" + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.iter_content.return_value = [test_content] + mock_get.return_value.__enter__.return_value = mock_response + + download_benchmark_file("http://example.com/data.txt", dest_path) + + assert dest_path.exists() + assert dest_path.read_bytes() == test_content + mock_get.assert_called_once_with("http://example.com/data.txt", stream=True) + + def test_download_gzipped_file_http(self, tmp_path: Path) -> None: + """Test downloading and unzipping a .gz file from HTTP.""" + dest_path = tmp_path / "data.txt.gz" + uncompressed_path = tmp_path / "data.txt" + original_content = b"original file content" + + # Create gzipped content + gzipped_content = gzip.compress(original_content) + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.iter_content.return_value = [gzipped_content] + mock_get.return_value.__enter__.return_value = mock_response + + download_benchmark_file("http://example.com/data.txt.gz", dest_path) + + # Verify uncompressed file exists and compressed file is removed + assert uncompressed_path.exists() + 
assert not dest_path.exists() + assert uncompressed_path.read_bytes() == original_content + + def test_skip_download_if_file_exists(self, tmp_path: Path) -> None: + """Test that download is skipped if the file already exists.""" + dest_path = tmp_path / "data.txt" + dest_path.write_text("existing content") + + with patch("requests.get") as mock_get: + download_benchmark_file("http://example.com/data.txt", dest_path) + + # Verify no download occurred + mock_get.assert_not_called() + assert dest_path.read_text() == "existing content" + + def test_skip_download_if_uncompressed_file_exists_with_gz_url( + self, tmp_path: Path + ) -> None: + """Test that download is skipped if uncompressed file exists when .gz URL is provided.""" + gz_path = tmp_path / "data.txt.gz" + uncompressed_path = tmp_path / "data.txt" + uncompressed_path.write_text("existing content") + + with patch("requests.get") as mock_get: + download_benchmark_file("http://example.com/data.txt.gz", gz_path) + + # Verify no download occurred + mock_get.assert_not_called() + assert uncompressed_path.read_text() == "existing content" + assert not gz_path.exists() + + def test_create_destination_directory_if_not_exists(self, tmp_path: Path) -> None: + """Test that destination directory is created if it doesn't exist.""" + nested_path = tmp_path / "subdir" / "nested" / "data.txt" + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.iter_content.return_value = [b"content"] + mock_get.return_value.__enter__.return_value = mock_response + + download_benchmark_file("http://example.com/data.txt", nested_path) + + assert nested_path.parent.exists() + assert nested_path.exists() + + def test_http_download_raises_on_failed_response(self, tmp_path: Path) -> None: + """Test that HTTP errors are properly raised.""" + dest_path = tmp_path / "data.txt" + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.raise_for_status.side_effect = 
requests.HTTPError( + "404 Not Found" + ) + mock_get.return_value.__enter__.return_value = mock_response + + with pytest.raises(requests.HTTPError): + download_benchmark_file("http://example.com/missing.txt", dest_path) + + def test_gsutil_download_fails(self, tmp_path: Path) -> None: + """Test that gsutil command failures are properly raised.""" + dest_path = tmp_path / "data.txt" + + with patch("subprocess.run") as mock_run: + mock_run.side_effect = subprocess.CalledProcessError(1, "gsutil cp") + + with pytest.raises(subprocess.CalledProcessError): + download_benchmark_file("gs://bucket-name/data.txt", dest_path) + + def test_gzip_decompression_error(self, tmp_path: Path) -> None: + """Test handling of corrupted gzip files.""" + dest_path = tmp_path / "data.txt.gz" + # Write invalid gzip content + dest_path.write_bytes(b"not a valid gzip file") + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.iter_content.return_value = [b"not a valid gzip file"] + mock_get.return_value.__enter__.return_value = mock_response + + with pytest.raises(gzip.BadGzipFile): + download_benchmark_file("http://example.com/data.txt.gz", dest_path) + + def test_large_file_streaming(self, tmp_path: Path) -> None: + """Test that large files are downloaded in chunks.""" + dest_path = tmp_path / "large_file.bin" + # 10 chunks of 8KB each + chunks = [b"x" * 8192 for _ in range(10)] + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.iter_content.return_value = chunks + mock_get.return_value.__enter__.return_value = mock_response + + download_benchmark_file("http://example.com/large_file.bin", dest_path) + + assert dest_path.stat().st_size == 8192 * 10 + mock_response.iter_content.assert_called_once_with(chunk_size=8192) + + def test_full_workflow_gcs_gzip(self, tmp_path: Path) -> None: + """Test complete workflow: GCS download + gzip decompression.""" + dest_path = tmp_path / "benchmark.tar.gz" + uncompressed_path = 
tmp_path / "benchmark.tar" + original_content = b"tar archive content here" + + gzipped = gzip.compress(original_content) + + with patch("subprocess.run") as mock_run: + # Simulate gsutil writing the gzipped file + def write_file(*args, **kwargs): + dest_path.write_bytes(gzipped) + return Mock(returncode=0) + + mock_run.side_effect = write_file + + download_benchmark_file("gs://bucket-name/benchmark.tar.gz", dest_path) + + assert uncompressed_path.exists() + assert not dest_path.exists() + assert uncompressed_path.read_bytes() == original_content + + def test_file_with_multiple_dots_in_name(self, tmp_path: Path) -> None: + """Test handling files with multiple dots (e.g., data.backup.txt.gz).""" + dest_path = tmp_path / "data.backup.txt.gz" + uncompressed_path = tmp_path / "data.backup.txt" + content = b"backup data" + gzipped = gzip.compress(content) + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.iter_content.return_value = [gzipped] + mock_get.return_value.__enter__.return_value = mock_response + + download_benchmark_file("http://example.com/data.backup.txt.gz", dest_path) + + assert uncompressed_path.exists() + assert uncompressed_path.read_bytes() == content + + def test_empty_file_download(self, tmp_path: Path) -> None: + """Test downloading an empty file.""" + dest_path = tmp_path / "empty.txt" + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.iter_content.return_value = [] + mock_get.return_value.__enter__.return_value = mock_response + + download_benchmark_file("http://example.com/empty.txt", dest_path) + + assert dest_path.exists() + assert dest_path.stat().st_size == 0 + + @pytest.mark.parametrize( + "input_name, expected_base, expected_variant", + [ + ("highs", "highs", None), + ("highs-hipo", "highs", "hipo"), + ("highs-ipm", "highs", "ipm"), + ("Highs-IPX", "highs", "ipx"), + ("HIGHS-SIMPLEX", "highs", "simplex"), + ("cbc", "cbc", None), + ("scip", "scip", None), + ], + ) 
+ def test_split_highs_solver_name_variants_parametrized( + self, input_name: str, expected_base: str, expected_variant: str | None + ) -> None: + """Test the _split_highs_solver_name function with various input formats.""" + base, variant = get_solver_name_and_version(input_name) + assert base == expected_base + assert variant == expected_variant