diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..32dec1d --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,68 @@ +name: Run CI + +on: + push: + branches: [master] + pull_request: + +jobs: + tests: + runs-on: ubuntu-24.04 + timeout-minutes: 30 + strategy: + matrix: + python-version: ["3.11", "3.12", "3.13"] + name: Run tests + steps: + - name: Checkout repo + uses: actions/checkout@v5.0.0 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v6.0.0 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Crossflow and its testing dependencies + run: pip install -e .[testing] + + - name: Run test suite + run: pytest --cov crossflow --cov-report term-missing --cov-append . + + - name: Coveralls GitHub Action + uses: coverallsapp/github-action@v2.3.6 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + + docs: + runs-on: ubuntu-24.04 + timeout-minutes: 15 + steps: + - uses: actions/checkout@v5.0.0 + - name: Set up Python 3.13 + uses: actions/setup-python@v6.0.0 + with: + python-version: 3.13 + - name: Install python dependencies + run: | + pip install --upgrade pip + pip install -e .[docs] + - name: Build docs + run: cd docs && make + + pre-commit: + runs-on: ubuntu-24.04 + timeout-minutes: 15 + steps: + - uses: actions/checkout@v5.0.0 + - name: Set up Python 3.13 + uses: actions/setup-python@v6.0.0 + with: + python-version: 3.13 + - name: Install python dependencies + run: | + pip install --upgrade pip + pip install -e .[pre-commit,docs,testing] + - name: Run pre-commit + run: | + pre-commit install + pre-commit run --all-files || ( git status --short ; git diff ; exit 1 ) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml deleted file mode 100644 index fd1ea31..0000000 --- a/.github/workflows/python-package.yml +++ /dev/null @@ -1,41 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a 
variety of Python versions -# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python - -name: Python package - -on: - push: - branches: [ "master" ] - pull_request: - branches: [ "master" ] - -jobs: - build: - - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - python-version: ["3.9", "3.10", "3.11"] - os: ["ubuntu-latest", "macOS-latest"] - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install flake8 pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest - run: | - pytest diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..b87f8d5 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,140 @@ + +name: release + +on: + workflow_dispatch: + inputs: + version: + required: true + default: 'x.y.z' + +permissions: + contents: write + pull-requests: write + +jobs: + checks: + name: Version check + runs-on: ubuntu-24.04 + steps: + - name: Checkout repository + id: repo + uses: actions/checkout@v5.0.0 + + - name: Set up Python + uses: actions/setup-python@v6.0.0 + with: + python-version: 3.13 + + - name: Get latest release from pip + id: latestreleased + run: | + PREVIOUS_VERSION=$(python -m pip index versions crossflow | grep "crossflow" | cut -d "(" -f2 | cut -d ")" -f1) + echo "pip_tag=$PREVIOUS_VERSION" >> "$GITHUB_OUTPUT" + echo $PREVIOUS_VERSION + + - name: version comparison + id: compare + run: | + pip3 install semver + output=$(pysemver compare ${{ steps.latestreleased.outputs.pip_tag }} ${{ github.event.inputs.version }}) + if [ $output -ge 0 ]; then exit 1; fi + + version: + name: prepare ${{ github.event.inputs.version }} + needs: checks + runs-on: ubuntu-24.04 + steps: + + - name: checkout + uses: actions/checkout@v5.0.0 + + - name: Change version in repo + run: sed -i "s/__version__ =.*/__version__ = \"${{ github.event.inputs.version }}\"/g" crossflow/__init__.py + + - name: send PR + id: pr_id + uses: peter-evans/create-pull-request@v7.0.8 + with: + commit-message: Update version to ${{ github.event.inputs.version }} + branch: version-update + title: "Update to version ${{ github.event.inputs.version }}" + body: | + Update version + - Update the __init__.py with new release + - Auto-generated by [CI] + committer: version-updater + author: version-updater + base: main + signoff: false + draft: false + + - name: auto approve review + uses: 
hmarr/auto-approve-action@v4.0.0 + with: + pull-request-number: ${{ steps.pr_id.outputs.pull-request-number }} + review-message: "Auto approved version bump PR" + github-token: ${{ secrets.AUTO_PR_MERGE }} + + - name: merge PR + run: gh pr merge --merge --delete-branch --auto "${{ steps.pr_id.outputs.pull-request-number }}" + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + tag: + name: tag release + needs: version + runs-on: ubuntu-24.04 + steps: + - name: Checkout repository + uses: actions/checkout@v5.0.0 + with: + ref: main + + - name: tag v${{ github.event.inputs.version }} + run: | + git config user.name github-actions + git config user.email github-actions@github.com + git tag ${{ github.event.inputs.version }} + git push origin tag ${{ github.event.inputs.version }} + + release: + name: make github release + needs: tag + runs-on: ubuntu-24.04 + steps: + + - name: create release + uses: softprops/action-gh-release@v2.3.3 + with: + name: v${{ github.event.inputs.version }} + generate_release_notes: true + tag_name: ${{ github.event.inputs.version }} + + pypi: + name: make pypi release + needs: [tag, release] + runs-on: ubuntu-24.04 + steps: + + - name: checkout + uses: actions/checkout@v5.0.0 + with: + ref: main + + - name: Set up Python + uses: actions/setup-python@v6.0.0 + with: + python-version: 3.13 + + - name: Install flit + run: | + python -m pip install --upgrade pip + python -m pip install flit~=3.9 + + - name: Build and publish + run: | + flit publish + env: + FLIT_USERNAME: __token__ + FLIT_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.gitignore b/.gitignore index 5808f93..b5e4256 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,133 @@ +# Byte-compiled / optimized / DLL files __pycache__/ -.vscode -.idea -*.egg_info/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ build/ +develop-eggs/ dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg 
+*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +.pytest_cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +docs/autosummary/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# result and temp file +*.out +*.obj +*.npz +*.pkl +*.csv +Example_output/ +Example/data/*.csv + +# profraw files from LLVM? Unclear exactly what triggers this +# There are reports this comes from LLVM profiling, but also Xcode 9. 
+*profraw +# trajectory file too big +Example/data/1AKI_prod.trr +Example/data/1AKI_prod.tpr + +# output directories, log file and outputs +job* +*.json +*.err +*.com +*.txt +.idea + +# dask dask-worker-space -.ipynb_checkpoints/ + +# visual studio +.vscode diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..11ebc11 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,41 @@ +exclude: "^examples/|.*.ipynb" +repos: + - repo: https://github.com/psf/black + rev: 25.1.0 + hooks: + - id: black + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-added-large-files + - id: check-ast + - id: check-case-conflict + - id: check-executables-have-shebangs + - id: check-merge-conflict + - id: check-toml + - id: check-yaml + - id: end-of-file-fixer + - id: mixed-line-ending + - id: trailing-whitespace + + - repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + args: ["--profile=black"] + + - repo: local + hooks: + - id: pylint + language: system + types: [file, python] + name: pylint + description: "This hook runs the pylint static code analyzer" + exclude: &exclude_files > + (?x)^( + docs/.*| + examples/.*| + tests/.*| + )$ + entry: pylint diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..f08a84d --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,19 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details +version: 2 + +build: + os: "ubuntu-24.04" + tools: + python: "3.13" + +python: + install: + - method: pip + path: . + extra_requirements: + - docs + +sphinx: + # Path to your Sphinx configuration file. 
+ configuration: docs/source/conf.py diff --git a/LICENSE.txt b/LICENSE old mode 100755 new mode 100644 similarity index 100% rename from LICENSE.txt rename to LICENSE diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100755 index 588f1fd..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,3 +0,0 @@ - -# Include the license file -include LICENSE.txt diff --git a/README.md b/README.md index 7c098bd..51c2f9f 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,21 @@ +[//]: # (Badges) +[![Crossflow CI](https://github.com/CharlieLaughton/crossflow/actions/workflows/ci.yaml/badge.svg)](https://github.com/CharlieLaughton/crossflow/actions/workflows/ci.yaml) +[![Docs - Status](https://app.readthedocs.org/projects/crossflow/badge/?version=latest)](https://crossflow.readthedocs.io/en/latest/?badge=latest) +![PyPI - Version](https://img.shields.io/pypi/v/crossflow?logo=pypi&logoColor=white) +![PyPI - Status](https://img.shields.io/pypi/status/crossflow?logo=pypi&logoColor=white) +![PyPI - Python Version](https://img.shields.io/pypi/pyversions/crossflow) +![PyPI - Total Downloads](https://img.shields.io/pepy/dt/crossflow?logo=pypi&logoColor=white&color=blue) +![PyPI - Monthly Downloads](https://img.shields.io/pypi/dm/crossflow?logo=pypi&logoColor=white&color=blue) +[![Coverage Status](https://coveralls.io/repos/github/CharlieLaughton/crossflow/badge.svg?branch=main)](https://coveralls.io/github/CharlieLaughton/crossflow?branch=main) +[![DOI](https://zenodo.org/badge/802170523.svg)](https://zenodo.org/doi/10.5281/zenodo.12191599) + # Crossflow A Python-based workflow system. -Crossflow allows you to build and execute complex workflows that chain together +Crossflow allows you to build and execute complex workflows that chain together command-line driven tools. -For details, see the [Wiki](https://github.com/CharlieLaughton/crossflow/wiki). 
- -### Releases: - -[![DOI](https://zenodo.org/badge/802170523.svg)](https://zenodo.org/doi/10.5281/zenodo.12191599) ### Authors: @@ -17,4 +23,6 @@ Charlie Laughton [charles.laughton@nottingham.ac.uk](mailto:charles.laughton@not Christian Suess +James Gebbie-Rayet [james.gebbie@stfc.ac.uk](mailto:james.gebbie@stfc.ac.uk) + Sam Cox diff --git a/crossflow/__init__.py b/crossflow/__init__.py old mode 100755 new mode 100644 index e69de29..70495fd --- a/crossflow/__init__.py +++ b/crossflow/__init__.py @@ -0,0 +1,8 @@ +""" +Crossflow + +Crossflow allows you to build and execute complex workflows that chain +together command-line driven tools. +""" + +__version__ = "0.1.4" diff --git a/crossflow/_version.py b/crossflow/_version.py deleted file mode 100755 index 2eed5be..0000000 --- a/crossflow/_version.py +++ /dev/null @@ -1,4 +0,0 @@ -""" -_version.py: defines the code version -""" -__version__ = "0.1.3" diff --git a/crossflow/clients.py b/crossflow/clients.py old mode 100755 new mode 100644 index 84554d8..d1e4044 --- a/crossflow/clients.py +++ b/crossflow/clients.py @@ -1,18 +1,23 @@ """ Clients.py: thin wrapper over dask client """ + import glob import pickle import sys + from dask.distributed import Client as DaskClient + try: from collections import Iterable except ImportError: from collections.abc import Iterable + from dask.distributed import Future -from .tasks import FunctionTask, SubprocessTask -from .filehandling import FileHandler + from . 
import config +from .filehandling import FileHandler +from .tasks import FunctionTask, SubprocessTask class Client(DaskClient): @@ -188,7 +193,7 @@ def submit(self, func, *args, **kwargs): newargs = self._futurize(newargs) if isinstance(func, (SubprocessTask, FunctionTask)): - kwargs['pure'] = False + kwargs["pure"] = False future = super().submit(func.run, *newargs, **kwargs) return self._unpack(func, future) else: @@ -224,24 +229,22 @@ def map(self, func, *iterables, **kwargs): if isinstance(iterable, (list, tuple)): n_items = len(iterable) if n_items != maxlen: - raise ValueError( - "Error: not all iterables are same length" - ) + raise ValueError("Error: not all iterables are same length") its.append(iterable) else: its.append([iterable] * maxlen) - kwargs['pure'] = False + kwargs["pure"] = False if isinstance(func, (SubprocessTask, FunctionTask)): newits = self._filehandlify(its) for i, arg in enumerate(newits): newits[i] = self._futurize(arg) - #futures = super().map(func, *newits, **kwargs) + # futures = super().map(func, *newits, **kwargs) futures = [super().submit(func, *newit, **kwargs) for newit in zip(*newits)] result = [self._unpack(func, future) for future in futures] else: - #result = super().map(func, *its, **kwargs) + # result = super().map(func, *its, **kwargs) result = [super().submit(func, *it, **kwargs) for it in zip(*its)] if isinstance(result[0], tuple): result = self._lt2tl(result) diff --git a/crossflow/filehandling.py b/crossflow/filehandling.py old mode 100755 new mode 100644 index fbb992e..e97b714 --- a/crossflow/filehandling.py +++ b/crossflow/filehandling.py @@ -5,11 +5,13 @@ """ import os -import zlib import os.path as op +import tempfile import uuid +import zlib + import fsspec -import tempfile + from . 
import config """ @@ -58,9 +60,7 @@ class FileHandle(object): def __init__(self, path, stage_point, must_exist=True): if not isinstance(path, (os.PathLike, str, bytes)): - raise IOError( - f"Error - illegal argument type {type(path)} for {path}" - ) + raise IOError(f"Error - illegal argument type {type(path)} for {path}") if must_exist: if not os.path.exists(path): raise IOError("Error - no such file") @@ -75,9 +75,7 @@ def __init__(self, path, stage_point, must_exist=True): self.store = zlib.compress(s.read()) else: self.staging_path = op.join(stage_point, self.uid) - self.store = fsspec.open( - self.staging_path, "wb", compression="bz2" - ) + self.store = fsspec.open(self.staging_path, "wb", compression="bz2") with source as s: with self.store as d: d.write(s.read()) @@ -162,9 +160,7 @@ def write_binary(self, data): if self.staging_path is None: self.store = compressed_data else: - self.store = fsspec.open( - self.staging_path, "wb", compression="bz2" - ) + self.store = fsspec.open(self.staging_path, "wb", compression="bz2") with self.store as s: s.write(data) self.store.mode = "rb" diff --git a/crossflow/tasks.py b/crossflow/tasks.py index c6208ee..ac823c2 100644 --- a/crossflow/tasks.py +++ b/crossflow/tasks.py @@ -2,17 +2,19 @@ Crossflow tasks: wrappers round subprocess calls and python functions for execution on a crossflow cluster """ -import re -import subprocess -import os -import os.path as op -import tempfile -import shutil + import copy import glob from math import log10 -from .filehandling import FileHandler +import os +import os.path as op +import re +import shutil +import subprocess +import tempfile + from . import config +from .filehandling import FileHandler STDOUT = "STDOUT" DEBUGINFO = "DEBUGINFO" diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..cf24a3d --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,41 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. 
+SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -n -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: all help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext customdefault + +## Runs nit-picky and converting warnings into errors to +## make sure the documentation is properly written +customdefault: + $(SPHINXBUILD) -b html -nW $(ALLSPHINXOPTS) $(BUILDDIR)/html + +all: html + +clean: + rm -r $(BUILDDIR) + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + + +view: + xdg-open $(BUILDDIR)/html/index.html diff --git a/docs/source/clients.rst b/docs/source/clients.rst new file mode 100644 index 0000000..e52c587 --- /dev/null +++ b/docs/source/clients.rst @@ -0,0 +1,69 @@ +Crossflow Clients +----------------- + +The ``crossflow.clients`` sub-package provides a Client through which +one can execute tasks on distributed resources. At its heart a +``crossflow.clients.Client()`` is a +`dask.distributed <https://distributed.dask.org/en/stable/>`_ client, +and new users are strongly encouraged to read the documentation there to +understand how Crossflow works. 
+ +Creating a crossflow.Client +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A Crossflow client provides access to a cluster of workers. These may be +remote machines, or a set of worker processes on the current compute +resource (see the dask documentation for more details on the many +different ways a cluster object can be created). The crossflow.Client is +initialised with the identity of the cluster it will serve: + +.. code:: python + + from crossflow.clients import Client + from distributed import LocalCluster # Just one way of creating a cluster + cluster = LocalCluster() + my_client = Client(cluster) + +As a shortcut (typically for testing purposes), a local cluster may be +created on the fly, to serve the Client: + +.. code:: python + + + my_client = Client() + +Using a crossflow.Client +~~~~~~~~~~~~~~~~~~~~~~~~ + +A crossflow.Task is sent to a crossflow.Client for execution using the +client’s .submit() or .map() method. + +Running a single job: + +.. code:: python + + + output_future, logfile_future = my_client.submit(my_othertool_task, 'input1.dat', 'input2.dat') + +Compare with the interactive version above: + +1. The outputs (output_future, logfile_future) are now Futures - again, + see the dask documentation for more detail, but also notice the + difference: dask’s .submit() method always returns a single Future, + while crossflow’s one returns one Future per expected output. + +Running a set of jobs in parallel: + +.. code:: python + + + xs = ['input1a.dat', 'input1b.dat', 'input1c.dat'] + ys = ['input2a.dat', 'input2b.dat', 'input2c.dat'] + output_futures, logfile_futures = my_client.map(my_othertool_task, xs, ys) + +In this case the .map() method returns lists of Futures. The individual +jobs are scheduled to the workers in the compute cluster in whatever way +is most efficient; if there are enough of them to run all three jobs in +parallel, they will. 
+ +For more details on Crossflow Clients, see :doc:`here <clients_advanced>` diff --git a/docs/source/clients_advanced.rst b/docs/source/clients_advanced.rst new file mode 100644 index 0000000..43b6d9b --- /dev/null +++ b/docs/source/clients_advanced.rst @@ -0,0 +1,73 @@ +Advanced Clients +================= + +Here we go into ``crossflow.clients`` in more detail, see +:doc:`here <clients>` for the basics. + +Currently there is only one Crossflow Client. Under the hood, this is +basically a Dask.distributed client, and for a full understanding of how +this works please see the Dask.distributed documentation. Here we +concentrate on some of the extras the Crossflow Client offers. + +Transparent file handling +------------------------- + +The Crossflow ``Client`` understands crossflow ``FileHandles`` and +converts to/from them as required. The only place where some user +awareness is generally required is in dealing with the outputs returned +by client ``.submit()`` and ``.map()`` methods. These are +``concurrent.Futures``, and while they may be passed on to further tasks +in the workflow as-is, to extract their data locally one must first call +their ``.result()`` method and then either save the data to a local file +with the ``FileHandle``\ ’s ``.save()`` method, or, if working +interactively, maybe view the contents via the ``FileHandle``\ ’s +``.read_text()`` method: + +.. code:: python + + # Submit the job: + output = client.submit(my_task, 'input.dat') + # Wait for the job to finish and print the output to the screen: + print(output.result().read_text()) + +Multiple return values +~~~~~~~~~~~~~~~~~~~~~~ + +The ``Dask.distributed`` client’s ``.submit()`` method always returns a +single future, even if the function it is executing returns multiple +values, e.g.: + +.. 
code:: python + + cluster = distributed.LocalCluster() + dask_client = distributed.Client(cluster) + + def sumprod(a, b): + return a+b, a * b + + result = dask_client.submit(sumprod, 5, 7) # result is a Future for a tuple + +In contrast, a Crossflow client returns one future per expected output +value: + +.. code:: python + + cluster = distributed.LocalCluster() + crossflow_client = clients.Client(cluster) + + def sumprod(a, b): + return a+b, a * b + + sumprod_task = FunctionTask(sumprod) + sumprod_task.set_inputs(['a', 'b']) + sumprod_task.set_outputs(['sum', 'prod']) + + sum, prod = crossflow_client.submit(sumprod_task, 5, 7) # result is a pair of futures. + +The Crossflow client’s ``.map()`` method functions similarly, returning +one list of Futures per output variable: + +.. code:: python + + sums, prods = crossflow_client.map(sumprod_task, [5,6,7], [7,8,9]) # result is a pair of lists of futures. + assert len(sums) == 3 diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..c787327 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,188 @@ +# -*- coding: utf-8 -*- +# +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. For a +# full list see the documentation: +# http://www.sphinx-doc.org/en/stable/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+ +# In case the project was not installed +import os +import sys +import time + +sys.path.insert(0, os.path.abspath("..")) + +# -- Project information ----------------------------------------------------- + +project = "Crossflow" +copyright_first_year = "2020" +copyright_owners = "Charlie Laughton and Christian Suess" +author = "Charlie Laughton" +current_year = str(time.localtime().tm_year) +copyright_year_string = ( + current_year + if current_year == copyright_first_year + else f"{copyright_first_year}-{current_year}" +) +copyright = f"{copyright_year_string}, {copyright_owners}. All rights reserved" + +# The short X.Y version +version = "" +# The full version, including alpha/beta/rc tags +release = "" + + +# -- General configuration --------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autosummary", + "sphinx.ext.autodoc", + "sphinx.ext.mathjax", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", + "sphinx.ext.extlinks", + "sphinx_copybutton", +] + +autosummary_generate = True +napoleon_google_docstring = False +napoleon_use_param = False +napoleon_use_ivar = True + +# Add any paths that contain templates here, relative to this directory. +# templates_path = ["_templates"] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = ".rst" + +# The master toctree document. +master_doc = "index" + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. 
+language = "en" + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path . +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "default" + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "furo" +# html_logo = "images/blaah" + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +# html_static_path = ["_static"] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. +# +# html_sidebars = {} + + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = "crossflow_doc" + + +# -- Options for LaTeX output ------------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. 
+ # + # 'preamble': '', + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + ( + master_doc, + "Crossflow.tex", + "Crossflow Documentation", + "Crossflow", + "manual", + ), +] + + +# -- Options for manual page output ------------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [(master_doc, "Crossflow", "Crossflow Documentation", [author], 1)] + + +# -- Options for Texinfo output ---------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ( + master_doc, + "Crossflow", + "Crossflow Documentation", + author, + "Crossflow", + "Crossflow allows you to build and execute complex workflows that chain together command-line driven tools.", + ), +] + + +# -- Extension configuration ------------------------------------------------- +def setup(app): + app.add_css_file("custom.css") diff --git a/docs/source/developer_guide.rst b/docs/source/developer_guide.rst new file mode 100644 index 0000000..c6edddf --- /dev/null +++ b/docs/source/developer_guide.rst @@ -0,0 +1,136 @@ +Developer Guide +=============== + +Crossflow is open-source, and we welcome contributions from the wider community to help improve and extend its functionality. This guide walks you through setting up a development environment, running tests, submitting contributions, and maintaining coding standards. 
+ +Getting Started for Developers +------------------------------ + +Create a virtual environment:: + + python -m venv crossflow-dev + source crossflow-dev/bin/activate # Linux/macOS + crossflow-dev\Scripts\activate # Windows + +Clone the repository:: + + git clone https://github.com/CharlieLaughton/crossflow.git + cd crossflow + +Install development dependencies:: + + pip install -e .[testing,docs,pre-commit] + +Running Tests +------------- + +Run the full test suite:: + + pytest -v + +Run tests with code coverage:: + + pytest --cov crossflow --cov-report=term-missing + +Run tests for a specific module:: + + pytest crossflow/tests/test_client.py + +Run a specific test:: + + pytest crossflow/tests/test_client.py::test_subprocess_test_data + +Coding Standards +---------------- + +We use **pre-commit hooks** to maintain code quality and consistent style. To enable these hooks:: + + pre-commit install + +This ensures: + +- **Formatting** via ``black`` (`psf/black`) +- **Import sorting** via ``isort`` with the ``black`` profile +- **Linting** via ``pylint`` (run as a local hook) +- **Basic checks** via ``pre-commit-hooks``, including: + + - Detection of large added files + - AST validity checks + - Case conflict detection + - Executable shebang verification + - Merge conflict detection + - TOML and YAML syntax validation + +To skip pre-commit checks for a commit:: + + git commit -n + +.. note:: + + Pull requests must pass all pre-commit checks before being merged. + +Continuous Integration (CI) +--------------------------- + +Crossflow uses **GitHub Actions** to automatically: + +- Run all tests +- Check coding style +- Build documentation +- Validate versioning + +Every pull request will trigger these checks to ensure quality and stability. + +Building Documentation +---------------------- + +Build locally:: + + cd docs + make html + +The generated HTML files will be in ``docs/build/html/``. Open ``index.html`` in your browser to view the documentation. 
+ +Edit docs in the following directories: + +- ``docs/user_guide/`` +- ``docs/developer_guide/`` + +Contributing Code +----------------- + +Creating an Issue +^^^^^^^^^^^^^^^^^ + +If you encounter bugs or want to request features: + +1. Open an issue on GitHub. +2. Provide a clear description and input files if applicable. + +Branching +^^^^^^^^^ + +- Never commit directly to ``master``. +- Create a branch named after the issue:: + + git checkout -b 123-fix-something + +Pull Requests +^^^^^^^^^^^^^ + +1. Make your changes in a branch. +2. Ensure tests and pre-commit checks pass. +3. Submit a pull request. +4. At least one core developer will review it. +5. Include updated documentation and tests for new code. + +Summary +------- + +Full developer setup:: + + git clone https://github.com/CharlieLaughton/crossflow.git + cd crossflow + pip install -e .[testing,docs,pre-commit] + pre-commit install + pytest --cov crossflow --cov-report=term-missing diff --git a/docs/source/examples.rst b/docs/source/examples.rst new file mode 100644 index 0000000..ea449a2 --- /dev/null +++ b/docs/source/examples.rst @@ -0,0 +1,45 @@ +Putting it all together: a simple example +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Here we create a ``SubprocessTask`` to reverse the order of the lines in +a file, submit the job to a local ``Client``, and then retrieve and view +the result. + +.. 
code:: python + + from crossflow import clients, tasks, filehandling + from pathlib import Path + + # Create a short text file: + here = Path('.') + inp_file = here /'input.txt' + with inp_file.open('w') as f: + for i in range(10): + f.write('line {}\n'.format(i)) + + # Create a SubprocessTask that will reverse the lines in a file: + reverser = tasks.SubprocessTask('tail -r input > output') + reverser.set_inputs(['input']) + reverser.set_outputs(['output']) + + # Create a local client to run the job, and submit it: + client = clients.Client() + output = client.submit(reverser, inp_file) + + # output is a Future; collect its result(), convert this FileHandle object to a file, and list its contents: + output_file = here / 'joined.txt' + output.result().save(output_file) + print(output_file.read_text()) # or: print(output.result().read_text()) + +:: + + line 9 + line 8 + line 7 + line 6 + line 5 + line 4 + line 3 + line 2 + line 1 + line 0 diff --git a/docs/source/filehandles.rst b/docs/source/filehandles.rst new file mode 100644 index 0000000..b7bd036 --- /dev/null +++ b/docs/source/filehandles.rst @@ -0,0 +1,70 @@ +Crossflow FileHandles +--------------------- + +Command line tools typically take file *names* as arguments: + +.. code:: bash + + + executable -i input.dat -o output.dat + +This has issues if you want to do the computing on a distributed system +where there is no shared filesystem. Crossflow ``FileHandles`` wrap data +files as serialisable Python objects that can be safely passed around a +network. Crossflow ``Tasks`` and ``Clients`` natively understand these +as the equivalents of the filenames one would use for the equivalent +command line call. + +In normal use, FileHandles are created automatically by ``Tasks`` and/or +``Clients`` as required, the user just has to extract the required data +from FileHandles returned by Tasks/Clients. + +So for example, in: + +.. 
code:: python + + output_future = client.submit(my_task, 'input.dat') + output = output_future.result() + +``input.dat`` - the name of a local file - is converted by the client +into a ``FileHandle`` before being sent for processing, and ``output`` +is a ``FileHandle`` which the user will (probably) want to convert back +to a conventional file, using one of the methods a ``FileHandle`` +provides. + +Methods of crossflow.FileHandles +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The .save() method of a ``crossflow.FileHandle`` creates a conventional +local file with the object’s contents: + +.. code:: python + + + output.save('output.txt') + +In addition, ``crossflow.FileHandles`` follow a limited subset of the +``pathlib.Path`` API. The file handles themselves are ‘path-like’ and +can be used in place of paths in many circumstances, e.g.: + +.. code:: python + + with open(output) as f: + data = f.read() + +If your 3rd-party Python library is more picky (e.g. expects paths to be +strings), then you can use the idiom: + +.. code:: python + + result = my_fussy_function(str(output)) + +In a ``Path``-like manner, ``FileHandles`` have ``.read_binary()`` and +``.read_text()`` methods, e.g.: + +.. code:: python + + + print(output.read_text()) # prints the contents of output to the screen + +For more details on FileHandles, see :doc:`here <filehandles_advanced>` diff --git a/docs/source/filehandles_advanced.rst b/docs/source/filehandles_advanced.rst new file mode 100644 index 0000000..ed56217 --- /dev/null +++ b/docs/source/filehandles_advanced.rst @@ -0,0 +1,82 @@ +Advanced File Handling +======================= + +Here we go into ``Crossflow.FileHandle`` in more detail, see +:doc:`here <filehandles>` for the basics. + +Crossflow is designed for use on distributed computing clusters, and +does not require that all the workers can see the same filesystem. To +achieve this, all input and output files are converted to portable +objects. 
Most of this is done behind the scenes by ``crossflow.Tasks`` +and ``crossflow.Clients`` and mostly requires no user intervention, but +on occasion this is helpful. + +Crossflow File Handling basics +------------------------------ + +When a ``crossflow.Task`` that takes file names as arguments is run, the +following takes place: + +1. The input file is ‘loaded’ into a ``crossflow.FileHandle`` object. +2. The objects, along with the function to be evaluated, are sent to the + worker process. +3. The worker unpacks the ``FileHandles`` into suitably-named files in + the working directory. +4. The task function is executed. +5. Output files are loaded into ``FileHandle`` objects and returned from + the worker. + +What exactly is meant by ‘loading’ a file can be varied. By default, +each ``FileHandle`` contains a compressed copy of the data in the file +it is constructed from. This means quite a lot of data may flow between +the parent process and the workers, but is normally fast. As an +alternative, file handling can be configured so that ‘loading’ means +making a copy of the input file in a place that is accessible to both +parent process and workers. + +Configuring Crossflow to use a shared filesystem for file staging +----------------------------------------------------------------- + +If there is a filesystem that is NFS mounted on all workers, then a +directory on this filesystem may be configured as a ‘stage_point’ for +crossflow: + +.. code:: python + + import crossflow + from crossflow import tasks, clients + + crossflow.set_stage_point('/usr/shared/tmp') + + my_task = tasks.SubprocessTask('cat a b > c') + my_task.set_inputs(['a', 'b']) + my_task.set_outputs(['c']) + + # Connect to an existing distributed cluster: + my_client = clients.Client(scheduler_file='scheduler.json') + + # Submit the job. 
Files pass to/from the workers via copies in /usr/shared/tmp: + joined = my_client.submit(my_task, 'file1.txt', 'file2.txt') + +Configuring Crossflow to use an S3 bucket for file staging +---------------------------------------------------------- + +If you have an S3 bucket that can be visible on all workers, then this +can be configured as a ‘stage_point’ for crossflow: + +.. code:: python + + import crossflow + from crossflow import tasks, clients + + crossflow.set_stage_point('s3://groupname.username.crossflowbucket') + + my_task = tasks.SubprocessTask('cat a b > c') + my_task.set_inputs(['a', 'b']) + my_task.set_outputs(['c']) + + # Connect to an existing distributed cluster: + my_client = clients.Client(scheduler_file='scheduler.json') + + # Submit the job. Files pass to/from the workers via copies in the s3 bucket: + joined = my_client.submit(my_task, 'file1.txt', 'file2.txt') diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst new file mode 100644 index 0000000..32ee253 --- /dev/null +++ b/docs/source/getting_started.rst @@ -0,0 +1,69 @@ +Getting Started +=============== + +Requirements +---------------- + +* Python >= 3.11 + +Installation +---------------- +To install the released version: + +.. code-block:: bash + + pip install crossflow + +To install the latest development version: + +.. code-block:: bash + + git clone https://github.com/CharlieLaughton/crossflow.git + +.. code-block:: bash + + cd crossflow + +.. code-block:: bash + + pip install . + +Crossflow 101 +---------------- + +An introduction to the fundamentals of Crossflow + +Workflows are a common feature of much computational science. In a +workflow, the work to be done requires more than one piece of software, +and the output from one becomes the input to the next, in some form of +chain. 
Classically one would write a bash script or similar to do the +job, e.g.: + +:: + + #!/usr/bin/env bash + input_file=input.dat + intermediate_file=intermediate.dat + result_file=result.dat + + executable1 -i $input_file -o $intermediate_file + executable2 -i $intermediate_file -o $result_file + +This is OK for basic use but: + +- what if your workflow has loops, conditional executions, etc? +- what happens if you want to do things at scale? + +Crossflow is designed to make this easier. Key points are: + +1. The workflow becomes a Python program, and can make use of all + programming workflow constructs (loops, if/then/else, etc.) +2. To do this, it provides a simple approach to turning command line + tools into Python functions - this is ``crossflow.tasks``. +3. It provides a way to hand the processing of individual workflow steps + out to a distributed cluster of workers - this is + ``crossflow.clients``. +4. It provides a way to pass data between these functions without + relying on the filesystem - this is ``crossflow.filehandling``. + +Here we look at each of these components in turn. diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..5f66bdd --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,30 @@ +Welcome to Crossflow's documentation! +===================================== + +Crossflow allows you to build and execute complex workflows that chain together command-line driven tools. + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + getting_started + tasks + clients + filehandles + examples + +.. 
toctree:: + :maxdepth: 2 + :caption: Advanced: + + tasks_advanced + clients_advanced + filehandles_advanced + developer_guide + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/source/tasks.rst b/docs/source/tasks.rst new file mode 100644 index 0000000..3241ace --- /dev/null +++ b/docs/source/tasks.rst @@ -0,0 +1,80 @@ +Crossflow Tasks +--------------- + +The ``crossflow.tasks`` subpackage provides methods to turn tools that +would usually be used via the command line into Python functions. The +basic concept is that a tool that is used from the command line +something like: + +:: + + #!bash + + my_tool -i input.dat -o output.dat + +becomes, in Python: + +:: + + output = my_tool_task(input) + +Where ``my_tool_task`` is a ``crossflow.SubprocessTask`` for +``my_tool``, and ``input`` and ``output`` are ‘path-like’ Python objects +that in some way or other point at a file of data. This might be as +simple as strings, e.g. “myproject/input.dat”, but can be more complex - +see crossflow FileHandles later. + +Creating a crossflow.SubprocessTask +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a three step process: + +1. The task is created on the basis of a ``template``, a string with a + generalised version of the command you wish to execute. +2. The inputs for the task are specified. +3. The outputs from the task are specified. + +Thus: + +:: + + #!python + + my_tool_task = crossflow.tasks.SubprocessTask('my_tool -i x.in -o x.out') + my_tool_task.set_inputs(['x.in']) + my_tool_task.set_outputs(['x.out']) + +Note that the names of files used in the template string are arbitrary, +‘my_tool -i a -o b’ would do just as well, as long as the corresponding +names (‘a’, ‘b’) were used in .set_inputs() and .set_outputs(). 
+ +If the tool takes multiple files as inputs, and/or produces multiple +output files, the process is the same: + +:: + + #!python + + my_othertool_task = crossflow.tasks.SubprocessTask('my_othertool -x x.in -y y.in -o x.out -l logfile') + my_othertool_task.set_inputs(['x.in', 'y.in']) + my_othertool_task.set_outputs(['x.out', 'logfile']) + +There is no restriction on the order that inputs and outputs are +specified in the template string, but the resulting task will expect its +inputs to be provided in the order they are given in .set_inputs() and +the tuple of outputs the task produces will be in the order they are +specified in .set_outputs(). + +For more advanced aspects of ``SubprocessTask`` creation, see :doc:`here <tasks_advanced>`. + +Running a crossflow.SubprocessTask +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Although it is primarily expected that tasks will be run via a +``crossflow.Client``, they can also be executed directly: + +:: + + #!python + + output, logfile = my_othertool_task('input1.dat', 'input2.dat') diff --git a/docs/source/tasks_advanced.rst b/docs/source/tasks_advanced.rst new file mode 100644 index 0000000..3a5b893 --- /dev/null +++ b/docs/source/tasks_advanced.rst @@ -0,0 +1,138 @@ +Advanced Tasks +=============== + +Here we go into ``Crossflow.Tasks`` in more detail, see +:doc:`here <tasks>` for the basics. + +SubprocessTasks: More on templates +---------------------------------- + +“Cryptic” input and output files +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``SubprocessTasks`` are instantiated with a template, a generic version +of the command line that should be executed, e.g.: + +.. code:: python + + my_task = SubprocessTask('my_executable -i input.dat -o output.dat') + my_task.set_inputs(['input.dat']) + my_task.set_outputs(['output.dat']) + +Sometimes a command line tool produces an output file whose name is +hard-wired and so does not appear in the template. As long as you know +what the name will be, this does not matter. 
E.g., suppose +``my_executable`` also always produces a file called ``logfile``: + +:: + + my_task = SubprocessTask('my_executable -i input.dat -o output.dat') + my_task.set_inputs(['input.dat']) + my_task.set_outputs(['output.dat', 'logfile']) + +The same applies to input files, e.g. if your executable also expects a +file called ‘config.dat’ to be present, you might write: + +:: + + my_task = SubprocessTask('my_executable -i input.dat -o output.dat') + my_task.set_inputs(['input.dat', 'config.dat']) + my_task.set_outputs(['output.dat', 'logfile']) + +Input variables that are not filenames +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By default, all variables are assumed to be the names of input or output +files, but you can specify string variables (or variables that can be +represented as strings) by surrounding them by braces (“{}”) in the +template. E.g.: + +:: + + tail_task = SubprocessTask('tail -n {n_lines} input.dat > output.dat') + tail_task.set_inputs(['input.dat', 'n_lines']) + tail_task.set_outputs(['output.dat']) + + last15 = tail_task('data.txt', 15) + +Wildcards +~~~~~~~~~ + +The template string can contain wildcards ("*" or “?”), the +corresponding variable (input or output) then becomes a list of values. +E.g.: + +:: + + cat_task = SubprocessTask('cat file* > output.dat') + cat_task.set_inputs(['file*']) + cat_task.set_outputs(['output.dat']) + inputs = ['data1', 'data2', 'data3', 'data4'] + + output = cat_task(inputs) + +SubprocessTasks: Constants +-------------------------- + +If some of the inputs to your ``SubprocessTask`` are going to be +constants over many calls, you can mark them as such, in which case they +do not appear in the argument list when the task is run. E.g. if a task +will always use data from a file ‘constant.dat’ you could write: + +.. 
code:: python + + my_task = SubprocessTask('my_executable -i variable.dat -c constants -o output.dat') + my_task.set_inputs(['variable.dat', 'constants']) + my_task.set_outputs(['output.dat']) + my_task.set_constant('constants', 'constant.dat') + + output1 = my_task('input1.dat') # Note 'constant.dat' does not need to be specified + output2 = my_task('input2.dat') + +FunctionTasks +------------- + +Though the most common reason to use Crossflow is to provide an +interface to tools usually used from the command line, tasks can also be +created to wrap conventional Python functions. The most likely scenario +for this is where the Python function is compute intensive and so needs +to be executed on a distributed worker, or where it needs to access a +large unit of data that resides on a worker from a previous computation. + +``FunctionTasks`` are instantiated with the Python function they wrap: + +.. code:: python + + def mult(x, y): + return x * y + + mult_task = FunctionTask(mult) + mult_task.set_inputs(['x', 'y']) + mult_task.set_outputs('xy') + + result = mult_task(7.5, 8.4) + +Debugging Tasks +--------------- + +If an attempt to run a task results in an error, by default an exception +will be raised. If instead you want to be notified about the error, but +want execution of the script to continue, then you can include the +pseudo-variable ``crossflow.DEBUGINFO`` in the list of outputs from your +task: + +:: + + awk_task = SubprocessTask('awk -f awkscript input.dat > output.dat') + awk_task.set_inputs(['awkscript', 'input.dat']) + awk_task.set_outputs(['output.dat']) + + # Will raise an exception if awkscript contains errors: + output = awk_task('awkscript', 'infile.txt') + + awk_task.set_outputs(['output.dat', crossflow.DEBUGINFO]) + # Will not raise an exception if awkscript contains errors: + output, debuginfo = awk_task('awkscript', 'infile.txt') + ... + (examine debuginfo to decide what to do) + ... 
diff --git a/examples/Notebooks/input.txt b/examples/Notebooks/input.txt deleted file mode 100644 index 27435e1..0000000 --- a/examples/Notebooks/input.txt +++ /dev/null @@ -1,10 +0,0 @@ -line 0 -line 1 -line 2 -line 3 -line 4 -line 5 -line 6 -line 7 -line 8 -line 9 diff --git a/examples/README.md b/examples/README.md index fb5a6c7..4fe9311 100755 --- a/examples/README.md +++ b/examples/README.md @@ -1,5 +1,5 @@ ## Crossflow examples -The **Scripts** folder contains examples of Python scripts that run simple crossflow workflows.. +The **scripts** folder contains examples of Python scripts that run simple crossflow workflows.. -The **Notebooks** folder contains a number of Jupyter notebooks that illustrate similar workflows, and also includes some more advanced examples. +The **notebooks** folder contains a number of Jupyter notebooks that illustrate similar workflows, and also includes some more advanced examples. diff --git a/examples/Notebooks/Amber/Crossflow workflows 101.ipynb b/examples/notebooks/Amber/Crossflow workflows 101.ipynb similarity index 100% rename from examples/Notebooks/Amber/Crossflow workflows 101.ipynb rename to examples/notebooks/Amber/Crossflow workflows 101.ipynb diff --git a/examples/Notebooks/Amber/README.md b/examples/notebooks/Amber/README.md similarity index 100% rename from examples/Notebooks/Amber/README.md rename to examples/notebooks/Amber/README.md diff --git a/examples/Notebooks/Amber/dhfr.crd b/examples/notebooks/Amber/dhfr.crd similarity index 100% rename from examples/Notebooks/Amber/dhfr.crd rename to examples/notebooks/Amber/dhfr.crd diff --git a/examples/Notebooks/Amber/dhfr.prmtop b/examples/notebooks/Amber/dhfr.prmtop similarity index 100% rename from examples/Notebooks/Amber/dhfr.prmtop rename to examples/notebooks/Amber/dhfr.prmtop diff --git a/examples/Notebooks/Amber/step1.mdin b/examples/notebooks/Amber/step1.mdin similarity index 100% rename from examples/Notebooks/Amber/step1.mdin rename to 
examples/notebooks/Amber/step1.mdin diff --git a/examples/Notebooks/Amber/step2.mdin b/examples/notebooks/Amber/step2.mdin similarity index 100% rename from examples/Notebooks/Amber/step2.mdin rename to examples/notebooks/Amber/step2.mdin diff --git a/examples/Notebooks/Amber/step3.mdin b/examples/notebooks/Amber/step3.mdin similarity index 100% rename from examples/Notebooks/Amber/step3.mdin rename to examples/notebooks/Amber/step3.mdin diff --git a/examples/Notebooks/Chignolin_unfolding/README b/examples/notebooks/Chignolin_unfolding/README similarity index 100% rename from examples/Notebooks/Chignolin_unfolding/README rename to examples/notebooks/Chignolin_unfolding/README diff --git a/examples/Notebooks/Chignolin_unfolding/WE_unfolding.py b/examples/notebooks/Chignolin_unfolding/WE_unfolding.py similarity index 69% rename from examples/Notebooks/Chignolin_unfolding/WE_unfolding.py rename to examples/notebooks/Chignolin_unfolding/WE_unfolding.py index 0cdb0de..1e5a9ec 100644 --- a/examples/Notebooks/Chignolin_unfolding/WE_unfolding.py +++ b/examples/notebooks/Chignolin_unfolding/WE_unfolding.py @@ -2,58 +2,62 @@ # coding: utf-8 # Demonstration Weighted Ensemble Simulation - Chignolin unfolding -# +# # This notebook illustrates how you can write a simple Weighted Ensemble simulation workflow using crossflow. -# +# # It is assumed that you already have a Dask Distributed cluster up and running - see the ./cluster folder for a recipie to do this on a Condor cluster. Each Dask worker should be on its own GPU node. -# +# # The input files are taken from the WESTPA tutorial. 
-# +# # ----- -# +# # Import required packages: -# +# + +import logging +import time -from crossflow.kernels import SubprocessKernel -from crossflow.filehandling import FileHandler -from crossflow.clients import Client import mdtraj as mdt import numpy as np -import time -import logging -logging.basicConfig(filename='we_unfolding.log', level=logging.DEBUG) +from crossflow.clients import Client +from crossflow.filehandling import FileHandler +from crossflow.kernels import SubprocessKernel + +logging.basicConfig(filename="we_unfolding.log", level=logging.DEBUG) # Convert the starting structure into an Amber .ncrst format file via MDTraj: -ref = mdt.load('common_files/chignolin.pdb') -ref.save('chignolin.ncrst') +ref = mdt.load("common_files/chignolin.pdb") +ref.save("chignolin.ncrst") # Load the required starting files: fh = FileHandler() -mdin = fh.load('common_files/md.in') -crds = fh.load('chignolin.ncrst') -prmtop = fh.load('common_files/chignolin.prmtop') +mdin = fh.load("common_files/md.in") +crds = fh.load("chignolin.ncrst") +prmtop = fh.load("common_files/chignolin.prmtop") -# Create a SubprocessKernel that runs pmemd. All we need back are the +# Create a SubprocessKernel that runs pmemd. All we need back are the # final coordinates. 
-pmemd = SubprocessKernel('pmemd.cuda -i mdin -c x.inpcrd -p x.prmtop -r x.ncrst -o x.log') -#pmemd = SubprocessKernel('mpirun pmemd.MPI -i mdin -c x.inpcrd -p x.prmtop -r x.ncrst -o x.log') -pmemd.set_inputs(['mdin', 'x.inpcrd', 'x.prmtop']) -pmemd.set_outputs(['x.ncrst']) -pmemd.set_constant('mdin', mdin) -pmemd.set_constant('x.prmtop', prmtop) +pmemd = SubprocessKernel( + "pmemd.cuda -i mdin -c x.inpcrd -p x.prmtop -r x.ncrst -o x.log" +) +# pmemd = SubprocessKernel('mpirun pmemd.MPI -i mdin -c x.inpcrd -p x.prmtop -r x.ncrst -o x.log') +pmemd.set_inputs(["mdin", "x.inpcrd", "x.prmtop"]) +pmemd.set_outputs(["x.ncrst"]) +pmemd.set_constant("mdin", mdin) +pmemd.set_constant("x.prmtop", prmtop) # Create a crossflow client that talks to our dask cluster: -client = Client(scheduler_file='cluster/scheduler.json') +client = Client(scheduler_file="cluster/scheduler.json") -# We will have a target of 4 simulations per WE bin, and the bins are defined -# by RMSD from the starting structure, between 0.05 nm (limit of what is -# considered 'native' structure) and 0.40 nm (above which is 'unfolded' -# target state), in 0.02 nm increments. We will run the WE simulation for 50 +# We will have a target of 4 simulations per WE bin, and the bins are defined +# by RMSD from the starting structure, between 0.05 nm (limit of what is +# considered 'native' structure) and 0.40 nm (above which is 'unfolded' +# target state), in 0.02 nm increments. We will run the WE simulation for 50 # cycles. 
n_cycles = 1000 @@ -75,17 +79,17 @@ # Main loop: for c in range(n_cycles): # Calculate which bin each restart structure falls in to: - t = mdt.load([str(r.result()) for r in restarts], top = str(prmtop)) + t = mdt.load([str(r.result()) for r in restarts], top=str(prmtop)) bin_ids = np.digitize(mdt.rmsd(t, ref), bin_edges) # Calculate the weight which has reached the final target bin: recycled_weight = np.where(bin_ids == target_bin, weights, 0.0).sum() - logging.info('Cycle {}: recycled weight={}'.format(c, recycled_weight)) + logging.info("Cycle {}: recycled weight={}".format(c, recycled_weight)) recycled_flux.append(recycled_weight) - # Recycle any simulations that have reached the target state back to + # Recycle any simulations that have reached the target state back to # the starting state: restarts = np.where(bin_ids == target_bin, crds, restarts) bin_ids = np.where(bin_ids == target_bin, 0, bin_ids) - # Assign restart structures to bins and calculate the total weight in + # Assign restart structures to bins and calculate the total weight in # each bin: bins = {} bin_wts = {} @@ -104,15 +108,17 @@ walker_wt_max = max(weights) now = time.time() cycle_time = now - start - logging.info('{} walkers in total'.format(len(restarts))) - logging.info('{} of {} bins are populated'.format(n_occ, target_bin)) - logging.info('per-bin minimum non-zero probability: {:8.6f}'.format(bin_wt_min)) - logging.info('per-bin maximum probability: {:8.6f}'.format(bin_wt_max)) - logging.info('per-walker minimum non-zero probability: {:8.6f}'.format(walker_wt_min)) - logging.info('per-walker maximum probability: {:8.6f}'.format(walker_wt_max)) - logging.info('Time for this cycle: {:8.1f}s'.format(cycle_time)) + logging.info("{} walkers in total".format(len(restarts))) + logging.info("{} of {} bins are populated".format(n_occ, target_bin)) + logging.info("per-bin minimum non-zero probability: {:8.6f}".format(bin_wt_min)) + logging.info("per-bin maximum probability: 
{:8.6f}".format(bin_wt_max)) + logging.info( + "per-walker minimum non-zero probability: {:8.6f}".format(walker_wt_min) + ) + logging.info("per-walker maximum probability: {:8.6f}".format(walker_wt_max)) + logging.info("Time for this cycle: {:8.1f}s".format(cycle_time)) start = now - # Replicate or cull simulations from each bin to leave n_reps in each, + # Replicate or cull simulations from each bin to leave n_reps in each, # then reallocate the total bin weight evenly amongst the structures: starting_coordinates = [] weights = [] @@ -122,5 +128,5 @@ # Run the next round of simulations: restarts = client.map(pmemd, starting_coordinates) -recycled_flux = np.array(recycled_flux) / 20 # flux per picosecond -np.save('recycled_flux.npy', recycled_flux) +recycled_flux = np.array(recycled_flux) / 20 # flux per picosecond +np.save("recycled_flux.npy", recycled_flux) diff --git a/examples/Notebooks/Chignolin_unfolding/cluster/README b/examples/notebooks/Chignolin_unfolding/cluster/README similarity index 100% rename from examples/Notebooks/Chignolin_unfolding/cluster/README rename to examples/notebooks/Chignolin_unfolding/cluster/README diff --git a/examples/Notebooks/Chignolin_unfolding/cluster/dask-scheduler.con b/examples/notebooks/Chignolin_unfolding/cluster/dask-scheduler.con similarity index 100% rename from examples/Notebooks/Chignolin_unfolding/cluster/dask-scheduler.con rename to examples/notebooks/Chignolin_unfolding/cluster/dask-scheduler.con diff --git a/examples/Notebooks/Chignolin_unfolding/cluster/dask-workers.con b/examples/notebooks/Chignolin_unfolding/cluster/dask-workers.con similarity index 100% rename from examples/Notebooks/Chignolin_unfolding/cluster/dask-workers.con rename to examples/notebooks/Chignolin_unfolding/cluster/dask-workers.con diff --git a/examples/Notebooks/Chignolin_unfolding/common_files/chignolin.pdb b/examples/notebooks/Chignolin_unfolding/common_files/chignolin.pdb similarity index 100% rename from 
examples/Notebooks/Chignolin_unfolding/common_files/chignolin.pdb rename to examples/notebooks/Chignolin_unfolding/common_files/chignolin.pdb diff --git a/examples/Notebooks/Chignolin_unfolding/common_files/chignolin.prmtop b/examples/notebooks/Chignolin_unfolding/common_files/chignolin.prmtop similarity index 100% rename from examples/Notebooks/Chignolin_unfolding/common_files/chignolin.prmtop rename to examples/notebooks/Chignolin_unfolding/common_files/chignolin.prmtop diff --git a/examples/Notebooks/Chignolin_unfolding/common_files/md.in b/examples/notebooks/Chignolin_unfolding/common_files/md.in similarity index 100% rename from examples/Notebooks/Chignolin_unfolding/common_files/md.in rename to examples/notebooks/Chignolin_unfolding/common_files/md.in diff --git a/examples/Notebooks/Crossflow 101.ipynb b/examples/notebooks/Crossflow 101.ipynb similarity index 100% rename from examples/Notebooks/Crossflow 101.ipynb rename to examples/notebooks/Crossflow 101.ipynb diff --git a/examples/Notebooks/Gromacs/Crossflow workflows 101.ipynb b/examples/notebooks/Gromacs/Crossflow workflows 101.ipynb similarity index 100% rename from examples/Notebooks/Gromacs/Crossflow workflows 101.ipynb rename to examples/notebooks/Gromacs/Crossflow workflows 101.ipynb diff --git a/examples/Notebooks/Gromacs/README.md b/examples/notebooks/Gromacs/README.md similarity index 100% rename from examples/Notebooks/Gromacs/README.md rename to examples/notebooks/Gromacs/README.md diff --git a/examples/Notebooks/Gromacs/bpti.gro b/examples/notebooks/Gromacs/bpti.gro similarity index 100% rename from examples/Notebooks/Gromacs/bpti.gro rename to examples/notebooks/Gromacs/bpti.gro diff --git a/examples/Notebooks/Gromacs/bpti.top b/examples/notebooks/Gromacs/bpti.top similarity index 100% rename from examples/Notebooks/Gromacs/bpti.top rename to examples/notebooks/Gromacs/bpti.top diff --git a/examples/Notebooks/Gromacs/em.mdp b/examples/notebooks/Gromacs/em.mdp similarity index 100% rename from 
examples/Notebooks/Gromacs/em.mdp rename to examples/notebooks/Gromacs/em.mdp diff --git a/examples/Notebooks/Gromacs/nvt.mdp b/examples/notebooks/Gromacs/nvt.mdp similarity index 100% rename from examples/Notebooks/Gromacs/nvt.mdp rename to examples/notebooks/Gromacs/nvt.mdp diff --git a/examples/Notebooks/PockDock/README.md b/examples/notebooks/PockDock/README.md similarity index 100% rename from examples/Notebooks/PockDock/README.md rename to examples/notebooks/PockDock/README.md diff --git a/examples/Notebooks/PockDock/ReverseDocking.ipynb b/examples/notebooks/PockDock/ReverseDocking.ipynb similarity index 100% rename from examples/Notebooks/PockDock/ReverseDocking.ipynb rename to examples/notebooks/PockDock/ReverseDocking.ipynb diff --git a/examples/Notebooks/PockDock/complexes.list b/examples/notebooks/PockDock/complexes.list similarity index 100% rename from examples/Notebooks/PockDock/complexes.list rename to examples/notebooks/PockDock/complexes.list diff --git a/examples/Notebooks/PockDock/inverse_results.png b/examples/notebooks/PockDock/inverse_results.png similarity index 100% rename from examples/Notebooks/PockDock/inverse_results.png rename to examples/notebooks/PockDock/inverse_results.png diff --git a/examples/Notebooks/PockDock/local_cluster.sh b/examples/notebooks/PockDock/local_cluster.sh similarity index 100% rename from examples/Notebooks/PockDock/local_cluster.sh rename to examples/notebooks/PockDock/local_cluster.sh diff --git a/examples/Notebooks/PockDock/pockdock.ipynb b/examples/notebooks/PockDock/pockdock.ipynb similarity index 100% rename from examples/Notebooks/PockDock/pockdock.ipynb rename to examples/notebooks/PockDock/pockdock.ipynb diff --git a/examples/Notebooks/PockDock/prz.pdb b/examples/notebooks/PockDock/prz.pdb similarity index 100% rename from examples/Notebooks/PockDock/prz.pdb rename to examples/notebooks/PockDock/prz.pdb diff --git a/examples/Notebooks/PockDock/prz.pdbqt b/examples/notebooks/PockDock/prz.pdbqt similarity 
index 100% rename from examples/Notebooks/PockDock/prz.pdbqt rename to examples/notebooks/PockDock/prz.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1afq_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1afq_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1afq_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1afq_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1ank_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1ank_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1ank_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1ank_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1br6_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1br6_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1br6_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1br6_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1cib_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1cib_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1cib_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1cib_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1ctr_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1ctr_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1ctr_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1ctr_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1d6y_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1d6y_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1d6y_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1d6y_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1ecc_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1ecc_receptor.pdbqt similarity 
index 100% rename from examples/Notebooks/PockDock/receptors/1ecc_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1ecc_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1ey3_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1ey3_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1ey3_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1ey3_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1eyj_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1eyj_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1eyj_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1eyj_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1fqc_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1fqc_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1fqc_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1fqc_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1ftl_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1ftl_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1ftl_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1ftl_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1g67_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1g67_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1g67_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1g67_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1ghy_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1ghy_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1ghy_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1ghy_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1gky_receptor.pdbqt 
b/examples/notebooks/PockDock/receptors/1gky_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1gky_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1gky_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1gzf_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1gzf_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1gzf_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1gzf_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1ha3_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1ha3_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1ha3_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1ha3_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1imb_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1imb_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1imb_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1imb_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1j6z_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1j6z_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1j6z_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1j6z_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1l5s_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1l5s_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1l5s_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1l5s_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1lic_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1lic_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1lic_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1lic_receptor.pdbqt diff --git 
a/examples/Notebooks/PockDock/receptors/1nx3_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1nx3_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1nx3_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1nx3_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1oke_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1oke_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1oke_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1oke_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1ow3_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1ow3_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1ow3_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1ow3_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1pzy_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1pzy_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1pzy_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1pzy_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1q0b_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1q0b_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1q0b_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1q0b_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1ryo_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1ryo_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1ryo_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1ryo_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1s9d_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1s9d_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1s9d_receptor.pdbqt rename to 
examples/notebooks/PockDock/receptors/1s9d_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1t49_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1t49_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1t49_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1t49_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1tr5_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1tr5_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1tr5_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1tr5_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1u1d_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1u1d_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1u1d_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1u1d_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1xvc_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1xvc_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1xvc_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1xvc_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1yv3_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1yv3_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1yv3_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1yv3_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/1za1_receptor.pdbqt b/examples/notebooks/PockDock/receptors/1za1_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/1za1_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/1za1_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2al4_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2al4_receptor.pdbqt similarity index 100% rename from 
examples/Notebooks/PockDock/receptors/2al4_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2al4_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2brl_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2brl_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2brl_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2brl_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2bu2_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2bu2_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2bu2_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2bu2_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2bys_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2bys_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2bys_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2bys_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2egh_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2egh_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2egh_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2egh_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2eum_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2eum_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2eum_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2eum_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2gir_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2gir_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2gir_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2gir_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2gz7_receptor.pdbqt 
b/examples/notebooks/PockDock/receptors/2gz7_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2gz7_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2gz7_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2h4k_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2h4k_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2h4k_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2h4k_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2hka_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2hka_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2hka_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2hka_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2hvd_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2hvd_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2hvd_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2hvd_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2ieg_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2ieg_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2ieg_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2ieg_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2iuz_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2iuz_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2iuz_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2iuz_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2ixu_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2ixu_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2ixu_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2ixu_receptor.pdbqt diff --git 
a/examples/Notebooks/PockDock/receptors/2iyq_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2iyq_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2iyq_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2iyq_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2jds_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2jds_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2jds_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2jds_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2npq_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2npq_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2npq_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2npq_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2ofp_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2ofp_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2ofp_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2ofp_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2ohv_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2ohv_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2ohv_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2ohv_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2oo8_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2oo8_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2oo8_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2oo8_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2piq_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2piq_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2piq_receptor.pdbqt rename to 
examples/notebooks/PockDock/receptors/2piq_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2q8h_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2q8h_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2q8h_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2q8h_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2v57_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2v57_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2v57_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2v57_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2w5k_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2w5k_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2w5k_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2w5k_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2wi7_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2wi7_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2wi7_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2wi7_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2wkw_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2wkw_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2wkw_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2wkw_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/2yqs_receptor.pdbqt b/examples/notebooks/PockDock/receptors/2yqs_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/2yqs_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/2yqs_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3bl7_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3bl7_receptor.pdbqt similarity index 100% rename from 
examples/Notebooks/PockDock/receptors/3bl7_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3bl7_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3bqm_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3bqm_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3bqm_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3bqm_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3cfn_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3cfn_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3cfn_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3cfn_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3dc1_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3dc1_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3dc1_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3dc1_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3dhh_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3dhh_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3dhh_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3dhh_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3eks_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3eks_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3eks_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3eks_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3f82_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3f82_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3f82_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3f82_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3fgo_receptor.pdbqt 
b/examples/notebooks/PockDock/receptors/3fgo_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3fgo_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3fgo_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3fqk_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3fqk_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3fqk_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3fqk_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3gqz_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3gqz_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3gqz_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3gqz_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3h9j_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3h9j_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3h9j_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3h9j_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3hl7_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3hl7_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3hl7_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3hl7_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3hl8_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3hl8_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3hl8_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3hl8_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3hok_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3hok_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3hok_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3hok_receptor.pdbqt diff --git 
a/examples/Notebooks/PockDock/receptors/3hqp_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3hqp_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3hqp_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3hqp_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3hzt_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3hzt_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3hzt_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3hzt_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3ip0_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3ip0_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3ip0_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3ip0_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3ixj_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3ixj_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3ixj_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3ixj_receptor.pdbqt diff --git a/examples/Notebooks/PockDock/receptors/3lth_receptor.pdbqt b/examples/notebooks/PockDock/receptors/3lth_receptor.pdbqt similarity index 100% rename from examples/Notebooks/PockDock/receptors/3lth_receptor.pdbqt rename to examples/notebooks/PockDock/receptors/3lth_receptor.pdbqt diff --git a/examples/Notebooks/README.md b/examples/notebooks/README.md similarity index 100% rename from examples/Notebooks/README.md rename to examples/notebooks/README.md diff --git a/examples/Notebooks/Simple workflow.ipynb b/examples/notebooks/Simple workflow.ipynb similarity index 100% rename from examples/Notebooks/Simple workflow.ipynb rename to examples/notebooks/Simple workflow.ipynb diff --git a/examples/Scripts/README.md b/examples/scripts/README.md similarity index 100% rename from examples/Scripts/README.md rename to 
examples/scripts/README.md diff --git a/examples/Scripts/Slurm_GPU/dask-mig-worker b/examples/scripts/Slurm_GPU/dask-mig-worker similarity index 100% rename from examples/Scripts/Slurm_GPU/dask-mig-worker rename to examples/scripts/Slurm_GPU/dask-mig-worker diff --git a/examples/Scripts/Slurm_GPU/slurm_example.sh b/examples/scripts/Slurm_GPU/slurm_example.sh similarity index 100% rename from examples/Scripts/Slurm_GPU/slurm_example.sh rename to examples/scripts/Slurm_GPU/slurm_example.sh diff --git a/examples/Scripts/example1.py b/examples/scripts/example1.py similarity index 66% rename from examples/Scripts/example1.py rename to examples/scripts/example1.py index 89d7b38..15c37d2 100644 --- a/examples/Scripts/example1.py +++ b/examples/scripts/example1.py @@ -3,20 +3,21 @@ # Basic example of how to create tasks, load input data, and # run locally (without using a client) # -from crossflow import tasks from pathlib import Path +from crossflow import tasks + # Create two short text files: -here = Path('.') -file1 = here / 'file1.txt' -file1.write_text('content\n') -file2 = here / 'file2.txt' -file2.write_text('more content\n') +here = Path(".") +file1 = here / "file1.txt" +file1.write_text("content\n") +file2 = here / "file2.txt" +file2.write_text("more content\n") # Create a Subprocesstask that will join input files together: -joiner = tasks.SubprocessTask('cat * > output') -joiner.set_inputs(['*']) -joiner.set_outputs(['output']) +joiner = tasks.SubprocessTask("cat * > output") +joiner.set_inputs(["*"]) +joiner.set_outputs(["output"]) # The task expects an arbitrary number of input files, so put the inputs # into a list, and then call the task: @@ -24,6 +25,6 @@ joined = joiner(inputs) # Save the output FileHandle object as a file, and list its contents: -output = here / 'joined.txt' +output = here / "joined.txt" joined.save(output) print(output.read_text()) diff --git a/examples/Scripts/example2.py b/examples/scripts/example2.py similarity index 76% rename from 
examples/Scripts/example2.py rename to examples/scripts/example2.py index 8e46fb6..fbeda73 100644 --- a/examples/Scripts/example2.py +++ b/examples/scripts/example2.py @@ -6,22 +6,23 @@ # the associated client, this part of the process must be done in # __main__ # -from crossflow import clients, tasks from pathlib import Path +from crossflow import clients, tasks + def run(client): # Create two short text files: - here = Path('.') - input_file1 = here / 'file1.txt' - input_file1.write_text('content\n') - input_file2 = here / 'file2.txt' - input_file2.write_text('more content\n') + here = Path(".") + input_file1 = here / "file1.txt" + input_file1.write_text("content\n") + input_file2 = here / "file2.txt" + input_file2.write_text("more content\n") # Create a Subprocesstask that will join input files together: - joiner = tasks.SubprocessTask('cat * > output') - joiner.set_inputs(['*']) - joiner.set_outputs(['output']) + joiner = tasks.SubprocessTask("cat * > output") + joiner.set_inputs(["*"]) + joiner.set_outputs(["output"]) # The task expects an arbitrary number of input files, so put the inputs # into a list: @@ -35,12 +36,12 @@ def run(client): output_filehandle = output.result() # Save the output FileHandle object as a file, and list its contents: - output_file = here / 'joined.txt' + output_file = here / "joined.txt" output_filehandle.save(output_file) return output_file -if __name__ == '__main__': +if __name__ == "__main__": # Create a local compute cluster and the client to serve it: client = clients.Client() output_file = run(client) diff --git a/examples/Scripts/example3.py b/examples/scripts/example3.py similarity index 71% rename from examples/Scripts/example3.py rename to examples/scripts/example3.py index 9a979c4..6c7346a 100644 --- a/examples/Scripts/example3.py +++ b/examples/scripts/example3.py @@ -6,32 +6,33 @@ # A large text file is split into a number of separate ones, each of these # has its lines reversed (last to first), and then the chunks are 
recombined. # -from crossflow import clients, tasks from pathlib import Path +from crossflow import clients, tasks + def run(client): # Create the initial text file: - here = Path('.') - input_file = here / 'input.txt' - with input_file.open('w') as f: + here = Path(".") + input_file = here / "input.txt" + with input_file.open("w") as f: for i in range(50): - f.write('line {}\n'.format(i)) + f.write("line {}\n".format(i)) # Create a SubprocessTask that will split up the input file: - splitter = tasks.SubprocessTask('split -l 10 input.txt') - splitter.set_inputs(['input.txt']) - splitter.set_outputs(['xaa', 'xab', 'xac', 'xad', 'xae']) + splitter = tasks.SubprocessTask("split -l 10 input.txt") + splitter.set_inputs(["input.txt"]) + splitter.set_outputs(["xaa", "xab", "xac", "xad", "xae"]) # Create a SubprocessTask to reverse the order of the lines in a file: - reverser = tasks.SubprocessTask('tail -r input > output') - reverser.set_inputs(['input']) - reverser.set_outputs(['output']) + reverser = tasks.SubprocessTask("tail -r input > output") + reverser.set_inputs(["input"]) + reverser.set_outputs(["output"]) # Create a Subprocesstask that will join input files together: - joiner = tasks.SubprocessTask('cat * > output') - joiner.set_inputs(['*']) - joiner.set_outputs(['output']) + joiner = tasks.SubprocessTask("cat * > output") + joiner.set_inputs(["*"]) + joiner.set_outputs(["output"]) # Here is the workflow, using .submit() and .map() methods. 
# First split the file into pieces: @@ -49,12 +50,12 @@ def run(client): output_filehandle = output.result() # Save the output FileHandle object as a file, and list its contents: - output_file = here / 'processed.txt' + output_file = here / "processed.txt" output_filehandle.save(output_file) return output_file -if __name__ == '__main__': +if __name__ == "__main__": # Create a local compute cluster and the client to serve it: client = clients.Client() output_file = run(client) diff --git a/pyproject.toml b/pyproject.toml index c9315a3..223f60c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,97 @@ [build-system] -requires = ["setuptools", "wheel"] -build-backend = "setuptools.build_meta" -[tool.pytest.ini_options] -pythonpath = [ - "." +# Build the package with [flit](https://flit.readthedocs.io) +requires = ["flit_core >=3.4,<4"] +build-backend = "flit_core.buildapi" + +[project] +# See https://www.python.org/dev/peps/pep-0621/ +name = "crossflow" +dynamic = ["version"] +description = "Crossflow allows you to build and execute complex workflows that chain together command-line driven tools." 
+authors = [ + {name = "Charlie Laughton", email = "charles.laughton@nottingham.ac.uk"}, + {name = "Christian Suess", email = "christian.suess@nottingham.ac.uk"}, + {name = "James Gebbie-Rayet", email = "james.gebbie@stfc.ac.uk"}, + {name = "Sam Cox", email = "sam.cox@hdruk.ac.uk"}, ] +readme = "README.md" +license = { file = "LICENSE" } +classifiers = [ + "Programming Language :: Python", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Natural Language :: English", + "Development Status :: 3 - Alpha" +] +keywords = ["hpc", "workflows", "molecular dynamics"] +requires-python = ">=3.11" +dependencies = [ + "dask", + "distributed", + "fsspec", +] + +[project.urls] +Homepage = "https://" +Repository = "https://" +Documentation = "https://" + +[project.optional-dependencies] +testing = [ + "pytest==8.2.2", + "pytest-cov==5.0.0", + "pytest-sugar==1.0.0" +] + +pre-commit = [ + "pre-commit==3.7.1", + "pylint==3.2.5" +] +docs = [ + "sphinx", + "sphinx_rtd_theme", + "sphinxcontrib-contentui", + "sphinxcontrib-details-directive", + "sphinx_copybutton", + "furo", + "markupsafe<2.1" +] + +[tool.pylint.messages_control] +disable = [ + "too-many-nested-blocks", + "no-else-return", + "too-many-branches", + "too-many-statements", + "missing-function-docstring", + "consider-using-max-builtin", + "useless-object-inheritance", + "no-member", + "consider-using-enumerate", + "consider-using-f-string", + "invalid-name", + "missing-module-docstring", + "consider-using-dict-items", + "missing-class-docstring", + "deprecated-class", + "pointless-string-statement", + "too-many-locals", + "raise-missing-from", + "unnecessary-pass", +] + +[tool.pylint.format] +max-line-length = 120 + +[tool.coverage.run] +# Configuration of [coverage.py](https://coverage.readthedocs.io) +# reporting which lines of your plugin are covered 
by tests +source=["crossflow"] + +[tool.isort] +# Configuration of [isort](https://isort.readthedocs.io) +line_length = 120 +force_sort_within_sections = true diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 7d2b481..0000000 --- a/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -dask -distributed -fsspec diff --git a/run_client_tests b/run_client_tests deleted file mode 100755 index 5ce5977..0000000 --- a/run_client_tests +++ /dev/null @@ -1,112 +0,0 @@ -#!/usr/bin/env python -from crossflow import filehandling, tasks, clients -import tempfile -from pathlib import Path - - -def test_function_test(myclient, tmpdir): - def testit(f): - return f - fk = tasks.FunctionTask(testit) - fk.set_inputs(['f']) - fk.set_outputs(['x']) - p = tmpdir / "hello.txt" - p.write_text("content") - fh = filehandling.FileHandler() - pf = fh.load(p) - result = myclient.submit(fk, pf) - try: - assert isinstance(result.result(), filehandling.FileHandle) - except AssertionError: - print('Error: result.result() = {}'.format(result.result())) - raise - - -def test_function_test_no_filehandler(myclient, tmpdir): - def testit(f): - return f - fk = tasks.FunctionTask(testit) - fk.set_inputs(['f']) - fk.set_outputs(['x']) - p = tmpdir / "hello.txt" - p.write_text("content") - result = myclient.submit(fk, p) - try: - assert isinstance(result.result(), filehandling.FileHandle) - except AssertionError: - print('Error: result.result() = {}'.format(result.result())) - raise - - -def test_subprocess_test_data(myclient, tmpdir): - sk = tasks.SubprocessTask('cat file.txt') - sk.set_inputs(['file.txt']) - sk.set_outputs([tasks.STDOUT]) - p = tmpdir / "hello.txt" - p.write_text("content") - fh = filehandling.FileHandler() - pf = fh.load(p) - ll = myclient.upload(pf) - result = myclient.submit(sk, ll) - try: - assert result.result() == 'content' - except AssertionError: - print('Error: result.result() = {}'.format(result.result())) - - -def 
test_subprocess_test_no_filehandler(myclient, tmpdir): - sk = tasks.SubprocessTask('cat file.txt') - sk.set_inputs(['file.txt']) - sk.set_outputs([tasks.STDOUT]) - p = tmpdir / "hello.txt" - p.write_text("content") - result = myclient.submit(sk, p) - try: - assert result.result() == 'content' - except AssertionError: - print('Error: result.result() = {}'.format(result.result())) - - -def test_subprocess_test_file(myclient, tmpdir): - sk = tasks.SubprocessTask('cat file.txt') - sk.set_inputs(['file.txt']) - sk.set_outputs([tasks.STDOUT]) - p = tmpdir / "hello.txt" - p.write_text("content") - fh = filehandling.FileHandler(tmpdir) - pf = fh.load(p) - ll = myclient.upload(pf) - result = myclient.submit(sk, ll) - try: - assert result.result() == 'content' - except AssertionError: - print('Error: result.result() = {}'.format(result.result())) - - -def test_subprocess_test_s3(myclient, tmpdir): - sk = tasks.SubprocessTask('cat file.txt') - sk.set_inputs(['file.txt']) - sk.set_outputs([tasks.STDOUT]) - p = tmpdir / "hello.txt" - p.write_text("content") - fh = filehandling.FileHandler('s3://bucket_name') - fp = fh.load(p) - ll = myclient.upload(fp) - result = myclient.submit(sk, ll) - try: - assert result.result() == 'content' - except AssertionError: - print('Error: result.result() = {}'.format(result.result())) - - -if __name__ == '__main__': - myclient = clients.Client() - tmpdir = Path(tempfile.mkdtemp()) - test_function_test(myclient, tmpdir) - test_function_test_no_filehandler(myclient, tmpdir) - test_subprocess_test_data(myclient, tmpdir) - test_subprocess_test_no_filehandler(myclient, tmpdir) - test_subprocess_test_file(myclient, tmpdir) - myclient.close() - - diff --git a/setup.cfg b/setup.cfg deleted file mode 100755 index 08aedd7..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[metadata] -description_file = README.md diff --git a/setup.py b/setup.py deleted file mode 100755 index 2eaa52e..0000000 --- a/setup.py +++ /dev/null @@ -1,120 +0,0 @@ 
-#!/usr/bin/env python -""" Setup script. Used by easy_install and pip. """ - -import os -import sys -import re -import textwrap - -from setuptools import setup, find_packages -from setuptools import Extension as Ext - - -"""Some functions for checking and showing errors and warnings.""" - - -def _print_admonition(kind, head, body): - tw = textwrap.TextWrapper(initial_indent=' ', subsequent_indent=' ') - - print(".. {0}:: {1}".format(kind.upper(), head)) - for line in tw.wrap(body): - print(line) - - -def exit_with_error(head, body=''): - _print_admonition('error', head, body) - sys.exit(1) - - -def print_warning(head, body=''): - _print_admonition('warning', head, body) - - -def check_import(pkgname, pkgver): - """ Check for required Python packages. """ - try: - mod = __import__(pkgname) - if mod.__version__ < pkgver: - raise ImportError - except ImportError: - exit_with_error(f"Can't find a local {pkgname} installation" - f" with version >= {pkgver}. " - f"Crossflow needs {pkgname} {pkgname} or greater" - " to compile and run! 
" - "Please see the ``README`` file.") - - print(f"* Found {pkgname} {mod.__version__} package installed.") - globals()[pkgname] = mod - - -def read(*rnames): - return open(os.path.join(os.path.dirname(__file__), *rnames)).read() - - -"""Discover the package version""" -VSRE = r"^__version__ = ['\"]([^'\"]*)['\"]" -VERSIONFILE = "crossflow/_version.py" -verstrline = open(VERSIONFILE, "rt").read() -mo = re.search(VSRE, verstrline, re.M) -if mo: - verstr = mo.group(1) -else: - raise RuntimeError(f"Cannot find version string in {VERSIONFILE}.") - - -"""Check Python version""" -print("* Checking Python version...") -if sys.version_info[:2] < (3, 4): - exit_with_error("You need Python 3.4+ to install crossflow!") -print("* Python version OK!") - - -"""Set up crossflow.""" - - -class Extension(Ext, object): - pass - - -setup_args = { - 'name': "crossflow", - 'version': verstr, - 'description': "A Python workflows system", - 'long_description': read('README.md'), - 'author': "Charlie Laughton", - 'author_email': "charles.laughton@nottingham.ac.uk", - 'url': "", - 'license': "MIT license.", - - 'classifiers': [ - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Science/Research', - 'Environment :: Console', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Topic :: Scientific/Engineering', - 'Operating System :: MacOS :: MacOS X', - 'Operating System :: POSIX', - 'Operating System :: Unix' - ], - - 'packages': find_packages(), - - 'scripts': [ - ], - - 'install_requires': [ - 'dask', - 'distributed', - 'fsspec', - ], - - 'zip_safe': False, -} - -setup(**setup_args) diff --git a/test/test_client.py b/test/test_client.py deleted file mode 100644 index d78d73e..0000000 --- a/test/test_client.py +++ /dev/null @@ -1,9 +0,0 
@@
-import os
-import subprocess
-
-
-def test_client():
-    SCRIPTLOC = os.path.dirname(os.path.dirname(__file__))
-    RUNSCRIPT = os.path.join(SCRIPTLOC, "run_client_tests")
-
-    subprocess.run(RUNSCRIPT, check=True)
diff --git a/tests/test_client.py b/tests/test_client.py
new file mode 100644
index 0000000..c82e339
--- /dev/null
+++ b/tests/test_client.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+from pathlib import Path
+
+import pytest
+
+from crossflow import clients, filehandling, tasks
+
+
+@pytest.fixture(scope="session")
+def myclient(request):
+    newclient = clients.Client()
+
+    def delete_client():
+        newclient.close()
+
+    request.addfinalizer(delete_client)
+    return newclient
+
+
+def test_function_test(myclient, tmpdir):
+    def testit(f):
+        return f
+
+    fk = tasks.FunctionTask(testit)
+    fk.set_inputs(["f"])
+    fk.set_outputs(["x"])
+    p = tmpdir / "hello.txt"
+    p.write_text("content", encoding="utf-8")
+    fh = filehandling.FileHandler()
+    pf = fh.load(p)
+    result = myclient.submit(fk, pf)
+    try:
+        assert isinstance(result.result(), filehandling.FileHandle)
+    except AssertionError:
+        print("Error: result.result() = {}".format(result.result()))
+        raise
+
+
+def test_function_test_no_filehandler(myclient, tmpdir):
+    def testit(f):
+        return f
+
+    fk = tasks.FunctionTask(testit)
+    fk.set_inputs(["f"])
+    fk.set_outputs(["x"])
+    p = tmpdir / "hello.txt"
+    p.write_text("content", encoding="utf-8")
+    result = myclient.submit(fk, p)
+    try:
+        assert isinstance(result.result(), filehandling.FileHandle)
+    except AssertionError:
+        print("Error: result.result() = {}".format(result.result()))
+        raise
+
+
+def test_subprocess_test_data(myclient, tmpdir):
+    sk = tasks.SubprocessTask("cat file.txt")
+    sk.set_inputs(["file.txt"])
+    sk.set_outputs([tasks.STDOUT])
+    p = tmpdir / "hello.txt"
+    p.write_text("content", encoding="utf-8")
+    fh = filehandling.FileHandler()
+    pf = fh.load(p)
+    ll = myclient.upload(pf)
+    result = myclient.submit(sk, ll)
+    try:
+        assert result.result() == "content"
+    except AssertionError:
+        print("Error: result.result() = {}".format(result.result()))
+        raise  # propagate so pytest records the failure
+
+
+def test_subprocess_test_no_filehandler(myclient, tmpdir):
+    sk = tasks.SubprocessTask("cat file.txt")
+    sk.set_inputs(["file.txt"])
+    sk.set_outputs([tasks.STDOUT])
+    p = tmpdir / "hello.txt"
+    p.write_text("content", encoding="utf-8")
+    result = myclient.submit(sk, p)
+    try:
+        assert result.result() == "content"
+    except AssertionError:
+        print("Error: result.result() = {}".format(result.result()))
+        raise  # propagate so pytest records the failure
+
+
+def test_subprocess_test_file(myclient, tmpdir):
+    sk = tasks.SubprocessTask("cat file.txt")
+    sk.set_inputs(["file.txt"])
+    sk.set_outputs([tasks.STDOUT])
+    p = tmpdir / "hello.txt"
+    p.write_text("content", encoding="utf-8")
+    fh = filehandling.FileHandler(tmpdir)
+    pf = fh.load(p)
+    ll = myclient.upload(pf)
+    result = myclient.submit(sk, ll)
+    try:
+        assert result.result() == "content"
+    except AssertionError:
+        print("Error: result.result() = {}".format(result.result()))
+        raise  # propagate so pytest records the failure
+
+
+# def test_subprocess_test_s3(myclient, tmpdir):
+#     sk = tasks.SubprocessTask('cat file.txt')
+#     sk.set_inputs(['file.txt'])
+#     sk.set_outputs([tasks.STDOUT])
+#     p = tmpdir / "hello.txt"
+#     p.write_text("content", encoding="utf-8")
+#     fh = filehandling.FileHandler('s3://bucket_name')
+#     fp = fh.load(p)
+#     ll = myclient.upload(fp)
+#     result = myclient.submit(sk, ll)
+#     try:
+#         assert result.result() == 'content'
+#     except AssertionError:
+#         print('Error: result.result() = {}'.format(result.result()))
diff --git a/test/test_filehandling.py b/tests/test_filehandling.py
similarity index 99%
rename from test/test_filehandling.py
rename to tests/test_filehandling.py
index 1e19f98..241b6cd 100644
--- a/test/test_filehandling.py
+++ b/tests/test_filehandling.py
@@ -1,6 +1,7 @@
-from crossflow import filehandling
 import os.path as op
 
+from crossflow import filehandling
+
 
 def test_data_protocol(tmpdir):
     d = tmpdir.mkdir("sub")
diff --git a/test/test_tasks.py b/tests/test_tasks.py
similarity index
95% rename from test/test_tasks.py rename to tests/test_tasks.py index 4be41d2..e81d160 100644 --- a/test/test_tasks.py +++ b/tests/test_tasks.py @@ -1,6 +1,7 @@ -from crossflow import filehandling, tasks import pytest +from crossflow import filehandling, tasks + def test_subprocess_task_no_filehandles(tmpdir): sk = tasks.SubprocessTask("cat file.txt") @@ -23,6 +24,7 @@ def test_subprocess_task_stdout(tmpdir): result = sk(pf) assert result == "content" + def test_subprocess_task_constant_filehandle(tmpdir): sk = tasks.SubprocessTask("cat file.txt") sk.set_inputs(["file.txt"]) @@ -31,7 +33,7 @@ def test_subprocess_task_constant_filehandle(tmpdir): p.write("content") fh = filehandling.FileHandler() pf = fh.load(p) - sk.set_constant('file.txt', pf) + sk.set_constant("file.txt", pf) result = sk() assert result == "content" @@ -64,6 +66,7 @@ def test_subprocess_task_globinputs_2(tmpdir): result.save(r) assert r.read() == "content\nmore content\n" + def test_subprocess_task_globinputs_1(tmpdir): sk = tasks.SubprocessTask("cat *.txt > out.dat") sk.set_inputs(["*.txt"]) @@ -107,17 +110,19 @@ def test_subprocess_task_catch_fail(): result = sk() assert isinstance(result, tasks.XflowError) + def test_subprocesstask_with_constant(): sk = tasks.SubprocessTask("head -{n} file.txt > out.dat") sk.set_inputs(["n", "file.txt"]) sk.set_outputs(["out.dat"]) - sk.set_constant('n', 1) + sk.set_constant("n", 1) fh = filehandling.FileHandler() pf = fh.create("tmp.txt") - pf.write_text('line 1\nline 2\nline 3') + pf.write_text("line 1\nline 2\nline 3") result = sk(pf) - assert result.read_text() == 'line 1\n' - + assert result.read_text() == "line 1\n" + + def test_function_task_basic(): def mult(a, b): return a * b @@ -147,6 +152,7 @@ def linecount(a): result = fk.run(pf) assert result == 3 + def test_function_task_with_output_filehandles(tmpdir): d = tmpdir.mkdir("sub") p = d.join("lines.txt") @@ -157,9 +163,9 @@ def test_function_task_with_output_filehandles(tmpdir): def duplicate(a): 
with open(a) as f: data = f.read() - with open('out.dat', 'w') as f: + with open("out.dat", "w") as f: f.write(data) - return 'out.dat' + return "out.dat" fk = tasks.FunctionTask(duplicate) fk.set_inputs(["a"]) @@ -167,6 +173,7 @@ def duplicate(a): result = fk.run(pf) assert isinstance(result, filehandling.FileHandle) + def test_function_task_with_constant_filehandles(tmpdir): d = tmpdir.mkdir("sub") p = d.join("lines.txt") @@ -182,7 +189,7 @@ def linecount(a): fk = tasks.FunctionTask(linecount) fk.set_inputs(["a"]) fk.set_outputs(["nlines"]) - fk.set_constant('a', pf) + fk.set_constant("a", pf) result = fk.run() assert result == 3 @@ -202,7 +209,7 @@ def linecount(a): fk = tasks.FunctionTask(linecount) fk.set_inputs(["a"]) fk.set_outputs(["nlines"]) - result = fk.run({'a':pf}) + result = fk.run({"a": pf}) assert result == 3