diff --git a/.github/workflows/check-notebooks.yml b/.github/workflows/check-notebooks.yml new file mode 100644 index 0000000..c4d8846 --- /dev/null +++ b/.github/workflows/check-notebooks.yml @@ -0,0 +1,75 @@ +name: Enforce Config Update for Notebooks + +on: + pull_request: + types: [opened, synchronize, reopened] + +permissions: + contents: read + +jobs: + check-notebook-config: + runs-on: ubuntu-latest + steps: + # 1. Checkout the code + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Required to compare against the base branch + + # 2. Run the check script + - name: Check for ipynb changes without config update + env: + # Path to config script that should be updated when notebooks are changed + CONFIG_FILE_PATH: ".github/workflows/scripts/update_docs_navigation.py" + BASE_BRANCH: ${{ github.base_ref }} + run: | + echo "Comparing HEAD against origin/$BASE_BRANCH..." + + # Get list of changed files between the PR branch and the base branch + CHANGED_FILES=$(git diff --name-only origin/$BASE_BRANCH HEAD) + + # Extract normalized notebook names for added/modified notebooks only + # (exclude deletions — deleted notebooks don't need mappings) + # Transforms: Getting_Started/Part_1_Loading_Data.ipynb -> part-1-loading-data + HAS_IPYNB=$(git diff --name-only --diff-filter=d origin/$BASE_BRANCH HEAD \ + | grep '\.ipynb$' \ + | xargs -n1 basename \ + | sed 's/\.ipynb$//' \ + | sed 's/_/-/g' \ + | tr '[:upper:]' '[:lower:]' || true) + + # Check if the specific config file was changed + # We search specifically for the file path defined in env + HAS_CONFIG=$(echo "$CHANGED_FILES" | grep "^$CONFIG_FILE_PATH$" || true) + + # LOGIC: + # If notebooks are present AND config is empty (missing), fail the build. + if [[ -n "$HAS_IPYNB" ]] && [[ -z "$HAS_CONFIG" ]]; then + echo "::error::FAILURE: You added or modified an .ipynb file, but you did not update $CONFIG_FILE_PATH." 
+ echo "::error::Please update the configuration file to encompass your notebook changes." + exit 1 + elif [[ -n "$HAS_IPYNB" ]]; then + echo "Pass: Notebooks changed, and config file was updated." + else + echo "Pass: No notebooks were modified in this PR." + fi + + # Check if the filenames of modified notebooks already exist in the config file + CONFIG_CONTENT=$(cat "$CONFIG_FILE_PATH") + MISSING_MAPPING="" + + if [[ -n "$HAS_IPYNB" ]]; then + while IFS= read -r notebook; do + if ! grep -q "\"$notebook\":" <<< "$CONFIG_CONTENT"; then + MISSING_MAPPING="${MISSING_MAPPING}"$'\n'" - $notebook" + fi + done <<< "$HAS_IPYNB" + fi + + # If there are notebooks without mappings, fail the build + if [[ -n "$MISSING_MAPPING" ]]; then + echo "::error::FAILURE: The following notebooks are missing mappings in $CONFIG_FILE_PATH:$MISSING_MAPPING" + echo "::error::Please update the configuration file to encompass your notebook changes." + exit 1 + fi diff --git a/.github/workflows/convert-notebooks.yml b/.github/workflows/convert-notebooks.yml index 02dd284..a9a20da 100644 --- a/.github/workflows/convert-notebooks.yml +++ b/.github/workflows/convert-notebooks.yml @@ -8,6 +8,8 @@ on: - '**/*.ipynb' - '.github/workflows/convert-notebooks.yml' - '.github/workflows/scripts/notebook_to_mdx.py' + - '.github/workflows/scripts/cleanup_orphaned_mdx.py' + - '.github/workflows/scripts/update_docs_navigation.py' workflow_dispatch: permissions: @@ -34,14 +36,39 @@ jobs: with: python-version: '3.11' + - name: Find notebook directories + id: find-dirs + run: | + NOTEBOOK_DIRS=$(find wherobots-examples -name "*.ipynb" -type f \ + | xargs -I {} dirname {} \ + | sort -u \ + | grep -v ".ipynb_checkpoints" \ + | tr '\n' ' ') + echo "dirs=$NOTEBOOK_DIRS" >> $GITHUB_OUTPUT + + - name: Clean up orphaned MDX files + id: cleanup + run: | + python wherobots-examples/.github/workflows/scripts/cleanup_orphaned_mdx.py \ + ${{ steps.find-dirs.outputs.dirs }} \ + --mdx-dir docs/tutorials/example-notebooks \ + 
--exclude-prefix Raster_Inference \ + --output-file /tmp/removed-mdx.txt \ + -v + + if [ -f /tmp/removed-mdx.txt ] && [ -s /tmp/removed-mdx.txt ]; then + echo "has_deletions=true" >> $GITHUB_OUTPUT + echo "deleted_files<<EOF" >> $GITHUB_OUTPUT + cat /tmp/removed-mdx.txt | sed 's/^/- /' >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + else + echo "has_deletions=false" >> $GITHUB_OUTPUT + fi + - name: Convert notebooks to MDX run: | python wherobots-examples/.github/workflows/scripts/notebook_to_mdx.py \ - wherobots-examples/Getting_Started/ \ - wherobots-examples/Analyzing_Data/ \ - wherobots-examples/Reading_and_Writing_Data/ \ - wherobots-examples/Open_Data_Connections/ \ - wherobots-examples/scala/ \ + ${{ steps.find-dirs.outputs.dirs }} \ -o docs/tutorials/example-notebooks \ --exclude-prefix Raster_Inference \ -v @@ -56,7 +83,7 @@ jobs: - name: Check for changes id: git-check working-directory: docs run: | - git add tutorials/example-notebooks/ docs.json + git add -A tutorials/example-notebooks/ docs.json if git diff --staged --quiet; then echo "changes=false" >> $GITHUB_OUTPUT else @@ -68,28 +95,38 @@ working-directory: docs env: GH_TOKEN: ${{ secrets.DOCS_REPO_TOKEN }} + HAS_DELETIONS: ${{ steps.cleanup.outputs.has_deletions }} + DELETED_FILES: ${{ steps.cleanup.outputs.deleted_files }} run: | - # Configure git git config --local user.email "github-actions[bot]@users.noreply.github.com" git config --local user.name "github-actions[bot]" - # Create a unique branch name using the source commit SHA BRANCH_NAME="sync-notebooks-${{ github.sha }}" git checkout -b "$BRANCH_NAME" - - # Commit changes git commit -m "docs: sync example notebooks from wherobots-examples@${{ github.sha }}" - - # Push the branch git push -u origin "$BRANCH_NAME" - # Create PR - gh pr create \ - --title "docs: sync example notebooks from wherobots-examples" \ - --body "## Summary + PR_BODY="## Summary - Automated sync of Jupyter notebooks from wherobots-examples repo - Converts notebooks to MDX format for Mintlify
documentation - - Updates docs.json navigation + - Updates docs.json navigation" + + if [ "$HAS_DELETIONS" = "true" ]; then + PR_BODY="$PR_BODY + ## Deleted MDX Files + + The following MDX files were removed (source notebooks deleted or renamed): + + $DELETED_FILES" + fi + + PR_BODY="$PR_BODY + + --- This PR was automatically generated by the [Convert Notebooks to MDX](https://github.com/wherobots/wherobots-examples/actions/workflows/convert-notebooks.yml) workflow." + + gh pr create \ + --title "docs: sync example notebooks from wherobots-examples" \ + --body "$PR_BODY" diff --git a/.github/workflows/scripts/cleanup_orphaned_mdx.py b/.github/workflows/scripts/cleanup_orphaned_mdx.py new file mode 100644 index 0000000..65777b3 --- /dev/null +++ b/.github/workflows/scripts/cleanup_orphaned_mdx.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +""" +Clean up orphaned MDX files that no longer have corresponding notebooks. + +This script compares MDX files in the docs repo against notebooks in the +examples repo and removes any MDX files that are orphans (their source +notebook was deleted or renamed). +""" + +import argparse +from pathlib import Path + + +def get_expected_mdx_names(notebook_dirs: list[Path], exclude_prefix: str) -> set[str]: + """Get the set of expected MDX filenames from existing notebooks. + + Returns filenames without extension, using dash-separated lowercase format + (matching the to_page_slug() logic in notebook_to_mdx.py). 
+ """ + expected = set() + + for notebook_dir in notebook_dirs: + if not notebook_dir.exists(): + continue + + for notebook in notebook_dir.rglob("*.ipynb"): + # Skip excluded notebooks + if exclude_prefix and notebook.name.startswith(exclude_prefix): + continue + + # Convert notebook name to expected MDX name + # (underscores to dashes, lowercase — matching to_page_slug()) + mdx_name = notebook.stem.replace("_", "-").lower() + expected.add(mdx_name) + + return expected + + +def get_existing_mdx_names(mdx_dir: Path) -> set[str]: + """Get the set of existing MDX filenames (without extension).""" + if not mdx_dir.exists(): + return set() + + return {mdx_file.stem for mdx_file in mdx_dir.glob("*.mdx")} + + +def cleanup_orphaned_files( + mdx_dir: Path, + orphaned_names: set[str], + dry_run: bool = False, + verbose: bool = False, +) -> list[Path]: + """Remove orphaned MDX files and their associated images. + + Returns list of removed files. + """ + removed = [] + images_dir = mdx_dir / "images" + + for name in sorted(orphaned_names): + # Remove MDX file + mdx_file = mdx_dir / f"{name}.mdx" + if mdx_file.exists(): + if verbose: + print(f"Removing orphaned MDX: {mdx_file}") + if not dry_run: + mdx_file.unlink() + removed.append(mdx_file) + + # Remove associated images (prefixed with notebook slug) + # Images are named: {notebook-slug}-{image-name}.{ext} + if images_dir.exists(): + for image_file in images_dir.glob(f"{name}-*"): + if verbose: + print(f"Removing orphaned image: {image_file}") + if not dry_run: + image_file.unlink() + removed.append(image_file) + + return removed + + +def main(): + parser = argparse.ArgumentParser( + description="Clean up orphaned MDX files from deleted/renamed notebooks" + ) + parser.add_argument( + "notebook_dirs", + nargs="+", + help="Directories containing source notebooks", + ) + parser.add_argument( + "--mdx-dir", + required=True, + help="Directory containing MDX files to clean", + ) + parser.add_argument( + "--exclude-prefix", + 
default="Raster_Inference", + help="Exclude notebooks with this filename prefix", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Show what would be removed without removing", + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="Verbose output", + ) + parser.add_argument( + "--output-file", + help="Write list of removed MDX files (names only) to this file", + ) + + args = parser.parse_args() + + notebook_dirs = [Path(d) for d in args.notebook_dirs] + mdx_dir = Path(args.mdx_dir) + + # Get expected and existing MDX names + expected_names = get_expected_mdx_names(notebook_dirs, args.exclude_prefix) + existing_names = get_existing_mdx_names(mdx_dir) + + if args.verbose: + print(f"Found {len(expected_names)} notebooks (expected MDX files)") + print(f"Found {len(existing_names)} existing MDX files") + + # Find orphans: existing MDX files with no corresponding notebook + orphaned_names = existing_names - expected_names + + if not orphaned_names: + print("No orphaned MDX files found") + return + + print(f"Found {len(orphaned_names)} orphaned MDX file(s):") + for name in sorted(orphaned_names): + print(f" - {name}.mdx") + + if args.dry_run: + print("\nDry run - no files removed") + return + + # Remove orphaned files + removed = cleanup_orphaned_files( + mdx_dir, orphaned_names, dry_run=args.dry_run, verbose=args.verbose + ) + + print(f"\nRemoved {len(removed)} file(s)") + + # Output removed files for git operations + for path in removed: + print(f"REMOVED: {path}") + + # Write removed MDX names to file if requested (for PR descriptions) + if args.output_file: + mdx_names = sorted(orphaned_names) + with open(args.output_file, "w") as f: + f.write("\n".join(mdx_names)) + if args.verbose: + print(f"Wrote {len(mdx_names)} removed MDX names to {args.output_file}") + + +if __name__ == "__main__": + main() diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 79b3aed..66d2427 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ 
-56,32 +56,41 @@ Notebooks in this repository are automatically converted to MDX format and publi ### Adding a new notebook -When you add a new notebook to this repository, you **must** update the navigation mapping so it appears in the correct category in the documentation: +When you add a new notebook to this repository, you **must** update the navigation mapping so it appears in the correct location in the documentation. A CI check will block your PR if you add or modify a notebook without updating the config. 1. Edit `.github/workflows/scripts/update_docs_navigation.py` -2. Add your notebook to the `FILENAME_TO_CATEGORY` dictionary with the appropriate category +2. Add your notebook to the `NOTEBOOK_LOCATIONS` dictionary with the appropriate navigation path 3. The filename key should be lowercase with hyphens (e.g., `My_New_Notebook.ipynb` becomes `"my-new-notebook"`) +4. The value is a list of group names representing the path in the docs navigation hierarchy Example: ```python -FILENAME_TO_CATEGORY = { +NOTEBOOK_LOCATIONS = { # ... existing entries ... - "my-new-notebook": "Analyzing Data", # Add your notebook here + "my-new-notebook": ["Advanced Topics"], # Top-level group + "my-spatial-stats-notebook": ["WherobotsAI", "Spatial Statistics"], # Nested group } ``` -Available categories: -- `"Getting Started"` -- `"Analyzing Data"` -- `"RasterFlow"` -- `"Reading and Writing Data"` -- `"Open Data Connections"` -- `"Scala"` +Available top-level groups: +- `["Getting Started"]` +- `["Data Connections"]` +- `["RasterFlow"]` +- `["Advanced Topics"]` -If you don't add your notebook to the mapping, it will appear under an "Other" category in the documentation. +Available nested groups: +- `["WherobotsDB", "Vector Tiles (PMTiles)"]` +- `["WherobotsAI", "Spatial Statistics"]` +- `["WherobotsAI"]` + +If you don't add your notebook to the mapping, it will be skipped in the navigation and a warning will be printed. 
**Note**: Notebooks with the `Raster_Inference_` prefix are excluded from documentation publishing. +### Deleted or renamed notebooks + +When notebooks are deleted or renamed, the corresponding MDX files and images in the docs repo are cleaned up by the `cleanup_orphaned_mdx.py` script. This cleanup runs automatically before conversion in the CI workflow and when using `make preview`/`make all`, can be invoked independently via `make cleanup`, and is not executed when running `make convert` alone. + ## Local preview You can preview how notebooks will look on the docs site locally using the Makefile targets. This requires the [`wherobots/docs`](https://github.com/wherobots/docs) repo cloned alongside this repo (at `../docs` by default). @@ -96,12 +105,13 @@ You can preview how notebooks will look on the docs site locally using the Makef | Target | Description | |---|---| -| `make preview` | Full local preview workflow. Syncs the docs repo to `main`, converts notebooks to MDX, updates navigation, and starts the Mintlify dev server at `http://localhost:3000`. | +| `make preview` | Full local preview workflow. Syncs the docs repo to `main`, cleans up orphaned MDX, converts notebooks to MDX, updates navigation, and starts the Mintlify dev server at `http://localhost:3000`. | | `make preview-branch DOCS_BRANCH=` | Same as `preview` but checks out a specific docs repo branch instead of `main`. Useful when redesigning the tutorials section on a feature branch. | -| `make convert` | Converts notebooks to MDX files in the docs repo. Cleans previous output first. | +| `make cleanup` | Removes orphaned MDX files and images from the docs repo (from deleted or renamed notebooks). | +| `make convert` | Converts notebooks to MDX files in the docs repo. Overwrites existing files in place. | | `make update-nav` | Updates `docs.json` navigation to include converted notebooks. | | `make sync-docs` | Checks out and pulls the target branch (default: `main`) in the docs repo. 
| -| `make clean` | Removes generated MDX files from the docs repo. | +| `make clean` | Removes all generated MDX files from the docs repo. | ### Configuration diff --git a/Makefile b/Makefile index 2395d43..25b0533 100644 --- a/Makefile +++ b/Makefile @@ -11,15 +11,16 @@ DOCS_BRANCH ?= main NOTEBOOKS_OUTPUT_DIR = $(DOCS_DIR)/tutorials/example-notebooks DOCS_JSON = $(DOCS_DIR)/docs.json -# Notebook directories to convert -NOTEBOOK_DIRS = Getting_Started/ Analyzing_Data/ Reading_and_Writing_Data/ Open_Data_Connections/ scala/ +# Dynamically find all directories containing notebooks +NOTEBOOK_DIRS = $(shell find . -name "*.ipynb" -type f | xargs -I {} dirname {} | sort -u | grep -v ".ipynb_checkpoints") -.PHONY: help convert update-nav sync-docs preview preview-branch clean all +.PHONY: help cleanup convert update-nav sync-docs preview preview-branch clean all help: @echo "Usage: make " @echo "" @echo "Targets:" + @echo " cleanup Remove orphaned MDX files (deleted/renamed notebooks)" @echo " convert Convert notebooks to MDX files" @echo " sync-docs Checkout and pull docs repo branch (default: main)" @echo " update-nav Update docs.json navigation" @@ -27,13 +28,21 @@ help: @echo " preview-branch Preview with a specific docs branch" @echo " Usage: make preview-branch DOCS_BRANCH=my-branch" @echo " clean Remove generated MDX files" - @echo " all Run sync-docs, convert, update-nav, then preview" + @echo " all Run sync-docs, cleanup, convert, update-nav, then preview" @echo "" @echo "Configuration:" @echo " DOCS_DIR Path to docs repo (default: ../docs)" @echo " DOCS_BRANCH Docs repo branch to use (default: main)" -convert: clean +cleanup: + @echo "Cleaning up orphaned MDX files..." + python3 .github/workflows/scripts/cleanup_orphaned_mdx.py \ + $(NOTEBOOK_DIRS) \ + --mdx-dir $(NOTEBOOKS_OUTPUT_DIR) \ + --exclude-prefix Raster_Inference \ + -v + +convert: @echo "Converting notebooks to MDX..." 
python3 .github/workflows/scripts/notebook_to_mdx.py \ $(NOTEBOOK_DIRS) \ @@ -51,12 +60,12 @@ sync-docs: @echo "Syncing docs repo to $(DOCS_BRANCH) branch..." cd $(DOCS_DIR) && git checkout -f $(DOCS_BRANCH) && git pull -preview: sync-docs convert update-nav +preview: sync-docs cleanup convert update-nav @echo "Starting Mintlify dev server..." @echo "Open http://localhost:3000 in your browser" cd $(DOCS_DIR) && npx mintlify dev -preview-branch: sync-docs convert update-nav +preview-branch: sync-docs cleanup convert update-nav @echo "Starting Mintlify dev server on $(DOCS_BRANCH) branch..." @echo "Open http://localhost:3000 in your browser" cd $(DOCS_DIR) && npx mintlify dev @@ -65,4 +74,4 @@ clean: @echo "Removing generated MDX files..." rm -rf $(NOTEBOOKS_OUTPUT_DIR) -all: sync-docs convert update-nav preview +all: sync-docs cleanup convert update-nav preview