Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions .github/workflows/check-notebooks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Fails a PR that adds or modifies .ipynb files without also updating the
# docs-navigation config script, or whose notebooks are not referenced in it.
name: Enforce Config Update for Notebooks

on:
  pull_request:
    types: [opened, synchronize, reopened]

permissions:
  contents: read

jobs:
  check-notebook-config:
    runs-on: ubuntu-latest
    steps:
      # 1. Checkout the code
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Required to compare against the base branch

      # 2. Run the check script
      - name: Check for ipynb changes without config update
        env:
          # Path to config script that should be updated when notebooks are changed
          CONFIG_FILE_PATH: ".github/workflows/scripts/update_docs_navigation.py"
          BASE_BRANCH: ${{ github.base_ref }}
        run: |
          echo "Comparing HEAD against origin/$BASE_BRANCH..."

          # Three-dot diff compares HEAD against the merge base, so commits
          # that landed on the base branch after this PR branched off are
          # not reported as changes of this PR.
          CHANGED_FILES=$(git diff --name-only "origin/$BASE_BRANCH...HEAD")

          # Changed notebook paths, with the .ipynb extension stripped.
          HAS_IPYNB=$(echo "$CHANGED_FILES" | grep '\.ipynb$' | sed 's/\.ipynb$//' || true)

          # Exact literal match of the config path: -F disables regex (the
          # dots in the path are not wildcards), -x requires the whole line.
          HAS_CONFIG=$(echo "$CHANGED_FILES" | grep -Fx "$CONFIG_FILE_PATH" || true)

          # LOGIC:
          # If notebooks are present AND config is empty (missing), fail the build.
          if [[ -n "$HAS_IPYNB" ]] && [[ -z "$HAS_CONFIG" ]]; then
            echo "::error::FAILURE: You added or modified an .ipynb file, but you did not update $CONFIG_FILE_PATH."
            echo "::error::Please update the configuration file to encompass your notebook changes."
            exit 1
          elif [[ -n "$HAS_IPYNB" ]]; then
            echo "Pass: Notebooks changed, and config file was updated."
          else
            echo "Pass: No notebooks were modified in this PR."
          fi

          # Check that each modified notebook is referenced in the config
          # file. -F makes the match literal so notebook paths containing
          # regex metacharacters cannot match the wrong lines.
          MISSING_MAPPING=""
          if [[ -n "$HAS_IPYNB" ]]; then
            CONFIG_CONTENT=$(cat "$CONFIG_FILE_PATH")
            while IFS= read -r notebook; do
              if ! grep -qF "$notebook" <<< "$CONFIG_CONTENT"; then
                # $'\n' appends a real newline; a plain "\n" in double
                # quotes would be printed as a literal backslash-n.
                MISSING_MAPPING+=$'\n'"$notebook"
              fi
            done <<< "$HAS_IPYNB"
          fi

          # If there are notebooks without mappings, fail the build
          if [[ -n "$MISSING_MAPPING" ]]; then
            echo "::error::FAILURE: The following notebooks are missing mappings in $CONFIG_FILE_PATH:"
            echo "$MISSING_MAPPING"
            echo "::error::Please update the configuration file to encompass your notebook changes."
            exit 1
          fi
70 changes: 53 additions & 17 deletions .github/workflows/convert-notebooks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ on:
- '**/*.ipynb'
- '.github/workflows/convert-notebooks.yml'
- '.github/workflows/scripts/notebook_to_mdx.py'
- '.github/workflows/scripts/cleanup_orphaned_mdx.py'
workflow_dispatch:

permissions:
Expand All @@ -34,14 +35,39 @@ jobs:
with:
python-version: '3.11'

# Discover every directory that contains at least one notebook so later
# steps do not depend on a hard-coded directory list.
- name: Find notebook directories
id: find-dirs
run: |
# Unique parent directories of all .ipynb files, minus Jupyter
# checkpoint dirs, flattened to one space-separated line.
# NOTE(review): breaks on paths containing spaces — presumably none do.
NOTEBOOK_DIRS=$(find wherobots-examples -name "*.ipynb" -type f \
| xargs -I {} dirname {} \
| sort -u \
| grep -v ".ipynb_checkpoints" \
| tr '\n' ' ')
# Published as steps.find-dirs.outputs.dirs for the steps below.
echo "dirs=$NOTEBOOK_DIRS" >> $GITHUB_OUTPUT

# Delete MDX pages whose source notebook was removed or renamed, and
# record the deletions so the PR description can list them.
- name: Clean up orphaned MDX files
id: cleanup
run: |
python wherobots-examples/.github/workflows/scripts/cleanup_orphaned_mdx.py \
${{ steps.find-dirs.outputs.dirs }} \
--mdx-dir docs/tutorials/example-notebooks \
--exclude-prefix Raster_Inference \
--output-file /tmp/removed-mdx.txt \
-v

# Publish has_deletions and (when non-empty) a markdown bullet list of
# deleted files; the heredoc syntax allows a multi-line output value.
if [ -f /tmp/removed-mdx.txt ] && [ -s /tmp/removed-mdx.txt ]; then
echo "has_deletions=true" >> $GITHUB_OUTPUT
echo "deleted_files<<EOF" >> $GITHUB_OUTPUT
cat /tmp/removed-mdx.txt | sed 's/^/- /' >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
else
echo "has_deletions=false" >> $GITHUB_OUTPUT
fi

- name: Convert notebooks to MDX
run: |
python wherobots-examples/.github/workflows/scripts/notebook_to_mdx.py \
wherobots-examples/Getting_Started/ \
wherobots-examples/Analyzing_Data/ \
wherobots-examples/Reading_and_Writing_Data/ \
wherobots-examples/Open_Data_Connections/ \
wherobots-examples/scala/ \
${{ steps.find-dirs.outputs.dirs }} \
-o docs/tutorials/example-notebooks \
--exclude-prefix Raster_Inference \
-v
Expand All @@ -56,7 +82,7 @@ jobs:
id: git-check
working-directory: docs
run: |
git add tutorials/example-notebooks/ docs.json
git add -A tutorials/example-notebooks/ docs.json
if git diff --staged --quiet; then
echo "changes=false" >> $GITHUB_OUTPUT
else
Expand All @@ -68,28 +94,38 @@ jobs:
working-directory: docs
env:
GH_TOKEN: ${{ secrets.DOCS_REPO_TOKEN }}
HAS_DELETIONS: ${{ steps.cleanup.outputs.has_deletions }}
DELETED_FILES: ${{ steps.cleanup.outputs.deleted_files }}
run: |
# Configure git
git config --local user.email "github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"

# Create a unique branch name using the source commit SHA
BRANCH_NAME="sync-notebooks-${{ github.sha }}"
git checkout -b "$BRANCH_NAME"

# Commit changes
git commit -m "docs: sync example notebooks from wherobots-examples@${{ github.sha }}"

# Push the branch
git push -u origin "$BRANCH_NAME"

# Create PR
gh pr create \
--title "docs: sync example notebooks from wherobots-examples" \
--body "## Summary
PR_BODY="## Summary

- Automated sync of Jupyter notebooks from wherobots-examples repo
- Converts notebooks to MDX format for Mintlify documentation
- Updates docs.json navigation
- Updates docs.json navigation"

if [ "$HAS_DELETIONS" = "true" ]; then
PR_BODY="$PR_BODY

## Deleted MDX Files

The following MDX files were removed (source notebooks deleted or renamed):

$DELETED_FILES"
fi

PR_BODY="$PR_BODY

---
This PR was automatically generated by the [Convert Notebooks to MDX](https://github.com/wherobots/wherobots-examples/actions/workflows/convert-notebooks.yml) workflow."

gh pr create \
--title "docs: sync example notebooks from wherobots-examples" \
--body "$PR_BODY"
169 changes: 169 additions & 0 deletions .github/workflows/scripts/cleanup_orphaned_mdx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
#!/usr/bin/env python3
"""
Clean up orphaned MDX files that no longer have corresponding notebooks.

This script compares MDX files in the docs repo against notebooks in the
examples repo and removes any MDX files that are orphans (their source
notebook was deleted or renamed).
"""

import argparse
import sys
from pathlib import Path


def get_expected_mdx_names(notebook_dirs: list[Path], exclude_prefix: str) -> set[str]:
    """Collect the MDX basenames that the current notebooks should produce.

    Recursively walks every directory in ``notebook_dirs``, skips notebooks
    whose filename starts with ``exclude_prefix``, and converts each
    remaining notebook stem to its page slug — underscores become dashes,
    lowercased — mirroring the to_page_slug() logic in notebook_to_mdx.py.

    Args:
        notebook_dirs: Directories to search for ``*.ipynb`` files;
            non-existent directories are silently skipped.
        exclude_prefix: Filename prefix of notebooks to ignore (an empty
            string disables the exclusion).

    Returns:
        Set of expected MDX filenames without the ``.mdx`` extension.
    """
    slugs: set[str] = set()

    for directory in notebook_dirs:
        if not directory.exists():
            # Tolerate missing directories rather than raising.
            continue

        for nb_path in directory.rglob("*.ipynb"):
            if exclude_prefix and nb_path.name.startswith(exclude_prefix):
                continue
            slugs.add(nb_path.stem.replace("_", "-").lower())

    return slugs


def get_existing_mdx_names(mdx_dir: Path) -> set[str]:
    """Return the basenames (no extension) of every ``.mdx`` file in ``mdx_dir``.

    An absent directory yields an empty set rather than raising.
    """
    names: set[str] = set()
    if mdx_dir.exists():
        for entry in mdx_dir.glob("*.mdx"):
            names.add(entry.stem)
    return names


def cleanup_orphaned_files(
    mdx_dir: Path,
    orphaned_names: set[str],
    dry_run: bool = False,
    verbose: bool = False,
) -> list[Path]:
    """Remove orphaned MDX files and their associated images.

    Args:
        mdx_dir: Directory containing the ``.mdx`` files.
        orphaned_names: MDX basenames (without extension) to delete.
        dry_run: If True, report what would be removed without removing.
        verbose: If True, print each file as it is (or would be) removed.

    Returns:
        List of removed paths (or would-be-removed paths in dry-run mode).
    """
    removed = []
    images_dir = mdx_dir / "images"

    for name in sorted(orphaned_names):
        # Remove MDX file
        mdx_file = mdx_dir / f"{name}.mdx"
        if mdx_file.exists():
            if verbose:
                print(f"Removing orphaned MDX: {mdx_file}")
            if not dry_run:
                mdx_file.unlink()
            removed.append(mdx_file)

        # Remove associated images, named: {notebook-slug}-{image-name}.{ext}
        # Use a literal startswith() check instead of glob() so slugs that
        # contain glob metacharacters (e.g. "getis-ord-gi*") cannot miss
        # their own images or match unrelated ones.
        prefix = f"{name.lower()}-"
        if images_dir.exists():
            # sorted() keeps the removal order deterministic across runs.
            for image_file in sorted(images_dir.iterdir()):
                if not image_file.name.startswith(prefix):
                    continue
                if verbose:
                    print(f"Removing orphaned image: {image_file}")
                if not dry_run:
                    image_file.unlink()
                removed.append(image_file)

    return removed


def main():
    """CLI entry point: find and delete MDX files whose source notebooks are gone."""
    parser = argparse.ArgumentParser(
        description="Clean up orphaned MDX files from deleted/renamed notebooks"
    )
    parser.add_argument(
        "notebook_dirs",
        nargs="+",
        help="Directories containing source notebooks",
    )
    parser.add_argument(
        "--mdx-dir",
        required=True,
        help="Directory containing MDX files to clean",
    )
    parser.add_argument(
        "--exclude-prefix",
        default="Raster_Inference",
        help="Exclude notebooks with this filename prefix",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be removed without removing",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Verbose output",
    )
    parser.add_argument(
        "--output-file",
        help="Write list of removed MDX files (names only) to this file",
    )
    args = parser.parse_args()

    mdx_dir = Path(args.mdx_dir)
    source_dirs = [Path(d) for d in args.notebook_dirs]

    # Expected names come from the notebooks; existing names from the docs repo.
    expected_names = get_expected_mdx_names(source_dirs, args.exclude_prefix)
    existing_names = get_existing_mdx_names(mdx_dir)

    if args.verbose:
        print(f"Found {len(expected_names)} notebooks (expected MDX files)")
        print(f"Found {len(existing_names)} existing MDX files")

    # An orphan is an MDX file whose source notebook no longer exists.
    orphaned_names = existing_names - expected_names
    if not orphaned_names:
        print("No orphaned MDX files found")
        return

    print(f"Found {len(orphaned_names)} orphaned MDX file(s):")
    for name in sorted(orphaned_names):
        print(f"  - {name}.mdx")

    if args.dry_run:
        print("\nDry run - no files removed")
        return

    removed = cleanup_orphaned_files(
        mdx_dir, orphaned_names, dry_run=args.dry_run, verbose=args.verbose
    )
    print(f"\nRemoved {len(removed)} file(s)")

    # Emit machine-readable lines so the workflow can stage deletions in git.
    for path in removed:
        print(f"REMOVED: {path}")

    # Persist the removed MDX names when requested (used for PR descriptions).
    if args.output_file:
        mdx_names = sorted(orphaned_names)
        with open(args.output_file, "w") as f:
            f.write("\n".join(mdx_names))
        if args.verbose:
            print(f"Wrote {len(mdx_names)} removed MDX names to {args.output_file}")


# Allow use both as a script and as an importable module.
if __name__ == "__main__":
    main()
Loading