-
Notifications
You must be signed in to change notification settings - Fork 3
feat: combine notebook workflow improvements (cleanup orphaned MDX + CI enforcement) #129
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
f19e3eb
c12ae95
5889caf
e6cb121
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
name: Enforce Config Update for Notebooks

on:
  pull_request:
    types: [opened, synchronize, reopened]

permissions:
  contents: read

jobs:
  check-notebook-config:
    runs-on: ubuntu-latest
    steps:
      # 1. Checkout the code
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Required to compare against the base branch

      # 2. Run the check script
      - name: Check for ipynb changes without config update
        env:
          # Path to config script that should be updated when notebooks are changed
          CONFIG_FILE_PATH: ".github/workflows/scripts/update_docs_navigation.py"
          BASE_BRANCH: ${{ github.base_ref }}
        run: |
          echo "Comparing HEAD against origin/$BASE_BRANCH..."

          # List files changed by this PR. The three-dot form diffs against the
          # merge base, so commits that landed on the base branch after the PR
          # was cut are not reported as PR changes.
          CHANGED_FILES=$(git diff --name-only "origin/$BASE_BRANCH...HEAD")

          # Changed notebooks, as repository-relative paths with the .ipynb
          # extension stripped (one per line; empty when no notebook changed).
          HAS_IPYNB=$(echo "$CHANGED_FILES" | grep '\.ipynb$' | sed 's/\.ipynb$//' || true)

          # Check if the specific config file was changed.
          # -F/-x: match the path literally and as a whole line, so the dots in
          # the path are not interpreted as regex wildcards.
          HAS_CONFIG=$(echo "$CHANGED_FILES" | grep -Fx -- "$CONFIG_FILE_PATH" || true)

          # LOGIC:
          # If notebooks are present AND config is empty (missing), fail the build.
          if [[ -n "$HAS_IPYNB" ]] && [[ -z "$HAS_CONFIG" ]]; then
            echo "::error::FAILURE: You added or modified an .ipynb file, but you did not update $CONFIG_FILE_PATH."
            echo "::error::Please update the configuration file to encompass your notebook changes."
            exit 1
          elif [[ -n "$HAS_IPYNB" ]]; then
            echo "Pass: Notebooks changed, and config file was updated."
          else
            echo "Pass: No notebooks were modified in this PR."
            exit 0
          fi

          # From here on at least one notebook changed and the config was touched.
          # Verify that every changed notebook is actually mentioned in the config.
          if [[ ! -f "$CONFIG_FILE_PATH" ]]; then
            echo "::error::FAILURE: $CONFIG_FILE_PATH does not exist in this PR, so notebook mappings cannot be verified."
            exit 1
          fi

          MISSING_MAPPING=()
          while IFS= read -r notebook; do
            # A notebook deleted by this PR no longer needs a mapping.
            if [[ ! -f "$notebook.ipynb" ]]; then
              continue
            fi
            # -F: the notebook path is a literal string, not a regex.
            if ! grep -qF -- "$notebook" "$CONFIG_FILE_PATH"; then
              MISSING_MAPPING+=("$notebook")
            fi
          done <<< "$HAS_IPYNB"

          # If there are notebooks without mappings, fail the build.
          # One annotation per notebook: bash's echo does not expand "\n", so a
          # newline-joined list would be printed with literal backslash-n.
          if (( ${#MISSING_MAPPING[@]} > 0 )); then
            echo "::error::FAILURE: The following notebooks are missing mappings in $CONFIG_FILE_PATH:"
            for notebook in "${MISSING_MAPPING[@]}"; do
              echo "::error::  - $notebook"
            done
            echo "::error::Please update the configuration file to encompass your notebook changes."
            exit 1
          fi
kadolor marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,169 @@ | ||
| #!/usr/bin/env python3 | ||
| """ | ||
| Clean up orphaned MDX files that no longer have corresponding notebooks. | ||
|
|
||
| This script compares MDX files in the docs repo against notebooks in the | ||
| examples repo and removes any MDX files that are orphans (their source | ||
| notebook was deleted or renamed). | ||
| """ | ||
|
|
||
| import argparse | ||
| import sys | ||
kadolor marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| from pathlib import Path | ||
|
|
||
|
|
||
def get_expected_mdx_names(notebook_dirs: list[Path], exclude_prefix: str) -> set[str]:
    """Collect the MDX base names that the current notebooks should produce.

    Every ``*.ipynb`` file found recursively under each existing directory in
    ``notebook_dirs`` contributes one name: its stem with underscores turned
    into dashes and lowercased (the original author notes this mirrors
    to_page_slug() in notebook_to_mdx.py). Directories that do not exist are
    ignored. When ``exclude_prefix`` is non-empty, notebooks whose filename
    starts with it are left out.

    Returns the names without any file extension.
    """
    slugs: set[str] = set()

    for root in notebook_dirs:
        if root.exists():
            slugs.update(
                nb_path.stem.replace("_", "-").lower()
                for nb_path in root.rglob("*.ipynb")
                # An empty prefix disables exclusion entirely.
                if not (exclude_prefix and nb_path.name.startswith(exclude_prefix))
            )

    return slugs
|
|
||
|
|
||
def get_existing_mdx_names(mdx_dir: Path) -> set[str]:
    """Return the stems of the ``*.mdx`` files directly inside ``mdx_dir``.

    Only the top level of the directory is scanned (no recursion). A
    directory that does not exist yields an empty set.
    """
    found: set[str] = set()
    if mdx_dir.exists():
        for entry in mdx_dir.glob("*.mdx"):
            found.add(entry.stem)
    return found
|
|
||
|
|
||
def cleanup_orphaned_files(
    mdx_dir: Path,
    orphaned_names: set[str],
    dry_run: bool = False,
    verbose: bool = False,
) -> list[Path]:
    """Remove orphaned MDX files and their associated images.

    For each name in ``orphaned_names`` (processed in sorted order) this
    deletes ``<mdx_dir>/<name>.mdx`` if present, plus every regular file in
    ``<mdx_dir>/images`` whose filename starts with ``<name.lower()>-``.

    An image is kept when its filename also starts with the slug of an MDX
    page that is NOT being removed and whose slug extends the orphan's slug
    (orphan ``foo`` vs. live page ``foo-bar``): ``foo-bar-plot.png`` is
    ambiguous, and deleting it could break the live page.

    Args:
        mdx_dir: Directory holding the ``.mdx`` files and the ``images``
            subdirectory.
        orphaned_names: MDX base names (no extension) to remove.
        dry_run: When true, nothing is deleted; the returned list contains
            the paths that would have been removed.
        verbose: When true, print one line per file handled.

    Returns:
        The removed (or, in dry-run mode, would-be-removed) paths, each
        listed once.
    """
    removed: list[Path] = []
    images_dir = mdx_dir / "images"

    # Snapshot the image prefixes of pages that stay, BEFORE anything is
    # deleted, so the protection check below does not depend on loop order.
    live_prefixes: tuple[str, ...] = ()
    if mdx_dir.is_dir():
        live_prefixes = tuple(
            f"{mdx_file.stem.lower()}-"
            for mdx_file in mdx_dir.glob("*.mdx")
            if mdx_file.stem not in orphaned_names
        )

    # List images once. Plain prefix matching (instead of building a glob
    # pattern from the name) keeps characters such as "[" or "*" in a name
    # from being interpreted as wildcards; directories are skipped because
    # unlink() cannot remove them.
    image_files: list[Path] = []
    if images_dir.is_dir():
        image_files = sorted(p for p in images_dir.iterdir() if p.is_file())

    handled: set[Path] = set()  # images already claimed by an earlier orphan

    for name in sorted(orphaned_names):
        # Remove MDX file
        mdx_file = mdx_dir / f"{name}.mdx"
        if mdx_file.exists():
            if verbose:
                print(f"Removing orphaned MDX: {mdx_file}")
            if not dry_run:
                mdx_file.unlink()
            removed.append(mdx_file)

        # Remove associated images (prefixed with notebook slug)
        # Images are named: {notebook-slug}-{image-name}.{ext}
        image_prefix = f"{name.lower()}-"
        # Live pages whose own image prefix begins with this orphan's prefix.
        protected = tuple(p for p in live_prefixes if p.startswith(image_prefix))

        for image_file in image_files:
            if image_file in handled:
                continue
            if not image_file.name.startswith(image_prefix):
                continue
            if protected and image_file.name.startswith(protected):
                if verbose:
                    print(f"Keeping image that matches a live page: {image_file}")
                continue

            handled.add(image_file)
            if verbose:
                print(f"Removing orphaned image: {image_file}")
            if not dry_run:
                image_file.unlink()
            removed.append(image_file)

    return removed
|
|
||
|
|
||
def main():
    """Command-line entry point: find and delete orphaned MDX files.

    Parses the arguments, compares the MDX names expected from the notebook
    directories with the MDX files present in ``--mdx-dir``, and prints the
    orphans (present but not expected). Unless ``--dry-run`` is given, the
    orphans and their images are then removed, each removed path is echoed
    on a ``REMOVED:`` line, and, if ``--output-file`` is set, the sorted
    orphan names are written to that file, newline-separated.
    """
    cli = argparse.ArgumentParser(
        description="Clean up orphaned MDX files from deleted/renamed notebooks"
    )
    cli.add_argument(
        "notebook_dirs", nargs="+", help="Directories containing source notebooks"
    )
    cli.add_argument(
        "--mdx-dir", required=True, help="Directory containing MDX files to clean"
    )
    cli.add_argument(
        "--exclude-prefix",
        default="Raster_Inference",
        help="Exclude notebooks with this filename prefix",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be removed without removing",
    )
    cli.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    cli.add_argument(
        "--output-file",
        help="Write list of removed MDX files (names only) to this file",
    )
    opts = cli.parse_args()

    source_dirs = [Path(entry) for entry in opts.notebook_dirs]
    mdx_root = Path(opts.mdx_dir)

    # What should exist (derived from notebooks) vs. what does exist on disk.
    wanted = get_expected_mdx_names(source_dirs, opts.exclude_prefix)
    present = get_existing_mdx_names(mdx_root)

    if opts.verbose:
        print(f"Found {len(wanted)} notebooks (expected MDX files)")
        print(f"Found {len(present)} existing MDX files")

    # Orphans are MDX files on disk that no notebook accounts for.
    orphans = present - wanted
    if not orphans:
        print("No orphaned MDX files found")
        return

    orphan_list = sorted(orphans)
    print(f"Found {len(orphans)} orphaned MDX file(s):")
    for orphan in orphan_list:
        print(f"  - {orphan}.mdx")

    if opts.dry_run:
        print("\nDry run - no files removed")
        return

    deleted = cleanup_orphaned_files(
        mdx_root, orphans, dry_run=opts.dry_run, verbose=opts.verbose
    )
    print(f"\nRemoved {len(deleted)} file(s)")

    # One machine-readable line per path, for follow-up git operations.
    for deleted_path in deleted:
        print(f"REMOVED: {deleted_path}")

    # Optionally record the orphan names (e.g. for PR descriptions).
    if opts.output_file:
        with open(opts.output_file, "w") as handle:
            handle.write("\n".join(orphan_list))
        if opts.verbose:
            print(f"Wrote {len(orphan_list)} removed MDX names to {opts.output_file}")


if __name__ == "__main__":
    main()
Uh oh!
There was an error while loading. Please reload this page.