-
Notifications
You must be signed in to change notification settings - Fork 11
[Integrate-2600] Support notebook kernels offline installation #2702
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
hengxian-jiang
wants to merge
2
commits into
develop
Choose a base branch
from
hengxian-jiang/offline_support_notebook_kernel
base: develop
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 1 commit
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
95 changes: 95 additions & 0 deletions
95
plugins/ui/apps/webr-notebook/scripts/offline-assets-lib.mjs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,95 @@ | ||
| /** | ||
| * Pure helpers for the offline-asset prepare script. No I/O here so they can be | ||
| * unit-tested with `node --test`. | ||
| */ | ||
|
|
||
/**
 * Resolve the transitive closure of wheel file names for the given root
 * package names, using a Pyodide pyodide-lock.json object.
 *
 * Only own properties of `lock.packages` are treated as packages, so a name
 * that collides with an inherited `Object.prototype` member (for example
 * `constructor`) is reported as missing instead of silently contributing an
 * `undefined` file name to the result.
 *
 * @param {{ packages: Record<string, { file_name: string, depends?: string[] }> }} lock
 *   Parsed pyodide-lock.json content.
 * @param {Iterable<string>} rootNames
 *   Package names, spelled exactly as they are keyed in `lock.packages`.
 * @returns {Set<string>} set of wheel file names
 * @throws {Error} when a root or transitive dependency is absent from the
 *   lock, or when a lock entry carries no usable `file_name`.
 */
export function resolveWheelClosure(lock, rootNames) {
  const packages = lock?.packages ?? {}
  const files = new Set()
  const seen = new Set()
  const queue = [...rootNames]
  // Walk the queue with a cursor; new dependencies are appended while iterating.
  for (let i = 0; i < queue.length; i += 1) {
    const name = queue[i]
    if (seen.has(name)) continue
    seen.add(name)
    const pkg = Object.hasOwn(packages, name) ? packages[name] : undefined
    if (!pkg) {
      throw new Error(`Package not found in pyodide-lock.json: ${name}`)
    }
    if (typeof pkg.file_name !== 'string' || pkg.file_name === '') {
      throw new Error(`Package has no file_name in pyodide-lock.json: ${name}`)
    }
    files.add(pkg.file_name)
    for (const dep of pkg.depends || []) {
      if (!seen.has(dep)) queue.push(dep)
    }
  }
  return files
}
|
|
||
/**
 * Parse a Debian-control-style PACKAGES index into an array of
 * { name, version, deps, raw } records.
 *
 * Records are separated by blank lines. Within a record, a `Key: value` line
 * starts a field and any other line is folded into the previous field as a
 * continuation. Records without a `Package` field are dropped.
 *
 * Line endings are normalised to LF first: with CRLF input the per-line
 * pattern could never match (neither `.` nor a non-multiline `$` accepts a
 * trailing `\r`), so every record would be lost. Field names may contain
 * `_` (e.g. `OS_type`) so such lines are not mistaken for continuation text.
 *
 * @param {string} text - raw PACKAGES file content.
 * @returns {{ name: string, version: (string|undefined), deps: string[], raw: string }[]}
 *   `deps` merges Depends, Imports and LinkingTo with version constraints
 *   removed and `R` itself excluded; `raw` is the trimmed, LF-normalised
 *   record text.
 */
export function parsePackagesIndex(text) {
  const normalized = text.replace(/\r\n?/g, '\n')
  const records = []
  for (const block of normalized.split(/\n\s*\n/)) {
    const trimmed = block.trim()
    if (!trimmed) continue
    const fields = new Map()
    let lastKey = null
    for (const line of trimmed.split('\n')) {
      const m = line.match(/^([A-Za-z0-9_-]+):\s*(.*)$/)
      if (m) {
        lastKey = m[1]
        fields.set(lastKey, m[2])
      } else if (lastKey) {
        // continuation line
        fields.set(lastKey, `${fields.get(lastKey)} ${line.trim()}`)
      }
    }
    const name = fields.get('Package')
    if (!name) continue
    const deps = ['Depends', 'Imports', 'LinkingTo']
      .flatMap((k) => (fields.get(k) || '').split(','))
      .map((d) => d.replace(/\(.*?\)/g, '').trim()) // drop "(>= x.y)" constraints
      .filter((d) => d && d !== 'R')
    records.push({
      name,
      version: fields.get('Version'),
      deps,
      raw: trimmed,
    })
  }
  return records
}
|
|
||
/**
 * Produce a reduced PACKAGES index: the requested packages together with every
 * dependency reachable from them, limited to records that actually occur in
 * `packagesText`. Names with no record (e.g. base R packages) are ignored.
 * Output keeps the input record order, separates records with one blank line
 * (as R expects) and ends with a single newline.
 *
 * @param {string} packagesText - full PACKAGES index text.
 * @param {Iterable<string>} wantedNames - root package names to keep.
 * @returns {string} the filtered PACKAGES text.
 */
export function buildSubsetPackagesIndex(packagesText, wantedNames) {
  const allRecords = parsePackagesIndex(packagesText)
  const recordFor = new Map()
  for (const record of allRecords) {
    recordFor.set(record.name, record)
  }

  // Breadth-first walk over the dependency graph, starting at the roots.
  const selected = new Set()
  const pending = [...wantedNames]
  for (let cursor = 0; cursor < pending.length; cursor += 1) {
    const candidate = pending[cursor]
    if (selected.has(candidate)) continue
    const record = recordFor.get(candidate)
    // dep not in this repo (e.g. base R package) — skip
    if (!record) continue
    selected.add(candidate)
    for (const dep of record.deps) {
      if (!selected.has(dep)) pending.push(dep)
    }
  }

  const chunks = []
  for (const record of allRecords) {
    if (selected.has(record.name)) chunks.push(record.raw)
  }
  return chunks.join('\n\n') + '\n'
}
52 changes: 52 additions & 0 deletions
52
plugins/ui/apps/webr-notebook/scripts/offline-assets-lib.test.mjs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| import { test } from 'node:test' | ||
| import assert from 'node:assert/strict' | ||
| import { resolveWheelClosure, buildSubsetPackagesIndex } from './offline-assets-lib.mjs' | ||
|
|
||
test('resolveWheelClosure includes roots and transitive deps, deduped', () => {
  // `requests` pulls in urllib3 + idna; `six` is an independent second root.
  const packages = {
    requests: { file_name: 'requests-2.32.4-py3-none-any.whl', depends: ['urllib3', 'idna'] },
    urllib3: { file_name: 'urllib3-2.0-py3-none-any.whl', depends: [] },
    idna: { file_name: 'idna-3.0-py3-none-any.whl', depends: [] },
    six: { file_name: 'six-1.16-py3-none-any.whl', depends: [] },
  }
  const expected = [
    'idna-3.0-py3-none-any.whl',
    'requests-2.32.4-py3-none-any.whl',
    'six-1.16-py3-none-any.whl',
    'urllib3-2.0-py3-none-any.whl',
  ]
  const actual = Array.from(resolveWheelClosure({ packages }, ['requests', 'six'])).sort()
  assert.deepStrictEqual(actual, expected)
})

test('resolveWheelClosure throws on a missing package', () => {
  const emptyLock = { packages: {} }
  assert.throws(() => resolveWheelClosure(emptyLock, ['nope']), /nope/)
})

test('buildSubsetPackagesIndex keeps wanted records + their dep closure', () => {
  // One inner array per PACKAGES record; records are blank-line separated.
  const recordLines = [
    ['Package: checkmate', 'Version: 2.3.2', 'Depends: backports'],
    ['Package: backports', 'Version: 1.5.0'],
    ['Package: jsonlite', 'Version: 1.8.9'],
    ['Package: unused', 'Version: 9.9.9'],
  ]
  const packages = recordLines.map((lines) => lines.join('\n')).join('\n\n') + '\n'

  const subset = buildSubsetPackagesIndex(packages, ['checkmate', 'jsonlite'])
  assert.match(subset, /Package: checkmate/)
  assert.match(subset, /Package: backports/) // pulled in via Depends
  assert.match(subset, /Package: jsonlite/)
  assert.doesNotMatch(subset, /Package: unused/)
})
187 changes: 187 additions & 0 deletions
187
plugins/ui/apps/webr-notebook/scripts/prepare-offline-assets.mjs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,187 @@ | ||
| /** | ||
| * Build-time preparation of offline notebook-kernel assets. | ||
| * | ||
| * Populates <publicDir>/kernel-assets/ with: | ||
| * pyodide/ — pyodide core + bootstrap wheels + patched pyodide-lock.json | ||
| * webr/ — full WebR WASM binaries (copied from the npm package) | ||
| * webr-repo/ — minimal CRAN-style repo for the WebR bootstrap packages | ||
| * | ||
| * Requires internet at BUILD time; the resulting bundle runs fully offline. | ||
| */ | ||
| import { createHash } from 'node:crypto' | ||
| import { cp, mkdir, readFile, writeFile, access, rm } from 'node:fs/promises' | ||
| import { gzipSync } from 'node:zlib' | ||
| import path from 'node:path' | ||
| import { fileURLToPath } from 'node:url' | ||
| import { | ||
| resolveWheelClosure, | ||
| buildSubsetPackagesIndex, | ||
| parsePackagesIndex, | ||
| } from './offline-assets-lib.mjs' | ||
|
|
||
// Directory layout: this script sits in <app>/scripts, and the directory two
// levels above the app holds the node_modules the assets are copied from.
const __dirname = path.dirname(fileURLToPath(import.meta.url))
const APP_DIR = path.resolve(__dirname, '..')
const UI_DIR = path.resolve(APP_DIR, '../..')

const PYODIDE_PKG = path.join(UI_DIR, 'node_modules/pyodide')
const WEBR_PKG = path.join(UI_DIR, 'node_modules/webr')

// Bootstrap packages present in the pyodide distribution lock. The lock's
// `depends` graph is used to resolve and mirror their whole dependency closure.
//   - pyqe imports pandas (→ numpy, python-dateutil, pytz) and, through its
//     azure auth path, msal (→ cryptography → cffi, pycparser).
//   - `ssl` (→ libopenssl) is imported at runtime by requests/urllib3 yet is
//     absent from their `depends`; Pyodide auto-loads it through the
//     module→package map, so it has to be pre-staged or `import requests` 404s.
const PYODIDE_LOCK_ROOTS = [
  'micropip',
  'requests',
  'pyyaml',
  'six',
  'ssl',
  'pandas',
  // required by msal
  'cryptography',
]

// Bootstrap packages absent from the lock: fetched from PyPI and injected into
// the local lock. `depends` names the deps that must resolve from the mirror so
// the runtime can auto-load them (each one is itself mirrored via the roots).
const PYODIDE_PYPI_EXTRAS = [
  { name: 'PyJWT' },
  { name: 'python-dotenv' },
  { name: 'msal', depends: ['cryptography', 'requests', 'PyJWT'] },
]

// WebR bootstrap packages; their deps come from the repo PACKAGES index.
const WEBR_PACKAGES = ['checkmate', 'jsonlite', 'dplyr']

// R minor version shipped in the webr npm package (4.5.1 in webr 0.5.9). It has
// to match the contrib dir WebR requests at runtime — bump on webr upgrade.
const R_CONTRIB_VERSION = '4.5'

// Files copied verbatim from the pyodide npm package.
const PYODIDE_CORE_FILES = [
  'pyodide.asm.wasm',
  'pyodide.asm.js',
  'python_stdlib.zip',
  'pyodide-lock.json',
]
|
|
||
/**
 * Report whether `p` can be accessed on disk.
 *
 * @param {string} p - path to probe.
 * @returns {Promise<boolean>} true when `access(p)` succeeds, false on any failure.
 */
async function exists(p) {
  return access(p).then(
    () => true,
    () => false
  )
}
|
|
||
/**
 * Fetch `url` with the global `fetch` and return the whole body as a Buffer.
 *
 * Transport-level failures (where `fetch` or the body read rejects) are
 * re-thrown with the URL in the message and the original error attached as
 * `cause`, so a failed build names the asset that could not be retrieved.
 *
 * @param {string} url - absolute URL to download.
 * @returns {Promise<Buffer>} the response body.
 * @throws {Error} on a non-2xx response, or on a network / body-read failure.
 */
async function download(url) {
  let res
  try {
    res = await fetch(url)
  } catch (err) {
    throw new Error(`Download failed (network error): ${url}`, { cause: err })
  }
  if (!res.ok) throw new Error(`Download failed (${res.status}): ${url}`)
  try {
    return Buffer.from(await res.arrayBuffer())
  } catch (err) {
    throw new Error(`Download failed (body read error): ${url}`, { cause: err })
  }
}
|
|
||
/**
 * Populate `<outDir>/pyodide` with everything the Pyodide kernel needs offline:
 * the core runtime files, the wheels for PYODIDE_LOCK_ROOTS and their
 * dependency closure, the PyPI-only extras, and a rewritten pyodide-lock.json
 * that also describes those extras.
 *
 * Freshly downloaded in-distribution wheels are checked against the `sha256`
 * recorded for them in the lock before being written, so a corrupted or
 * mismatching download fails the build instead of being cached on disk.
 * NOTE(review): Pyodide is understood to check this hash again at load time —
 * confirm against the Pyodide version in use.
 *
 * @param {string} outDir - the kernel-assets output directory.
 * @returns {Promise<void>}
 * @throws {Error} when a download fails, a wheel's checksum does not match the
 *   lock, PyPI metadata cannot be fetched, or an extra has no pure-Python wheel.
 */
async function preparePyodide(outDir) {
  const dest = path.join(outDir, 'pyodide')
  await mkdir(dest, { recursive: true })

  // 1. Copy core files from the npm package.
  for (const f of PYODIDE_CORE_FILES) {
    await cp(path.join(PYODIDE_PKG, f), path.join(dest, f))
  }

  // 2. Resolve the pyodide version + lock; download in-distribution wheels.
  const version = JSON.parse(
    await readFile(path.join(PYODIDE_PKG, 'package.json'), 'utf-8')
  ).version
  const cdn = `https://cdn.jsdelivr.net/pyodide/v${version}/full/`
  const lock = JSON.parse(await readFile(path.join(dest, 'pyodide-lock.json'), 'utf-8'))
  const wheels = resolveWheelClosure(lock, PYODIDE_LOCK_ROOTS)
  // file name → expected sha256, taken from the lock as shipped in the npm package.
  const expectedSha = new Map(
    Object.values(lock.packages).map((pkg) => [pkg.file_name, pkg.sha256])
  )
  for (const file of wheels) {
    const target = path.join(dest, file)
    if (await exists(target)) continue
    const buf = await download(cdn + file)
    const expected = expectedSha.get(file)
    if (typeof expected === 'string' && expected !== '') {
      const actual = createHash('sha256').update(buf).digest('hex')
      if (actual !== expected.toLowerCase()) {
        throw new Error(
          `Checksum mismatch for ${file}: expected ${expected}, got ${actual}`
        )
      }
    }
    await writeFile(target, buf)
    console.log(` pyodide wheel: ${file}`)
  }

  // 3. Download the PyPI extras, place them in the index, and inject lock entries.
  for (const { name, depends = [] } of PYODIDE_PYPI_EXTRAS) {
    const metaRes = await fetch(`https://pypi.org/pypi/${name}/json`)
    if (!metaRes.ok) {
      throw new Error(`PyPI metadata fetch failed (${metaRes.status}): ${name}`)
    }
    const meta = await metaRes.json()
    // Only a platform-independent wheel can be installed into Pyodide as-is.
    const wheel = meta.urls.find(
      (u) => u.packagetype === 'bdist_wheel' && u.filename.endsWith('-none-any.whl')
    )
    if (!wheel) throw new Error(`No py3-none-any wheel found on PyPI for ${name}`)
    const buf = await download(wheel.url)
    await writeFile(path.join(dest, wheel.filename), buf)
    const sha256 = createHash('sha256').update(buf).digest('hex')
    // Lock entries are keyed by lower-cased name; `depends` is lower-cased to match.
    const key = name.toLowerCase()
    lock.packages[key] = {
      name: key,
      version: meta.info.version,
      file_name: wheel.filename,
      install_dir: 'site',
      sha256,
      package_type: 'package',
      imports: [],
      depends: depends.map((d) => d.toLowerCase()),
      unvendored_tests: false,
    }
    console.log(` pyodide extra (PyPI): ${wheel.filename}`)
  }
  // Overwrite the copied lock so it also describes the injected extras.
  await writeFile(
    path.join(dest, 'pyodide-lock.json'),
    JSON.stringify(lock)
  )
}
|
|
||
/**
 * Populate the WebR side of the offline bundle under `outDir`:
 *   webr/      — the npm package's `dist` directory, copied once
 *   webr-repo/ — a minimal CRAN-style repo holding WEBR_PACKAGES plus their
 *                dependency closure, with matching PACKAGES / PACKAGES.gz
 *
 * Every name in WEBR_PACKAGES must be present in the remote PACKAGES index.
 * The subset builder silently skips unknown names (it has to, for base R
 * dependencies), so the roots are verified here; otherwise a typo or a package
 * dropped upstream would produce an incomplete offline repo without any
 * build-time error.
 *
 * @param {string} outDir - the kernel-assets output directory.
 * @returns {Promise<void>}
 * @throws {Error} when a download fails or a bootstrap package is not in the index.
 */
async function prepareWebr(outDir) {
  // 1. Copy the full WASM runtime from the npm package.
  const webrDest = path.join(outDir, 'webr')
  if (!(await exists(webrDest))) {
    await cp(path.join(WEBR_PKG, 'dist'), webrDest, { recursive: true })
  }

  // 2. Build the minimal package repo.
  const contribDir = path.join(
    outDir,
    'webr-repo/bin/emscripten/contrib',
    R_CONTRIB_VERSION
  )
  await mkdir(contribDir, { recursive: true })
  const repoBase = `https://repo.r-wasm.org/bin/emscripten/contrib/${R_CONTRIB_VERSION}/`
  const packagesText = (await download(repoBase + 'PACKAGES')).toString('utf-8')

  // Determine versions/filenames + dep closure, then download each .tgz.
  const subset = buildSubsetPackagesIndex(packagesText, WEBR_PACKAGES)
  const records = parsePackagesIndex(subset)

  // Fail the build if any requested root package did not make it into the subset.
  const available = new Set(records.map((rec) => rec.name))
  const missing = WEBR_PACKAGES.filter((name) => !available.has(name))
  if (missing.length > 0) {
    throw new Error(
      `WebR package(s) not found in ${repoBase}PACKAGES: ${missing.join(', ')}`
    )
  }

  for (const rec of records) {
    const file = `${rec.name}_${rec.version}.tgz`
    const target = path.join(contribDir, file)
    if (await exists(target)) continue
    await writeFile(target, await download(repoBase + file))
    console.log(` webr package: ${file}`)
  }

  // 3. Write the filtered PACKAGES + PACKAGES.gz the WebR client reads.
  await writeFile(path.join(contribDir, 'PACKAGES'), subset)
  await writeFile(path.join(contribDir, 'PACKAGES.gz'), gzipSync(Buffer.from(subset)))
}
|
|
||
/**
 * Entry point: create `<publicDir>/kernel-assets` and fill it with the Pyodide
 * assets followed by the WebR assets, logging progress to the console.
 *
 * @param {{ publicDir: string }} options - `publicDir` is the directory that
 *   receives the `kernel-assets` folder.
 * @returns {Promise<void>}
 */
export async function prepareOfflineAssets({ publicDir }) {
  const outDir = path.join(publicDir, 'kernel-assets')
  await mkdir(outDir, { recursive: true })
  console.log('Preparing offline notebook kernel assets…')
  // The two kernels are prepared one after the other, Pyodide first.
  for (const prepare of [preparePyodide, prepareWebr]) {
    await prepare(outDir)
  }
  console.log('Offline kernel assets ready at', outDir)
}
|
|
||
// Direct invocation (`node scripts/prepare-offline-assets.mjs`): runs only when
// this module is the process entry script. `--force` deletes any existing
// kernel-assets directory first so everything is regenerated.
if (process.argv[1] === fileURLToPath(import.meta.url)) {
  const publicDir = path.join(APP_DIR, 'public')
  if (process.argv.includes('--force')) {
    await rm(path.join(publicDir, 'kernel-assets'), { recursive: true, force: true })
  }
  await prepareOfflineAssets({ publicDir })
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.