diff --git a/packages/realm-server/tests/module-cache-race-test.ts b/packages/realm-server/tests/module-cache-race-test.ts index f23a9718bd..86b10b1907 100644 --- a/packages/realm-server/tests/module-cache-race-test.ts +++ b/packages/realm-server/tests/module-cache-race-test.ts @@ -1842,4 +1842,245 @@ module(basename(__filename), function () { }); }, ); + + // Regression coverage for the persist-after-invalidate race in + // Realm.#sourceCache — the source-read analogue of the + // #transpiledModuleCache race above. getSourceOrRedirect reads bytes from + // disk under an await, then writes #sourceCache. If invalidateCache fires + // inside that window — e.g. a DELETE removing the file while a worker's + // indexing fetch of the same source is still in flight — the in-flight + // read's set would otherwise re-fill the slot invalidate just cleared, + // leaving a subsequent GET serving a file that is already gone from disk. + // The reader snapshots the source-cache generation before its first await + // and discards its set when the generation moved. + // + // __testOnlyDelaySourceCacheSet parks a source read at the exact post-read, + // pre-set point so the tests can fire invalidate / delete concurrently and + // assert on the next request's x-boxel-cache header (and HTTP status, for + // the delete case) deterministically — no reliance on real worker timing. + module('Realm.#sourceCache set-after-invalidate race', function (hooks) { + let realmURL = new URL('http://127.0.0.1:4444/test/'); + let testRealm: Realm; + let request: RealmRequest; + + function onRealmSetup(args: { + testRealm: Realm; + testRealmHttpServer: Server; + request: SuperTest; + }) { + testRealm = args.testRealm; + request = withRealmPath(args.request, realmURL); + } + + setupPermissionedRealmCached(hooks, { + fixture: 'blank', + realmURL, + permissions: { + '*': ['read', 'write'], + user: ['read', 'write', 'realm-owner'], + '@node-test_realm:localhost': ['read', 'realm-owner'], + }, + onRealmSetup, + }); + + function authHeader() { + return `Bearer ${createJWT(testRealm, 'user', ['read', 'write'])}`; + } + + // supertest's Test is a thenable — an identity .then() forces the HTTP + // request to dispatch now so the caller can race other work against the + // in-flight read instead of waiting on an await to start it. + function fireSourceRequest(path: string): Promise<{ status: number }> { + return request + .get(`/${path}`) + .set('Accept', 'application/vnd.card+source') + .set('Authorization', authHeader()) + .then((r) => r as { status: number }); + } + + // Install a gate that parks the next source read AFTER it has read bytes + // from disk but BEFORE it writes #sourceCache. `entered` resolves once a + // read is actually parked, so tests never depend on a fixed timeout. + function parkNextSourceRead(): { + release: () => void; + entered: Promise; + } { + let release: () => void = () => {}; + let gate = new Promise((r) => { + release = r; + }); + let signalEntered: () => void = () => {}; + let entered = new Promise((r) => { + signalEntered = r; + }); + testRealm.__testOnlyDelaySourceCacheSet(() => { + signalEntered(); + return gate; + }); + return { release, entered }; + } + + test('in-flight source read is dropped when invalidateCache fires concurrently', async function (assert) { + let modulePath = 'source-race-invalidate.gts'; + await testRealm.write(modulePath, 'export const v = 1;\n'); + testRealm.__testOnlyClearCaches(); + + let { release, entered } = parkNextSourceRead(); + let inflight: Promise<{ status: number }> | undefined; + try { + inflight = fireSourceRequest(modulePath); + await entered; + + // Invalidate mid-read: bumps the source-cache generation and clears + // the slot. The parked read's post-gate set must be discarded. + testRealm.invalidateCache(modulePath); + + // Stop parking so the assertion request below runs unblocked, then + // release the in-flight read. + testRealm.__testOnlyDelaySourceCacheSet(undefined); + release(); + + let response = await inflight; + assert.strictEqual( + response.status, + 200, + 'in-flight read still serves the bytes it read at request time', + ); + } finally { + release(); + testRealm.__testOnlyDelaySourceCacheSet(undefined); + if (inflight) { + await inflight.catch(() => {}); + } + } + + let nextResponse = await request + .get(`/${modulePath}`) + .set('Accept', 'application/vnd.card+source') + .set('Authorization', authHeader()); + assert.strictEqual( + nextResponse.headers['x-boxel-cache'], + 'miss', + 'next source request is a cache miss — the in-flight read did not re-fill the slot invalidate cleared', + ); + }); + + test('a source read in flight during a delete does not leave the deleted file served from cache', async function (assert) { + let modulePath = 'source-race-delete.gts'; + await testRealm.write(modulePath, 'export const v = 1;\n'); + testRealm.__testOnlyClearCaches(); + + let { release, entered } = parkNextSourceRead(); + let inflight: Promise<{ status: number }> | undefined; + try { + inflight = fireSourceRequest(modulePath); + await entered; + + // Delete the file while the read is parked. _deleteUnlocked removes + // the file from disk AND calls invalidateCache(path) synchronously — + // exactly the concurrency the boxel-cli `push --delete` flake hit, + // where a worker's indexing fetch of the source was in flight when + // the DELETE landed and re-cached the just-removed bytes. + await testRealm.delete(modulePath, { waitForIndex: false }); + + testRealm.__testOnlyDelaySourceCacheSet(undefined); + release(); + await inflight; + } finally { + release(); + testRealm.__testOnlyDelaySourceCacheSet(undefined); + if (inflight) { + await inflight.catch(() => {}); + } + } + + let afterDelete = await request + .get(`/${modulePath}`) + .set('Accept', 'application/vnd.card+source') + .set('Authorization', authHeader()); + assert.strictEqual( + afterDelete.status, + 404, + 'the deleted source is gone — without the generation guard the parked read would re-cache it and this would be a stale 200', + ); + }); + + test('invalidateCache of an unrelated path does not drop the in-flight source set', async function (assert) { + let primaryPath = 'source-race-primary.gts'; + let unrelatedPath = 'source-race-unrelated.gts'; + await testRealm.write(primaryPath, 'export const v = 1;\n'); + await testRealm.write(unrelatedPath, 'export const v = 2;\n'); + testRealm.__testOnlyClearCaches(); + + let { release, entered } = parkNextSourceRead(); + let inflight: Promise<{ status: number }> | undefined; + try { + inflight = fireSourceRequest(primaryPath); + await entered; + + // Invalidate a DIFFERENT path mid-read. primaryPath's generation is + // unchanged, so its set must proceed. + testRealm.invalidateCache(unrelatedPath); + + testRealm.__testOnlyDelaySourceCacheSet(undefined); + release(); + await inflight; + } finally { + release(); + testRealm.__testOnlyDelaySourceCacheSet(undefined); + if (inflight) { + await inflight.catch(() => {}); + } + } + + let nextResponse = await request + .get(`/${primaryPath}`) + .set('Accept', 'application/vnd.card+source') + .set('Authorization', authHeader()); + assert.strictEqual( + nextResponse.headers['x-boxel-cache'], + 'hit', + 'cross-path invalidate did not poison the in-flight source set — per-path scoping is correct', + ); + }); + + test('in-flight source read is dropped when __testOnlyClearCaches fires concurrently', async function (assert) { + let modulePath = 'source-race-clear.gts'; + await testRealm.write(modulePath, 'export const v = 1;\n'); + testRealm.__testOnlyClearCaches(); + + let { release, entered } = parkNextSourceRead(); + let inflight: Promise<{ status: number }> | undefined; + try { + inflight = fireSourceRequest(modulePath); + await entered; + + // Global wipe mid-read. The path-level counter is reset to 0 + // alongside the parked read's snapshot value, so only the global + // generation distinguishes pre/post-wipe — that's what catches the + // race here. (__testOnlyClearCaches does not touch the delay hook.) + testRealm.__testOnlyClearCaches(); + + testRealm.__testOnlyDelaySourceCacheSet(undefined); + release(); + await inflight; + } finally { + release(); + testRealm.__testOnlyDelaySourceCacheSet(undefined); + if (inflight) { + await inflight.catch(() => {}); + } + } + + let nextResponse = await request + .get(`/${modulePath}`) + .set('Accept', 'application/vnd.card+source') + .set('Authorization', authHeader()); + assert.strictEqual( + nextResponse.headers['x-boxel-cache'], + 'miss', + 'next source request is a cache miss — the global generation bump catches the race the path counter alone would miss', + ); + }); + }); }); diff --git a/packages/runtime-common/realm.ts b/packages/runtime-common/realm.ts index 4ed0168c18..ad2aa4f1bc 100644 --- a/packages/runtime-common/realm.ts +++ b/packages/runtime-common/realm.ts @@ -730,6 +730,20 @@ export class Realm { #definitionLookup: DefinitionLookup; #copiedFromRealm: URL | undefined; #sourceCache = new AliasCache(); + // Per-path generation counters for #sourceCache — the source-read analogue + // of #transpiledModuleCacheGenerations below. getSourceOrRedirect reads + // bytes from disk under an `await` (getFileWithFallbacks + materializeFileRef) + // and only then calls #sourceCache.set. An invalidateCache(path) that fires + // inside that window — e.g. a concurrent DELETE removing the file while a + // worker's indexing fetch of the same source is still in flight — clears the + // slot synchronously, but the in-flight read's set would otherwise re-fill it + // with the now-deleted bytes, leaving a GET serving a file that is gone from + // disk. The reader snapshots the generation before its first await and drops + // its set when the generation moved. #sourceCacheGlobalGeneration covers the + // bulk clears (__testOnlyClearCaches / clearLocalSourceCaches), which reset + // the per-path map alongside any in-flight snapshot's `path` component. + #sourceCacheGenerations: Map = new Map(); + #sourceCacheGlobalGeneration = 0; #transpiledModuleCache = new AliasCache(); // CS-11028: per-path generation counters for #transpiledModuleCache. Bumped // synchronously by invalidateCache(path) before any await. fallbackHandle @@ -1492,7 +1506,7 @@ export class Realm { } __testOnlyClearCaches() { - this.#sourceCache.clear(); + this.#dropAllSourceCacheEntries(); this.#dropAllTranspiledModuleCacheEntries(); // Reset the transpile counter so each test reasons about its own // delta. Production never reads this counter — only the CS-11029 @@ -1519,7 +1533,7 @@ export class Realm { // `clearLocalSourceCachesAndBroadcast` (local clear + peer broadcast); // the listener invokes it directly (no broadcast — would NOTIFY-loop). clearLocalSourceCaches(): void { - this.#sourceCache.clear(); + this.#dropAllSourceCacheEntries(); this.#dropAllTranspiledModuleCacheEntries(); } @@ -1554,6 +1568,17 @@ export class Realm { } #testOnlyTranspileDelay?: () => Promise; + // Test-only gate for the source-cache set-after-invalidate race: when set, + // getSourceOrRedirect awaits the returned promise AFTER it has read the file + // bytes from disk but BEFORE it writes #sourceCache. Lets the race test park + // a source read mid-flight, fire invalidateCache concurrently, then release + // — deterministically reproducing the window the generation guard closes, + // without depending on real worker/indexer timing. + __testOnlyDelaySourceCacheSet(fn: (() => Promise) | undefined): void { + this.#testOnlySourceCacheDelay = fn; + } + #testOnlySourceCacheDelay?: () => Promise; + // CS-11119: Drop every read-side cache whose content derives from // server-side state — currently `#inFlightSearch` (the searchCards // coalesce map, index-derived) and `#cachedRealmInfo` (cached @@ -1592,7 +1617,7 @@ export class Realm { // extensions. Public so the realm-server process can wire a NOTIFY listener // without reaching into private state. invalidateCache(path: LocalPath): void { - this.#sourceCache.invalidate(path); + this.#dropSourceCacheEntry(path); if (hasExecutableExtension(path)) { this.#dropTranspiledModuleEntry(path); } @@ -1698,6 +1723,68 @@ export class Realm { return curGen !== snapGen; } + // Source-cache analogue of #dropTranspiledModuleEntry: bump the path's + // generation BEFORE clearing the slot so a concurrent in-flight source read + // — already past its generation snapshot in getSourceOrRedirect — observes + // the new value at persist time and drops its #sourceCache.set instead of + // re-filling the slot we're about to empty. + #dropSourceCacheEntry(canonicalPath: LocalPath): void { + this.#sourceCacheGenerations.set( + canonicalPath, + (this.#sourceCacheGenerations.get(canonicalPath) ?? 0) + 1, + ); + this.#sourceCache.invalidate(canonicalPath); + } + + // Source-cache analogue of #dropAllTranspiledModuleCacheEntries: wipe every + // entry and bump the global generation so an in-flight read whose snapshot + // predates the wipe discards its post-read set rather than re-populating the + // just-cleared map. The per-path map is cleared because the generations it + // held are no longer reachable — the global counter is what catches + // in-flight snapshots after a wipe. + #dropAllSourceCacheEntries(): void { + this.#sourceCache.clear(); + this.#sourceCacheGenerations.clear(); + this.#sourceCacheGlobalGeneration += 1; + } + + // Snapshot generations for every path getSourceOrRedirect's + // getFileWithFallbacks could resolve to: the request's localPath plus each + // executable extension and ".json" when the request is extensionless (the + // exact fallback set getSourceOrRedirect passes). The post-read check keys + // on the resolved canonicalPath, so snapshotting all candidates catches the + // race whether the request was extensionless or carried its extension — + // same reasoning as #snapshotModuleCacheGeneration. + #snapshotSourceCacheGeneration(localPath: LocalPath): { + pathGens: Map; + global: number; + } { + let pathGens = new Map(); + pathGens.set(localPath, this.#sourceCacheGenerations.get(localPath) ?? 0); + if (!hasExecutableExtension(localPath)) { + for (let ext of [...executableExtensions, '.json']) { + let candidate = localPath + ext; + pathGens.set( + candidate, + this.#sourceCacheGenerations.get(candidate) ?? 0, + ); + } + } + return { pathGens, global: this.#sourceCacheGlobalGeneration }; + } + + #sourceCacheGenerationChanged( + canonicalPath: LocalPath, + snapshot: { pathGens: Map; global: number }, + ): boolean { + if (this.#sourceCacheGlobalGeneration !== snapshot.global) { + return true; + } + let snapGen = snapshot.pathGens.get(canonicalPath) ?? 0; + let curGen = this.#sourceCacheGenerations.get(canonicalPath) ?? 0; + return curGen !== snapGen; + } + // Broadcast a file-change notification to peer realm-server instances so // they can invalidate their own #sourceCache / #transpiledModuleCache entries for the // same path. Best-effort — failures are logged and swallowed because the @@ -3743,7 +3830,7 @@ export class Realm { if (bypassCache) { let cachedEntry = this.#sourceCache.get(localName); if (cachedEntry) { - this.#sourceCache.invalidate(cachedEntry.canonicalPath); + this.#dropSourceCacheEntry(cachedEntry.canonicalPath); } } else { let cached = this.#sourceCache.get(localName); @@ -3791,6 +3878,19 @@ export class Realm { let fallbackExtensions = alreadyHasExecutableExt ? [] : [...executableExtensions, '.json']; + // Snapshot the source-cache generation BEFORE the first await for every + // candidate getFileWithFallbacks could resolve to. invalidateCache(path) + // bumps the counter synchronously, so if it fires while we're reading + // bytes from disk (getFileWithFallbacks + materializeFileRef + the + // getCreatedTime query below) the post-read comparison against + // handle.path's snapshotted gen catches the race and we skip the cache + // write — otherwise the pre-invalidation bytes we just read would + // re-fill the slot invalidate just cleared, serving a file that is + // already gone from disk. bypassCache requests never set the cache, so + // they need no snapshot. + let sourceCacheGenSnapshot = bypassCache + ? undefined + : this.#snapshotSourceCacheGeneration(localName); let handle = await this.getFileWithFallbacks( localName, fallbackExtensions, @@ -3815,13 +3915,24 @@ export class Realm { }, requestContext, }); - if (!bypassCache) { - this.#sourceCache.set(localName, { - type: 'redirect', - status: 302, - headers, - canonicalPath: handle.path, - }); + if (sourceCacheGenSnapshot) { + if ( + !this.#sourceCacheGenerationChanged( + handle.path, + sourceCacheGenSnapshot, + ) + ) { + this.#sourceCache.set(localName, { + type: 'redirect', + status: 302, + headers, + canonicalPath: handle.path, + }); + } else { + this.#log.info( + `Dropped stale #sourceCache redirect set for ${handle.path} (requested ${localName}) — invalidated during in-flight source read`, + ); + } } return response; } @@ -3841,17 +3952,34 @@ export class Realm { }); } else { let cachedRef = await this.materializeFileRef(handle); + // Test-only gate: park here (bytes read, cache not yet written) so the + // source-cache race test can fire invalidateCache before the set. + if (this.#testOnlySourceCacheDelay) { + await this.#testOnlySourceCacheDelay(); + } // Compute the content fingerprint while we have the body in // memory — `cachedRef.content` is already a string/Uint8Array // post-materialization, so this is a single md5 with no extra I/O. let contentHash = contentHashFromMaterializedRef(cachedRef); - this.#sourceCache.set(localName, { - type: 'file', - ref: cachedRef, - defaultHeaders, - canonicalPath: handle.path, - contentHash, - }); + if ( + sourceCacheGenSnapshot && + !this.#sourceCacheGenerationChanged( + handle.path, + sourceCacheGenSnapshot, + ) + ) { + this.#sourceCache.set(localName, { + type: 'file', + ref: cachedRef, + defaultHeaders, + canonicalPath: handle.path, + contentHash, + }); + } else if (sourceCacheGenSnapshot) { + this.#log.info( + `Dropped stale #sourceCache set for ${handle.path} — invalidated during in-flight source read`, + ); + } return await this.serveLocalFile(request, cachedRef, requestContext, { defaultHeaders, etagVariant: SOURCE_ETAG_VARIANT,