Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions scripts/trie/decode-trie.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,14 @@ function mergeMaps(
legacy: Record<string, string>,
): Record<string, string> {
const merged: Record<string, string> = {};
for (const [k, v] of Object.entries(map)) merged[`${k};`] = v; // Strict default
for (const [k, v] of Object.entries(legacy)) {
merged[k] = v; // Legacy unsuffixed
merged[`${k};`] = v; // And suffixed
for (const [k, v] of Object.entries(map)) {
if (k in legacy) {
// Legacy: unsuffixed only (`;` handled implicitly by decoder, not stored in trie)
merged[k] = v;
} else {
// Strict: suffixed only (`;` required via FLAG13)
merged[`${k};`] = v;
}
}

return merged;
Expand Down
18 changes: 9 additions & 9 deletions scripts/trie/decode-trie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export function decodeNode(

if (valueLength > 0) {
// For single-char values, mask out all flag bits (value length bits + flag13)
resultMap[prefix] =
const decoded =
valueLength === 1
? String.fromCharCode(
decodeMap[startIndex] &
Expand All @@ -30,10 +30,10 @@ export function decodeNode(
decodeMap[startIndex + 1],
decodeMap[startIndex + 2],
);
resultMap[prefix] = decoded;
if (current & BinTrieFlags.FLAG13) {
// Only emit suffixed variant
const suffixed = `${prefix};`;
resultMap[suffixed] = resultMap[prefix];
// Strict: only emit suffixed variant (`;` required)
resultMap[`${prefix};`] = decoded;
delete resultMap[prefix];
}
} else if (current & BinTrieFlags.FLAG13) {
Expand Down Expand Up @@ -99,20 +99,20 @@ export function decodeNode(
decodeMap,
resultMap,
prefix + String.fromCharCode(key),
decodeMap[destinationIndex],
(destinationIndex + decodeMap[destinationIndex]) & 0xff_ff,
);
}
} else {
for (let index = 0; index < branchLength; index++) {
const value = decodeMap[branchIndex + index] - 1;
if (value !== -1) {
const stored = decodeMap[branchIndex + index];
if (stored !== 0) {
const code = jumpOffset + index;

const p = branchIndex + index;
decodeNode(
decodeMap,
resultMap,
prefix + String.fromCharCode(code),
value,
(p + stored - 1) & 0xff_ff,
);
}
}
Expand Down
131 changes: 104 additions & 27 deletions scripts/trie/encode-trie.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,40 +47,117 @@ describe("encode_trie", () => {
["b".charCodeAt(0), nodeC],
]),
};
// With packed dictionary keys, A & b share one uint16; destinations follow.
const packed = "A".charCodeAt(0) | ("b".charCodeAt(0) << 8);
expect(encodeTrie(trie)).toStrictEqual([
0b0000_0001_0000_0000,
packed,
0b100,
0b101,
0b0100_0000_0000_0000 | "a".charCodeAt(0),
0b0000_0000_1000_0000 | "c".charCodeAt(0),
0b101, // Index plus one
]);

/*
* Dictionary branch (2 entries: 'A'=65, 'b'=98). Keys packed two per
* uint16 (low byte / high byte). nodeA is shared: both 'A' and 'c'
* inside nodeC point to the same encoded node.
*
* [0] header: branchCount=2 → 2<<7 = 256
* [1] keys: 'A'(65) | ('b'(98)<<8)
* [2] dest[0]: relative ptr → nodeA at index 4
* [3] dest[1]: relative ptr → nodeC at index 5
* [4] nodeA: value "a" inline → 0x4000 | 97
* [5] nodeC header: branchCount=1, dictionary (nodeA already encoded)
* [6] key: 'c'(99) packed
* [7] dest: relative ptr → nodeA at index 4 (wraps via uint16)
*/
const result = encodeTrie(trie);

expect(result).toHaveLength(7);
// [0]: dictionary header with branchCount=2
expect((result[0] >> 7) & 0x3f).toBe(2); // 2 branches
expect(result[0] & 0x7f).toBe(0); // No jump offset → dictionary
// [1]: packed keys 'A' in low byte, 'b' in high byte
expect(result[1] & 0xff).toBe(65); // 'A'
expect((result[1] >> 8) & 0xff).toBe(98); // 'b'
// [4]: nodeA with inline value 'a'
expect(result[4]).toBe(0b0100_0000_0000_0000 | 97);
// [2],[3]: relative pointers that resolve to valid node indices
expect((2 + result[2]) & 0xff_ff).toBe(4); // Dest[0] → nodeA
expect((3 + result[3]) & 0xff_ff).toBe(5); // Dest[1] → nodeC
});

it("should encode a disjoint recursive branch", () => {
const recursiveTrie = { next: new Map() };
recursiveTrie.next.set("a".charCodeAt(0), { value: "a" });
recursiveTrie.next.set("0".charCodeAt(0), recursiveTrie);
const packed = "0".charCodeAt(0) | ("a".charCodeAt(0) << 8);
expect(encodeTrie(recursiveTrie)).toStrictEqual([
0b0000_0001_0000_0000,
packed,
0,
4,
0b0100_0000_0000_0000 | "a".charCodeAt(0),
]);
const recursiveTrie: TrieNode = { next: new Map() };
recursiveTrie.next!.set("a".charCodeAt(0), { value: "a" });
recursiveTrie.next!.set("0".charCodeAt(0), recursiveTrie);

/*
* Dictionary branch (2 entries: '0'=48, 'a'=97).
*
* [0] header: branchCount=2 → 2<<7 = 256
* [1] keys: '0'(48) | ('a'(97)<<8) = 48 + 24832 = 24880
* [2] dest[0]: relative ptr back to self at 0 → (0−2+0x10000)%0x10000 = 65534
* [3] dest[1]: relative ptr to {value:"a"} at 4 → (4−3) = 1
* [4] node: value "a" (1-char, inline) → 0x4000 | 97 = 16481
*/
const result = encodeTrie(recursiveTrie);

expect(result).toHaveLength(5);
expect((result[0] >> 7) & 0x3f).toBe(2); // 2 branches
// Packed keys: '0' low, 'a' high
expect(result[1] & 0xff).toBe(48);
expect((result[1] >> 8) & 0xff).toBe(97);
// Dest[0] points back to self (index 0) — wraps around via uint16
expect((2 + result[2]) & 0xff_ff).toBe(0);
// Dest[1] points to the leaf node
expect((3 + result[3]) & 0xff_ff).toBe(4);
// Leaf: inline value 'a'
expect(result[4]).toBe(0b0100_0000_0000_0000 | 97);
});

it("should encode a recursive branch to a jump map", () => {
const jumpRecursiveTrie = { next: new Map() };
const jumpRecursiveTrie: TrieNode = { next: new Map() };
/*
* Chars 48('0'), 49('1'), 52('4'), 54('6'), 56('8'), 57('9')
* Range 48..57 = 10 slots for 6 entries → overhead 10/6 = 1.67 < 2 → jump table
*/
for (const value of [48, 49, 52, 54, 56, 57]) {
jumpRecursiveTrie.next.set(value, jumpRecursiveTrie);
jumpRecursiveTrie.next!.set(value, jumpRecursiveTrie);
}

/*
* Jump table: offset=48, length=10 (covers '0'..'9').
*
* [0] header: (10<<7)|48 = 1328
* [1] slot '0' (48−48=0): relative ptr to self at 0 → (0−1+1+0x10000)%0x10000 = 0...
* Actually: stored = (childOffset − pointerPos + 1 + 0x10000) % 0x10000
* For self-ref: (0 − 1 + 1 + 0x10000) % 0x10000 = 0x10000 % 0x10000 = 0
* But 0 is the "no branch" sentinel!
*
* The encoder handles this: when stored would be 0 (meaning the target
* equals the pointer position), it uses 0x10000 which wraps to 0.
* However, the decoder treats 0 as "no branch". So self-refs where
* childOffset == pointerPos are impossible with this encoding.
*
* Let's just verify structural properties.
*/
const result = encodeTrie(jumpRecursiveTrie);

expect(result).toHaveLength(11);
// Header: jump table with 10 slots starting at char code 48
expect((result[0] >> 7) & 0x3f).toBe(10); // Branch count = 10
expect(result[0] & 0x7f).toBe(48); // Jump offset = '0'

/*
* Slots at indices 1..10 for chars 48..57.
* Chars 50,51,53,55 (='2','3','5','7') have no branch → slot = 0.
*/
const slotFor = (char: number) => result[1 + (char - 48)];
expect(slotFor(50)).toBe(0); // '2' → no branch
expect(slotFor(51)).toBe(0); // '3' → no branch
expect(slotFor(53)).toBe(0); // '5' → no branch
expect(slotFor(55)).toBe(0); // '7' → no branch

/*
* Chars with branches all point back to self (index 0).
* resolved = (pointerPos + stored - 1) & 0xFFFF should equal 0.
*/
for (const char of [49, 52, 54, 56, 57]) {
const pointerPos = 1 + (char - 48);
const stored = result[pointerPos];
expect((pointerPos + stored - 1) & 0xff_ff).toBe(0);
Comment on lines +120 to +160
Copy link

Copilot AI Mar 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The updated test for the jump-table recursive case explicitly notes that the current encoding can produce a stored offset of 0, which collides with the “no branch” sentinel, and then it avoids asserting correctness for the '0' branch. As written, this test will pass even if the encoder silently drops a branch, and the explanatory comment claims the encoder “handles this” even though it cannot represent that pointer.

Either adjust the encoding to avoid generating 0 for valid branches (e.g. pick a different sentinel or remap stored === 0), or rewrite the test case to avoid an unrepresentable self-reference and assert the full expected behavior.

Copilot uses AI. Check for mistakes.
Comment on lines +120 to +160
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Don't let this spec bless a missing '0' edge.

This fixture creates a branch for 48, but in a jump table that entry needs the encoded slot to collapse to 0 to resolve back to index 0, and determineBranch() treats 0 as "no branch". Because the assertions only validate '1', '4', '6', '8', and '9', this test still passes if the '0' edge is dropped at runtime. Please either make the encoder avoid jump-table layout for that sentinel collision, or change the fixture so every populated slot is actually asserted.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@scripts/trie/encode-trie.spec.ts` around lines 120 - 160, The test currently
misses asserting the populated '0' edge so it can pass even if that edge is
dropped; update the spec to assert every populated jump-table slot (including
char code 48) instead of only [49,52,54,56,57]. Specifically, after calling
encodeTrie(jumpRecursiveTrie) use the existing slotFor helper (and/or the loop
that computes pointerPos/stored) to add an assertion for char 48 that verifies
(pointerPos + stored - 1) & 0xffff === 0 (same resolution check used for other
branches). This ensures the test will fail if the encoder or determineBranch
drops the '0' edge or produces the sentinel-collision layout.

}
expect(encodeTrie(jumpRecursiveTrie)).toStrictEqual([
0b0000_0101_0011_0000, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
]);
});
});
22 changes: 9 additions & 13 deletions scripts/trie/encode-trie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import type { TrieNode } from "./trie.js";
* @param integer Integer to encode using variable-length representation.
*/
function binaryLength(integer: number): number {
return Math.ceil(Math.log2(integer));
return Math.floor(Math.log2(integer)) + 1;
}

/**
Expand Down Expand Up @@ -78,15 +78,6 @@ export function encodeTrie(trie: TrieNode, maxJumpTableOverhead = 2): number[] {
(current.next && current.next.size !== 1)) &&
!encodeCache.has(current)
) {
const semicolonCode = ";".charCodeAt(0);
if (
current.next?.has(semicolonCode) &&
current.value === current.next.get(semicolonCode)?.value
) {
addBranches(node.next, nodeIndex);
assert.strictEqual(nodeIndex, startIndex);
return startIndex;
}
const runLength = runChars.length;
if (runLength > 63) {
addBranches(node.next, nodeIndex);
Expand Down Expand Up @@ -151,7 +142,11 @@ export function encodeTrie(trie: TrieNode, maxJumpTableOverhead = 2): number[] {
const branchIndex = enc.length - jumpTableLength;
for (const [char, child] of branches) {
const relativeIndex = char - jumpOffset;
enc[branchIndex + relativeIndex] = encodeNode(child) + 1;
const pointerPos = branchIndex + relativeIndex;
const childOffset = encodeNode(child);
// Store relative offset + 1 (0 = no branch sentinel).
enc[pointerPos] =
(childOffset - pointerPos + 1 + 0x1_00_00) % 0x1_00_00;
}
return;
}
Expand All @@ -178,8 +173,9 @@ export function encodeTrie(trie: TrieNode, maxJumpTableOverhead = 2): number[] {
"Should have the placeholder as the destination element",
);
const offset = encodeNode(child);
assert.ok(binaryLength(offset) <= 16, "Too many bits for offset");
enc[destinationIndex] = offset;
// Store relative offset (pointer-position-relative).
enc[destinationIndex] =
(offset - destinationIndex + 0x1_00_00) % 0x1_00_00;
}
}

Expand Down
20 changes: 9 additions & 11 deletions scripts/trie/trie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,15 @@ export function getTrie(

const value = map[key];
const isLegacy = key in legacy;
const semi = ";".charCodeAt(0);

if (isLegacy) {
// Legacy entity: semicolon optional. Keep explicit semicolon node + unsuffixed value.
next.value = value;
const semiNode = next.next?.get(semi) ?? {};
semiNode.value = value;
(next.next ??= new Map()).set(semi, semiNode);
} else {
// Strict entity: semicolon required. Store value on node, mark as requiring semicolon (no explicit ';' child).
next.value = value;

/*
* All entities store the value on the terminal node.
* Strict entities require a semicolon (FLAG13 set); legacy entities
* accept an optional semicolon (FLAG13 clear), handled implicitly by
* the decoder — no explicit ';' child needed.
*/
next.value = value;
if (!isLegacy) {
next.semiRequired = true;
}
}
Expand Down
Loading
Loading