diff --git a/doc/export-import.md b/doc/export-import.md index 1c8aabf7..c1b7bb9e 100644 --- a/doc/export-import.md +++ b/doc/export-import.md @@ -15,7 +15,8 @@ await index.export(async function(key, data){ Import from folder `/export/` into an `Index` or `Document-Index`: ```js -const index = new Index({/* keep old config and place it here */}); +// Config is restored automatically from the export payload +const index = new Index({}); const files = await fs.readdir("./export/"); for(let i = 0; i < files.length; i++){ @@ -24,14 +25,18 @@ for(let i = 0; i < files.length; i++){ } ``` -> You'll need to use the same configuration as you used before the export. Any changes on the configuration needs to be re-indexed. +The export payload includes a `.cfg` key that carries all index configuration (tokenizer, encoder, resolution, context, score, etc). A plain `new Index({})` or `new Document({})` is sufficient — no need to repeat the original options. + +> When a custom encoder or score function is defined **inline** in the original config it is serialized as source text and reconstructed on import. If the function closes over outer variables those bindings **will not** be available after restore — keep any required values inside the function body. + +> The feature "fastupdate" is automatically disabled on import. ## Fast-Boot Serialization for Server-Side-Rendering (PHP, Python, Ruby, Rust, Java, Go, Node.js, ...) > This is an experimental feature with limited support which probably might drop in future release. You're welcome to give some feedback. -When using Server-Side-Rendering you can create a different export which instantly boot up. Especially when using Server-side rendered content, this could help to restore a __static__ index on page load. Document-Indexes aren't supported yet for this method. +When using Server-Side-Rendering you can create a different export which instantly boot up. Especially when using Server-side rendered content, this could help to restore a __static__ index on page load. > When your index is too large you should use the default export/import mechanism. @@ -39,7 +44,7 @@ You'll need Javascript to create the serialized output. Alternatively just creat As the first step populate the FlexSearch index with your contents. -You have two options: +You have three options: ### 1. Create a function as string @@ -70,8 +75,6 @@ inject(index); That's it. -> You'll need to use the same configuration as you used before the export. Any changes on the configuration needs to be re-indexed. - ### 2. Create just a function body as string Alternatively you can use lazy function declaration by passing `false` to the serialize function: @@ -101,92 +104,144 @@ const index = new Index(); inject(index); ``` +### 3. Self-contained inject (config embedded) - +Pass `true` as the second argument to embed the index configuration inside the serialized output. The restored index needs no external config at all: -## Export / Import (In-Memory) +```js +const fn_body = index.serialize(false, true); +const index2 = new Function("FlexSearch", fn_body)(FlexSearch); +``` -### Node.js +This is the recommended approach when the index was built with custom options (custom encoder, score function, tokenizer, etc.) and you cannot guarantee the consumer will supply the same config. The encoder and score functions are serialized as source text — the same caveat about closure variables applies as with export/import. -> Persistent-Indexes and Worker-Indexes don't support Import/Export. + -Export an `Index` or `Document-Index` to the folder `/export/`: +## Document Fast-Boot Serialization -```js -import { promises as fs } from "fs"; +Document indexes can also be serialized for fast-boot on the client side. This works similarly to Index serialization but handles multiple fields, tags, and storage. -await index.export(async function(key, data){ - await fs.writeFile("./export/" + key, data, "utf8"); -}); +### Serialize a Document Index + +```js +const fn_string = document.serialize(); ``` -Import from folder `/export/` into an `Index` or `Document-Index`: +This produces a function string that looks like: ```js -const index = new Index({/* keep old config and place it here */}); - -const files = await fs.readdir("./export/"); -for(let i = 0; i < files.length; i++){ - const data = await fs.readFile("./export/" + files[i], "utf8"); - await index.import(files[i], data); +function inject(doc){ + doc.reg = new Set([/* ... */]); + doc.index.get("fieldName").map = new Map([/* ... */]); + doc.index.get("fieldName").ctx = new Map([/* ... */]); + // ... for each field } ``` -> You'll need to use the same configuration as you used before the export. Any changes on the configuration needs to be re-indexed. +### Restore the serialized Document -### Browser +**Option A — self-contained inject (config embedded):** ```js -index.export(function(key, data){ - - // you need to store both the key and the data! - // e.g. use the key for the filename and save your data - - localStorage.setItem(key, data); -}); +const fn_body = document.serialize(false, false, true); +const doc = new Function("FlexSearch", fn_body)(FlexSearch); + +// Ready to search immediately +const results = doc.search("your query"); ``` -> The size of the export corresponds to the memory consumption of the library. To reduce export size you have to use a configuration which has less memory footprint (use the table at the bottom to get information about configs and its memory allocation). +Pass `true` as the third argument to embed all field configuration (encoders, tokenizers, score functions, etc.) in the serialized output. `FlexSearch` must be in scope when the function runs. -When your save routine runs asynchronously you have to use `async/await` or return a promise: +**Option B — inject into a pre-created document (no config needed):** ```js -index.export(function(key, data){ - - return new Promise(function(resolve){ - - // do the saving as async +const fn_body = document.serialize(false); +const inject = new Function("doc", fn_body); - resolve(); - }); -}); +// A plain new Document({}) is enough — config is read from the serialized data +const doc = new Document({}); +inject(doc); + +// Ready to search +const results = doc.search("your query"); ``` -Before you can import data, you need to create your index first. For document indexes provide the same document descriptor you used when export the data. This configuration isn't stored in the export. +### Without function wrapper + +Get just the body if you want to wrap it differently: ```js -const index = new Index({/* keep old config and place it here */}); +const fn_body = document.serialize(false); +const inject = new Function("doc", fn_body); ``` -To import the data just pass a key and data: +## Bulk Export / Import + +Use the bulk export APIs when you want all index data in a single payload for transport or storage: +```js +// Export uncompressed (returns JSON string) +const json = await index.exportIndexBulk(); + +// Export compressed (returns gzip Uint8Array) +const compressed = await index.exportIndexBulk(true); ``` -const data = localStorage.getItem(key); -index.import(key, data); + +```js +// Import uncompressed JSON string +const restored = new Index({}); +await restored.importIndexBulk(json); + +// Import compressed Uint8Array +const restored2 = new Index({}); +await restored2.importIndexBulk(compressed, true); ``` -You need to import every key! Otherwise, your index does not work. You need to store the keys from the export and use this keys for the import (the order of the keys can differ). +Same pattern for `Document`: -> The feature "fastupdate" is automatically disabled on import. +```js +const json = await doc.exportDocumentBulk(); +const docRestored = new Document({}); +await docRestored.importDocumentBulk(json); +``` + +These methods collect all export data into a Map, serialize to JSON, and optionally compress with gzip. This leverages the same bulk import support and provides a simple, maintainable approach. -This is just for demonstration and is not recommended, because you might have other keys in your localStorage which aren't supported as an import: +### Bulk import convenience + +`import()` also accepts a full payload map or entries array and loops internally: ```js -var keys = Object.keys(localStorage); +const payload = new Map(); +await index.export((key, data) => payload.set(key, data)); -for(let i = 0, key, data; i < keys.length; i++){ - key = keys[i] - data = localStorage.getItem(key); - index.import(key, data); -} +const index2 = new Index({}); +index2.import(payload); // or index2.import(Array.from(payload.entries())) ``` + +### Utility helpers for generic strings + +`compress()` and `decompress()` stay as convenience helpers for string payloads (for example serialized fast-boot function strings): + +```js +import { compress, decompress } from "flexsearch"; + +const fnString = index.serialize(false); +const compressed = await compress(fnString); +const restored = await decompress(compressed); +``` + +#### API + +| Function | Signature | Returns | +|---|---|---| +| `exportIndexBulk` | `(compressed?: boolean) => Promise` | JSON string (uncompressed) or Uint8Array (compressed) | +| `importIndexBulk` | `(source: string \| Uint8Array, compressed?: boolean) => Promise` | Restores from bulk payload | +| `exportDocumentBulk` | `(compressed?: boolean) => Promise` | JSON string (uncompressed) or Uint8Array (compressed) | +| `importDocumentBulk` | `(source: string \| Uint8Array, compressed?: boolean) => Promise` | Restores from bulk payload | +| `import` | `(payload: Map \| Array<[string, string]>) => void` | Bulk import convenience | +| `serialize` | **Index:** `(withFunctionWrapper?: boolean, withCfg?: boolean) => SerializedFunctionString` | **Index:** Fast-boot function string or body | +| | **Document:** `(withFunctionWrapper?: boolean, withCompression?: boolean, withCfg?: boolean) => SerializedFunctionString \| Promise` | **Document:** Fast-boot function string/body or compressed data | +| `compress` | `(data: string) => Promise` | Compress string data | +| `decompress` | `(data: Uint8Array) => Promise` | Decompress to string | + diff --git a/index.d.ts b/index.d.ts index 92edcf33..71668660 100644 --- a/index.d.ts +++ b/index.d.ts @@ -19,6 +19,9 @@ declare module "flexsearch" { export type Limit = number; export type ExportHandler = (key: string, data: string) => void; export type ExportHandlerAsync = (key: string, data: string) => Promise; + export type ExportEntries = Array<[string, string]>; + export type ExportMap = Map; + export type CompressedSource = Uint8Array | ArrayBuffer | ReadableStream; export type AsyncCallback = (result?: T) => void; /************************************/ @@ -148,6 +151,20 @@ declare module "flexsearch" { LatinDefault: EncoderOptions }; + /** + * Compress a string using gzip compression + * @param data - String data to compress + * @returns Promise that resolves to compressed Uint8Array + */ + export function compress(data: string): Promise; + + /** + * Decompress gzip-compressed data + * @param data - Compressed data as Uint8Array + * @returns Promise that resolves to decompressed string + */ + export function decompress(data: Uint8Array): Promise; + /** * These options will determine how the contents will be indexed. * @@ -276,8 +293,15 @@ declare module "flexsearch" { export(handler: ExportHandlerAsync): Promise; import(key: string, data: string): void; + import(payload: ExportMap): void; + import(payload: ExportEntries): void; + + exportIndexBulk(compressed?: boolean): Promise; + importIndexBulk(source: string | Uint8Array, compressed?: boolean): Promise; + serialize(with_function_wrapper?: boolean): SerializedFunctionString; + serialize(with_function_wrapper: boolean, with_cfg: boolean): SerializedFunctionString; // Persistent Index mount(db: StorageInterface): Promise; @@ -746,6 +770,15 @@ declare module "flexsearch" { export(handler: ExportHandlerAsync): Promise; import(key: string, data: string): void; + import(payload: ExportMap): void; + import(payload: ExportEntries): void; + + exportDocumentBulk(compressed?: boolean): Promise; + importDocumentBulk(source: string | Uint8Array, compressed?: boolean): Promise; + + + serialize(with_function_wrapper?: boolean, compress?: boolean): SerializedFunctionString | Promise; + serialize(with_function_wrapper: boolean, compress: boolean, with_cfg: boolean): SerializedFunctionString | Promise; // Persistent Index mount>(db: S): Promise; diff --git a/src/bundle.js b/src/bundle.js index 4100f95e..cdfade63 100644 --- a/src/bundle.js +++ b/src/bundle.js @@ -13,6 +13,10 @@ import { import { SearchOptions, ContextOptions, + SerializedIndexContext, + SerializedIndexConfig, + SerializedFieldConfig, + SerializedDocumentConfig, DocumentDescriptor, DocumentSearchOptions, FieldOptions, @@ -37,6 +41,7 @@ import Encoder from "./encoder.js"; import IdxDB from "./db/indexeddb/index.js"; import Charset from "./charset.js"; import { KeystoreMap, KeystoreArray, KeystoreSet } from "./keystore.js"; +import { compress, decompress } from "./serialize.js"; /** @export */ Index.prototype.add; /** @export */ Index.prototype.append; @@ -55,6 +60,8 @@ import { KeystoreMap, KeystoreArray, KeystoreSet } from "./keystore.js"; /** @export */ Index.prototype.removeAsync; /** @export */ Index.prototype.export; /** @export */ Index.prototype.import; +/** @export */ Index.prototype.exportIndexBulk; +/** @export */ Index.prototype.importIndexBulk; /** @export */ Index.prototype.serialize; /** @export */ Index.prototype.mount; /** @export */ Index.prototype.commit; @@ -65,6 +72,11 @@ if(SUPPORT_SERIALIZE || SUPPORT_PERSISTENT){ /** @export */ Index.prototype.reg; /** @export */ Index.prototype.map; /** @export */ Index.prototype.ctx; +/** @export */ Index.prototype.resolution_ctx; +} + +if (SUPPORT_SERIALIZE) { +/** @export */ Index.prototype._encoderOpt; } if(SUPPORT_PERSISTENT){ @@ -108,6 +120,9 @@ if(SUPPORT_PERSISTENT){ /** @export */ Document.prototype.destroy; /** @export */ Document.prototype.export; /** @export */ Document.prototype.import; +/** @export */ Document.prototype.exportDocumentBulk; +/** @export */ Document.prototype.importDocumentBulk; +/** @export */ Document.prototype.serialize; /** @export */ Document.prototype.get; /** @export */ Document.prototype.set; @@ -118,6 +133,10 @@ if(SUPPORT_SERIALIZE){ /** @export */ Document.prototype.tag; /** @export */ Document.prototype.store; /** @export */ Document.prototype.fastupdate; +/** @export */ Document.prototype._cfgKey; +/** @export */ Document.prototype.tree; +/** @export */ Document.prototype.tagtree; +/** @export */ Document.prototype.tagfield; } /** @export */ Resolver.prototype.limit; @@ -228,6 +247,34 @@ if(SUPPORT_SERIALIZE){ /** @export */ ContextOptions.bidirectional; /** @export */ ContextOptions.resolution; +/** @export */ SerializedIndexContext.depth; +/** @export */ SerializedIndexContext.bidirectional; +/** @export */ SerializedIndexContext.resolution; + +/** @export */ SerializedIndexConfig.tokenize; +/** @export */ SerializedIndexConfig.resolution; +/** @export */ SerializedIndexConfig.context; +/** @export */ SerializedIndexConfig.rtl; +/** @export */ SerializedIndexConfig.encoder; +/** @export */ SerializedIndexConfig.score; +/** @export */ SerializedIndexConfig.priority; +/** @export */ SerializedIndexConfig.keystore; + +/** @export */ SerializedFieldConfig.field; +/** @export */ SerializedFieldConfig.tokenize; +/** @export */ SerializedFieldConfig.resolution; +/** @export */ SerializedFieldConfig.context; +/** @export */ SerializedFieldConfig.rtl; +/** @export */ SerializedFieldConfig.encoder; +/** @export */ SerializedFieldConfig.score; +/** @export */ SerializedFieldConfig.priority; +/** @export */ SerializedFieldConfig.keystore; + +/** @export */ SerializedDocumentConfig.id; +/** @export */ SerializedDocumentConfig.fields; +/** @export */ SerializedDocumentConfig.tagfields; +/** @export */ SerializedDocumentConfig.store; + /** @export */ DocumentDescriptor.field; /** @export */ DocumentDescriptor.index; /** @export */ DocumentDescriptor.tag; @@ -339,6 +386,8 @@ const FlexSearch = { "Worker": SUPPORT_WORKER ? WorkerIndex : null, "Resolver": SUPPORT_RESOLVER ? Resolver : null, "IndexedDB": SUPPORT_PERSISTENT ? IdxDB : null, + "compress": SUPPORT_SERIALIZE ? compress : null, + "decompress": SUPPORT_SERIALIZE ? decompress : null, "Language": {} }; @@ -386,6 +435,8 @@ export { Document, Encoder, Charset, + compress, + decompress, WorkerIndex as Worker, Resolver, IdxDB as IndexedDB diff --git a/src/document.js b/src/document.js index 325070f5..bc97220d 100644 --- a/src/document.js +++ b/src/document.js @@ -38,7 +38,7 @@ import Encoder, { fallback_encoder } from "./encoder.js"; import Cache, { searchCache } from "./cache.js"; import { is_string, is_object, parse_simple } from "./common.js"; import apply_async from "./async.js"; -import { exportDocument, importDocument } from "./serialize.js"; +import { exportDocument, importDocument, serializeDocument, exportDocumentBulk, importDocumentBulk } from "./serialize.js"; import { KeystoreMap, KeystoreSet } from "./keystore.js"; import "./document/add.js"; import "./document/search.js"; @@ -63,9 +63,14 @@ export default function Document(options){ let tmp, keystore; this.tree = []; + // Keep stable public property names for bundled/minified builds. + this["tree"] = this.tree; this.field = []; this.marker = []; this.key = ((tmp = document.key || document.id) && parse_tree(tmp, this.marker)) || "id"; + if(SUPPORT_SERIALIZE){ + this._cfgKey = document.key || document.id || null; + } keystore = SUPPORT_KEYSTORE && (options.keystore || 0); keystore && (this.keystore = keystore); @@ -123,6 +128,8 @@ export default function Document(options){ this.tag = new Map(); this.tagtree = []; this.tagfield = []; + this["tagtree"] = this.tagtree; + this["tagfield"] = this.tagfield; for(let i = 0, params, field; i < tmp.length; i++){ params = tmp[i]; field = params.field || params; @@ -573,6 +580,9 @@ if(SUPPORT_SERIALIZE){ Document.prototype.export = exportDocument; Document.prototype.import = importDocument; + Document.prototype.exportDocumentBulk = exportDocumentBulk; + Document.prototype.importDocumentBulk = importDocumentBulk; + Document.prototype.serialize = serializeDocument; } if(SUPPORT_ASYNC){ diff --git a/src/index.js b/src/index.js index d7137c6f..f9125ed0 100644 --- a/src/index.js +++ b/src/index.js @@ -29,7 +29,7 @@ import Cache, { searchCache } from "./cache.js"; import Charset from "./charset.js"; import { KeystoreMap, KeystoreSet } from "./keystore.js"; import { is_array, is_string } from "./common.js"; -import { exportIndex, importIndex, serialize } from "./serialize.js"; +import { exportIndex, importIndex, serializeIndex, exportIndexBulk, importIndexBulk } from "./serialize.js"; import { remove_index } from "./index/remove.js"; //import default_encoder from "./charset/latin/default.js"; import apply_preset from "./preset.js"; @@ -88,6 +88,10 @@ export default function Index(options, _register){ ) : { encode: encoder }; + if(SUPPORT_SERIALIZE){ + this._encoderOpt = options.encoder || options.encode || null; + } + if(SUPPORT_COMPRESSION){ this.compress = options.compress || options.compression || false; } @@ -281,7 +285,9 @@ if(SUPPORT_SERIALIZE){ Index.prototype.export = exportIndex; Index.prototype.import = importIndex; - Index.prototype.serialize = serialize; + Index.prototype.exportIndexBulk = exportIndexBulk; + Index.prototype.importIndexBulk = importIndexBulk; + Index.prototype.serialize = serializeIndex; } if(SUPPORT_ASYNC){ diff --git a/src/serialize.js b/src/serialize.js index 2a280796..fcf1249c 100644 --- a/src/serialize.js +++ b/src/serialize.js @@ -2,12 +2,20 @@ import { SUPPORT_STORE, SUPPORT_TAGS, - SUPPORT_WORKER + SUPPORT_WORKER, + SUPPORT_SERIALIZE, + SUPPORT_CHARSET, + SUPPORT_ENCODER, + SUPPORT_ASYNC, + SUPPORT_KEYSTORE } from "./config.js"; import { IntermediateSearchResults } from "./type.js"; // <-- COMPILER BLOCK import Index from "./index.js"; import Document from "./document.js"; +import WorkerIndex from "./worker.js"; +import Charset from "./charset.js"; +import Encoder from "./encoder.js"; import { KeystoreMap, KeystoreSet } from "./keystore.js"; import { is_string } from "./common.js"; @@ -15,6 +23,22 @@ const chunk_size_reg = 250000; const chunk_size_map = 5000; const chunk_size_ctx = 1000; +// Runtime config records used for Closure @export protection in bundle builds. +/** @constructor */ export function IndexContextRecord(){} +/** @constructor */ export function IndexConfigRecord(){} +/** @constructor */ export function FieldConfigRecord(){} +/** @constructor */ export function DocumentConfigRecord(){} + +/** + * Escape a string for safe embedding in a JS string literal. + * Handles backslash, double-quote, single-quote, newlines, and other control chars. + * @param {string} str + * @return {string} + */ +function escape_js_string(str) { + return str.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/'/g, "\\'").replace(/\n/g, "\\n").replace(/\r/g, "\\r"); +} + /** * @param {Map|KeystoreMap} map * @param {number=} size @@ -134,9 +158,252 @@ function json_to_reg(json, reg){ return /** @type {Set} */ (reg); } +/** + * Find the name of a Charset preset by object reference. + * @param {*} encoderOpt + * @return {string|null} + */ +function find_charset_name(encoderOpt){ + if(!encoderOpt || typeof encoderOpt === "string") return null; + const keys = Object.keys(Charset); + for(let i = 0; i < keys.length; i++){ + if(Charset[keys[i]] === encoderOpt) return keys[i]; + } + return null; +} + +/** + * Serialize an encoder option to a string key for JSON export/import. + * @param {*} encoderOpt + * @return {string|null} + */ +function serialize_encoder_to_str(encoderOpt){ + if(!encoderOpt) return null; + if(typeof encoderOpt === "string") return encoderOpt; + const name = find_charset_name(encoderOpt); + if(name) return name; + if(typeof encoderOpt === "function") return encoderOpt.toString(); + return null; +} + +/** + * Serialize an encoder option to a JS expression for inject function bodies. + * @param {*} encoderOpt + * @param {string} charsetRef - JS variable name for Charset + * @return {string|null} + */ +function serialize_encoder_to_js(encoderOpt, charsetRef){ + if(!encoderOpt) return null; + if(typeof encoderOpt === "string") return charsetRef + '["' + encoderOpt + '"]'; + const name = find_charset_name(encoderOpt); + if(name) return charsetRef + '["' + name + '"]'; + if(typeof encoderOpt === "function") return encoderOpt.toString(); + return null; +} + +/** + * Build an Index config as a JS object literal string. + * @param {Index|WorkerIndex} index + * @param {string} charsetRef + * @return {string} + */ +function index_config_to_js(index, charsetRef){ + const parts = []; + if(index.tokenize && index.tokenize !== "strict"){ + parts.push('tokenize:"' + index.tokenize + '"'); + } + if(index.resolution !== undefined && index.resolution !== 9){ + parts.push("resolution:" + index.resolution); + } + if(index.depth){ + const ctxParts = ["depth:" + index.depth]; + if(!index.bidirectional) ctxParts.push("bidirectional:false"); + if(index.resolution_ctx !== undefined && index.resolution_ctx !== 3){ + ctxParts.push("resolution:" + index.resolution_ctx); + } + parts.push("context:{" + ctxParts.join(",") + "}"); + } + if(index.rtl) parts.push("rtl:true"); + if(SUPPORT_SERIALIZE && index._encoderOpt){ + const expr = serialize_encoder_to_js(index._encoderOpt, charsetRef); + if(expr) parts.push("encoder:" + expr); + } + if(index.score) parts.push("score:" + index.score.toString()); + if(SUPPORT_ASYNC && index.priority && index.priority !== 4) parts.push("priority:" + index.priority); + if(SUPPORT_KEYSTORE && index.keystore) parts.push("keystore:" + index.keystore); + return "{" + parts.join(",") + "}"; +} + +/** + * Build an Index config as a plain object for JSON export. + * @param {Index|WorkerIndex} index + * @return {IndexConfigRecord} + */ +function index_config_to_obj(index){ + const cfg = new IndexConfigRecord(); + if (index.tokenize && index.tokenize !== "strict") cfg.tokenize = index.tokenize; + if (index.resolution !== 9) cfg.resolution = index.resolution; + if(index.depth){ + const ctx = new IndexContextRecord(); + ctx.depth = index.depth; + if (!index.bidirectional) ctx.bidirectional = false; + if (index.resolution_ctx !== 3) ctx.resolution = index.resolution_ctx; + cfg.context = ctx; + } + if (index.rtl) cfg.rtl = true; + if(SUPPORT_SERIALIZE && index._encoderOpt){ + const str = serialize_encoder_to_str(index._encoderOpt); + if (str) cfg.encoder = str; + } + if (index.score) cfg.score = index.score.toString(); + if (SUPPORT_ASYNC && index.priority && index.priority !== 4) cfg.priority = index.priority; + if (SUPPORT_KEYSTORE && index.keystore) cfg.keystore = index.keystore; + return cfg; +} + +/** + * Build a Document config as a JS object literal string for inject functions. + * @param {Document} doc + * @param {string} charsetRef + * @return {string} + */ +function document_config_to_js(doc, charsetRef){ + const idField = (SUPPORT_SERIALIZE && doc._cfgKey) || doc.key || "id"; + let indexFields = ""; + for(let i = 0; i < doc.field.length; i++){ + const fieldName = doc.field[i]; + const fieldIdx = doc.index.get(fieldName); + const inner = fieldIdx ? index_config_to_js(fieldIdx, charsetRef).slice(1, -1) : ""; + indexFields += (indexFields ? "," : "") + '{field:"' + fieldName + '"' + (inner ? "," + inner : "") + "}"; + } + const parts = ['id:"' + idField + '"']; + if(indexFields) parts.push("index:[" + indexFields + "]"); + if(SUPPORT_TAGS && doc.tagfield && doc.tagfield.length){ + let tagFields = ""; + for(let i = 0; i < doc.tagfield.length; i++){ + tagFields += (tagFields ? "," : "") + '{field:"' + doc.tagfield[i] + '"}'; + } + parts.push("tag:[" + tagFields + "]"); + } + if(SUPPORT_STORE && doc.store !== null) parts.push("store:true"); + return "{document:{" + parts.join(",") + "}}"; +} + +/** + * Build a Document config as a plain object for JSON export. + * @param {Document} doc + * @return {DocumentConfigRecord} + */ +function document_config_to_export_obj(doc){ + const cfg = new DocumentConfigRecord(); + cfg.id = (SUPPORT_SERIALIZE && doc._cfgKey) || doc.key || "id"; + cfg.fields = []; + for(let i = 0; i < doc.field.length; i++){ + const fieldName = doc.field[i]; + const fieldIdx = doc.index.get(fieldName); + const fieldCfg = new FieldConfigRecord(); + fieldCfg.field = fieldName; + if(fieldIdx){ + Object.assign(fieldCfg, index_config_to_obj(fieldIdx)); + } + cfg.fields.push(fieldCfg); + } + if(SUPPORT_TAGS && doc.tagfield && doc.tagfield.length){ + cfg.tagfields = doc.tagfield.slice(); + } + if (SUPPORT_STORE && doc.store !== null) cfg.store = true; + return cfg; +} + +/** + * Apply a serialized config object to an Index instance. + * @param {Index} index + * @param {IndexConfigRecord|FieldConfigRecord|Object} cfg + */ +function apply_index_cfg(index, cfg){ + if (cfg.tokenize) index.tokenize = cfg.tokenize; + if (cfg.resolution !== undefined) index.resolution = cfg.resolution; + if (cfg.context) { + index.depth = cfg.context.depth || 0; + if (cfg.context.bidirectional !== undefined) index.bidirectional = cfg.context.bidirectional; + if (cfg.context.resolution !== undefined) index.resolution_ctx = cfg.context.resolution; + } + if (cfg.rtl !== undefined) index.rtl = cfg.rtl; + if (cfg.encoder) { + let encoderOpt; + const encoderStr = cfg.encoder; + if(typeof encoderStr === "string" && SUPPORT_CHARSET && Charset[encoderStr]){ + encoderOpt = Charset[encoderStr]; + } else if(typeof encoderStr === "string"){ + try { encoderOpt = new Function("return (" + encoderStr + ")")(); } catch(e){} + } + if(encoderOpt){ + index.encoder = encoderOpt.encode + ? encoderOpt + : (SUPPORT_ENCODER && typeof encoderOpt === "object" + ? new Encoder(encoderOpt) + : { encode: encoderOpt }); + if (SUPPORT_SERIALIZE) index._encoderOpt = cfg.encoder; + } + } + if (cfg.score && typeof cfg.score === "string") { + try { + const scoreFn = new Function("return (" + cfg.score + ")")(); + if(typeof scoreFn === "function") index.score = scoreFn; + } catch(e){} + } + if (SUPPORT_ASYNC && cfg.priority !== undefined) index.priority = cfg.priority; + if (SUPPORT_KEYSTORE && cfg.keystore) { + const ks = cfg.keystore; + index.keystore = ks; + // Replace empty map/ctx with Keystore variants (populated in subsequent imports) + if(!index.map.size) index.map = new KeystoreMap(ks); + if(!index.ctx.size) index.ctx = new KeystoreMap(ks); + } +} + +/** + * Apply a serialized config object to a Document instance. + * Initializes fields/tags/store only when the document has no data yet. + * @param {Document} doc + * @param {DocumentConfigRecord|Object} cfg + */ +function apply_document_cfg(doc, cfg){ + if (cfg.id || cfg.key) doc.key = cfg.id || cfg.key; + if (!doc.field.length && cfg.fields && cfg.fields.length) { + for (let i = 0; i < cfg.fields.length; i++) { + const fc = cfg.fields[i]; + // Support both old format (string) and new format (object with .field) + const fieldName = typeof fc === "string" ? fc : fc.field; + doc.field.push(fieldName); + // Reconstruct tree entry so new documents can be indexed after import + const parts = fieldName.split(":"); + doc.tree[i] = parts.length > 1 ? parts : parts[0]; + if(!doc.index.has(fieldName)){ + const idx = new Index({}, doc.reg); + if(typeof fc === "object") apply_index_cfg(idx, fc); + doc.index.set(fieldName, idx); + } + } + } + if (SUPPORT_TAGS && cfg.tagfields && cfg.tagfields.length && !doc.tag) { + if(!doc.tagtree) doc.tagtree = []; + doc.tag = new Map(); + doc.tagfield = cfg.tagfields; + for (let i = 0; i < cfg.tagfields.length; i++) { + const parts = cfg.tagfields[i].split(":"); + doc.tagtree[i] = parts.length > 1 ? parts : parts[0]; + doc.tag.set(cfg.tagfields[i], new Map()); + } + } + if (SUPPORT_STORE && cfg.store && !doc.store) { + doc.store = new Map(); + } +} + /** * - * @param {function(string, string):Promise|void} callback + * @param {function(string, (string|Array|Object)):Promise|void} callback * @param {string|null|void} field * @param {string} key * @param {Array|null} chunk @@ -146,7 +413,7 @@ function json_to_reg(json, reg){ * @this {Index|Document} * @return {Promise} */ -function save(callback, field, key, chunk, index_doc, index_obj, index_prt = 0){ +function save(callback, field, key, chunk, index_doc, index_obj, index_prt = 0, raw = false){ const is_arr = chunk && chunk.constructor === Array; const data = is_arr ? chunk.shift() : chunk; @@ -155,13 +422,14 @@ function save(callback, field, key, chunk, index_doc, index_obj, index_prt = 0){ callback, field, index_doc, - index_obj + 1 + index_obj + 1, + raw ); } const res = callback( (field ? field + "." : "") + (index_prt + 1) + "." + key, - JSON.stringify(data) + raw ? data : JSON.stringify(data) ); if(res && res["then"]){ @@ -174,7 +442,8 @@ function save(callback, field, key, chunk, index_doc, index_obj, index_prt = 0){ is_arr ? chunk : null, index_doc, index_obj, - index_prt + 1 + index_prt + 1, + raw ); }); } @@ -186,19 +455,21 @@ function save(callback, field, key, chunk, index_doc, index_obj, index_prt = 0){ is_arr ? chunk : null, index_doc, index_obj, - index_prt + 1 + index_prt + 1, + raw ); } /** - * @param {function(string,string):Promise|void} callback + * @param {function(string,(string|Array|Object)):Promise|void} callback * @param {!string|null=} _field * @param {number=} _index_doc * @param {number=} _index_obj + * @param {boolean=} _raw * @this {Index} */ -export function exportIndex(callback, _field, _index_doc = 0, _index_obj = 0){ +export function exportIndex(callback, _field, _index_doc = 0, _index_obj = 0, _raw = false){ let key, chunk; @@ -212,9 +483,8 @@ export function exportIndex(callback, _field, _index_doc = 0, _index_obj = 0){ case 1: - // todo key = "cfg"; - chunk = null; + chunk = [index_config_to_obj(this)]; break; case 2: @@ -240,18 +510,27 @@ export function exportIndex(callback, _field, _index_doc = 0, _index_obj = 0){ key, chunk, _index_doc, - _index_obj + _index_obj, + 0, + _raw ); } /** - * @param {string} key - * @param {string|Array=} data + * @param {string|Map|Object)>|Array>} key + * @param {string|Array|Object=} data * @this Index */ export function importIndex(key, data){ + if (key && (key instanceof Map || Array.isArray(key))) { + for(const [k, v] of key){ + importIndex.call(this, k, v); + } + return; + } + if(!data){ return; } @@ -273,7 +552,7 @@ export function importIndex(key, data){ switch(key){ case "cfg": - // todo + apply_index_cfg(this, data); break; case "reg": @@ -285,41 +564,53 @@ export function importIndex(key, data){ case "map": - this.map = json_to_map(data, this.map); + this.map = json_to_map(/** @type {Array<(Object|null)>} */(data), this.map); break; case "ctx": - this.ctx = json_to_ctx(data, this.ctx); + this.ctx = json_to_ctx(/** @type {Array<(Object|null)>} */(data), this.ctx); break; } } /** - * @param {function(string,string):Promise|void} callback + * @param {function(string,(string|Array|Object)):Promise|void} callback * @param {string|null=} _field * @param {number=} _index_doc * @param {number=} _index_obj * @this {Document} */ -export function exportDocument(callback, _field, _index_doc = 0, _index_obj = 0){ +export function exportDocument(callback, _field, _index_doc = -1, _index_obj = 0, _raw = false){ + + if(_index_doc === -1){ + const cfgObj = document_config_to_export_obj(this); + const res = callback("1.cfg", _raw ? cfgObj : JSON.stringify(cfgObj)); + if(res && res["then"]){ + const self = this; + return res["then"](function(){ + return self.export(callback, null, 0, 0, _raw); + }); + } + return this.export(callback, null, 0, 0, _raw); + } if(_index_doc < this.field.length){ const field = this.field[_index_doc]; const idx = this.index.get(field); // start from index 1, because document indexes does not additionally store register - const res = idx.export(callback, field, _index_doc, _index_obj = 1); + const res = idx.export(callback, field, _index_doc, _index_obj = 1, _raw); if(res && res["then"]){ const self = this; return res["then"](function(){ - return self.export(callback, field, _index_doc + 1); + return self.export(callback, field, _index_doc + 1, 0, _raw); }); } - return this.export(callback, field, _index_doc + 1); + return this.export(callback, field, _index_doc + 1, 0, _raw); } else{ @@ -350,13 +641,6 @@ export function exportDocument(callback, _field, _index_doc = 0, _index_obj = 0) _field = null; break; - // case 3: - // - // key = "cfg"; - // chunk = null; - // _field = null; - // break; - default: return; @@ -368,19 +652,28 @@ export function exportDocument(callback, _field, _index_doc = 0, _index_obj = 0) key, /** @type {Array|null} */ (chunk || null), _index_doc, - _index_obj + _index_obj, + 0, + _raw ); } } /** - * @param {!string} key - * @param {string|Array} data + * @param {string|Map|Object)>|Array>} key + * @param {string|Array|Object=} data * @this {Document} */ export function importDocument(key, data){ + if (key && (key instanceof Map || Array.isArray(key))) { + for(const [k, v] of key){ + importDocument.call(this, k, v); + } + return; + } + const split = key.split("."); if(split[split.length - 1] === "json"){ split.pop(); @@ -438,16 +731,17 @@ export function importDocument(key, data){ case "tag": - this.tag = json_to_ctx(data, this.tag); + this.tag = json_to_ctx(/** @type {Array<(Object|null)>} */(data), this.tag); break; case "doc": - this.store = json_to_map(data, this.store); + this.store = json_to_map(/** @type {Array<(Object|null)>} */(data), this.store); break; case "cfg": + apply_document_cfg(this, data); break; } @@ -467,10 +761,11 @@ ctx: "gulliver+travel:1,2,3|4,5,6|7,8,9;" /** * @this {Index} * @param {boolean} withFunctionWrapper + * @param {boolean} withCfg - When true, embed config and return a self-contained function(FlexSearch) * @return {string} */ -export function serialize(withFunctionWrapper = true){ +export function serializeIndex(withFunctionWrapper = true, withCfg = false){ let reg = ''; let map = ''; @@ -481,7 +776,7 @@ export function serialize(withFunctionWrapper = true){ let type; for(const key of this.reg.keys()){ type || (type = typeof key); - reg += (reg ? ',' : '') + (type === "string" ? '"' + key + '"' : key); + reg += (reg ? ',' : '') + (type === "string" ? '"' + escape_js_string(key) + '"' : key); } reg = 'index.reg=new Set([' + reg + ']);'; @@ -493,15 +788,23 @@ export function serialize(withFunctionWrapper = true){ const value_ctx = context[1]; let ctx_map = parse_map(value_ctx, type); ctx_map = "new Map([" + ctx_map + "])"; - ctx_map = '["' + key_ctx + '",' + ctx_map + ']'; + ctx_map = '["' + escape_js_string(key_ctx) + '",' + ctx_map + ']'; ctx += (ctx ? ',' : '') + ctx_map; } ctx = "index.ctx=new Map([" + ctx + "]);"; } + if(withCfg){ + const cfgJs = index_config_to_js(this, "Charset"); + const body = "const {Index,Charset}=FlexSearch;const index=new Index(" + cfgJs + ");" + reg + map + ctx + "return index;"; + return withFunctionWrapper + ? "function inject(FlexSearch){" + body + "}" + : body; + } + return withFunctionWrapper ? "function inject(index){" + reg + map + ctx + "}" - : reg + map + ctx + : reg + map + ctx; } function parse_map(map, type){ @@ -511,16 +814,260 @@ function parse_map(map, type){ const value = item[1]; let res = ''; for(let i = 0, ids; i < value.length; i++){ - ids = value[i] || ['']; + ids = value[i]; let str = ''; - for(let j = 0; j < ids.length; j++){ - str += (str ? ',' : '') + (type === "string" ? '"' + ids[j] + '"' : ids[j]); + if(ids && ids.length){ + for(let j = 0; j < ids.length; j++){ + str += (str ? ',' : '') + (type === "string" ? '"' + escape_js_string(ids[j]) + '"' : ids[j]); + } + str = '[' + str + ']'; + } + else{ + str = 'null'; // Preserve null/empty for array structure } - str = '[' + str + ']'; res += (res ? ',' : '') + str; } - res = '["' + key + '",[' + res + ']]'; + res = '["' + escape_js_string(key) + '",[' + res + ']]'; result += (result ? ',' : '') + res; } return result; +} + +/** + * Helper: Serialize a Map> for tags + * @param {Map} tagMap - inner map: tagValue → Array + * @param {string=} type - "string" or "number" + * @return {string} + */ +function parse_tag_map(tagMap, type = "string") { + let result = ''; + for(const item of tagMap.entries()){ + const key = item[0]; // tag value (e.g., "1894") + const ids = item[1]; // flat Array (e.g., ["tt0000001"]) + let idsStr = ''; + for(let j = 0; j < ids.length; j++){ + idsStr += (idsStr ? ',' : '') + (type === "string" ? '"' + escape_js_string(ids[j]) + '"' : ids[j]); + } + result += (result ? ',' : '') + '["' + escape_js_string(key) + '",[' + idsStr + ']]'; + } + return result; +} + +/** + * Serialize a Document's multi-field indexes with optional streaming compression + * @this {Document} + * @param {boolean=} withFunctionWrapper - Wrap in function(doc) or return raw statements + * @param {boolean=} withCompression - Apply gzip compression + * @param {boolean=} withCfg - When true, embed config and return a self-contained function(FlexSearch) + * @return {string|Promise|Uint8Array} + */ +export function serializeDocument(withFunctionWrapper = true, withCompression = false, withCfg = false){ + + let statements = ''; + let type = undefined; + + // Serialize shared registry once + if(this.reg && this.reg.size){ + let reg = ''; + for(const key of this.reg.keys()){ + type || (type = typeof key); + reg += (reg ? ',' : '') + (type === "string" ? '"' + escape_js_string(key) + '"' : key); + } + statements += 'doc.reg=new Set([' + reg + ']);'; + // Sync the shared reg reference into each field index (mirrors importDocument "reg" case) + for(const fieldName of this.field){ + statements += 'doc.index.get("' + escape_js_string(fieldName) + '").reg=doc.reg;'; + } + } + + // Serialize each field index + if(this.index && this.index.size){ + for(const fieldName of this.field){ + const index = this.index.get(fieldName); + if(!index) continue; + + // Only serialize if field index has map data + if(index.map && index.map.size){ + let map = parse_map(index.map, type); + if(map){ + statements += 'doc.index.get("' + escape_js_string(fieldName) + '").map=new Map([' + map + ']);'; + } + + // Serialize ctx if present + if(index.ctx && index.ctx.size){ + let ctx = ''; + for(const context of index.ctx.entries()){ + const key_ctx = context[0]; + const value_ctx = context[1]; + let ctx_map = parse_map(value_ctx, type); + if(ctx_map){ + ctx_map = "new Map([" + ctx_map + "])"; + ctx_map = '["' + escape_js_string(key_ctx) + '",' + ctx_map + ']'; + ctx += (ctx ? ',' : '') + ctx_map; + } + } + if(ctx){ + statements += 'doc.index.get("' + escape_js_string(fieldName) + '").ctx=new Map([' + ctx + ']);'; + } + } + } + } + } + + // Serialize tags if present + if(SUPPORT_TAGS && this.tag && this.tagfield){ + for(let i = 0; i < this.tagfield.length; i++){ + const tagField = this.tagfield[i]; + const tagMap = this.tag.get(tagField); + if(tagMap && tagMap.size){ + let tag = parse_tag_map(tagMap, type); + if(tag){ + statements += 'doc.tag.set("' + escape_js_string(tagField) + '",new Map([' + tag + ']));'; + } + } + } + } + + // Serialize store if present + if(SUPPORT_STORE && this.store && this.store.size){ + let storeData = ''; + for(const item of this.store.entries()){ + const key = item[0]; + const value = item[1]; + const valueJson = JSON.stringify(value); + storeData += (storeData ? ',' : '') + '[' + (typeof key === "string" ? '"' + escape_js_string(key) + '"' : key) + ',' + valueJson + ']'; + } + if(storeData){ + statements += 'doc.store=new Map([' + storeData + ']);'; + } + } + + if(withCfg){ + const cfgJs = document_config_to_js(this, "Charset"); + const body = "const {Document,Charset}=FlexSearch;const doc=new Document(" + cfgJs + ");" + statements + "return doc;"; + const result = withFunctionWrapper + ? "function inject(FlexSearch){" + body + "}" + : body; + return withCompression ? compress(result) : result; + } + + const plain = withFunctionWrapper + ? "function inject(doc){" + statements + "}" + : statements; + + return withCompression ? compress(plain) : plain; +} + +/** + * Export bulk index data with optional gzip compression + * Collects all index data into a single bulk format. + * @param {boolean=} compressed - Whether to apply gzip compression (default: false) + * @this {Index} + * @return {Promise} + */ +export async function exportIndexBulk(compressed = false){ + const map = new Map(); + await exportIndex.call(this, (key, data) => { + map.set(key, data); + return null; + }, null, 0, 0, true); // _raw = true + const json = JSON.stringify([...map]); + return compressed ? compress(json) : json; +} + +/** + * Export bulk document data with optional gzip compression + * Collects all document data into a single bulk format. + * @param {boolean=} compressed - Whether to apply gzip compression (default: false) + * @this {Document} + * @return {Promise} + */ +export async function exportDocumentBulk(compressed = false){ + const map = new Map(); + await exportDocument.call(this, (key, data) => { + map.set(key, data); + return null; + }, null, -1, 0, true); // _raw = true + const json = JSON.stringify([...map]); + return compressed ? compress(json) : json; +} + +/** + * Import bulk index data with optional gzip decompression + * @param {Uint8Array|string|null} source - Bulk data (compressed as Uint8Array, or uncompressed as string) + * @param {boolean=} compressed - Whether source is gzip compressed (default: false) + * @this {Index} + * @return {Promise} + */ +export async function importIndexBulk(source, compressed = false){ + /** @type {string} */ + let json = ""; + if(compressed){ + if (typeof source === "string" || !source) { + throw new TypeError("Compressed import expects a Uint8Array source."); + } + json = /** @type {string} */ (await decompress(source)); + } else { + if (source === null) { + throw new TypeError("Import source must not be null."); + } + json = typeof source === "string" ? source : new TextDecoder().decode(source); + } + const entries = /** @type {Array>} */ (JSON.parse(json)); + return importIndex.call(this, entries); +} + +/** + * Import bulk document data with optional gzip decompression + * @param {Uint8Array|string|null} source - Bulk data (compressed as Uint8Array, or uncompressed as string) + * @param {boolean=} compressed - Whether source is gzip compressed (default: false) + * @this {Document} + * @return {Promise} + */ +export async function importDocumentBulk(source, compressed = false){ + /** @type {string} */ + let json = ""; + if(compressed){ + if (typeof source === "string" || !source) { + throw new TypeError("Compressed import expects a Uint8Array source."); + } + json = /** @type {string} */ (await decompress(source)); + } else { + if (source === null) { + throw new TypeError("Import source must not be null."); + } + json = typeof source === "string" ? source : new TextDecoder().decode(source); + } + const entries = /** @type {Array>} */ (JSON.parse(json)); + return importDocument.call(this, entries); +} + +/** + * Compress string using gzip + * @param {string} data - String to compress + * @return {Promise} Compressed data + */ +export async function compress(data){ + const cs = new CompressionStream('gzip'); + const writer = cs.writable.getWriter(); + const encoder = new TextEncoder(); + await writer.write(encoder.encode(data)); + await writer.close(); + const compressed = await new Response(cs.readable).arrayBuffer(); + return new Uint8Array(compressed); +} + +/** + * Decompress gzip-compressed data + * @param {Uint8Array} data - Compressed data + * @return {Promise} Decompressed string + */ +export async function decompress(data){ + const ds = new DecompressionStream('gzip'); + const writer = ds.writable.getWriter(); + await writer.write(data); + await writer.close(); + + const decompressed = await new Response(ds.readable).arrayBuffer(); + return new TextDecoder().decode(decompressed); } \ No newline at end of file diff --git a/src/type.js b/src/type.js index de1ebb21..8477511d 100644 --- a/src/type.js +++ b/src/type.js @@ -87,6 +87,58 @@ export let DocumentOptions = {}; */ export let ContextOptions = {}; +/** + * Internal: serialized Index context payload used by export/import. + * @typedef {{ + * depth: (number|undefined), + * bidirectional: (boolean|undefined), + * resolution: (number|undefined) + * }} + */ +export let SerializedIndexContext = {}; + +/** + * Internal: serialized Index payload used by export/import. + * @typedef {{ + * tokenize: (string|undefined), + * resolution: (number|undefined), + * context: (SerializedIndexContext|undefined), + * rtl: (boolean|undefined), + * encoder: (string|undefined), + * score: (string|undefined), + * priority: (number|undefined), + * keystore: (string|undefined) + * }} + */ +export let SerializedIndexConfig = {}; + +/** + * Internal: serialized Document field payload used by export/import. + * @typedef {{ + * field: (string|undefined), + * tokenize: (string|undefined), + * resolution: (number|undefined), + * context: (SerializedIndexContext|undefined), + * rtl: (boolean|undefined), + * encoder: (string|undefined), + * score: (string|undefined), + * priority: (number|undefined), + * keystore: (string|undefined) + * }} + */ +export let SerializedFieldConfig = {}; + +/** + * Internal: serialized Document payload used by export/import. + * @typedef {{ + * id: (string|undefined), + * fields: (Array|undefined), + * tagfields: (Array|undefined), + * store: (boolean|undefined) + * }} + */ +export let SerializedDocumentConfig = {}; + /** * @typedef {{ * id: (string|undefined), diff --git a/test/serialize.js b/test/serialize.js index 3d595645..78d7f4ef 100644 --- a/test/serialize.js +++ b/test/serialize.js @@ -4,7 +4,7 @@ import { expect } from "chai"; let FlexSearch = await import(env ? "../dist/" + env + ".js" : "../src/bundle.js"); if(FlexSearch.default) FlexSearch = FlexSearch.default; if(FlexSearch.FlexSearch) FlexSearch = FlexSearch.FlexSearch; -const { Index, Document, Worker, Charset: _Charset, Encoder, Resolver } = FlexSearch; +const { Index, Document, Worker, Charset: _Charset, Encoder, Resolver, decompress } = FlexSearch; const build_light = env && env.includes("light"); const build_compact = env && env.includes("compact"); const build_esm = !env || env.startsWith("module"); @@ -34,6 +34,7 @@ if(!build_light) describe("Export / Import", function(){ expect(payload).to.eql(new Map([ ['1.reg', '[0,1,2]'], + ['1.cfg', '{"tokenize":"forward"}'], ['1.map', '[["f",[[0,2],[1]]],["fo",[[0,2],[1]]],["b",[[1],[0],[2]]],["ba",[[1],[0],[2]]],["bar",[[1],[0],[2]]],["fob",[[2],null,[0,1]]],["foba",[[2],null,[0,1]]],["fobar",[[2],null,[0,1]]]]'] ])); @@ -72,6 +73,7 @@ if(!build_light) describe("Export / Import", function(){ expect(payload).to.eql(new Map([ ['1.reg', '[0,1,2]'], + ['1.cfg', '{"context":{"depth":1}}'], ['1.map', '[["fo",[[0],[1,2]]],["bar",[[1],[0],[2]]],["fobar",[[2],null,[0,1]]]]'], ['1.ctx', '[["fo",[["bar",[[0,1],[2]]]]],["fobar",[["bar",[null,[0]]],["fo",[[2],[1]]]]]]'] ])); @@ -124,9 +126,123 @@ if(!build_light) describe("Export / Import", function(){ expect(index3.serialize()).to.equal("function inject(index){}"); }); -}); -if(!build_light) describe("Document Export/Import", function(){ + it("Should have been serialized Index with self-contained inject (Fast-Boot)", function(){ + + let index = new Index({ + tokenize: "forward", + encoder: Charset.LatinBalance + }); + + index.add(1, "Carmencita"); + index.add(2, "Le clown et ses chiens"); + + const body = index.serialize(false, true); + const index2 = new Function("FlexSearch", body)(FlexSearch); + + expect(index2).to.be.instanceOf(Index); + expect(index2.tokenize).to.equal(index.tokenize); + expect(index2.reg.size).to.equal(index.reg.size); + expect(Array.from(index2.reg)).to.eql(Array.from(index.reg)); + expect(normalize_map(index2.map)).to.eql(normalize_map(index.map)); + + // Encoder works: phonetic search results match original (proves LatinBalance restored) + expect(index2.search("karmen")).to.eql(index.search("karmen")); + + // Encoder works for documents added to the restored index + index.add(3, "Carmelo"); + index2.add(3, "Carmelo"); + expect(index2.search("karm")).to.eql(index.search("karm")); + }); + + it("Should have been exported Index with cfg", function(){ + + let index = new Index({ + tokenize: "forward", + encoder: Charset.LatinBalance + }); + + index.add(1, "Carmencita"); + index.add(2, "Le clown et ses chiens"); + + const payload = new Map(); + index.export(function(key, value){ payload.set(key, value); }); + + expect(Array.from(payload.keys())).to.include("1.cfg"); + + let index2 = new Index({}); + for(const [key, value] of payload){ + index2.import(key, value); + } + + expect(index2.tokenize).to.equal(index.tokenize); + expect(index2.reg.size).to.equal(index.reg.size); + expect(normalize_map(index2.map)).to.eql(normalize_map(index.map)); + + // Encoder works: phonetic search results match original (proves LatinBalance restored) + expect(index2.search("karmen")).to.eql(index.search("karmen")); + + // Encoder works for documents added to the restored index + index.add(3, "Carmelo"); + index2.add(3, "Carmelo"); + expect(index2.search("karm")).to.eql(index.search("karm")); + }); + + it("Kitchen sink: Index - encoder, score, context, priority, keystore", function () { + + // Inline encoder strips vowels ("alpha"→"lph", "tau"→"t"). + // Score always returns 0 (best bucket) — a post-restore add to "sigma tau" + // must put "t" in bucket 0; the default scorer at resolution=4 puts i=1 → bucket 1. + // normalize_index covers config scalars + score source + map/ctx data in one eql. + + function makeKsIndex() { + return new Index({ + tokenize: "strict", + resolution: 4, + context: { depth: 1, bidirectional: false, resolution: 2 }, + rtl: false, + priority: 2, + keystore: 4, + encoder: function (str) { + return str.toLowerCase().replace(/[aeiou]/g, "").split(/\s+/).filter(Boolean); + }, + score: function (content, term, i) { return 0; } + }); + } + + const ksRef = makeKsIndex(); + ksRef.add(1, "alpha beta gamma"); + ksRef.add(2, "delta epsilon"); + + // Serialize for inject BEFORE mutating ksRef with the post-restore liveness doc + const body = ksRef.serialize(false, true); + const ks3 = new Function("FlexSearch", body)(FlexSearch); + + // Export / import + const payload = new Map(); + ksRef.export(function (key, value) { payload.set(key, value); }); + const ks2 = new Index({}); + for (const [key, value] of payload) { ks2.import(key, value); } + + // Single eql: config scalars + score source + full map/ctx data + expect(normalize_index(ks2)).to.eql(normalize_index(ksRef)); + expect(normalize_index(ks3)).to.eql(normalize_index(ksRef)); + expect(ks3).to.be.instanceOf(Index); + + // One search tests encoder (strips vowels), context (depth=1 multi-term), tokenizer + // "alpha beta" → ["lph", "bt"] (vowels stripped), both in same doc (context), strict tokenize + expect(ks2.search("alpha beta")).to.eql(ksRef.search("alpha beta")); + expect(ks3.search("alpha beta")).to.eql(ksRef.search("alpha beta")); + expect(ks2.search("alpha beta")).to.eql([1]); + + // Score liveness: "sigma tau" → "t" token. score()=0 → bucket 0. + // Default scorer at resolution=4 puts i=1 tokens in bucket > 0. + ksRef.add(3, "sigma tau"); + ks2.add(3, "sigma tau"); + ks3.add(3, "sigma tau"); + expect(ks2.map.get("t")[0]).to.include(3); + expect(ks3.map.get("t")[0]).to.include(3); + }); const data = [{ "tconst": "tt0000001", @@ -156,28 +272,28 @@ if(!build_light) describe("Document Export/Import", function(){ ] }]; - it("Should have been exported Document-Index properly", function(){ + const config = { + document: { + id: "tconst", + store: true, + index: [{ + field: "primaryTitle", + tokenize: "forward", + encoder: Charset.LatinBalance + },{ + field: "originalTitle", + tokenize: "forward", + encoder: Charset.LatinBalance + }], + tag: [{ + field: "startYear" + },{ + field: "genres" + }] + } + }; - const config = { - document: { - id: "tconst", - store: true, - index: [{ - field: "primaryTitle", - tokenize: "forward", - encoder: Charset.LatinBalance - },{ - field: "originalTitle", - tokenize: "forward", - encoder: Charset.LatinBalance - }], - tag: [{ - field: "startYear" - },{ - field: "genres" - }] - } - }; + it("Should have been exported Document-Index properly", function(){ let document = new Document(config); @@ -244,11 +360,324 @@ if(!build_light) describe("Document Export/Import", function(){ }] } ]); }); + + it("Should have been serialized Document-Index properly (Fast-Boot)", async function(){ + + let document = new Document(config); + + for(let i = 0; i < data.length; i++){ + document.add(data[i]); + } + + // Test basic serialization without compression + const fn_string = document.serialize(false); + const inject = new Function("doc", fn_string); + + let document2 = new Document(config); + inject(document2); + + // Verify internal structures match + expect(document2.reg.size).to.equal(document.reg.size); + expect(document2.store.size).to.equal(document.store.size); + + // Check each field's index data + for(const field of document.field){ + const idx1 = document.index.get(field); + const idx2 = document2.index.get(field); + expect(idx2.map.size).to.equal(idx1.map.size); + expect(idx2.ctx.size).to.equal(idx1.ctx.size); + expect(normalize_map(idx2.map)).to.eql(normalize_map(idx1.map)); + expect(normalize_ctx(idx2.ctx)).to.eql(normalize_ctx(idx1.ctx)); + } + + // Test search results match + const search1 = document.search("karmen"); + const search2 = document2.search("karmen"); + expect(search2).to.eql(search1); + + // Test with function wrapper + expect(document.serialize()).to.equal("function inject(doc){" + fn_string + "}"); + + // Test serialization with compression + const compressed = await document.serialize(false, true); + expect(compressed).to.be.instanceOf(Uint8Array); + expect(compressed.length).to.be.lessThan(Buffer.byteLength(fn_string)); + + // Decompress and verify + const decompressed = await decompress(compressed); + expect(decompressed).to.eql(fn_string); + + // Inject decompressed function + const inject2 = new Function("doc", decompressed); + let document3 = new Document(config); + inject2(document3); + + // Verify search results match after decompression + const search3 = document3.search("karmen"); + expect(search3).to.eql(search1); + }); + + it("Kitchen sink: Document - deep nesting, per-field custom functions", function () { + + // meta:title: nested field path, LatinBalance encoder, forward tokenize + // genre: inline vowel-stripping encoder, score always 0 (bucket liveness proof) + // year: tag field; store: enabled + // normalize_doc covers key, fields, tree, tagtree, store size + per-field map/ctx/config. + + const ksDocData = [{ + id: 1, meta: { title: "Carmencita" }, genre: "fantasy", year: "1865" + }, { + id: 2, meta: { title: "Gulliver" }, genre: "adventure", year: "1864" + }]; + + function makeKsDoc() { + return new Document({ + document: { + id: "id", + store: true, + index: [{ + field: "meta:title", + tokenize: "forward", + encoder: Charset.LatinBalance + }, { + field: "genre", + tokenize: "strict", + encoder: function (str) { + return str.toLowerCase().replace(/[aeiou]/g, "").split(/\s+/).filter(Boolean); + }, + score: function (content, term, i) { return 0; } + }], + tag: [{ field: "year" }] + } + }); + } + + const ksDocRef = makeKsDoc(); + for (const record of ksDocData) ksDocRef.add(record); + + // Export / import + const payload = new Map(); + ksDocRef.export(function (key, value) { payload.set(key, value); }); + expect(Array.from(payload.keys())[0]).to.equal("1.cfg"); + const ksDoc2 = new Document({}); + for (const [key, value] of payload) ksDoc2.import(key, value); + + // Self-contained inject AFTER export/import + const body = ksDocRef.serialize(false, false, true); + const ksDoc3 = new Function("FlexSearch", body)(FlexSearch); + + // Single eql: key, fields, tree, tagtree, store size, per-field config + map data + expect(normalize_doc(ksDoc2)).to.eql(normalize_doc(ksDocRef)); + expect(normalize_doc(ksDoc3)).to.eql(normalize_doc(ksDocRef)); + + // Tag-filtered search tests: LatinBalance encoder on meta:title, tags live, store live + // Tests that field-specific config (encoder, tokenize, tags) all survived + const tagQ = { query: "karmen", tag: { year: "1865" } }; + expect(ksDoc2.search(tagQ)).to.eql(ksDocRef.search(tagQ)); + expect(ksDoc3.search(tagQ)).to.eql(ksDocRef.search(tagQ)); + expect(ksDoc2.search(tagQ).some(r => r.result.includes(1))).to.equal(true); + + // Per-field encoder: genre field uses vowel-stripping encoder + expect(ksDoc2.search("fntsy")).to.eql(ksDocRef.search("fntsy")); + expect(ksDoc3.search("fntsy")).to.eql(ksDocRef.search("fntsy")); + + // Score liveness + tree/tagtree live after restore: add new doc to all three + const newDoc = { id: 3, meta: { title: "Alice in Wonderland" }, genre: "fantasy", year: "1866" }; + ksDocRef.add(newDoc); + ksDoc2.add(newDoc); + ksDoc3.add(newDoc); + + expect(ksDoc2.search("alice")).to.eql(ksDocRef.search("alice")); + expect(ksDoc3.search("alice")).to.eql(ksDocRef.search("alice")); + // score()=0 for genre → "fntsy" must land in bucket 0 for newDoc + expect(ksDoc2.index.get("genre").map.get("fntsy")[0]).to.include(3); + expect(ksDoc3.index.get("genre").map.get("fntsy")[0]).to.include(3); + + const tagQ2 = { query: "alice", tag: { year: "1866" } }; + expect(ksDoc2.search(tagQ2)).to.eql(ksDocRef.search(tagQ2)); + expect(ksDoc3.search(tagQ2)).to.eql(ksDocRef.search(tagQ2)); + }); + + it("Should exportIndexBulk/importIndexBulk with compression (Index)", async function(){ + + const idx = new Index({ tokenize: "forward", resolution: 3 }); + idx.add(0, "foo bar foobar"); + idx.add(1, "bar foo foobar"); + idx.add(2, "foobar foo bar"); + + const compressed = await idx.exportIndexBulk(true); + expect(compressed).to.be.instanceOf(Uint8Array); + + const idx2 = new Index({}); + await idx2.importIndexBulk(compressed, true); + + expect(normalize_index(idx2)).to.eql(normalize_index(idx)); + expect(idx2.search("foobar")).to.eql(idx.search("foobar")); + + // bulk import still works from map payload + const payload = new Map(); + idx.export(function(key, value){ payload.set(key, value); }); + const idx3 = new Index({}); + idx3.import(payload); + expect(normalize_index(idx3)).to.eql(normalize_index(idx)); + }); + + it("Should exportDocumentBulk/importDocumentBulk with compression (Document)", async function(){ + + const doc = new Document({ + document: { + id: "id", + store: true, + index: [{ field: "title", tokenize: "forward" }], + tag: [{ field: "year" }] + } + }); + doc.add({ id: 1, title: "Carmencita", year: "1865" }); + doc.add({ id: 2, title: "Gulliver", year: "1864" }); + + const compressed = await doc.exportDocumentBulk(true); + expect(compressed).to.be.instanceOf(Uint8Array); + + const doc2 = new Document({}); + await doc2.importDocumentBulk(compressed, true); + + expect(normalize_doc(doc2)).to.eql(normalize_doc(doc)); + const tagQ = { query: "carmen", tag: { year: "1865" } }; + expect(doc2.search(tagQ)).to.eql(doc.search(tagQ)); + expect(doc2.search(tagQ).some(r => r.result.includes(1))).to.equal(true); + + // bulk import still works from map payload + const payload = new Map(); + doc.export(function(key, value){ payload.set(key, value); }); + const doc3 = new Document({}); + doc3.import(payload); + expect(normalize_doc(doc3)).to.eql(normalize_doc(doc)); + }); + + it("Should handle special characters in IDs and indexed content (serialize)", function () { + + // Special characters in both IDs and indexed content + const index = new Index({ tokenize: "forward" }); + index.add('he"llo', 'text with "quotes"'); + index.add("back\\slash", "path\\like\\content"); + index.add("new\nline", "multi\nline\ntext"); + + const fn_body = index.serialize(false); + const inject = new Function("index", fn_body); + + const index2 = new Index({ tokenize: "forward" }); + inject(index2); + + // Verify all IDs survived round-trip + expect(index2.reg.size).to.equal(3); + // Verify search still works for tokens from original content + expect(index2.search("quotes")).to.eql(index.search("quotes")); + expect(index2.search("path")).to.eql(index.search("path")); + expect(index2.search("multi")).to.eql(index.search("multi")); + }); + + it("Should handle special characters in IDs and indexed content (export/import)", function () { + + let index = new Index({ tokenize: "forward" }); + index.add('he"llo', 'text with "quotes"'); + index.add("back\\slash", "path\\like\\content"); + + const payload = new Map(); + index.export(function (key, value) { payload.set(key, value); }); + + let index2 = new Index({}); + for (const [key, value] of payload) { + index2.import(key, value); + } + + // Verify all IDs survived round-trip + expect(index2.reg.size).to.equal(2); + // Verify search works for tokens from original content + expect(index2.search("quotes")).to.eql(index.search("quotes")); + expect(index2.search("path")).to.eql(index.search("path")); + }); + + + it("Should export/import uncompressed bulk (Document)", async function () { + + const doc = new Document({ + document: { + id: "id", + store: true, + index: [{ field: "title", tokenize: "forward" }], + tag: [{ field: "year" }] + } + }); + doc.add({ id: 1, title: "Carmencita", year: "1865" }); + doc.add({ id: 2, title: "Gulliver", year: "1864" }); + + const json = await doc.exportDocumentBulk(false); + expect(typeof json).to.equal("string"); + + const doc2 = new Document({}); + await doc2.importDocumentBulk(json, false); + + expect(normalize_doc(doc2)).to.eql(normalize_doc(doc)); + expect(doc2.search("carmen")).to.eql(doc.search("carmen")); + }); + + it("Should export/import uncompressed bulk (Index)", async function () { + + const idx = new Index({ tokenize: "forward" }); + idx.add(0, "foo bar"); + idx.add(1, "baz qux"); + + const json = await idx.exportIndexBulk(false); + expect(typeof json).to.equal("string"); + + const idx2 = new Index({}); + await idx2.importIndexBulk(json, false); + + expect(normalize_index(idx2)).to.eql(normalize_index(idx)); + expect(idx2.search("foo")).to.eql(idx.search("foo")); + }); + + it("Should import from Array entries (same as Map)", function () { + + const index = new Index({ tokenize: "forward" }); + index.add(0, "foo bar foobar"); + index.add(1, "bar foo foobar"); + + const payload = new Map(); + index.export(function (key, value) { payload.set(key, value); }); + + const entries = Array.from(payload.entries()); + const index2 = new Index({}); + index2.import(entries); + + expect(index2.reg.size).to.equal(index.reg.size); + expect(index2.search("foobar")).to.eql(index.search("foobar")); + }); + + it("Should serialize empty Index gracefully", function () { + + const index = new Index({ tokenize: "forward" }); + expect(index.serialize()).to.equal("function inject(index){}"); + expect(index.serialize(false)).to.equal(""); + }); + + it("Should serialize empty Document gracefully", function () { + + const doc = new Document({ + document: { + id: "id", + index: [{ field: "title" }] + } + }); + expect(doc.serialize()).to.equal("function inject(doc){}"); + expect(doc.serialize(false)).to.equal(""); + }); + }); function normalize_map(map){ return Array.from(map.entries()).map(item => { - item[1].forEach((res, i) => res.length || delete item[1][i]); + item[1].forEach((res, i) => (res && res.length) || delete item[1][i]); return item; }); } @@ -258,4 +687,37 @@ function normalize_ctx(ctx){ item[1] = normalize_map(item[1]); return item; }); +} + +function normalize_index(idx) { + return { + tokenize: idx.tokenize, + resolution: idx.resolution, + depth: idx.depth, + bidirectional: idx.bidirectional, + resolution_ctx: idx.resolution_ctx, + rtl: idx.rtl, + priority: idx.priority, + keystore: idx.keystore || 0, + score: idx.score ? idx.score.toString() : null, + map: normalize_map(idx.map), + ctx: normalize_ctx(idx.ctx), + regSize: idx.reg.size + }; +} + +function normalize_doc(doc) { + const fields = {}; + for (const field of doc.field) { + fields[field] = normalize_index(doc.index.get(field)); + } + return { + key: doc.key, + field: doc.field.slice(), + tree: doc.tree.map(t => Array.isArray(t) ? t.slice() : t), + tagfield: (doc.tagfield || []).slice(), + tagtree: (doc.tagtree || []).map(t => Array.isArray(t) ? t.slice() : t), + store: doc.store ? Array.from(doc.store.entries()) : null, + fields + }; } \ No newline at end of file