diff --git a/common/config/rush/pnpm-lock.yaml b/common/config/rush/pnpm-lock.yaml index f2566db58f3..77a0b1cc523 100644 --- a/common/config/rush/pnpm-lock.yaml +++ b/common/config/rush/pnpm-lock.yaml @@ -5216,6 +5216,70 @@ importers: specifier: ^5.9.3 version: 5.9.3 + ../../foundations/server/packages/opensearch: + dependencies: + '@hcengineering/analytics': + specifier: workspace:^0.7.17 + version: link:../../../core/packages/analytics + '@hcengineering/core': + specifier: workspace:^0.7.24 + version: link:../../../core/packages/core + '@hcengineering/platform': + specifier: workspace:^0.7.19 + version: link:../../../core/packages/platform + '@hcengineering/server-core': + specifier: workspace:^0.7.18 + version: link:../core + '@opensearch-project/opensearch': + specifier: ^2.12.0 + version: 2.13.0 + devDependencies: + '@hcengineering/platform-rig': + specifier: workspace:^0.7.19 + version: link:../../../utils/packages/platform-rig + '@types/jest': + specifier: ^29.5.5 + version: 29.5.14 + '@typescript-eslint/eslint-plugin': + specifier: ^6.21.0 + version: 6.21.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1)(typescript@5.9.3) + '@typescript-eslint/parser': + specifier: ^6.21.0 + version: 6.21.0(eslint@8.57.1)(typescript@5.9.3) + eslint: + specifier: ^8.54.0 + version: 8.57.1 + eslint-config-standard-with-typescript: + specifier: ^40.0.0 + version: 40.0.0(@typescript-eslint/eslint-plugin@6.21.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1)(typescript@5.9.3))(eslint-plugin-import@2.32.0(eslint@8.57.1))(eslint-plugin-n@15.7.0(eslint@8.57.1))(eslint-plugin-promise@6.6.0(eslint@8.57.1))(eslint@8.57.1)(typescript@5.9.3) + eslint-plugin-import: + specifier: ^2.26.0 + version: 2.32.0(eslint@8.57.1) + eslint-plugin-n: + specifier: ^15.4.0 + version: 15.7.0(eslint@8.57.1) + eslint-plugin-promise: + specifier: ^6.1.1 + version: 6.6.0(eslint@8.57.1) + eslint-plugin-svelte: + specifier: ^2.35.1 + version: 2.46.1(eslint@8.57.1)(svelte@4.2.20)(ts-node@10.9.2(@types/node@22.19.0)(typescript@5.9.3)) + jest: + specifier: ^29.7.0 + version: 29.7.0(@types/node@22.19.0)(ts-node@10.9.2(@types/node@22.19.0)(typescript@5.9.3)) + prettier: + specifier: ^3.6.2 + version: 3.6.2 + ts-jest: + specifier: ^29.1.1 + version: 29.4.5(@babel/core@7.28.5)(@jest/transform@29.7.0)(@jest/types@30.2.0)(babel-jest@29.7.0(@babel/core@7.28.5))(esbuild@0.25.12)(jest-util@30.2.0)(jest@29.7.0(@types/node@22.19.0)(ts-node@10.9.2(@types/node@22.19.0)(typescript@5.9.3)))(typescript@5.9.3) + ts-node: + specifier: ^10.9.2 + version: 10.9.2(@types/node@22.19.0)(typescript@5.9.3) + typescript: + specifier: ^5.9.3 + version: 5.9.3 + ../../foundations/server/packages/postgres: dependencies: '@hcengineering/core': @@ -5514,6 +5578,67 @@ importers: specifier: ^5.9.3 version: 5.9.3 + ../../foundations/server/packages/typesense: + dependencies: + '@hcengineering/analytics': + specifier: workspace:^0.7.17 + version: link:../../../core/packages/analytics + '@hcengineering/core': + specifier: workspace:^0.7.24 + version: link:../../../core/packages/core + '@hcengineering/platform': + specifier: workspace:^0.7.19 + version: link:../../../core/packages/platform + '@hcengineering/server-core': + specifier: workspace:^0.7.18 + version: link:../core + typesense: + specifier: ^2.0.0 + version: 2.1.0(@babel/runtime@7.28.4) + devDependencies: + '@hcengineering/platform-rig': + specifier: workspace:^0.7.19 + version: link:../../../utils/packages/platform-rig + '@types/jest': + specifier: ^29.5.5 + version: 29.5.14 + '@typescript-eslint/eslint-plugin': + specifier: ^6.21.0 + version: 6.21.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1)(typescript@5.9.3) + '@typescript-eslint/parser': + specifier: ^6.21.0 + version: 6.21.0(eslint@8.57.1)(typescript@5.9.3) + eslint: + specifier: ^8.54.0 + version: 8.57.1 + eslint-config-standard-with-typescript: + specifier: ^40.0.0 + version: 40.0.0(@typescript-eslint/eslint-plugin@6.21.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1)(typescript@5.9.3))(eslint-plugin-import@2.32.0(eslint@8.57.1))(eslint-plugin-n@15.7.0(eslint@8.57.1))(eslint-plugin-promise@6.6.0(eslint@8.57.1))(eslint@8.57.1)(typescript@5.9.3) + eslint-plugin-import: + specifier: ^2.26.0 + version: 2.32.0(eslint@8.57.1) + eslint-plugin-n: + specifier: ^15.4.0 + version: 15.7.0(eslint@8.57.1) + eslint-plugin-promise: + specifier: ^6.1.1 + version: 6.6.0(eslint@8.57.1) + eslint-plugin-svelte: + specifier: ^2.35.1 + version: 2.46.1(eslint@8.57.1)(svelte@4.2.20)(ts-node@10.9.2(@types/node@22.19.0)(typescript@5.9.3)) + jest: + specifier: ^29.7.0 + version: 29.7.0(@types/node@22.19.0)(ts-node@10.9.2(@types/node@22.19.0)(typescript@5.9.3)) + prettier: + specifier: ^3.6.2 + version: 3.6.2 + ts-jest: + specifier: ^29.1.1 + version: 29.4.5(@babel/core@7.28.5)(@jest/transform@29.7.0)(@jest/types@30.2.0)(babel-jest@29.7.0(@babel/core@7.28.5))(esbuild@0.25.12)(jest-util@30.2.0)(jest@29.7.0(@types/node@22.19.0)(ts-node@10.9.2(@types/node@22.19.0)(typescript@5.9.3)))(typescript@5.9.3) + typescript: + specifier: ^5.9.3 + version: 5.9.3 + ../../foundations/utils/packages/platform-rig: dependencies: '@typescript-eslint/eslint-plugin': @@ -30673,6 +30798,9 @@ importers: '@hcengineering/mongo': specifier: workspace:^0.7.16 version: link:../../foundations/server/packages/mongo + '@hcengineering/opensearch': + specifier: workspace:^0.7.17 + version: link:../../foundations/server/packages/opensearch '@hcengineering/platform': specifier: workspace:^0.7.19 version: link:../../foundations/core/packages/platform @@ -30700,6 +30828,9 @@ importers: '@hcengineering/server-token': specifier: workspace:^0.7.17 version: link:../../foundations/core/packages/token + '@hcengineering/typesense': + specifier: workspace:^0.7.17 + version: link:../../foundations/server/packages/typesense '@koa/cors': specifier: ^5.0.0 version: 5.0.0 @@ -43384,6 +43515,10 @@ packages: resolution: {integrity: sha512-exj1MzVXoP7xnAcAB3jZ97pTvVPkQF9y6GA/dvYC47HV7vLv+24XRS6b/v/XnyikpEuvMhugEXdGtAlU086WkQ==} engines: {node: '>= 18'} + '@opensearch-project/opensearch@2.13.0': + resolution: {integrity: sha512-Bu3jJ7pKzumbMMeefu7/npAWAvFu5W9SlbBow1ulhluqUpqc7QoXe0KidDrMy7Dy3BQrkI6llR3cWL4lQTZOFw==} + engines: {node: '>=10', yarn: ^1.22.10} + '@opentelemetry/api-logs@0.203.0': resolution: {integrity: sha512-9B9RU0H7Ya1Dx/Rkyc4stuBZSGVQF27WigitInx2QQoj6KUpEFYPKoWjdFTunJYxmXmh17HeBvbMa1EhGyPmqQ==} engines: {node: '>=8.0.0'} @@ -45579,6 +45714,12 @@ packages: resolution: {integrity: sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ==} engines: {node: '>= 0.4'} + aws4@1.13.2: + resolution: {integrity: sha512-lHe62zvbTB5eEABUVi/AwVh0ZKY9rMMDhmm+eeyuuUQbQ3+J+fONVQOZyj+DdrvD4BY33uYniyRJ4UJIaSKAfw==} + + axios@1.13.6: + resolution: {integrity: sha512-ChTCHMouEe2kn713WHbQGcuYrr6fXTBiu460OTwWrWob16g1bXn4vtz07Ope7ewMozJAnEquLk5lWQWtBig9DQ==} + axobject-query@4.1.0: resolution: {integrity: sha512-qIj0G9wZbMGNLjLmg1PT6v2mE9AH2zlnADJD/2tC6E00hgmhUOfEB6greHPAfLRSufHqROIUTkw6E+M3lH0PTQ==} engines: {node: '>= 0.4'} @@ -47565,6 +47706,10 @@ packages: resolution: {integrity: sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==} engines: {node: '>= 6'} + form-data@4.0.5: + resolution: {integrity: sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==} + engines: {node: '>= 6'} + formdata-node@4.4.1: resolution: {integrity: sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==} engines: {node: '>= 12.20'} @@ -47915,6 +48060,10 @@ packages: hpagent@0.1.2: resolution: {integrity: sha512-ePqFXHtSQWAFXYmj+JtOTHr84iNrII4/QRlAAPPE+zqnKy4xJo7Ie1Y4kC7AdB+LxLxSTTzBMASsEcy0q8YyvQ==} + hpagent@1.2.0: + resolution: {integrity: sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==} + engines: {node: '>=14'} + html-encoding-sniffer@4.0.0: resolution: {integrity: sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==} engines: {node: '>=18'} @@ -48668,6 +48817,10 @@ packages: json-stringify-safe@5.0.1: resolution: {integrity: sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==} + json11@2.0.2: + resolution: {integrity: sha512-HIrd50UPYmP6sqLuLbFVm75g16o0oZrVfxrsY0EEys22klz8mRoWlX9KAEDOSOR9Q34rcxsyC8oDveGrCz5uLQ==} + hasBin: true + json5@1.0.2: resolution: {integrity: sha512-g1MWMLBiz8FKi1e4w0UyVL3w+iJceWAFBAaBnnGKOpNa5f8TLktkbre1+s6oICydWAm+HRUGTmI+//xv2hvXYA==} hasBin: true @@ -51499,6 +51652,12 @@ packages: engines: {node: '>=14.17'} hasBin: true + typesense@2.1.0: + resolution: {integrity: sha512-a/IRTL+dRXlpRDU4UodyGj8hl5xBz3nKihVRd/KfSFAfFPGcpdX6lxIgwdXy3O6VLNNiEsN8YwIsPHQPVT0vNw==} + engines: {node: '>=18'} + peerDependencies: + '@babel/runtime': ^7.23.2 + ua-is-frozen@0.1.2: resolution: {integrity: sha512-RwKDW2p3iyWn4UbaxpP2+VxwqXh0jpvdxsYpZ5j/MLLiQOfbsV5shpgQiw93+KMYQPcteeMQ289MaAFzs3G9pw==} @@ -54164,6 +54323,17 @@ snapshots: '@octokit/webhooks-types': 7.6.1 aggregate-error: 3.1.0 + '@opensearch-project/opensearch@2.13.0': + dependencies: + aws4: 1.13.2 + debug: 4.4.3 + hpagent: 1.2.0 + json11: 2.0.2 + ms: 2.1.3 + secure-json-parse: 2.7.0 + transitivePeerDependencies: + - supports-color + '@opentelemetry/api-logs@0.203.0': dependencies: '@opentelemetry/api': 1.9.0 @@ -56900,6 +57070,16 @@ snapshots: dependencies: possible-typed-array-names: 1.1.0 + aws4@1.13.2: {} + + axios@1.13.6: + dependencies: + follow-redirects: 1.15.11 + form-data: 4.0.5 + proxy-from-env: 1.1.0 + transitivePeerDependencies: + - debug + axobject-query@4.1.0: {} b4a@1.7.3: {} @@ -59242,6 +59422,14 @@ snapshots: hasown: 2.0.2 mime-types: 2.1.35 + form-data@4.0.5: + dependencies: + asynckit: 0.4.0 + combined-stream: 1.0.8 + es-set-tostringtag: 2.1.0 + hasown: 2.0.2 + mime-types: 2.1.35 + formdata-node@4.4.1: dependencies: node-domexception: 1.0.0 @@ -59690,6 +59878,8 @@ snapshots: hpagent@0.1.2: {} + hpagent@1.2.0: {} + html-encoding-sniffer@4.0.0: dependencies: whatwg-encoding: 3.1.1 @@ -60717,6 +60907,8 @@ snapshots: json-stringify-safe@5.0.1: optional: true + json11@2.0.2: {} + json5@1.0.2: dependencies: minimist: 1.2.8 @@ -63941,6 +64133,15 @@ snapshots: typescript@5.9.3: {} + typesense@2.1.0(@babel/runtime@7.28.4): + dependencies: + '@babel/runtime': 7.28.4 + axios: 1.13.6 + loglevel: 1.9.2 + tslib: 2.8.1 + transitivePeerDependencies: + - debug + ua-is-frozen@0.1.2: {} ua-parser-js@2.0.6: diff --git a/common/scripts/show_version.js b/common/scripts/show_version.js index 7a49a74d5fa..0ec1ba25e61 100644 --- a/common/scripts/show_version.js +++ b/common/scripts/show_version.js @@ -18,21 +18,26 @@ const path = require('path') const exec = require('child_process').exec function main() { + // Read version from version.txt first, fall back to git tag, then default + let version + try { + const versionFilePath = path.resolve(__dirname, 'version.txt') + version = fs.readFileSync(versionFilePath, 'utf8').trim() + } catch (error) { + // version.txt not found + } + + if (version) { + console.log(version) + return + } + exec('git describe --tags --abbrev=0', (err, stdout) => { if (err !== null) { console.log('"0.6.0"') return } - // Take version from file - let version - try { - const versionFilePath = path.resolve(__dirname, 'version.txt') - version = fs.readFileSync(versionFilePath, 'utf8').trim() - } catch (error) { - version = '"0.6.0"' - } - - console.log(version) + console.log(`"${stdout.trim()}"`) }) } diff --git a/foundations/server/packages/core/src/types.ts b/foundations/server/packages/core/src/types.ts index 09d7053c09a..6e4e0716033 100644 --- a/foundations/server/packages/core/src/types.ts +++ b/foundations/server/packages/core/src/types.ts @@ -409,6 +409,12 @@ export interface FullTextAdapter { // If no field is provided, will return existing mapping of all dimms. initMapping: (ctx: MeasureContext, field?: { key: string, dims: number }) => Promise + + // Optional: return the total document count in the index/collection. + // Used by non-persistent backends (e.g. Typesense with emptyDir) to detect + // empty state on startup and trigger automatic reindex. + // Returns -1 if the count cannot be determined (e.g. connection error). + getDocCount?: (ctx: MeasureContext) => Promise } /** diff --git a/foundations/server/packages/opensearch/.eslintrc.js b/foundations/server/packages/opensearch/.eslintrc.js new file mode 100644 index 00000000000..72235dc2833 --- /dev/null +++ b/foundations/server/packages/opensearch/.eslintrc.js @@ -0,0 +1,7 @@ +module.exports = { + extends: ['./node_modules/@hcengineering/platform-rig/profiles/default/eslint.config.json'], + parserOptions: { + tsconfigRootDir: __dirname, + project: './tsconfig.json' + } +} diff --git a/foundations/server/packages/opensearch/package.json b/foundations/server/packages/opensearch/package.json new file mode 100644 index 00000000000..135748e20e7 --- /dev/null +++ b/foundations/server/packages/opensearch/package.json @@ -0,0 +1,70 @@ +{ + "name": "@hcengineering/opensearch", + "version": "0.7.17", + "main": "lib/index.js", + "svelte": "src/index.ts", + "types": "types/index.d.ts", + "files": [ + "lib/**/*", + "types/**/*", + "src/**/*", + "!lib/**/*.test.*", + "!lib/**/*.spec.*", + "!lib/__tests__/**/*", + "!lib/__test__/**/*", + "!lib/tests/**/*", + "!types/**/*.test.*", + "!types/**/*.spec.*", + "!types/__tests__/**/*", + "!types/__test__/**/*", + "!types/tests/**/*", + "!src/**/*.test.*", + "!src/**/*.spec.*", + "!src/__tests__/**/*", + "!src/__test__/**/*", + "!src/tests/**/*" + ], + "author": "Anticrm Platform Contributors", + "license": "EPL-2.0", + "scripts": { + "build": "compile", + "build:watch": "compile", + "format": "format src", + "test": "jest --passWithNoTests --silent --forceExit", + "_phase:build": "compile transpile src", + "_phase:test": "jest --passWithNoTests --silent --forceExit", + "_phase:format": "format src", + "_phase:validate": "compile validate" + }, + "devDependencies": { + "@hcengineering/platform-rig": "workspace:^0.7.21", + "@typescript-eslint/eslint-plugin": "^6.21.0", + "eslint-plugin-import": "^2.26.0", + "eslint-plugin-promise": "^6.1.1", + "eslint-plugin-n": "^15.4.0", + "eslint": "^8.54.0", + "ts-node": "^10.9.2", + "@typescript-eslint/parser": "^6.21.0", + "eslint-config-standard-with-typescript": "^40.0.0", + "prettier": "^3.6.2", + "typescript": "^5.9.3", + "jest": "^29.7.0", + "ts-jest": "^29.1.1", + "@types/jest": "^29.5.5", + "eslint-plugin-svelte": "^2.35.1" + }, + "dependencies": { + "@hcengineering/core": "workspace:^0.7.26", + "@hcengineering/platform": "workspace:^0.7.20", + "@hcengineering/server-core": "workspace:^0.7.19", + "@opensearch-project/opensearch": "^2.12.0", + "@hcengineering/analytics": "workspace:^0.7.19" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/hcengineering/huly.server.git" + }, + "publishConfig": { + "access": "public" + } +} diff --git a/foundations/server/packages/opensearch/src/adapter.ts b/foundations/server/packages/opensearch/src/adapter.ts new file mode 100644 index 00000000000..9b4697ece30 --- /dev/null +++ b/foundations/server/packages/opensearch/src/adapter.ts @@ -0,0 +1,698 @@ +// +// Copyright © 2020, 2021 Anticrm Platform Contributors. +// Copyright © 2021 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. You may +// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// +// See the License for the specific language governing permissions and +// limitations under the License. +// + +import { Analytics } from '@hcengineering/analytics' +import { + Class, + Doc, + DocumentQuery, + MeasureContext, + Ref, + SearchOptions, + SearchQuery, + TxResult, + WorkspaceUuid +} from '@hcengineering/core' +import type { FullTextAdapter, IndexedDoc, SearchScoring, SearchStringResult } from '@hcengineering/server-core' +import serverCore from '@hcengineering/server-core' + +import { Client, errors as osErr } from '@opensearch-project/opensearch' +import { getMetadata } from '@hcengineering/platform' + +const DEFAULT_LIMIT = 200 + +function getIndexName (): string { + return getMetadata(serverCore.metadata.ElasticIndexName) ?? 'storage_index' +} + +function getIndexVersion (): string { + return getMetadata(serverCore.metadata.ElasticIndexVersion) ?? 'v2' +} + +const mappings = { + properties: { + fulltextSummary: { + type: 'text', + analyzer: 'rebuilt_english' + }, + workspaceId: { + type: 'keyword', + index: true + }, + id: { + type: 'keyword', + index: true + }, + _class: { + type: 'keyword', + index: true + }, + attachedTo: { + type: 'keyword', + index: true + }, + attachedToClass: { + type: 'keyword', + index: true + }, + space: { + type: 'keyword', + index: true + }, + 'core:class:Doc%createdBy': { + type: 'keyword', + index: true + }, + 'core:class:Doc%createdOn': { + type: 'date', + format: 'epoch_millis', + index: true + }, + modifiedBy: { + type: 'keyword', + index: true + }, + modifiedOn: { + type: 'date', + format: 'epoch_millis', + index: true + }, + 'core:class:Doc%modifiedBy': { + type: 'keyword', + index: true + }, + 'core:class:Doc%modifiedOn': { + type: 'date', + format: 'epoch_millis', + index: true + } + } +} + +class OpenSearchAdapter implements FullTextAdapter { + private readonly getFulltextDocId: (workspaceId: WorkspaceUuid, doc: Ref) => Ref + private readonly getDocId: (workspaceId: WorkspaceUuid, fulltext: Ref) => Ref + private readonly indexName: string + + constructor ( + private readonly client: Client, + private readonly indexBaseName: string, + readonly indexVersion: string + ) { + this.indexName = `${indexBaseName}_${indexVersion}` + this.getFulltextDocId = (workspaceId, doc) => `${doc}@${workspaceId}` as Ref + this.getDocId = (workspaceId, fulltext) => fulltext.slice(0, -1 * (workspaceId.length + 1)) as Ref + } + + async initMapping (ctx: MeasureContext): Promise { + const indexName = this.indexName + try { + const existingVersions = await ctx.withSync('get-indexes', {}, () => + this.client.indices.get({ + index: `${this.indexBaseName}_*` + }) + ) + const allIndexes = Object.keys(existingVersions.body) + const existingOldVersionIndices = allIndexes.filter((name) => name !== indexName) + const existsIndex = allIndexes.find((it) => it === indexName) !== undefined + let shouldDropExistingIndex = false + if (existsIndex) { + const mapping = await ctx.with('get-mapping', { indexName }, () => + this.client.indices.getMapping({ + index: indexName + }) + ) + for (const [propName, propType] of Object.entries(mappings.properties)) { + if (mapping.body[indexName]?.mappings.properties?.[propName]?.type !== propType.type) { + shouldDropExistingIndex = true + break + } + } + } + if (existingOldVersionIndices.length > 0 || shouldDropExistingIndex) { + await ctx.with('delete-old-index', {}, () => + this.client.indices.delete({ + index: shouldDropExistingIndex ? allIndexes : existingOldVersionIndices + }) + ) + } + if (!existsIndex || shouldDropExistingIndex) { + await ctx.with('create-index', { indexName }, () => + this.client.indices.create({ + index: indexName, + body: { + settings: { + analysis: { + filter: { + english_stemmer: { + type: 'stemmer', + language: 'english' + }, + english_possessive_stemmer: { + type: 'stemmer', + language: 'possessive_english' + } + }, + analyzer: { + rebuilt_english: { + type: 'custom', + tokenizer: 'standard', + filter: ['english_possessive_stemmer', 'lowercase', 'english_stemmer'] + } + } + } + }, + mappings + } + }) + ) + } else { + await ctx.with('put-mapping', {}, () => + this.client.indices.putMapping({ + index: indexName, + body: mappings + }) + ) + } + } catch (err: any) { + if (err.name === 'ConnectionError') { + ctx.warn('OpenSearch DB is not available') + } + Analytics.handleError(err) + ctx.error(err) + return false + } + return true + } + + async close (): Promise { + await this.client.close() + } + + async searchString ( + ctx: MeasureContext, + workspaceId: WorkspaceUuid, + query: SearchQuery, + options: SearchOptions & { scoring?: SearchScoring[] } + ): Promise { + try { + const elasticQuery: any = { + query: { + function_score: { + query: { + bool: { + must: [ + { + simple_query_string: { + query: query.query, + analyze_wildcard: true, + flags: 'OR|PREFIX|PHRASE|FUZZY|NOT|ESCAPE', + default_operator: 'and', + fields: [ + 'searchTitle^50', // boost + 'searchShortTitle^50', + '*' // Search in all other fields without a boost + ] + } + }, + { + term: { + workspaceId + } + } + ] + } + }, + boost_mode: 'sum' + } + }, + size: options.limit ?? DEFAULT_LIMIT + } + + const filter: any = [ + { + exists: { field: 'searchTitle' } + } + ] + + if (query.spaces !== undefined) { + filter.push({ + terms: this.getTerms(query.spaces, 'space') + }) + } + if (query.classes !== undefined) { + filter.push({ + terms: this.getTerms(query.classes, '_class') + }) + } + + if (filter.length > 0) { + elasticQuery.query.function_score.query.bool.filter = filter + } + + if (options.scoring !== undefined) { + const scoringTerms: any[] = options.scoring.map((scoringOption): any => { + const field = Object.hasOwn(mappings.properties, scoringOption.attr) + ? scoringOption.attr + : `${scoringOption.attr}.keyword` + return { + term: { + [field]: { + value: scoringOption.value, + boost: scoringOption.boost + } + } + } + }) + elasticQuery.query.function_score.query.bool.should = scoringTerms + } + + const result = await this.client.search({ + index: this.indexName, + body: elasticQuery + }) + + const resp: SearchStringResult = { docs: [] } + if (result.body.hits !== undefined) { + if (result.body.hits.total?.value !== undefined) { + resp.total = result.body.hits.total?.value + } + resp.docs = result.body.hits.hits.map((hit: any) => ({ ...hit._source, _score: hit._score })) + } + + return resp + } catch (err: any) { + if (err.name === 'ConnectionError') { + ctx.warn('OpenSearch DB is not available') + return { docs: [] } + } + Analytics.handleError(err) + ctx.error('OpenSearch error', { error: err }) + return { docs: [] } + } + } + + async search ( + ctx: MeasureContext, + workspaceId: WorkspaceUuid, + _classes: Ref>[], + query: DocumentQuery, + size: number | undefined, + from: number | undefined + ): Promise { + if (query.$search === undefined) return [] + const request: any = { + bool: { + must: [ + { + simple_query_string: { + query: query.$search, + analyze_wildcard: true, + flags: 'OR|PREFIX|PHRASE|FUZZY|NOT|ESCAPE', + default_operator: 'and' + } + }, + { + term: { + workspaceId + } + } + ], + should: [{ terms: this.getTerms(_classes, '_class', { boost: 10.0 }) }], + filter: [ + { + bool: { + should: [ + { terms: this.getTerms(_classes, '_class') } + // { terms: this.getTerms(_classes, 'attachedToClass') } + ] + } + } + ] + } + } + + for (const [q, v] of Object.entries(query)) { + if (!q.startsWith('$')) { + const field = Object.hasOwn(mappings.properties, q) ? q : `${q}.keyword` + if (typeof v === 'object') { + if (v.$in !== undefined) { + request.bool.should.push({ + terms: { + [field]: v.$in, + boost: 100.0 + } + }) + } + } else { + request.bool.should.push({ + term: { + [field]: { + value: v, + boost: 100.0, + case_insensitive: true + } + } + }) + } + } + } + + try { + const result = await ctx.with( + 'search', + {}, + () => + this.client.search({ + index: this.indexName, + body: { + query: request, + size: size ?? 200, + from: from ?? 0 + } + }), + { + _classes, + size, + from, + query: request + } + ) + const hits = result.body.hits.hits as any[] + return hits.map((hit) => ({ ...hit._source, _score: hit._score })) + } catch (err: any) { + if (err.name === 'ConnectionError') { + ctx.warn('OpenSearch DB is not available') + return [] + } + ctx.error('OpenSearch error', { error: err }) + Analytics.handleError(err) + return [] + } + } + + private getTerms (values: string[], field: string, extra: any = {}): any { + return { + [Object.hasOwn(mappings.properties, field) ? field : `${field}.keyword`]: values, + ...extra + } + } + + async index (ctx: MeasureContext, workspaceId: WorkspaceUuid, doc: IndexedDoc): Promise { + const wsDoc = { + workspaceId, + ...doc + } + const fulltextId = this.getFulltextDocId(workspaceId, doc.id) + if (doc.data === undefined) { + await this.client.index({ + index: this.indexName, + id: fulltextId, + body: wsDoc + }) + } else { + await this.client.index({ + index: this.indexName, + id: fulltextId, + pipeline: 'attachment', + body: wsDoc + }) + } + return {} + } + + async update ( + ctx: MeasureContext, + workspaceId: WorkspaceUuid, + id: Ref, + update: Record + ): Promise { + await this.client.update({ + index: this.indexName, + id: this.getFulltextDocId(workspaceId, id), + body: { + doc: update + } + }) + + return {} + } + + async updateMany (ctx: MeasureContext, workspaceId: WorkspaceUuid, docs: IndexedDoc[]): Promise { + const parts = Array.from(docs) + while (parts.length > 0) { + const part = parts.splice(0, 500) + + const operations = part.flatMap((doc) => { + const wsDoc = { workspaceId, ...doc } + return [{ index: { _index: this.indexName, _id: this.getFulltextDocId(workspaceId, doc.id) } }, { ...wsDoc }] + }) + + const response = await this.client.bulk({ refresh: true, body: operations }) + if ((response as any).body.errors === true) { + const errors = response.body.items.filter((it: any) => it.index.error !== undefined) + const errorIds = new Set(errors.map((it: any) => it.index._id)) + const erroDocs = docs.filter((it) => errorIds.has(this.getFulltextDocId(workspaceId, it.id))) + // Collect only errors + const errs = Array.from( + errors.map((it: any) => { + return `${it.index.error.reason}: ${it.index.error.caused_by?.reason}` + }) + ).join('\n') + + console.error(`Failed to process bulk request: ${errs} ${JSON.stringify(erroDocs)}`) + } + } + return [] + } + + async updateByQuery ( + ctx: MeasureContext, + workspaceId: WorkspaceUuid, + query: DocumentQuery, + update: Record + ): Promise { + const elasticQuery: any = { + bool: { + must: [ + { + term: { + workspaceId + } + } + ] + } + } + + for (const [q, v] of Object.entries(query)) { + if (!q.startsWith('$')) { + if (typeof v === 'object') { + if (v.$in !== undefined) { + elasticQuery.bool.must.push({ + terms: { + [Object.hasOwn(mappings.properties, q) ? q : `${q}.keyword`]: v.$in + } + }) + } + } else { + elasticQuery.bool.must.push({ + term: { + [Object.hasOwn(mappings.properties, q) ? q : `${q}.keyword`]: { + value: v + } + } + }) + } + } + } + + await this.client.updateByQuery({ + index: this.indexName, + body: { + query: elasticQuery, + script: { + source: + 'for(int i = 0; i < params.updateFields.size(); i++) { ctx._source[params.updateFields[i].key] = params.updateFields[i].value }', + params: { + updateFields: Object.entries(update).map(([key, value]) => ({ key, value })) + }, + lang: 'painless' + } + } + }) + return [] + } + + async remove (ctx: MeasureContext, workspaceId: WorkspaceUuid, docs: Ref[]): Promise { + try { + const remaining = Array.from(docs) + while (remaining.length > 0) { + const part = remaining.splice(0, 5000) + await this.client.deleteByQuery( + { + index: this.indexName, + body: { + query: { + bool: { + must: [ + { + terms: { + _id: part.map((it) => this.getFulltextDocId(workspaceId, it)), + boost: 1.0 + } + }, + { + term: { + workspaceId + } + } + ] + } + }, + size: part.length + } + }, + undefined + ) + } + } catch (e: any) { + if (e instanceof osErr.ResponseError && e.meta.statusCode === 404) { + return + } + throw e + } + } + + async removeByQuery (ctx: MeasureContext, workspaceId: WorkspaceUuid, query: DocumentQuery): Promise { + const elasticQuery: any = { + bool: { + must: [ + { + term: { + workspaceId + } + } + ] + } + } + + for (const [q, v] of Object.entries(query)) { + if (!q.startsWith('$')) { + if (typeof v === 'object') { + if (v.$in !== undefined) { + elasticQuery.bool.must.push({ + terms: { + [Object.hasOwn(mappings.properties, q) ? q : `${q}.keyword`]: v.$in + } + }) + } + } else { + elasticQuery.bool.must.push({ + term: { + [Object.hasOwn(mappings.properties, q) ? q : `${q}.keyword`]: { + value: v + } + } + }) + } + } + } + try { + await this.client.deleteByQuery({ + index: this.indexName, + body: { + query: elasticQuery + } + }) + } catch (e: any) { + if (e instanceof osErr.ResponseError && e.meta.statusCode === 404) { + return + } + throw e + } + } + + async clean (ctx: MeasureContext, workspaceId: WorkspaceUuid): Promise { + try { + await this.client.deleteByQuery( + { + index: this.indexName, + body: { + query: { + bool: { + must: [ + { + term: { + workspaceId + } + } + ] + } + } + } + }, + undefined + ) + } catch (e: any) { + if (e instanceof osErr.ResponseError && e.meta.statusCode === 404) { + return + } + throw e + } + } + + async load (ctx: MeasureContext, workspaceId: WorkspaceUuid, docs: Ref[]): Promise { + const resp = await this.client.search({ + index: this.indexName, + body: { + query: { + bool: { + must: [ + { + terms: { + _id: docs.map((it) => this.getFulltextDocId(workspaceId, it)), + boost: 1.0 + } + }, + { + term: { + workspaceId + } + } + ] + } + }, + size: docs.length + } + }) + return Array.from( + resp.body.hits.hits.map((hit: any) => ({ ...hit._source, id: this.getDocId(workspaceId, hit._id) })) + ) + } +} + +/** + * @public + */ +export async function createOpenSearchAdapter (url: string): Promise { + const client = new Client({ + node: url + }) + const indexBaseName = getIndexName() + const indexVersion = getIndexVersion() + + return new OpenSearchAdapter(client, indexBaseName, indexVersion) +} diff --git a/foundations/server/packages/opensearch/src/index.ts b/foundations/server/packages/opensearch/src/index.ts new file mode 100644 index 00000000000..7189f833d5e --- /dev/null +++ b/foundations/server/packages/opensearch/src/index.ts @@ -0,0 +1,17 @@ +// +// Copyright © 2020, 2021 Anticrm Platform Contributors. +// Copyright © 2021, 2022 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. You may +// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// +// See the License for the specific language governing permissions and +// limitations under the License. +// + +export { createOpenSearchAdapter } from './adapter' diff --git a/foundations/server/packages/opensearch/tsconfig.json b/foundations/server/packages/opensearch/tsconfig.json new file mode 100644 index 00000000000..7d78e05abb4 --- /dev/null +++ b/foundations/server/packages/opensearch/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "./node_modules/@hcengineering/platform-rig/profiles/default/tsconfig.json", + + "compilerOptions": { + "rootDir": "./src", + "outDir": "./lib", + "declarationDir": "./types", + "tsBuildInfoFile": ".build/build.tsbuildinfo" + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "lib", "dist", "types", "bundle"] +} diff --git a/foundations/server/packages/typesense/.eslintrc.js b/foundations/server/packages/typesense/.eslintrc.js new file mode 100644 index 00000000000..72235dc2833 --- /dev/null +++ b/foundations/server/packages/typesense/.eslintrc.js @@ -0,0 +1,7 @@ +module.exports = { + extends: ['./node_modules/@hcengineering/platform-rig/profiles/default/eslint.config.json'], + parserOptions: { + tsconfigRootDir: __dirname, + project: './tsconfig.json' + } +} diff --git a/foundations/server/packages/typesense/package.json b/foundations/server/packages/typesense/package.json new file mode 100644 index 00000000000..53d1e45134a --- /dev/null +++ b/foundations/server/packages/typesense/package.json @@ -0,0 +1,69 @@ +{ + "name": "@hcengineering/typesense", + "version": "0.7.17", + "main": "lib/index.js", + "svelte": "src/index.ts", + "types": "types/index.d.ts", + "files": [ + "lib/**/*", + "types/**/*", + "src/**/*", + "!lib/**/*.test.*", + "!lib/**/*.spec.*", + "!lib/__tests__/**/*", + "!lib/__test__/**/*", + "!lib/tests/**/*", + "!types/**/*.test.*", + "!types/**/*.spec.*", + "!types/__tests__/**/*", + "!types/__test__/**/*", + "!types/tests/**/*", + "!src/**/*.test.*", + "!src/**/*.spec.*", + "!src/__tests__/**/*", + "!src/__test__/**/*", + "!src/tests/**/*" + ], + "author": "Anticrm Platform Contributors", + "license": "EPL-2.0", + "scripts": { + "build": "compile", + "build:watch": "compile", + "format": "format src", + "test": "jest --passWithNoTests --silent --forceExit", + "_phase:build": "compile transpile src", + "_phase:test": "jest --passWithNoTests --silent --forceExit", + "_phase:format": "format src", + "_phase:validate": "compile validate" + }, + "devDependencies": { + "@hcengineering/platform-rig": "workspace:^0.7.21", + "@typescript-eslint/eslint-plugin": "^6.21.0", + "eslint-plugin-import": "^2.26.0", + "eslint-plugin-promise": "^6.1.1", + "eslint-plugin-n": "^15.4.0", + "eslint": "^8.54.0", + "@typescript-eslint/parser": "^6.21.0", + "eslint-config-standard-with-typescript": "^40.0.0", + "prettier": "^3.6.2", + "typescript": "^5.9.3", + "jest": "^29.7.0", + "ts-jest": "^29.1.1", + "@types/jest": "^29.5.5", + "eslint-plugin-svelte": "^2.35.1" + }, + "dependencies": { + "@hcengineering/core": "workspace:^0.7.26", + "@hcengineering/platform": "workspace:^0.7.20", + "@hcengineering/server-core": "workspace:^0.7.19", + "@hcengineering/analytics": "workspace:^0.7.19", + "typesense": "^2.0.0" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/hcengineering/huly.server.git" + }, + "publishConfig": { + "access": "public" + } +} diff --git a/foundations/server/packages/typesense/src/adapter.ts b/foundations/server/packages/typesense/src/adapter.ts new file mode 100644 index 00000000000..36ea078f6ee --- /dev/null +++ b/foundations/server/packages/typesense/src/adapter.ts @@ -0,0 +1,604 @@ +// +// Copyright © 2020, 2021 Anticrm Platform Contributors. +// Copyright © 2021 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. You may +// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// +// See the License for the specific language governing permissions and +// limitations under the License. +// + +import { Analytics } from '@hcengineering/analytics' +import { + type Class, + type Doc, + type DocumentQuery, + type MeasureContext, + type Ref, + type SearchOptions, + type SearchQuery, + type TxResult, + type WorkspaceUuid +} from '@hcengineering/core' +import type { FullTextAdapter, IndexedDoc, SearchScoring, SearchStringResult } from '@hcengineering/server-core' +import serverCore from '@hcengineering/server-core' + +import { Client as TypesenseClient } from 'typesense' +import type { CollectionFieldSchema } from 'typesense/lib/Typesense/Collection' + +import { getMetadata } from '@hcengineering/platform' + +const DEFAULT_LIMIT = 200 +const BATCH_SIZE = 500 + +function getIndexName (): string { + return getMetadata(serverCore.metadata.ElasticIndexName) ?? 'storage_index' +} + +function getIndexVersion (): string { + return getMetadata(serverCore.metadata.ElasticIndexVersion) ?? 'v2' +} + +function buildCollectionSchema (collectionName: string): { + name: string + fields: CollectionFieldSchema[] + enable_nested_fields: boolean +} { + const fields: CollectionFieldSchema[] = [ + { name: 'id', type: 'string', facet: true }, + { name: 'workspaceId', type: 'string', facet: true }, + { name: '_class', type: 'string[]', facet: true }, + { name: 'space', type: 'string', facet: true }, + { name: 'attachedTo', type: 'string', facet: true, optional: true }, + { name: 'attachedToClass', type: 'string', facet: true, optional: true }, + { name: 'searchTitle', type: 'string', optional: true }, + { name: 'searchShortTitle', type: 'string', optional: true }, + { name: 'fulltextSummary', type: 'string', optional: true }, + { name: 'modifiedBy', type: 'string', facet: true, optional: true }, + { name: 'modifiedOn', type: 'int64', optional: true }, + { name: 'core:class:Doc%createdBy', type: 'string', facet: true, optional: true }, + { name: 'core:class:Doc%createdOn', type: 'int64', optional: true }, + { name: 'core:class:Doc%modifiedBy', type: 'string', facet: true, optional: true }, + { name: 'core:class:Doc%modifiedOn', type: 'int64', optional: true }, + // Catch-all for dynamic fields + { name: '.*', type: 'auto' } + ] + + return { + name: collectionName, + fields, + enable_nested_fields: true + } +} + +function isTypesenseError (err: any): boolean { + return err?.httpStatus !== undefined || err?.name === 'TypesenseError' +} + +function isConnectionError (err: any): boolean { + if (err?.name === 'ConnectionError') return true + if (err?.code === 'ECONNREFUSED' || err?.code === 'ENOTFOUND') return true + if (err?.message?.includes?.('ECONNREFUSED') === true) return true + return false +} + +/** Fields that are known to be string arrays in the schema. */ +const ARRAY_FIELDS = new Set(['_class']) + +/** Escape a value for use inside a Typesense filter_by backtick-quoted string. */ +function escapeFilterValue (val: string): string { + return val.replace(/`/g, '\\`') +} + +/** + * Sanitize a document for Typesense upsert. + * - Removes binary `data` field + * - Coerces fields ending in `_fields` to string arrays (dynamic ES fields) + * - Ensures `_class` is always an array + */ +function sanitizeDoc (doc: Record): Record { + const result: Record = {} + for (const [key, value] of Object.entries(doc)) { + if (key === 'data') continue + // Coerce _class and *_fields to arrays + if (ARRAY_FIELDS.has(key) || key.endsWith('_fields')) { + result[key] = Array.isArray(value) ? value : value != null ? [String(value)] : [] + } else { + result[key] = value + } + } + return result +} + +/** + * Build a Typesense filter_by string from a workspace ID and a Huly DocumentQuery. + * Skips `$`-prefixed keys (like `$search`). + */ +function buildQueryFilter (workspaceId: WorkspaceUuid, query?: DocumentQuery): string { + const parts: string[] = [`workspaceId:=${workspaceId as string}`] + + if (query !== undefined) { + for (const [q, v] of Object.entries(query)) { + if (q.startsWith('$')) continue + if (typeof v === 'object' && v !== null) { + if (v.$in !== undefined && Array.isArray(v.$in)) { + parts.push(`${q}:=[${v.$in.map((val: string) => `\`${escapeFilterValue(val)}\``).join(',')}]`) + } + } else { + parts.push(`${q}:=\`${escapeFilterValue(String(v))}\``) + } + } + } + + return parts.join(' && ') +} + +class TypesenseAdapter implements FullTextAdapter { + private readonly getFulltextDocId: (workspaceId: WorkspaceUuid, doc: Ref) => string + private readonly getDocId: (workspaceId: WorkspaceUuid, fulltext: string) => Ref + private readonly collectionName: string + + constructor ( + private readonly client: TypesenseClient, + private readonly indexBaseName: string, + readonly indexVersion: string + ) { + this.collectionName = `${indexBaseName}_${indexVersion}` + this.getFulltextDocId = (workspaceId, doc) => `${doc as string}@${workspaceId as string}` + this.getDocId = (workspaceId, fulltext) => fulltext.slice(0, -1 * ((workspaceId as string).length + 1)) as Ref + } + + async initMapping (ctx: MeasureContext, _field?: { key: string, dims: number }): Promise { + const collectionName = this.collectionName + try { + const collections = await this.client.collections().retrieve() + const matchingCollections = collections.filter((c: any) => c.name.startsWith(`${this.indexBaseName}_`)) + const existingCollection = matchingCollections.find((c: any) => c.name === collectionName) + const oldCollections = matchingCollections.filter((c: any) => c.name !== collectionName) + + for (const old of oldCollections) { + await ctx.with('delete-old-collection', {}, async () => { + await this.client.collections(old.name).delete() + }) + } + + let shouldRecreate = false + if (existingCollection !== undefined) { + const existingFields = new Set((existingCollection as any).fields?.map((f: any) => f.name) ?? []) + const schema = buildCollectionSchema(collectionName) + for (const field of schema.fields) { + if (field.name !== '.*' && !existingFields.has(field.name)) { + shouldRecreate = true + break + } + } + } + + if (shouldRecreate && existingCollection !== undefined) { + await ctx.with('delete-collection', {}, async () => { + await this.client.collections(collectionName).delete() + }) + } + + if (existingCollection === undefined || shouldRecreate) { + const schema = buildCollectionSchema(collectionName) + await ctx.with('create-collection', { collectionName }, async () => { + await this.client.collections().create(schema as any) + }) + } + } catch (err: any) { + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available') + } + Analytics.handleError(err) + ctx.error(err) + return false + } + return true + } + + async close (): Promise { + // Typesense client does not require explicit close + } + + async getDocCount (ctx: MeasureContext): Promise { + try { + const col = await this.client.collections(this.collectionName).retrieve() + return (col as any).num_documents ?? 0 + } catch (err: any) { + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available for doc count check') + return -1 + } + Analytics.handleError(err) + ctx.error('Typesense getDocCount error', { error: err }) + return -1 + } + } + + async searchString ( + ctx: MeasureContext, + workspaceId: WorkspaceUuid, + query: SearchQuery, + options: SearchOptions & { scoring?: SearchScoring[] } + ): Promise { + try { + const filterParts: string[] = [`workspaceId:=${workspaceId as string}`] + + if (query.spaces !== undefined && query.spaces.length > 0) { + filterParts.push(`space:=[${query.spaces.map((s) => `\`${escapeFilterValue(s)}\``).join(',')}]`) + } + if (query.classes !== undefined && query.classes.length > 0) { + filterParts.push(`_class:=[${query.classes.map((c) => `\`${escapeFilterValue(c)}\``).join(',')}]`) + } + + // Require searchTitle to exist (non-empty) + filterParts.push('searchTitle:!=""') + + // Scoring: ES uses function_score with should clauses (soft boosts). + // Typesense has no equivalent — filter_by is always mandatory (AND). + // We skip scoring filters entirely and rely on text_match ranking, + // which already prioritizes title matches via query_by_weights. + // Adding scoring fields to filter_by would incorrectly EXCLUDE + // documents that don't match, instead of just ranking them lower. + + const filterBy = filterParts.join(' && ') + + const searchParams: any = { + q: query.query, + query_by: 'searchTitle,searchShortTitle,fulltextSummary', + query_by_weights: '100,100,1', + filter_by: filterBy, + limit: options.limit ?? DEFAULT_LIMIT, + prefix: 'true,true,false', + num_typos: '2,2,1', + typo_tokens_threshold: 1, + drop_tokens_threshold: 1, + sort_by: '_text_match:desc' + } + + const result = await this.client.collections(this.collectionName).documents().search(searchParams) + + const resp: SearchStringResult = { docs: [] } + if (result.found !== undefined) { + resp.total = result.found + } + if (result.hits !== undefined) { + resp.docs = result.hits.map((hit: any) => { + const doc = hit.document + return { + ...doc, + id: this.getDocId(workspaceId, doc.id), + _score: hit.text_match_info?.best_field_score ?? hit.text_match ?? 0 + } + }) + } + + return resp + } catch (err: any) { + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available') + return { docs: [] } + } + Analytics.handleError(err) + ctx.error('Typesense error', { error: err }) + return { docs: [] } + } + } + + async search ( + ctx: MeasureContext, + workspaceId: WorkspaceUuid, + _classes: Ref>[], + query: DocumentQuery, + size: number | undefined, + from: number | undefined + ): Promise { + if (query.$search === undefined) return [] + try { + const filterParts: string[] = [`workspaceId:=${workspaceId as string}`] + + if (_classes.length > 0) { + filterParts.push(`_class:=[${_classes.map((c) => `\`${escapeFilterValue(c as string)}\``).join(',')}]`) + } + + // In Elastic, additional query fields are soft boosts (should clauses). + // Typesense filter_by is always mandatory (AND), so adding these as + // filters would incorrectly exclude non-matching documents instead of + // just ranking them lower. We skip them and rely on text_match ranking. + + const filterBy = filterParts.join(' && ') + + const searchParams: any = { + q: query.$search, + query_by: 'searchTitle,searchShortTitle,fulltextSummary', + query_by_weights: '100,100,1', + filter_by: filterBy, + limit: size ?? DEFAULT_LIMIT, + offset: from ?? 0, + sort_by: '_text_match:desc', + prefix: 'true,true,false', + num_typos: '2,2,1', + typo_tokens_threshold: 1, + drop_tokens_threshold: 1 + } + + const result = await ctx.with( + 'search', + {}, + async () => await this.client.collections(this.collectionName).documents().search(searchParams), + { _classes, size, from, query: searchParams } + ) + + if (result.hits === undefined) return [] + return result.hits.map((hit: any) => { + const doc = hit.document + return { + ...doc, + id: this.getDocId(workspaceId, doc.id), + _score: hit.text_match_info?.best_field_score ?? hit.text_match ?? 0 + } + }) + } catch (err: any) { + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available') + return [] + } + ctx.error('Typesense error', { error: err }) + Analytics.handleError(err) + return [] + } + } + + async index (ctx: MeasureContext, workspaceId: WorkspaceUuid, doc: IndexedDoc): Promise { + if (doc.data !== undefined) { + ctx.warn('Binary attachment content (doc.data) is not supported in Typesense; relying on Rekoni for extraction', { + docId: doc.id + }) + } + + const fulltextId = this.getFulltextDocId(workspaceId, doc.id) + const tsDoc = sanitizeDoc({ ...doc, id: fulltextId, workspaceId }) + + try { + await this.client.collections(this.collectionName).documents().upsert(tsDoc) + } catch (err: any) { + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available') + } else { + Analytics.handleError(err) + ctx.error('Typesense index error', { error: err, docId: doc.id }) + } + } + return {} + } + + async update ( + ctx: MeasureContext, + workspaceId: WorkspaceUuid, + id: Ref, + update: Record + ): Promise { + const fulltextId = this.getFulltextDocId(workspaceId, id) + try { + await this.client.collections(this.collectionName).documents(fulltextId).update(update) + } catch (err: any) { + if (isTypesenseError(err) && err.httpStatus === 404) { + return {} + } + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available') + } else { + Analytics.handleError(err) + ctx.error('Typesense update error', { error: err, docId: id }) + } + } + return {} + } + + async updateMany (ctx: MeasureContext, workspaceId: WorkspaceUuid, docs: IndexedDoc[]): Promise { + const parts = Array.from(docs) + while (parts.length > 0) { + const batch = parts.splice(0, BATCH_SIZE) + const jsonlLines = batch + .map((doc) => { + const tsDoc = sanitizeDoc({ ...doc, id: this.getFulltextDocId(workspaceId, doc.id), workspaceId }) + return JSON.stringify(tsDoc) + }) + .join('\n') + + try { + const results = await this.client + .collections(this.collectionName) + .documents() + .import(jsonlLines, { action: 'upsert' }) + + const errors = ( + typeof results === 'string' ? results.split('\n').map((l: string) => JSON.parse(l)) : results + ).filter((r: any) => r.success === false) + + if (errors.length > 0) { + const errorMessages = errors.map((e: any) => e.error).join('\n') + const errorDocs = errors.map((e: any) => e.document) + console.error(`Failed to process bulk request: ${errorMessages} ${JSON.stringify(errorDocs)}`) + } + } catch (err: any) { + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available') + } else { + Analytics.handleError(err) + ctx.error('Typesense updateMany error', { error: err }) + } + } + } + return [] + } + + async updateByQuery ( + ctx: MeasureContext, + workspaceId: WorkspaceUuid, + query: DocumentQuery, + update: Record + ): Promise { + try { + const filterBy = buildQueryFilter(workspaceId, query) + + // Paginate through all matching documents + let page = 1 + const perPage = 250 + const allDocs: any[] = [] + + for (;;) { + const result = await this.client.collections(this.collectionName).documents().search({ + q: '*', + filter_by: filterBy, + per_page: perPage, + page + }) + const hits = result.hits ?? [] + if (hits.length === 0) break + allDocs.push(...hits.map((h: any) => h.document)) + if (allDocs.length >= (result.found ?? 0)) break + page++ + } + + if (allDocs.length === 0) return [] + + // Apply update and batch upsert + const updatedDocs = allDocs.map((doc: any) => ({ ...doc, ...update })) + const remaining = Array.from(updatedDocs) + while (remaining.length > 0) { + const batch = remaining.splice(0, BATCH_SIZE) + const jsonlLines = batch.map((doc: any) => JSON.stringify(doc)).join('\n') + const results = await this.client + .collections(this.collectionName) + .documents() + .import(jsonlLines, { action: 'upsert' }) + const errors = ( + typeof results === 'string' ? results.split('\n').map((l: string) => JSON.parse(l)) : results + ).filter((r: any) => r.success === false) + if (errors.length > 0) { + console.error(`updateByQuery upsert errors: ${errors.map((e: any) => e.error).join('; ')}`) + } + } + } catch (err: any) { + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available') + } else { + Analytics.handleError(err) + ctx.error('Typesense updateByQuery error', { error: err }) + } + } + return [] + } + + async remove (ctx: MeasureContext, workspaceId: WorkspaceUuid, docs: Ref[]): Promise { + try { + const remaining = Array.from(docs) + while (remaining.length > 0) { + const batch = remaining.splice(0, 5000) + const ids = batch.map((it) => this.getFulltextDocId(workspaceId, it)) + const filterBy = `id:=[${ids.map((id) => `\`${escapeFilterValue(id)}\``).join(',')}] && workspaceId:=${workspaceId as string}` + await this.client.collections(this.collectionName).documents().delete({ filter_by: filterBy }) + } + } catch (err: any) { + if (isTypesenseError(err) && err.httpStatus === 404) { + return + } + throw err + } + } + + async removeByQuery (ctx: MeasureContext, workspaceId: WorkspaceUuid, query: DocumentQuery): Promise { + const filterBy = buildQueryFilter(workspaceId, query) + try { + await this.client.collections(this.collectionName).documents().delete({ filter_by: filterBy }) + } catch (err: any) { + if (isTypesenseError(err) && err.httpStatus === 404) { + return + } + throw err + } + } + + async clean (ctx: MeasureContext, workspaceId: WorkspaceUuid): Promise { + try { + const filterBy = `workspaceId:=${workspaceId as string}` + await this.client.collections(this.collectionName).documents().delete({ filter_by: filterBy }) + } catch (err: any) { + if (isTypesenseError(err) && err.httpStatus === 404) { + return + } + throw err + } + } + + async load (ctx: MeasureContext, workspaceId: WorkspaceUuid, docs: Ref[]): Promise { + try { + const ids = docs.map((it) => this.getFulltextDocId(workspaceId, it)) + const filterBy = `id:=[${ids.map((id) => `\`${escapeFilterValue(id)}\``).join(',')}] && workspaceId:=${workspaceId as string}` + + const result = await this.client.collections(this.collectionName).documents().search({ + q: '*', + filter_by: filterBy, + per_page: docs.length + }) + + if (result.hits === undefined) return [] + return result.hits.map((hit: any) => { + const doc = hit.document + return { + ...doc, + id: this.getDocId(workspaceId, doc.id) + } + }) + } catch (err: any) { + if (isConnectionError(err)) { + ctx.warn('Typesense DB is not available') + return [] + } + Analytics.handleError(err) + ctx.error('Typesense load error', { error: err }) + return [] + } + } +} + +/** + * @public + * + * Creates a Typesense fulltext adapter from a URL like: + * http://typesense:8108?apiKey=xyz + */ +export async function createTypesenseAdapter (url: string): Promise { + const parsed = new URL(url) + const apiKey = parsed.searchParams.get('apiKey') ?? parsed.searchParams.get('apikey') ?? '' + const protocol = parsed.protocol.replace(':', '') + const host = parsed.hostname + const port = parsed.port !== '' ? parseInt(parsed.port, 10) : protocol === 'https' ? 443 : 8108 + + const client = new TypesenseClient({ + nodes: [ + { + host, + port, + protocol + } + ], + apiKey, + connectionTimeoutSeconds: 10, + retryIntervalSeconds: 0.1, + numRetries: 3 + }) + + const indexBaseName = getIndexName() + const indexVersion = getIndexVersion() + + return new TypesenseAdapter(client, indexBaseName, indexVersion) +} diff --git a/foundations/server/packages/typesense/src/index.ts b/foundations/server/packages/typesense/src/index.ts new file mode 100644 index 00000000000..acb9895fc7c --- /dev/null +++ b/foundations/server/packages/typesense/src/index.ts @@ -0,0 +1,17 @@ +// +// Copyright © 2020, 2021 Anticrm Platform Contributors. +// Copyright © 2021, 2022 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. You may +// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// +// See the License for the specific language governing permissions and +// limitations under the License. +// + +export { createTypesenseAdapter } from './adapter' diff --git a/foundations/server/packages/typesense/tsconfig.json b/foundations/server/packages/typesense/tsconfig.json new file mode 100644 index 00000000000..7d78e05abb4 --- /dev/null +++ b/foundations/server/packages/typesense/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "./node_modules/@hcengineering/platform-rig/profiles/default/tsconfig.json", + + "compilerOptions": { + "rootDir": "./src", + "outDir": "./lib", + "declarationDir": "./types", + "tsBuildInfoFile": ".build/build.tsbuildinfo" + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "lib", "dist", "types", "bundle"] +} diff --git a/pods/fulltext/package.json b/pods/fulltext/package.json index 9a1e4955056..8fa97e2a13c 100644 --- a/pods/fulltext/package.json +++ b/pods/fulltext/package.json @@ -69,6 +69,8 @@ "@koa/cors": "^5.0.0", "@hcengineering/server-indexer": "workspace:^0.7.0", "@hcengineering/elastic": "workspace:^0.7.16", + "@hcengineering/opensearch": "workspace:^0.7.17", + "@hcengineering/typesense": "workspace:^0.7.17", "@hcengineering/server-collaboration": "workspace:^0.7.0", "@hcengineering/middleware": "workspace:^0.7.21", "@hcengineering/server-client": "workspace:^0.7.16", diff --git a/pods/fulltext/src/index.ts b/pods/fulltext/src/index.ts index 89bf4250851..579fbd199ef 100644 --- a/pods/fulltext/src/index.ts +++ b/pods/fulltext/src/index.ts @@ -16,10 +16,16 @@ import { Analytics } from '@hcengineering/analytics' import { configureAnalytics, createOpenTelemetryMetricsContext, SplitLogger } from '@hcengineering/analytics-service' import { newMetrics, type Tx } from '@hcengineering/core' -import { initStatisticsContext, type StorageConfiguration } from '@hcengineering/server-core' +import { + initStatisticsContext, + type StorageConfiguration, + type FullTextAdapterFactory +} from '@hcengineering/server-core' import { join } from 'path' import { createElasticAdapter } from '@hcengineering/elastic' +import { createOpenSearchAdapter } from '@hcengineering/opensearch' +import { createTypesenseAdapter } from '@hcengineering/typesense' import { getPlatformQueue } from '@hcengineering/kafka' import { setMetadata } from '@hcengineering/platform' import { createRekoniAdapter, type FulltextDBConfiguration } from '@hcengineering/server-indexer' @@ -74,9 +80,22 @@ if (rekoniUrl === undefined) { process.exit(1) } +const fulltextBackend = (process.env.FULLTEXT_BACKEND ?? 'elastic').toLowerCase() +const fulltextAdapterFactories: Record = { + elastic: createElasticAdapter, + opensearch: createOpenSearchAdapter, + typesense: createTypesenseAdapter +} +const fulltextFactory = fulltextAdapterFactories[fulltextBackend] +if (fulltextFactory === undefined) { + console.error(`Unknown FULLTEXT_BACKEND: "${fulltextBackend}". Supported: elastic, opensearch, typesense`) + process.exit(1) +} +console.info(`Using fulltext backend: ${fulltextBackend}`) + const config: FulltextDBConfiguration = { fulltextAdapter: { - factory: createElasticAdapter, + factory: fulltextFactory, url: fullTextDbURL }, contentAdapters: { @@ -91,7 +110,7 @@ const config: FulltextDBConfiguration = { const elasticIndexName = process.env.ELASTIC_INDEX_NAME if (elasticIndexName === undefined) { - console.log('Please provide ELASTIC_INDEX_NAME') + console.error('ELASTIC_INDEX_NAME should be specified') process.exit(1) } diff --git a/pods/fulltext/src/manager.ts b/pods/fulltext/src/manager.ts index 4d5381d4d6d..40906686c08 100644 --- a/pods/fulltext/src/manager.ts +++ b/pods/fulltext/src/manager.ts @@ -105,6 +105,8 @@ export class WorkspaceManager { } } + await this.checkAndTriggerReindex() + this.shutdownInterval = setInterval(() => { for (const [k, v] of [...this.indexers.entries()]) { if (v instanceof Promise) { @@ -159,6 +161,36 @@ export class WorkspaceManager { ) } + private async checkAndTriggerReindex (): Promise { + if (this.fulltextAdapter.getDocCount === undefined) { + return + } + + const docCount = await this.fulltextAdapter.getDocCount(this.ctx) + if (docCount < 0) { + this.ctx.warn('Could not determine backend doc count, skipping auto-reindex check') + return + } + if (docCount > 0) { + this.ctx.info('Fulltext backend has documents, skipping auto-reindex', { docCount }) + return + } + + this.ctx.warn('Fulltext backend is empty (0 documents), triggering auto-reindex for all workspaces') + try { + const token = generateToken(systemAccountUuid, undefined, { service: 'tool' }) + const accountClient = getAccountClient(token) + const workspaces = await accountClient.listWorkspaces(null, 'active') + this.ctx.info('Auto-reindex: found workspaces', { count: workspaces.length }) + for (const ws of workspaces) { + await this.fulltextProducer.send(this.ctx, ws.uuid, [workspaceEvents.fullReindex()]) + this.ctx.info('Queued full reindex', { workspace: ws.uuid }) + } + } catch (err: any) { + this.ctx.error('Failed to trigger auto-reindex', { error: err }) + } + } + private async processTransactions ( m: ConsumerMessage> | TxDomainEvent>>, control: ConsumerControl diff --git a/rush.json b/rush.json index 11ae72e6b2c..0f1bdb97979 100644 --- a/rush.json +++ b/rush.json @@ -547,6 +547,16 @@ "projectFolder": "foundations/server/packages/elastic", "shouldPublish": true }, + { + "packageName": "@hcengineering/opensearch", + "projectFolder": "foundations/server/packages/opensearch", + "shouldPublish": true + }, + { + "packageName": "@hcengineering/typesense", + "projectFolder": "foundations/server/packages/typesense", + "shouldPublish": true + }, { "packageName": "@hcengineering/kafka", "projectFolder": "foundations/server/packages/kafka",