From f7817438190b4438225337fddfa2a2af594e8fff Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 14 Apr 2026 11:15:30 +0400 Subject: [PATCH] LID block size config --- cmd/seq-db/seq-db.go | 2 ++ config/config.go | 2 ++ config/validation.go | 16 ++++++++++++++++ config/validation_test.go | 12 ++++++++++++ docs/en/02-configuration.md | 13 +++++++------ docs/ru/02-configuration.md | 13 +++++++------ frac/active.go | 3 +++ frac/common/seal_params.go | 1 + frac/config.go | 1 + frac/fraction_concurrency_test.go | 1 + frac/fraction_test.go | 1 + frac/sealed/sealing/index.go | 2 +- fracmanager/config.go | 3 +++ 13 files changed, 57 insertions(+), 13 deletions(-) diff --git a/cmd/seq-db/seq-db.go b/cmd/seq-db/seq-db.go index b0a7c47e..9c57e510 100644 --- a/cmd/seq-db/seq-db.go +++ b/cmd/seq-db/seq-db.go @@ -266,6 +266,7 @@ func startStore( SealParams: common.SealParams{ IDsZstdLevel: cfg.Compression.SealedZstdCompressionLevel, LIDsZstdLevel: cfg.Compression.SealedZstdCompressionLevel, + LIDBlockSize: int(cfg.Compression.LIDBlockSize), TokenListZstdLevel: cfg.Compression.SealedZstdCompressionLevel, DocsPositionsZstdLevel: cfg.Compression.SealedZstdCompressionLevel, TokenTableZstdLevel: cfg.Compression.SealedZstdCompressionLevel, @@ -283,6 +284,7 @@ func startStore( }, SkipSortDocs: !cfg.DocsSorting.Enabled, KeepMetaFile: false, + LIDBlockSize: int(cfg.Compression.LIDBlockSize), }, OffloadingEnabled: cfg.Offloading.Enabled, OffloadingRetention: cfg.Offloading.Retention, diff --git a/config/config.go b/config/config.go index 28a67dc0..6ffe0667 100644 --- a/config/config.go +++ b/config/config.go @@ -200,6 +200,8 @@ type Config struct { MetasZstdCompressionLevel int `config:"metas_zstd_compression_level" default:"1"` SealedZstdCompressionLevel int `config:"sealed_zstd_compression_level" default:"3"` DocBlockZstdCompressionLevel int `config:"doc_block_zstd_compression_level" default:"3"` + // LIDBlockSize sets max lids 
(postings) saved per LIDs block. + LIDBlockSize Bytes `config:"lid_block_size" default:"64KiB"` } `config:"compression"` Indexing struct { diff --git a/config/validation.go b/config/validation.go index 15d63c9b..13174ac3 100644 --- a/config/validation.go +++ b/config/validation.go @@ -3,6 +3,8 @@ package config import ( "cmp" "fmt" + + "github.com/alecthomas/units" ) type validateFn func() error @@ -68,6 +70,8 @@ func (c *Config) storeValidations() []validateFn { inRange("compression.sealed_zstd_compression_level", -7, 22, c.Compression.SealedZstdCompressionLevel), inRange("compression.doc_block_zstd_compression_level", -7, 22, c.Compression.DocBlockZstdCompressionLevel), + greaterThan("compression.lid_block_size", 0, c.Compression.LIDBlockSize), + lessOrEqThan("compression.lid_block_size", int(64*units.KiB), int(c.Compression.LIDBlockSize)), inRange("offloading.queue_size_percent", 0, 100, c.Offloading.QueueSizePercent), greaterThan("experimental.max_regex_tokens_check", -1, c.Experimental.MaxRegexTokensCheck), @@ -106,6 +110,18 @@ func greaterThan[T cmp.Ordered](field string, base, v T) validateFn { } } +func lessOrEqThan[T cmp.Ordered](field string, base, v T) validateFn { + return func() error { + if v > base { + return fmt.Errorf( + "field %q must be less than or equal to %v", + field, base, + ) + } + return nil + } +} + func inRange[T cmp.Ordered](field string, from, to, v T) validateFn { return func() error { if v < from || to < v { diff --git a/config/validation_test.go b/config/validation_test.go index 0a29f990..c37c1da2 100644 --- a/config/validation_test.go +++ b/config/validation_test.go @@ -86,6 +86,18 @@ limits: env: map[string]string{"SEQDB_OFFLOADING_QUEUE_SIZE_PERCENT": "50"}, expectErr: false, }, + { + name: "Invalid compression.lid_block_size", + cfg: baseCfg, + env: map[string]string{"SEQDB_COMPRESSION_LID_BLOCK_SIZE": "-1KiB"}, + expectErr: true, + }, + { + name: "Valid compression.lid_block_size", + cfg: baseCfg, + env: 
map[string]string{"SEQDB_COMPRESSION_LID_BLOCK_SIZE": "8KiB"}, + expectErr: false, + }, } for _, tt := range tests { diff --git a/docs/en/02-configuration.md b/docs/en/02-configuration.md index 54b798df..019c163d 100644 --- a/docs/en/02-configuration.md +++ b/docs/en/02-configuration.md @@ -104,12 +104,13 @@ Resource allocation settings. Compression level settings for various data types. -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `compression.docs_zstd_compression_level` | int | `1` | Zstandard compression level for documents | -| `compression.metas_zstd_compression_level` | int | `1` | Zstandard compression level for metadata | -| `compression.sealed_zstd_compression_level` | int | `3` | Zstandard compression level for sealed fractions | -| `compression.doc_block_zstd_compression_level` | int | `3` | Zstandard compression level for document blocks | +| Field | Type | Default | Description | +|------------------------------------------------|------|---------|--------------------------------------------------| +| `compression.docs_zstd_compression_level` | int | `1` | Zstandard compression level for documents | +| `compression.metas_zstd_compression_level` | int | `1` | Zstandard compression level for metadata | +| `compression.sealed_zstd_compression_level` | int | `3` | Zstandard compression level for sealed fractions | +| `compression.doc_block_zstd_compression_level` | int | `3` | Zstandard compression level for document blocks | +| `compression.lid_block_size` | Bytes | `64KiB` | Max lids (postings) saved per LIDs block | ## Indexing Configuration diff --git a/docs/ru/02-configuration.md b/docs/ru/02-configuration.md index 664530aa..178e343c 100644 --- a/docs/ru/02-configuration.md +++ b/docs/ru/02-configuration.md @@ -104,12 +104,13 @@ id: configuration Настройки уровня сжатия для различных типов данных. 
-| Параметр | Тип | Значение по умолчанию | Описание | -|----------|-----|----------------------|-----------| -| `compression.docs_zstd_compression_level` | int | `1` | Уровень сжатия для документов | -| `compression.metas_zstd_compression_level` | int | `1` | Уровень сжатия для метаданных | -| `compression.sealed_zstd_compression_level` | int | `3` | Уровень сжатия для запечатанных фракций | -| `compression.doc_block_zstd_compression_level` | int | `3` | Уровень сжатия для блоков документов | +| Параметр | Тип | Значение по умолчанию | Описание | +|------------------------------------------------|-----|-----------------------|-----------------------------------------| +| `compression.docs_zstd_compression_level` | int | `1` | Уровень сжатия для документов | +| `compression.metas_zstd_compression_level` | int | `1` | Уровень сжатия для метаданных | +| `compression.sealed_zstd_compression_level` | int | `3` | Уровень сжатия для запечатанных фракций | +| `compression.doc_block_zstd_compression_level` | int | `3` | Уровень сжатия для блоков документов | +| `compression.lid_block_size` | Bytes | `64KiB` | Максимальное количество лидов в блоках | ## Конфигурация индексирования diff --git a/frac/active.go b/frac/active.go index 7c3691c1..88d880ec 100644 --- a/frac/active.go +++ b/frac/active.go @@ -114,6 +114,9 @@ func NewActive( skipMaskProvider: skipMaskProvider, } + if cfg.LIDBlockSize > 0 { + f.info.ConstLIDBlockCap = cfg.LIDBlockSize + } // use of 0 as keys in maps is prohibited – it's system key, so add first element f.MIDs.Append(systemMID) diff --git a/frac/common/seal_params.go b/frac/common/seal_params.go index c19365f9..05f89696 100644 --- a/frac/common/seal_params.go +++ b/frac/common/seal_params.go @@ -8,5 +8,6 @@ type SealParams struct { TokenTableZstdLevel int DocBlocksZstdLevel int // DocBlocksZstdLevel is the zstd compress level of each document block. + LIDBlockSize int // LIDBlockSize is the max number of LIDs (postings) per LIDs block. DocBlockSize int // DocBlockSize is decompressed payload size of document block. 
} diff --git a/frac/config.go b/frac/config.go index 3b1c1e97..2abae098 100644 --- a/frac/config.go +++ b/frac/config.go @@ -5,6 +5,7 @@ type Config struct { SkipSortDocs bool KeepMetaFile bool + LIDBlockSize int } type SearchConfig struct { diff --git a/frac/fraction_concurrency_test.go b/frac/fraction_concurrency_test.go index a5c19b22..25f1c345 100644 --- a/frac/fraction_concurrency_test.go +++ b/frac/fraction_concurrency_test.go @@ -344,6 +344,7 @@ func seal(active *Active) (*Sealed, error) { TokenTableZstdLevel: 1, DocBlocksZstdLevel: 1, DocBlockSize: 128 * int(units.KiB), + LIDBlockSize: 512, } activeSealingSource, err := NewActiveSealingSource(active, sealParams) if err != nil { diff --git a/frac/fraction_test.go b/frac/fraction_test.go index ec5f3d85..3f0994e6 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -97,6 +97,7 @@ func (s *FractionTestSuite) SetupTestCommon() { DocsPositionsZstdLevel: 1, TokenTableZstdLevel: 1, DocBlocksZstdLevel: 1, + LIDBlockSize: 512, DocBlockSize: 128 * int(units.KiB), } diff --git a/frac/sealed/sealing/index.go b/frac/sealed/sealing/index.go index 491c7233..48cf8302 100644 --- a/frac/sealed/sealing/index.go +++ b/frac/sealed/sealing/index.go @@ -271,7 +271,7 @@ func (s *IndexSealer) indexBlocks(src Source) iter.Seq[indexBlock] { // SECTION 6: LIDs Section statsLIDs := startStats() s.lidsTable.StartBlockIndex = blocksCounter - for block := range bb.BuildLIDsBlocks(src.TokenLIDs(), consts.LIDBlockCap) { + for block := range bb.BuildLIDsBlocks(src.TokenLIDs(), s.params.LIDBlockSize) { if !push(s.packLIDsBlock(block), &statsLIDs) { return } diff --git a/fracmanager/config.go b/fracmanager/config.go index 5a9790ac..c3c8d1ec 100644 --- a/fracmanager/config.go +++ b/fracmanager/config.go @@ -57,6 +57,9 @@ func FillConfigWithDefault(config *Config) *Config { if config.SealParams.LIDsZstdLevel == 0 { config.SealParams.LIDsZstdLevel = zstdDefaultLevel } + if config.SealParams.LIDBlockSize == 0 { + 
config.SealParams.LIDBlockSize = consts.LIDBlockCap + } if config.SealParams.TokenListZstdLevel == 0 { config.SealParams.TokenListZstdLevel = zstdDefaultLevel }