Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/seq-db/seq-db.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ func startStore(
SealParams: common.SealParams{
IDsZstdLevel: cfg.Compression.SealedZstdCompressionLevel,
LIDsZstdLevel: cfg.Compression.SealedZstdCompressionLevel,
LIDBlockSize: int(cfg.Compression.LIDBlockSize),
TokenListZstdLevel: cfg.Compression.SealedZstdCompressionLevel,
DocsPositionsZstdLevel: cfg.Compression.SealedZstdCompressionLevel,
TokenTableZstdLevel: cfg.Compression.SealedZstdCompressionLevel,
Expand All @@ -283,6 +284,7 @@ func startStore(
},
SkipSortDocs: !cfg.DocsSorting.Enabled,
KeepMetaFile: false,
LIDBlockSize: int(cfg.Compression.LIDBlockSize),
},
OffloadingEnabled: cfg.Offloading.Enabled,
OffloadingRetention: cfg.Offloading.Retention,
Expand Down
2 changes: 2 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,8 @@ type Config struct {
MetasZstdCompressionLevel int `config:"metas_zstd_compression_level" default:"1"`
SealedZstdCompressionLevel int `config:"sealed_zstd_compression_level" default:"3"`
DocBlockZstdCompressionLevel int `config:"doc_block_zstd_compression_level" default:"3"`
// LIDBlockSize sets the maximum number of LIDs (postings) stored per LIDs block.
// Validation requires the value to be in the range (0, 64KiB].
LIDBlockSize Bytes `config:"lid_block_size" default:"64KiB"`
} `config:"compression"`

Indexing struct {
Expand Down
16 changes: 16 additions & 0 deletions config/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package config
import (
"cmp"
"fmt"

"github.com/alecthomas/units"
)

type validateFn func() error
Expand Down Expand Up @@ -68,6 +70,8 @@ func (c *Config) storeValidations() []validateFn {

inRange("compression.sealed_zstd_compression_level", -7, 22, c.Compression.SealedZstdCompressionLevel),
inRange("compression.doc_block_zstd_compression_level", -7, 22, c.Compression.DocBlockZstdCompressionLevel),
greaterThan("compression.lid_block_cap", 0, c.Compression.LIDBlockSize),
lessOrEqThan("compression.lid_block_cap", int(64*units.KiB), int(c.Compression.LIDBlockSize)),
inRange("offloading.queue_size_percent", 0, 100, c.Offloading.QueueSizePercent),

greaterThan("experimental.max_regex_tokens_check", -1, c.Experimental.MaxRegexTokensCheck),
Expand Down Expand Up @@ -106,6 +110,18 @@ func greaterThan[T cmp.Ordered](field string, base, v T) validateFn {
}
}

// lessOrEqThan returns a validateFn that checks v against an inclusive upper
// bound. The returned function yields nil when v <= base, and an error naming
// the offending field and the bound otherwise.
func lessOrEqThan[T cmp.Ordered](field string, base, v T) validateFn {
return func() error {
// v == base is accepted: the bound is inclusive.
if v > base {
return fmt.Errorf(
"field %q must be less than or equal to %v",
field, base,
)
}
return nil
}
}

func inRange[T cmp.Ordered](field string, from, to, v T) validateFn {
return func() error {
if v < from || to < v {
Expand Down
12 changes: 12 additions & 0 deletions config/validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,18 @@ limits:
env: map[string]string{"SEQDB_OFFLOADING_QUEUE_SIZE_PERCENT": "50"},
expectErr: false,
},
{
name: "Invalid compression.lid_block_size",
cfg: baseCfg,
env: map[string]string{"SEQDB_COMPRESSION_LID_BLOCK_SIZE": "-1KiB"},
expectErr: true,
},
{
name: "Valid compression.lid_block_size",
cfg: baseCfg,
env: map[string]string{"SEQDB_COMPRESSION_LID_BLOCK_SIZE": "8KiB"},
expectErr: false,
},
}

for _, tt := range tests {
Expand Down
13 changes: 7 additions & 6 deletions docs/en/02-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,13 @@ Resource allocation settings.

Compression level settings for various data types.

| Field | Type | Default | Description |
|-------|------|---------|-------------|
| `compression.docs_zstd_compression_level` | int | `1` | Zstandard compression level for documents |
| `compression.metas_zstd_compression_level` | int | `1` | Zstandard compression level for metadata |
| `compression.sealed_zstd_compression_level` | int | `3` | Zstandard compression level for sealed fractions |
| `compression.doc_block_zstd_compression_level` | int | `3` | Zstandard compression level for document blocks |
| Field | Type | Default | Description |
|------------------------------------------------|------|---------|--------------------------------------------------|
| `compression.docs_zstd_compression_level` | int | `1` | Zstandard compression level for documents |
| `compression.metas_zstd_compression_level` | int | `1` | Zstandard compression level for metadata |
| `compression.sealed_zstd_compression_level` | int | `3` | Zstandard compression level for sealed fractions |
| `compression.doc_block_zstd_compression_level` | int | `3` | Zstandard compression level for document blocks |
| `compression.lid_block_size`                   | int  | `64KiB` | Maximum number of LIDs (postings) per LIDs block |

## Indexing Configuration

Expand Down
13 changes: 7 additions & 6 deletions docs/ru/02-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,13 @@ id: configuration

Настройки уровня сжатия для различных типов данных.

| Параметр | Тип | Значение по умолчанию | Описание |
|----------|-----|----------------------|-----------|
| `compression.docs_zstd_compression_level` | int | `1` | Уровень сжатия для документов |
| `compression.metas_zstd_compression_level` | int | `1` | Уровень сжатия для метаданных |
| `compression.sealed_zstd_compression_level` | int | `3` | Уровень сжатия для запечатанных фракций |
| `compression.doc_block_zstd_compression_level` | int | `3` | Уровень сжатия для блоков документов |
| Параметр | Тип | Значение по умолчанию | Описание |
|------------------------------------------------|-----|-----------------------|-----------------------------------------|
| `compression.docs_zstd_compression_level` | int | `1` | Уровень сжатия для документов |
| `compression.metas_zstd_compression_level` | int | `1` | Уровень сжатия для метаданных |
| `compression.sealed_zstd_compression_level` | int | `3` | Уровень сжатия для запечатанных фракций |
| `compression.doc_block_zstd_compression_level` | int | `3` | Уровень сжатия для блоков документов |
| `compression.lid_block_size` | int | `64KiB` | Максимальное количество лидов в блоках |

## Конфигурация индексирования

Expand Down
3 changes: 3 additions & 0 deletions frac/active.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ func NewActive(

skipMaskProvider: skipMaskProvider,
}
if cfg.LIDBlockSize > 0 {
f.info.ConstLIDBlockCap = cfg.LIDBlockSize
}

// use of 0 as keys in maps is prohibited – it's system key, so add first element
f.MIDs.Append(systemMID)
Expand Down
1 change: 1 addition & 0 deletions frac/common/seal_params.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ type SealParams struct {
TokenTableZstdLevel int

DocBlocksZstdLevel int // DocBlocksZstdLevel is the zstd compress level of each document block.
LIDBlockSize int
DocBlockSize int // DocBlockSize is decompressed payload size of document block.
}
1 change: 1 addition & 0 deletions frac/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ type Config struct {

SkipSortDocs bool
KeepMetaFile bool
LIDBlockSize int
}

type SearchConfig struct {
Expand Down
1 change: 1 addition & 0 deletions frac/fraction_concurrency_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ func seal(active *Active) (*Sealed, error) {
TokenTableZstdLevel: 1,
DocBlocksZstdLevel: 1,
DocBlockSize: 128 * int(units.KiB),
LIDBlockSize: 512,
}
activeSealingSource, err := NewActiveSealingSource(active, sealParams)
if err != nil {
Expand Down
1 change: 1 addition & 0 deletions frac/fraction_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ func (s *FractionTestSuite) SetupTestCommon() {
DocsPositionsZstdLevel: 1,
TokenTableZstdLevel: 1,
DocBlocksZstdLevel: 1,
LIDBlockSize: 512,
DocBlockSize: 128 * int(units.KiB),
}

Expand Down
2 changes: 1 addition & 1 deletion frac/sealed/sealing/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ func (s *IndexSealer) indexBlocks(src Source) iter.Seq[indexBlock] {
// SECTION 6: LIDs Section
statsLIDs := startStats()
s.lidsTable.StartBlockIndex = blocksCounter
for block := range bb.BuildLIDsBlocks(src.TokenLIDs(), consts.LIDBlockCap) {
for block := range bb.BuildLIDsBlocks(src.TokenLIDs(), s.params.LIDBlockSize) {
if !push(s.packLIDsBlock(block), &statsLIDs) {
return
}
Expand Down
3 changes: 3 additions & 0 deletions fracmanager/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ func FillConfigWithDefault(config *Config) *Config {
if config.SealParams.LIDsZstdLevel == 0 {
config.SealParams.LIDsZstdLevel = zstdDefaultLevel
}
if config.SealParams.LIDBlockSize == 0 {
config.SealParams.LIDBlockSize = consts.LIDBlockCap
}
if config.SealParams.TokenListZstdLevel == 0 {
config.SealParams.TokenListZstdLevel = zstdDefaultLevel
}
Expand Down
Loading