diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 8d5496b9740a..761533548b03 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -264,6 +264,8 @@ API Changes
* GITHUB#15584: Add support for termdoc fields that use custom term freqs (via IndexOptions.DOCS_AND_CUSTOM_FREQS).
IndexWriter counts their terms rather than summing their freqs. Use
+* GITHUB#15990: Add experimental api to IndexWriter for columnar indexing.
+
New Features
---------------------
diff --git a/lucene/core/src/java/module-info.java b/lucene/core/src/java/module-info.java
index 1358b9fe068d..eeed4020e7ae 100644
--- a/lucene/core/src/java/module-info.java
+++ b/lucene/core/src/java/module-info.java
@@ -76,6 +76,10 @@
exports org.apache.lucene.codecs.hnsw;
exports org.apache.lucene.internal.vectorization to
org.apache.lucene.benchmark.jmh;
+ exports org.apache.lucene.document.column;
+
+ opens org.apache.lucene.document.column to
+ org.apache.lucene.test_framework;
provides org.apache.lucene.analysis.TokenizerFactory with
org.apache.lucene.analysis.standard.StandardTokenizerFactory;
diff --git a/lucene/core/src/java/org/apache/lucene/document/column/BinaryColumn.java b/lucene/core/src/java/org/apache/lucene/document/column/BinaryColumn.java
new file mode 100644
index 000000000000..e1a831d742c4
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/document/column/BinaryColumn.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document.column;
+
+import org.apache.lucene.document.StoredValue;
+import org.apache.lucene.index.IndexableFieldType;
+
+/**
+ * A {@link Column} that provides variable-size binary values via a tuple cursor. Used for {@link
+ * org.apache.lucene.index.DocValuesType#BINARY BINARY}, {@link
+ * org.apache.lucene.index.DocValuesType#SORTED SORTED}, and {@link
+ * org.apache.lucene.index.DocValuesType#SORTED_SET SORTED_SET} doc values, and for stored/indexed
+ * binary or text fields. Values fed to points are passed through unchanged, so callers are
+ * responsible for producing sort-encoded bytes of the correct total length.
+ *
+ * <p>Numeric doc values ({@link org.apache.lucene.index.DocValuesType#NUMERIC NUMERIC} / {@link
+ * org.apache.lucene.index.DocValuesType#SORTED_NUMERIC SORTED_NUMERIC}) and 1-D numeric points (int
+ * / long / float / double) are fed by {@link LongColumn} instead.
+ *
+ * @lucene.experimental
+ */
+public abstract class BinaryColumn extends Column {
+
+ /** Creates a BinaryColumn with the given field name, type, and density. */
+ protected BinaryColumn(String name, IndexableFieldType fieldType, Density density) {
+ super(name, fieldType, density);
+ }
+
+ /**
+ * The {@link org.apache.lucene.document.StoredValue.Type} to emit when this column is written to
+ * stored fields. The default is {@link org.apache.lucene.document.StoredValue.Type#BINARY}. Only
+ * {@link org.apache.lucene.document.StoredValue.Type#BINARY} and {@link
+ * org.apache.lucene.document.StoredValue.Type#STRING} are supported; numeric stored types require
+ * {@link LongColumn}.
+ */
+ public StoredValue.Type storedType() {
+ return StoredValue.Type.BINARY;
+ }
+
+ /** Returns a fresh tuple cursor starting at the beginning of the batch. */
+ public abstract BinaryTupleCursor tuples();
+}
diff --git a/lucene/core/src/java/org/apache/lucene/document/column/BinaryTupleCursor.java b/lucene/core/src/java/org/apache/lucene/document/column/BinaryTupleCursor.java
new file mode 100644
index 000000000000..f6fe58aacb27
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/document/column/BinaryTupleCursor.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document.column;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * A tuple cursor over a {@link BinaryColumn}. Yields {@code (docID, binaryValue)} pairs.
+ * Batch-local doc-ids are returned in non-decreasing order; the same doc-id may repeat for
+ * multi-valued fields (e.g. {@link org.apache.lucene.index.DocValuesType#SORTED_SET SORTED_SET}).
+ *
+ * @lucene.experimental
+ */
+public abstract class BinaryTupleCursor {
+
+ /** Sole constructor. */
+ protected BinaryTupleCursor() {}
+
+ /**
+ * Advances to the next doc-id that has a value and returns it, or {@link
+ * DocIdSetIterator#NO_MORE_DOCS} if exhausted. Doc-ids are batch-local (0 to {@code numDocs -
+ * 1}).
+ */
+ public abstract int nextDoc();
+
+ /**
+ * Returns the value at the current cursor position. Only valid until the next call to {@link
+ * #nextDoc()}, and only after a {@code nextDoc()} that returned a value other than {@link
+ * DocIdSetIterator#NO_MORE_DOCS}.
+ */
+ public abstract BytesRef binaryValue();
+}
diff --git a/lucene/core/src/java/org/apache/lucene/document/column/Column.java b/lucene/core/src/java/org/apache/lucene/document/column/Column.java
new file mode 100644
index 000000000000..143a82ed1750
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/document/column/Column.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document.column;
+
+import java.util.Objects;
+import org.apache.lucene.index.IndexableFieldType;
+
+/**
+ * A single field's values across multiple documents in a {@link ColumnBatch}. A Column carries only
+ * metadata (name, field type, and density); iteration is performed via cursors obtained from {@link
+ * LongColumn}, {@link BinaryColumn}, or {@link VectorColumn}.
+ *
+ * <p>Each call that requests a cursor returns a fresh cursor positioned at the first value, so
+ * columns can be consumed multiple times (for example, once in the row-oriented pass for stored
+ * fields and again in the column-oriented pass for doc values).
+ *
+ * @lucene.experimental
+ */
+public abstract class Column {
+
+ /**
+ * Whether a column has a value for every document in the batch. This is a contract the column
+ * asserts up-front so the indexing chain can pick the right code path without probing the data.
+ */
+ public enum Density {
+ /** The column has a value for every batch-local doc-id in {@code [0, numDocs)}, in order. */
+ DENSE,
+ /** The column may be missing values or have multiple values for some doc-ids. */
+ SPARSE,
+ }
+
+ private final String name;
+ private final IndexableFieldType fieldType;
+ private final Density density;
+
+ /**
+ * Creates a Column with the given field name, type, and density.
+ *
+ * @param name the field name
+ * @param fieldType describes how this field should be indexed
+ * @param density whether this column has a value for every document in the batch
+ */
+ protected Column(String name, IndexableFieldType fieldType, Density density) {
+ this.name = Objects.requireNonNull(name, "field name must not be null");
+ this.fieldType = Objects.requireNonNull(fieldType, "field type must not be null");
+ this.density = Objects.requireNonNull(density, "density must not be null");
+ }
+
+ /** Returns the field name. */
+ public String name() {
+ return name;
+ }
+
+ /** Returns the field type describing how this field is indexed. */
+ public IndexableFieldType fieldType() {
+ return fieldType;
+ }
+
+ /** Returns the density of this column (whether every doc has a value). */
+ public Density density() {
+ return density;
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/document/column/ColumnBatch.java b/lucene/core/src/java/org/apache/lucene/document/column/ColumnBatch.java
new file mode 100644
index 000000000000..cd70ebcd8b4e
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/document/column/ColumnBatch.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document.column;
+
+/**
+ * A column-oriented batch of documents for indexing. A Batch contains a collection of {@link
+ * Column}s, where each Column represents a single field across all documents in the batch.
+ * Documents are identified by batch-local IDs from 0 (inclusive) to {@link #numDocs()} (exclusive).
+ *
+ * @lucene.experimental
+ */
+public abstract class ColumnBatch {
+
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ protected ColumnBatch() {}
+
+ /**
+ * Returns the number of documents in this batch. All column doc-ids must be in the range [0,
+ * numDocs()).
+ */
+ public abstract int numDocs();
+
+ /**
+ * Returns the columns in this batch. Each column represents a single field across the documents
+ * in the batch.
+ */
+ public abstract Iterable<Column> columns();
+}
diff --git a/lucene/core/src/java/org/apache/lucene/document/column/ColumnFieldAdapter.java b/lucene/core/src/java/org/apache/lucene/document/column/ColumnFieldAdapter.java
new file mode 100644
index 000000000000..c118081b7a69
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/document/column/ColumnFieldAdapter.java
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document.column;
+
+import java.nio.charset.StandardCharsets;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.InvertableType;
+import org.apache.lucene.document.StoredValue;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.IndexableFieldType;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.NumericUtils;
+
+/**
+ * Lightweight adapter that presents a {@link Column}'s current cursor value as an {@link
+ * IndexableField} so it can be fed through the row-oriented indexing pass (stored fields and term
+ * inversion). Holds a fresh tuple cursor over the underlying column; one instance is created per
+ * column per batch.
+ *
+ * @lucene.internal
+ */
+public abstract class ColumnFieldAdapter extends Field {
+
+ ColumnFieldAdapter(String name, IndexableFieldType fieldType) {
+ super(name, fieldType);
+ }
+
+ /** Returns an adapter for the given column, dispatching on its concrete type. */
+ public static ColumnFieldAdapter create(Column column) {
+ if (column instanceof LongColumn lc) {
+ return new LongColumnAdapter(lc);
+ } else if (column instanceof BinaryColumn bc) {
+ return new BinaryColumnAdapter(bc);
+ } else {
+ throw new IllegalArgumentException("Unknown column type: " + column.getClass().getName());
+ }
+ }
+
+ /** Advances to the next batch-local doc-id with a value. */
+ public abstract int nextDoc();
+}
+
+final class LongColumnAdapter extends ColumnFieldAdapter {
+ private final LongTupleCursor cursor;
+ private final StoredValue reusableStoredValue;
+ private final StoredValue.Type storedType;
+
+ LongColumnAdapter(LongColumn column) {
+ super(column.name(), column.fieldType());
+ this.cursor = column.tuples();
+ if (column.fieldType().stored()) {
+ this.storedType = column.storedType();
+ this.reusableStoredValue = newReusableLongStoredValue(storedType);
+ } else {
+ this.storedType = null;
+ this.reusableStoredValue = null;
+ }
+ }
+
+ private static StoredValue newReusableLongStoredValue(StoredValue.Type type) {
+ return switch (type) {
+ case INTEGER -> new StoredValue(0);
+ case LONG -> new StoredValue(0L);
+ case FLOAT -> new StoredValue(0.0f);
+ case DOUBLE -> new StoredValue(0.0);
+ case STRING, BINARY, DATA_INPUT ->
+ throw new AssertionError("rejected by ColumnValidation.validateLongColumn");
+ };
+ }
+
+ @Override
+ public int nextDoc() {
+ return cursor.nextDoc();
+ }
+
+ @Override
+ public Number numericValue() {
+ return cursor.longValue();
+ }
+
+ @Override
+ public StoredValue storedValue() {
+ if (reusableStoredValue == null) {
+ return null;
+ }
+ long raw = cursor.longValue();
+ switch (storedType) {
+ case INTEGER -> reusableStoredValue.setIntValue((int) raw);
+ case LONG -> reusableStoredValue.setLongValue(raw);
+ case FLOAT -> reusableStoredValue.setFloatValue(NumericUtils.sortableIntToFloat((int) raw));
+ case DOUBLE -> reusableStoredValue.setDoubleValue(NumericUtils.sortableLongToDouble(raw));
+ case STRING, BINARY, DATA_INPUT ->
+ throw new IllegalArgumentException("rejected by ColumnValidation.validateLongColumn");
+ }
+ return reusableStoredValue;
+ }
+
+ @Override
+ public InvertableType invertableType() {
+ return null;
+ }
+}
+
+final class BinaryColumnAdapter extends ColumnFieldAdapter {
+ private final BinaryTupleCursor cursor;
+ private final StoredValue reusableStoredValue;
+ private final StoredValue.Type storedType;
+ private final boolean tokenized;
+ private final boolean indexed;
+
+ BinaryColumnAdapter(BinaryColumn column) {
+ super(column.name(), column.fieldType());
+ this.cursor = column.tuples();
+ this.tokenized = column.fieldType().tokenized();
+ this.indexed = column.fieldType().indexOptions() != IndexOptions.NONE;
+ if (column.fieldType().stored()) {
+ this.storedType = column.storedType();
+ this.reusableStoredValue = newReusableStoredValue(storedType);
+ } else {
+ this.storedType = null;
+ this.reusableStoredValue = null;
+ }
+ }
+
+ private static StoredValue newReusableStoredValue(StoredValue.Type type) {
+ return switch (type) {
+ case STRING -> new StoredValue("");
+ case BINARY -> new StoredValue(new BytesRef());
+ case INTEGER, LONG, FLOAT, DOUBLE, DATA_INPUT ->
+ throw new IllegalArgumentException("rejected by ColumnValidation.validateBinaryColumn");
+ };
+ }
+
+ @Override
+ public int nextDoc() {
+ return cursor.nextDoc();
+ }
+
+ @Override
+ public BytesRef binaryValue() {
+ return cursor.binaryValue();
+ }
+
+ @Override
+ public String stringValue() {
+ if (tokenized) {
+ BytesRef ref = cursor.binaryValue();
+ return new String(ref.bytes, ref.offset, ref.length, StandardCharsets.UTF_8);
+ }
+ return null;
+ }
+
+ @Override
+ public StoredValue storedValue() {
+ if (reusableStoredValue == null) {
+ return null;
+ }
+ BytesRef value = cursor.binaryValue();
+ switch (storedType) {
+ case STRING ->
+ reusableStoredValue.setStringValue(
+ new String(value.bytes, value.offset, value.length, StandardCharsets.UTF_8));
+ case BINARY -> reusableStoredValue.setBinaryValue(value);
+ case INTEGER, LONG, FLOAT, DOUBLE, DATA_INPUT ->
+ throw new IllegalArgumentException("rejected by ColumnValidation.validateBinaryColumn");
+ }
+ return reusableStoredValue;
+ }
+
+ @Override
+ public InvertableType invertableType() {
+ if (indexed == false) {
+ return null;
+ }
+ return tokenized ? InvertableType.TOKEN_STREAM : InvertableType.BINARY;
+ }
+
+ @Override
+ public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
+ if (tokenized) {
+ return analyzer.tokenStream(name(), stringValue());
+ }
+ return null;
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/document/column/ColumnValidation.java b/lucene/core/src/java/org/apache/lucene/document/column/ColumnValidation.java
new file mode 100644
index 000000000000..6c1584f87ce1
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/document/column/ColumnValidation.java
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document.column;
+
+import org.apache.lucene.document.StoredValue;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexableFieldType;
+
+/**
+ * Static validation and bounds-checking helpers for the columnar indexing path. These helpers are
+ * pure functions over the public column API and {@link IndexableFieldType}; they do not touch any
+ * indexing-chain state.
+ *
+ * @lucene.internal
+ */
+public final class ColumnValidation {
+
+ private ColumnValidation() {}
+
+ /**
+ * Throws {@link IllegalArgumentException} if {@code fieldType} declares no indexing feature (no
+ * doc values, no points, not stored, no index options, no vectors).
+ */
+ public static void validateColumnHasIndexingFeature(
+ String fieldName, IndexableFieldType fieldType) {
+ if (fieldType.docValuesType() == DocValuesType.NONE
+ && fieldType.pointDimensionCount() == 0
+ && fieldType.stored() == false
+ && fieldType.indexOptions() == IndexOptions.NONE
+ && fieldType.vectorDimension() == 0) {
+ throw new IllegalArgumentException(
+ "Column \""
+ + fieldName
+ + "\" must have a non-NONE docValuesType, point dimensions, be stored,"
+ + " have index options, or have vector dimensions");
+ }
+ }
+
+ /** Validates a {@link LongColumn} against the field type it will feed. */
+ public static void validateLongColumn(LongColumn column, IndexableFieldType fieldType) {
+ final int pointDims = fieldType.pointDimensionCount();
+ if (pointDims != 0) {
+ if (pointDims != 1) {
+ throw new IllegalArgumentException(
+ "LongColumn \""
+ + column.name()
+ + "\" only supports 1-dimensional point fields, got pointDimensionCount="
+ + pointDims);
+ }
+ final int expectedPointBytes =
+ (column.numericKind() == LongColumn.NumericKind.INT
+ || column.numericKind() == LongColumn.NumericKind.FLOAT)
+ ? Integer.BYTES
+ : Long.BYTES;
+ if (fieldType.pointNumBytes() != expectedPointBytes) {
+ throw new IllegalArgumentException(
+ "LongColumn \""
+ + column.name()
+ + "\" numericKind="
+ + column.numericKind()
+ + " requires pointNumBytes="
+ + expectedPointBytes
+ + ", got "
+ + fieldType.pointNumBytes());
+ }
+ }
+ if (fieldType.stored()) {
+ final StoredValue.Type storedType = column.storedType();
+ switch (storedType) {
+ case INTEGER, LONG, FLOAT, DOUBLE -> {
+ // OK.
+ }
+ case STRING, BINARY ->
+ throw new IllegalArgumentException(
+ "LongColumn \""
+ + column.name()
+ + "\" storedType="
+ + storedType
+ + " is not supported; use a BinaryColumn for non-numeric stored data");
+ case DATA_INPUT ->
+ throw new IllegalArgumentException(
+ "LongColumn \""
+ + column.name()
+ + "\" storedType DATA_INPUT is not supported for columns");
+ }
+ }
+ }
+
+ /** Validates a {@link BinaryColumn} against the field type it will feed. */
+ public static void validateBinaryColumn(BinaryColumn column, IndexableFieldType fieldType) {
+ final DocValuesType dvType = fieldType.docValuesType();
+ if (dvType == DocValuesType.NUMERIC || dvType == DocValuesType.SORTED_NUMERIC) {
+ throw new IllegalArgumentException(
+ "BinaryColumn \""
+ + column.name()
+ + "\" cannot feed docValuesType="
+ + dvType
+ + "; use a LongColumn");
+ }
+ if (fieldType.stored()) {
+ final StoredValue.Type storedType = column.storedType();
+ switch (storedType) {
+ case BINARY, STRING -> {
+ // OK.
+ }
+ case INTEGER, LONG, FLOAT, DOUBLE ->
+ throw new IllegalArgumentException(
+ "BinaryColumn \""
+ + column.name()
+ + "\" storedType="
+ + storedType
+ + " is not supported; use a LongColumn for numeric stored data");
+ case DATA_INPUT ->
+ throw new IllegalArgumentException(
+ "BinaryColumn \""
+ + column.name()
+ + "\" storedType DATA_INPUT is not supported for columns");
+ }
+ }
+ }
+
+ /** Validates a {@link VectorColumn} against the field type it will feed. */
+ public static void validateVectorColumn(VectorColumn<?> column, IndexableFieldType fieldType) {
+ if (fieldType.vectorDimension() <= 0) {
+ throw new IllegalArgumentException(
+ "VectorColumn \""
+ + column.name()
+ + "\" requires fieldType.vectorDimension() > 0; got "
+ + fieldType.vectorDimension());
+ }
+ if (fieldType.docValuesType() != DocValuesType.NONE
+ || fieldType.pointDimensionCount() != 0
+ || fieldType.stored()
+ || fieldType.indexOptions() != IndexOptions.NONE) {
+ throw new IllegalArgumentException(
+ "VectorColumn \""
+ + column.name()
+ + "\" must be vector-only: docValuesType=NONE, pointDimensionCount=0,"
+ + " stored=false, indexOptions=NONE");
+ }
+ }
+
+ /** Throws if {@code batchDocID} is outside {@code [0, numDocs)}. */
+ public static void checkDocID(Column column, int batchDocID, int numDocs) {
+ if (batchDocID < 0 || batchDocID >= numDocs) {
+ throw new IllegalArgumentException(
+ "Column \""
+ + column.name()
+ + "\" returned batch doc-id "
+ + batchDocID
+ + " which is out of range [0, "
+ + numDocs
+ + ")");
+ }
+ }
+
+ /** Throws if a dense column did not produce exactly {@code numDocs} values. */
+ public static void checkDenseCount(Column column, int consumed, int numDocs) {
+ if (consumed != numDocs) {
+ throw new IllegalArgumentException(
+ "Dense column \""
+ + column.name()
+ + "\" provided "
+ + consumed
+ + " values but batch has "
+ + numDocs
+ + " documents");
+ }
+ }
+
+ /** Throws if a vector cursor doc-id is not strictly greater than the previous one. */
+ public static void checkVectorDocIDStrictlyIncreasing(
+ VectorColumn<?> column, int batchDocID, int prevBatchDocID) {
+ if (batchDocID <= prevBatchDocID) {
+ throw new IllegalArgumentException(
+ "VectorColumn \""
+ + column.name()
+ + "\" must yield strictly increasing batch doc-ids; got "
+ + batchDocID
+ + " after "
+ + prevBatchDocID);
+ }
+ }
+
+ /** Throws if a vector value's length does not match the field's declared dimension. */
+ public static void checkVectorDimension(
+ VectorColumn<?> column, int actual, int expected, int batchDocID) {
+ if (actual != expected) {
+ throw new IllegalArgumentException(
+ "VectorColumn \""
+ + column.name()
+ + "\" expected dimension "
+ + expected
+ + " but got vector of length "
+ + actual
+ + " at batch doc "
+ + batchDocID);
+ }
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/document/column/LongColumn.java b/lucene/core/src/java/org/apache/lucene/document/column/LongColumn.java
new file mode 100644
index 000000000000..732a89cc8e3e
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/document/column/LongColumn.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document.column;
+
+import java.util.Objects;
+import org.apache.lucene.document.StoredValue;
+import org.apache.lucene.index.IndexableFieldType;
+
+/**
+ * A {@link Column} that provides long values. Used for {@link
+ * org.apache.lucene.index.DocValuesType#NUMERIC NUMERIC} and {@link
+ * org.apache.lucene.index.DocValuesType#SORTED_NUMERIC SORTED_NUMERIC} doc values and for stored
+ * numeric fields.
+ *
+ * Iteration is performed via cursors. {@link #tuples()} is always available and yields {@code
+ * (docID, longValue)} pairs. {@link #values()} is a bulk cursor over consecutive doc-ids; it must
+ * be overridden when {@link #density()} is {@link Column.Density#DENSE DENSE} and is only consulted
+ * in that case.
+ *
+ * <p>{@link #numericKind()} marks how the long bits should be interpreted. Defaults to {@link
+ * NumericKind#LONG LONG}; pass {@link NumericKind#INT INT} (low 32 bits, sign-extended), {@link
+ * NumericKind#FLOAT FLOAT} (low 32 bits encoded via {@link
+ * org.apache.lucene.util.NumericUtils#floatToSortableInt}), or {@link NumericKind#DOUBLE DOUBLE}
+ * (full 64 bits encoded via {@link org.apache.lucene.util.NumericUtils#doubleToSortableLong}) to
+ * the constructor to select another interpretation. Callers are responsible for producing the
+ * sortable encoding; doc values writes the long unchanged, points consumes it as sortable bytes,
+ * and stored fields round-trips it back to {@code float}/{@code double} via {@link
+ * org.apache.lucene.util.NumericUtils#sortableIntToFloat} / {@link
+ * org.apache.lucene.util.NumericUtils#sortableLongToDouble}. The numeric kind drives the default
+ * {@link #storedType()}.
+ *
+ * @lucene.experimental
+ */
+public abstract class LongColumn extends Column {
+
+ /** The numeric interpretation of the column's long values. */
+ public enum NumericKind {
+ /** Type of integer values. */
+ INT,
+ /** Type of long values. */
+ LONG,
+ /** Type of float values. */
+ FLOAT,
+ /** Type of double values. */
+ DOUBLE,
+ }
+
+ private final NumericKind numericKind;
+
+ /** Creates a LongColumn with {@link NumericKind#LONG}. */
+ protected LongColumn(String name, IndexableFieldType fieldType, Density density) {
+ this(name, fieldType, density, NumericKind.LONG);
+ }
+
+ /** Creates a LongColumn with the given numeric interpretation. */
+ protected LongColumn(
+ String name, IndexableFieldType fieldType, Density density, NumericKind numericKind) {
+ super(name, fieldType, density);
+ this.numericKind = Objects.requireNonNull(numericKind, "numericKind must not be null");
+ }
+
+ /** Returns a fresh tuple cursor starting at the beginning of the batch. */
+ public abstract LongTupleCursor tuples();
+
+ /**
+ * Returns a fresh values cursor iterating dense long values for doc-ids {@code [0, numDocs)}.
+ * Must be overridden when {@link Column#density()} is {@link Column.Density#DENSE DENSE}; the
+ * default implementation throws {@link UnsupportedOperationException} and is never called for
+ * {@link Column.Density#SPARSE SPARSE} columns.
+ */
+ public LongValuesCursor values() {
+ throw new UnsupportedOperationException(
+ "values() requires density() == DENSE for column \"" + name() + "\"");
+ }
+
+ /** The numeric interpretation of the column's long values. */
+ public final NumericKind numericKind() {
+ return numericKind;
+ }
+
+ /**
+ * The stored-field variant emitted for this column. The default derives from {@link
+ * #numericKind()} — {@code INT→INTEGER}, {@code LONG→LONG}, {@code FLOAT→FLOAT}, {@code
+ * DOUBLE→DOUBLE} — so a caller that wants the natural numeric variant does not need to override
+ * this method. Only numeric {@link org.apache.lucene.document.StoredValue.Type} values are
+ * permitted; non-numeric stored data should use a {@link BinaryColumn}.
+ */
+ public StoredValue.Type storedType() {
+ return switch (numericKind) {
+ case INT -> StoredValue.Type.INTEGER;
+ case LONG -> StoredValue.Type.LONG;
+ case FLOAT -> StoredValue.Type.FLOAT;
+ case DOUBLE -> StoredValue.Type.DOUBLE;
+ };
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/document/column/LongTupleCursor.java b/lucene/core/src/java/org/apache/lucene/document/column/LongTupleCursor.java
new file mode 100644
index 000000000000..25dee6aa7742
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/document/column/LongTupleCursor.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document.column;
+
+import org.apache.lucene.search.DocIdSetIterator;
+
+/**
+ * A tuple cursor over a {@link LongColumn}. Yields {@code (docID, longValue)} pairs. Batch-local
+ * doc-ids are returned in non-decreasing order; the same doc-id may repeat for multi-valued fields
+ * (e.g. {@link org.apache.lucene.index.DocValuesType#SORTED_NUMERIC SORTED_NUMERIC}).
+ *
+ * @lucene.experimental
+ */
+public abstract class LongTupleCursor {
+
+ /** Sole constructor. */
+ protected LongTupleCursor() {}
+
+ /**
+ * Advances to the next doc-id that has a value and returns it, or {@link
+ * DocIdSetIterator#NO_MORE_DOCS} if exhausted. Doc-ids are batch-local (0 to {@code numDocs -
+ * 1}).
+ */
+ public abstract int nextDoc();
+
+ /**
+ * Returns the value at the current cursor position. Only valid after a successful {@link
+ * #nextDoc()} call that returned a value other than {@link DocIdSetIterator#NO_MORE_DOCS}.
+ */
+ public abstract long longValue();
+}
diff --git a/lucene/core/src/java/org/apache/lucene/document/column/LongValuesCursor.java b/lucene/core/src/java/org/apache/lucene/document/column/LongValuesCursor.java
new file mode 100644
index 000000000000..34aefeb5fdb9
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/document/column/LongValuesCursor.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document.column;
+
+/**
+ * A values cursor over a dense {@link LongColumn}. The cursor produces exactly {@link #size()}
+ * values for consecutive batch-local doc-ids starting at 0, one per call to {@link #nextLong()}.
+ *
+ *
+ * <p>Implementations must throw an exception if {@link #nextLong()} is called more than {@link
+ * #size()} times.
+ *
+ * @lucene.experimental
+ */
+public abstract class LongValuesCursor {
+
+ /** Sole constructor. */
+ protected LongValuesCursor() {}
+
+ /** Total number of values this cursor will produce. */
+ public abstract int size();
+
+ /** Returns the next long value. Must not be called more than {@link #size()} times. */
+ public abstract long nextLong();
+
+ /**
+ * Bulk-fill {@code length} values into {@code dst} starting at {@code offset}, advancing the
+ * cursor by {@code length}. Combined {@link #nextLong()} and {@code fill} calls must not consume
+ * more than {@link #size()} values; implementations must throw if they do.
+ *
+ *
+ * <p>The default implementation calls {@link #nextLong()} in a loop. Override to provide a more
+ * efficient bulk fill (for example a {@link System#arraycopy} from a backing array).
+ */
+ public void fill(long[] dst, int offset, int length) {
+ for (int i = 0; i < length; i++) {
+ dst[offset + i] = nextLong();
+ }
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/document/column/VectorColumn.java b/lucene/core/src/java/org/apache/lucene/document/column/VectorColumn.java
new file mode 100644
index 000000000000..1df27fd02a38
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/document/column/VectorColumn.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document.column;
+
+import org.apache.lucene.index.IndexableFieldType;
+import org.apache.lucene.index.VectorEncoding;
+
+/**
+ * A {@link Column} that provides KNN vector values via a tuple cursor. Vector columns are
+ * vector-only: the field type must declare {@code vectorDimension() > 0}, and must not also set doc
+ * values, points, stored, or {@code indexOptions}. Vectors are single-valued, so the cursor yields
+ * strictly increasing batch-local doc-ids.
+ *
+ *
+ * <p>The type parameter {@code T} must match {@link IndexableFieldType#vectorEncoding()}: {@code
+ * float[]} for {@link VectorEncoding#FLOAT32 FLOAT32} and {@code byte[]} for {@link
+ * VectorEncoding#BYTE BYTE}. A mismatch is reported as a {@link ClassCastException} when values are
+ * consumed during indexing.
+ *
+ *
+ * <p>{@link Column.Density#DENSE DENSE} indicates that every batch-local doc has a vector; {@link
+ * Column.Density#SPARSE SPARSE} allows gaps. Both densities use the same tuple cursor — there is no
+ * dense bulk-fill fast path for vectors.
+ *
+ * @param <T> the vector array type, either {@code float[]} or {@code byte[]}
+ * @lucene.experimental
+ */
+public abstract class VectorColumn<T> extends Column {
+
+ /**
+ * Creates a VectorColumn with the given field name, type, and density.
+ *
+ * @throws IllegalArgumentException if {@code fieldType.vectorDimension() <= 0}
+ */
+ protected VectorColumn(String name, IndexableFieldType fieldType, Density density) {
+ super(name, fieldType, density);
+ if (fieldType.vectorDimension() <= 0) {
+ throw new IllegalArgumentException(
+ "VectorColumn \""
+ + name
+ + "\" requires fieldType.vectorDimension() > 0; got "
+ + fieldType.vectorDimension());
+ }
+ }
+
+ /** Returns a fresh tuple cursor starting at the beginning of the batch. */
+ public abstract VectorTupleCursor<T> tuples();
+}
diff --git a/lucene/core/src/java/org/apache/lucene/document/column/VectorTupleCursor.java b/lucene/core/src/java/org/apache/lucene/document/column/VectorTupleCursor.java
new file mode 100644
index 000000000000..d05b8d47ff69
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/document/column/VectorTupleCursor.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document.column;
+
+import org.apache.lucene.search.DocIdSetIterator;
+
+/**
+ * A tuple cursor over a {@link VectorColumn}. Yields {@code (docID, vectorValue)} pairs.
+ * Batch-local doc-ids are returned in strictly increasing order (vectors are single-valued).
+ *
+ * @param <T> the vector array type, either {@code float[]} or {@code byte[]}
+ * @lucene.experimental
+ */
+public abstract class VectorTupleCursor<T> {
+
+ /** Sole constructor. */
+ protected VectorTupleCursor() {}
+
+ /**
+ * Advances to the next doc-id that has a vector and returns it, or {@link
+ * DocIdSetIterator#NO_MORE_DOCS} if exhausted. Doc-ids are batch-local (0 to {@code numDocs - 1})
+ * and strictly increasing.
+ */
+ public abstract int nextDoc();
+
+ /**
+ * Returns the vector at the current cursor position. The returned array may be reused by the
+ * cursor on subsequent calls to {@link #nextDoc()} — the indexing chain copies the value before
+ * advancing. Only valid after a {@code nextDoc()} that returned a value other than {@link
+ * DocIdSetIterator#NO_MORE_DOCS}.
+ */
+ public abstract T vectorValue();
+}
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocsWithFieldSet.java b/lucene/core/src/java/org/apache/lucene/index/DocsWithFieldSet.java
index 8c8b0cac26f6..a7d70b23ace7 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocsWithFieldSet.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocsWithFieldSet.java
@@ -61,6 +61,31 @@ public void add(int docID) {
cardinality++;
}
+ /**
+ * Add a contiguous range of document IDs to the set.
+ *
+ * @param from first document ID (inclusive)
+ * @param toExclusive one past the last document ID (exclusive)
+ */
+ public void addRange(int from, int toExclusive) {
+ if (from <= lastDocId) {
+ throw new IllegalArgumentException(
+ "Out of order doc ids: last=" + lastDocId + ", next=" + from);
+ }
+ int count = toExclusive - from;
+ if (set != null) {
+ set = FixedBitSet.ensureCapacity(set, toExclusive - 1);
+ set.set(from, toExclusive);
+ } else if (from != cardinality) {
+ // migrate to a sparse encoding using a bit set
+ set = new FixedBitSet(toExclusive);
+ set.set(0, cardinality);
+ set.set(from, toExclusive);
+ }
+ lastDocId = toExclusive - 1;
+ cardinality += count;
+ }
+
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + (set == null ? 0 : set.ramBytesUsed());
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
index 3414b882f89f..c6cf1ee8dc2b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
@@ -27,6 +27,7 @@
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;
import java.util.function.ToLongFunction;
+import org.apache.lucene.document.column.ColumnBatch;
import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.AlreadyClosedException;
@@ -452,6 +453,45 @@ long updateDocuments(
return seqNo;
}
+ long updateBatch(final ColumnBatch columnBatch, final DocumentsWriterDeleteQueue.Node<?> delNode)
+ throws IOException {
+ boolean hasEvents = preUpdate();
+
+ final DocumentsWriterPerThread dwpt = flushControl.obtainAndLock();
+ final DocumentsWriterPerThread flushingDWPT;
+ long seqNo;
+
+ try {
+ // This must happen after we've pulled the DWPT because IW.close
+ // waits for all DWPT to be released:
+ ensureOpen();
+ try {
+ seqNo =
+ dwpt.updateBatch(
+ columnBatch, delNode, flushNotifications, numDocsInRAM::incrementAndGet);
+ } finally {
+ if (dwpt.isAborted()) {
+ flushControl.doOnAbort(dwpt);
+ }
+ }
+ flushingDWPT = flushControl.doAfterDocument(dwpt);
+ } finally {
+ synchronized (flushControl) {
+ if (dwpt.isFlushPending() || dwpt.isAborted() || dwpt.isQueueAdvanced()) {
+ dwpt.unlock();
+ } else {
+ perThreadPool.marksAsFreeAndUnlock(dwpt);
+ }
+ }
+ assert dwpt.isHeldByCurrentThread() == false : "we didn't release the dwpt even on abort";
+ }
+
+ if (postUpdate(flushingDWPT, hasEvents)) {
+ seqNo = -seqNo;
+ }
+ return seqNo;
+ }
+
private boolean maybeFlush() throws IOException {
final DocumentsWriterPerThread flushingDWPT = flushControl.nextPendingFlush();
if (flushingDWPT != null) {
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
index e4cd3f04328c..2324dd6094f2 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
@@ -32,6 +32,7 @@
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.document.column.ColumnBatch;
import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
@@ -284,6 +285,60 @@ long updateDocuments(
}
}
+ long updateBatch(
+ ColumnBatch columnBatch,
+ DocumentsWriterDeleteQueue.Node<?> deleteNode,
+ DocumentsWriter.FlushNotifications flushNotifications,
+ Runnable onNewDocOnRAM)
+ throws IOException {
+ try {
+ testPoint("DocumentsWriterPerThread addBatch start");
+ assert abortingException == null : "DWPT has hit aborting exception but is still indexing";
+ if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
+ infoStream.message(
+ "DWPT",
+ Thread.currentThread().getName()
+ + " update batch"
+ + " docID="
+ + numDocsInRAM
+ + " seg="
+ + segmentInfo.name);
+ }
+ final int docsInRamBefore = numDocsInRAM;
+ final int numDocs = columnBatch.numDocs();
+ boolean allDocsIndexed = false;
+ try {
+ // Reserve all doc IDs upfront and account for them in numDocsInRAM immediately,
+ // so that deleteLastDocs in the finally block can correctly clean up on failure.
+ // Even on exception, the documents are still added (but marked deleted), matching
+ // the document path semantics.
+ for (int i = 0; i < numDocs; i++) {
+ reserveOneDoc();
+ }
+ numDocsInRAM += numDocs;
+ for (int i = 0; i < numDocs; i++) {
+ onNewDocOnRAM.run();
+ }
+
+ indexingChain.processBatch(docsInRamBefore, columnBatch);
+
+ if (numDocs > 1) {
+ segmentInfo.setHasBlocks();
+ }
+ allDocsIndexed = true;
+ return finishDocuments(deleteNode, docsInRamBefore);
+ } finally {
+ if (!allDocsIndexed && !aborted) {
+ // the iterator threw an exception that is not aborting
+ // go and mark all docs from this block as deleted
+ deleteLastDocs(numDocsInRAM - docsInRamBefore);
+ }
+ }
+ } finally {
+ maybeAbort("updateBatch", flushNotifications);
+ }
+ }
+
private long finishDocuments(DocumentsWriterDeleteQueue.Node<?> deleteNode, int docIdUpTo) {
/*
* here we actually finish the document in two steps 1. push the delete into
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index 30ff07c3932f..79bada22d57b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -55,6 +55,7 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.column.ColumnBatch;
import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate;
import org.apache.lucene.index.FieldInfos.FieldNumbers;
@@ -1523,6 +1524,41 @@ public long addDocuments(Iterable<? extends Iterable<? extends IndexableField>>
return updateDocuments((DocumentsWriterDeleteQueue.Node<?>) null, docs);
}
+ /**
+ * Adds a batch of documents in column-oriented format. The batch's columns are processed
+ * field-by-field rather than document-by-document.
+ *
+ * @param columnBatch the column-oriented batch of documents to add
+ * @return The sequence number for this operation
+ * @throws IOException if there is a low-level IO error
+ * @lucene.experimental
+ */
+ public long addBatch(ColumnBatch columnBatch) throws IOException {
+ return updateBatch(null, columnBatch);
+ }
+
+ private long updateBatch(
+ final DocumentsWriterDeleteQueue.Node<?> delNode, ColumnBatch columnBatch)
+ throws IOException {
+ ensureOpen();
+ boolean success = false;
+ try {
+ final long seqNo = maybeProcessEvents(docWriter.updateBatch(columnBatch, delNode));
+ success = true;
+ return seqNo;
+ } catch (Error tragedy) {
+ tragicEvent(tragedy, "updateBatch");
+ throw tragedy;
+ } finally {
+ if (success == false) {
+ if (infoStream.isEnabled("IW")) {
+ infoStream.message("IW", "hit exception adding batch");
+ }
+ maybeCloseOnTragicEvent();
+ }
+ }
+ }
+
/**
* Atomically deletes documents matching the provided delTerm and adds a block of documents with
* sequentially assigned document IDs, such that an external reader will see all or none of the
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexingChain.java b/lucene/core/src/java/org/apache/lucene/index/IndexingChain.java
index d62100635415..1db2a8f2567b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexingChain.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexingChain.java
@@ -43,6 +43,17 @@
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredValue;
+import org.apache.lucene.document.column.BinaryColumn;
+import org.apache.lucene.document.column.BinaryTupleCursor;
+import org.apache.lucene.document.column.Column;
+import org.apache.lucene.document.column.ColumnBatch;
+import org.apache.lucene.document.column.ColumnFieldAdapter;
+import org.apache.lucene.document.column.ColumnValidation;
+import org.apache.lucene.document.column.LongColumn;
+import org.apache.lucene.document.column.LongTupleCursor;
+import org.apache.lucene.document.column.LongValuesCursor;
+import org.apache.lucene.document.column.VectorColumn;
+import org.apache.lucene.document.column.VectorTupleCursor;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
@@ -59,6 +70,7 @@
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.IntBlockPool;
+import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.Version;
@@ -681,6 +693,435 @@ private void oversizeDocFields() {
docFields = newDocFields;
}
+ /**
+ * Process a column-oriented batch of documents. Iterates the batch's columns, validates each
+ * column's field type, and feeds values to the appropriate DocValuesWriter.
+ *
+ * @param baseDocID the segment-level doc ID for the first document in the batch (batch-local doc
+ * 0 maps to this value)
+ * @param columnBatch the column-oriented batch
+ */
+ void processBatch(int baseDocID, ColumnBatch columnBatch) throws IOException {
+ final int numDocs = columnBatch.numDocs();
+ boolean hasRowColumns = false;
+
+ // First pass: validate all column schemas and initialize field infos
+ for (Column column : columnBatch.columns()) {
+ final String fieldName = column.name();
+ final IndexableFieldType fieldType = column.fieldType();
+
+ ColumnValidation.validateColumnHasIndexingFeature(fieldName, fieldType);
+
+ if (column instanceof BinaryColumn bc) {
+ ColumnValidation.validateBinaryColumn(bc, fieldType);
+ } else if (column instanceof LongColumn lc) {
+ ColumnValidation.validateLongColumn(lc, fieldType);
+ } else if (column instanceof VectorColumn<?> vc) {
+ ColumnValidation.validateVectorColumn(vc, fieldType);
+ }
+
+ if (fieldType.stored() || fieldType.indexOptions() != IndexOptions.NONE) {
+ hasRowColumns = true;
+ }
+
+ PerField pf = getOrAddPerField(fieldName);
+ validateColumnSchema(fieldName, pf, fieldType);
+ }
+
+ // Index the parent field for every document (each batch doc is an individual document,
+ // not part of a block, so every doc is its own parent).
+ if (parentPf != null) {
+ if (parentPf.fieldInfo == null) {
+ initializeFieldInfo(parentPf);
+ parentPf.trySetValidatedFrozenFieldType();
+ }
+ final NumericDocValuesWriter parentWriter = (NumericDocValuesWriter) parentPf.docValuesWriter;
+ final long value = parentField.numericValue().longValue();
+ for (int i = 0; i < numDocs; i++) {
+ parentWriter.addValue(baseDocID + i, value);
+ }
+ }
+
+ // Row-oriented pass: stored fields and term inversion only. Uses fresh tuple cursors.
+ if (hasRowColumns) {
+ processRowColumns(baseDocID, numDocs, columnBatch.columns());
+ }
+
+ // Column-oriented pass: doc values, points, and vectors. Each column is asked for a fresh
+ // cursor.
+ for (Column column : columnBatch.columns()) {
+ final IndexableFieldType fieldType = column.fieldType();
+ if (fieldType.docValuesType() == DocValuesType.NONE
+ && fieldType.pointDimensionCount() == 0
+ && fieldType.vectorDimension() == 0) {
+ continue; // no column-oriented features
+ }
+ PerField pf = getOrAddPerField(column.name());
+
+ switch (column) {
+ case LongColumn longCol -> processLongColumn(baseDocID, numDocs, longCol, pf, fieldType);
+ case BinaryColumn binaryCol ->
+ processBinaryColumn(baseDocID, numDocs, binaryCol, pf, fieldType);
+ case VectorColumn<?> vectorCol ->
+ processVectorColumn(baseDocID, numDocs, vectorCol, pf, fieldType);
+ default ->
+ throw new IllegalArgumentException(
+ "Unknown column type: " + column.getClass().getName());
+ }
+ }
+ }
+
+ /**
+ * Processes row-oriented features (stored fields and term inversion) for columns that have stored
+ * or indexed fields. The outer loop iterates every batch-local doc-id in {@code [0, numDocs)} so
+ * every reserved doc is framed with {@code startStoredFields}/{@code termsHash.startDocument},
+ * matching the single-doc indexing path. For each doc, row columns are consumed while their
+ * cursor head equals the current doc. Doc values and points are handled separately in the
+ * column-oriented pass.
+ */
+ private void processRowColumns(int baseDocID, int numDocs, Iterable<Column> columns)
+ throws IOException {
+ // Collect row-oriented columns. Per-field PerFields are cached in the shared docFields array
+ // (also used by processDocument) to avoid a per-batch allocation; adapters and cursor heads
+ // are local since they're column-specific.
+ int numRowCols = 0;
+ ColumnFieldAdapter[] adapters = new ColumnFieldAdapter[4];
+ int[] heads = new int[4];
+ boolean hasInverted = false;
+
+ for (Column column : columns) {
+ IndexableFieldType fieldType = column.fieldType();
+ if (fieldType.stored() == false && fieldType.indexOptions() == IndexOptions.NONE) {
+ continue;
+ }
+ if (numRowCols >= adapters.length) {
+ adapters = ArrayUtil.grow(adapters, numRowCols + 1);
+ heads = ArrayUtil.grow(heads, numRowCols + 1);
+ }
+ if (numRowCols >= docFields.length) {
+ oversizeDocFields();
+ }
+ ColumnFieldAdapter adapter = ColumnFieldAdapter.create(column);
+ adapters[numRowCols] = adapter;
+ docFields[numRowCols] = getOrAddPerField(column.name());
+ heads[numRowCols] = adapter.nextDoc();
+ if (fieldType.indexOptions() != IndexOptions.NONE) {
+ hasInverted = true;
+ }
+ numRowCols++;
+ }
+
+ // Row-dense outer loop: frame every doc in [0, numDocs). Column cursors stay sparse, but the
+ // per-doc framing is fixed so stored fields and termsHash stay aligned with the reserved doc
+ // ids even for docs that have no row-oriented values.
+ for (int batchDocID = 0; batchDocID < numDocs; batchDocID++) {
+ int segDocID = baseDocID + batchDocID;
+ long fieldGen = nextFieldGen++;
+ int indexedFieldCount = 0;
+
+ if (hasInverted) {
+ termsHash.startDocument();
+ }
+ startStoredFields(segDocID);
+ try {
+ for (int i = 0; i < numRowCols; i++) {
+ int head = heads[i];
+ if (head != DocIdSetIterator.NO_MORE_DOCS && head < batchDocID) {
+ throw new IllegalArgumentException(
+ "Row column \""
+ + adapters[i].name()
+ + "\" returned out-of-order batch doc-id "
+ + head);
+ }
+ while (head == batchDocID) {
+ PerField pf = docFields[i];
+ if (pf.fieldGen != fieldGen) {
+ pf.fieldGen = fieldGen;
+ pf.reset(segDocID, adapters[i].fieldType());
+ }
+ if (invertAndStore(segDocID, adapters[i], pf)) {
+ fields[indexedFieldCount] = pf;
+ indexedFieldCount++;
+ }
+ head = adapters[i].nextDoc();
+ }
+ heads[i] = head;
+ }
+ } finally {
+ if (hasHitAbortingException == false) {
+ for (int i = 0; i < indexedFieldCount; i++) {
+ fields[i].finish(segDocID);
+ }
+ finishStoredFields();
+ if (hasInverted) {
+ try {
+ termsHash.finishDocument(segDocID);
+ } catch (Throwable th) {
+ abortingExceptionConsumer.accept(th);
+ throw th;
+ }
+ }
+ }
+ }
+ }
+
+ // Any remaining cursor head after the outer loop is a doc-id >= numDocs.
+ for (int i = 0; i < numRowCols; i++) {
+ if (heads[i] != DocIdSetIterator.NO_MORE_DOCS) {
+ throw new IllegalArgumentException(
+ "Row column \""
+ + adapters[i].name()
+ + "\" returned batch doc-id "
+ + heads[i]
+ + " which is out of range [0, "
+ + numDocs
+ + ")");
+ }
+ }
+ }
+
+ private void validateColumnSchema(String fieldName, PerField pf, IndexableFieldType fieldType)
+ throws IOException {
+ updateDocFieldSchema(fieldName, pf.schema, fieldType);
+ if (pf.fieldInfo == null) {
+ initializeFieldInfo(pf);
+ pf.trySetValidatedFrozenFieldType();
+ } else {
+ pf.schema.assertSameSchema(pf.fieldInfo);
+ }
+ }
+
+ private static void processLongColumn(
+ int baseDocID, int numDocs, LongColumn column, PerField pf, IndexableFieldType fieldType)
+ throws IOException {
+ final DocValuesType dvType = fieldType.docValuesType();
+ final boolean hasPoints = fieldType.pointDimensionCount() != 0;
+
+ // DV-only path (no points): the bulk dense path remains available.
+ // TODO: can support dense fast path for points
+ if (hasPoints == false) {
+ if (column.density() == Column.Density.DENSE) {
+ processDenseLongColumn(baseDocID, numDocs, column, column.values(), pf, dvType);
+ return;
+ }
+ LongTupleCursor cursor = column.tuples();
+ switch (dvType) {
+ case NUMERIC -> {
+ NumericDocValuesWriter writer = (NumericDocValuesWriter) pf.docValuesWriter;
+ int batchDocID;
+ while ((batchDocID = cursor.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ ColumnValidation.checkDocID(column, batchDocID, numDocs);
+ writer.addValue(baseDocID + batchDocID, cursor.longValue());
+ }
+ }
+ case SORTED_NUMERIC -> {
+ SortedNumericDocValuesWriter writer = (SortedNumericDocValuesWriter) pf.docValuesWriter;
+ int batchDocID;
+ while ((batchDocID = cursor.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ ColumnValidation.checkDocID(column, batchDocID, numDocs);
+ writer.addValue(baseDocID + batchDocID, cursor.longValue());
+ }
+ }
+ // $CASES-OMITTED$
+ default ->
+ throw new IllegalArgumentException(
+ "LongColumn \"" + column.name() + "\" has incompatible docValuesType: " + dvType);
+ }
+ return;
+ }
+
+ // Points (+ optional numeric DV). Always uses the tuple cursor.
+ final LongColumn.NumericKind kind = column.numericKind();
+ final int byteWidth =
+ (kind == LongColumn.NumericKind.INT || kind == LongColumn.NumericKind.FLOAT)
+ ? Integer.BYTES
+ : Long.BYTES;
+ final byte[] pointScratch = new byte[byteWidth];
+ final BytesRef pointBytesRef = new BytesRef(pointScratch);
+ final PointValuesWriter pointWriter = pf.pointValuesWriter;
+ final LongTupleCursor cursor = column.tuples();
+
+ switch (dvType) {
+ case NONE -> {
+ int batchDocID;
+ while ((batchDocID = cursor.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ ColumnValidation.checkDocID(column, batchDocID, numDocs);
+ encodeSortablePointBytes(cursor.longValue(), kind, pointScratch);
+ pointWriter.addPackedValue(baseDocID + batchDocID, pointBytesRef);
+ }
+ }
+ case NUMERIC -> {
+ NumericDocValuesWriter dvWriter = (NumericDocValuesWriter) pf.docValuesWriter;
+ int batchDocID;
+ while ((batchDocID = cursor.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ ColumnValidation.checkDocID(column, batchDocID, numDocs);
+ int segDocID = baseDocID + batchDocID;
+ long raw = cursor.longValue();
+ dvWriter.addValue(segDocID, raw);
+ encodeSortablePointBytes(raw, kind, pointScratch);
+ pointWriter.addPackedValue(segDocID, pointBytesRef);
+ }
+ }
+ case SORTED_NUMERIC -> {
+ SortedNumericDocValuesWriter dvWriter = (SortedNumericDocValuesWriter) pf.docValuesWriter;
+ int batchDocID;
+ while ((batchDocID = cursor.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ ColumnValidation.checkDocID(column, batchDocID, numDocs);
+ int segDocID = baseDocID + batchDocID;
+ long raw = cursor.longValue();
+ dvWriter.addValue(segDocID, raw);
+ encodeSortablePointBytes(raw, kind, pointScratch);
+ pointWriter.addPackedValue(segDocID, pointBytesRef);
+ }
+ }
+ // $CASES-OMITTED$
+ default ->
+ throw new IllegalArgumentException(
+ "LongColumn \"" + column.name() + "\" has incompatible docValuesType: " + dvType);
+ }
+ }
+
+ private static void encodeSortablePointBytes(
+ long raw, LongColumn.NumericKind kind, byte[] scratch) {
+ switch (kind) {
+ case INT, FLOAT -> NumericUtils.intToSortableBytes((int) raw, scratch, 0);
+ case LONG, DOUBLE -> NumericUtils.longToSortableBytes(raw, scratch, 0);
+ }
+ }
+
+ private static void processDenseLongColumn(
+ int baseDocID,
+ int numDocs,
+ LongColumn column,
+ LongValuesCursor cursor,
+ PerField pf,
+ DocValuesType dvType) {
+ ColumnValidation.checkDenseCount(column, cursor.size(), numDocs);
+ switch (dvType) {
+ case NUMERIC -> {
+ NumericDocValuesWriter writer = (NumericDocValuesWriter) pf.docValuesWriter;
+ writer.addDenseValues(baseDocID, cursor);
+ }
+ case SORTED_NUMERIC -> {
+ SortedNumericDocValuesWriter writer = (SortedNumericDocValuesWriter) pf.docValuesWriter;
+ writer.addDenseValues(baseDocID, cursor);
+ }
+ // $CASES-OMITTED$
+ default ->
+ throw new IllegalArgumentException(
+ "LongColumn \"" + column.name() + "\" has incompatible docValuesType: " + dvType);
+ }
+ }
+
+ private static void processBinaryColumn(
+ int baseDocID, int numDocs, BinaryColumn column, PerField pf, IndexableFieldType fieldType)
+ throws IOException {
+ final DocValuesType dvType = fieldType.docValuesType();
+ final boolean hasPoints = fieldType.pointDimensionCount() != 0;
+ final PointValuesWriter pointWriter = hasPoints ? pf.pointValuesWriter : null;
+ final BinaryTupleCursor cursor = column.tuples();
+
+ if (dvType == DocValuesType.NONE) {
+ // Points only: bytes are passed through unchanged (caller is responsible for producing
+ // sort-encoded bytes of the correct total length).
+ int batchDocID;
+ while ((batchDocID = cursor.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ ColumnValidation.checkDocID(column, batchDocID, numDocs);
+ pointWriter.addPackedValue(baseDocID + batchDocID, cursor.binaryValue());
+ }
+ return;
+ }
+
+ switch (dvType) {
+ case BINARY -> {
+ BinaryDocValuesWriter writer = (BinaryDocValuesWriter) pf.docValuesWriter;
+ int batchDocID;
+ while ((batchDocID = cursor.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ ColumnValidation.checkDocID(column, batchDocID, numDocs);
+ int segDocID = baseDocID + batchDocID;
+ BytesRef value = cursor.binaryValue();
+ writer.addValue(segDocID, value);
+ if (hasPoints) {
+ pointWriter.addPackedValue(segDocID, value);
+ }
+ }
+ }
+ case SORTED -> {
+ SortedDocValuesWriter writer = (SortedDocValuesWriter) pf.docValuesWriter;
+ int batchDocID;
+ while ((batchDocID = cursor.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ ColumnValidation.checkDocID(column, batchDocID, numDocs);
+ int segDocID = baseDocID + batchDocID;
+ BytesRef value = cursor.binaryValue();
+ writer.addValue(segDocID, value);
+ if (hasPoints) {
+ pointWriter.addPackedValue(segDocID, value);
+ }
+ }
+ }
+ case SORTED_SET -> {
+ SortedSetDocValuesWriter writer = (SortedSetDocValuesWriter) pf.docValuesWriter;
+ int batchDocID;
+ while ((batchDocID = cursor.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ ColumnValidation.checkDocID(column, batchDocID, numDocs);
+ int segDocID = baseDocID + batchDocID;
+ BytesRef value = cursor.binaryValue();
+ writer.addValue(segDocID, value);
+ if (hasPoints) {
+ pointWriter.addPackedValue(segDocID, value);
+ }
+ }
+ }
+ // $CASES-OMITTED$
+ default ->
+ throw new IllegalArgumentException(
+ "BinaryColumn \"" + column.name() + "\" has incompatible docValuesType: " + dvType);
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ private static void processVectorColumn(
+ int baseDocID, int numDocs, VectorColumn<?> column, PerField pf, IndexableFieldType fieldType)
+ throws IOException {
+ final VectorEncoding encoding = fieldType.vectorEncoding();
+ final int dimension = fieldType.vectorDimension();
+ final VectorTupleCursor<?> cursor = column.tuples();
+ int prevBatchDocID = -1;
+ int consumed = 0;
+ int batchDocID;
+ switch (encoding) {
+ case FLOAT32 -> {
+ KnnFieldVectorsWriter<float[]> writer =
+ (KnnFieldVectorsWriter<float[]>) pf.knnFieldVectorsWriter;
+ while ((batchDocID = cursor.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ ColumnValidation.checkDocID(column, batchDocID, numDocs);
+ ColumnValidation.checkVectorDocIDStrictlyIncreasing(column, batchDocID, prevBatchDocID);
+ float[] vec = (float[]) cursor.vectorValue();
+ ColumnValidation.checkVectorDimension(column, vec.length, dimension, batchDocID);
+ writer.addValue(baseDocID + batchDocID, vec);
+ prevBatchDocID = batchDocID;
+ consumed++;
+ }
+ }
+ case BYTE -> {
+ KnnFieldVectorsWriter<byte[]> writer =
+ (KnnFieldVectorsWriter<byte[]>) pf.knnFieldVectorsWriter;
+ while ((batchDocID = cursor.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ ColumnValidation.checkDocID(column, batchDocID, numDocs);
+ ColumnValidation.checkVectorDocIDStrictlyIncreasing(column, batchDocID, prevBatchDocID);
+ byte[] vec = (byte[]) cursor.vectorValue();
+ ColumnValidation.checkVectorDimension(column, vec.length, dimension, batchDocID);
+ writer.addValue(baseDocID + batchDocID, vec);
+ prevBatchDocID = batchDocID;
+ consumed++;
+ }
+ }
+ }
+ if (column.density() == Column.Density.DENSE) {
+ ColumnValidation.checkDenseCount(column, consumed, numDocs);
+ }
+ }
+
private void initializeFieldInfo(PerField pf) throws IOException {
// Create and add a new fieldInfo to fieldInfos for this segment.
// During the creation of FieldInfo there is also verification of the correctness of all its
@@ -764,10 +1205,30 @@ private void initializeFieldInfo(PerField pf) throws IOException {
/** Index each field Returns {@code true}, if we are indexing a unique field with postings */
private boolean processField(int docID, IndexableField field, PerField pf) throws IOException {
+ boolean indexedField = invertAndStore(docID, field, pf);
+ IndexableFieldType fieldType = field.fieldType();
+ DocValuesType dvType = fieldType.docValuesType();
+ if (dvType != DocValuesType.NONE) {
+ indexDocValue(docID, pf, dvType, field);
+ }
+ if (fieldType.pointDimensionCount() != 0) {
+ pf.pointValuesWriter.addPackedValue(docID, field.binaryValue());
+ }
+ if (fieldType.vectorDimension() != 0) {
+ indexVectorValue(docID, pf, fieldType.vectorEncoding(), field);
+ }
+ return indexedField;
+ }
+
+ /**
+ * Inverts indexed fields and writes stored fields. Shared by the single-doc row path ({@link
+ * #processField}) and the column-batch row pass ({@link #processRowColumns}). Returns {@code
+ * true} if this is a unique indexed field with postings.
+ */
+ private boolean invertAndStore(int docID, IndexableField field, PerField pf) throws IOException {
IndexableFieldType fieldType = field.fieldType();
boolean indexedField = false;
- // Invert indexed fields
if (fieldType.indexOptions() != IndexOptions.NONE) {
if (pf.first) { // first time we see this field in this doc
pf.invert(docID, field, true);
@@ -778,7 +1239,6 @@ private boolean processField(int docID, IndexableField field, PerField pf) throw
}
}
- // Add stored fields
if (fieldType.stored()) {
StoredValue storedValue = field.storedValue();
if (storedValue == null) {
@@ -800,16 +1260,6 @@ private boolean processField(int docID, IndexableField field, PerField pf) throw
}
}
- DocValuesType dvType = fieldType.docValuesType();
- if (dvType != DocValuesType.NONE) {
- indexDocValue(docID, pf, dvType, field);
- }
- if (fieldType.pointDimensionCount() != 0) {
- pf.pointValuesWriter.addPackedValue(docID, field.binaryValue());
- }
- if (fieldType.vectorDimension() != 0) {
- indexVectorValue(docID, pf, fieldType.vectorEncoding(), field);
- }
return indexedField;
}
@@ -1264,8 +1714,10 @@ private void invertTokenStream(int docID, IndexableField field, boolean first)
throws IOException {
final boolean analyzed = field.fieldType().tokenized() && analyzer != null;
/*
- * To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
- * when we fail. We expect some caller to eventually deal with the real exception, so we don't want any 'catch' clauses,
+ * To assist people in tracking down problems in analysis components, we wish to write the field name to the
+ * infostream
+ * when we fail. We expect some caller to eventually deal with the real exception, so we don't want any 'catch'
+ * clauses,
* but rather a finally that takes note of the problem.
*/
boolean succeededInProcessingField = false;
@@ -1327,7 +1779,8 @@ private void invertTokenStream(int docID, IndexableField field, boolean first)
int endOffset = invertState.offset + invertState.offsetAttribute.endOffset();
if (startOffset < invertState.lastStartOffset || endOffset < startOffset) {
throw new IllegalArgumentException(
- "startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards "
+ "startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go "
+ + "backwards "
+ "startOffset="
+ startOffset
+ ",endOffset="
@@ -1372,7 +1825,8 @@ private void invertTokenStream(int docID, IndexableField field, boolean first)
+ fieldInfo.name
+ "\" (whose UTF8 encoding is longer than the max length "
+ IndexWriter.MAX_TERM_LENGTH
- + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '"
+ + "), all of which were skipped. Please correct the analyzer to not produce such terms. The "
+ + "prefix of the first immense term is: '"
+ Arrays.toString(prefix)
+ "...', original message: "
+ e.getMessage();
diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
index 09bef657b2df..a84414a7348a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
@@ -21,6 +21,7 @@
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.document.column.LongValuesCursor;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Counter;
@@ -64,6 +65,18 @@ public void addValue(int docID, long value) {
lastDocID = docID;
}
+ public void addDenseValues(int firstDocID, LongValuesCursor cursor) {
+ assert firstDocID > lastDocID;
+
+ int numValues = cursor.size();
+ pending.add(cursor);
+ docsWithField.addRange(firstDocID, firstDocID + numValues);
+
+ updateBytesUsed();
+
+ lastDocID = firstDocID + numValues - 1;
+ }
+
private void updateBytesUsed() {
final long newBytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
index 364dfb023a68..4b6a3ddf9e9b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
@@ -22,6 +22,7 @@
import java.util.Arrays;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.document.column.LongValuesCursor;
import org.apache.lucene.index.NumericDocValuesWriter.BufferedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
@@ -68,11 +69,42 @@ public void addValue(int docID, long value) {
updateBytesUsed();
}
+ public void addDenseValues(int firstDocID, LongValuesCursor cursor) {
+ assert firstDocID > currentDoc;
+ finishCurrentDoc();
+
+ int numValues = cursor.size();
+
+ // Write values directly to pending — each value is one doc, single-valued.
+ // No currentValues[] buffering, no sorting needed.
+ pending.add(cursor);
+
+ // If pendingCounts is active (some earlier doc was multi-valued),
+ // record count=1 for each dense doc.
+ if (pendingCounts != null) {
+ for (int i = 0; i < numValues; i++) {
+ pendingCounts.add(1);
+ }
+ }
+
+ // Bulk-add consecutive doc-ids
+ docsWithField.addRange(firstDocID, firstDocID + numValues);
+
+ // Set currentDoc to last written doc so ordering is maintained.
+ // currentUpto stays 0 — nothing buffered.
+ currentDoc = firstDocID + numValues - 1;
+
+ updateBytesUsed();
+ }
+
// finalize currentDoc: this sorts the values in the current doc
private void finishCurrentDoc() {
if (currentDoc == -1) {
return;
}
+ if (currentUpto == 0) {
+ return; // doc already committed directly (e.g., via addDenseValues)
+ }
if (currentUpto > 1) {
Arrays.sort(currentValues, 0, currentUpto);
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/PackedLongValues.java b/lucene/core/src/java/org/apache/lucene/util/packed/PackedLongValues.java
index e996c2d52e1a..2abf8f29bb6d 100644
--- a/lucene/core/src/java/org/apache/lucene/util/packed/PackedLongValues.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/PackedLongValues.java
@@ -18,6 +18,7 @@
import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
+import org.apache.lucene.document.column.LongValuesCursor;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.LongValues;
@@ -234,17 +235,40 @@ public Builder add(long l) {
if (pending == null) {
throw new IllegalStateException("Cannot be reused after build()");
}
+ packIfFull();
+ pending[pendingOff++] = l;
+ size += 1;
+ return this;
+ }
+
+ /**
+ * Add all values produced by the given {@link LongValuesCursor} in bulk. The cursor's {@link
+ * LongValuesCursor#size()} is used as the bounds: exactly that many values are pulled.
+ */
+ public Builder add(LongValuesCursor cursor) {
+ if (pending == null) {
+ throw new IllegalStateException("Cannot be reused after build()");
+ }
+ int remaining = cursor.size();
+ while (remaining > 0) {
+ packIfFull();
+ int toFill = Math.min(remaining, pending.length - pendingOff);
+ cursor.fill(pending, pendingOff, toFill);
+ pendingOff += toFill;
+ remaining -= toFill;
+ size += toFill;
+ }
+ return this;
+ }
+
+ private void packIfFull() {
if (pendingOff == pending.length) {
- // check size
if (values.length == valuesOff) {
final int newLength = ArrayUtil.oversize(valuesOff + 1, 8);
grow(newLength);
}
pack();
}
- pending[pendingOff++] = l;
- size += 1;
- return this;
}
final void finish() {
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestColumnBatchIndexing.java b/lucene/core/src/test/org/apache/lucene/index/TestColumnBatchIndexing.java
new file mode 100644
index 000000000000..66e313528c88
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/index/TestColumnBatchIndexing.java
@@ -0,0 +1,2752 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.io.IOException;
+import java.util.List;
+import org.apache.lucene.document.BinaryDocValuesField;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.DoublePoint;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.FloatPoint;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.document.StoredValue;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.document.column.BinaryColumn;
+import org.apache.lucene.document.column.BinaryTupleCursor;
+import org.apache.lucene.document.column.Column;
+import org.apache.lucene.document.column.ColumnBatch;
+import org.apache.lucene.document.column.LongColumn;
+import org.apache.lucene.document.column.LongTupleCursor;
+import org.apache.lucene.document.column.LongValuesCursor;
+import org.apache.lucene.document.column.VectorColumn;
+import org.apache.lucene.document.column.VectorTupleCursor;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.tests.analysis.MockAnalyzer;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.NumericUtils;
+
+/** Tests for column-oriented batch indexing via {@link IndexWriter#addBatch}. */
+public class TestColumnBatchIndexing extends LuceneTestCase {
+
+ public void testNumericDocValues() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ long[] values = {10, 20, 30};
+ int[] docIds = {0, 1, 2};
+ w.addBatch(
+ simpleBatch(3, new ArrayLongColumn("numeric", NumericDocValuesField.TYPE, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ NumericDocValues dv = leaf.getNumericDocValues("numeric");
+ for (int i = 0; i < values.length; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(values[i], dv.longValue());
+ }
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, dv.nextDoc());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testSortedNumericDocValues() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // Doc 0 has two values, doc 1 has one value
+ int[] docIds = {0, 0, 1};
+ long[] values = {5, 15, 25};
+ w.addBatch(
+ simpleBatch(
+ 2,
+ new ArrayLongColumn(
+ "sortedNumeric", SortedNumericDocValuesField.TYPE, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ SortedNumericDocValues dv = leaf.getSortedNumericDocValues("sortedNumeric");
+
+ assertEquals(0, dv.nextDoc());
+ assertEquals(2, dv.docValueCount());
+ assertEquals(5, dv.nextValue());
+ assertEquals(15, dv.nextValue());
+
+ assertEquals(1, dv.nextDoc());
+ assertEquals(1, dv.docValueCount());
+ assertEquals(25, dv.nextValue());
+
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, dv.nextDoc());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testBinaryDocValues() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ BytesRef[] values = {newBytesRef("aaa"), newBytesRef("bbb"), newBytesRef("ccc")};
+ int[] docIds = {0, 1, 2};
+ w.addBatch(
+ simpleBatch(3, new ArrayBinaryColumn("binary", BinaryDocValuesField.TYPE, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ BinaryDocValues dv = leaf.getBinaryDocValues("binary");
+ for (int i = 0; i < values.length; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(values[i], dv.binaryValue());
+ }
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, dv.nextDoc());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testSortedDocValues() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ BytesRef[] values = {newBytesRef("x"), newBytesRef("y"), newBytesRef("x")};
+ int[] docIds = {0, 1, 2};
+ w.addBatch(
+ simpleBatch(3, new ArrayBinaryColumn("sorted", SortedDocValuesField.TYPE, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ SortedDocValues dv = leaf.getSortedDocValues("sorted");
+
+ assertEquals(0, dv.nextDoc());
+ assertEquals(newBytesRef("x"), dv.lookupOrd(dv.ordValue()));
+ assertEquals(1, dv.nextDoc());
+ assertEquals(newBytesRef("y"), dv.lookupOrd(dv.ordValue()));
+ assertEquals(2, dv.nextDoc());
+ assertEquals(newBytesRef("x"), dv.lookupOrd(dv.ordValue()));
+
+ // "x" and "y" should share ord space
+ assertEquals(2, dv.getValueCount());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testSortedSetDocValues() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // Doc 0 has two values, doc 1 has one value
+ int[] docIds = {0, 0, 1};
+ BytesRef[] values = {newBytesRef("a"), newBytesRef("b"), newBytesRef("a")};
+ w.addBatch(
+ simpleBatch(
+ 2, new ArrayBinaryColumn("sortedSet", SortedSetDocValuesField.TYPE, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ SortedSetDocValues dv = leaf.getSortedSetDocValues("sortedSet");
+
+ assertEquals(0, dv.nextDoc());
+ assertEquals(2, dv.docValueCount());
+ assertEquals(newBytesRef("a"), dv.lookupOrd(dv.nextOrd()));
+ assertEquals(newBytesRef("b"), dv.lookupOrd(dv.nextOrd()));
+
+ assertEquals(1, dv.nextDoc());
+ assertEquals(1, dv.docValueCount());
+ assertEquals(newBytesRef("a"), dv.lookupOrd(dv.nextOrd()));
+
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, dv.nextDoc());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMultipleColumns() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ int[] allDocs = {0, 1, 2};
+ long[] numericValues = {100, 200, 300};
+ BytesRef[] sortedValues = {newBytesRef("a"), newBytesRef("b"), newBytesRef("c")};
+
+ w.addBatch(
+ simpleBatch(
+ 3,
+ new ArrayLongColumn("numeric", NumericDocValuesField.TYPE, allDocs, numericValues),
+ new ArrayBinaryColumn("sorted", SortedDocValuesField.TYPE, allDocs, sortedValues)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ NumericDocValues ndv = leaf.getNumericDocValues("numeric");
+ SortedDocValues sdv = leaf.getSortedDocValues("sorted");
+ for (int i = 0; i < 3; i++) {
+ assertEquals(i, ndv.nextDoc());
+ assertEquals(numericValues[i], ndv.longValue());
+ assertEquals(i, sdv.nextDoc());
+ assertEquals(sortedValues[i], sdv.lookupOrd(sdv.ordValue()));
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testSparseDocValues() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // Only doc 1 has a value (docs 0 and 2 are missing)
+ int[] docIds = {1};
+ long[] values = {42};
+ w.addBatch(
+ simpleBatch(3, new ArrayLongColumn("sparse", NumericDocValuesField.TYPE, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ NumericDocValues dv = leaf.getNumericDocValues("sparse");
+ assertEquals(1, dv.nextDoc());
+ assertEquals(42, dv.longValue());
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, dv.nextDoc());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testParentFieldIndexed() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriterConfig config = newIndexWriterConfig();
+ config.setParentField("_parent");
+ IndexWriter w = new IndexWriter(dir, config);
+
+ int[] docIds = {0, 1, 2};
+ long[] values = {1, 2, 3};
+ w.addBatch(
+ simpleBatch(3, new ArrayLongColumn("numeric", NumericDocValuesField.TYPE, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ // Every batch doc should have the parent field
+ NumericDocValues parentDv = leaf.getNumericDocValues("_parent");
+ assertNotNull(parentDv);
+ for (int i = 0; i < 3; i++) {
+ assertEquals(i, parentDv.nextDoc());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testPointsColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // Create a points-only FieldType (1 dimension, Integer.BYTES)
+ FieldType pointType = new FieldType();
+ pointType.setDimensions(1, Integer.BYTES);
+ pointType.freeze();
+
+ int[] raw = {10, 20, 30};
+ int[] docIds = {0, 1, 2};
+ long[] values = new long[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ values[i] = raw[i];
+ }
+ w.addBatch(
+ simpleBatch(
+ 3,
+ new ArrayLongColumn("point", pointType, LongColumn.NumericKind.INT, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ IndexSearcher searcher = new IndexSearcher(r);
+ assertEquals(1, searcher.count(IntPoint.newExactQuery("point", 10)));
+ assertEquals(1, searcher.count(IntPoint.newExactQuery("point", 20)));
+ assertEquals(1, searcher.count(IntPoint.newExactQuery("point", 30)));
+ assertEquals(0, searcher.count(IntPoint.newExactQuery("point", 99)));
+ assertEquals(3, searcher.count(IntPoint.newRangeQuery("point", 10, 30)));
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testPointsWithDocValues() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // 1D int points + SORTED_NUMERIC DV via the compat layer.
+ FieldType pointAndDvType = new FieldType();
+ pointAndDvType.setDimensions(1, Integer.BYTES);
+ pointAndDvType.setDocValuesType(DocValuesType.SORTED_NUMERIC);
+ pointAndDvType.freeze();
+
+ int[] raw = {10, 20, 30};
+ int[] docIds = {0, 1, 2};
+ long[] values = new long[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ values[i] = raw[i];
+ }
+ w.addBatch(
+ simpleBatch(
+ 3,
+ new ArrayLongColumn(
+ "field", pointAndDvType, LongColumn.NumericKind.INT, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ IndexSearcher searcher = new IndexSearcher(r);
+
+ assertEquals(1, searcher.count(IntPoint.newExactQuery("field", 10)));
+ assertEquals(3, searcher.count(IntPoint.newRangeQuery("field", 10, 30)));
+
+ LeafReader leaf = getOnlyLeafReader(r);
+ SortedNumericDocValues dv = leaf.getSortedNumericDocValues("field");
+ for (int i = 0; i < 3; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(raw[i], dv.nextValue());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testSparsePointsColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType pointType = new FieldType();
+ pointType.setDimensions(1, Integer.BYTES);
+ pointType.freeze();
+
+ // Only doc 1 out of 3 has a point value
+ int[] docIds = {1};
+ long[] values = {42};
+ w.addBatch(
+ simpleBatch(
+ 3,
+ new ArrayLongColumn("point", pointType, LongColumn.NumericKind.INT, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ IndexSearcher searcher = new IndexSearcher(r);
+ assertEquals(1, searcher.count(IntPoint.newExactQuery("point", 42)));
+ assertEquals(0, searcher.count(IntPoint.newExactQuery("point", 0)));
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testStoredLongColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // stored + NUMERIC doc values
+ FieldType storedNumericType = new FieldType();
+ storedNumericType.setStored(true);
+ storedNumericType.setDocValuesType(DocValuesType.NUMERIC);
+ storedNumericType.freeze();
+
+ int[] docIds = {0, 1, 2};
+ long[] values = {100, 200, 300};
+ w.addBatch(simpleBatch(3, new ArrayLongColumn("val", storedNumericType, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ // Verify stored fields
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < 3; i++) {
+ Document doc = storedFields.document(i);
+ assertEquals(values[i], doc.getField("val").numericValue().longValue());
+ }
+
+ // Verify doc values
+ NumericDocValues dv = leaf.getNumericDocValues("val");
+ for (int i = 0; i < 3; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(values[i], dv.longValue());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testStoredBinaryColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // stored + SORTED doc values
+ FieldType storedSortedType = new FieldType();
+ storedSortedType.setStored(true);
+ storedSortedType.setDocValuesType(DocValuesType.SORTED);
+ storedSortedType.freeze();
+
+ int[] docIds = {0, 1, 2};
+ BytesRef[] values = {newBytesRef("aaa"), newBytesRef("bbb"), newBytesRef("ccc")};
+ w.addBatch(simpleBatch(3, new ArrayBinaryColumn("val", storedSortedType, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ // Verify stored fields
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < 3; i++) {
+ Document doc = storedFields.document(i);
+ assertEquals(values[i], doc.getField("val").binaryValue());
+ }
+
+ // Verify doc values
+ SortedDocValues dv = leaf.getSortedDocValues("val");
+ for (int i = 0; i < 3; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(values[i], dv.lookupOrd(dv.ordValue()));
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testStoredOnlyColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // stored only — no doc values, no points
+ FieldType storedOnlyType = new FieldType();
+ storedOnlyType.setStored(true);
+ storedOnlyType.freeze();
+
+ int[] docIds = {0, 1, 2};
+ long[] values = {10, 20, 30};
+ w.addBatch(simpleBatch(3, new ArrayLongColumn("stored", storedOnlyType, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < 3; i++) {
+ Document doc = storedFields.document(i);
+ assertEquals(values[i], doc.getField("stored").numericValue().longValue());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMixedStoredAndNonStoredColumns() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType storedNumericType = new FieldType();
+ storedNumericType.setStored(true);
+ storedNumericType.setDocValuesType(DocValuesType.NUMERIC);
+ storedNumericType.freeze();
+
+ int[] allDocs = {0, 1, 2};
+ long[] storedValues = {100, 200, 300};
+ long[] dvOnlyValues = {1, 2, 3};
+ w.addBatch(
+ simpleBatch(
+ 3,
+ new ArrayLongColumn("stored_field", storedNumericType, allDocs, storedValues),
+ new ArrayLongColumn("dv_only", NumericDocValuesField.TYPE, allDocs, dvOnlyValues)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ // Verify stored field
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < 3; i++) {
+ Document doc = storedFields.document(i);
+ assertEquals(storedValues[i], doc.getField("stored_field").numericValue().longValue());
+ assertNull(doc.getField("dv_only")); // non-stored column should not appear
+ }
+
+ // Verify both doc values columns
+ NumericDocValues storedDv = leaf.getNumericDocValues("stored_field");
+ NumericDocValues dvOnly = leaf.getNumericDocValues("dv_only");
+ for (int i = 0; i < 3; i++) {
+ assertEquals(i, storedDv.nextDoc());
+ assertEquals(storedValues[i], storedDv.longValue());
+ assertEquals(i, dvOnly.nextDoc());
+ assertEquals(dvOnlyValues[i], dvOnly.longValue());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testStoredPointsColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // stored + points
+ FieldType storedPointType = new FieldType();
+ storedPointType.setStored(true);
+ storedPointType.setDimensions(1, Integer.BYTES);
+ storedPointType.freeze();
+
+ int[] raw = {10, 20, 30};
+ int[] docIds = {0, 1, 2};
+ long[] values = new long[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ values[i] = raw[i];
+ }
+ w.addBatch(
+ simpleBatch(
+ 3,
+ new ArrayLongColumn(
+ "pt", storedPointType, LongColumn.NumericKind.INT, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ // Verify stored fields — decoded as ints.
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < raw.length; i++) {
+ Document doc = storedFields.document(i);
+ assertEquals(raw[i], doc.getField("pt").numericValue().intValue());
+ }
+
+ // Verify points
+ IndexSearcher searcher = new IndexSearcher(r);
+ assertEquals(1, searcher.count(IntPoint.newExactQuery("pt", 10)));
+ assertEquals(3, searcher.count(IntPoint.newRangeQuery("pt", 10, 30)));
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testInvertedColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // StringField-like: DOCS, omitNorms, non-tokenized
+ FieldType stringType = new FieldType();
+ stringType.setIndexOptions(IndexOptions.DOCS);
+ stringType.setOmitNorms(true);
+ stringType.setTokenized(false);
+ stringType.freeze();
+
+ int[] docIds = {0, 1, 2};
+ BytesRef[] values = {newBytesRef("alpha"), newBytesRef("beta"), newBytesRef("alpha")};
+ w.addBatch(simpleBatch(3, new ArrayBinaryColumn("tag", stringType, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ IndexSearcher searcher = new IndexSearcher(r);
+ assertEquals(2, searcher.count(new TermQuery(new Term("tag", "alpha"))));
+ assertEquals(1, searcher.count(new TermQuery(new Term("tag", "beta"))));
+ assertEquals(0, searcher.count(new TermQuery(new Term("tag", "gamma"))));
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testInvertedWithDocValues() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // Inverted + SORTED doc values (like a StringField with doc values)
+ FieldType invertedDvType = new FieldType();
+ invertedDvType.setIndexOptions(IndexOptions.DOCS);
+ invertedDvType.setOmitNorms(true);
+ invertedDvType.setTokenized(false);
+ invertedDvType.setDocValuesType(DocValuesType.SORTED);
+ invertedDvType.freeze();
+
+ int[] docIds = {0, 1, 2};
+ BytesRef[] values = {newBytesRef("x"), newBytesRef("y"), newBytesRef("x")};
+ w.addBatch(simpleBatch(3, new ArrayBinaryColumn("field", invertedDvType, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ IndexSearcher searcher = new IndexSearcher(r);
+
+ // Verify inverted index
+ assertEquals(2, searcher.count(new TermQuery(new Term("field", "x"))));
+ assertEquals(1, searcher.count(new TermQuery(new Term("field", "y"))));
+
+ // Verify doc values
+ LeafReader leaf = getOnlyLeafReader(r);
+ SortedDocValues dv = leaf.getSortedDocValues("field");
+ for (int i = 0; i < 3; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(values[i], dv.lookupOrd(dv.ordValue()));
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testInvertedWithStored() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // Inverted + stored (like StringField with Store.YES)
+ FieldType invertedStoredType = new FieldType(StringField.TYPE_STORED);
+ invertedStoredType.freeze();
+
+ int[] docIds = {0, 1, 2};
+ BytesRef[] values = {newBytesRef("aaa"), newBytesRef("bbb"), newBytesRef("ccc")};
+ w.addBatch(simpleBatch(3, new ArrayBinaryColumn("field", invertedStoredType, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ IndexSearcher searcher = new IndexSearcher(r);
+
+ // Verify inverted index
+ assertEquals(1, searcher.count(new TermQuery(new Term("field", "aaa"))));
+ assertEquals(1, searcher.count(new TermQuery(new Term("field", "bbb"))));
+
+ // Verify stored fields
+ LeafReader leaf = getOnlyLeafReader(r);
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < 3; i++) {
+ Document doc = storedFields.document(i);
+ assertEquals(values[i], doc.getField("field").binaryValue());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // One binary column fanned out to all three row/column structures at once:
+ // inverted index + stored fields + SORTED doc values.
+ public void testInvertedWithStoredAndDocValues() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // Inverted + stored + SORTED doc values
+ FieldType allType = new FieldType();
+ allType.setIndexOptions(IndexOptions.DOCS);
+ allType.setOmitNorms(true);
+ allType.setTokenized(false);
+ allType.setStored(true);
+ allType.setDocValuesType(DocValuesType.SORTED);
+ allType.freeze();
+
+ int[] docIds = {0, 1, 2};
+ BytesRef[] values = {newBytesRef("x"), newBytesRef("y"), newBytesRef("z")};
+ w.addBatch(simpleBatch(3, new ArrayBinaryColumn("field", allType, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ IndexSearcher searcher = new IndexSearcher(r);
+
+ // Verify inverted index
+ assertEquals(1, searcher.count(new TermQuery(new Term("field", "x"))));
+
+ // Verify stored fields
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < 3; i++) {
+ assertEquals(values[i], storedFields.document(i).getField("field").binaryValue());
+ }
+
+ // Verify doc values
+ SortedDocValues dv = leaf.getSortedDocValues("field");
+ for (int i = 0; i < 3; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(values[i], dv.lookupOrd(dv.ordValue()));
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Sparse inverted column: the batch declares 3 docs but only doc 1 carries a term;
+ // the other docs must simply have no postings for the field.
+ public void testInvertedSparse() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType stringType = new FieldType();
+ stringType.setIndexOptions(IndexOptions.DOCS);
+ stringType.setOmitNorms(true);
+ stringType.setTokenized(false);
+ stringType.freeze();
+
+ // Only doc 1 out of 3 has a term
+ int[] docIds = {1};
+ BytesRef[] values = {newBytesRef("found")};
+ w.addBatch(simpleBatch(3, new ArrayBinaryColumn("tag", stringType, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ IndexSearcher searcher = new IndexSearcher(r);
+ assertEquals(1, searcher.count(new TermQuery(new Term("tag", "found"))));
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Tokenized (TextField-like) column values must run through the configured analyzer so that
+ // individual words become searchable terms. MockAnalyzer splits on whitespace, so each
+ // three-word value yields three terms.
+ public void testTokenizedColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
+ IndexWriter w = new IndexWriter(dir, config);
+
+ // TextField-like: tokenized, DOCS_AND_FREQS_AND_POSITIONS
+ int[] docIds = {0, 1, 2};
+ BytesRef[] values = {
+ newBytesRef("quick brown fox"), newBytesRef("lazy brown dog"), newBytesRef("quick fox jumps")
+ };
+ w.addBatch(
+ simpleBatch(3, new ArrayBinaryColumn("text", TextField.TYPE_NOT_STORED, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ IndexSearcher searcher = new IndexSearcher(r);
+
+ // Each word was tokenized — verify individual terms
+ assertEquals(2, searcher.count(new TermQuery(new Term("text", "quick"))));
+ assertEquals(2, searcher.count(new TermQuery(new Term("text", "brown"))));
+ assertEquals(2, searcher.count(new TermQuery(new Term("text", "fox"))));
+ assertEquals(1, searcher.count(new TermQuery(new Term("text", "lazy"))));
+ assertEquals(1, searcher.count(new TermQuery(new Term("text", "dog"))));
+ assertEquals(1, searcher.count(new TermQuery(new Term("text", "jumps"))));
+ assertEquals(0, searcher.count(new TermQuery(new Term("text", "missing"))));
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Tokenized + stored (TextField.TYPE_STORED): terms are analyzed for search while the
+ // original, un-tokenized bytes are what comes back from stored fields.
+ public void testTokenizedWithStored() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
+ IndexWriter w = new IndexWriter(dir, config);
+
+ int[] docIds = {0, 1};
+ BytesRef[] values = {newBytesRef("hello world"), newBytesRef("goodbye world")};
+ w.addBatch(
+ simpleBatch(2, new ArrayBinaryColumn("text", TextField.TYPE_STORED, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ IndexSearcher searcher = new IndexSearcher(r);
+
+ // Verify tokenized search
+ assertEquals(2, searcher.count(new TermQuery(new Term("text", "world"))));
+ assertEquals(1, searcher.count(new TermQuery(new Term("text", "hello"))));
+
+ // Verify stored fields
+ StoredFields storedFields = leaf.storedFields();
+ assertEquals(values[0], storedFields.document(0).getField("text").binaryValue());
+ assertEquals(values[1], storedFields.document(1).getField("text").binaryValue());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // A long column whose FieldType enables nothing (no DV, no points, not indexed, not stored)
+ // is rejected with IllegalArgumentException, and the writer must remain usable afterwards:
+ // the failed batch's docs are marked deleted rather than poisoning the writer.
+ public void testColumnWithNoneDocValuesTypeAndNoPointsThrows() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // FieldType with NONE doc values type and no points
+ FieldType badType = new FieldType();
+ badType.freeze();
+
+ int[] docIds = {0};
+ long[] values = {1};
+ expectThrows(
+ IllegalArgumentException.class,
+ () -> w.addBatch(simpleBatch(1, new ArrayLongColumn("bad", badType, docIds, values))));
+
+ // Writer should still be usable after the failure
+ w.addBatch(
+ simpleBatch(
+ 1,
+ new ArrayLongColumn(
+ "numeric", NumericDocValuesField.TYPE, new int[] {0}, new long[] {42})));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ NumericDocValues dv = leaf.getNumericDocValues("numeric");
+ assertNotNull(dv);
+ // The failed batch's doc was marked deleted; the successful batch's doc is still live
+ int doc = dv.nextDoc();
+ assertTrue(doc != DocIdSetIterator.NO_MORE_DOCS);
+ assertEquals(42, dv.longValue());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // One INT-kind long column fanned out to stored fields, SORTED_NUMERIC doc values, and
+ // 1-D 4-byte points. NumericKind.INT means stored values decode back as ints while the DV
+ // holds the int widened to long.
+ public void testStoredWithDocValuesAndPoints() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // stored + SORTED_NUMERIC DV + 4-byte points
+ FieldType allType = new FieldType();
+ allType.setStored(true);
+ allType.setDocValuesType(DocValuesType.SORTED_NUMERIC);
+ allType.setDimensions(1, Integer.BYTES);
+ allType.freeze();
+
+ int[] raw = {10, 20, 30};
+ int[] docIds = {0, 1, 2};
+ long[] values = new long[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ values[i] = raw[i];
+ }
+ w.addBatch(
+ simpleBatch(
+ 3, new ArrayLongColumn("field", allType, LongColumn.NumericKind.INT, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ // Verify stored fields — decoded as ints.
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(raw[i], storedFields.document(i).getField("field").numericValue().intValue());
+ }
+
+ // Verify doc values (raw int widened to long).
+ SortedNumericDocValues dv = leaf.getSortedNumericDocValues("field");
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(raw[i], dv.nextValue());
+ }
+
+ // Verify points
+ assertEquals(3, leaf.getPointValues("field").size());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Multi-valued column: repeating a docId in the (docId, value) tuples gives that doc several
+ // values. Each occurrence becomes its own stored field entry and one SORTED_NUMERIC DV value.
+ public void testMultiValuedStoredWithDocValues() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // stored + SORTED_NUMERIC doc values (multi-valued)
+ FieldType storedSortedNumericType = new FieldType();
+ storedSortedNumericType.setStored(true);
+ storedSortedNumericType.setDocValuesType(DocValuesType.SORTED_NUMERIC);
+ storedSortedNumericType.freeze();
+
+ // Doc 0 has two values (10, 20), doc 1 has one value (30)
+ int[] docIds = {0, 0, 1};
+ long[] values = {10, 20, 30};
+ w.addBatch(simpleBatch(2, new ArrayLongColumn("val", storedSortedNumericType, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ // Verify stored fields — each value occurrence is stored separately
+ StoredFields storedFields = leaf.storedFields();
+ Document doc0 = storedFields.document(0);
+ assertEquals(2, doc0.getFields("val").length);
+ assertEquals(10L, doc0.getFields("val")[0].numericValue().longValue());
+ assertEquals(20L, doc0.getFields("val")[1].numericValue().longValue());
+ Document doc1 = storedFields.document(1);
+ assertEquals(1, doc1.getFields("val").length);
+ assertEquals(30L, doc1.getFields("val")[0].numericValue().longValue());
+
+ // Verify doc values
+ SortedNumericDocValues dv = leaf.getSortedNumericDocValues("val");
+ assertEquals(0, dv.nextDoc());
+ assertEquals(2, dv.docValueCount());
+ assertEquals(10, dv.nextValue());
+ assertEquals(20, dv.nextValue());
+ assertEquals(1, dv.nextDoc());
+ assertEquals(1, dv.docValueCount());
+ assertEquals(30, dv.nextValue());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Dense column variant: ArrayDenseLongColumn carries one value per doc with no explicit
+ // docId array (value i implicitly belongs to doc i), feeding NUMERIC doc values.
+ public void testDenseNumericDocValues() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ long[] values = {100, 200, 300};
+ w.addBatch(simpleBatch(3, new ArrayDenseLongColumn("val", NumericDocValuesField.TYPE, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ NumericDocValues dv = leaf.getNumericDocValues("val");
+ for (int i = 0; i < 3; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(values[i], dv.longValue());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Dense column into SORTED_NUMERIC doc values: every doc ends up with exactly one value.
+ public void testDenseSortedNumericDocValues() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ long[] values = {10, 20, 30, 40, 50};
+ w.addBatch(
+ simpleBatch(5, new ArrayDenseLongColumn("val", SortedNumericDocValuesField.TYPE, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ SortedNumericDocValues dv = leaf.getSortedNumericDocValues("val");
+ for (int i = 0; i < 5; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(1, dv.docValueCount());
+ assertEquals(values[i], dv.nextValue());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // A dense column must supply exactly numDocs values; supplying too few (2 for a 3-doc batch)
+ // throws. The writer stays usable afterwards — a fresh field name is used because the failed
+ // batch may have left partial DV entries under the old name.
+ public void testDenseColumnCountMismatchThrows() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // 2 values but batch expects 3 documents
+ long[] values = {10, 20};
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(
+ 3, new ArrayDenseLongColumn("val", NumericDocValuesField.TYPE, values))));
+
+ // Writer should still be usable after the failure — use a different field to avoid
+ // the partially-written DV entries from the failed batch
+ w.addBatch(
+ simpleBatch(
+ 1, new ArrayDenseLongColumn("val2", NumericDocValuesField.TYPE, new long[] {42})));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ NumericDocValues dv = leaf.getNumericDocValues("val2");
+ assertNotNull(dv);
+ int doc = dv.nextDoc();
+ assertTrue(doc != DocIdSetIterator.NO_MORE_DOCS);
+ assertEquals(42, dv.longValue());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Mirror of the count-mismatch case above: more values (5) than docs (3) also throws, and
+ // nothing beyond numDocs gets written, so the writer remains usable.
+ public void testDenseColumnTooManyValuesThrows() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // 5 values but batch only has 3 documents
+ long[] values = {10, 20, 30, 40, 50};
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(
+ 3, new ArrayDenseLongColumn("val", NumericDocValuesField.TYPE, values))));
+
+ // Writer should still be usable — no values were written past numDocs
+ w.addBatch(
+ simpleBatch(
+ 1, new ArrayDenseLongColumn("val2", NumericDocValuesField.TYPE, new long[] {42})));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ NumericDocValues dv = leaf.getNumericDocValues("val2");
+ assertNotNull(dv);
+ int doc = dv.nextDoc();
+ assertTrue(doc != DocIdSetIterator.NO_MORE_DOCS);
+ assertEquals(42, dv.longValue());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Sparse NUMERIC doc values from an INT-kind long column: docs 0 and 2 have values, doc 1
+ // is skipped by the DV iterator. Negative ints must round-trip (sign extension to long).
+ public void testIntSparseNumericDocValues() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // Sparse: only docs 0 and 2 have values.
+ int[] docIds = {0, 2};
+ int[] raw = {-7, 9};
+ long[] values = new long[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ values[i] = raw[i];
+ }
+ w.addBatch(
+ simpleBatch(
+ 3,
+ new ArrayLongColumn(
+ "val", NumericDocValuesField.TYPE, LongColumn.NumericKind.INT, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ NumericDocValues dv = leaf.getNumericDocValues("val");
+ assertEquals(0, dv.nextDoc());
+ assertEquals(-7, dv.longValue());
+ assertEquals(2, dv.nextDoc());
+ assertEquals(9, dv.longValue());
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, dv.nextDoc());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Validation: a LONG-kind column encodes 8-byte point values, so pairing it with a FieldType
+ // that declares 4-byte points must be rejected up front.
+ public void testLongColumnPointWidthMismatchThrows() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType pointType = new FieldType();
+ pointType.setDimensions(1, Integer.BYTES); // expects 4 bytes
+ pointType.freeze();
+
+ // LONG kind implies 8-byte point bytes; should fail validation against a 4-byte point type.
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(
+ 1,
+ new ArrayLongColumn(
+ "pt",
+ pointType,
+ LongColumn.NumericKind.LONG,
+ new int[] {0},
+ new long[] {1}))));
+
+ w.close();
+ dir.close();
+ }
+
+ // Validation: NUMERIC doc values need fixed-width numeric bytes; a variable-size binary
+ // column (fixedSize = -1) routed into NUMERIC DV must throw.
+ public void testBinaryColumnNumericDVBadFixedSizeThrows() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // Variable-size binary into NUMERIC DV should fail validation (fixedSize=-1).
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(
+ 1,
+ new ArrayBinaryColumn(
+ "val",
+ NumericDocValuesField.TYPE,
+ new int[] {0},
+ new BytesRef[] {newBytesRef("x")}))));
+
+ w.close();
+ dir.close();
+ }
+
+ // Validation: a LongColumn holds one scalar per tuple, so multi-dimensional points (2 x 8
+ // bytes here) cannot be produced from it and must be rejected.
+ public void testLongColumnMultiDimPointsThrows() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType pointType = new FieldType();
+ pointType.setDimensions(2, Long.BYTES);
+ pointType.freeze();
+
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(
+ 1, new ArrayLongColumn("pt", pointType, new int[] {0}, new long[] {1}))));
+
+ w.close();
+ dir.close();
+ }
+
+ // A dense LongColumn consumed twice — once by the row-oriented stored-fields pass and once
+ // by the column-oriented DV pass — so the implementation must hand each pass a fresh cursor.
+ public void testDenseLongColumnWithStoredFields() throws IOException {
+ // Covers the "single column consumed by both passes via fresh cursors" case: a dense
+ // LongColumn with stored+numeric DV. Row pass uses tuples(), column pass uses values().
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType storedNumericType = new FieldType();
+ storedNumericType.setStored(true);
+ storedNumericType.setDocValuesType(DocValuesType.NUMERIC);
+ storedNumericType.freeze();
+
+ long[] values = {100, 200, 300, 400};
+ w.addBatch(simpleBatch(4, new ArrayDenseLongColumn("val", storedNumericType, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < values.length; i++) {
+ assertEquals(values[i], storedFields.document(i).getField("val").numericValue().longValue());
+ }
+
+ NumericDocValues dv = leaf.getNumericDocValues("val");
+ for (int i = 0; i < values.length; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(values[i], dv.longValue());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Stored-only column, NumericKind.INT: the long payload narrows back to an Integer stored
+ // value, preserving negative values.
+ public void testStoredTypeIntegerFromLongColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType type = new FieldType();
+ type.setStored(true);
+ type.freeze();
+
+ int[] raw = {1, -2, 3};
+ long[] values = new long[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ values[i] = raw[i];
+ }
+ w.addBatch(
+ simpleBatch(
+ 3,
+ new ArrayLongColumn(
+ "val", type, LongColumn.NumericKind.INT, new int[] {0, 1, 2}, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(raw[i], storedFields.document(i).getField("val").numericValue().intValue());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Stored-only column with the default (LONG) kind: full long range including MIN/MAX must
+ // round-trip through stored fields unchanged.
+ public void testStoredTypeLongFromLongColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType type = new FieldType();
+ type.setStored(true);
+ type.freeze();
+
+ long[] raw = {Long.MIN_VALUE, 0L, Long.MAX_VALUE};
+ w.addBatch(simpleBatch(3, new ArrayLongColumn("val", type, new int[] {0, 1, 2}, raw.clone())));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(raw[i], storedFields.document(i).getField("val").numericValue().longValue());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Stored-only column, NumericKind.FLOAT: the caller supplies floatToSortableInt bits in the
+ // long payload, and the stored value decodes back to the original float.
+ public void testStoredTypeFloatFromLongColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType type = new FieldType();
+ type.setStored(true);
+ type.freeze();
+
+ float[] raw = {1.5f, -2.25f, Float.MAX_VALUE};
+ long[] values = new long[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ values[i] = NumericUtils.floatToSortableInt(raw[i]);
+ }
+ w.addBatch(
+ simpleBatch(
+ 3,
+ new ArrayLongColumn(
+ "val", type, LongColumn.NumericKind.FLOAT, new int[] {0, 1, 2}, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(
+ raw[i], storedFields.document(i).getField("val").numericValue().floatValue(), 0f);
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Stored-only column, NumericKind.DOUBLE: doubleToSortableLong bits go in, the stored value
+ // decodes back to the original double.
+ public void testStoredTypeDoubleFromLongColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType type = new FieldType();
+ type.setStored(true);
+ type.freeze();
+
+ double[] raw = {1.5d, -2.25d, Double.MAX_VALUE};
+ long[] values = new long[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ values[i] = NumericUtils.doubleToSortableLong(raw[i]);
+ }
+ w.addBatch(
+ simpleBatch(
+ 3,
+ new ArrayLongColumn(
+ "val", type, LongColumn.NumericKind.DOUBLE, new int[] {0, 1, 2}, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(
+ raw[i], storedFields.document(i).getField("val").numericValue().doubleValue(), 0d);
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Binary column with StoredValue.Type.STRING: UTF-8 bytes (including multi-byte and
+ // surrogate-pair characters) are stored as String values, retrievable via stringValue().
+ public void testStoredTypeStringFromBinaryColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType type = new FieldType();
+ type.setStored(true);
+ type.freeze();
+
+ String[] raw = {"hello", "wörld", "🦜"};
+ BytesRef[] values = new BytesRef[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ values[i] = newBytesRef(raw[i]);
+ }
+ w.addBatch(
+ simpleBatch(
+ 3,
+ new ArrayBinaryColumn(
+ "val", type, new int[] {0, 1, 2}, values, StoredValue.Type.STRING)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(raw[i], storedFields.document(i).getField("val").stringValue());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // FLOAT kind feeding both stored fields and NUMERIC DV: stored values decode to floats,
+ // while the DV keeps the raw sortable-int encoding (sign-extended to long), not the float.
+ public void testStoredTypeFloatWithNumericDV() throws IOException {
+ // FLOAT kind on a LongColumn that also feeds NumericDV.
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType type = new FieldType();
+ type.setStored(true);
+ type.setDocValuesType(DocValuesType.NUMERIC);
+ type.freeze();
+
+ float[] raw = {1.5f, -2.25f, 42.0f};
+ long[] values = new long[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ values[i] = NumericUtils.floatToSortableInt(raw[i]);
+ }
+ w.addBatch(
+ simpleBatch(
+ 3,
+ new ArrayLongColumn(
+ "val", type, LongColumn.NumericKind.FLOAT, new int[] {0, 1, 2}, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ // Stored values decoded as floats.
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(
+ raw[i], storedFields.document(i).getField("val").numericValue().floatValue(), 0f);
+ }
+
+ // NumericDV stores the sortable-int encoding sign-extended to long.
+ NumericDocValues dv = leaf.getNumericDocValues("val");
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(values[i], dv.longValue());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // INT kind feeding both stored fields and NUMERIC DV, over the full int range: stored
+ // values come back as ints, the DV as the same values widened to long.
+ public void testStoredTypeIntegerWithNumericDV() throws IOException {
+ // INT kind on a LongColumn that also feeds NumericDV.
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType type = new FieldType();
+ type.setStored(true);
+ type.setDocValuesType(DocValuesType.NUMERIC);
+ type.freeze();
+
+ int[] raw = {Integer.MIN_VALUE, -1, 0, 42, Integer.MAX_VALUE};
+ long[] values = new long[raw.length];
+ int[] docIds = new int[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ docIds[i] = i;
+ values[i] = raw[i];
+ }
+ w.addBatch(
+ simpleBatch(
+ raw.length,
+ new ArrayLongColumn("val", type, LongColumn.NumericKind.INT, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(raw[i], storedFields.document(i).getField("val").numericValue().intValue());
+ }
+
+ NumericDocValues dv = leaf.getNumericDocValues("val");
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(raw[i], dv.longValue());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // DOUBLE kind feeding both stored fields and NUMERIC DV: stored values decode to doubles
+ // (infinities included), while the DV keeps the raw sortable-long encoding.
+ public void testStoredTypeDoubleWithNumericDV() throws IOException {
+ // DOUBLE kind on a LongColumn that also feeds NumericDV.
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType type = new FieldType();
+ type.setStored(true);
+ type.setDocValuesType(DocValuesType.NUMERIC);
+ type.freeze();
+
+ double[] raw = {Double.NEGATIVE_INFINITY, -1.5d, 0.0d, 2.25d, Double.POSITIVE_INFINITY};
+ long[] values = new long[raw.length];
+ int[] docIds = new int[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ docIds[i] = i;
+ values[i] = NumericUtils.doubleToSortableLong(raw[i]);
+ }
+ w.addBatch(
+ simpleBatch(
+ raw.length,
+ new ArrayLongColumn("val", type, LongColumn.NumericKind.DOUBLE, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ StoredFields storedFields = leaf.storedFields();
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(
+ raw[i], storedFields.document(i).getField("val").numericValue().doubleValue(), 0d);
+ }
+
+ NumericDocValues dv = leaf.getNumericDocValues("val");
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(values[i], dv.longValue());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Validation: StoredValue.Type.DATA_INPUT is not a supported stored type for the columnar
+ // path and must be rejected with IllegalArgumentException.
+ public void testStoredTypeDataInputRejected() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType type = new FieldType();
+ type.setStored(true);
+ type.freeze();
+
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(
+ 1,
+ new ArrayBinaryColumn(
+ "val",
+ type,
+ new int[] {0},
+ new BytesRef[] {newBytesRef("x")},
+ StoredValue.Type.DATA_INPUT))));
+
+ w.close();
+ dir.close();
+ }
+
+ // Multi-dimensional points from a plain BinaryColumn: the caller pre-packs 2-D int point
+ // bytes with IntPoint.pack and the column writes them through unchanged, so standard
+ // IntPoint range queries work against the result.
+ public void testBinaryColumnMultiDimPointsOnly() throws IOException {
+ // Plain BinaryColumn with 2-D int points (fixedSize = 2 * 4 = 8). Caller pre-packs bytes via
+ // IntPoint.pack; the chain writes them to points unchanged.
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType pointType = new FieldType();
+ pointType.setDimensions(2, Integer.BYTES);
+ pointType.freeze();
+
+ int[] docIds = {0, 1, 2};
+ BytesRef[] values = {IntPoint.pack(1, 10), IntPoint.pack(2, 20), IntPoint.pack(3, 30)};
+ w.addBatch(simpleBatch(3, new ArrayBinaryColumn("pt", pointType, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ IndexSearcher searcher = new IndexSearcher(r);
+ assertEquals(
+ 1, searcher.count(IntPoint.newRangeQuery("pt", new int[] {1, 10}, new int[] {1, 10})));
+ assertEquals(
+ 3, searcher.count(IntPoint.newRangeQuery("pt", new int[] {0, 0}, new int[] {10, 100})));
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Opaque-bytes point path with an arbitrary width (3-D ints, 12 bytes): no numeric
+ // transform is applied, so any pre-packed width is accepted.
+ public void testBinaryColumnPointsOnlyArbitraryWidth() throws IOException {
+ // 3-D int points (12 bytes) via plain BinaryColumn — arbitrary widths are fine for the
+ // opaque-bytes path since no numeric transform is applied.
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType type = new FieldType();
+ type.setDimensions(3, Integer.BYTES);
+ type.freeze();
+
+ int[][] raw = {{1, 2, 3}, {4, 5, 6}, {10, 20, 30}};
+ BytesRef[] values = new BytesRef[raw.length];
+ int[] docIds = new int[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ docIds[i] = i;
+ values[i] = IntPoint.pack(raw[i]);
+ }
+ w.addBatch(simpleBatch(raw.length, new ArrayBinaryColumn("pt", type, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ IndexSearcher searcher = new IndexSearcher(r);
+ assertEquals(
+ 1, searcher.count(IntPoint.newRangeQuery("pt", new int[] {1, 2, 3}, new int[] {1, 2, 3})));
+ assertEquals(
+ 3,
+ searcher.count(
+ IntPoint.newRangeQuery("pt", new int[] {0, 0, 0}, new int[] {100, 100, 100})));
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // One BinaryColumn feeding SORTED doc values and 1-D int points simultaneously: the same
+ // packed BytesRef is visible via DV ordinal lookup and via IntPoint queries.
+ public void testBinaryColumnSortedDVAndPoints() throws IOException {
+ // Plain BinaryColumn with SORTED DV + 1-D int point. Same BytesRef goes to both writers.
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType type = new FieldType();
+ type.setDimensions(1, Integer.BYTES);
+ type.setDocValuesType(DocValuesType.SORTED);
+ type.freeze();
+
+ int[] docIds = {0, 1, 2};
+ BytesRef[] values = {IntPoint.pack(10), IntPoint.pack(20), IntPoint.pack(30)};
+ w.addBatch(simpleBatch(3, new ArrayBinaryColumn("field", type, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ SortedDocValues dv = leaf.getSortedDocValues("field");
+ for (int i = 0; i < 3; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(values[i], dv.lookupOrd(dv.ordValue()));
+ }
+
+ IndexSearcher searcher = new IndexSearcher(r);
+ assertEquals(1, searcher.count(IntPoint.newExactQuery("field", 10)));
+ assertEquals(3, searcher.count(IntPoint.newRangeQuery("field", 10, 30)));
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // INT kind with 1-D 4-byte points + SORTED_NUMERIC DV: negative and boundary ints must sort
+ // correctly in point range queries and round-trip through the DV.
+ public void testNumericKindIntPointsAndDV() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType type = new FieldType();
+ type.setDimensions(1, Integer.BYTES);
+ type.setDocValuesType(DocValuesType.SORTED_NUMERIC);
+ type.freeze();
+
+ int[] raw = {-5, -1, 0, 7, Integer.MAX_VALUE};
+ long[] values = new long[raw.length];
+ int[] docIds = new int[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ docIds[i] = i;
+ values[i] = raw[i];
+ }
+ w.addBatch(
+ simpleBatch(
+ raw.length,
+ new ArrayLongColumn("val", type, LongColumn.NumericKind.INT, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ SortedNumericDocValues dv = leaf.getSortedNumericDocValues("val");
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(1, dv.docValueCount());
+ assertEquals(raw[i], dv.nextValue());
+ }
+
+ IndexSearcher searcher = new IndexSearcher(r);
+ assertEquals(
+ raw.length,
+ searcher.count(IntPoint.newRangeQuery("val", Integer.MIN_VALUE, Integer.MAX_VALUE)));
+ assertEquals(1, searcher.count(IntPoint.newExactQuery("val", -5)));
+ assertEquals(3, searcher.count(IntPoint.newRangeQuery("val", -1, 7)));
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // Default (LONG) kind with 1-D 8-byte points + SORTED_NUMERIC DV over the full long range,
+ // queried with LongPoint exact/range queries.
+ public void testNumericKindLongPointsAndDV() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType type = new FieldType();
+ type.setDimensions(1, Long.BYTES);
+ type.setDocValuesType(DocValuesType.SORTED_NUMERIC);
+ type.freeze();
+
+ long[] raw = {Long.MIN_VALUE, -100L, 0L, 42L, Long.MAX_VALUE};
+ int[] docIds = new int[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ docIds[i] = i;
+ }
+ w.addBatch(simpleBatch(raw.length, new ArrayLongColumn("val", type, docIds, raw.clone())));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ SortedNumericDocValues dv = leaf.getSortedNumericDocValues("val");
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(raw[i], dv.nextValue());
+ }
+
+ IndexSearcher searcher = new IndexSearcher(r);
+ assertEquals(
+ raw.length, searcher.count(LongPoint.newRangeQuery("val", Long.MIN_VALUE, Long.MAX_VALUE)));
+ assertEquals(1, searcher.count(LongPoint.newExactQuery("val", Long.MIN_VALUE)));
+ assertEquals(3, searcher.count(LongPoint.newRangeQuery("val", -100L, 42L)));
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // FLOAT kind with 1-D 4-byte points + SORTED_NUMERIC DV: the DV stores the sortable-int
+ // encoding (decode via sortableIntToFloat), while points sort numerically for FloatPoint
+ // queries — infinities included.
+ public void testNumericKindFloatPointsAndDV() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType type = new FieldType();
+ type.setDimensions(1, Float.BYTES);
+ type.setDocValuesType(DocValuesType.SORTED_NUMERIC);
+ type.freeze();
+
+ float[] raw = {Float.NEGATIVE_INFINITY, -1.5f, 0.0f, 2.25f, Float.POSITIVE_INFINITY};
+ long[] values = new long[raw.length];
+ int[] docIds = new int[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ docIds[i] = i;
+ values[i] = NumericUtils.floatToSortableInt(raw[i]);
+ }
+ w.addBatch(
+ simpleBatch(
+ raw.length,
+ new ArrayLongColumn("val", type, LongColumn.NumericKind.FLOAT, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ // DV stores the sortable-int encoding; decode via sortableIntToFloat.
+ SortedNumericDocValues dv = leaf.getSortedNumericDocValues("val");
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(raw[i], NumericUtils.sortableIntToFloat((int) dv.nextValue()), 0f);
+ }
+
+ // Points sort numerically.
+ IndexSearcher searcher = new IndexSearcher(r);
+ assertEquals(
+ raw.length,
+ searcher.count(
+ FloatPoint.newRangeQuery("val", Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY)));
+ assertEquals(1, searcher.count(FloatPoint.newExactQuery("val", -1.5f)));
+ assertEquals(3, searcher.count(FloatPoint.newRangeQuery("val", -1.5f, 2.25f)));
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ // DOUBLE kind with 1-D 8-byte points + SORTED_NUMERIC DV: DV holds the sortable-long
+ // encoding (decode via sortableLongToDouble); DoublePoint queries see numeric order.
+ public void testNumericKindDoublePointsAndDV() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType type = new FieldType();
+ type.setDimensions(1, Double.BYTES);
+ type.setDocValuesType(DocValuesType.SORTED_NUMERIC);
+ type.freeze();
+
+ double[] raw = {Double.NEGATIVE_INFINITY, -1.5d, 0.0d, 2.25d, Double.POSITIVE_INFINITY};
+ long[] values = new long[raw.length];
+ int[] docIds = new int[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ docIds[i] = i;
+ values[i] = NumericUtils.doubleToSortableLong(raw[i]);
+ }
+ w.addBatch(
+ simpleBatch(
+ raw.length,
+ new ArrayLongColumn("val", type, LongColumn.NumericKind.DOUBLE, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+
+ SortedNumericDocValues dv = leaf.getSortedNumericDocValues("val");
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(raw[i], NumericUtils.sortableLongToDouble(dv.nextValue()), 0d);
+ }
+
+ IndexSearcher searcher = new IndexSearcher(r);
+ assertEquals(
+ raw.length,
+ searcher.count(
+ DoublePoint.newRangeQuery("val", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY)));
+ assertEquals(1, searcher.count(DoublePoint.newExactQuery("val", -1.5d)));
+ assertEquals(3, searcher.count(DoublePoint.newRangeQuery("val", -1.5d, 2.25d)));
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testNumericKindPointsAndDVMultiDimRejected() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // 2D int: scenario 3 requires 1D.
+ FieldType type = new FieldType();
+ type.setDimensions(2, Integer.BYTES);
+ type.setDocValuesType(DocValuesType.SORTED_NUMERIC);
+ type.freeze();
+
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(
+ 1,
+ new ArrayLongColumn(
+ "val",
+ type,
+ LongColumn.NumericKind.LONG,
+ new int[] {0},
+ new long[] {1L}))));
+
+ w.close();
+ dir.close();
+ }
+
+ public void testNumericKindPointsAndDVWidthMismatch() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // FLOAT kind requires a 4-byte point field; declaring Long.BYTES should throw.
+ FieldType type = new FieldType();
+ type.setDimensions(1, Long.BYTES);
+ type.setDocValuesType(DocValuesType.SORTED_NUMERIC);
+ type.freeze();
+
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(
+ 1,
+ new ArrayLongColumn(
+ "val",
+ type,
+ LongColumn.NumericKind.FLOAT,
+ new int[] {0},
+ new long[] {1L}))));
+
+ w.close();
+ dir.close();
+ }
+
+ public void testNumericKindFloatDVOnly() throws IOException {
+ // DV only (no points): LongColumn stores the long value unchanged. For FLOAT, callers feed
+ // sortable-int bits in the low 32 bits, and DV reads them back sign-extended to long.
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType type = new FieldType();
+ type.setDocValuesType(DocValuesType.NUMERIC);
+ type.freeze();
+
+ float[] raw = {1.5f, -2.25f, Float.MAX_VALUE};
+ long[] values = new long[raw.length];
+ int[] docIds = new int[raw.length];
+ for (int i = 0; i < raw.length; i++) {
+ docIds[i] = i;
+ values[i] = NumericUtils.floatToSortableInt(raw[i]);
+ }
+ w.addBatch(
+ simpleBatch(
+ raw.length,
+ new ArrayLongColumn("val", type, LongColumn.NumericKind.FLOAT, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ NumericDocValues dv = leaf.getNumericDocValues("val");
+ for (int i = 0; i < raw.length; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(values[i], dv.longValue());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ /**
+ * With a sparse row column, the batch must still produce {@code numDocs} documents in the
+ * segment, and stored-fields for un-populated docs must be empty (not shifted, not missing). This
+ * guards the row-dense framing contract: every doc-id in {@code [0, numDocs)} is framed
+ * regardless of whether any row column has a value at that doc.
+ */
+ public void testSparseStoredFramingPreservesNumDocs() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType storedOnly = new FieldType();
+ storedOnly.setStored(true);
+ storedOnly.freeze();
+
+ // 5 batch docs, but only docs 1 and 3 have a stored value.
+ int[] docIds = {1, 3};
+ BytesRef[] values = {newBytesRef("one"), newBytesRef("three")};
+ w.addBatch(simpleBatch(5, new ArrayBinaryColumn("field", storedOnly, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(5, leaf.maxDoc());
+
+ StoredFields storedFields = leaf.storedFields();
+ assertNull(storedFields.document(0).getField("field"));
+ assertEquals(newBytesRef("one"), storedFields.document(1).getField("field").binaryValue());
+ assertNull(storedFields.document(2).getField("field"));
+ assertEquals(newBytesRef("three"), storedFields.document(3).getField("field").binaryValue());
+ assertNull(storedFields.document(4).getField("field"));
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ /**
+ * With a sparse indexed row column, the segment must still have {@code numDocs} documents, and
+ * the inverted index must reflect only the populated docs. Guards termsHash framing alignment.
+ */
+ public void testSparseIndexedFramingPreservesNumDocs() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType indexedType = new FieldType();
+ indexedType.setIndexOptions(IndexOptions.DOCS);
+ indexedType.setOmitNorms(true);
+ indexedType.setTokenized(false);
+ indexedType.freeze();
+
+ // 6 batch docs, only docs 2 and 5 have a term.
+ int[] docIds = {2, 5};
+ BytesRef[] values = {newBytesRef("a"), newBytesRef("b")};
+ w.addBatch(simpleBatch(6, new ArrayBinaryColumn("tag", indexedType, docIds, values)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(6, leaf.maxDoc());
+
+ IndexSearcher searcher = new IndexSearcher(r);
+ assertEquals(1, searcher.count(new TermQuery(new Term("tag", "a"))));
+ assertEquals(1, searcher.count(new TermQuery(new Term("tag", "b"))));
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ /**
+ * When some docs in the batch have only a DV column (no row column value), framing still happens
+ * for every doc: stored fields must be empty for those docs, inverted index untouched, and DV
+ * values align with their batch doc-ids.
+ */
+ public void testSparseRowMixedWithDenseDocValues() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType storedOnly = new FieldType();
+ storedOnly.setStored(true);
+ storedOnly.freeze();
+
+ // Row-sparse stored column: only docs 0 and 3 have a stored value.
+ int[] storedDocIds = {0, 3};
+ BytesRef[] storedValues = {newBytesRef("first"), newBytesRef("fourth")};
+ // Dense DV column covering every doc.
+ long[] dvValues = {100, 200, 300, 400};
+
+ w.addBatch(
+ simpleBatch(
+ 4,
+ new ArrayBinaryColumn("stored", storedOnly, storedDocIds, storedValues),
+ new ArrayDenseLongColumn("dv", NumericDocValuesField.TYPE, dvValues)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(4, leaf.maxDoc());
+
+ StoredFields storedFields = leaf.storedFields();
+ assertEquals(newBytesRef("first"), storedFields.document(0).getField("stored").binaryValue());
+ assertNull(storedFields.document(1).getField("stored"));
+ assertNull(storedFields.document(2).getField("stored"));
+ assertEquals(newBytesRef("fourth"), storedFields.document(3).getField("stored").binaryValue());
+
+ NumericDocValues dv = leaf.getNumericDocValues("dv");
+ for (int i = 0; i < dvValues.length; i++) {
+ assertEquals(i, dv.nextDoc());
+ assertEquals(dvValues[i], dv.longValue());
+ }
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, dv.nextDoc());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ /**
+ * Indexing the same logical docs via {@code addBatch} with a sparse row column vs. via {@code
+ * addDocument} one doc at a time must produce segments with the same {@code maxDoc} and the same
+ * stored-field / inverted-index visibility. This is the golden equivalence check.
+ */
+ public void testSparseBatchMatchesDocByDoc() throws IOException {
+ FieldType storedIndexed = new FieldType(StringField.TYPE_STORED);
+ storedIndexed.freeze();
+
+ // 7 docs; only docs 1, 2, and 5 have values for the row column.
+ int[] docIds = {1, 2, 5};
+ String[] values = {"alpha", "beta", "gamma"};
+ int totalDocs = 7;
+
+ // --- Batch path ---
+ Directory batchDir = newDirectory();
+ try (IndexWriter batchW = new IndexWriter(batchDir, newIndexWriterConfig())) {
+ BytesRef[] refs = new BytesRef[values.length];
+ for (int i = 0; i < values.length; i++) {
+ refs[i] = newBytesRef(values[i]);
+ }
+ // StringField stores as STRING — use the matching storedType so stored-value round-trip is
+ // comparable between the two paths.
+ batchW.addBatch(
+ simpleBatch(
+ totalDocs,
+ new ArrayBinaryColumn(
+ "field", storedIndexed, docIds, refs, StoredValue.Type.STRING)));
+ }
+
+ // --- Doc-by-doc path ---
+ Directory singleDir = newDirectory();
+ try (IndexWriter singleW = new IndexWriter(singleDir, newIndexWriterConfig())) {
+ int next = 0;
+ for (int d = 0; d < totalDocs; d++) {
+ Document doc = new Document();
+ if (next < docIds.length && docIds[next] == d) {
+ doc.add(
+ new StringField("field", values[next], org.apache.lucene.document.Field.Store.YES));
+ next++;
+ }
+ singleW.addDocument(doc);
+ }
+ }
+
+ try (DirectoryReader batchR = DirectoryReader.open(batchDir);
+ DirectoryReader singleR = DirectoryReader.open(singleDir)) {
+ LeafReader batchLeaf = getOnlyLeafReader(batchR);
+ LeafReader singleLeaf = getOnlyLeafReader(singleR);
+
+ assertEquals(singleLeaf.maxDoc(), batchLeaf.maxDoc());
+ assertEquals(totalDocs, batchLeaf.maxDoc());
+
+ StoredFields batchStored = batchLeaf.storedFields();
+ StoredFields singleStored = singleLeaf.storedFields();
+ for (int d = 0; d < totalDocs; d++) {
+ IndexableField bf = batchStored.document(d).getField("field");
+ IndexableField sf = singleStored.document(d).getField("field");
+ if (sf == null) {
+ assertNull("doc " + d + " should have no stored field", bf);
+ } else {
+ assertNotNull("doc " + d + " should have a stored field", bf);
+ assertEquals(sf.stringValue(), bf.stringValue());
+ }
+ }
+
+ IndexSearcher batchSearcher = new IndexSearcher(batchR);
+ IndexSearcher singleSearcher = new IndexSearcher(singleR);
+ for (String v : values) {
+ Term t = new Term("field", v);
+ assertEquals(singleSearcher.count(new TermQuery(t)), batchSearcher.count(new TermQuery(t)));
+ }
+ }
+
+ batchDir.close();
+ singleDir.close();
+ }
+
+ /** A row column that returns an out-of-order batch doc-id must be rejected. */
+ public void testRowColumnOutOfOrderDocIdThrows() throws IOException {
+ Directory dir = newDirectory();
+ try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
+ FieldType storedOnly = new FieldType();
+ storedOnly.setStored(true);
+ storedOnly.freeze();
+
+ // docIds intentionally not non-decreasing.
+ int[] docIds = {2, 1};
+ BytesRef[] values = {newBytesRef("a"), newBytesRef("b")};
+ expectThrows(
+ IllegalArgumentException.class,
+ () -> w.addBatch(simpleBatch(3, new ArrayBinaryColumn("f", storedOnly, docIds, values))));
+ }
+ dir.close();
+ }
+
+ /** A row column that returns a batch doc-id {@code >= numDocs} must be rejected. */
+ public void testRowColumnOutOfRangeDocIdThrows() throws IOException {
+ Directory dir = newDirectory();
+ try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
+ FieldType storedOnly = new FieldType();
+ storedOnly.setStored(true);
+ storedOnly.freeze();
+
+ // Batch size 3, but the column advertises a value at doc 5.
+ int[] docIds = {5};
+ BytesRef[] values = {newBytesRef("oob")};
+ expectThrows(
+ IllegalArgumentException.class,
+ () -> w.addBatch(simpleBatch(3, new ArrayBinaryColumn("f", storedOnly, docIds, values))));
+ }
+ dir.close();
+ }
+
+  // ---- VectorColumn tests ----
+  // (The array-backed test Column implementations used throughout these tests are
+  // defined at the bottom of this class.)
+
+ public void testDenseFloatVectorColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType vectorType = floatVectorType(3, VectorSimilarityFunction.EUCLIDEAN);
+ float[][] vectors = {
+ {1f, 2f, 3f}, {4f, 5f, 6f}, {7f, 8f, 9f},
+ };
+ w.addBatch(simpleBatch(3, new ArrayDenseFloatVectorColumn("v", vectorType, vectors)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ FloatVectorValues values = leaf.getFloatVectorValues("v");
+ assertNotNull(values);
+ KnnVectorValues.DocIndexIterator it = values.iterator();
+ for (int i = 0; i < vectors.length; i++) {
+ assertEquals(i, it.nextDoc());
+ assertArrayEquals(vectors[i], values.vectorValue(it.index()), 0f);
+ }
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testDenseByteVectorColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType vectorType = byteVectorType(4, VectorSimilarityFunction.EUCLIDEAN);
+ byte[][] vectors = {
+ {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12},
+ };
+ w.addBatch(simpleBatch(3, new ArrayDenseByteVectorColumn("v", vectorType, vectors)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ ByteVectorValues values = leaf.getByteVectorValues("v");
+ assertNotNull(values);
+ KnnVectorValues.DocIndexIterator it = values.iterator();
+ for (int i = 0; i < vectors.length; i++) {
+ assertEquals(i, it.nextDoc());
+ assertArrayEquals(vectors[i], values.vectorValue(it.index()));
+ }
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testSparseFloatVectorColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType vectorType = floatVectorType(2, VectorSimilarityFunction.EUCLIDEAN);
+ int[] docIds = {0, 2, 5, 9};
+ float[][] vectors = {{1f, 1f}, {2f, 2f}, {3f, 3f}, {4f, 4f}};
+ // pair with a sparse long column so the batch has a defined doc count > vector count
+ int[] anchorIds = {0, 9};
+ long[] anchorVals = {0L, 9L};
+ w.addBatch(
+ simpleBatch(
+ 10,
+ new ArrayFloatVectorColumn("v", vectorType, docIds, vectors),
+ new ArrayLongColumn("anchor", NumericDocValuesField.TYPE, anchorIds, anchorVals)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ FloatVectorValues values = leaf.getFloatVectorValues("v");
+ assertNotNull(values);
+ KnnVectorValues.DocIndexIterator it = values.iterator();
+ for (int i = 0; i < docIds.length; i++) {
+ assertEquals(docIds[i], it.nextDoc());
+ assertArrayEquals(vectors[i], values.vectorValue(it.index()), 0f);
+ }
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testSparseByteVectorColumn() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType vectorType = byteVectorType(2, VectorSimilarityFunction.EUCLIDEAN);
+ int[] docIds = {1, 4};
+ byte[][] vectors = {{1, 2}, {3, 4}};
+ int[] anchorIds = {0, 5};
+ long[] anchorVals = {0L, 5L};
+ w.addBatch(
+ simpleBatch(
+ 6,
+ new ArrayByteVectorColumn("v", vectorType, docIds, vectors),
+ new ArrayLongColumn("anchor", NumericDocValuesField.TYPE, anchorIds, anchorVals)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ ByteVectorValues values = leaf.getByteVectorValues("v");
+ assertNotNull(values);
+ KnnVectorValues.DocIndexIterator it = values.iterator();
+ for (int i = 0; i < docIds.length; i++) {
+ assertEquals(docIds[i], it.nextDoc());
+ assertArrayEquals(vectors[i], values.vectorValue(it.index()));
+ }
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testVectorMixedWithLongAndBinary() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType vectorType = floatVectorType(2, VectorSimilarityFunction.DOT_PRODUCT);
+ float[][] vectors = {{0.6f, 0.8f}, {0.8f, 0.6f}, {1.0f, 0.0f}};
+ long[] longs = {10, 20, 30};
+ BytesRef[] bins = {newBytesRef("a"), newBytesRef("b"), newBytesRef("c")};
+ int[] ids = {0, 1, 2};
+ w.addBatch(
+ simpleBatch(
+ 3,
+ new ArrayDenseFloatVectorColumn("v", vectorType, vectors),
+ new ArrayLongColumn("num", NumericDocValuesField.TYPE, ids, longs),
+ new ArrayBinaryColumn("bin", BinaryDocValuesField.TYPE, ids, bins)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ NumericDocValues nums = leaf.getNumericDocValues("num");
+ BinaryDocValues binDv = leaf.getBinaryDocValues("bin");
+ FloatVectorValues vec = leaf.getFloatVectorValues("v");
+ KnnVectorValues.DocIndexIterator it = vec.iterator();
+ for (int i = 0; i < 3; i++) {
+ assertEquals(i, nums.nextDoc());
+ assertEquals(longs[i], nums.longValue());
+ assertEquals(i, binDv.nextDoc());
+ assertEquals(bins[i], binDv.binaryValue());
+ assertEquals(i, it.nextDoc());
+ assertArrayEquals(vectors[i], vec.vectorValue(it.index()), 0f);
+ }
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testVectorAcrossMultipleBatches() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType vectorType = floatVectorType(2, VectorSimilarityFunction.EUCLIDEAN);
+ float[][] firstBatch = {{1f, 1f}, {2f, 2f}};
+ float[][] secondBatch = {{3f, 3f}, {4f, 4f}, {5f, 5f}};
+ w.addBatch(simpleBatch(2, new ArrayDenseFloatVectorColumn("v", vectorType, firstBatch)));
+ w.addBatch(simpleBatch(3, new ArrayDenseFloatVectorColumn("v", vectorType, secondBatch)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ FloatVectorValues values = leaf.getFloatVectorValues("v");
+ KnnVectorValues.DocIndexIterator it = values.iterator();
+ float[][] all = {firstBatch[0], firstBatch[1], secondBatch[0], secondBatch[1], secondBatch[2]};
+ for (int i = 0; i < all.length; i++) {
+ assertEquals(i, it.nextDoc());
+ assertArrayEquals(all[i], values.vectorValue(it.index()), 0f);
+ }
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testEmptyVectorColumnRejected() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // A field type alone is not enough — every batch must have at least one column with data,
+ // and a vector-only column with no values is the equivalent of "no documents have this
+ // vector". We pair it with a long anchor to make the batch valid; the vector cursor returns
+ // NO_MORE_DOCS immediately.
+ FieldType vectorType = floatVectorType(2, VectorSimilarityFunction.EUCLIDEAN);
+ int[] anchorIds = {0, 1};
+ long[] anchorVals = {0L, 1L};
+ w.addBatch(
+ simpleBatch(
+ 2,
+ new ArrayFloatVectorColumn("v", vectorType, new int[0], new float[0][]),
+ new ArrayLongColumn("anchor", NumericDocValuesField.TYPE, anchorIds, anchorVals)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ FloatVectorValues values = leaf.getFloatVectorValues("v");
+ if (values != null) {
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, values.iterator().nextDoc());
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testParentFieldWithVectorBatch() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriterConfig config = newIndexWriterConfig();
+ config.setParentField("_parent");
+ IndexWriter w = new IndexWriter(dir, config);
+
+ FieldType vectorType = floatVectorType(2, VectorSimilarityFunction.EUCLIDEAN);
+ float[][] vectors = {{1f, 0f}, {0f, 1f}, {1f, 1f}};
+ w.addBatch(simpleBatch(3, new ArrayDenseFloatVectorColumn("v", vectorType, vectors)));
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ NumericDocValues parentDv = leaf.getNumericDocValues("_parent");
+ assertNotNull(parentDv);
+ for (int i = 0; i < 3; i++) {
+ assertEquals(i, parentDv.nextDoc());
+ }
+ FloatVectorValues values = leaf.getFloatVectorValues("v");
+ KnnVectorValues.DocIndexIterator it = values.iterator();
+ for (int i = 0; i < 3; i++) {
+ assertEquals(i, it.nextDoc());
+ assertArrayEquals(vectors[i], values.vectorValue(it.index()), 0f);
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testFloatVectorEncodingMismatchFails() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ // FieldType says FLOAT32 but column carries byte[] vectors.
+ FieldType vectorType = floatVectorType(2, VectorSimilarityFunction.EUCLIDEAN);
+ byte[][] vectors = {{1, 2}, {3, 4}};
+ expectThrows(
+ ClassCastException.class,
+ () -> w.addBatch(simpleBatch(2, new ArrayDenseByteVectorColumn("v", vectorType, vectors))));
+ w.rollback();
+ dir.close();
+ }
+
+ public void testWrongDimensionFails() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType vectorType = floatVectorType(3, VectorSimilarityFunction.EUCLIDEAN);
+ float[][] vectors = {{1f, 2f, 3f}, {4f, 5f}};
+ IllegalArgumentException e =
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(2, new ArrayDenseFloatVectorColumn("v", vectorType, vectors))));
+ assertTrue(e.getMessage(), e.getMessage().contains("expected dimension 3"));
+ w.rollback();
+ dir.close();
+ }
+
+ public void testZeroDimensionFieldTypeFails() {
+ FieldType bad = new FieldType();
+ // No vector attributes set -> vectorDimension() == 0
+ bad.setDocValuesType(DocValuesType.NUMERIC);
+ bad.freeze();
+ IllegalArgumentException e =
+ expectThrows(
+ IllegalArgumentException.class,
+ () -> new ArrayDenseFloatVectorColumn("v", bad, new float[][] {{1f, 2f}}));
+ assertTrue(e.getMessage(), e.getMessage().contains("vectorDimension() > 0"));
+ }
+
+ public void testVectorWithDocValuesRejected() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType bad = new FieldType();
+ bad.setVectorAttributes(2, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN);
+ bad.setDocValuesType(DocValuesType.NUMERIC);
+ bad.freeze();
+ IllegalArgumentException e =
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(
+ 1, new ArrayDenseFloatVectorColumn("v", bad, new float[][] {{1f, 2f}}))));
+ assertTrue(e.getMessage(), e.getMessage().contains("must be vector-only"));
+ w.rollback();
+ dir.close();
+ }
+
+ public void testVectorWithStoredRejected() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType bad = new FieldType();
+ bad.setVectorAttributes(2, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN);
+ bad.setStored(true);
+ bad.freeze();
+ IllegalArgumentException e =
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(
+ 1, new ArrayDenseFloatVectorColumn("v", bad, new float[][] {{1f, 2f}}))));
+ assertTrue(e.getMessage(), e.getMessage().contains("must be vector-only"));
+ w.rollback();
+ dir.close();
+ }
+
+ public void testVectorWithIndexOptionsRejected() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType bad = new FieldType();
+ bad.setVectorAttributes(2, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN);
+ bad.setIndexOptions(IndexOptions.DOCS);
+ bad.setTokenized(false);
+ bad.freeze();
+ IllegalArgumentException e =
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(
+ 1, new ArrayDenseFloatVectorColumn("v", bad, new float[][] {{1f, 2f}}))));
+ assertTrue(e.getMessage(), e.getMessage().contains("must be vector-only"));
+ w.rollback();
+ dir.close();
+ }
+
+ public void testVectorWithPointsRejected() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType bad = new FieldType();
+ bad.setVectorAttributes(2, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN);
+ bad.setDimensions(1, Integer.BYTES);
+ bad.freeze();
+ IllegalArgumentException e =
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(
+ 1, new ArrayDenseFloatVectorColumn("v", bad, new float[][] {{1f, 2f}}))));
+ assertTrue(e.getMessage(), e.getMessage().contains("must be vector-only"));
+ w.rollback();
+ dir.close();
+ }
+
+ public void testDuplicateDocIDFails() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType vectorType = floatVectorType(2, VectorSimilarityFunction.EUCLIDEAN);
+ int[] docIds = {0, 0};
+ float[][] vectors = {{1f, 2f}, {3f, 4f}};
+ IllegalArgumentException e =
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(2, new ArrayFloatVectorColumn("v", vectorType, docIds, vectors))));
+ assertTrue(e.getMessage(), e.getMessage().contains("strictly increasing"));
+ w.rollback();
+ dir.close();
+ }
+
+ public void testDecreasingDocIDFails() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType vectorType = floatVectorType(2, VectorSimilarityFunction.EUCLIDEAN);
+ int[] docIds = {3, 1};
+ float[][] vectors = {{1f, 2f}, {3f, 4f}};
+ IllegalArgumentException e =
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(4, new ArrayFloatVectorColumn("v", vectorType, docIds, vectors))));
+ assertTrue(e.getMessage(), e.getMessage().contains("strictly increasing"));
+ w.rollback();
+ dir.close();
+ }
+
+ public void testVectorOutOfRangeDocIDFails() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType vectorType = floatVectorType(2, VectorSimilarityFunction.EUCLIDEAN);
+ int[] docIds = {0, 5};
+ float[][] vectors = {{1f, 2f}, {3f, 4f}};
+ IllegalArgumentException e =
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(3, new ArrayFloatVectorColumn("v", vectorType, docIds, vectors))));
+ assertTrue(e.getMessage(), e.getMessage().contains("out of range"));
+ w.rollback();
+ dir.close();
+ }
+
+ public void testDenseVectorColumnTooFewValuesFails() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType vectorType = floatVectorType(2, VectorSimilarityFunction.EUCLIDEAN);
+ // 2 values declared DENSE but the batch has 3 docs.
+ float[][] vectors = {{1f, 2f}, {3f, 4f}};
+ IllegalArgumentException e =
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(3, new ArrayDenseFloatVectorColumn("v", vectorType, vectors))));
+ assertTrue(e.getMessage(), e.getMessage().contains("Dense column"));
+ w.rollback();
+ dir.close();
+ }
+
+ public void testVectorColumnSchemaConsistencyAcrossBatches() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+
+ FieldType float32Type = floatVectorType(2, VectorSimilarityFunction.EUCLIDEAN);
+ w.addBatch(
+ simpleBatch(
+ 1, new ArrayDenseFloatVectorColumn("v", float32Type, new float[][] {{1f, 2f}})));
+
+ FieldType byteType = byteVectorType(2, VectorSimilarityFunction.EUCLIDEAN);
+ expectThrows(
+ IllegalArgumentException.class,
+ () ->
+ w.addBatch(
+ simpleBatch(
+ 1, new ArrayDenseByteVectorColumn("v", byteType, new byte[][] {{1, 2}}))));
+ w.rollback();
+ dir.close();
+ }
+
+  /** Builds a frozen vector-only {@link FieldType} using FLOAT32 encoding. */
+  private static FieldType floatVectorType(int dimension, VectorSimilarityFunction sim) {
+    FieldType vectorType = new FieldType();
+    vectorType.setVectorAttributes(dimension, VectorEncoding.FLOAT32, sim);
+    vectorType.freeze();
+    return vectorType;
+  }
+
+  /** Builds a frozen vector-only {@link FieldType} using BYTE encoding. */
+  private static FieldType byteVectorType(int dimension, VectorSimilarityFunction sim) {
+    FieldType vectorType = new FieldType();
+    vectorType.setVectorAttributes(dimension, VectorEncoding.BYTE, sim);
+    vectorType.freeze();
+    return vectorType;
+  }
+
+  /**
+   * Wraps the given columns in a minimal {@link ColumnBatch} reporting {@code numDocs} docs.
+   * Returns a parameterized {@code Iterable<Column>} instead of the raw {@code Iterable},
+   * avoiding an unchecked conversion at the override site.
+   */
+  private static ColumnBatch simpleBatch(int numDocs, Column... columns) {
+    return new ColumnBatch() {
+      @Override
+      public int numDocs() {
+        return numDocs;
+      }
+
+      @Override
+      public Iterable<Column> columns() {
+        return List.of(columns);
+      }
+    };
+  }
+
+  /** Sparse {@link LongColumn} test implementation: {@code ids[i]} carries {@code longs[i]}. */
+  private static class ArrayLongColumn extends LongColumn {
+    private final int[] ids;
+    private final long[] longs;
+
+    ArrayLongColumn(String name, IndexableFieldType fieldType, int[] docIds, long[] values) {
+      super(name, fieldType, Density.SPARSE);
+      assert docIds.length == values.length;
+      this.ids = docIds;
+      this.longs = values;
+    }
+
+    ArrayLongColumn(
+        String name,
+        IndexableFieldType fieldType,
+        NumericKind numericKind,
+        int[] docIds,
+        long[] values) {
+      super(name, fieldType, Density.SPARSE, numericKind);
+      assert docIds.length == values.length;
+      this.ids = docIds;
+      this.longs = values;
+    }
+
+    @Override
+    public LongTupleCursor tuples() {
+      return new LongTupleCursor() {
+        private int cursor = -1;
+
+        @Override
+        public int nextDoc() {
+          cursor++;
+          if (cursor >= ids.length) {
+            return DocIdSetIterator.NO_MORE_DOCS;
+          }
+          return ids[cursor];
+        }
+
+        @Override
+        public long longValue() {
+          return longs[cursor];
+        }
+      };
+    }
+  }
+
+  /**
+   * Sparse {@link BinaryColumn} test implementation: {@code ids[i]} carries {@code refs[i]},
+   * stored with a configurable {@link StoredValue.Type} (defaults to BINARY).
+   */
+  private static class ArrayBinaryColumn extends BinaryColumn {
+    private final int[] ids;
+    private final BytesRef[] refs;
+    private final StoredValue.Type storedType;
+
+    ArrayBinaryColumn(String name, IndexableFieldType fieldType, int[] docIds, BytesRef[] values) {
+      this(name, fieldType, docIds, values, StoredValue.Type.BINARY);
+    }
+
+    ArrayBinaryColumn(
+        String name,
+        IndexableFieldType fieldType,
+        int[] docIds,
+        BytesRef[] values,
+        StoredValue.Type storedType) {
+      super(name, fieldType, Density.SPARSE);
+      assert docIds.length == values.length;
+      this.ids = docIds;
+      this.refs = values;
+      this.storedType = storedType;
+    }
+
+    @Override
+    public StoredValue.Type storedType() {
+      return storedType;
+    }
+
+    @Override
+    public BinaryTupleCursor tuples() {
+      return new BinaryTupleCursor() {
+        private int cursor = -1;
+
+        @Override
+        public int nextDoc() {
+          cursor++;
+          if (cursor >= ids.length) {
+            return DocIdSetIterator.NO_MORE_DOCS;
+          }
+          return ids[cursor];
+        }
+
+        @Override
+        public BytesRef binaryValue() {
+          return refs[cursor];
+        }
+      };
+    }
+  }
+
+  /** Dense {@link LongColumn} with an optional bulk values cursor. */
+  private static class ArrayDenseLongColumn extends LongColumn {
+    // One value per batch doc: values[doc] holds the value for doc id `doc`.
+    private final long[] values;
+
+    ArrayDenseLongColumn(String name, IndexableFieldType fieldType, long[] values) {
+      super(name, fieldType, Density.DENSE);
+      this.values = values;
+    }
+
+    // Tuple view: because the column is dense, the doc id sequence is simply
+    // 0..values.length-1 and each position's value is values[pos].
+    @Override
+    public LongTupleCursor tuples() {
+      return new LongTupleCursor() {
+        int pos = -1;
+
+        @Override
+        public int nextDoc() {
+          pos++;
+          return pos < values.length ? pos : DocIdSetIterator.NO_MORE_DOCS;
+        }
+
+        @Override
+        public long longValue() {
+          return values[pos];
+        }
+      };
+    }
+
+    // Bulk view: nextLong() and fill() share the same read position, so a consumer
+    // may interleave them; both throw IllegalStateException when a read would go
+    // past the last value rather than returning garbage.
+    @Override
+    public LongValuesCursor values() {
+      return new LongValuesCursor() {
+        int pos = 0;
+
+        @Override
+        public int size() {
+          return values.length;
+        }
+
+        @Override
+        public long nextLong() {
+          if (pos >= values.length) {
+            throw new IllegalStateException("LongValuesCursor exhausted: size=" + values.length);
+          }
+          return values[pos++];
+        }
+
+        @Override
+        public void fill(long[] dst, int offset, int length) {
+          if (pos + length > values.length) {
+            throw new IllegalStateException("LongValuesCursor exhausted: size=" + values.length);
+          }
+          System.arraycopy(values, pos, dst, offset, length);
+          pos += length;
+        }
+      };
+    }
+  }
+
+  /** Sparse float-vector test column: {@code docIds[i]} carries {@code values[i]}. */
+  private static class ArrayFloatVectorColumn extends VectorColumn {
+    private final int[] docIds;
+    private final float[][] values;
+
+    ArrayFloatVectorColumn(
+        String name, IndexableFieldType fieldType, int[] docIds, float[][] values) {
+      super(name, fieldType, Density.SPARSE);
+      assert docIds.length == values.length;
+      this.docIds = docIds;
+      this.values = values;
+    }
+
+    // Parameterized return type (was raw VectorTupleCursor) so the compiler checks the
+    // anonymous cursor's float[] element type instead of relying on an unchecked override.
+    @Override
+    public VectorTupleCursor<float[]> tuples() {
+      return new VectorTupleCursor<>() {
+        int pos = -1;
+
+        @Override
+        public int nextDoc() {
+          pos++;
+          return pos < docIds.length ? docIds[pos] : DocIdSetIterator.NO_MORE_DOCS;
+        }
+
+        @Override
+        public float[] vectorValue() {
+          return values[pos];
+        }
+      };
+    }
+  }
+
+  /** Sparse byte-vector test column: {@code docIds[i]} carries {@code values[i]}. */
+  private static class ArrayByteVectorColumn extends VectorColumn {
+    private final int[] docIds;
+    private final byte[][] values;
+
+    ArrayByteVectorColumn(
+        String name, IndexableFieldType fieldType, int[] docIds, byte[][] values) {
+      super(name, fieldType, Density.SPARSE);
+      assert docIds.length == values.length;
+      this.docIds = docIds;
+      this.values = values;
+    }
+
+    // Parameterized return type (was raw VectorTupleCursor) so the compiler checks the
+    // anonymous cursor's byte[] element type instead of relying on an unchecked override.
+    @Override
+    public VectorTupleCursor<byte[]> tuples() {
+      return new VectorTupleCursor<>() {
+        int pos = -1;
+
+        @Override
+        public int nextDoc() {
+          pos++;
+          return pos < docIds.length ? docIds[pos] : DocIdSetIterator.NO_MORE_DOCS;
+        }
+
+        @Override
+        public byte[] vectorValue() {
+          return values[pos];
+        }
+      };
+    }
+  }
+
+  /** Dense float-vector test column: {@code values[doc]} is the vector for doc id {@code doc}. */
+  private static class ArrayDenseFloatVectorColumn extends VectorColumn {
+    private final float[][] values;
+
+    ArrayDenseFloatVectorColumn(String name, IndexableFieldType fieldType, float[][] values) {
+      super(name, fieldType, Density.DENSE);
+      this.values = values;
+    }
+
+    // Parameterized return type (was raw VectorTupleCursor) so the compiler checks the
+    // anonymous cursor's float[] element type instead of relying on an unchecked override.
+    @Override
+    public VectorTupleCursor<float[]> tuples() {
+      return new VectorTupleCursor<>() {
+        int pos = -1;
+
+        @Override
+        public int nextDoc() {
+          pos++;
+          return pos < values.length ? pos : DocIdSetIterator.NO_MORE_DOCS;
+        }
+
+        @Override
+        public float[] vectorValue() {
+          return values[pos];
+        }
+      };
+    }
+  }
+
+  /** Dense byte-vector test column: {@code values[doc]} is the vector for doc id {@code doc}. */
+  private static class ArrayDenseByteVectorColumn extends VectorColumn {
+    private final byte[][] values;
+
+    ArrayDenseByteVectorColumn(String name, IndexableFieldType fieldType, byte[][] values) {
+      super(name, fieldType, Density.DENSE);
+      this.values = values;
+    }
+
+    // Parameterized return type (was raw VectorTupleCursor) so the compiler checks the
+    // anonymous cursor's byte[] element type instead of relying on an unchecked override.
+    @Override
+    public VectorTupleCursor<byte[]> tuples() {
+      return new VectorTupleCursor<>() {
+        int pos = -1;
+
+        @Override
+        public int nextDoc() {
+          pos++;
+          return pos < values.length ? pos : DocIdSetIterator.NO_MORE_DOCS;
+        }
+
+        @Override
+        public byte[] vectorValue() {
+          return values[pos];
+        }
+      };
+    }
+  }
+}