apache · Tim-Brooks · Apr 9, 2026 · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -264,6 +264,8 @@ API Changes
 * GITHUB#15584: Add support for termdoc fields that use custom term freqs (via IndexOptions.DOCS_AND_CUSTOM_FREQS).
   IndexWriter counts their terms rather than summing their freqs.  Use
 
+* GITHUB#15990: Add experimental API to IndexWriter for columnar indexing.
+
 New Features
 ---------------------
 

diff --git a/lucene/core/src/java/module-info.java b/lucene/core/src/java/module-info.java
@@ -76,6 +76,10 @@
   exports org.apache.lucene.codecs.hnsw;
   exports org.apache.lucene.internal.vectorization to
       org.apache.lucene.benchmark.jmh;
+  exports org.apache.lucene.document.column;
+
+  opens org.apache.lucene.document.column to
+      org.apache.lucene.test_framework;
 
   provides org.apache.lucene.analysis.TokenizerFactory with
       org.apache.lucene.analysis.standard.StandardTokenizerFactory;

diff --git a/lucene/core/src/java/org/apache/lucene/document/column/BinaryColumn.java b/lucene/core/src/java/org/apache/lucene/document/column/BinaryColumn.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document.column;
+
+import org.apache.lucene.document.StoredValue;
+import org.apache.lucene.index.IndexableFieldType;
+
+/**
+ * A {@link Column} that provides variable-size binary values via a tuple cursor. Used for {@link
+ * org.apache.lucene.index.DocValuesType#BINARY BINARY}, {@link
+ * org.apache.lucene.index.DocValuesType#SORTED SORTED}, and {@link
+ * org.apache.lucene.index.DocValuesType#SORTED_SET SORTED_SET} doc values, and for stored/indexed
+ * binary or text fields. Values fed to points are passed through unchanged, so callers are
+ * responsible for producing sort-encoded bytes of the correct total length.
+ *
+ * <p>Numeric doc values ({@link org.apache.lucene.index.DocValuesType#NUMERIC NUMERIC} / {@link
+ * org.apache.lucene.index.DocValuesType#SORTED_NUMERIC SORTED_NUMERIC}) and 1-D numeric points (int
+ * / long / float / double) are fed by {@link LongColumn} instead.
+ *
+ * @lucene.experimental
+ */
+public abstract class BinaryColumn extends Column {
+
+  /** Creates a BinaryColumn with the given field name, type, and density. */
+  protected BinaryColumn(String name, IndexableFieldType fieldType, Density density) {
+    super(name, fieldType, density);
+  }
+
+  /**
+   * Returns the {@link StoredValue.Type} to emit when this column is written to stored fields.
+   * This default implementation returns {@link StoredValue.Type#BINARY}; a subclass may override
+   * it to return {@link StoredValue.Type#STRING} instead. Only {@link StoredValue.Type#BINARY}
+   * and {@link StoredValue.Type#STRING} are supported; numeric stored types require {@link
+   * LongColumn}.
+   */
+  public StoredValue.Type storedType() {
+    return StoredValue.Type.BINARY;
+  }
+
+  /** Returns a fresh tuple cursor starting at the beginning of the batch. */
+  public abstract BinaryTupleCursor tuples();
+}
diff --git a/lucene/core/src/java/org/apache/lucene/document/column/BinaryTupleCursor.java b/lucene/core/src/java/org/apache/lucene/document/column/BinaryTupleCursor.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document.column;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * A tuple cursor over a {@link BinaryColumn}. Yields {@code (docID, binaryValue)} pairs.
+ * Batch-local doc-ids are returned in non-decreasing order; the same doc-id may repeat for
+ * multi-valued fields (e.g. {@link org.apache.lucene.index.DocValuesType#SORTED_SET SORTED_SET}).
+ *
+ * @lucene.experimental
+ */
+public abstract class BinaryTupleCursor {
+
+  /** Sole constructor. */
+  protected BinaryTupleCursor() {}
+
+  /**
+   * Advances to the next {@code (docID, binaryValue)} tuple and returns its doc-id, or {@link
+   * DocIdSetIterator#NO_MORE_DOCS} if exhausted. Doc-ids are batch-local (0 to {@code numDocs -
+   * 1}) and may repeat on consecutive calls for multi-valued fields.
+   */
+  public abstract int nextDoc();
+
+  /**
+   * Returns the value at the current cursor position. Only valid until the next call to {@link
+   * #nextDoc()}, and only after a {@code nextDoc()} that returned a value other than {@link
+   * DocIdSetIterator#NO_MORE_DOCS}.
+   */
+  public abstract BytesRef binaryValue();
+}
diff --git a/lucene/core/src/java/org/apache/lucene/document/column/Column.java b/lucene/core/src/java/org/apache/lucene/document/column/Column.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document.column;
+
+import java.util.Objects;
+import org.apache.lucene.index.IndexableFieldType;
+
+/**
+ * A single field's values across multiple documents in a {@link ColumnBatch}. A Column carries only
+ * metadata (name, field type, and density); iteration is performed via cursors obtained from {@link
+ * LongColumn}, {@link BinaryColumn}, or {@link VectorColumn}.
+ *
+ * <p>Each call that requests a cursor returns a fresh cursor starting at the beginning of the
+ * batch, so columns can be consumed multiple times (for example, once in the row-oriented pass
+ * for stored fields and again in the column-oriented pass for doc values).
+ *
+ * @lucene.experimental
+ */
+public abstract class Column {
+
+  /**
+   * Whether a column has a value for every document in the batch. This is a contract the column
+   * asserts up-front so the indexing chain can pick the right code path without probing the data.
+   */
+  public enum Density {
+    /** The column has a value for every batch-local doc-id in {@code [0, numDocs)}, in order. */
+    DENSE,
+    /** The column may be missing values or have multiple values for some doc-ids. */
+    SPARSE,
+  }
+
+  private final String name;
+  private final IndexableFieldType fieldType;
+  private final Density density;
+
+  /**
+   * Creates a Column with the given field name, type, and density, none of which may be null.
+   *
+   * @param name the field name
+   * @param fieldType describes how this field should be indexed
+   * @param density whether this column has a value for every document in the batch
+   */
+  protected Column(String name, IndexableFieldType fieldType, Density density) {
+    this.name = Objects.requireNonNull(name, "field name must not be null");
+    this.fieldType = Objects.requireNonNull(fieldType, "field type must not be null");
+    this.density = Objects.requireNonNull(density, "density must not be null");
+  }
+
+  /** Returns the field name. */
+  public String name() {
+    return name;
+  }
+
+  /** Returns the field type describing how this field is indexed. */
+  public IndexableFieldType fieldType() {
+    return fieldType;
+  }
+
+  /** Returns the density of this column (whether every doc has a value). */
+  public Density density() {
+    return density;
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/document/column/ColumnBatch.java b/lucene/core/src/java/org/apache/lucene/document/column/ColumnBatch.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document.column;
+
+/**
+ * A column-oriented batch of documents for indexing. A ColumnBatch contains a collection of {@link
+ * Column}s, where each Column represents a single field across all documents in the batch.
+ * Documents are identified by batch-local IDs from 0 (inclusive) to {@link #numDocs()} (exclusive).
+ *
+ * @lucene.experimental
+ */
+public abstract class ColumnBatch {
+
+  /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+  protected ColumnBatch() {}
+
+  /**
+   * Returns the number of documents in this batch. All column doc-ids must be in the range {@code
+   * [0, numDocs())}.
+   */
+  public abstract int numDocs();
+
+  /**
+   * Returns the columns in this batch. Each column represents a single field across the documents
+   * in the batch.
+   */
+  public abstract Iterable<Column> columns();
+}