Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ API Changes
* GITHUB#14873: Change PriorityQueue to take a LessThan interface implementation
in its constructor rather than overriding the lessThan abstract method (Simon Cooper)

* GITHUB#14996: BitSet no longer implements Bits. (Adrien Grand)

New Features
---------------------
* GITHUB#14097: Binary partitioning merge policy over float-valued vector field. (Mike Sokolov)
Expand Down
5 changes: 5 additions & 0 deletions lucene/MIGRATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ long maxRamBytesUsed = 50 * 1024 * 1024; // 50MB
IndexSearcher.setDefaultQueryCache(new LRUQueryCache(maxCachedQueries, maxRamBytesUsed));
```

### BitSet no longer implements Bits

You should call `BitSet#asReadOnlyBits` if you would like to use a `BitSet` as
`Bits`.

## Migration from Lucene 9.x to Lucene 10.0

### DataInput#readVLong() may now read negative vlongs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,9 @@ FlushedSegment flush(DocumentsWriter.FlushNotifications flushNotifications) thro
flushState.softDelCountOnFlush = 0;
} else {
flushState.softDelCountOnFlush =
PendingSoftDeletes.countSoftDeletes(softDeletedDocs, flushState.liveDocs);
PendingSoftDeletes.countSoftDeletes(
softDeletedDocs,
flushState.liveDocs == null ? null : flushState.liveDocs.asReadOnlyBits());
assert flushState.segmentInfo.maxDoc()
>= flushState.softDelCountOnFlush + flushState.delCountOnFlush;
}
Expand Down Expand Up @@ -654,9 +656,11 @@ void sealFlushedSegment(
if (sortMap == null) {
bits = flushedSegment.liveDocs;
} else {
bits = sortLiveDocs(flushedSegment.liveDocs, sortMap);
bits = sortLiveDocs(flushedSegment.liveDocs.asReadOnlyBits(), sortMap);
}
codec.liveDocsFormat().writeLiveDocs(bits, directory, info, delCount, context);
codec
.liveDocsFormat()
.writeLiveDocs(bits.asReadOnlyBits(), directory, info, delCount, context);
newSegment.setDelCount(delCount);
newSegment.advanceDelGen();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,8 @@ class BufferedUpdateIterator {
this.lookAheadTermIterator =
termSortState != null ? termValues.iterator(termSortState) : null;
this.byteValuesIterator = isNumeric ? null : byteValues.iterator();
updatesWithValue = hasValues == null ? new Bits.MatchAllBits(numUpdates) : hasValues;
updatesWithValue =
hasValues == null ? new Bits.MatchAllBits(numUpdates) : hasValues.asReadOnlyBits();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,17 +133,18 @@ static LeafReader wrap(LeafReader reader, String field) throws IOException {
return reader;
}
Bits liveDocs = reader.getLiveDocs();
final FixedBitSet bits;
final FixedBitSet bitSet;
if (liveDocs != null) {
bits = FixedBitSet.copyOf(liveDocs);
bitSet = FixedBitSet.copyOf(liveDocs);
} else {
bits = new FixedBitSet(reader.maxDoc());
bits.set(0, reader.maxDoc());
bitSet = new FixedBitSet(reader.maxDoc());
bitSet.set(0, reader.maxDoc());
}
int numSoftDeletes = PendingSoftDeletes.applySoftDeletes(iterator, bits);
int numSoftDeletes = PendingSoftDeletes.applySoftDeletes(iterator, bitSet);
if (numSoftDeletes == 0) {
return reader;
}
Bits bits = bitSet.asReadOnlyBits();
int numDeletes = reader.numDeletedDocs() + numSoftDeletes;
int numDocs = reader.maxDoc() - numDeletes;
assert assertDocCounts(numDocs, numSoftDeletes, reader);
Expand Down Expand Up @@ -188,11 +189,11 @@ private static boolean assertDocCounts(

static final class SoftDeletesFilterLeafReader extends FilterLeafReader {
private final LeafReader reader;
private final FixedBitSet bits;
private final Bits bits;
private final int numDocs;
private final CacheHelper readerCacheHelper;

private SoftDeletesFilterLeafReader(LeafReader reader, FixedBitSet bits, int numDocs) {
private SoftDeletesFilterLeafReader(LeafReader reader, Bits bits, int numDocs) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

curious why we are changing these signatures - it seems the call sites still always use FixedBitSet?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Bits are indeed always created bia FixedBitSet#asReadOnlyBits but I liked doing it on the call site better than in this constructor. Since it's a private constructor, it doesn't break users.

super(reader);
this.reader = reader;
this.bits = bits;
Expand Down Expand Up @@ -226,11 +227,11 @@ public CacheHelper getReaderCacheHelper() {

static final class SoftDeletesFilterCodecReader extends FilterCodecReader {
private final LeafReader reader;
private final FixedBitSet bits;
private final Bits bits;
private final int numDocs;
private final CacheHelper readerCacheHelper;

private SoftDeletesFilterCodecReader(CodecReader reader, FixedBitSet bits, int numDocs) {
private SoftDeletesFilterCodecReader(CodecReader reader, Bits bits, int numDocs) {
super(reader);
this.reader = reader;
this.bits = bits;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ public int length() {
+ " maxDoc: "
+ reader.maxDoc();
return FilterCodecReader.wrapLiveDocs(
reader, cloneLiveDocs, reader.numDocs() + numExtraLiveDocs);
reader, cloneLiveDocs.asReadOnlyBits(), reader.numDocs() + numExtraLiveDocs);
} else {
return reader;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,9 @@ private TopDocs getLeafResults(

// Perform the approximate kNN search
// We pass cost + 1 here to account for the edge case when we explore exactly cost vectors
TopDocs results = approximateSearch(ctx, acceptDocs, cost + 1, timeLimitingKnnCollectorManager);
TopDocs results =
approximateSearch(
ctx, acceptDocs.asReadOnlyBits(), cost + 1, timeLimitingKnnCollectorManager);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Within the HNSW format, we do this:

 if (acceptDocs instanceof BitSet bitSet) {
      // Use approximate cardinality as this is good enough, but ensure we don't exceed the graph
      // size as that is illogical
      filteredDocCount = Math.min(bitSet.approximateCardinality(), graph.size());
      if (unfilteredVisit >= filteredDocCount) {
        doHnsw = false;
      }
    }

We determine the filter size via casting the Bits to BitSet. So, I am not sure we can make this change without progressing: #15011

Or bits should give cardinality information...


if ((results.totalHits.relation() == TotalHits.Relation.EQUAL_TO
// We know that there are more than `perLeafTopK` available docs, if we didn't even get
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,11 @@ protected boolean match(int doc) {

// Perform an approximate search
TopDocs results =
approximateSearch(context, acceptDocs, cardinality, timeLimitingKnnCollectorManager);
approximateSearch(
context,
acceptDocs.asReadOnlyBits(),
cardinality,
timeLimitingKnnCollectorManager);

if (results.totalHits.relation() == TotalHits.Relation.EQUAL_TO
// Return partial results only when timeout is met
Expand Down
25 changes: 24 additions & 1 deletion lucene/core/src/java/org/apache/lucene/util/BitSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@
*
* @lucene.internal
*/
public abstract class BitSet implements Bits, Accountable {
// NOTE: It doesn't implement `Bits` on purpose to avoid mistakenly increasing polymorphism by
// passing either a BitSet or the result of BitSet#asReadOnlyBits to functions that take a `Bits`
// parameter.
public abstract class BitSet implements Accountable {

/**
* Build a {@link BitSet} from the content of the provided {@link DocIdSetIterator}. NOTE: this
Expand Down Expand Up @@ -53,6 +56,18 @@ public void clear() {
clear(0, length());
}

/** Returns the number of bits in this set */
public abstract int length();

/**
* Returns the value of the bit with the specified <code>index</code>.
*
* @param index index, should be non-negative and &lt; {@link #length()}. The result of passing
* negative or out of bounds values is undefined by this interface, <b>just don't do it!</b>
* @return <code>true</code> if the bit is set, <code>false</code> otherwise.
*/
public abstract boolean get(int index);

/** Set the bit at <code>i</code>. */
public abstract void set(int i);

Expand Down Expand Up @@ -111,6 +126,14 @@ protected final void checkUnpositioned(DocIdSetIterator iter) {
}
}

/**
* Convert this instance to read-only {@link Bits}. This is useful in the case that this {@link
* BitSet} is returned as a {@link Bits} instance, to make sure that consumers may not get write
* access back by casting to a {@link BitSet}. NOTE: Changes to this {@link BitSet} will be
* reflected on the returned {@link Bits}.
*/
public abstract Bits asReadOnlyBits();

/**
* Does in-place OR of the bits provided by the iterator. The state of the iterator after this
* operation terminates is undefined.
Expand Down
41 changes: 7 additions & 34 deletions lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -784,48 +784,21 @@ public int hashCode() {
public static FixedBitSet copyOf(Bits bits) {
if (bits instanceof FixedBits fixedBits) {
// restore the original FixedBitSet
bits = fixedBits.bitSet;
return fixedBits.bitSet.clone();
}

if (bits instanceof FixedBitSet) {
return ((FixedBitSet) bits).clone();
} else {
int length = bits.length();
FixedBitSet bitSet = new FixedBitSet(length);
bitSet.set(0, length);
for (int i = 0; i < length; ++i) {
if (bits.get(i) == false) {
bitSet.clear(i);
}
}
return bitSet;
}
int length = bits.length();
FixedBitSet bitSet = new FixedBitSet(length);
bitSet.set(0, length);
bits.applyMask(bitSet, 0);
return bitSet;
}

/**
* Convert this instance to read-only {@link Bits}. This is useful in the case that this {@link
* FixedBitSet} is returned as a {@link Bits} instance, to make sure that consumers may not get
* write access back by casting to a {@link FixedBitSet}. NOTE: Changes to this {@link
* FixedBitSet} will be reflected on the returned {@link Bits}.
*/
@Override
public Bits asReadOnlyBits() {
return new FixedBits(bits, numBits);
}

@Override
public void applyMask(FixedBitSet bitSet, int offset) {
// Note: Some scorers don't track maxDoc and may thus call this method with an offset that is
// beyond bitSet.length()
int length = Math.min(bitSet.length(), length() - offset);
if (length >= 0) {
andRange(this, offset, bitSet, 0, length);
}
if (length < bitSet.length()
&& bitSet.nextSetBit(Math.max(0, length)) != DocIdSetIterator.NO_MORE_DOCS) {
throw new IllegalArgumentException("Some bits are set beyond the end of live docs");
}
}

/**
* For each set bit from {@code from} inclusive to {@code to} exclusive, add {@code base} to the
* bit index and call {@code consumer} on it. This is internally used by queries that use bit sets
Expand Down
15 changes: 13 additions & 2 deletions lucene/core/src/java/org/apache/lucene/util/FixedBits.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
*/
package org.apache.lucene.util;

import org.apache.lucene.search.DocIdSetIterator;

/** Immutable twin of FixedBitSet. */
final class FixedBits implements Bits {

Expand All @@ -31,8 +33,17 @@ public boolean get(int index) {
}

@Override
public void applyMask(FixedBitSet dest, int offset) {
bitSet.applyMask(dest, offset);
public void applyMask(FixedBitSet bitSet, int offset) {
// Note: Some scorers don't track maxDoc and may thus call this method with an offset that is
// beyond bitSet.length()
int length = Math.min(bitSet.length(), length() - offset);
if (length >= 0) {
FixedBitSet.andRange(this.bitSet, offset, bitSet, 0, length);
}
if (length < bitSet.length()
&& bitSet.nextSetBit(Math.max(0, length)) != DocIdSetIterator.NO_MORE_DOCS) {
throw new IllegalArgumentException("Some bits are set beyond the end of live docs");
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this comment refers to "live docs" but the method is generic and could apply to other kinds of bit set applications - maybe change to "beyond the end of the bit set"?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I'll fix.

}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,11 @@ public void or(DocIdSetIterator it) throws IOException {
}
}

@Override
public Bits asReadOnlyBits() {
return new SparseFixedBits(this);
}

@Override
public long ramBytesUsed() {
return ramBytesUsed;
Expand Down
37 changes: 37 additions & 0 deletions lucene/core/src/java/org/apache/lucene/util/SparseFixedBits.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;

/** Unmodifiable {@link Bits} view on a {@link SparseFixedBitSet}. */
final class SparseFixedBits implements Bits {

final SparseFixedBitSet bitSet;

SparseFixedBits(SparseFixedBitSet bitSet) {
this.bitSet = bitSet;
}

@Override
public boolean get(int index) {
return bitSet.get(index);
}

@Override
public int length() {
return bitSet.length();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.knn.KnnSearchStrategy;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.hnsw.HnswUtil.Component;
Expand Down Expand Up @@ -464,6 +465,7 @@ private void connectComponents() throws IOException {

private boolean connectComponents(int level) throws IOException {
FixedBitSet notFullyConnected = new FixedBitSet(hnsw.size());
Bits notFullyConnectedAsBits = notFullyConnected.asReadOnlyBits();
int maxConn = M;
if (level == 0) {
maxConn *= 2;
Expand Down Expand Up @@ -501,7 +503,7 @@ private boolean connectComponents(int level) throws IOException {
scorer.setScoringOrdinal(c.start());
// find the closest node in the largest component to the lowest-numbered node in this
// component that has room to make a connection
graphSearcher.searchLevel(beam, scorer, level, eps, hnsw, notFullyConnected);
graphSearcher.searchLevel(beam, scorer, level, eps, hnsw, notFullyConnectedAsBits);
boolean linked = false;
while (beam.size() > 0) {
int c0node = beam.popNode();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ public void testBuildDocMap() {
}
}

final PackedLongValues docMap = MergeState.removeDeletes(maxDoc, liveDocs);
final PackedLongValues docMap = MergeState.removeDeletes(maxDoc, liveDocs.asReadOnlyBits());

// assert the mapping is compact
for (int i = 0, del = 0; i < maxDoc; ++i) {
Expand Down
Loading
Loading