diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d1939e303f61..5540eeffca09 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -239,6 +239,8 @@ Optimizations * GITHUB#14980: Add bulk off-heap scoring for float32 vectors (Chris Hegarty) +* GITHUB#15045: Use FixedBitSet#cardinality for counting liveDocs in CheckIndex (Zhang Chao) + Changes in Runtime Behavior --------------------- * GITHUB#14823: Decrease TieredMergePolicy's default number of segments per diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 7e98e51bf69b..7d85d6258d57 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -1297,12 +1297,7 @@ public static Status.LiveDocStatus testLiveDocs( if (liveDocs == null) { throw new CheckIndexException("segment should have deletions, but liveDocs is null"); } else { - int numLive = 0; - for (int j = 0; j < liveDocs.length(); j++) { - if (liveDocs.get(j)) { - numLive++; - } - } + int numLive = bitsCardinality(liveDocs); if (numLive != numDocs) { throw new CheckIndexException( "liveDocs count mismatch: info=" + numDocs + ", vs bits=" + numLive); @@ -1348,6 +1343,28 @@ public static Status.LiveDocStatus testLiveDocs( return status; } + /** + * Returns the cardinality of the given {@code Bits}. + * + *
<p>This method processes bits in batches of 1024 using {@link Bits#applyMask} and {@link
+   * FixedBitSet#cardinality}, which is faster than checking bits one by one.
+   */
+  static int bitsCardinality(Bits bits) {
+    int cardinality = 0;
+    FixedBitSet copy = new FixedBitSet(1024);
+    for (int offset = 0, numBitsToCopy; offset < bits.length(); offset += numBitsToCopy) {
+      // Step by the bits consumed so offset never exceeds length() and cannot overflow.
+      numBitsToCopy = Math.min(bits.length() - offset, copy.length());
+      copy.set(0, copy.length());
+      if (numBitsToCopy < copy.length()) {
+        copy.clear(numBitsToCopy, copy.length()); // clear ghost bits past the end of bits
+      }
+      bits.applyMask(copy, offset);
+      cardinality += copy.cardinality();
+    }
+    return cardinality;
+  }
+
   /** Test field infos. */
   public static Status.FieldInfoStatus testFieldInfos(
       CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {