Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ Improvements

* GITHUB#15453: Avoid unnecessary sorting and instantiations in readMapOfStrings. (Benjamin Lerer)

* GITHUB#15660: Introduce LargeNumHitsTopDocsCollectorManager to parallelize search when using LargeNumHitsTopDocsCollector. (Binlong Gao)

Optimizations
---------------------
* GITHUB#13782: Replace handwritten comparison loops with Arrays.compareUnsigned in TermsEnum and TermsEnumFrame classes. (Zhou Hui)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.search;

import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.TopDocs;

/**
 * {@link CollectorManager} for {@link LargeNumHitsTopDocsCollector} that enables concurrent
 * collection of top docs across multiple segments.
 *
 * <p>Every call to {@link #newCollector()} returns a new collector configured with the same
 * {@code numHits}; {@link #reduce(Collection)} merges the per-collector results into a single
 * {@link TopDocs} holding at most {@code numHits} hits.
 */
public class LargeNumHitsTopDocsCollectorManager
    implements CollectorManager<LargeNumHitsTopDocsCollector, TopDocs> {
  private final int numHits;

  /**
   * Creates a new {@link LargeNumHitsTopDocsCollectorManager} given the number of hits to collect.
   *
   * @param numHits the number of results to collect; must be greater than zero
   * @throws IllegalArgumentException if {@code numHits} is zero or negative
   */
  public LargeNumHitsTopDocsCollectorManager(int numHits) {
    if (numHits <= 0) {
      throw new IllegalArgumentException("numHits must be > 0, got " + numHits);
    }
    this.numHits = numHits;
  }

  /** Returns a new {@link LargeNumHitsTopDocsCollector} sized for {@code numHits} results. */
  @Override
  public LargeNumHitsTopDocsCollector newCollector() {
    return new LargeNumHitsTopDocsCollector(numHits);
  }

  /**
   * Merges the results of all given collectors into one {@link TopDocs} with at most {@code
   * numHits} hits. Collectors that did not collect any hit are left out of the merge.
   *
   * @param collectors the collectors previously handed out by {@link #newCollector()}
   * @return the merged top docs
   */
  @Override
  public TopDocs reduce(Collection<LargeNumHitsTopDocsCollector> collectors) throws IOException {
    // NOTE(review): LargeNumHitsTopDocsCollector#topDocs() appears to reject a request for zero
    // hits with an IllegalArgumentException, which would make the whole reduce fail as soon as
    // one collector saw no matching document -- confirm against the collector. Skipping empty
    // collectors is safe either way because they contribute neither hits nor hit count.
    int nonEmpty = 0;
    for (LargeNumHitsTopDocsCollector collector : collectors) {
      if (collector.totalHits > 0) {
        nonEmpty++;
      }
    }

    final TopDocs[] topDocs = new TopDocs[nonEmpty];
    int i = 0;
    for (LargeNumHitsTopDocsCollector collector : collectors) {
      if (collector.totalHits > 0) {
        topDocs[i++] = collector.topDocs();
      }
    }
    return TopDocs.merge(0, numHits, topDocs);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -82,74 +82,66 @@ public void testRequestLessHitsThanCollected() throws Exception {

public void testIllegalArguments() throws IOException {
IndexSearcher searcher = newSearcher(reader);
LargeNumHitsTopDocsCollector largeCollector = new LargeNumHitsTopDocsCollector(15);
LargeNumHitsTopDocsCollectorManager largeCollectorManager =
new LargeNumHitsTopDocsCollectorManager(15);
TopScoreDocCollectorManager regularCollectorManager =
new TopScoreDocCollectorManager(15, Integer.MAX_VALUE);

searcher.search(testQuery, largeCollector);
TopDocs topDocs = searcher.search(testQuery, regularCollectorManager);
TopDocs largeTopDocs = searcher.search(testQuery, largeCollectorManager);
TopDocs regularTopDocs = searcher.search(testQuery, regularCollectorManager);

assertEquals(largeCollector.totalHits, topDocs.totalHits.value());
assertEquals(largeTopDocs.totalHits.value(), regularTopDocs.totalHits.value());

LargeNumHitsTopDocsCollector collector = largeCollectorManager.newCollector();
IllegalArgumentException expected =
expectThrows(
IllegalArgumentException.class,
() -> {
largeCollector.topDocs(350_000);
});
expectThrows(IllegalArgumentException.class, () -> collector.topDocs(350_000));

assertTrue(expected.getMessage().contains("Incorrect number of hits requested"));
}

public void testNoPQBuild() throws IOException {
IndexSearcher searcher = newSearcher(reader);
LargeNumHitsTopDocsCollector largeCollector = new LargeNumHitsTopDocsCollector(250_000);
LargeNumHitsTopDocsCollectorManager largeCollectorManager =
new LargeNumHitsTopDocsCollectorManager(250_000);
TopScoreDocCollectorManager regularCollectorManager =
new TopScoreDocCollectorManager(reader.numDocs(), Integer.MAX_VALUE);

searcher.search(testQuery, largeCollector);
TopDocs topDocs = searcher.search(testQuery, regularCollectorManager);
TopDocs largeTopDocs = searcher.search(testQuery, largeCollectorManager);
TopDocs regularTopDocs = searcher.search(testQuery, regularCollectorManager);

assertEquals(largeCollector.totalHits, topDocs.totalHits.value());

assertNull(largeCollector.pq);
assertNull(largeCollector.pqTop);
assertEquals(largeTopDocs.totalHits.value(), regularTopDocs.totalHits.value());
CheckHits.checkEqual(testQuery, largeTopDocs.scoreDocs, regularTopDocs.scoreDocs);
}

public void testPQBuild() throws IOException {
IndexSearcher searcher = newSearcher(reader);
LargeNumHitsTopDocsCollector largeCollector = new LargeNumHitsTopDocsCollector(50);
LargeNumHitsTopDocsCollectorManager largeCollectorManager =
new LargeNumHitsTopDocsCollectorManager(50);
TopScoreDocCollectorManager regularCollectorManager =
new TopScoreDocCollectorManager(50, Integer.MAX_VALUE);

searcher.search(testQuery, largeCollector);
TopDocs topDocs = searcher.search(testQuery, regularCollectorManager);

assertEquals(largeCollector.totalHits, topDocs.totalHits.value());
TopDocs largeTopDocs = searcher.search(testQuery, largeCollectorManager);
TopDocs regularTopDocs = searcher.search(testQuery, regularCollectorManager);

assertNotNull(largeCollector.pq);
assertNotNull(largeCollector.pqTop);
assertEquals(largeTopDocs.totalHits.value(), regularTopDocs.totalHits.value());
CheckHits.checkEqual(testQuery, largeTopDocs.scoreDocs, regularTopDocs.scoreDocs);
}

public void testNoPQHitsOrder() throws IOException {
IndexSearcher searcher = newSearcher(reader);
LargeNumHitsTopDocsCollector largeCollector = new LargeNumHitsTopDocsCollector(250_000);
LargeNumHitsTopDocsCollectorManager largeCollectorManager =
new LargeNumHitsTopDocsCollectorManager(250_000);
TopScoreDocCollectorManager regularCollectorManager =
new TopScoreDocCollectorManager(reader.numDocs(), Integer.MAX_VALUE);

searcher.search(testQuery, largeCollector);
TopDocs topDocs = searcher.search(testQuery, regularCollectorManager);

assertEquals(largeCollector.totalHits, topDocs.totalHits.value());
TopDocs largeTopDocs = searcher.search(testQuery, largeCollectorManager);
TopDocs regularTopDocs = searcher.search(testQuery, regularCollectorManager);

assertNull(largeCollector.pq);
assertNull(largeCollector.pqTop);
assertEquals(largeTopDocs.totalHits.value(), regularTopDocs.totalHits.value());

topDocs = largeCollector.topDocs();

if (topDocs.scoreDocs.length > 0) {
float preScore = topDocs.scoreDocs[0].score;
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
if (largeTopDocs.scoreDocs.length > 0) {
float preScore = largeTopDocs.scoreDocs[0].score;
for (ScoreDoc scoreDoc : largeTopDocs.scoreDocs) {
assert scoreDoc.score <= preScore;
preScore = scoreDoc.score;
}
Expand All @@ -158,17 +150,16 @@ public void testNoPQHitsOrder() throws IOException {

private void runNumHits(int numHits) throws IOException {
IndexSearcher searcher = newSearcher(reader);
LargeNumHitsTopDocsCollector largeCollector = new LargeNumHitsTopDocsCollector(numHits);
LargeNumHitsTopDocsCollectorManager largeCollectorManager =
new LargeNumHitsTopDocsCollectorManager(numHits);
TopScoreDocCollectorManager regularCollectorManager =
new TopScoreDocCollectorManager(numHits, Integer.MAX_VALUE);

searcher.search(testQuery, largeCollector);

TopDocs firstTopDocs = largeCollector.topDocs();
TopDocs secondTopDocs = searcher.search(testQuery, regularCollectorManager);
TopDocs largeTopDocs = searcher.search(testQuery, largeCollectorManager);
TopDocs regularTopDocs = searcher.search(testQuery, regularCollectorManager);

assertEquals(largeCollector.totalHits, secondTopDocs.totalHits.value());
assertEquals(firstTopDocs.scoreDocs.length, secondTopDocs.scoreDocs.length);
CheckHits.checkEqual(testQuery, firstTopDocs.scoreDocs, secondTopDocs.scoreDocs);
assertEquals(largeTopDocs.totalHits.value(), regularTopDocs.totalHits.value());
assertEquals(largeTopDocs.scoreDocs.length, regularTopDocs.scoreDocs.length);
CheckHits.checkEqual(testQuery, largeTopDocs.scoreDocs, regularTopDocs.scoreDocs);
}
}
Loading