diff options
author | Jim Ferenczi <jim.ferenczi@elastic.co> | 2017-06-08 12:10:46 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-06-08 12:10:46 +0200 |
commit | 36a5cf8f35e5cbaa1ff857b5a5db8c02edc1a187 (patch) | |
tree | 500eaf53b1f42a7b23171ca2cbb029e4b18da579 /core/src/test/java/org/elasticsearch/search | |
parent | 21a57c14945fb0b82d2b78a2c89e0d92bbc086a0 (diff) |
Automatically early terminate search query based on index sorting (#24864)
This commit refactors the query phase in order to be able
to automatically detect queries that can be early terminated.
If the index sort matches the query sort, the top docs collection is early terminated
on each segment and the computing of the total number of hits that match the query is delegated to a simple TotalHitCountCollector.
This change also adds a new parameter to the search request called `track_total_hits`.
It indicates if the total number of hits that match the query should be tracked.
If false, queries sorted by the index sort will not try to compute this information and
and will limit the collection to the first N documents per segment.
Aggregations are not impacted and will continue to see every document
even when the index sort matches the query sort and `track_total_hits` is false.
Relates #6720
Diffstat (limited to 'core/src/test/java/org/elasticsearch/search')
3 files changed, 326 insertions, 7 deletions
diff --git a/core/src/test/java/org/elasticsearch/search/SearchHitsTests.java b/core/src/test/java/org/elasticsearch/search/SearchHitsTests.java index c25eb7da81..decfe804a4 100644 --- a/core/src/test/java/org/elasticsearch/search/SearchHitsTests.java +++ b/core/src/test/java/org/elasticsearch/search/SearchHitsTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.search; +import org.apache.lucene.util.TestUtil; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.text.Text; import org.elasticsearch.common.xcontent.ToXContent; @@ -44,7 +45,7 @@ public class SearchHitsTests extends ESTestCase { for (int i = 0; i < searchHits; i++) { hits[i] = SearchHitTests.createTestItem(false); // creating random innerHits could create loops } - long totalHits = randomLong(); + long totalHits = frequently() ? TestUtil.nextLong(random(), 0, Long.MAX_VALUE) : -1; float maxScore = frequently() ? randomFloat() : Float.NaN; return new SearchHits(hits, totalHits, maxScore); } diff --git a/core/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java b/core/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java index 65aa5f992e..2633cb706e 100644 --- a/core/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java +++ b/core/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java @@ -21,7 +21,9 @@ package org.elasticsearch.search.query; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReaderContext; @@ -29,19 +31,30 @@ import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.queries.MinDocQuery; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Collector; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.search.Weight; import org.apache.lucene.store.Directory; import org.elasticsearch.action.search.SearchTask; import org.elasticsearch.index.query.ParsedQuery; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.internal.ScrollContext; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.search.sort.SortAndFormats; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.TestSearchContext; @@ -49,6 +62,14 @@ import java.io.IOException; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; +import static org.hamcrest.Matchers.anyOf; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.lessThan; +import static org.hamcrest.Matchers.nullValue; + public class QueryPhaseTests extends ESTestCase { private void countTestCase(Query query, IndexReader reader, boolean shouldCollect) throws Exception { @@ -66,7 +87,7 @@ public class QueryPhaseTests extends ESTestCase { } }; - final boolean rescore = QueryPhase.execute(context, contextSearcher); + final boolean rescore = QueryPhase.execute(context, contextSearcher, null); assertFalse(rescore); assertEquals(searcher.count(query), context.queryResult().topDocs().totalHits); assertEquals(shouldCollect, collected.get()); @@ -135,12 +156,12 @@ public class QueryPhaseTests extends ESTestCase { } }; - QueryPhase.execute(context, contextSearcher); + QueryPhase.execute(context, contextSearcher, null); assertEquals(0, context.queryResult().topDocs().totalHits); assertFalse(collected.get()); context.parsedPostFilter(new ParsedQuery(new MatchNoDocsQuery())); - QueryPhase.execute(context, contextSearcher); + QueryPhase.execute(context, contextSearcher, null); assertEquals(0, context.queryResult().topDocs().totalHits); assertTrue(collected.get()); } @@ -159,14 +180,264 @@ public class QueryPhaseTests extends ESTestCase { } }; - QueryPhase.execute(context, contextSearcher); + QueryPhase.execute(context, contextSearcher, null); assertEquals(0, context.queryResult().topDocs().totalHits); assertFalse(collected.get()); context.minimumScore(1); - QueryPhase.execute(context, contextSearcher); + QueryPhase.execute(context, contextSearcher, null); assertEquals(0, context.queryResult().topDocs().totalHits); assertTrue(collected.get()); } + public void testInOrderScrollOptimization() throws Exception { + Directory dir = newDirectory(); + final Sort sort = new Sort(new SortField("rank", SortField.Type.INT)); + IndexWriterConfig iwc = newIndexWriterConfig() + .setIndexSort(sort); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + final int numDocs = scaledRandomIntBetween(100, 200); + for (int i = 0; i < numDocs; ++i) { + w.addDocument(new Document()); + } + w.close(); + final AtomicBoolean collected = new AtomicBoolean(); + IndexReader reader = DirectoryReader.open(dir); + IndexSearcher contextSearcher = new IndexSearcher(reader) { + protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException { + collected.set(true); + super.search(leaves, weight, collector); + } + }; + + TestSearchContext context = new TestSearchContext(null); + context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery())); + ScrollContext scrollContext = new ScrollContext(); + scrollContext.lastEmittedDoc = null; + scrollContext.maxScore = Float.NaN; + scrollContext.totalHits = -1; + context.scrollContext(scrollContext); + context.setTask(new SearchTask(123L, "", "", "", null)); + context.setSize(10); + + QueryPhase.execute(context, contextSearcher, null); + assertThat(context.queryResult().topDocs().totalHits, equalTo(numDocs)); + assertTrue(collected.get()); + assertNull(context.queryResult().terminatedEarly()); + assertThat(context.terminateAfter(), equalTo(0)); + assertThat(context.queryResult().getTotalHits(), equalTo(numDocs)); + + QueryPhase.execute(context, contextSearcher, null); + assertThat(context.queryResult().topDocs().totalHits, equalTo(numDocs)); + assertTrue(collected.get()); + assertTrue(context.queryResult().terminatedEarly()); + assertThat(context.terminateAfter(), equalTo(10)); + assertThat(context.queryResult().getTotalHits(), equalTo(numDocs)); + assertThat(context.queryResult().topDocs().scoreDocs[0].doc, greaterThanOrEqualTo(10)); + reader.close(); + dir.close(); + } + + public void testTerminateAfterEarlyTermination() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + final int numDocs = scaledRandomIntBetween(100, 200); + for (int i = 0; i < numDocs; ++i) { + Document doc = new Document(); + if (randomBoolean()) { + doc.add(new StringField("foo", "bar", Store.NO)); + } + if (randomBoolean()) { + doc.add(new StringField("foo", "baz", Store.NO)); + } + doc.add(new NumericDocValuesField("rank", numDocs - i)); + w.addDocument(doc); + } + w.close(); + TestSearchContext context = new TestSearchContext(null); + context.setTask(new SearchTask(123L, "", "", "", null)); + context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery())); + context.terminateAfter(1); + + final AtomicBoolean collected = new AtomicBoolean(); + final IndexReader reader = DirectoryReader.open(dir); + IndexSearcher contextSearcher = new IndexSearcher(reader) { + protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException { + collected.set(true); + super.search(leaves, weight, collector); + } + }; + + { + context.setSize(1); + QueryPhase.execute(context, contextSearcher, null); + assertTrue(collected.get()); + assertTrue(context.queryResult().terminatedEarly()); + assertThat(context.queryResult().topDocs().totalHits, equalTo(1)); + assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(1)); + + context.setSize(0); + QueryPhase.execute(context, contextSearcher, null); + assertTrue(collected.get()); + assertTrue(context.queryResult().terminatedEarly()); + assertThat(context.queryResult().topDocs().totalHits, equalTo(1)); + assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(0)); + } + + { + context.setSize(1); + QueryPhase.execute(context, contextSearcher, null); + assertTrue(collected.get()); + assertTrue(context.queryResult().terminatedEarly()); + assertThat(context.queryResult().topDocs().totalHits, equalTo(1)); + assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(1)); + } + { + context.setSize(1); + BooleanQuery bq = new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD) + .build(); + context.parsedQuery(new ParsedQuery(bq)); + collected.set(false); + QueryPhase.execute(context, contextSearcher, null); + assertTrue(collected.get()); + assertTrue(context.queryResult().terminatedEarly()); + assertThat(context.queryResult().topDocs().totalHits, equalTo(1)); + assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(1)); + + context.setSize(0); + context.parsedQuery(new ParsedQuery(bq)); + collected.set(false); + QueryPhase.execute(context, contextSearcher, null); + assertTrue(collected.get()); + assertTrue(context.queryResult().terminatedEarly()); + assertThat(context.queryResult().topDocs().totalHits, equalTo(1)); + assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(0)); + } + { + context.setSize(1); + collected.set(false); + TotalHitCountCollector collector = new TotalHitCountCollector(); + context.queryCollectors().put(TotalHitCountCollector.class, collector); + QueryPhase.execute(context, contextSearcher, null); + assertTrue(collected.get()); + assertTrue(context.queryResult().terminatedEarly()); + assertThat(context.queryResult().topDocs().totalHits, equalTo(1)); + assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(1)); + } + { + context.setSize(0); + collected.set(false); + TotalHitCountCollector collector = new TotalHitCountCollector(); + context.queryCollectors().put(TotalHitCountCollector.class, collector); + QueryPhase.execute(context, contextSearcher, null); + assertTrue(collected.get()); + assertTrue(context.queryResult().terminatedEarly()); + assertThat(context.queryResult().topDocs().totalHits, equalTo(1)); + assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(0)); + assertThat(collector.getTotalHits(), equalTo(1)); + } + + reader.close(); + dir.close(); + } + + public void testIndexSortingEarlyTermination() throws Exception { + Directory dir = newDirectory(); + final Sort sort = new Sort(new SortField("rank", SortField.Type.INT)); + IndexWriterConfig iwc = newIndexWriterConfig() + .setIndexSort(sort); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + final int numDocs = scaledRandomIntBetween(100, 200); + for (int i = 0; i < numDocs; ++i) { + Document doc = new Document(); + if (randomBoolean()) { + doc.add(new StringField("foo", "bar", Store.NO)); + } + if (randomBoolean()) { + doc.add(new StringField("foo", "baz", Store.NO)); + } + doc.add(new NumericDocValuesField("rank", numDocs - i)); + w.addDocument(doc); + } + w.close(); + + TestSearchContext context = new TestSearchContext(null); + context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery())); + context.setSize(1); + context.setTask(new SearchTask(123L, "", "", "", null)); + context.sort(new SortAndFormats(sort, new DocValueFormat[] {DocValueFormat.RAW})); + + final AtomicBoolean collected = new AtomicBoolean(); + final IndexReader reader = DirectoryReader.open(dir); + IndexSearcher contextSearcher = new IndexSearcher(reader) { + protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException { + collected.set(true); + super.search(leaves, weight, collector); + } + }; + QueryPhase.execute(context, contextSearcher, sort); + assertTrue(collected.get()); + assertTrue(context.queryResult().terminatedEarly()); + assertThat(context.queryResult().topDocs().totalHits, equalTo(numDocs)); + assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(1)); + assertThat(context.queryResult().topDocs().scoreDocs[0], instanceOf(FieldDoc.class)); + FieldDoc fieldDoc = (FieldDoc) context.queryResult().topDocs().scoreDocs[0]; + assertThat(fieldDoc.fields[0], equalTo(1)); + + + { + collected.set(false); + context.parsedPostFilter(new ParsedQuery(new MinDocQuery(1))); + QueryPhase.execute(context, contextSearcher, sort); + assertTrue(collected.get()); + assertTrue(context.queryResult().terminatedEarly()); + assertThat(context.queryResult().topDocs().totalHits, equalTo(numDocs - 1)); + assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(1)); + assertThat(context.queryResult().topDocs().scoreDocs[0], instanceOf(FieldDoc.class)); + assertThat(fieldDoc.fields[0], anyOf(equalTo(1), equalTo(2))); + context.parsedPostFilter(null); + + final TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector(); + context.queryCollectors().put(TotalHitCountCollector.class, totalHitCountCollector); + collected.set(false); + QueryPhase.execute(context, contextSearcher, sort); + assertTrue(collected.get()); + assertTrue(context.queryResult().terminatedEarly()); + assertThat(context.queryResult().topDocs().totalHits, equalTo(numDocs)); + assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(1)); + assertThat(context.queryResult().topDocs().scoreDocs[0], instanceOf(FieldDoc.class)); + assertThat(fieldDoc.fields[0], anyOf(equalTo(1), equalTo(2))); + assertThat(totalHitCountCollector.getTotalHits(), equalTo(numDocs)); + context.queryCollectors().clear(); + } + + { + collected.set(false); + context.trackTotalHits(false); + QueryPhase.execute(context, contextSearcher, sort); + assertTrue(collected.get()); + assertTrue(context.queryResult().terminatedEarly()); + assertThat(context.queryResult().topDocs().totalHits, lessThan(numDocs)); + assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(1)); + assertThat(context.queryResult().topDocs().scoreDocs[0], instanceOf(FieldDoc.class)); + assertThat(fieldDoc.fields[0], anyOf(equalTo(1), equalTo(2))); + + final TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector(); + context.queryCollectors().put(TotalHitCountCollector.class, totalHitCountCollector); + collected.set(false); + QueryPhase.execute(context, contextSearcher, sort); + assertTrue(collected.get()); + assertTrue(context.queryResult().terminatedEarly()); + assertThat(context.queryResult().topDocs().totalHits, lessThan(numDocs)); + assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(1)); + assertThat(context.queryResult().topDocs().scoreDocs[0], instanceOf(FieldDoc.class)); + assertThat(fieldDoc.fields[0], anyOf(equalTo(1), equalTo(2))); + assertThat(totalHitCountCollector.getTotalHits(), equalTo(numDocs)); + } + reader.close(); + dir.close(); + } } diff --git a/core/src/test/java/org/elasticsearch/search/simple/SimpleSearchIT.java b/core/src/test/java/org/elasticsearch/search/simple/SimpleSearchIT.java index f63f13b6dd..c4bb4a811a 100644 --- a/core/src/test/java/org/elasticsearch/search/simple/SimpleSearchIT.java +++ b/core/src/test/java/org/elasticsearch/search/simple/SimpleSearchIT.java @@ -30,7 +30,9 @@ import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.search.SearchContextException; import org.elasticsearch.search.rescore.QueryRescorerBuilder; +import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.test.ESIntegTestCase; import java.util.ArrayList; @@ -51,6 +53,8 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertFail import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.lessThanOrEqualTo; public class SimpleSearchIT extends ESIntegTestCase { @@ -285,7 +289,50 @@ public class SimpleSearchIT extends ESIntegTestCase { .setTerminateAfter(2 * max).execute().actionGet(); assertHitCount(searchResponse, max); - assertFalse(searchResponse.isTerminatedEarly()); + assertNull(searchResponse.isTerminatedEarly()); + } + + public void testSimpleIndexSortEarlyTerminate() throws Exception { + prepareCreate("test") + .setSettings(Settings.builder() + .put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .put("index.sort.field", "rank") + ) + .addMapping("type1", "rank", "type=integer") + .get(); + ensureGreen(); + int max = randomIntBetween(3, 29); + List<IndexRequestBuilder> docbuilders = new ArrayList<>(max); + + for (int i = max-1; i >= 0; i--) { + String id = String.valueOf(i); + docbuilders.add(client().prepareIndex("test", "type1", id).setSource("rank", i)); + } + + indexRandom(true, docbuilders); + ensureGreen(); + refresh(); + + SearchResponse searchResponse; + boolean hasEarlyTerminated = false; + for (int i = 1; i < max; i++) { + searchResponse = client().prepareSearch("test") + .addDocValueField("rank") + .setTrackTotalHits(false) + .addSort("rank", SortOrder.ASC) + .setSize(i).execute().actionGet(); + assertThat(searchResponse.getHits().getTotalHits(), equalTo(-1L)); + if (searchResponse.isTerminatedEarly() != null) { + assertTrue(searchResponse.isTerminatedEarly()); + hasEarlyTerminated = true; + } + for (int j = 0; j < i; j++) { + assertThat(searchResponse.getHits().getAt(j).field("rank").getValue(), + equalTo((long) j)); + } + } + assertTrue(hasEarlyTerminated); } public void testInsaneFromAndSize() throws Exception { |