diff options
author | Martijn van Groningen <martijn.v.groningen@gmail.com> | 2017-03-06 20:12:51 +0100 |
---|---|---|
committer | Martijn van Groningen <martijn.v.groningen@gmail.com> | 2017-03-12 16:14:47 -0700 |
commit | b01070a3903fd9c1db01e2b15763df5c0761e935 (patch) | |
tree | 08851c86ae132d1af7813ea6c7e89f7a86e9d5ad /core/src/test/java/org/elasticsearch/search/aggregations/bucket | |
parent | 9d4aff524c439b7be77400fc03030702605f04f9 (diff) |
[TEST] Added unit tests for diversified sampler aggregator.
Diffstat (limited to 'core/src/test/java/org/elasticsearch/search/aggregations/bucket')
-rw-r--r-- | core/src/test/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollectorTests.java | 110 | ||||
-rw-r--r-- | core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollectorTests.java | 113 | ||||
-rw-r--r-- | core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedAggregationBuilderTests.java (renamed from core/src/test/java/org/elasticsearch/search/aggregations/bucket/DiversifiedSamplerTests.java) | 5 | ||||
-rw-r--r-- | core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedSamplerTests.java | 165 |
4 files changed, 390 insertions, 3 deletions
diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollectorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollectorTests.java new file mode 100644 index 0000000000..02cd88f16f --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollectorTests.java @@ -0,0 +1,110 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.bucket; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.BucketCollector; +import org.elasticsearch.search.aggregations.LeafBucketCollector; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +public class BestBucketsDeferringCollectorTests extends AggregatorTestCase { + + public void testReplay() throws Exception { + Directory directory = newDirectory(); + RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); + int numDocs = randomInt(128); + int maxNumValues = randomInt(16); + for (int i = 0; i < numDocs; i++) { + Document document = new Document(); + document.add(new StringField("field", String.valueOf(randomInt(maxNumValues)), Field.Store.NO)); + indexWriter.addDocument(document); + } + + indexWriter.close(); + IndexReader indexReader = DirectoryReader.open(directory); + IndexSearcher indexSearcher = new IndexSearcher(indexReader); + + TermQuery termQuery = new TermQuery(new Term("field", String.valueOf(randomInt(maxNumValues)))); + TopDocs topDocs = indexSearcher.search(termQuery, numDocs); + + SearchContext searchContext = createSearchContext(indexSearcher, createIndexSettings()); + BestBucketsDeferringCollector collector = new BestBucketsDeferringCollector(searchContext); + Set<Integer> deferredCollectedDocIds = new HashSet<>(); + collector.setDeferredCollector(Collections.singleton(bla(deferredCollectedDocIds))); + collector.preCollection(); + indexSearcher.search(termQuery, collector); + collector.postCollection(); + collector.replay(0); + + assertEquals(topDocs.scoreDocs.length, deferredCollectedDocIds.size()); + for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + assertTrue("expected docid [" + scoreDoc.doc + "] is missing", deferredCollectedDocIds.contains(scoreDoc.doc)); + } + indexReader.close(); + directory.close(); + } + + private BucketCollector bla(Set<Integer> docIds) { + return new BucketCollector() { + @Override + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException { + return new LeafBucketCollector() { + @Override + public void collect(int doc, long bucket) throws IOException { + docIds.add(ctx.docBase + doc); + } + }; + } + + @Override + public void preCollection() throws IOException { + + } + + @Override + public void postCollection() throws IOException { + + } + + @Override + public boolean needsScores() { + return false; + } + }; + } + +} diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollectorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollectorTests.java new file mode 100644 index 0000000000..d99f7e9fa7 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollectorTests.java @@ -0,0 +1,113 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.bucket.sampler; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.MockBigArrays; +import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.BucketCollector; +import org.elasticsearch.search.aggregations.LeafBucketCollector; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +public class BestDocsDeferringCollectorTests extends AggregatorTestCase { + + public void testReplay() throws Exception { + Directory directory = newDirectory(); + RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); + int numDocs = randomIntBetween(1, 128); + int maxNumValues = randomInt(16); + for (int i = 0; i < numDocs; i++) { + Document document = new Document(); + document.add(new StringField("field", String.valueOf(randomInt(maxNumValues)), Field.Store.NO)); + indexWriter.addDocument(document); + } + + indexWriter.close(); + IndexReader indexReader = DirectoryReader.open(directory); + IndexSearcher indexSearcher = new IndexSearcher(indexReader); + + TermQuery termQuery = new TermQuery(new Term("field", String.valueOf(randomInt(maxNumValues)))); + TopDocs topDocs = indexSearcher.search(termQuery, numDocs); + + BestDocsDeferringCollector collector = + new BestDocsDeferringCollector(numDocs, new MockBigArrays(Settings.EMPTY, new NoneCircuitBreakerService())); + Set<Integer> deferredCollectedDocIds = new HashSet<>(); + collector.setDeferredCollector(Collections.singleton(testCollector(deferredCollectedDocIds))); + collector.preCollection(); + indexSearcher.search(termQuery, collector); + collector.postCollection(); + collector.replay(0); + + assertEquals(topDocs.scoreDocs.length, deferredCollectedDocIds.size()); + for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + assertTrue("expected docid [" + scoreDoc.doc + "] is missing", deferredCollectedDocIds.contains(scoreDoc.doc)); + } + collector.close(); + indexReader.close(); + directory.close(); + } + + private BucketCollector testCollector(Set<Integer> docIds) { + return new BucketCollector() { + @Override + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException { + return new LeafBucketCollector() { + @Override + public void collect(int doc, long bucket) throws IOException { + docIds.add(ctx.docBase + doc); + } + }; + } + + @Override + public void preCollection() throws IOException { + + } + + @Override + public void postCollection() throws IOException { + + } + + @Override + public boolean needsScores() { + return false; + } + }; + } + +} diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/DiversifiedSamplerTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedAggregationBuilderTests.java index b68caad0ea..eed258bb78 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/DiversifiedSamplerTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedAggregationBuilderTests.java @@ -17,14 +17,13 @@ * under the License. */ -package org.elasticsearch.search.aggregations.bucket; +package org.elasticsearch.search.aggregations.bucket.sampler; import org.elasticsearch.script.Script; import org.elasticsearch.search.aggregations.BaseAggregationTestCase; -import org.elasticsearch.search.aggregations.bucket.sampler.DiversifiedAggregationBuilder; import org.elasticsearch.search.aggregations.bucket.sampler.SamplerAggregator.ExecutionMode; -public class DiversifiedSamplerTests extends BaseAggregationTestCase<DiversifiedAggregationBuilder> { +public class DiversifiedAggregationBuilderTests extends BaseAggregationTestCase<DiversifiedAggregationBuilder> { @Override protected final DiversifiedAggregationBuilder createTestAggregatorBuilder() { diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedSamplerTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedSamplerTests.java new file mode 100644 index 0000000000..4e3e95fd5e --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedSamplerTests.java @@ -0,0 +1,165 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.bucket.sampler; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleDocValuesField; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction; +import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.fielddata.IndexNumericFieldData; +import org.elasticsearch.index.fielddata.plain.SortedNumericDVIndexFieldData; +import org.elasticsearch.index.mapper.KeywordFieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.bucket.terms.Terms; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; + +import java.io.IOException; +import java.util.function.Consumer; + +public class DiversifiedSamplerTests extends AggregatorTestCase { + + public void testDiversifiedSampler() throws Exception { + String data[] = { + // "id,cat,name,price,inStock,author_t,series_t,sequence_i,genre_s,genre_id", + "0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,A Song of Ice and Fire,1,fantasy,0", + "0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,A Song of Ice and Fire,2,fantasy,0", + "055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,A Song of Ice and Fire,3,fantasy,0", + "0553293354,book,Foundation,17.99,true,Isaac Asimov,Foundation Novels,1,scifi,1", + "0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy,0", + "0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi,1", + "0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy,0", + "0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy,0", + "0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy,0", + "080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy,0" + }; + + Directory directory = newDirectory(); + RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); + for (String entry : data) { + String[] parts = entry.split(","); + Document document = new Document(); + document.add(new SortedDocValuesField("id", new BytesRef(parts[0]))); + document.add(new StringField("cat", parts[1], Field.Store.NO)); + document.add(new TextField("name", parts[2], Field.Store.NO)); + document.add(new DoubleDocValuesField("price", Double.valueOf(parts[3]))); + document.add(new StringField("inStock", parts[4], Field.Store.NO)); + document.add(new StringField("author", parts[5], Field.Store.NO)); + document.add(new StringField("series", parts[6], Field.Store.NO)); + document.add(new StringField("sequence", parts[7], Field.Store.NO)); + document.add(new SortedDocValuesField("genre", new BytesRef(parts[8]))); + document.add(new NumericDocValuesField("genre_id", Long.valueOf(parts[9]))); + indexWriter.addDocument(document); + } + + indexWriter.close(); + IndexReader indexReader = DirectoryReader.open(directory); + IndexSearcher indexSearcher = new IndexSearcher(indexReader); + + MappedFieldType genreFieldType = new KeywordFieldMapper.KeywordFieldType(); + genreFieldType.setName("genre"); + genreFieldType.setHasDocValues(true); + Consumer<InternalSampler> verify = result -> { + Terms terms = result.getAggregations().get("terms"); + assertEquals(2, terms.getBuckets().size()); + assertEquals("0805080481", terms.getBuckets().get(0).getKeyAsString()); + assertEquals("0812550706", terms.getBuckets().get(1).getKeyAsString()); + }; + testCase(indexSearcher, genreFieldType, "map", verify); + testCase(indexSearcher, genreFieldType, "global_ordinals", verify); + testCase(indexSearcher, genreFieldType, "bytes_hash", verify); + + genreFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG); + genreFieldType.setName("genre_id"); + testCase(indexSearcher, genreFieldType, null, verify); + + // wrong field: + genreFieldType = new KeywordFieldMapper.KeywordFieldType(); + genreFieldType.setName("wrong_field"); + genreFieldType.setHasDocValues(true); + testCase(indexSearcher, genreFieldType, null, result -> { + Terms terms = result.getAggregations().get("terms"); + assertEquals(1, terms.getBuckets().size()); + assertEquals("0805080481", terms.getBuckets().get(0).getKeyAsString()); + }); + + indexReader.close(); + directory.close(); + } + + private void testCase(IndexSearcher indexSearcher, MappedFieldType genreFieldType, String executionHint, + Consumer<InternalSampler> verify) throws IOException { + MappedFieldType idFieldType = new KeywordFieldMapper.KeywordFieldType(); + idFieldType.setName("id"); + idFieldType.setHasDocValues(true); + + SortedNumericDVIndexFieldData fieldData = new SortedNumericDVIndexFieldData(new Index("index", "index"), "price", + IndexNumericFieldData.NumericType.DOUBLE); + FunctionScoreQuery query = new FunctionScoreQuery(new MatchAllDocsQuery(), + new FieldValueFactorFunction("price", 1, FieldValueFactorFunction.Modifier.RECIPROCAL, null, fieldData)); + + DiversifiedAggregationBuilder builder = new DiversifiedAggregationBuilder("_name") + .field(genreFieldType.name()) + .executionHint(executionHint) + .subAggregation(new TermsAggregationBuilder("terms", null).field("id")); + + InternalSampler result = search(indexSearcher, query, builder, genreFieldType, idFieldType); + verify.accept(result); + } + + public void testDiversifiedSampler_noDocs() throws Exception { + Directory directory = newDirectory(); + RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); + indexWriter.close(); + IndexReader indexReader = DirectoryReader.open(directory); + IndexSearcher indexSearcher = new IndexSearcher(indexReader); + + MappedFieldType idFieldType = new KeywordFieldMapper.KeywordFieldType(); + idFieldType.setName("id"); + idFieldType.setHasDocValues(true); + + MappedFieldType genreFieldType = new KeywordFieldMapper.KeywordFieldType(); + genreFieldType.setName("genre"); + genreFieldType.setHasDocValues(true); + + DiversifiedAggregationBuilder builder = new DiversifiedAggregationBuilder("_name") + .field(genreFieldType.name()) + .subAggregation(new TermsAggregationBuilder("terms", null).field("id")); + + InternalSampler result = search(indexSearcher, new MatchAllDocsQuery(), builder, genreFieldType, idFieldType); + Terms terms = result.getAggregations().get("terms"); + assertEquals(0, terms.getBuckets().size()); + indexReader.close(); + directory.close(); + } +} |