author     Tanguy Leroux <tlrx.dev@gmail.com>   2017-03-02 10:48:29 +0100
committer  GitHub <noreply@github.com>          2017-03-02 10:48:29 +0100
commit     5a668c4add167c5bcd2d4024c61e2dac23f2a036 (patch)
tree       08b36df0742d35046751eb9eca3703fc2a3ba683 /core/src/test/java/org/elasticsearch/search/aggregations/bucket
parent     1228084c1c691be8a481ffe6c6e97a8bc859642b (diff)
Tests: Add unit test for SignificantLongTerms and SignificantStringTerms (#23428)
Relates to #22278
Diffstat (limited to 'core/src/test/java/org/elasticsearch/search/aggregations/bucket')
-rw-r--r--  core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java   68
-rw-r--r--  core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java          91
-rw-r--r--  core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java        92
3 files changed, 251 insertions, 0 deletions
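
The new InternalSignificantTermsTestCase below asserts one core property of reducing shard-level significant_terms results: subset and superset sizes are summed across shards, and per-bucket document counts are merged by key. A minimal stand-alone sketch of that invariant follows; ShardResult and reduce are hypothetical stand-ins for illustration, not the Elasticsearch classes added in this commit.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Hypothetical stand-in for one shard's significant_terms result.
final class ShardResult {
    final long subsetSize;
    final long supersetSize;
    final Map<String, Long> docCountPerTerm;

    ShardResult(long subsetSize, long supersetSize, Map<String, Long> docCountPerTerm) {
        this.subsetSize = subsetSize;
        this.supersetSize = supersetSize;
        this.docCountPerTerm = docCountPerTerm;
    }
}

public class ReduceInvariantSketch {

    // Reducing shard results sums the sizes and merges per-term counts by key,
    // which is the behavior assertReduced verifies against the real classes.
    static ShardResult reduce(List<ShardResult> shards) {
        long subset = 0;
        long superset = 0;
        Map<String, Long> counts = new HashMap<>();
        for (ShardResult shard : shards) {
            subset += shard.subsetSize;
            superset += shard.supersetSize;
            shard.docCountPerTerm.forEach((term, count) -> counts.merge(term, count, Long::sum));
        }
        return new ShardResult(subset, superset, counts);
    }

    public static void main(String[] args) {
        ShardResult a = new ShardResult(10, 100, Map.of("foo", 3L, "bar", 2L));
        ShardResult b = new ShardResult(5, 50, Map.of("foo", 1L));
        ShardResult reduced = reduce(List.of(a, b));

        // Same shape of checks as assertReduced: sizes add up, per-term counts are summed by key.
        if (reduced.subsetSize != 15 || reduced.supersetSize != 150
                || !reduced.docCountPerTerm.equals(Map.of("foo", 4L, "bar", 2L))) {
            throw new AssertionError("reduce invariant violated");
        }
        System.out.println("reduce invariant holds: " + reduced.docCountPerTerm);
    }
}

The concrete test classes in the diff feed randomly generated SignificantLongTerms and SignificantStringTerms instances through the real reduce path and check exactly these aggregate counts.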
diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java
new file mode 100644
index 0000000000..dde1562e1a
--- /dev/null
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.bucket.significant;
+
+import org.elasticsearch.search.aggregations.InternalAggregationTestCase;
+import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+public abstract class InternalSignificantTermsTestCase extends InternalAggregationTestCase<InternalSignificantTerms<?, ?>> {
+
+ @Override
+ protected InternalSignificantTerms createUnmappedInstance(String name,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData) {
+ InternalSignificantTerms<?, ?> testInstance = createTestInstance(name, pipelineAggregators, metaData);
+ return new UnmappedSignificantTerms(name, testInstance.requiredSize, testInstance.minDocCount, pipelineAggregators, metaData);
+ }
+
+ @Override
+ protected void assertReduced(InternalSignificantTerms<?, ?> reduced, List<InternalSignificantTerms<?, ?>> inputs) {
+ assertEquals(inputs.stream().mapToLong(InternalSignificantTerms::getSubsetSize).sum(), reduced.getSubsetSize());
+ assertEquals(inputs.stream().mapToLong(InternalSignificantTerms::getSupersetSize).sum(), reduced.getSupersetSize());
+
+ List<Function<SignificantTerms.Bucket, Long>> counts = Arrays.asList(
+ SignificantTerms.Bucket::getSubsetDf,
+ SignificantTerms.Bucket::getSupersetDf,
+ SignificantTerms.Bucket::getDocCount
+ );
+
+ for (Function<SignificantTerms.Bucket, Long> count : counts) {
+ Map<Object, Long> reducedCounts = toCounts(reduced.getBuckets().stream(), count);
+ Map<Object, Long> totalCounts = toCounts(inputs.stream().map(SignificantTerms::getBuckets).flatMap(List::stream), count);
+
+ Map<Object, Long> expectedReducedCounts = new HashMap<>(totalCounts);
+ expectedReducedCounts.keySet().retainAll(reducedCounts.keySet());
+ assertEquals(expectedReducedCounts, reducedCounts);
+ }
+ }
+
+ private static Map<Object, Long> toCounts(Stream<? extends SignificantTerms.Bucket> buckets,
+ Function<SignificantTerms.Bucket, Long> fn) {
+ return buckets.collect(Collectors.toMap(SignificantTerms.Bucket::getKey, fn, Long::sum));
+ }
+}
diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java
new file mode 100644
index 0000000000..7e80cf6160
--- /dev/null
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.bucket.significant;
+
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
+import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
+import org.junit.Before;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import static org.elasticsearch.search.aggregations.InternalAggregations.EMPTY;
+
+public class SignificantLongTermsTests extends InternalSignificantTermsTestCase {
+
+ private SignificanceHeuristic significanceHeuristic;
+
+ @Before
+ public void setUpSignificanceHeuristic() {
+ significanceHeuristic = randomSignificanceHeuristic();
+ }
+
+ @Override
+ protected InternalSignificantTerms createTestInstance(String name,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData) {
+ DocValueFormat format = DocValueFormat.RAW;
+ int requiredSize = randomIntBetween(1, 5);
+ int shardSize = requiredSize + 2;
+ final int numBuckets = randomInt(shardSize);
+
+ long globalSubsetSize = 0;
+ long globalSupersetSize = 0;
+
+ List<SignificantLongTerms.Bucket> buckets = new ArrayList<>(numBuckets);
+ Set<Long> terms = new HashSet<>();
+ for (int i = 0; i < numBuckets; ++i) {
+ long term = randomValueOtherThanMany(l -> terms.add(l) == false, random()::nextLong);
+
+ int subsetDf = randomIntBetween(1, 10);
+ int supersetDf = randomIntBetween(subsetDf, 20);
+ int supersetSize = randomIntBetween(supersetDf, 30);
+
+ globalSubsetSize += subsetDf;
+ globalSupersetSize += supersetSize;
+
+ buckets.add(new SignificantLongTerms.Bucket(subsetDf, subsetDf, supersetDf, supersetSize, term, EMPTY, format));
+ }
+ return new SignificantLongTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize,
+ globalSupersetSize, significanceHeuristic, buckets);
+ }
+
+ @Override
+ protected Writeable.Reader<InternalSignificantTerms<?, ?>> instanceReader() {
+ return SignificantLongTerms::new;
+ }
+
+ private static SignificanceHeuristic randomSignificanceHeuristic() {
+ return randomFrom(
+ new JLHScore(),
+ new MutualInformation(randomBoolean(), randomBoolean()),
+ new GND(randomBoolean()),
+ new ChiSquare(randomBoolean(), randomBoolean()));
+ }
+}
diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java
new file mode 100644
index 0000000000..f957836b3e
--- /dev/null
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.bucket.significant;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
+import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
+import org.junit.Before;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import static org.elasticsearch.search.aggregations.InternalAggregations.EMPTY;
+
+public class SignificantStringTermsTests extends InternalSignificantTermsTestCase {
+
+ private SignificanceHeuristic significanceHeuristic;
+
+ @Before
+ public void setUpSignificanceHeuristic() {
+ significanceHeuristic = randomSignificanceHeuristic();
+ }
+
+ @Override
+ protected InternalSignificantTerms createTestInstance(String name,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData) {
+ DocValueFormat format = DocValueFormat.RAW;
+ int requiredSize = randomIntBetween(1, 5);
+ int shardSize = requiredSize + 2;
+ final int numBuckets = randomInt(shardSize);
+
+ long globalSubsetSize = 0;
+ long globalSupersetSize = 0;
+
+ List<SignificantStringTerms.Bucket> buckets = new ArrayList<>(numBuckets);
+ Set<BytesRef> terms = new HashSet<>();
+ for (int i = 0; i < numBuckets; ++i) {
+ BytesRef term = randomValueOtherThanMany(b -> terms.add(b) == false, () -> new BytesRef(randomAsciiOfLength(10)));
+
+ int subsetDf = randomIntBetween(1, 10);
+ int supersetDf = randomIntBetween(subsetDf, 20);
+ int supersetSize = randomIntBetween(supersetDf, 30);
+
+ globalSubsetSize += subsetDf;
+ globalSupersetSize += supersetSize;
+
+ buckets.add(new SignificantStringTerms.Bucket(term, subsetDf, subsetDf, supersetDf, supersetSize, EMPTY, format));
+ }
+ return new SignificantStringTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize,
+ globalSupersetSize, significanceHeuristic, buckets);
+ }
+
+ @Override
+ protected Writeable.Reader<InternalSignificantTerms<?, ?>> instanceReader() {
+ return SignificantStringTerms::new;
+ }
+
+ private static SignificanceHeuristic randomSignificanceHeuristic() {
+ return randomFrom(
+ new JLHScore(),
+ new MutualInformation(randomBoolean(), randomBoolean()),
+ new GND(randomBoolean()),
+ new ChiSquare(randomBoolean(), randomBoolean()));
+ }
+}