summary | refs | log | tree | commit | diff
path: root/core/src/test/java/org/elasticsearch/search
diff options
context:
space:
mode:
author Tanguy Leroux <tlrx.dev@gmail.com> 2017-06-02 09:45:15 +0200
committer GitHub <noreply@github.com> 2017-06-02 09:45:15 +0200
commit 528bd25fa704da1d234ed364503891c93c6b9d31 (patch)
tree bcb16d48cc7a805b431522646375933ec4e2774e /core/src/test/java/org/elasticsearch/search
parent f4aee1e583b3a6ad17e879cb3503689726f37679 (diff)
Add superset size to Significant Term REST response (#24865)
This commit adds a new bg_count field to the REST response of SignificantTerms aggregations. Similarly to the bg_count that already exists in significant terms buckets, this new bg_count field is set at the aggregation level and is populated with the superset size value.
Diffstat (limited to 'core/src/test/java/org/elasticsearch/search')
-rw-r--r-- core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java | 2
-rw-r--r-- core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java | 66
-rw-r--r-- core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java | 40
-rw-r--r-- core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java | 47
4 files changed, 81 insertions, 74 deletions
diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java
index 394870a2a9..b72c3befa4 100644
--- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java
@@ -310,6 +310,7 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase {
+ "\"doc_count\":4,"
+ "\"sig_terms\":{"
+ "\"doc_count\":4,"
+ + "\"bg_count\":7,"
+ "\"buckets\":["
+ "{"
+ "\"key\":" + (type.equals("long") ? "0," : "\"0\",")
@@ -325,6 +326,7 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase {
+ "\"doc_count\":3,"
+ "\"sig_terms\":{"
+ "\"doc_count\":3,"
+ + "\"bg_count\":7,"
+ "\"buckets\":["
+ "{"
+ "\"key\":" + (type.equals("long") ? "1," : "\"1\",")
diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java
index 9a86e44b2a..238d5234dc 100644
--- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java
@@ -19,8 +19,14 @@
package org.elasticsearch.search.aggregations.bucket.significant;
+import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.InternalMultiBucketAggregationTestCase;
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import java.util.Arrays;
@@ -33,6 +39,51 @@ import java.util.stream.Stream;
public abstract class InternalSignificantTermsTestCase extends InternalMultiBucketAggregationTestCase<InternalSignificantTerms<?, ?>> {
+ private SignificanceHeuristic significanceHeuristic;
+
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ significanceHeuristic = randomSignificanceHeuristic();
+ }
+
+ @Override
+ protected final InternalSignificantTerms createTestInstance(String name,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData,
+ InternalAggregations aggregations) {
+ final int requiredSize = randomIntBetween(1, 5);
+ final int numBuckets = randomInt(requiredSize + 2);
+
+ long subsetSize = 0;
+ long supersetSize = 0;
+
+ int[] subsetDfs = new int[numBuckets];
+ int[] supersetDfs = new int[numBuckets];
+
+ for (int i = 0; i < numBuckets; ++i) {
+ int subsetDf = randomIntBetween(1, 10);
+ subsetDfs[i] = subsetDf;
+
+ int supersetDf = randomIntBetween(subsetDf, 20);
+ supersetDfs[i] = supersetDf;
+
+ subsetSize += subsetDf;
+ supersetSize += supersetDf;
+ }
+ return createTestInstance(name, pipelineAggregators, metaData, aggregations, requiredSize, numBuckets, subsetSize, subsetDfs,
+ supersetSize, supersetDfs, significanceHeuristic);
+ }
+
+ protected abstract InternalSignificantTerms createTestInstance(String name,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData,
+ InternalAggregations aggregations,
+ int requiredSize, int numBuckets,
+ long subsetSize, int[] subsetDfs,
+ long supersetSize, int[] supersetDfs,
+ SignificanceHeuristic significanceHeuristic);
+
@Override
protected InternalSignificantTerms createUnmappedInstance(String name,
List<PipelineAggregator> pipelineAggregators,
@@ -72,6 +123,7 @@ public abstract class InternalSignificantTermsTestCase extends InternalMultiBuck
InternalSignificantTerms expectedSigTerms = (InternalSignificantTerms) expected;
ParsedSignificantTerms actualSigTerms = (ParsedSignificantTerms) actual;
assertEquals(expectedSigTerms.getSubsetSize(), actualSigTerms.getSubsetSize());
+ assertEquals(expectedSigTerms.getSupersetSize(), actualSigTerms.getSupersetSize());
for (SignificantTerms.Bucket bucket : (SignificantTerms) expected) {
String key = bucket.getKeyAsString();
@@ -91,14 +143,22 @@ public abstract class InternalSignificantTermsTestCase extends InternalMultiBuck
assertEquals(expectedSigTerm.getSignificanceScore(), actualSigTerm.getSignificanceScore(), 0.0);
assertEquals(expectedSigTerm.getSubsetDf(), actualSigTerm.getSubsetDf());
+ assertEquals(expectedSigTerm.getDocCount(), actualSigTerm.getSubsetDf());
assertEquals(expectedSigTerm.getSupersetDf(), actualSigTerm.getSupersetDf());
-
- expectThrows(UnsupportedOperationException.class, actualSigTerm::getSubsetSize);
- expectThrows(UnsupportedOperationException.class, actualSigTerm::getSupersetSize);
+ assertEquals(expectedSigTerm.getSubsetSize(), actualSigTerm.getSubsetSize());
+ assertEquals(expectedSigTerm.getSupersetSize(), actualSigTerm.getSupersetSize());
}
private static Map<Object, Long> toCounts(Stream<? extends SignificantTerms.Bucket> buckets,
Function<SignificantTerms.Bucket, Long> fn) {
return buckets.collect(Collectors.toMap(SignificantTerms.Bucket::getKey, fn, Long::sum));
}
+
+ private static SignificanceHeuristic randomSignificanceHeuristic() {
+ return randomFrom(
+ new JLHScore(),
+ new MutualInformation(randomBoolean(), randomBoolean()),
+ new GND(randomBoolean()),
+ new ChiSquare(randomBoolean(), randomBoolean()));
+ }
}
diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java
index 793c6aec5c..f41dc80c3e 100644
--- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java
@@ -23,10 +23,6 @@ import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.ParsedMultiBucketAggregation;
-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
@@ -38,13 +34,11 @@ import java.util.Set;
public class SignificantLongTermsTests extends InternalSignificantTermsTestCase {
- private SignificanceHeuristic significanceHeuristic;
private DocValueFormat format;
@Override
public void setUp() throws Exception {
super.setUp();
- significanceHeuristic = randomSignificanceHeuristic();
format = randomNumericDocValueFormat();
}
@@ -52,30 +46,20 @@ public class SignificantLongTermsTests extends InternalSignificantTermsTestCase
protected InternalSignificantTerms createTestInstance(String name,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData,
- InternalAggregations aggregations) {
- int requiredSize = randomIntBetween(1, 5);
- int shardSize = requiredSize + 2;
- final int numBuckets = randomInt(shardSize);
-
- long globalSubsetSize = 0;
- long globalSupersetSize = 0;
+ InternalAggregations aggs,
+ int requiredSize, int numBuckets,
+ long subsetSize, int[] subsetDfs,
+ long supersetSize, int[] supersetDfs,
+ SignificanceHeuristic significanceHeuristic) {
List<SignificantLongTerms.Bucket> buckets = new ArrayList<>(numBuckets);
Set<Long> terms = new HashSet<>();
for (int i = 0; i < numBuckets; ++i) {
long term = randomValueOtherThanMany(l -> terms.add(l) == false, random()::nextLong);
-
- int subsetDf = randomIntBetween(1, 10);
- int supersetDf = randomIntBetween(subsetDf, 20);
- int supersetSize = randomIntBetween(supersetDf, 30);
-
- globalSubsetSize += subsetDf;
- globalSupersetSize += supersetSize;
-
- buckets.add(new SignificantLongTerms.Bucket(subsetDf, subsetDf, supersetDf, supersetSize, term, aggregations, format));
+ buckets.add(new SignificantLongTerms.Bucket(subsetDfs[i], subsetSize, supersetDfs[i], supersetSize, term, aggs, format));
}
- return new SignificantLongTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize,
- globalSupersetSize, significanceHeuristic, buckets);
+ return new SignificantLongTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, subsetSize,
+ supersetSize, significanceHeuristic, buckets);
}
@Override
@@ -87,12 +71,4 @@ public class SignificantLongTermsTests extends InternalSignificantTermsTestCase
protected Class<? extends ParsedMultiBucketAggregation> implementationClass() {
return ParsedSignificantLongTerms.class;
}
-
- private static SignificanceHeuristic randomSignificanceHeuristic() {
- return randomFrom(
- new JLHScore(),
- new MutualInformation(randomBoolean(), randomBoolean()),
- new GND(randomBoolean()),
- new ChiSquare(randomBoolean(), randomBoolean()));
- }
}
diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java
index 762472e4be..e9c716751f 100644
--- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java
@@ -24,10 +24,6 @@ import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.ParsedMultiBucketAggregation;
-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
@@ -39,43 +35,24 @@ import java.util.Set;
public class SignificantStringTermsTests extends InternalSignificantTermsTestCase {
- private SignificanceHeuristic significanceHeuristic;
-
- @Override
- public void setUp() throws Exception {
- super.setUp();
- significanceHeuristic = randomSignificanceHeuristic();
- }
-
@Override
protected InternalSignificantTerms createTestInstance(String name,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData,
- InternalAggregations aggregations) {
+ InternalAggregations aggs,
+ int requiredSize, int numBuckets,
+ long subsetSize, int[] subsetDfs,
+ long supersetSize, int[] supersetDfs,
+ SignificanceHeuristic significanceHeuristic) {
DocValueFormat format = DocValueFormat.RAW;
- int requiredSize = randomIntBetween(1, 5);
- int shardSize = requiredSize + 2;
- final int numBuckets = randomInt(shardSize);
-
- long globalSubsetSize = 0;
- long globalSupersetSize = 0;
-
List<SignificantStringTerms.Bucket> buckets = new ArrayList<>(numBuckets);
Set<BytesRef> terms = new HashSet<>();
for (int i = 0; i < numBuckets; ++i) {
BytesRef term = randomValueOtherThanMany(b -> terms.add(b) == false, () -> new BytesRef(randomAlphaOfLength(10)));
-
- int subsetDf = randomIntBetween(1, 10);
- int supersetDf = randomIntBetween(subsetDf, 20);
- int supersetSize = randomIntBetween(supersetDf, 30);
-
- globalSubsetSize += subsetDf;
- globalSupersetSize += supersetSize;
-
- buckets.add(new SignificantStringTerms.Bucket(term, subsetDf, subsetDf, supersetDf, supersetSize, aggregations, format));
+ buckets.add(new SignificantStringTerms.Bucket(term, subsetDfs[i], subsetSize, supersetDfs[i], supersetSize, aggs, format));
}
- return new SignificantStringTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize,
- globalSupersetSize, significanceHeuristic, buckets);
+ return new SignificantStringTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, subsetSize,
+ supersetSize, significanceHeuristic, buckets);
}
@Override
@@ -87,12 +64,4 @@ public class SignificantStringTermsTests extends InternalSignificantTermsTestCas
protected Class<? extends ParsedMultiBucketAggregation> implementationClass() {
return ParsedSignificantStringTerms.class;
}
-
- private static SignificanceHeuristic randomSignificanceHeuristic() {
- return randomFrom(
- new JLHScore(),
- new MutualInformation(randomBoolean(), randomBoolean()),
- new GND(randomBoolean()),
- new ChiSquare(randomBoolean(), randomBoolean()));
- }
}