diff options
author | Tanguy Leroux <tlrx.dev@gmail.com> | 2017-06-02 09:45:15 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-06-02 09:45:15 +0200 |
commit | 528bd25fa704da1d234ed364503891c93c6b9d31 (patch) | |
tree | bcb16d48cc7a805b431522646375933ec4e2774e /core/src/test/java/org/elasticsearch/search | |
parent | f4aee1e583b3a6ad17e879cb3503689726f37679 (diff) |
Add superset size to Significant Term REST response (#24865)
This commit adds a new bg_count field to the REST response of
SignificantTerms aggregations. It is analogous to the bg_count that
already exists in each significant terms bucket, but this new bg_count
field is set at the aggregation level and is populated with the superset size value.
Diffstat (limited to 'core/src/test/java/org/elasticsearch/search')
4 files changed, 81 insertions, 74 deletions
diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java index 394870a2a9..b72c3befa4 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java @@ -310,6 +310,7 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase { + "\"doc_count\":4," + "\"sig_terms\":{" + "\"doc_count\":4," + + "\"bg_count\":7," + "\"buckets\":[" + "{" + "\"key\":" + (type.equals("long") ? "0," : "\"0\",") @@ -325,6 +326,7 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase { + "\"doc_count\":3," + "\"sig_terms\":{" + "\"doc_count\":3," + + "\"bg_count\":7," + "\"buckets\":[" + "{" + "\"key\":" + (type.equals("long") ? "1," : "\"1\",") diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java index 9a86e44b2a..238d5234dc 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java @@ -19,8 +19,14 @@ package org.elasticsearch.search.aggregations.bucket.significant; +import org.elasticsearch.search.aggregations.InternalAggregations; import org.elasticsearch.search.aggregations.InternalMultiBucketAggregationTestCase; import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare; +import 
org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import java.util.Arrays; @@ -33,6 +39,51 @@ import java.util.stream.Stream; public abstract class InternalSignificantTermsTestCase extends InternalMultiBucketAggregationTestCase<InternalSignificantTerms<?, ?>> { + private SignificanceHeuristic significanceHeuristic; + + @Override + public void setUp() throws Exception { + super.setUp(); + significanceHeuristic = randomSignificanceHeuristic(); + } + + @Override + protected final InternalSignificantTerms createTestInstance(String name, + List<PipelineAggregator> pipelineAggregators, + Map<String, Object> metaData, + InternalAggregations aggregations) { + final int requiredSize = randomIntBetween(1, 5); + final int numBuckets = randomInt(requiredSize + 2); + + long subsetSize = 0; + long supersetSize = 0; + + int[] subsetDfs = new int[numBuckets]; + int[] supersetDfs = new int[numBuckets]; + + for (int i = 0; i < numBuckets; ++i) { + int subsetDf = randomIntBetween(1, 10); + subsetDfs[i] = subsetDf; + + int supersetDf = randomIntBetween(subsetDf, 20); + supersetDfs[i] = supersetDf; + + subsetSize += subsetDf; + supersetSize += supersetDf; + } + return createTestInstance(name, pipelineAggregators, metaData, aggregations, requiredSize, numBuckets, subsetSize, subsetDfs, + supersetSize, supersetDfs, significanceHeuristic); + } + + protected abstract InternalSignificantTerms createTestInstance(String name, + List<PipelineAggregator> pipelineAggregators, + Map<String, Object> metaData, + InternalAggregations aggregations, + int requiredSize, int numBuckets, + long subsetSize, int[] subsetDfs, + long 
supersetSize, int[] supersetDfs, + SignificanceHeuristic significanceHeuristic); + @Override protected InternalSignificantTerms createUnmappedInstance(String name, List<PipelineAggregator> pipelineAggregators, @@ -72,6 +123,7 @@ public abstract class InternalSignificantTermsTestCase extends InternalMultiBuck InternalSignificantTerms expectedSigTerms = (InternalSignificantTerms) expected; ParsedSignificantTerms actualSigTerms = (ParsedSignificantTerms) actual; assertEquals(expectedSigTerms.getSubsetSize(), actualSigTerms.getSubsetSize()); + assertEquals(expectedSigTerms.getSupersetSize(), actualSigTerms.getSupersetSize()); for (SignificantTerms.Bucket bucket : (SignificantTerms) expected) { String key = bucket.getKeyAsString(); @@ -91,14 +143,22 @@ public abstract class InternalSignificantTermsTestCase extends InternalMultiBuck assertEquals(expectedSigTerm.getSignificanceScore(), actualSigTerm.getSignificanceScore(), 0.0); assertEquals(expectedSigTerm.getSubsetDf(), actualSigTerm.getSubsetDf()); + assertEquals(expectedSigTerm.getDocCount(), actualSigTerm.getSubsetDf()); assertEquals(expectedSigTerm.getSupersetDf(), actualSigTerm.getSupersetDf()); - - expectThrows(UnsupportedOperationException.class, actualSigTerm::getSubsetSize); - expectThrows(UnsupportedOperationException.class, actualSigTerm::getSupersetSize); + assertEquals(expectedSigTerm.getSubsetSize(), actualSigTerm.getSubsetSize()); + assertEquals(expectedSigTerm.getSupersetSize(), actualSigTerm.getSupersetSize()); } private static Map<Object, Long> toCounts(Stream<? 
extends SignificantTerms.Bucket> buckets, Function<SignificantTerms.Bucket, Long> fn) { return buckets.collect(Collectors.toMap(SignificantTerms.Bucket::getKey, fn, Long::sum)); } + + private static SignificanceHeuristic randomSignificanceHeuristic() { + return randomFrom( + new JLHScore(), + new MutualInformation(randomBoolean(), randomBoolean()), + new GND(randomBoolean()), + new ChiSquare(randomBoolean(), randomBoolean())); + } } diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java index 793c6aec5c..f41dc80c3e 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java @@ -23,10 +23,6 @@ import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.InternalAggregations; import org.elasticsearch.search.aggregations.ParsedMultiBucketAggregation; -import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare; -import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND; -import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore; -import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; @@ -38,13 +34,11 @@ import java.util.Set; public class SignificantLongTermsTests extends InternalSignificantTermsTestCase { - private SignificanceHeuristic significanceHeuristic; private DocValueFormat format; @Override public void setUp() throws Exception { super.setUp(); - 
significanceHeuristic = randomSignificanceHeuristic(); format = randomNumericDocValueFormat(); } @@ -52,30 +46,20 @@ public class SignificantLongTermsTests extends InternalSignificantTermsTestCase protected InternalSignificantTerms createTestInstance(String name, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData, - InternalAggregations aggregations) { - int requiredSize = randomIntBetween(1, 5); - int shardSize = requiredSize + 2; - final int numBuckets = randomInt(shardSize); - - long globalSubsetSize = 0; - long globalSupersetSize = 0; + InternalAggregations aggs, + int requiredSize, int numBuckets, + long subsetSize, int[] subsetDfs, + long supersetSize, int[] supersetDfs, + SignificanceHeuristic significanceHeuristic) { List<SignificantLongTerms.Bucket> buckets = new ArrayList<>(numBuckets); Set<Long> terms = new HashSet<>(); for (int i = 0; i < numBuckets; ++i) { long term = randomValueOtherThanMany(l -> terms.add(l) == false, random()::nextLong); - - int subsetDf = randomIntBetween(1, 10); - int supersetDf = randomIntBetween(subsetDf, 20); - int supersetSize = randomIntBetween(supersetDf, 30); - - globalSubsetSize += subsetDf; - globalSupersetSize += supersetSize; - - buckets.add(new SignificantLongTerms.Bucket(subsetDf, subsetDf, supersetDf, supersetSize, term, aggregations, format)); + buckets.add(new SignificantLongTerms.Bucket(subsetDfs[i], subsetSize, supersetDfs[i], supersetSize, term, aggs, format)); } - return new SignificantLongTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize, - globalSupersetSize, significanceHeuristic, buckets); + return new SignificantLongTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, subsetSize, + supersetSize, significanceHeuristic, buckets); } @Override @@ -87,12 +71,4 @@ public class SignificantLongTermsTests extends InternalSignificantTermsTestCase protected Class<? 
extends ParsedMultiBucketAggregation> implementationClass() { return ParsedSignificantLongTerms.class; } - - private static SignificanceHeuristic randomSignificanceHeuristic() { - return randomFrom( - new JLHScore(), - new MutualInformation(randomBoolean(), randomBoolean()), - new GND(randomBoolean()), - new ChiSquare(randomBoolean(), randomBoolean())); - } } diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java index 762472e4be..e9c716751f 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java @@ -24,10 +24,6 @@ import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.InternalAggregations; import org.elasticsearch.search.aggregations.ParsedMultiBucketAggregation; -import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare; -import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND; -import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore; -import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; @@ -39,43 +35,24 @@ import java.util.Set; public class SignificantStringTermsTests extends InternalSignificantTermsTestCase { - private SignificanceHeuristic significanceHeuristic; - - @Override - public void setUp() throws Exception { - super.setUp(); - significanceHeuristic = randomSignificanceHeuristic(); - } - @Override protected 
InternalSignificantTerms createTestInstance(String name, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData, - InternalAggregations aggregations) { + InternalAggregations aggs, + int requiredSize, int numBuckets, + long subsetSize, int[] subsetDfs, + long supersetSize, int[] supersetDfs, + SignificanceHeuristic significanceHeuristic) { DocValueFormat format = DocValueFormat.RAW; - int requiredSize = randomIntBetween(1, 5); - int shardSize = requiredSize + 2; - final int numBuckets = randomInt(shardSize); - - long globalSubsetSize = 0; - long globalSupersetSize = 0; - List<SignificantStringTerms.Bucket> buckets = new ArrayList<>(numBuckets); Set<BytesRef> terms = new HashSet<>(); for (int i = 0; i < numBuckets; ++i) { BytesRef term = randomValueOtherThanMany(b -> terms.add(b) == false, () -> new BytesRef(randomAlphaOfLength(10))); - - int subsetDf = randomIntBetween(1, 10); - int supersetDf = randomIntBetween(subsetDf, 20); - int supersetSize = randomIntBetween(supersetDf, 30); - - globalSubsetSize += subsetDf; - globalSupersetSize += supersetSize; - - buckets.add(new SignificantStringTerms.Bucket(term, subsetDf, subsetDf, supersetDf, supersetSize, aggregations, format)); + buckets.add(new SignificantStringTerms.Bucket(term, subsetDfs[i], subsetSize, supersetDfs[i], supersetSize, aggs, format)); } - return new SignificantStringTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize, - globalSupersetSize, significanceHeuristic, buckets); + return new SignificantStringTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, subsetSize, + supersetSize, significanceHeuristic, buckets); } @Override @@ -87,12 +64,4 @@ public class SignificantStringTermsTests extends InternalSignificantTermsTestCas protected Class<? 
extends ParsedMultiBucketAggregation> implementationClass() { return ParsedSignificantStringTerms.class; } - - private static SignificanceHeuristic randomSignificanceHeuristic() { - return randomFrom( - new JLHScore(), - new MutualInformation(randomBoolean(), randomBoolean()), - new GND(randomBoolean()), - new ChiSquare(randomBoolean(), randomBoolean())); - } } |