From 8f63c46d276c5d73a649ec9318200faa7d268f7b Mon Sep 17 00:00:00 2001 From: Colin Goodheart-Smithe Date: Tue, 8 Dec 2015 14:13:34 +0000 Subject: Aggregations Refactor: Refactor Terms Aggregation --- .../search/aggregations/Aggregator.java | 19 +- .../bucket/significant/SignificantLongTerms.java | 2 +- .../bucket/significant/SignificantStringTerms.java | 2 +- .../SignificantTermsAggregatorFactory.java | 176 ++++++++++++++-- .../significant/SignificantTermsBuilder.java | 40 ++-- .../SignificantTermsParametersParser.java | 83 -------- .../bucket/significant/SignificantTermsParser.java | 102 +++++---- .../significant/UnmappedSignificantTerms.java | 6 +- .../bucket/significant/heuristics/ChiSquare.java | 35 ++-- .../bucket/significant/heuristics/GND.java | 38 ++-- .../bucket/significant/heuristics/JLHScore.java | 47 ++--- .../significant/heuristics/MutualInformation.java | 35 ++-- .../heuristics/NXYSignificanceHeuristic.java | 8 +- .../significant/heuristics/PercentageScore.java | 47 ++--- .../significant/heuristics/ScriptHeuristic.java | 87 +++++--- .../heuristics/SignificanceHeuristic.java | 14 +- .../heuristics/SignificanceHeuristicParser.java | 3 +- .../heuristics/SignificanceHeuristicStreams.java | 41 ++-- .../terms/AbstractTermsParametersParser.java | 111 ---------- .../bucket/terms/AbstractTermsParser.java | 130 ++++++++++++ .../aggregations/bucket/terms/InternalOrder.java | 38 +++- .../search/aggregations/bucket/terms/Terms.java | 8 +- .../aggregations/bucket/terms/TermsAggregator.java | 120 +++++++---- .../bucket/terms/TermsAggregatorFactory.java | 225 ++++++++++++++++++-- .../aggregations/bucket/terms/TermsBuilder.java | 36 ++-- .../bucket/terms/TermsParametersParser.java | 144 ------------- .../aggregations/bucket/terms/TermsParser.java | 164 +++++++++++---- .../bucket/terms/support/IncludeExclude.java | 187 +++++++++++++++-- .../support/ValuesSourceAggregatorFactory.java | 8 +- .../aggregations/SubAggCollectionModeTests.java | 82 ++++++++ .../SignificantTermsSignificanceScoreIT.java | 45 ++-- .../aggregations/bucket/SignificantTermsTests.java | 226 ++++++++++++++++++++ .../search/aggregations/bucket/TermsTests.java | 232 +++++++++++++++++++++ .../significant/SignificanceHeuristicTests.java | 27 ++- .../bucket/SharedSignificantTermsTestMethods.java | 10 +- 35 files changed, 1831 insertions(+), 747 deletions(-) delete mode 100644 core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsParametersParser.java delete mode 100644 core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParametersParser.java create mode 100644 core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParser.java delete mode 100644 core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParametersParser.java create mode 100644 core/src/test/java/org/elasticsearch/search/aggregations/SubAggCollectionModeTests.java create mode 100644 core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java create mode 100644 core/src/test/java/org/elasticsearch/search/aggregations/bucket/TermsTests.java diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java index 99c23fc744..0e6387b1b7 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java @@ -22,6 +22,9 @@ package 
org.elasticsearch.search.aggregations; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseFieldMatcher; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; @@ -112,7 +115,7 @@ public abstract class Aggregator extends BucketCollector implements Releasable { public abstract InternalAggregation buildEmptyAggregation(); /** Aggregation mode for sub aggregations. */ - public enum SubAggCollectionMode { + public enum SubAggCollectionMode implements Writeable { /** * Creates buckets and delegates to child aggregators in a single pass over @@ -148,5 +151,19 @@ public abstract class Aggregator extends BucketCollector implements Releasable { } throw new ElasticsearchParseException("no [{}] found for value [{}]", KEY.getPreferredName(), value); } + + @Override + public SubAggCollectionMode readFrom(StreamInput in) throws IOException { + int ordinal = in.readVInt(); + if (ordinal < 0 || ordinal >= values().length) { + throw new IOException("Unknown SubAggCollectionMode ordinal [" + ordinal + "]"); + } + return values()[ordinal]; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVInt(ordinal()); + } } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTerms.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTerms.java index 9e8ad33476..226c6d5b3b 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTerms.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTerms.java @@ -228,7 +228,7 @@ public class SignificantLongTerms extends InternalSignificantTerms implements Releasable { + static final ParseField BACKGROUND_FILTER = new ParseField("background_filter"); + static final ParseField HEURISTIC = new ParseField("significance_heuristic"); + + static final TermsAggregator.BucketCountThresholds DEFAULT_BUCKET_COUNT_THRESHOLDS = new TermsAggregator.BucketCountThresholds( + 3, 0, 10, -1); + public SignificanceHeuristic getSignificanceHeuristic() { return significanceHeuristic; } @@ -98,9 +115,8 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac List pipelineAggregators, Map metaData) throws IOException { final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? 
null : includeExclude.convertToOrdinalsFilter(); return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, - (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, filter, - aggregationContext, - parent, termsAggregatorFactory, pipelineAggregators, metaData); + (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, filter, aggregationContext, parent, + termsAggregatorFactory, pipelineAggregators, metaData); } }; @@ -129,36 +145,82 @@ public class SignificantTermsAggregatorFac return parseField.getPreferredName(); } } - private final IncludeExclude includeExclude; - private final String executionHint; + + private IncludeExclude includeExclude = null; + private String executionHint = null; private String indexedFieldName; private MappedFieldType fieldType; private FilterableTermsEnum termsEnum; private int numberOfAggregatorsCreated = 0; - private final Query filter; - private final TermsAggregator.BucketCountThresholds bucketCountThresholds; - private final SignificanceHeuristic significanceHeuristic; + private QueryBuilder filterBuilder = null; + private TermsAggregator.BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(DEFAULT_BUCKET_COUNT_THRESHOLDS); + private SignificanceHeuristic significanceHeuristic = JLHScore.PROTOTYPE; protected TermsAggregator.BucketCountThresholds getBucketCountThresholds() { return new TermsAggregator.BucketCountThresholds(bucketCountThresholds); } - public SignificantTermsAggregatorFactory(String name, ValuesSourceParser.Input valueSourceInput, - TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, - String executionHint, Query filter, SignificanceHeuristic significanceHeuristic) { + public SignificantTermsAggregatorFactory(String name, ValuesSourceType valuesSourceType, ValueType valueType) { + super(name, SignificantStringTerms.TYPE, valuesSourceType, valueType); + } + + public TermsAggregator.BucketCountThresholds bucketCountThresholds() { + return bucketCountThresholds; + } - super(name, SignificantStringTerms.TYPE, valueSourceInput); + public void bucketCountThresholds(TermsAggregator.BucketCountThresholds bucketCountThresholds) { this.bucketCountThresholds = bucketCountThresholds; - this.includeExclude = includeExclude; + } + + /** + * Expert: sets an execution hint for the aggregation. + */ + public void executionHint(String executionHint) { this.executionHint = executionHint; + } + + /** + * Expert: gets the execution hint for the aggregation.
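+ * May be {@code null}, in which case the aggregator chooses an execution mode itself.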
+ */ + public String executionHint() { + return executionHint; + } + + public void backgroundFilter(QueryBuilder filterBuilder) { + this.filterBuilder = filterBuilder; + } + + public QueryBuilder backgroundFilter() { + return filterBuilder; + } + + /** + * Set terms to include and exclude from the aggregation results + */ + public void includeExclude(IncludeExclude includeExclude) { + this.includeExclude = includeExclude; + } + + /** + * Get terms to include and exclude from the aggregation results + */ + public IncludeExclude includeExclude() { + return includeExclude; + } + + public void significanceHeuristic(SignificanceHeuristic significanceHeuristic) { this.significanceHeuristic = significanceHeuristic; - this.filter = filter; + } + + public SignificanceHeuristic significanceHeuristic() { + return significanceHeuristic; } @Override public void doInit(AggregationContext context) { super.doInit(context); setFieldInfo(); + significanceHeuristic.initialize(context.searchContext()); } private void setFieldInfo() { @@ -191,6 +253,18 @@ public class SignificantTermsAggregatorFac } numberOfAggregatorsCreated++; + BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(this.bucketCountThresholds); + if (bucketCountThresholds.getShardSize() == DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) { + //The user has not made a shardSize selection. + //Use default heuristic to avoid any wrong-ranking caused by distributed counting + //but request double the usual amount. + //We typically need more than the number of "top" terms requested by other aggregations + //as the significance algorithm is in less of a position to down-select at shard-level - + //some of the things we want to find have only one occurrence on each shard and as + //such are impossible to differentiate from non-significant terms at that early stage.
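+ //For example (illustrative numbers only): a request for the top 3 terms on a 5-shard index + //makes the call below ask each shard for 2 * suggestShardSideQueueSize(3, 5) candidate terms + //instead of only 3.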
+ bucketCountThresholds.setShardSize(2 * BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize(), + aggregationContext.searchContext().numberOfShards())); + } if (valuesSource instanceof ValuesSource.Bytes) { ExecutionMode execution = null; @@ -247,6 +321,14 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac } SearchContext searchContext = context.searchContext(); IndexReader reader = searchContext.searcher().getIndexReader(); + Query filter = null; + try { + if (filterBuilder != null) { + filter = filterBuilder.toFilter(context.searchContext().indexShard().getQueryShardContext()); + } + } catch (IOException e) { + throw new ElasticsearchException("failed to create filter: " + filterBuilder.toString(), e); + } try { if (numberOfAggregatorsCreated == 1) { // Setup a termsEnum for sole use by one aggregator @@ -291,4 +373,68 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac termsEnum = null; } } + + @Override + protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { + bucketCountThresholds.toXContent(builder, params); + if (executionHint != null) { + builder.field(TermsAggregatorFactory.EXECUTION_HINT_FIELD_NAME.getPreferredName(), executionHint); + } + if (filterBuilder != null) { + builder.field(BACKGROUND_FILTER.getPreferredName(), filterBuilder); + } + if (includeExclude != null) { + includeExclude.toXContent(builder, params); + } + significanceHeuristic.toXContent(builder, params); + return builder; + } + + @Override + protected ValuesSourceAggregatorFactory innerReadFrom(String name, ValuesSourceType valuesSourceType, + ValueType targetValueType, StreamInput in) throws IOException { + SignificantTermsAggregatorFactory factory = new SignificantTermsAggregatorFactory(name, valuesSourceType, targetValueType); + factory.bucketCountThresholds = BucketCountThresholds.readFromStream(in); + factory.executionHint = in.readOptionalString(); + if (in.readBoolean()) { + factory.filterBuilder = in.readQuery(); + } + if (in.readBoolean()) { + factory.includeExclude = IncludeExclude.readFromStream(in); + } + factory.significanceHeuristic = SignificanceHeuristicStreams.read(in); + return factory; + } + + @Override + protected void innerWriteTo(StreamOutput out) throws IOException { + bucketCountThresholds.writeTo(out); + out.writeOptionalString(executionHint); + boolean hasfilterBuilder = filterBuilder != null; + out.writeBoolean(hasfilterBuilder); + if (hasfilterBuilder) { + out.writeQuery(filterBuilder); + } + boolean hasIncExc = includeExclude != null; + out.writeBoolean(hasIncExc); + if (hasIncExc) { + includeExclude.writeTo(out); + } + SignificanceHeuristicStreams.writeTo(significanceHeuristic, out); + } + + @Override + protected int innerHashCode() { + return Objects.hash(bucketCountThresholds, executionHint, filterBuilder, includeExclude, significanceHeuristic); + } + + @Override + protected boolean innerEquals(Object obj) { + SignificantTermsAggregatorFactory other = (SignificantTermsAggregatorFactory) obj; + return Objects.equals(bucketCountThresholds, other.bucketCountThresholds) + && Objects.equals(executionHint, other.executionHint) + && Objects.equals(filterBuilder, other.filterBuilder) + && Objects.equals(includeExclude, other.includeExclude) + && Objects.equals(significanceHeuristic, other.significanceHeuristic); + } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java 
b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java index b67ce2a1f9..6bbb3348b7 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java @@ -24,8 +24,8 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicBuilder; -import org.elasticsearch.search.aggregations.bucket.terms.AbstractTermsParametersParser; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory; import java.io.IOException; @@ -88,7 +88,7 @@ public class SignificantTermsBuilder extends AggregationBuilder doCreateFactory(String aggregationName, ValuesSourceType valuesSourceType, + ValueType targetValueType, BucketCountThresholds bucketCountThresholds, SubAggCollectionMode collectMode, String executionHint, + IncludeExclude incExc, Map otherOptions) { + SignificantTermsAggregatorFactory factory = new SignificantTermsAggregatorFactory(aggregationName, valuesSourceType, + targetValueType); + if (bucketCountThresholds != null) { + factory.bucketCountThresholds(bucketCountThresholds); + } + if (executionHint != null) { + factory.executionHint(executionHint); + } + if (incExc != null) { + factory.includeExclude(incExc); + } + QueryBuilder backgroundFilter = (QueryBuilder) otherOptions.get(SignificantTermsAggregatorFactory.BACKGROUND_FILTER); + if (backgroundFilter != null) { + factory.backgroundFilter(backgroundFilter); + } + SignificanceHeuristic significanceHeuristic = (SignificanceHeuristic) otherOptions.get(SignificantTermsAggregatorFactory.HEURISTIC); + if (significanceHeuristic != null) { + factory.significanceHeuristic(significanceHeuristic); } + return factory; + } - bucketCountThresholds.ensureValidity(); - SignificanceHeuristic significanceHeuristic = aggParser.getSignificanceHeuristic(); - if (significanceHeuristic == null) { - significanceHeuristic = JLHScore.INSTANCE; + @Override + public boolean parseSpecial(String aggregationName, XContentParser parser, ParseFieldMatcher parseFieldMatcher, Token token, + String currentFieldName, Map otherOptions) throws IOException { + if (token == XContentParser.Token.START_OBJECT) { + SignificanceHeuristicParser significanceHeuristicParser = significanceHeuristicParserMapper.get(currentFieldName); + if (significanceHeuristicParser != null) { + SignificanceHeuristic significanceHeuristic = significanceHeuristicParser.parse(parser, parseFieldMatcher); + otherOptions.put(SignificantTermsAggregatorFactory.HEURISTIC, significanceHeuristic); + return true; + } else if (parseFieldMatcher.match(currentFieldName, SignificantTermsAggregatorFactory.BACKGROUND_FILTER)) { + QueryParseContext queryParseContext = new QueryParseContext(queriesRegistry); + queryParseContext.reset(parser); + queryParseContext.parseFieldMatcher(parseFieldMatcher); + QueryBuilder filter = queryParseContext.parseInnerQueryBuilder(); + otherOptions.put(SignificantTermsAggregatorFactory.BACKGROUND_FILTER, filter); + return true; + } } - return new SignificantTermsAggregatorFactory(aggregationName, vsParser.input(), bucketCountThresholds, - 
aggParser.getIncludeExclude(), aggParser.getExecutionHint(), aggParser.getFilter(), significanceHeuristic); + return false; } - // NORELEASE implement this method when refactoring this aggregation @Override public AggregatorFactory[] getFactoryPrototypes() { - return null; + return new AggregatorFactory[] { new SignificantTermsAggregatorFactory(null, null, null) }; + } + + @Override + protected BucketCountThresholds getDefaultBucketCountThresholds() { + return new TermsAggregator.BucketCountThresholds(SignificantTermsAggregatorFactory.DEFAULT_BUCKET_COUNT_THRESHOLDS); } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/UnmappedSignificantTerms.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/UnmappedSignificantTerms.java index c7569db612..bcad058f2e 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/UnmappedSignificantTerms.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/UnmappedSignificantTerms.java @@ -58,9 +58,9 @@ public class UnmappedSignificantTerms extends InternalSignificantTerms pipelineAggregators, Map metaData) { - //We pass zero for index/subset sizes because for the purpose of significant term analysis - // we assume an unmapped index's size is irrelevant to the proceedings. - super(0, 0, name, requiredSize, minDocCount, JLHScore.INSTANCE, BUCKETS, pipelineAggregators, metaData); + //We pass zero for index/subset sizes because for the purpose of significant term analysis + // we assume an unmapped index's size is irrelevant to the proceedings. + super(0, 0, name, requiredSize, minDocCount, JLHScore.PROTOTYPE, BUCKETS, pipelineAggregators, metaData); } @Override diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/ChiSquare.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/ChiSquare.java index cc9303a635..c68e47acb7 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/ChiSquare.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/ChiSquare.java @@ -23,13 +23,14 @@ package org.elasticsearch.search.aggregations.bucket.significant.heuristics; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; public class ChiSquare extends NXYSignificanceHeuristic { + static final ChiSquare PROTOTYPE = new ChiSquare(false, false); + protected static final ParseField NAMES_FIELD = new ParseField("chi_square"); public ChiSquare(boolean includeNegatives, boolean backgroundIsSuperset) { @@ -51,18 +52,6 @@ public class ChiSquare extends NXYSignificanceHeuristic { return result; } - public static final SignificanceHeuristicStreams.Stream STREAM = new SignificanceHeuristicStreams.Stream() { - @Override - public SignificanceHeuristic readResult(StreamInput in) throws IOException { - return new ChiSquare(in.readBoolean(), in.readBoolean()); - } - - @Override - public String getName() { - return NAMES_FIELD.getPreferredName(); - } - }; - /** * Calculates Chi^2 * see "Information Retrieval", Manning et al., Eq. 
13.19 @@ -80,9 +69,21 @@ public class ChiSquare extends NXYSignificanceHeuristic { } @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeString(STREAM.getName()); - super.writeTo(out); + public String getWriteableName() { + return NAMES_FIELD.getPreferredName(); + } + + @Override + public SignificanceHeuristic readFrom(StreamInput in) throws IOException { + return new ChiSquare(in.readBoolean(), in.readBoolean()); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAMES_FIELD.getPreferredName()); + super.build(builder); + builder.endObject(); + return builder; } public static class ChiSquareParser extends NXYParser { @@ -106,7 +107,7 @@ public class ChiSquare extends NXYSignificanceHeuristic { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(STREAM.getName()); + builder.startObject(NAMES_FIELD.getPreferredName()); super.build(builder); builder.endObject(); return builder; diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/GND.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/GND.java index 99ee7c73b2..6da05eddb5 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/GND.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/GND.java @@ -29,12 +29,13 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryShardException; -import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; public class GND extends NXYSignificanceHeuristic { + static final GND PROTOTYPE = new GND(false); + protected static final ParseField NAMES_FIELD = new ParseField("gnd"); public GND(boolean backgroundIsSuperset) { @@ -57,18 +58,6 @@ public class GND extends NXYSignificanceHeuristic { return result; } - public static final SignificanceHeuristicStreams.Stream STREAM = new SignificanceHeuristicStreams.Stream() { - @Override - public SignificanceHeuristic readResult(StreamInput in) throws IOException { - return new GND(in.readBoolean()); - } - - @Override - public String getName() { - return NAMES_FIELD.getPreferredName(); - } - }; - /** * Calculates Google Normalized Distance, as described in "The Google Similarity Distance", Cilibrasi and Vitanyi, 2007 * link: http://arxiv.org/pdf/cs/0412098v3.pdf @@ -97,12 +86,29 @@ public class GND extends NXYSignificanceHeuristic { return score; } + @Override + public String getWriteableName() { + return NAMES_FIELD.getPreferredName(); + } + + @Override + public SignificanceHeuristic readFrom(StreamInput in) throws IOException { + return new GND(in.readBoolean()); + } + @Override public void writeTo(StreamOutput out) throws IOException { - out.writeString(STREAM.getName()); out.writeBoolean(backgroundIsSuperset); } + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAMES_FIELD.getPreferredName()); + builder.field(BACKGROUND_IS_SUPERSET.getPreferredName(), backgroundIsSuperset); + builder.endObject(); + return builder; + } + public static class GNDParser extends NXYParser { @Override @@ -116,7 +122,7 @@ public class GND extends NXYSignificanceHeuristic 
{ } @Override - public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher, SearchContext context) + public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher) throws IOException, QueryShardException { String givenName = parser.currentName(); boolean backgroundIsSuperset = true; @@ -143,7 +149,7 @@ public class GND extends NXYSignificanceHeuristic { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(STREAM.getName()); + builder.startObject(NAMES_FIELD.getPreferredName()); builder.field(BACKGROUND_IS_SUPERSET.getPreferredName(), backgroundIsSuperset); builder.endObject(); return builder; diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/JLHScore.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/JLHScore.java index 97264e7d53..753c9ccb3e 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/JLHScore.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/JLHScore.java @@ -22,38 +22,42 @@ package org.elasticsearch.search.aggregations.bucket.significant.heuristics; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryShardException; -import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; public class JLHScore extends SignificanceHeuristic { - public static final JLHScore INSTANCE = new JLHScore(); + public static final JLHScore PROTOTYPE = new JLHScore(); - protected static final String[] NAMES = {"jlh"}; + protected static final ParseField NAMES_FIELD = new ParseField("jlh"); private JLHScore() {} - public static final SignificanceHeuristicStreams.Stream STREAM = new SignificanceHeuristicStreams.Stream() { - @Override - public SignificanceHeuristic readResult(StreamInput in) throws IOException { - return readFrom(in); - } + @Override + public String getWriteableName() { + return NAMES_FIELD.getPreferredName(); + } - @Override - public String getName() { - return NAMES[0]; - } - }; + @Override + public SignificanceHeuristic readFrom(StreamInput in) throws IOException { + return PROTOTYPE; + } - public static SignificanceHeuristic readFrom(StreamInput in) throws IOException { - return INSTANCE; + @Override + public void writeTo(StreamOutput out) throws IOException { + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAMES_FIELD.getPreferredName()).endObject(); + return builder; } /** @@ -101,26 +105,21 @@ public class JLHScore extends SignificanceHeuristic { return absoluteProbabilityChange * relativeProbabilityChange; } - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeString(STREAM.getName()); - } - public static class JLHScoreParser implements SignificanceHeuristicParser { @Override - public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher, SearchContext context) + public SignificanceHeuristic parse(XContentParser parser, 
ParseFieldMatcher parseFieldMatcher) throws IOException, QueryShardException { // move to the closing bracket if (!parser.nextToken().equals(XContentParser.Token.END_OBJECT)) { throw new ElasticsearchParseException("failed to parse [jlh] significance heuristic. expected an empty object, but found [{}] instead", parser.currentToken()); } - return new JLHScore(); + return PROTOTYPE; } @Override public String[] getNames() { - return NAMES; + return NAMES_FIELD.getAllNamesIncludedDeprecated(); } } @@ -128,7 +127,7 @@ public class JLHScore extends SignificanceHeuristic { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(STREAM.getName()).endObject(); + builder.startObject(NAMES_FIELD.getPreferredName()).endObject(); return builder; } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/MutualInformation.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/MutualInformation.java index b4529b8267..d20b5f3544 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/MutualInformation.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/MutualInformation.java @@ -23,13 +23,14 @@ package org.elasticsearch.search.aggregations.bucket.significant.heuristics; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; public class MutualInformation extends NXYSignificanceHeuristic { + static final MutualInformation PROTOTYPE = new MutualInformation(false, false); + protected static final ParseField NAMES_FIELD = new ParseField("mutual_information"); private static final double log2 = Math.log(2.0); @@ -53,18 +54,6 @@ public class MutualInformation extends NXYSignificanceHeuristic { return result; } - public static final SignificanceHeuristicStreams.Stream STREAM = new SignificanceHeuristicStreams.Stream() { - @Override - public SignificanceHeuristic readResult(StreamInput in) throws IOException { - return new MutualInformation(in.readBoolean(), in.readBoolean()); - } - - @Override - public String getName() { - return NAMES_FIELD.getPreferredName(); - } - }; - /** * Calculates mutual information * see "Information Retrieval", Manning et al., Eq.
13.17 @@ -113,9 +102,21 @@ public class MutualInformation extends NXYSignificanceHeuristic { } @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeString(STREAM.getName()); - super.writeTo(out); + public String getWriteableName() { + return NAMES_FIELD.getPreferredName(); + } + + @Override + public SignificanceHeuristic readFrom(StreamInput in) throws IOException { + return new MutualInformation(in.readBoolean(), in.readBoolean()); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAMES_FIELD.getPreferredName()); + super.build(builder); + builder.endObject(); + return builder; } public static class MutualInformationParser extends NXYParser { @@ -139,7 +140,7 @@ public class MutualInformation extends NXYSignificanceHeuristic { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(STREAM.getName()); + builder.startObject(NAMES_FIELD.getPreferredName()); super.build(builder); builder.endObject(); return builder; diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/NXYSignificanceHeuristic.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/NXYSignificanceHeuristic.java index c6a6924108..d3f98f23b9 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/NXYSignificanceHeuristic.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/NXYSignificanceHeuristic.java @@ -28,7 +28,6 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryShardException; -import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -136,10 +135,15 @@ public abstract class NXYSignificanceHeuristic extends SignificanceHeuristic { } } + protected void build(XContentBuilder builder) throws IOException { + builder.field(INCLUDE_NEGATIVES_FIELD.getPreferredName(), includeNegatives).field(BACKGROUND_IS_SUPERSET.getPreferredName(), + backgroundIsSuperset); + } + public static abstract class NXYParser implements SignificanceHeuristicParser { @Override - public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher, SearchContext context) + public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher) throws IOException, QueryShardException { String givenName = parser.currentName(); boolean includeNegatives = false; diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/PercentageScore.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/PercentageScore.java index aceae8c251..e6cfe9a9bf 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/PercentageScore.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/PercentageScore.java @@ -22,38 +22,42 @@ package org.elasticsearch.search.aggregations.bucket.significant.heuristics; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.io.stream.StreamInput; import 
org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryShardException; -import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; public class PercentageScore extends SignificanceHeuristic { - public static final PercentageScore INSTANCE = new PercentageScore(); + public static final PercentageScore PROTOTYPE = new PercentageScore(); - protected static final String[] NAMES = {"percentage"}; + protected static final ParseField NAMES_FIELD = new ParseField("percentage"); private PercentageScore() {} - public static final SignificanceHeuristicStreams.Stream STREAM = new SignificanceHeuristicStreams.Stream() { - @Override - public SignificanceHeuristic readResult(StreamInput in) throws IOException { - return readFrom(in); - } + @Override + public String getWriteableName() { + return NAMES_FIELD.getPreferredName(); + } - @Override - public String getName() { - return NAMES[0]; - } - }; + @Override + public SignificanceHeuristic readFrom(StreamInput in) throws IOException { + return PROTOTYPE; + } - public static SignificanceHeuristic readFrom(StreamInput in) throws IOException { - return INSTANCE; + @Override + public void writeTo(StreamOutput out) throws IOException { + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAMES_FIELD.getPreferredName()).endObject(); + return builder; } /** @@ -70,26 +74,21 @@ public class PercentageScore extends SignificanceHeuristic { return (double) subsetFreq / (double) supersetFreq; } - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeString(STREAM.getName()); - } - public static class PercentageScoreParser implements SignificanceHeuristicParser { @Override - public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher, SearchContext context) + public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher) throws IOException, QueryShardException { // move to the closing bracket if (!parser.nextToken().equals(XContentParser.Token.END_OBJECT)) { throw new ElasticsearchParseException("failed to parse [percentage] significance heuristic. 
expected an empty object, but got [{}] instead", parser.currentToken()); } - return new PercentageScore(); + return PROTOTYPE; } @Override public String[] getNames() { - return NAMES; + return NAMES_FIELD.getAllNamesIncludedDeprecated(); } } @@ -97,7 +96,7 @@ public class PercentageScore extends SignificanceHeuristic { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(STREAM.getName()).endObject(); + builder.startObject(NAMES_FIELD.getPreferredName()).endObject(); return builder; } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/ScriptHeuristic.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/ScriptHeuristic.java index 9efea00051..9dd3d0796b 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/ScriptHeuristic.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/ScriptHeuristic.java @@ -22,6 +22,7 @@ package org.elasticsearch.search.aggregations.bucket.significant.heuristics; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.common.HasContextAndHeaders; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.io.stream.StreamInput; @@ -44,9 +45,12 @@ import java.io.IOException; import java.util.Collections; import java.util.HashMap; import java.util.Map; +import java.util.Objects; public class ScriptHeuristic extends SignificanceHeuristic { + static final ScriptHeuristic PROTOTYPE = new ScriptHeuristic(null); + protected static final ParseField NAMES_FIELD = new ParseField("script_heuristic"); private final LongAccessor subsetSizeHolder; private final LongAccessor supersetSizeHolder; @@ -55,31 +59,11 @@ public class ScriptHeuristic extends SignificanceHeuristic { ExecutableScript searchScript = null; Script script; - public static final SignificanceHeuristicStreams.Stream STREAM = new SignificanceHeuristicStreams.Stream() { - @Override - public SignificanceHeuristic readResult(StreamInput in) throws IOException { - Script script = Script.readScript(in); - return new ScriptHeuristic(null, script); - } - - @Override - public String getName() { - return NAMES_FIELD.getPreferredName(); - } - }; - - public ScriptHeuristic(ExecutableScript searchScript, Script script) { + public ScriptHeuristic(Script script) { subsetSizeHolder = new LongAccessor(); supersetSizeHolder = new LongAccessor(); subsetDfHolder = new LongAccessor(); supersetDfHolder = new LongAccessor(); - this.searchScript = searchScript; - if (searchScript != null) { - searchScript.setNextVar("_subset_freq", subsetDfHolder); - searchScript.setNextVar("_subset_size", subsetSizeHolder); - searchScript.setNextVar("_superset_freq", supersetDfHolder); - searchScript.setNextVar("_superset_size", supersetSizeHolder); - } this.script = script; @@ -87,7 +71,16 @@ public class ScriptHeuristic extends SignificanceHeuristic { @Override public void initialize(InternalAggregation.ReduceContext context) { - searchScript = context.scriptService().executable(script, ScriptContext.Standard.AGGS, context, Collections.emptyMap()); + initialize(context.scriptService(), context); + } + + @Override + public void initialize(SearchContext context) { + initialize(context.scriptService(), context); + } + + public void initialize(ScriptService scriptService, HasContextAndHeaders hasContextAndHeaders) 
{ + searchScript = scriptService.executable(script, ScriptContext.Standard.AGGS, hasContextAndHeaders, Collections.emptyMap()); searchScript.setNextVar("_subset_freq", subsetDfHolder); searchScript.setNextVar("_subset_size", subsetSizeHolder); searchScript.setNextVar("_superset_freq", supersetDfHolder); @@ -120,12 +113,48 @@ public class ScriptHeuristic extends SignificanceHeuristic { return ((Number) searchScript.run()).doubleValue(); } + @Override + public String getWriteableName() { + return NAMES_FIELD.getPreferredName(); + } + + @Override + public SignificanceHeuristic readFrom(StreamInput in) throws IOException { + Script script = Script.readScript(in); + return new ScriptHeuristic(script); + } + @Override public void writeTo(StreamOutput out) throws IOException { - out.writeString(STREAM.getName()); script.writeTo(out); } + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params builderParams) throws IOException { + builder.startObject(NAMES_FIELD.getPreferredName()); + builder.field(ScriptField.SCRIPT.getPreferredName()); + script.toXContent(builder, builderParams); + builder.endObject(); + return builder; + } + + @Override + public int hashCode() { + return Objects.hash(script); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + ScriptHeuristic other = (ScriptHeuristic) obj; + return Objects.equals(script, other.script); + } + public static class ScriptHeuristicParser implements SignificanceHeuristicParser { private final ScriptService scriptService; @@ -134,7 +163,7 @@ public class ScriptHeuristic extends SignificanceHeuristic { } @Override - public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher, SearchContext context) + public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher) throws IOException, QueryShardException { String heuristicName = parser.currentName(); Script script = null; @@ -173,13 +202,7 @@ public class ScriptHeuristic extends SignificanceHeuristic { if (script == null) { throw new ElasticsearchParseException("failed to parse [{}] significance heuristic. no script found in script_heuristic", heuristicName); } - ExecutableScript searchScript; - try { - searchScript = scriptService.executable(script, ScriptContext.Standard.AGGS, context, Collections.emptyMap()); - } catch (Exception e) { - throw new ElasticsearchParseException("failed to parse [{}] significance heuristic. 
the script [{}] could not be loaded", e, script, heuristicName); - } - return new ScriptHeuristic(searchScript, script); + return new ScriptHeuristic(script); } @Override @@ -199,7 +222,7 @@ public class ScriptHeuristic extends SignificanceHeuristic { @Override public XContentBuilder toXContent(XContentBuilder builder, Params builderParams) throws IOException { - builder.startObject(STREAM.getName()); + builder.startObject(NAMES_FIELD.getPreferredName()); builder.field(ScriptField.SCRIPT.getPreferredName()); script.toXContent(builder, builderParams); builder.endObject(); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristic.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristic.java index 4f12277ca0..972696ba99 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristic.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristic.java @@ -20,12 +20,12 @@ package org.elasticsearch.search.aggregations.bucket.significant.heuristics; -import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.NamedWriteable; +import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.internal.SearchContext; -import java.io.IOException; - -public abstract class SignificanceHeuristic { +public abstract class SignificanceHeuristic implements NamedWriteable, ToXContent { /** * @param subsetFreq The frequency of the term in the selected sample * @param subsetSize The size of the selected sample (typically number of docs) @@ -35,8 +35,6 @@ public abstract class SignificanceHeuristic { */ public abstract double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize); - abstract public void writeTo(StreamOutput out) throws IOException; - protected void checkFrequencyValidity(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize, String scoreFunctionName) { if (subsetFreq < 0 || subsetSize < 0 || supersetFreq < 0 || supersetSize < 0) { throw new IllegalArgumentException("Frequencies of subset and superset must be positive in " + scoreFunctionName + ".getScore()"); @@ -52,4 +50,8 @@ public abstract class SignificanceHeuristic { public void initialize(InternalAggregation.ReduceContext reduceContext) { } + + public void initialize(SearchContext context) { + + } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristicParser.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristicParser.java index 92baa43e6b..aa430dc251 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristicParser.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristicParser.java @@ -23,13 +23,12 @@ package org.elasticsearch.search.aggregations.bucket.significant.heuristics; import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; public interface SignificanceHeuristicParser { - SignificanceHeuristic 
parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher, SearchContext context) throws IOException, + SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher) throws IOException, ParsingException; String[] getNames(); } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristicStreams.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristicStreams.java index 198f129c28..2ffe5ecc70 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristicStreams.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristicStreams.java @@ -19,6 +19,7 @@ package org.elasticsearch.search.aggregations.bucket.significant.heuristics; import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; import java.io.IOException; import java.util.Collections; @@ -31,21 +32,26 @@ import java.util.Map; */ public class SignificanceHeuristicStreams { - private static Map<String, Stream> STREAMS = Collections.emptyMap(); + private static Map<String, SignificanceHeuristic> STREAMS = Collections.emptyMap(); static { - HashMap<String, Stream> map = new HashMap<>(); - map.put(JLHScore.STREAM.getName(), JLHScore.STREAM); - map.put(PercentageScore.STREAM.getName(), PercentageScore.STREAM); - map.put(MutualInformation.STREAM.getName(), MutualInformation.STREAM); - map.put(GND.STREAM.getName(), GND.STREAM); - map.put(ChiSquare.STREAM.getName(), ChiSquare.STREAM); - map.put(ScriptHeuristic.STREAM.getName(), ScriptHeuristic.STREAM); + HashMap<String, SignificanceHeuristic> map = new HashMap<>(); + map.put(JLHScore.NAMES_FIELD.getPreferredName(), JLHScore.PROTOTYPE); + map.put(PercentageScore.NAMES_FIELD.getPreferredName(), PercentageScore.PROTOTYPE); + map.put(MutualInformation.NAMES_FIELD.getPreferredName(), MutualInformation.PROTOTYPE); + map.put(GND.NAMES_FIELD.getPreferredName(), GND.PROTOTYPE); + map.put(ChiSquare.NAMES_FIELD.getPreferredName(), ChiSquare.PROTOTYPE); + map.put(ScriptHeuristic.NAMES_FIELD.getPreferredName(), ScriptHeuristic.PROTOTYPE); STREAMS = Collections.unmodifiableMap(map); } public static SignificanceHeuristic read(StreamInput in) throws IOException { - return stream(in.readString()).readResult(in); + return stream(in.readString()).readFrom(in); + } + + public static void writeTo(SignificanceHeuristic significanceHeuristic, StreamOutput out) throws IOException { + out.writeString(significanceHeuristic.getWriteableName()); + significanceHeuristic.writeTo(out); } /** @@ -59,17 +65,18 @@ public class SignificanceHeuristicStreams { } /** - * Registers the given stream and associate it with the given types. + * Registers the given prototype.
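+ * Custom heuristics (for example, those provided by a plugin) are typically registered this way + * so that a heuristic name read from a stream can be resolved back to a prototype.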
* - * @param stream The stream to register + * @param prototype + * The prototype to register */ - public static synchronized void registerStream(Stream stream) { - if (STREAMS.containsKey(stream.getName())) { - throw new IllegalArgumentException("Can't register stream with name [" + stream.getName() + "] more than once"); + public static synchronized void registerPrototype(SignificanceHeuristic prototype) { + if (STREAMS.containsKey(prototype.getWriteableName())) { + throw new IllegalArgumentException("Can't register stream with name [" + prototype.getWriteableName() + "] more than once"); } - HashMap<String, Stream> map = new HashMap<>(); + HashMap<String, SignificanceHeuristic> map = new HashMap<>(); map.putAll(STREAMS); - map.put(stream.getName(), stream); + map.put(prototype.getWriteableName(), prototype); STREAMS = Collections.unmodifiableMap(map); } @@ -79,7 +86,7 @@ public class SignificanceHeuristicStreams { * @param name The given name * @return The associated stream */ - private static synchronized Stream stream(String name) { + private static synchronized SignificanceHeuristic stream(String name) { return STREAMS.get(name); } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParametersParser.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParametersParser.java deleted file mode 100644 index 891526c33c..0000000000 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParametersParser.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - -package org.elasticsearch.search.aggregations.bucket.terms; -import org.elasticsearch.common.ParseField; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; -import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; -import org.elasticsearch.search.aggregations.support.ValuesSourceParser; -import org.elasticsearch.search.internal.SearchContext; - -import java.io.IOException; - -public abstract class AbstractTermsParametersParser { - - public static final ParseField EXECUTION_HINT_FIELD_NAME = new ParseField("execution_hint"); - public static final ParseField SHARD_SIZE_FIELD_NAME = new ParseField("shard_size"); - public static final ParseField MIN_DOC_COUNT_FIELD_NAME = new ParseField("min_doc_count"); - public static final ParseField SHARD_MIN_DOC_COUNT_FIELD_NAME = new ParseField("shard_min_doc_count"); - public static final ParseField REQUIRED_SIZE_FIELD_NAME = new ParseField("size"); - public static final ParseField SHOW_TERM_DOC_COUNT_ERROR = new ParseField("show_term_doc_count_error"); - - - //These are the results of the parsing.
- private TermsAggregator.BucketCountThresholds bucketCountThresholds = new TermsAggregator.BucketCountThresholds(); - - private String executionHint = null; - - private SubAggCollectionMode collectMode = SubAggCollectionMode.DEPTH_FIRST; - - - IncludeExclude includeExclude; - - public TermsAggregator.BucketCountThresholds getBucketCountThresholds() {return bucketCountThresholds;} - - //These are the results of the parsing. - - public String getExecutionHint() { - return executionHint; - } - - public IncludeExclude getIncludeExclude() { - return includeExclude; - } - - public SubAggCollectionMode getCollectionMode() { - return collectMode; - } - - public void parse(String aggregationName, XContentParser parser, SearchContext context, ValuesSourceParser vsParser, IncludeExclude.Parser incExcParser) throws IOException { - bucketCountThresholds = getDefaultBucketCountThresholds(); - XContentParser.Token token; - String currentFieldName = null; - - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - currentFieldName = parser.currentName(); - } else if (vsParser.token(currentFieldName, token, parser)) { - continue; - } else if (incExcParser.token(currentFieldName, token, parser)) { - continue; - } else if (token == XContentParser.Token.VALUE_STRING) { - if (context.parseFieldMatcher().match(currentFieldName, EXECUTION_HINT_FIELD_NAME)) { - executionHint = parser.text(); - } else if(context.parseFieldMatcher().match(currentFieldName, SubAggCollectionMode.KEY)){ - collectMode = SubAggCollectionMode.parse(parser.text(), context.parseFieldMatcher()); - } else if (context.parseFieldMatcher().match(currentFieldName, REQUIRED_SIZE_FIELD_NAME)) { - bucketCountThresholds.setRequiredSize(parser.intValue()); - } else { - parseSpecial(aggregationName, parser, context, token, currentFieldName); - } - } else if (token == XContentParser.Token.VALUE_NUMBER) { - if (context.parseFieldMatcher().match(currentFieldName, REQUIRED_SIZE_FIELD_NAME)) { - bucketCountThresholds.setRequiredSize(parser.intValue()); - } else if (context.parseFieldMatcher().match(currentFieldName, SHARD_SIZE_FIELD_NAME)) { - bucketCountThresholds.setShardSize(parser.intValue()); - } else if (context.parseFieldMatcher().match(currentFieldName, MIN_DOC_COUNT_FIELD_NAME)) { - bucketCountThresholds.setMinDocCount(parser.intValue()); - } else if (context.parseFieldMatcher().match(currentFieldName, SHARD_MIN_DOC_COUNT_FIELD_NAME)) { - bucketCountThresholds.setShardMinDocCount(parser.longValue()); - } else { - parseSpecial(aggregationName, parser, context, token, currentFieldName); - } - } else { - parseSpecial(aggregationName, parser, context, token, currentFieldName); - } - } - includeExclude = incExcParser.includeExclude(); - } - - public abstract void parseSpecial(String aggregationName, XContentParser parser, SearchContext context, XContentParser.Token token, String currentFieldName) throws IOException; - - protected abstract TermsAggregator.BucketCountThresholds getDefaultBucketCountThresholds(); -} diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParser.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParser.java new file mode 100644 index 0000000000..16ec4ab9de --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParser.java @@ -0,0 +1,130 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket.terms; + +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParseFieldMatcher; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentParser.Token; +import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds; +import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; +import org.elasticsearch.search.aggregations.support.AbstractValuesSourceParser.AnyValuesSourceParser; +import org.elasticsearch.search.aggregations.support.ValueType; +import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; + +import java.io.IOException; +import java.util.Map; + +public abstract class AbstractTermsParser extends AnyValuesSourceParser { + + public static final ParseField EXECUTION_HINT_FIELD_NAME = new ParseField("execution_hint"); + public static final ParseField SHARD_SIZE_FIELD_NAME = new ParseField("shard_size"); + public static final ParseField MIN_DOC_COUNT_FIELD_NAME = new ParseField("min_doc_count"); + public static final ParseField SHARD_MIN_DOC_COUNT_FIELD_NAME = new ParseField("shard_min_doc_count"); + public static final ParseField REQUIRED_SIZE_FIELD_NAME = new ParseField("size"); + + public IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(); + + protected AbstractTermsParser() { + super(true, true); + } + + @Override + protected final ValuesSourceAggregatorFactory createFactory(String aggregationName, ValuesSourceType valuesSourceType, + ValueType targetValueType, Map otherOptions) { + BucketCountThresholds bucketCountThresholds = getDefaultBucketCountThresholds(); + Integer requiredSize = (Integer) otherOptions.get(REQUIRED_SIZE_FIELD_NAME); + if (requiredSize != null && requiredSize != -1) { + bucketCountThresholds.setRequiredSize(requiredSize); + } + Integer shardSize = (Integer) otherOptions.get(SHARD_SIZE_FIELD_NAME); + if (shardSize != null && shardSize != -1) { + bucketCountThresholds.setShardSize(shardSize); + } + Long minDocCount = (Long) otherOptions.get(MIN_DOC_COUNT_FIELD_NAME); + if (minDocCount != null && minDocCount != -1) { + bucketCountThresholds.setMinDocCount(minDocCount); + } + Long shardMinDocCount = (Long) otherOptions.get(SHARD_MIN_DOC_COUNT_FIELD_NAME); + if (shardMinDocCount != null && shardMinDocCount != -1) { + bucketCountThresholds.setShardMinDocCount(shardMinDocCount); + } + SubAggCollectionMode collectMode = (SubAggCollectionMode) otherOptions.get(SubAggCollectionMode.KEY); + String executionHint = (String) 
otherOptions.get(EXECUTION_HINT_FIELD_NAME); + IncludeExclude incExc = incExcParser.createIncludeExclude(otherOptions); + return doCreateFactory(aggregationName, valuesSourceType, targetValueType, bucketCountThresholds, collectMode, executionHint, + incExc, + otherOptions); + } + + protected abstract ValuesSourceAggregatorFactory doCreateFactory(String aggregationName, + ValuesSourceType valuesSourceType, + ValueType targetValueType, BucketCountThresholds bucketCountThresholds, SubAggCollectionMode collectMode, String executionHint, + IncludeExclude incExc, Map otherOptions); + + @Override + protected boolean token(String aggregationName, String currentFieldName, Token token, XContentParser parser, + ParseFieldMatcher parseFieldMatcher, Map otherOptions) throws IOException { + if (incExcParser.token(currentFieldName, token, parser, parseFieldMatcher, otherOptions)) { + return true; + } else if (token == XContentParser.Token.VALUE_STRING) { + if (parseFieldMatcher.match(currentFieldName, EXECUTION_HINT_FIELD_NAME)) { + otherOptions.put(EXECUTION_HINT_FIELD_NAME, parser.text()); + return true; + } else if (parseFieldMatcher.match(currentFieldName, SubAggCollectionMode.KEY)) { + otherOptions.put(SubAggCollectionMode.KEY, SubAggCollectionMode.parse(parser.text(), parseFieldMatcher)); + return true; + } else if (parseFieldMatcher.match(currentFieldName, REQUIRED_SIZE_FIELD_NAME)) { + otherOptions.put(REQUIRED_SIZE_FIELD_NAME, parser.intValue()); + return true; + } else if (parseSpecial(aggregationName, parser, parseFieldMatcher, token, currentFieldName, otherOptions)) { + return true; + } + } else if (token == XContentParser.Token.VALUE_NUMBER) { + if (parseFieldMatcher.match(currentFieldName, REQUIRED_SIZE_FIELD_NAME)) { + otherOptions.put(REQUIRED_SIZE_FIELD_NAME, parser.intValue()); + return true; + } else if (parseFieldMatcher.match(currentFieldName, SHARD_SIZE_FIELD_NAME)) { + otherOptions.put(SHARD_SIZE_FIELD_NAME, parser.intValue()); + return true; + } else if (parseFieldMatcher.match(currentFieldName, MIN_DOC_COUNT_FIELD_NAME)) { + otherOptions.put(MIN_DOC_COUNT_FIELD_NAME, parser.longValue()); + return true; + } else if (parseFieldMatcher.match(currentFieldName, SHARD_MIN_DOC_COUNT_FIELD_NAME)) { + otherOptions.put(SHARD_MIN_DOC_COUNT_FIELD_NAME, parser.longValue()); + return true; + } else if (parseSpecial(aggregationName, parser, parseFieldMatcher, token, currentFieldName, otherOptions)) { + return true; + } + } else if (parseSpecial(aggregationName, parser, parseFieldMatcher, token, currentFieldName, otherOptions)) { + return true; + } + return false; + } + + public abstract boolean parseSpecial(String aggregationName, XContentParser parser, ParseFieldMatcher parseFieldMatcher, + XContentParser.Token token, String currentFieldName, Map otherOptions) throws IOException; + + protected abstract TermsAggregator.BucketCountThresholds getDefaultBucketCountThresholds(); + +} diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalOrder.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalOrder.java index 4e3e28a1b3..b5e1e81479 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalOrder.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalOrder.java @@ -38,6 +38,7 @@ import java.util.Comparator; import java.util.Iterator; import java.util.LinkedList; import java.util.List; +import java.util.Objects; /** * @@ -263,6 +264,23 @@ class InternalOrder extends 
Terms.Order { return new CompoundOrderComparator(orderElements, aggregator); } + @Override + public int hashCode() { + return Objects.hash(orderElements); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + CompoundOrder other = (CompoundOrder) obj; + return Objects.equals(orderElements, other.orderElements); + } + public static class CompoundOrderComparator implements Comparator { private List compoundOrder; @@ -306,7 +324,7 @@ class InternalOrder extends Terms.Order { } public static Terms.Order readOrder(StreamInput in) throws IOException { - return readOrder(in, true); + return readOrder(in, false); } public static Terms.Order readOrder(StreamInput in, boolean absoluteOrder) throws IOException { @@ -332,4 +350,22 @@ class InternalOrder extends Terms.Order { } } } + + @Override + public int hashCode() { + return Objects.hash(id, asc); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + InternalOrder other = (InternalOrder) obj; + return Objects.equals(id, other.id) + && Objects.equals(asc, other.asc); + } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/Terms.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/Terms.java index 224e42cb7f..16fb7f1d81 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/Terms.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/Terms.java @@ -82,7 +82,7 @@ public interface Terms extends MultiBucketsAggregation { * Get the bucket for the given term, or null if there is no such bucket. */ Bucket getBucketByKey(String term); - + /** * Get an upper bound of the error on document counts in this aggregation. 
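     * The error can arise because each shard returns only its own top terms, so a
     * returned term's count may be missing contributions from shards on which that
     * term did not make the shard-local list.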
*/ @@ -166,5 +166,11 @@ public interface Terms extends MultiBucketsAggregation { abstract byte id(); + @Override + public abstract int hashCode(); + + @Override + public abstract boolean equals(Object obj); + } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregator.java index 7971d1f5ae..7ea88c9a10 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregator.java @@ -21,7 +21,10 @@ package org.elasticsearch.search.aggregations.bucket.terms; import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.common.Explicit; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; @@ -36,99 +39,136 @@ import java.io.IOException; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; public abstract class TermsAggregator extends BucketsAggregator { - public static class BucketCountThresholds { - private Explicit minDocCount; - private Explicit shardMinDocCount; - private Explicit requiredSize; - private Explicit shardSize; + public static class BucketCountThresholds implements Writeable, ToXContent { - public BucketCountThresholds(long minDocCount, long shardMinDocCount, int requiredSize, int shardSize) { - this.minDocCount = new Explicit<>(minDocCount, false); - this.shardMinDocCount = new Explicit<>(shardMinDocCount, false); - this.requiredSize = new Explicit<>(requiredSize, false); - this.shardSize = new Explicit<>(shardSize, false); + private static final BucketCountThresholds PROTOTYPE = new BucketCountThresholds(-1, -1, -1, -1); + + private long minDocCount; + private long shardMinDocCount; + private int requiredSize; + private int shardSize; + + public static BucketCountThresholds readFromStream(StreamInput in) throws IOException { + return PROTOTYPE.readFrom(in); } - public BucketCountThresholds() { - this(-1, -1, -1, -1); + + public BucketCountThresholds(long minDocCount, long shardMinDocCount, int requiredSize, int shardSize) { + this.minDocCount = minDocCount; + this.shardMinDocCount = shardMinDocCount; + this.requiredSize = requiredSize; + this.shardSize = shardSize; } public BucketCountThresholds(BucketCountThresholds bucketCountThresholds) { - this(bucketCountThresholds.minDocCount.value(), bucketCountThresholds.shardMinDocCount.value(), bucketCountThresholds.requiredSize.value(), bucketCountThresholds.shardSize.value()); + this(bucketCountThresholds.minDocCount, bucketCountThresholds.shardMinDocCount, bucketCountThresholds.requiredSize, + bucketCountThresholds.shardSize); } public void ensureValidity() { - if (shardSize.value() == 0) { + if (shardSize == 0) { setShardSize(Integer.MAX_VALUE); } - if (requiredSize.value() == 0) { + if (requiredSize == 0) { setRequiredSize(Integer.MAX_VALUE); } // shard_size cannot be smaller than size as we need to at least fetch entries from every shards in order to return - if (shardSize.value() < requiredSize.value()) { - setShardSize(requiredSize.value()); + if 
(shardSize < requiredSize) {
+                setShardSize(requiredSize);
             }
 
             // shard_min_doc_count should not be larger than min_doc_count because this can cause buckets to be removed that would match the min_doc_count criteria
-            if (shardMinDocCount.value() > minDocCount.value()) {
-                setShardMinDocCount(minDocCount.value());
+            if (shardMinDocCount > minDocCount) {
+                setShardMinDocCount(minDocCount);
             }
 
-            if (requiredSize.value() < 0 || minDocCount.value() < 0) {
+            if (requiredSize < 0 || minDocCount < 0) {
                 throw new ElasticsearchException("parameters [requiredSize] and [minDocCount] must be >=0 in terms aggregation.");
             }
         }
 
         public long getShardMinDocCount() {
-            return shardMinDocCount.value();
+            return shardMinDocCount;
         }
 
         public void setShardMinDocCount(long shardMinDocCount) {
-            this.shardMinDocCount = new Explicit<>(shardMinDocCount, true);
+            this.shardMinDocCount = shardMinDocCount;
         }
 
         public long getMinDocCount() {
-            return minDocCount.value();
+            return minDocCount;
         }
 
         public void setMinDocCount(long minDocCount) {
-            this.minDocCount = new Explicit<>(minDocCount, true);
+            this.minDocCount = minDocCount;
         }
 
         public int getRequiredSize() {
-            return requiredSize.value();
+            return requiredSize;
         }
 
         public void setRequiredSize(int requiredSize) {
-            this.requiredSize = new Explicit<>(requiredSize, true);
+            this.requiredSize = requiredSize;
         }
 
         public int getShardSize() {
-            return shardSize.value();
+            return shardSize;
         }
 
         public void setShardSize(int shardSize) {
-            this.shardSize = new Explicit<>(shardSize, true);
+            this.shardSize = shardSize;
         }
 
-        public void toXContent(XContentBuilder builder) throws IOException {
-            if (requiredSize.explicit()) {
-                builder.field(AbstractTermsParametersParser.REQUIRED_SIZE_FIELD_NAME.getPreferredName(), requiredSize.value());
-            }
-            if (shardSize.explicit()) {
-                builder.field(AbstractTermsParametersParser.SHARD_SIZE_FIELD_NAME.getPreferredName(), shardSize.value());
-            }
-            if (minDocCount.explicit()) {
-                builder.field(AbstractTermsParametersParser.MIN_DOC_COUNT_FIELD_NAME.getPreferredName(), minDocCount.value());
+        @Override
+        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+            builder.field(TermsAggregatorFactory.REQUIRED_SIZE_FIELD_NAME.getPreferredName(), requiredSize);
+            builder.field(TermsAggregatorFactory.SHARD_SIZE_FIELD_NAME.getPreferredName(), shardSize);
+            builder.field(TermsAggregatorFactory.MIN_DOC_COUNT_FIELD_NAME.getPreferredName(), minDocCount);
+            builder.field(TermsAggregatorFactory.SHARD_MIN_DOC_COUNT_FIELD_NAME.getPreferredName(), shardMinDocCount);
+            return builder;
+        }
+
+        @Override
+        public BucketCountThresholds readFrom(StreamInput in) throws IOException {
+            int requiredSize = in.readInt();
+            int shardSize = in.readInt();
+            long minDocCount = in.readLong();
+            long shardMinDocCount = in.readLong();
+            return new BucketCountThresholds(minDocCount, shardMinDocCount, requiredSize, shardSize);
+        }
+
+        @Override
+        public void writeTo(StreamOutput out) throws IOException {
+            out.writeInt(requiredSize);
+            out.writeInt(shardSize);
+            out.writeLong(minDocCount);
+            out.writeLong(shardMinDocCount);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(requiredSize, shardSize, minDocCount, shardMinDocCount);
+        }
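
BucketCountThresholds now carries plain value semantics plus Writeable support, so a thresholds object can be round-tripped over the wire and compared directly. A minimal test-style sketch of that contract (a sketch only: it borrows the BytesStreamOutput/StreamInput.wrap pattern used by the tests later in this patch, and the threshold values are illustrative):

    // constructor args are minDocCount, shardMinDocCount, requiredSize, shardSize
    BucketCountThresholds thresholds = new BucketCountThresholds(2, 1, 10, 25);
    thresholds.ensureValidity(); // a no-op here; it would raise shardSize to requiredSize if it were smaller
    try (BytesStreamOutput out = new BytesStreamOutput()) {
        thresholds.writeTo(out);
        try (StreamInput in = StreamInput.wrap(out.bytes())) {
            BucketCountThresholds read = BucketCountThresholds.readFromStream(in);
            assert thresholds.equals(read) && thresholds.hashCode() == read.hashCode();
        }
    }

+
+        @Override
+        public boolean equals(Object obj) {
+            if (obj == null) {
+                return false;
+            }
-            if (shardMinDocCount.explicit()) {
-                builder.field(AbstractTermsParametersParser.SHARD_MIN_DOC_COUNT_FIELD_NAME.getPreferredName(), shardMinDocCount.value());
+            if (getClass() != obj.getClass()) {
+                return false;
+            }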
+            BucketCountThresholds other = (BucketCountThresholds) obj;
+            return Objects.equals(requiredSize, other.requiredSize)
+                    && Objects.equals(shardSize, other.shardSize)
+                    && Objects.equals(minDocCount, other.minDocCount)
+                    && Objects.equals(shardMinDocCount, other.shardMinDocCount);
         }
     }
diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java
index f668e3ad99..29fbe3bae1 100644
--- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java
@@ -21,28 +21,52 @@
 package org.elasticsearch.search.aggregations.bucket.terms;
 
 import org.apache.lucene.search.IndexSearcher;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.ParseFieldMatcher;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.search.aggregations.AggregationExecutionException;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
 import org.elasticsearch.search.aggregations.AggregatorFactories;
 import org.elasticsearch.search.aggregations.InternalAggregation;
 import org.elasticsearch.search.aggregations.NonCollectingAggregator;
+import org.elasticsearch.search.aggregations.bucket.BucketUtils;
+import org.elasticsearch.search.aggregations.bucket.terms.Terms.Order;
+import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds;
 import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
 import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
 import org.elasticsearch.search.aggregations.support.AggregationContext;
+import org.elasticsearch.search.aggregations.support.ValueType;
 import org.elasticsearch.search.aggregations.support.ValuesSource;
 import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory;
-import org.elasticsearch.search.aggregations.support.ValuesSourceParser;
+import org.elasticsearch.search.aggregations.support.ValuesSourceType;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 
 /**
  *
  */
 public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource> {
 
+    public static final ParseField EXECUTION_HINT_FIELD_NAME = new ParseField("execution_hint");
+    public static final ParseField SHARD_SIZE_FIELD_NAME = new ParseField("shard_size");
+    public static final ParseField MIN_DOC_COUNT_FIELD_NAME = new ParseField("min_doc_count");
+    public static final ParseField SHARD_MIN_DOC_COUNT_FIELD_NAME = new ParseField("shard_min_doc_count");
+    public static final ParseField REQUIRED_SIZE_FIELD_NAME = new ParseField("size");
+
+    static final TermsAggregator.BucketCountThresholds DEFAULT_BUCKET_COUNT_THRESHOLDS = new TermsAggregator.BucketCountThresholds(1, 0, 10,
+            -1);
+
+    public static final ParseField SHOW_TERM_DOC_COUNT_ERROR = new ParseField("show_term_doc_count_error");
+    public static final ParseField ORDER_FIELD = new ParseField("order");
+
     public enum ExecutionMode {
 
         MAP(new ParseField("map")) {
@@ -155,28 +179,100 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource> {
 
+    private List<Terms.Order> orders = Collections.singletonList(Terms.Order.count(false));
+    private IncludeExclude includeExclude = null;
+    private String executionHint = null;
+    private SubAggCollectionMode collectMode = SubAggCollectionMode.DEPTH_FIRST;
+    private TermsAggregator.BucketCountThresholds bucketCountThresholds = new TermsAggregator.BucketCountThresholds(
+            DEFAULT_BUCKET_COUNT_THRESHOLDS);
+    private boolean showTermDocCountError = false;
+
+    public TermsAggregatorFactory(String name, ValuesSourceType valuesSourceType, ValueType valueType) {
+        super(name, StringTerms.TYPE, valuesSourceType, valueType);
+    }
+
+    public TermsAggregator.BucketCountThresholds bucketCountThresholds() {
+        return bucketCountThresholds;
+    }
+
+    public void bucketCountThresholds(TermsAggregator.BucketCountThresholds bucketCountThresholds) {
         this.bucketCountThresholds = bucketCountThresholds;
-        this.collectMode = executionMode;
+    }
+
+    /**
+     * Sets the order in which the buckets will be returned.
+     */
+    public void order(List<Terms.Order> order) {
+        this.orders = order;
+    }
+
+    /**
+     * Gets the order in which the buckets will be returned.
+     */
+    public List<Terms.Order> order() {
+        return orders;
+    }
+
+    /**
+     * Expert: sets an execution hint for the aggregation.
+     */
+    public void executionHint(String executionHint) {
+        this.executionHint = executionHint;
+    }
+
+    /**
+     * Expert: gets the execution hint for the aggregation.
+     */
+    public String executionHint() {
+        return executionHint;
+    }
+
+    /**
+     * Expert: set the collection mode.
+     */
+    public void collectMode(SubAggCollectionMode mode) {
+        this.collectMode = mode;
+    }
+
+    /**
+     * Expert: get the collection mode.
+     */
+    public SubAggCollectionMode collectMode() {
+        return collectMode;
+    }
+
+    /**
+     * Set terms to include and exclude from the aggregation results
+     */
+    public void includeExclude(IncludeExclude includeExclude) {
+        this.includeExclude = includeExclude;
+    }
+
+    /**
+     * Get terms to include and exclude from the aggregation results
+     */
+    public IncludeExclude includeExclude() {
+        return includeExclude;
+    }
+
+    /**
+     * Get whether doc count error will be returned for individual terms
+     */
+    public boolean showTermDocCountError() {
+        return showTermDocCountError;
+    }
+
+    /**
+     * Set whether doc count error will be returned for individual terms
+     */
+    public void showTermDocCountError(boolean showTermDocCountError) {
         this.showTermDocCountError = showTermDocCountError;
     }
 
     @Override
     protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent,
             List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
+        Terms.Order order = resolveOrder(orders);
         final InternalAggregation aggregation = new UnmappedTerms(name, order, bucketCountThresholds.getRequiredSize(),
                 bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), pipelineAggregators, metaData);
         return new NonCollectingAggregator(name, aggregationContext, parent, factories, pipelineAggregators, metaData) {
@@ -192,13 +288,38 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource> {
+
+    private Terms.Order resolveOrder(List<Terms.Order> orders) {
+        Terms.Order order;
+        if (orders.size() == 1 && (orders.get(0) == InternalOrder.TERM_ASC || orders.get(0) == InternalOrder.TERM_DESC)) {
+            // If order is only terms order then we don't need compound
+            // ordering
+            order = orders.get(0);
+        } else {
+            // for all other cases we need compound order so term order asc
+            // can be added to make the order deterministic
+            order = Order.compound(orders);
+        }
+        return order;
+    }
+
     @Override
     protected Aggregator
doCreateInternal(ValuesSource valuesSource, AggregationContext aggregationContext, Aggregator parent, boolean collectsFromSingleBucket, List pipelineAggregators, Map metaData) throws IOException { + Terms.Order order = resolveOrder(orders); if (collectsFromSingleBucket == false) { return asMultiBucketAggregator(this, aggregationContext, parent); } + BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(this.bucketCountThresholds); + if (!(order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC) + && bucketCountThresholds.getShardSize() == DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) { + // The user has not made a shardSize selection. Use default + // heuristic to avoid any wrong-ranking caused by distributed + // counting + bucketCountThresholds.setShardSize(BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize(), + aggregationContext.searchContext().numberOfShards())); + } + bucketCountThresholds.ensureValidity(); if (valuesSource instanceof ValuesSource.Bytes) { ExecutionMode execution = null; if (executionHint != null) { @@ -278,4 +399,76 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory innerReadFrom(String name, ValuesSourceType valuesSourceType, + ValueType targetValueType, StreamInput in) throws IOException { + TermsAggregatorFactory factory = new TermsAggregatorFactory(name, valuesSourceType, targetValueType); + factory.bucketCountThresholds = BucketCountThresholds.readFromStream(in); + factory.collectMode = SubAggCollectionMode.BREADTH_FIRST.readFrom(in); + factory.executionHint = in.readOptionalString(); + if (in.readBoolean()) { + factory.includeExclude = IncludeExclude.readFromStream(in); + } + int numOrders = in.readVInt(); + List orders = new ArrayList<>(numOrders); + for (int i = 0; i < numOrders; i++) { + orders.add(InternalOrder.Streams.readOrder(in)); + } + factory.orders = orders; + factory.showTermDocCountError = in.readBoolean(); + return factory; + } + + @Override + protected void innerWriteTo(StreamOutput out) throws IOException { + bucketCountThresholds.writeTo(out); + collectMode.writeTo(out); + out.writeOptionalString(executionHint); + boolean hasIncExc = includeExclude != null; + out.writeBoolean(hasIncExc); + if (hasIncExc) { + includeExclude.writeTo(out); + } + out.writeVInt(orders.size()); + for (Terms.Order order : orders) { + InternalOrder.Streams.writeOrder(order, out); + } + out.writeBoolean(showTermDocCountError); + } + + @Override + protected int innerHashCode() { + return Objects.hash(bucketCountThresholds, collectMode, executionHint, includeExclude, orders, showTermDocCountError); + } + + @Override + protected boolean innerEquals(Object obj) { + TermsAggregatorFactory other = (TermsAggregatorFactory) obj; + return Objects.equals(bucketCountThresholds, other.bucketCountThresholds) + && Objects.equals(collectMode, other.collectMode) + && Objects.equals(executionHint, other.executionHint) + && Objects.equals(includeExclude, other.includeExclude) + && Objects.equals(orders, other.orders) + && Objects.equals(showTermDocCountError, other.showTermDocCountError); + } + } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java index 9bc1f7a9a6..3d8aff9b24 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java 
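
With the streaming methods above in place, the refactored TermsAggregatorFactory is both programmatically configurable and wire-serializable. A hedged sketch of building one up (a sketch only: the setters and constants are the ones defined in TermsAggregatorFactory above, field() is assumed to come from the ValuesSourceAggregatorFactory base class, and the field name and values are illustrative):

    TermsAggregatorFactory factory = new TermsAggregatorFactory("genres", ValuesSourceType.ANY, ValueType.STRING);
    factory.field("genre");
    factory.order(Collections.singletonList(Terms.Order.count(false)));
    factory.collectMode(SubAggCollectionMode.BREADTH_FIRST);
    factory.executionHint("map"); // one of the ExecutionMode parse fields
    factory.includeExclude(new IncludeExclude(new RegExp("rock.*"), null));
    factory.showTermDocCountError(true);
    factory.bucketCountThresholds().setRequiredSize(5);
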
@@ -97,7 +97,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { this.includePattern = regex; return this; } - + /** * Define a set of terms that should be aggregated. */ @@ -107,8 +107,8 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { } this.includeTerms = terms; return this; - } - + } + /** * Define a set of terms that should be aggregated. */ @@ -118,16 +118,16 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { } this.includeTerms = longsArrToStringArr(terms); return this; - } - + } + private String[] longsArrToStringArr(long[] terms) { String[] termsAsString = new String[terms.length]; for (int i = 0; i < terms.length; i++) { termsAsString[i] = Long.toString(terms[i]); } return termsAsString; - } - + } + /** * Define a set of terms that should be aggregated. @@ -146,7 +146,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { termsAsString[i] = Double.toString(terms[i]); } return termsAsString; - } + } /** * Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular @@ -161,7 +161,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { this.excludePattern = regex; return this; } - + /** * Define a set of terms that should not be aggregated. */ @@ -171,9 +171,9 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { } this.excludeTerms = terms; return this; - } - - + } + + /** * Define a set of terms that should not be aggregated. */ @@ -194,9 +194,9 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { } this.excludeTerms = doubleArrToStringArr(terms); return this; - } - - + } + + /** * When using scripts, the value type indicates the types of the values the script is generating. @@ -241,13 +241,13 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { @Override protected XContentBuilder doInternalXContent(XContentBuilder builder, Params params) throws IOException { - bucketCountThresholds.toXContent(builder); + bucketCountThresholds.toXContent(builder, params); if (showTermDocCountError != null) { - builder.field(AbstractTermsParametersParser.SHOW_TERM_DOC_COUNT_ERROR.getPreferredName(), showTermDocCountError); + builder.field(TermsAggregatorFactory.SHOW_TERM_DOC_COUNT_ERROR.getPreferredName(), showTermDocCountError); } if (executionHint != null) { - builder.field(AbstractTermsParametersParser.EXECUTION_HINT_FIELD_NAME.getPreferredName(), executionHint); + builder.field(TermsAggregatorFactory.EXECUTION_HINT_FIELD_NAME.getPreferredName(), executionHint); } if (valueType != null) { builder.field("value_type", valueType.name().toLowerCase(Locale.ROOT)); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParametersParser.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParametersParser.java deleted file mode 100644 index c8138b7b45..0000000000 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParametersParser.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - -package org.elasticsearch.search.aggregations.bucket.terms; - -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.search.SearchParseException; -import org.elasticsearch.search.internal.SearchContext; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - - -public class TermsParametersParser extends AbstractTermsParametersParser { - - private static final TermsAggregator.BucketCountThresholds DEFAULT_BUCKET_COUNT_THRESHOLDS = new TermsAggregator.BucketCountThresholds(1, 0, 10, -1); - - public List getOrderElements() { - return orderElements; - } - - public boolean showTermDocCountError() { - return showTermDocCountError; - } - - List orderElements; - private boolean showTermDocCountError = false; - - public TermsParametersParser() { - orderElements = new ArrayList<>(1); - orderElements.add(new OrderElement("_count", false)); - } - - @Override - public void parseSpecial(String aggregationName, XContentParser parser, SearchContext context, XContentParser.Token token, String currentFieldName) throws IOException { - if (token == XContentParser.Token.START_OBJECT) { - if ("order".equals(currentFieldName)) { - this.orderElements = Collections.singletonList(parseOrderParam(aggregationName, parser, context)); - } else { - throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" - + currentFieldName + "].", parser.getTokenLocation()); - } - } else if (token == XContentParser.Token.START_ARRAY) { - if ("order".equals(currentFieldName)) { - orderElements = new ArrayList<>(); - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - if (token == XContentParser.Token.START_OBJECT) { - OrderElement orderParam = parseOrderParam(aggregationName, parser, context); - orderElements.add(orderParam); - } else { - throw new SearchParseException(context, "Order elements must be of type object in [" + aggregationName + "].", - parser.getTokenLocation()); - } - } - } else { - throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" - + currentFieldName + "].", parser.getTokenLocation()); - } - } else if (token == XContentParser.Token.VALUE_BOOLEAN) { - if (context.parseFieldMatcher().match(currentFieldName, SHOW_TERM_DOC_COUNT_ERROR)) { - showTermDocCountError = parser.booleanValue(); - } - } else { - throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName - + "].", parser.getTokenLocation()); - } - } - - private OrderElement parseOrderParam(String aggregationName, XContentParser parser, SearchContext context) throws IOException { - XContentParser.Token token; - OrderElement orderParam = null; - String orderKey = null; - boolean orderAsc = false; - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - orderKey = parser.currentName(); - } else if (token == XContentParser.Token.VALUE_STRING) { - String dir = parser.text(); - if 
("asc".equalsIgnoreCase(dir)) { - orderAsc = true; - } else if ("desc".equalsIgnoreCase(dir)) { - orderAsc = false; - } else { - throw new SearchParseException(context, "Unknown terms order direction [" + dir + "] in terms aggregation [" - + aggregationName + "]", parser.getTokenLocation()); - } - } else { - throw new SearchParseException(context, "Unexpected token " + token + " for [order] in [" + aggregationName + "].", - parser.getTokenLocation()); - } - } - if (orderKey == null) { - throw new SearchParseException(context, "Must specify at least one field for [order] in [" + aggregationName + "].", - parser.getTokenLocation()); - } else { - orderParam = new OrderElement(orderKey, orderAsc); - } - return orderParam; - } - - static class OrderElement { - private final String key; - private final boolean asc; - - public OrderElement(String key, boolean asc) { - this.key = key; - this.asc = asc; - } - - public String key() { - return key; - } - - public boolean asc() { - return asc; - } - - - } - - @Override - public TermsAggregator.BucketCountThresholds getDefaultBucketCountThresholds() { - return new TermsAggregator.BucketCountThresholds(DEFAULT_BUCKET_COUNT_THRESHOLDS); - } -} diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java index 4398ec655b..4a40816ab2 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java @@ -18,24 +18,32 @@ */ package org.elasticsearch.search.aggregations.bucket.terms; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParseFieldMatcher; +import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.common.xcontent.XContentParser.Token; +import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; import org.elasticsearch.search.aggregations.AggregatorFactory; -import org.elasticsearch.search.aggregations.bucket.BucketUtils; import org.elasticsearch.search.aggregations.bucket.terms.Terms.Order; -import org.elasticsearch.search.aggregations.bucket.terms.TermsParametersParser.OrderElement; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds; import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; -import org.elasticsearch.search.aggregations.support.ValuesSourceParser; -import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.search.aggregations.support.ValueType; +import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.Map; /** * */ -public class TermsParser implements Aggregator.Parser { +public class TermsParser extends AbstractTermsParser { + @Override public String type() { @@ -43,38 +51,123 @@ public class TermsParser implements Aggregator.Parser { } @Override - public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException { - TermsParametersParser aggParser = new 
TermsParametersParser(); - ValuesSourceParser vsParser = ValuesSourceParser.any(aggregationName, StringTerms.TYPE, context).scriptable(true).formattable(true).build(); - IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(); - aggParser.parse(aggregationName, parser, context, vsParser, incExcParser); - - List orderElements = aggParser.getOrderElements(); - List orders = new ArrayList<>(orderElements.size()); - for (OrderElement orderElement : orderElements) { - orders.add(resolveOrder(orderElement.key(), orderElement.asc())); + protected ValuesSourceAggregatorFactory doCreateFactory(String aggregationName, ValuesSourceType valuesSourceType, + ValueType targetValueType, BucketCountThresholds bucketCountThresholds, SubAggCollectionMode collectMode, String executionHint, + IncludeExclude incExc, Map otherOptions) { + TermsAggregatorFactory factory = new TermsAggregatorFactory(aggregationName, valuesSourceType, targetValueType); + List orderElements = (List) otherOptions.get(TermsAggregatorFactory.ORDER_FIELD); + if (orderElements != null) { + List orders = new ArrayList<>(orderElements.size()); + for (OrderElement orderElement : orderElements) { + orders.add(resolveOrder(orderElement.key(), orderElement.asc())); + } + factory.order(orders); + } + if (bucketCountThresholds != null) { + factory.bucketCountThresholds(bucketCountThresholds); + } + if (collectMode != null) { + factory.collectMode(collectMode); + } + if (executionHint != null) { + factory.executionHint(executionHint); + } + if (incExc != null) { + factory.includeExclude(incExc); + } + Boolean showTermDocCountError = (Boolean) otherOptions.get(TermsAggregatorFactory.SHOW_TERM_DOC_COUNT_ERROR); + if (showTermDocCountError != null) { + factory.showTermDocCountError(showTermDocCountError); } - Terms.Order order; - if (orders.size() == 1 && (orders.get(0) == InternalOrder.TERM_ASC || orders.get(0) == InternalOrder.TERM_DESC)) - { - // If order is only terms order then we don't need compound ordering - order = orders.get(0); + return factory; + } + + @Override + public boolean parseSpecial(String aggregationName, XContentParser parser, ParseFieldMatcher parseFieldMatcher, Token token, + String currentFieldName, Map otherOptions) throws IOException { + if (token == XContentParser.Token.START_OBJECT) { + if (parseFieldMatcher.match(currentFieldName, TermsAggregatorFactory.ORDER_FIELD)) { + otherOptions.put(TermsAggregatorFactory.ORDER_FIELD, Collections.singletonList(parseOrderParam(aggregationName, parser))); + return true; + } + } else if (token == XContentParser.Token.START_ARRAY) { + if (parseFieldMatcher.match(currentFieldName, TermsAggregatorFactory.ORDER_FIELD)) { + List orderElements = new ArrayList<>(); + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + if (token == XContentParser.Token.START_OBJECT) { + OrderElement orderParam = parseOrderParam(aggregationName, parser); + orderElements.add(orderParam); + } else { + throw new ParsingException(parser.getTokenLocation(), + "Order elements must be of type object in [" + aggregationName + "]."); + } + } + otherOptions.put(TermsAggregatorFactory.ORDER_FIELD, orderElements); + return true; + } + } else if (token == XContentParser.Token.VALUE_BOOLEAN) { + if (parseFieldMatcher.match(currentFieldName, TermsAggregatorFactory.SHOW_TERM_DOC_COUNT_ERROR)) { + otherOptions.put(TermsAggregatorFactory.SHOW_TERM_DOC_COUNT_ERROR, parser.booleanValue()); + return true; + } } - else - { - // for all other cases we need compound order so term order asc can be 
added to make the order deterministic - order = Order.compound(orders); + return false; + } + + private OrderElement parseOrderParam(String aggregationName, XContentParser parser) throws IOException { + XContentParser.Token token; + OrderElement orderParam = null; + String orderKey = null; + boolean orderAsc = false; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + orderKey = parser.currentName(); + } else if (token == XContentParser.Token.VALUE_STRING) { + String dir = parser.text(); + if ("asc".equalsIgnoreCase(dir)) { + orderAsc = true; + } else if ("desc".equalsIgnoreCase(dir)) { + orderAsc = false; + } else { + throw new ParsingException(parser.getTokenLocation(), + "Unknown terms order direction [" + dir + "] in terms aggregation [" + aggregationName + "]"); + } + } else { + throw new ParsingException(parser.getTokenLocation(), + "Unexpected token " + token + " for [order] in [" + aggregationName + "]."); + } } - TermsAggregator.BucketCountThresholds bucketCountThresholds = aggParser.getBucketCountThresholds(); - if (!(order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC) - && bucketCountThresholds.getShardSize() == aggParser.getDefaultBucketCountThresholds().getShardSize()) { - // The user has not made a shardSize selection. Use default heuristic to avoid any wrong-ranking caused by distributed counting - bucketCountThresholds.setShardSize(BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize(), - context.numberOfShards())); + if (orderKey == null) { + throw new ParsingException(parser.getTokenLocation(), + "Must specify at least one field for [order] in [" + aggregationName + "]."); + } else { + orderParam = new OrderElement(orderKey, orderAsc); } - bucketCountThresholds.ensureValidity(); - return new TermsAggregatorFactory(aggregationName, vsParser.input(), order, bucketCountThresholds, aggParser.getIncludeExclude(), - aggParser.getExecutionHint(), aggParser.getCollectionMode(), aggParser.showTermDocCountError()); + return orderParam; + } + + static class OrderElement { + private final String key; + private final boolean asc; + + public OrderElement(String key, boolean asc) { + this.key = key; + this.asc = asc; + } + + public String key() { + return key; + } + + public boolean asc() { + return asc; + } + + } + + @Override + public TermsAggregator.BucketCountThresholds getDefaultBucketCountThresholds() { + return new TermsAggregator.BucketCountThresholds(TermsAggregatorFactory.DEFAULT_BUCKET_COUNT_THRESHOLDS); } static Terms.Order resolveOrder(String key, boolean asc) { @@ -87,10 +180,9 @@ public class TermsParser implements Aggregator.Parser { return Order.aggregation(key, asc); } - // NORELEASE implement this method when refactoring this aggregation @Override public AggregatorFactory[] getFactoryPrototypes() { - return null; + return new AggregatorFactory[] { new TermsAggregatorFactory(null, null, null) }; } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java index 9c33a987a9..f6df150a4c 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java @@ -34,12 +34,22 @@ import org.apache.lucene.util.automaton.CompiledAutomaton; import 
org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.RegExp; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParseFieldMatcher; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSource.Bytes.WithOrdinals; import java.io.IOException; +import java.util.Collections; import java.util.HashSet; +import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; @@ -48,7 +58,16 @@ import java.util.TreeSet; * Defines the include/exclude regular expression filtering for string terms aggregation. In this filtering logic, * exclusion has precedence, where the {@code include} is evaluated first and then the {@code exclude}. */ -public class IncludeExclude { +public class IncludeExclude implements Writeable, ToXContent { + + private static final IncludeExclude PROTOTYPE = new IncludeExclude(Collections.emptySortedSet(), Collections.emptySortedSet()); + private static final ParseField INCLUDE_FIELD = new ParseField("include"); + private static final ParseField EXCLUDE_FIELD = new ParseField("exclude"); + private static final ParseField PATTERN_FIELD = new ParseField("pattern"); + + public static IncludeExclude readFromStream(StreamInput in) throws IOException { + return PROTOTYPE.readFrom(in); + } // The includeValue and excludeValue ByteRefs which are the result of the parsing // process are converted into a LongFilter when used on numeric fields @@ -283,18 +302,14 @@ public class IncludeExclude { public static class Parser { - String include = null; - String exclude = null; - SortedSet includeValues; - SortedSet excludeValues; - - public boolean token(String currentFieldName, XContentParser.Token token, XContentParser parser) throws IOException { + public boolean token(String currentFieldName, XContentParser.Token token, XContentParser parser, + ParseFieldMatcher parseFieldMatcher, Map otherOptions) throws IOException { if (token == XContentParser.Token.VALUE_STRING) { - if ("include".equals(currentFieldName)) { - include = parser.text(); - } else if ("exclude".equals(currentFieldName)) { - exclude = parser.text(); + if (parseFieldMatcher.match(currentFieldName, INCLUDE_FIELD)) { + otherOptions.put(INCLUDE_FIELD, parser.text()); + } else if (parseFieldMatcher.match(currentFieldName, EXCLUDE_FIELD)) { + otherOptions.put(EXCLUDE_FIELD, parser.text()); } else { return false; } @@ -302,35 +317,35 @@ public class IncludeExclude { } if (token == XContentParser.Token.START_ARRAY) { - if ("include".equals(currentFieldName)) { - includeValues = new TreeSet<>(parseArrayToSet(parser)); + if (parseFieldMatcher.match(currentFieldName, INCLUDE_FIELD)) { + otherOptions.put(INCLUDE_FIELD, new TreeSet<>(parseArrayToSet(parser))); return true; } - if ("exclude".equals(currentFieldName)) { - excludeValues = new TreeSet<>(parseArrayToSet(parser)); + if (parseFieldMatcher.match(currentFieldName, EXCLUDE_FIELD)) { + otherOptions.put(EXCLUDE_FIELD, new TreeSet<>(parseArrayToSet(parser))); return true; } return false; } if (token == 
XContentParser.Token.START_OBJECT) { - if ("include".equals(currentFieldName)) { + if (parseFieldMatcher.match(currentFieldName, INCLUDE_FIELD)) { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); } else if (token == XContentParser.Token.VALUE_STRING) { - if ("pattern".equals(currentFieldName)) { - include = parser.text(); + if (parseFieldMatcher.match(currentFieldName, PATTERN_FIELD)) { + otherOptions.put(INCLUDE_FIELD, parser.text()); } } } - } else if ("exclude".equals(currentFieldName)) { + } else if (parseFieldMatcher.match(currentFieldName, EXCLUDE_FIELD)) { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); } else if (token == XContentParser.Token.VALUE_STRING) { - if ("pattern".equals(currentFieldName)) { - exclude = parser.text(); + if (parseFieldMatcher.match(currentFieldName, PATTERN_FIELD)) { + otherOptions.put(EXCLUDE_FIELD, parser.text()); } } } @@ -342,6 +357,7 @@ public class IncludeExclude { return false; } + private Set parseArrayToSet(XContentParser parser) throws IOException { final Set set = new HashSet<>(); if (parser.currentToken() != XContentParser.Token.START_ARRAY) { @@ -356,7 +372,27 @@ public class IncludeExclude { return set; } - public IncludeExclude includeExclude() { + public IncludeExclude createIncludeExclude(Map otherOptions) { + Object includeObject = otherOptions.get(INCLUDE_FIELD); + String include = null; + SortedSet includeValues = null; + if (includeObject != null) { + if (includeObject instanceof String) { + include = (String) includeObject; + } else if (includeObject instanceof SortedSet) { + includeValues = (SortedSet) includeObject; + } + } + Object excludeObject = otherOptions.get(EXCLUDE_FIELD); + String exclude = null; + SortedSet excludeValues = null; + if (excludeObject != null) { + if (excludeObject instanceof String) { + exclude = (String) excludeObject; + } else if (excludeObject instanceof SortedSet) { + excludeValues = (SortedSet) excludeObject; + } + } RegExp includePattern = include != null ? new RegExp(include) : null; RegExp excludePattern = exclude != null ? 
new RegExp(exclude) : null; if (includePattern != null || excludePattern != null) { @@ -444,4 +480,111 @@ public class IncludeExclude { return result; } + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + if (include != null) { + builder.field(INCLUDE_FIELD.getPreferredName(), include.getOriginalString()); + } + if (includeValues != null) { + builder.startArray(INCLUDE_FIELD.getPreferredName()); + for (BytesRef value : includeValues) { + builder.value(value.utf8ToString()); + } + builder.endArray(); + } + if (exclude != null) { + builder.field(EXCLUDE_FIELD.getPreferredName(), exclude.getOriginalString()); + } + if (excludeValues != null) { + builder.startArray(EXCLUDE_FIELD.getPreferredName()); + for (BytesRef value : excludeValues) { + builder.value(value.utf8ToString()); + } + builder.endArray(); + } + return builder; + } + + @Override + public IncludeExclude readFrom(StreamInput in) throws IOException { + if (in.readBoolean()) { + String includeString = in.readOptionalString(); + RegExp include = null; + if (includeString != null) { + include = new RegExp(includeString); + } + String excludeString = in.readOptionalString(); + RegExp exclude = null; + if (excludeString != null) { + exclude = new RegExp(excludeString); + } + return new IncludeExclude(include, exclude); + } else { + SortedSet includes = null; + if (in.readBoolean()) { + int size = in.readVInt(); + includes = new TreeSet<>(); + for (int i = 0; i < size; i++) { + includes.add(in.readBytesRef()); + } + } + SortedSet excludes = null; + if (in.readBoolean()) { + int size = in.readVInt(); + excludes = new TreeSet<>(); + for (int i = 0; i < size; i++) { + excludes.add(in.readBytesRef()); + } + } + return new IncludeExclude(includes, excludes); + } + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + boolean regexBased = isRegexBased(); + out.writeBoolean(regexBased); + if (regexBased) { + out.writeOptionalString(include == null ? null : include.getOriginalString()); + out.writeOptionalString(exclude == null ? null : exclude.getOriginalString()); + } else { + boolean hasIncludes = includeValues != null; + out.writeBoolean(hasIncludes); + if (hasIncludes) { + out.writeVInt(includeValues.size()); + for (BytesRef value : includeValues) { + out.writeBytesRef(value); + } + } + boolean hasExcludes = excludeValues != null; + out.writeBoolean(hasExcludes); + if (hasExcludes) { + out.writeVInt(excludeValues.size()); + for (BytesRef value : excludeValues) { + out.writeBytesRef(value); + } + } + } + } + + @Override + public int hashCode() { + return Objects.hash(include == null ? null : include.getOriginalString(), exclude == null ? null : exclude.getOriginalString(), + includeValues, excludeValues); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } if (getClass() != obj.getClass()) { + return false; + } + IncludeExclude other = (IncludeExclude) obj; + return Objects.equals(include == null ? null : include.getOriginalString(), other.include == null ? null : other.include.getOriginalString()) + && Objects.equals(exclude == null ? null : exclude.getOriginalString(), other.exclude == null ? 
null : other.exclude.getOriginalString()) + && Objects.equals(includeValues, other.includeValues) + && Objects.equals(excludeValues, other.excludeValues); + } + } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregatorFactory.java index 0084e634a1..931863dcef 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregatorFactory.java @@ -343,7 +343,7 @@ public abstract class ValuesSourceAggregatorFactory ext } @Override - public void doWriteTo(StreamOutput out) throws IOException { + protected final void doWriteTo(StreamOutput out) throws IOException { valuesSourceType.writeTo(out); boolean hasTargetValueType = targetValueType != null; out.writeBoolean(hasTargetValueType); @@ -376,7 +376,7 @@ public abstract class ValuesSourceAggregatorFactory ext } @Override - protected ValuesSourceAggregatorFactory doReadFrom(String name, StreamInput in) throws IOException { + protected final ValuesSourceAggregatorFactory doReadFrom(String name, StreamInput in) throws IOException { ValuesSourceType valuesSourceType = ValuesSourceType.ANY.readFrom(in); ValueType targetValueType = null; if (in.readBoolean()) { @@ -433,7 +433,7 @@ public abstract class ValuesSourceAggregatorFactory ext } @Override - public int doHashCode() { + public final int doHashCode() { return Objects.hash(field, format, missing, script, targetValueType, timeZone, valueType, valuesSourceType, innerHashCode()); } @@ -446,7 +446,7 @@ public abstract class ValuesSourceAggregatorFactory ext } @Override - public boolean doEquals(Object obj) { + public final boolean doEquals(Object obj) { ValuesSourceAggregatorFactory other = (ValuesSourceAggregatorFactory) obj; if (!Objects.equals(field, other.field)) return false; diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/SubAggCollectionModeTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/SubAggCollectionModeTests.java new file mode 100644 index 0000000000..131144dc5d --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/SubAggCollectionModeTests.java @@ -0,0 +1,82 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.elasticsearch.search.aggregations;
+
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
+import org.elasticsearch.test.ESTestCase;
+
+import java.io.IOException;
+
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+
+public class SubAggCollectionModeTests extends ESTestCase {
+
+    public void testValidOrdinals() {
+        assertThat(SubAggCollectionMode.DEPTH_FIRST.ordinal(), equalTo(0));
+        assertThat(SubAggCollectionMode.BREADTH_FIRST.ordinal(), equalTo(1));
+    }
+
+    public void testWriteTo() throws Exception {
+        try (BytesStreamOutput out = new BytesStreamOutput()) {
+            SubAggCollectionMode.DEPTH_FIRST.writeTo(out);
+            try (StreamInput in = StreamInput.wrap(out.bytes())) {
+                assertThat(in.readVInt(), equalTo(0));
+            }
+        }
+
+        try (BytesStreamOutput out = new BytesStreamOutput()) {
+            SubAggCollectionMode.BREADTH_FIRST.writeTo(out);
+            try (StreamInput in = StreamInput.wrap(out.bytes())) {
+                assertThat(in.readVInt(), equalTo(1));
+            }
+        }
+    }
+
+    public void testReadFrom() throws Exception {
+        try (BytesStreamOutput out = new BytesStreamOutput()) {
+            out.writeVInt(0);
+            try (StreamInput in = StreamInput.wrap(out.bytes())) {
+                assertThat(SubAggCollectionMode.BREADTH_FIRST.readFrom(in), equalTo(SubAggCollectionMode.DEPTH_FIRST));
+            }
+        }
+        try (BytesStreamOutput out = new BytesStreamOutput()) {
+            out.writeVInt(1);
+            try (StreamInput in = StreamInput.wrap(out.bytes())) {
+                assertThat(SubAggCollectionMode.BREADTH_FIRST.readFrom(in), equalTo(SubAggCollectionMode.BREADTH_FIRST));
+            }
+        }
+    }
+
+    public void testInvalidReadFrom() throws Exception {
+        try (BytesStreamOutput out = new BytesStreamOutput()) {
+            out.writeVInt(randomIntBetween(2, Integer.MAX_VALUE));
+            try (StreamInput in = StreamInput.wrap(out.bytes())) {
+                SubAggCollectionMode.BREADTH_FIRST.readFrom(in);
+                fail("Expected IOException");
+            } catch (IOException e) {
+                assertThat(e.getMessage(), containsString("Unknown SubAggCollectionMode ordinal ["));
+            }
+        }
+    }
+}
diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java
index 6c1e7dfcab..173f9ddfef 100644
--- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java
@@ -20,6 +20,7 @@ package org.elasticsearch.search.aggregations.bucket;
 
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.ParseFieldMatcher;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
@@ -52,7 +53,6 @@ import org.elasticsearch.search.aggregations.bucket.significant.heuristics.Signi
 import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
 import org.elasticsearch.search.aggregations.bucket.terms.Terms;
 import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;
-import org.elasticsearch.search.internal.SearchContext;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.search.aggregations.bucket.SharedSignificantTermsTestMethods;
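
A note on the test heuristic in the next hunk: getScore takes long arguments, so each frequency ratio is computed with integer division before the comparison. A worked call against the SimpleHeuristic defined below (the numbers are illustrative):

    SimpleHeuristic heuristic = new SimpleHeuristic();
    // 5/10 and 20/100 both truncate to 0 under long division, so the
    // comparison 0 > 0 is false and the base score of 1.0 is returned.
    double score = heuristic.getScore(5, 10, 20, 100);
    assert score == 1.0;

@@ -163,7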
@@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase { public static class CustomSignificanceHeuristicPlugin extends Plugin { static { - SignificanceHeuristicStreams.registerStream(SimpleHeuristic.STREAM); + SignificanceHeuristicStreams.registerPrototype(SimpleHeuristic.PROTOTYPE); } @Override @@ -187,24 +187,30 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase { public static class SimpleHeuristic extends SignificanceHeuristic { - protected static final String[] NAMES = {"simple"}; + static final SimpleHeuristic PROTOTYPE = new SimpleHeuristic(); - public static final SignificanceHeuristicStreams.Stream STREAM = new SignificanceHeuristicStreams.Stream() { - @Override - public SignificanceHeuristic readResult(StreamInput in) throws IOException { - return readFrom(in); - } + protected static final ParseField NAMES_FIELD = new ParseField("simple"); - @Override - public String getName() { - return NAMES[0]; - } - }; + @Override + public String getWriteableName() { + return NAMES_FIELD.getPreferredName(); + } - public static SignificanceHeuristic readFrom(StreamInput in) throws IOException { + @Override + public SignificanceHeuristic readFrom(StreamInput in) throws IOException { return new SimpleHeuristic(); } + @Override + public void writeTo(StreamOutput out) throws IOException { + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAMES_FIELD.getPreferredName()).endObject(); + return builder; + } + /** * @param subsetFreq The frequency of the term in the selected sample * @param subsetSize The size of the selected sample (typically number of docs) @@ -217,15 +223,10 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase { return subsetFreq / subsetSize > supersetFreq / supersetSize ? 2.0 : 1.0; } - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeString(STREAM.getName()); - } - public static class SimpleHeuristicParser implements SignificanceHeuristicParser { @Override - public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher, SearchContext context) + public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher) throws IOException, QueryShardException { parser.nextToken(); return new SimpleHeuristic(); @@ -233,7 +234,7 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase { @Override public String[] getNames() { - return NAMES; + return NAMES_FIELD.getAllNamesIncludedDeprecated(); } } @@ -241,7 +242,7 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(STREAM.getName()).endObject(); + builder.startObject(NAMES_FIELD.getPreferredName()).endObject(); return builder; } } diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java new file mode 100644 index 0000000000..8ad928e5ed --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java @@ -0,0 +1,226 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.RegExp; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.script.Script; +import org.elasticsearch.search.aggregations.BaseAggregationTestCase; +import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.PercentageScore; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ScriptHeuristic; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory.ExecutionMode; +import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; + +import java.util.SortedSet; +import java.util.TreeSet; + +public class SignificantTermsTests extends BaseAggregationTestCase<SignificantTermsAggregatorFactory> { + + private static final String[] executionHints; + + static { + ExecutionMode[] executionModes = ExecutionMode.values(); + executionHints = new String[executionModes.length]; + for (int i = 0; i < executionModes.length; i++) { + executionHints[i] = executionModes[i].toString(); + } + } + + @Override + protected SignificantTermsAggregatorFactory createTestAggregatorFactory() { + String name = randomAsciiOfLengthBetween(3, 20); + SignificantTermsAggregatorFactory factory = new SignificantTermsAggregatorFactory(name, ValuesSourceType.ANY, null); + String field = randomAsciiOfLengthBetween(3, 20); + int randomFieldBranch = randomInt(2); + switch (randomFieldBranch) { + case 0: + factory.field(field); + break; + case 1: + factory.field(field); + factory.script(new Script("_value + 1")); + break; + case 2: + factory.script(new Script("doc[" + field + "] + 1")); + break; + default: + fail(); + } + if (randomBoolean()) { + factory.missing("MISSING"); + } + if (randomBoolean()) { + int size = randomInt(4); + switch (size) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + size = randomInt(); + break; + default: + fail(); + } + factory.bucketCountThresholds().setRequiredSize(size); + + } + if (randomBoolean()) { + int shardSize = randomInt(4); + switch (shardSize) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + shardSize = randomInt(); + break; + default: + fail(); + } + factory.bucketCountThresholds().setShardSize(shardSize); + } + if (randomBoolean()) { + int minDocCount =
randomInt(4); + switch (minDocCount) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + minDocCount = randomInt(); + break; + default: + fail(); + } + factory.bucketCountThresholds().setMinDocCount(minDocCount); + } + if (randomBoolean()) { + int shardMinDocCount = randomInt(4); + switch (shardMinDocCount) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + shardMinDocCount = randomInt(); + break; + default: + fail(); + } + factory.bucketCountThresholds().setShardMinDocCount(shardMinDocCount); + } + if (randomBoolean()) { + factory.executionHint(randomFrom(executionHints)); + } + if (randomBoolean()) { + factory.format("###.##"); + } + if (randomBoolean()) { + IncludeExclude incExc = null; + switch (randomInt(5)) { + case 0: + incExc = new IncludeExclude(new RegExp("foobar"), null); + break; + case 1: + incExc = new IncludeExclude(null, new RegExp("foobaz")); + break; + case 2: + incExc = new IncludeExclude(new RegExp("foobar"), new RegExp("foobaz")); + break; + case 3: + SortedSet<BytesRef> includeValues = new TreeSet<>(); + int numIncs = randomIntBetween(1, 20); + for (int i = 0; i < numIncs; i++) { + includeValues.add(new BytesRef(randomAsciiOfLengthBetween(1, 30))); + } + SortedSet<BytesRef> excludeValues = null; + incExc = new IncludeExclude(includeValues, excludeValues); + break; + case 4: + SortedSet<BytesRef> includeValues2 = null; + SortedSet<BytesRef> excludeValues2 = new TreeSet<>(); + int numExcs2 = randomIntBetween(1, 20); + for (int i = 0; i < numExcs2; i++) { + excludeValues2.add(new BytesRef(randomAsciiOfLengthBetween(1, 30))); + } + incExc = new IncludeExclude(includeValues2, excludeValues2); + break; + case 5: + SortedSet<BytesRef> includeValues3 = new TreeSet<>(); + int numIncs3 = randomIntBetween(1, 20); + for (int i = 0; i < numIncs3; i++) { + includeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30))); + } + SortedSet<BytesRef> excludeValues3 = new TreeSet<>(); + int numExcs3 = randomIntBetween(1, 20); + for (int i = 0; i < numExcs3; i++) { + excludeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30))); + } + incExc = new IncludeExclude(includeValues3, excludeValues3); + break; + default: + fail(); + } + factory.includeExclude(incExc); + } + if (randomBoolean()) { + SignificanceHeuristic significanceHeuristic = null; + switch (randomInt(5)) { + case 0: + significanceHeuristic = PercentageScore.PROTOTYPE; + break; + case 1: + significanceHeuristic = new ChiSquare(randomBoolean(), randomBoolean()); + break; + case 2: + significanceHeuristic = new GND(randomBoolean()); + break; + case 3: + significanceHeuristic = new MutualInformation(randomBoolean(), randomBoolean()); + break; + case 4: + significanceHeuristic = new ScriptHeuristic(new Script("foo")); + break; + case 5: + significanceHeuristic = JLHScore.PROTOTYPE; + break; + default: + fail(); + } + factory.significanceHeuristic(significanceHeuristic); + } + if (randomBoolean()) { + factory.backgroundFilter(QueryBuilders.termsQuery("foo", "bar")); + } + return factory; + } + +} diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/TermsTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/TermsTests.java new file mode 100644 index 0000000000..fb57b6d1b2 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/TermsTests.java @@ -0,0 +1,232 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership.
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.RegExp; +import org.elasticsearch.script.Script; +import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; +import org.elasticsearch.search.aggregations.BaseAggregationTestCase; +import org.elasticsearch.search.aggregations.bucket.terms.Terms; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory.ExecutionMode; +import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; + +import java.util.ArrayList; +import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; + +public class TermsTests extends BaseAggregationTestCase<TermsAggregatorFactory> { + + private static final String[] executionHints; + + static { + ExecutionMode[] executionModes = ExecutionMode.values(); + executionHints = new String[executionModes.length]; + for (int i = 0; i < executionModes.length; i++) { + executionHints[i] = executionModes[i].toString(); + } + } + + @Override + protected TermsAggregatorFactory createTestAggregatorFactory() { + String name = randomAsciiOfLengthBetween(3, 20); + TermsAggregatorFactory factory = new TermsAggregatorFactory(name, ValuesSourceType.ANY, null); + String field = randomAsciiOfLengthBetween(3, 20); + int randomFieldBranch = randomInt(2); + switch (randomFieldBranch) { + case 0: + factory.field(field); + break; + case 1: + factory.field(field); + factory.script(new Script("_value + 1")); + break; + case 2: + factory.script(new Script("doc[" + field + "] + 1")); + break; + default: + fail(); + } + if (randomBoolean()) { + factory.missing("MISSING"); + } + if (randomBoolean()) { + int size = randomInt(4); + switch (size) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + size = randomInt(); + break; + default: + fail(); + } + factory.bucketCountThresholds().setRequiredSize(size); + + } + if (randomBoolean()) { + int shardSize = randomInt(4); + switch (shardSize) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + shardSize = randomInt(); + break; + default: + fail(); + } + factory.bucketCountThresholds().setShardSize(shardSize); + } + if (randomBoolean()) { + int minDocCount = randomInt(4); + switch (minDocCount) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + minDocCount = randomInt(); + break; + default: + fail(); + } + factory.bucketCountThresholds().setMinDocCount(minDocCount); + } + if (randomBoolean()) { + int shardMinDocCount = randomInt(4); + switch (shardMinDocCount) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + shardMinDocCount = randomInt(); + break; + default: + fail(); + } + factory.bucketCountThresholds().setShardMinDocCount(shardMinDocCount); + } + if (randomBoolean()) { +
factory.collectMode(randomFrom(SubAggCollectionMode.values())); + } + if (randomBoolean()) { + factory.executionHint(randomFrom(executionHints)); + } + if (randomBoolean()) { + factory.format("###.##"); + } + if (randomBoolean()) { + IncludeExclude incExc = null; + switch (randomInt(5)) { + case 0: + incExc = new IncludeExclude(new RegExp("foobar"), null); + break; + case 1: + incExc = new IncludeExclude(null, new RegExp("foobaz")); + break; + case 2: + incExc = new IncludeExclude(new RegExp("foobar"), new RegExp("foobaz")); + break; + case 3: + SortedSet<BytesRef> includeValues = new TreeSet<>(); + int numIncs = randomIntBetween(1, 20); + for (int i = 0; i < numIncs; i++) { + includeValues.add(new BytesRef(randomAsciiOfLengthBetween(1, 30))); + } + SortedSet<BytesRef> excludeValues = null; + incExc = new IncludeExclude(includeValues, excludeValues); + break; + case 4: + SortedSet<BytesRef> includeValues2 = null; + SortedSet<BytesRef> excludeValues2 = new TreeSet<>(); + int numExcs2 = randomIntBetween(1, 20); + for (int i = 0; i < numExcs2; i++) { + excludeValues2.add(new BytesRef(randomAsciiOfLengthBetween(1, 30))); + } + incExc = new IncludeExclude(includeValues2, excludeValues2); + break; + case 5: + SortedSet<BytesRef> includeValues3 = new TreeSet<>(); + int numIncs3 = randomIntBetween(1, 20); + for (int i = 0; i < numIncs3; i++) { + includeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30))); + } + SortedSet<BytesRef> excludeValues3 = new TreeSet<>(); + int numExcs3 = randomIntBetween(1, 20); + for (int i = 0; i < numExcs3; i++) { + excludeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30))); + } + incExc = new IncludeExclude(includeValues3, excludeValues3); + break; + default: + fail(); + } + factory.includeExclude(incExc); + } + if (randomBoolean()) { + List<Terms.Order> order = randomOrder(); + factory.order(order); + } + if (randomBoolean()) { + factory.showTermDocCountError(randomBoolean()); + } + return factory; + } + + private List<Terms.Order> randomOrder() { + List<Terms.Order> orders = new ArrayList<>(); + switch (randomInt(4)) { + case 0: + orders.add(Terms.Order.term(randomBoolean())); + break; + case 1: + orders.add(Terms.Order.count(randomBoolean())); + break; + case 2: + orders.add(Terms.Order.aggregation(randomAsciiOfLengthBetween(3, 20), randomBoolean())); + break; + case 3: + orders.add(Terms.Order.aggregation(randomAsciiOfLengthBetween(3, 20), randomAsciiOfLengthBetween(3, 20), randomBoolean())); + break; + case 4: + int numOrders = randomIntBetween(1, 3); + for (int i = 0; i < numOrders; i++) { + orders.addAll(randomOrder()); + } + break; + default: + fail(); + } + return orders; + } + +} diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificanceHeuristicTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificanceHeuristicTests.java index 0fe9113e8f..166efe27a3 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificanceHeuristicTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificanceHeuristicTests.java @@ -22,11 +22,14 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.Version; import org.elasticsearch.common.io.stream.InputStreamStreamInput; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.OutputStreamStreamOutput; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import
org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.json.JsonXContent; +import org.elasticsearch.indices.query.IndicesQueriesRegistry; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.InternalAggregations; @@ -127,7 +130,7 @@ public class SignificanceHeuristicTests extends ESTestCase { SignificanceHeuristic getRandomSignificanceheuristic() { List<SignificanceHeuristic> heuristics = new ArrayList<>(); - heuristics.add(JLHScore.INSTANCE); + heuristics.add(JLHScore.PROTOTYPE); heuristics.add(new MutualInformation(randomBoolean(), randomBoolean())); heuristics.add(new GND(randomBoolean())); heuristics.add(new ChiSquare(randomBoolean(), randomBoolean())); @@ -227,11 +230,14 @@ public class SignificanceHeuristicTests extends ESTestCase { checkParseException(heuristicParserMapper, searchContext, faultyHeuristicdefinition, expectedError); } - protected void checkParseException(SignificanceHeuristicParserMapper heuristicParserMapper, SearchContext searchContext, String faultyHeuristicDefinition, String expectedError) throws IOException { + protected void checkParseException(SignificanceHeuristicParserMapper heuristicParserMapper, SearchContext searchContext, + String faultyHeuristicDefinition, String expectedError) throws IOException { + + IndicesQueriesRegistry registry = new IndicesQueriesRegistry(Settings.EMPTY, new HashSet<>(), new NamedWriteableRegistry()); try { XContentParser stParser = JsonXContent.jsonXContent.createParser("{\"field\":\"text\", " + faultyHeuristicDefinition + ",\"min_doc_count\":200}"); stParser.nextToken(); - new SignificantTermsParser(heuristicParserMapper).parse("testagg", stParser, searchContext); + new SignificantTermsParser(heuristicParserMapper, registry).parse("testagg", stParser, searchContext); fail(); } catch (ElasticsearchParseException e) { assertTrue(e.getMessage().contains(expectedError)); @@ -247,9 +253,12 @@ public class SignificanceHeuristicTests extends ESTestCase { return parseSignificanceHeuristic(heuristicParserMapper, searchContext, stParser); } - private SignificanceHeuristic parseSignificanceHeuristic(SignificanceHeuristicParserMapper heuristicParserMapper, SearchContext searchContext, XContentParser stParser) throws IOException { + private SignificanceHeuristic parseSignificanceHeuristic(SignificanceHeuristicParserMapper heuristicParserMapper, + SearchContext searchContext, XContentParser stParser) throws IOException { + IndicesQueriesRegistry registry = new IndicesQueriesRegistry(Settings.EMPTY, new HashSet<>(), new NamedWriteableRegistry()); stParser.nextToken(); - SignificantTermsAggregatorFactory aggregatorFactory = (SignificantTermsAggregatorFactory) new SignificantTermsParser(heuristicParserMapper).parse("testagg", stParser, searchContext); + SignificantTermsAggregatorFactory aggregatorFactory = (SignificantTermsAggregatorFactory) new SignificantTermsParser( + heuristicParserMapper, registry).parse("testagg", stParser, searchContext); stParser.nextToken(); assertThat(aggregatorFactory.getBucketCountThresholds().getMinDocCount(), equalTo(200l)); assertThat(stParser.currentToken(), equalTo(null)); @@ -365,14 +374,14 @@ public class SignificanceHeuristicTests extends ESTestCase { testBackgroundAssertions(new MutualInformation(true, true), new MutualInformation(true, false)); testBackgroundAssertions(new ChiSquare(true, true), new ChiSquare(true, false));
testBackgroundAssertions(new GND(true), new GND(false)); - testAssertions(PercentageScore.INSTANCE); - testAssertions(JLHScore.INSTANCE); + testAssertions(PercentageScore.PROTOTYPE); + testAssertions(JLHScore.PROTOTYPE); } public void testBasicScoreProperties() { - basicScoreProperties(JLHScore.INSTANCE, true); + basicScoreProperties(JLHScore.PROTOTYPE, true); basicScoreProperties(new GND(true), true); - basicScoreProperties(PercentageScore.INSTANCE, true); + basicScoreProperties(PercentageScore.PROTOTYPE, true); basicScoreProperties(new MutualInformation(true, true), false); basicScoreProperties(new ChiSquare(true, true), false); } diff --git a/core/src/test/java/org/elasticsearch/test/search/aggregations/bucket/SharedSignificantTermsTestMethods.java b/core/src/test/java/org/elasticsearch/test/search/aggregations/bucket/SharedSignificantTermsTestMethods.java index 1df965968a..a120b63c82 100644 --- a/core/src/test/java/org/elasticsearch/test/search/aggregations/bucket/SharedSignificantTermsTestMethods.java +++ b/core/src/test/java/org/elasticsearch/test/search/aggregations/bucket/SharedSignificantTermsTestMethods.java @@ -57,13 +57,9 @@ public class SharedSignificantTermsTestMethods { } private static void checkSignificantTermsAggregationCorrect(ESIntegTestCase testCase) { - - SearchResponse response = client().prepareSearch(INDEX_NAME).setTypes(DOC_TYPE) - .addAggregation(new TermsBuilder("class").field(CLASS_FIELD).subAggregation( - new SignificantTermsBuilder("sig_terms") - .field(TEXT_FIELD))) - .execute() - .actionGet(); + SearchResponse response = client().prepareSearch(INDEX_NAME).setTypes(DOC_TYPE).addAggregation( + new TermsBuilder("class").field(CLASS_FIELD).subAggregation(new SignificantTermsBuilder("sig_terms").field(TEXT_FIELD))) + .execute().actionGet(); assertSearchResponse(response); StringTerms classes = response.getAggregations().get("class"); Assert.assertThat(classes.getBuckets().size(), equalTo(2)); -- cgit v1.2.3