diff options
author | Colin Goodheart-Smithe <colings86@users.noreply.github.com> | 2015-12-08 14:13:34 +0000 |
---|---|---|
committer | Colin Goodheart-Smithe <colings86@users.noreply.github.com> | 2015-12-21 09:35:46 +0000 |
commit | 8f63c46d276c5d73a649ec9318200faa7d268f7b (patch) | |
tree | 99cae5d566dadb17aff1c57bfdf7e3d7934324a2 | |
parent | 9fcf453858609be1a4ba53c673ed6c12783306e8 (diff) |
Aggregations Refactor: Refactor Terms Aggregation
35 files changed, 1831 insertions, 747 deletions
diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java index 99c23fc744..0e6387b1b7 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java @@ -22,6 +22,9 @@ package org.elasticsearch.search.aggregations; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseFieldMatcher; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; @@ -112,7 +115,7 @@ public abstract class Aggregator extends BucketCollector implements Releasable { public abstract InternalAggregation buildEmptyAggregation(); /** Aggregation mode for sub aggregations. */ - public enum SubAggCollectionMode { + public enum SubAggCollectionMode implements Writeable<SubAggCollectionMode> { /** * Creates buckets and delegates to child aggregators in a single pass over @@ -148,5 +151,19 @@ public abstract class Aggregator extends BucketCollector implements Releasable { } throw new ElasticsearchParseException("no [{}] found for value [{}]", KEY.getPreferredName(), value); } + + @Override + public SubAggCollectionMode readFrom(StreamInput in) throws IOException { + int ordinal = in.readVInt(); + if (ordinal < 0 || ordinal >= values().length) { + throw new IOException("Unknown SubAggCollectionMode ordinal [" + ordinal + "]"); + } + return values()[ordinal]; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVInt(ordinal()); + } } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTerms.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTerms.java index 9e8ad33476..226c6d5b3b 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTerms.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTerms.java @@ -228,7 +228,7 @@ public class SignificantLongTerms extends InternalSignificantTerms<SignificantLo out.writeVLong(minDocCount); out.writeVLong(subsetSize); out.writeVLong(supersetSize); - significanceHeuristic.writeTo(out); + SignificanceHeuristicStreams.writeTo(significanceHeuristic, out); out.writeVInt(buckets.size()); for (InternalSignificantTerms.Bucket bucket : buckets) { diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTerms.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTerms.java index 6c1ca0a11a..b0479475d8 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTerms.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTerms.java @@ -214,7 +214,7 @@ public class SignificantStringTerms extends InternalSignificantTerms<Significant out.writeVLong(minDocCount); out.writeVLong(subsetSize); out.writeVLong(supersetSize); - significanceHeuristic.writeTo(out); + SignificanceHeuristicStreams.writeTo(significanceHeuristic, out); out.writeVInt(buckets.size()); for (InternalSignificantTerms.Bucket bucket : buckets) { bucket.writeTo(out); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java index 0ad7389a5f..bb927c9cec 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java @@ -26,34 +26,51 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseFieldMatcher; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.lucene.index.FilterableTermsEnum; import org.elasticsearch.common.lucene.index.FreqTermsEnum; +import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.NonCollectingAggregator; +import org.elasticsearch.search.aggregations.bucket.BucketUtils; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicStreams; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory; import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.AggregationContext; +import org.elasticsearch.search.aggregations.support.ValueType; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; -import org.elasticsearch.search.aggregations.support.ValuesSourceParser; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Objects; /** * */ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource> implements Releasable { + static final ParseField BACKGROUND_FILTER = new ParseField("background_filter"); + static final ParseField HEURISTIC = new ParseField("significance_heuristic"); + + static final TermsAggregator.BucketCountThresholds DEFAULT_BUCKET_COUNT_THRESHOLDS = new TermsAggregator.BucketCountThresholds( + 3, 0, 10, -1); + public SignificanceHeuristic getSignificanceHeuristic() { return significanceHeuristic; } @@ -98,9 +115,8 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException { final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(); return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, - (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, filter, - aggregationContext, - parent, termsAggregatorFactory, pipelineAggregators, metaData); + (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, filter, aggregationContext, parent, + termsAggregatorFactory, pipelineAggregators, metaData); } }; @@ -129,36 +145,82 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac return parseField.getPreferredName(); } } - private final IncludeExclude includeExclude; - private final String executionHint; + + private IncludeExclude includeExclude = null; + private String executionHint = null; private String indexedFieldName; private MappedFieldType fieldType; private FilterableTermsEnum termsEnum; private int numberOfAggregatorsCreated = 0; - private final Query filter; - private final TermsAggregator.BucketCountThresholds bucketCountThresholds; - private final SignificanceHeuristic significanceHeuristic; + private QueryBuilder<?> filterBuilder = null; + private TermsAggregator.BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(DEFAULT_BUCKET_COUNT_THRESHOLDS); + private SignificanceHeuristic significanceHeuristic = JLHScore.PROTOTYPE; protected TermsAggregator.BucketCountThresholds getBucketCountThresholds() { return new TermsAggregator.BucketCountThresholds(bucketCountThresholds); } - public SignificantTermsAggregatorFactory(String name, ValuesSourceParser.Input valueSourceInput, - TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, - String executionHint, Query filter, SignificanceHeuristic significanceHeuristic) { + public SignificantTermsAggregatorFactory(String name, ValuesSourceType valuesSourceType, ValueType valueType) { + super(name, SignificantStringTerms.TYPE, valuesSourceType, valueType); + } + + public TermsAggregator.BucketCountThresholds bucketCountThresholds() { + return bucketCountThresholds; + } - super(name, SignificantStringTerms.TYPE, valueSourceInput); + public void bucketCountThresholds(TermsAggregator.BucketCountThresholds bucketCountThresholds) { this.bucketCountThresholds = bucketCountThresholds; - this.includeExclude = includeExclude; + } + + /** + * Expert: sets an execution hint to the aggregation. + */ + public void executionHint(String executionHint) { this.executionHint = executionHint; + } + + /** + * Expert: gets an execution hint to the aggregation. + */ + public String executionHint() { + return executionHint; + } + + public void backgroundFilter(QueryBuilder<?> filterBuilder) { + this.filterBuilder = filterBuilder; + } + + public QueryBuilder<?> backgroundFilter() { + return filterBuilder; + } + + /** + * Set terms to include and exclude from the aggregation results + */ + public void includeExclude(IncludeExclude includeExclude) { + this.includeExclude = includeExclude; + } + + /** + * Get terms to include and exclude from the aggregation results + */ + public IncludeExclude includeExclude() { + return includeExclude; + } + + public void significanceHeuristic(SignificanceHeuristic significanceHeuristic) { this.significanceHeuristic = significanceHeuristic; - this.filter = filter; + } + + public SignificanceHeuristic significanceHeuristic() { + return significanceHeuristic; } @Override public void doInit(AggregationContext context) { super.doInit(context); setFieldInfo(); + significanceHeuristic.initialize(context.searchContext()); } private void setFieldInfo() { @@ -191,6 +253,18 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac } numberOfAggregatorsCreated++; + BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(this.bucketCountThresholds); + if (bucketCountThresholds.getShardSize() == DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) { + //The user has not made a shardSize selection . + //Use default heuristic to avoid any wrong-ranking caused by distributed counting + //but request double the usual amount. + //We typically need more than the number of "top" terms requested by other aggregations + //as the significance algorithm is in less of a position to down-select at shard-level - + //some of the things we want to find have only one occurrence on each shard and as + // such are impossible to differentiate from non-significant terms at that early stage. + bucketCountThresholds.setShardSize(2 * BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize(), + aggregationContext.searchContext().numberOfShards())); + } if (valuesSource instanceof ValuesSource.Bytes) { ExecutionMode execution = null; @@ -247,6 +321,14 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac } SearchContext searchContext = context.searchContext(); IndexReader reader = searchContext.searcher().getIndexReader(); + Query filter = null; + try { + if (filterBuilder != null) { + filter = filterBuilder.toFilter(context.searchContext().indexShard().getQueryShardContext()); + } + } catch (IOException e) { + throw new ElasticsearchException("failed to create filter: " + filterBuilder.toString(), e); + } try { if (numberOfAggregatorsCreated == 1) { // Setup a termsEnum for sole use by one aggregator @@ -291,4 +373,68 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac termsEnum = null; } } + + @Override + protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { + bucketCountThresholds.toXContent(builder, params); + if (executionHint != null) { + builder.field(TermsAggregatorFactory.EXECUTION_HINT_FIELD_NAME.getPreferredName(), executionHint); + } + if (filterBuilder != null) { + builder.field(BACKGROUND_FILTER.getPreferredName(), filterBuilder); + } + if (includeExclude != null) { + includeExclude.toXContent(builder, params); + } + significanceHeuristic.toXContent(builder, params); + return builder; + } + + @Override + protected ValuesSourceAggregatorFactory<ValuesSource> innerReadFrom(String name, ValuesSourceType valuesSourceType, + ValueType targetValueType, StreamInput in) throws IOException { + SignificantTermsAggregatorFactory factory = new SignificantTermsAggregatorFactory(name, valuesSourceType, targetValueType); + factory.bucketCountThresholds = BucketCountThresholds.readFromStream(in); + factory.executionHint = in.readOptionalString(); + if (in.readBoolean()) { + factory.filterBuilder = in.readQuery(); + } + if (in.readBoolean()) { + factory.includeExclude = IncludeExclude.readFromStream(in); + } + factory.significanceHeuristic = SignificanceHeuristicStreams.read(in); + return factory; + } + + @Override + protected void innerWriteTo(StreamOutput out) throws IOException { + bucketCountThresholds.writeTo(out); + out.writeOptionalString(executionHint); + boolean hasfilterBuilder = filterBuilder != null; + out.writeBoolean(hasfilterBuilder); + if (hasfilterBuilder) { + out.writeQuery(filterBuilder); + } + boolean hasIncExc = includeExclude != null; + out.writeBoolean(hasIncExc); + if (hasIncExc) { + includeExclude.writeTo(out); + } + SignificanceHeuristicStreams.writeTo(significanceHeuristic, out); + } + + @Override + protected int innerHashCode() { + return Objects.hash(bucketCountThresholds, executionHint, filterBuilder, includeExclude, significanceHeuristic); + } + + @Override + protected boolean innerEquals(Object obj) { + SignificantTermsAggregatorFactory other = (SignificantTermsAggregatorFactory) obj; + return Objects.equals(bucketCountThresholds, other.bucketCountThresholds) + && Objects.equals(executionHint, other.executionHint) + && Objects.equals(filterBuilder, other.filterBuilder) + && Objects.equals(includeExclude, other.includeExclude) + && Objects.equals(significanceHeuristic, other.significanceHeuristic); + } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java index b67ce2a1f9..6bbb3348b7 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java @@ -24,8 +24,8 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicBuilder; -import org.elasticsearch.search.aggregations.bucket.terms.AbstractTermsParametersParser; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory; import java.io.IOException; @@ -88,7 +88,7 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms bucketCountThresholds.setMinDocCount(minDocCount); return this; } - + /** * Set the background filter to compare to. Defaults to the whole index. */ @@ -96,7 +96,7 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms this.filterBuilder = filter; return this; } - + /** * Expert: set the minimum number of documents that a term should match to * be retrieved from a shard. @@ -138,7 +138,7 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms this.includeFlags = flags; return this; } - + /** * Define a set of terms that should be aggregated. */ @@ -148,8 +148,8 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms } this.includeTerms = terms; return this; - } - + } + /** * Define a set of terms that should be aggregated. */ @@ -159,16 +159,16 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms } this.includeTerms = longsArrToStringArr(terms); return this; - } - + } + private String[] longsArrToStringArr(long[] terms) { String[] termsAsString = new String[terms.length]; for (int i = 0; i < terms.length; i++) { termsAsString[i] = Long.toString(terms[i]); } return termsAsString; - } - + } + /** * Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular @@ -194,7 +194,7 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms this.excludeFlags = flags; return this; } - + /** * Define a set of terms that should not be aggregated. */ @@ -204,9 +204,9 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms } this.excludeTerms = terms; return this; - } - - + } + + /** * Define a set of terms that should not be aggregated. */ @@ -224,9 +224,9 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms if (field != null) { builder.field("field", field); } - bucketCountThresholds.toXContent(builder); + bucketCountThresholds.toXContent(builder, params); if (executionHint != null) { - builder.field(AbstractTermsParametersParser.EXECUTION_HINT_FIELD_NAME.getPreferredName(), executionHint); + builder.field(TermsAggregatorFactory.EXECUTION_HINT_FIELD_NAME.getPreferredName(), executionHint); } if (includePattern != null) { if (includeFlags == 0) { @@ -241,7 +241,7 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms if (includeTerms != null) { builder.array("include", includeTerms); } - + if (excludePattern != null) { if (excludeFlags == 0) { builder.field("exclude", excludePattern); @@ -255,10 +255,10 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms if (excludeTerms != null) { builder.array("exclude", excludeTerms); } - + if (filterBuilder != null) { - builder.field(SignificantTermsParametersParser.BACKGROUND_FILTER.getPreferredName()); - filterBuilder.toXContent(builder, params); + builder.field(SignificantTermsAggregatorFactory.BACKGROUND_FILTER.getPreferredName()); + filterBuilder.toXContent(builder, params); } if (significanceHeuristicBuilder != null) { significanceHeuristicBuilder.toXContent(builder, params); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsParametersParser.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsParametersParser.java deleted file mode 100644 index 020229867f..0000000000 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsParametersParser.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - -package org.elasticsearch.search.aggregations.bucket.significant; - -import org.apache.lucene.search.Query; -import org.elasticsearch.common.ParseField; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.search.SearchParseException; -import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; -import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicParser; -import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicParserMapper; -import org.elasticsearch.search.aggregations.bucket.terms.AbstractTermsParametersParser; -import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator; -import org.elasticsearch.search.internal.SearchContext; - -import java.io.IOException; - - -public class SignificantTermsParametersParser extends AbstractTermsParametersParser { - - private static final TermsAggregator.BucketCountThresholds DEFAULT_BUCKET_COUNT_THRESHOLDS = new TermsAggregator.BucketCountThresholds(3, 0, 10, -1); - private final SignificanceHeuristicParserMapper significanceHeuristicParserMapper; - - public SignificantTermsParametersParser(SignificanceHeuristicParserMapper significanceHeuristicParserMapper) { - this.significanceHeuristicParserMapper = significanceHeuristicParserMapper; - } - - public Query getFilter() { - return filter; - } - - private Query filter = null; - - private SignificanceHeuristic significanceHeuristic; - - @Override - public TermsAggregator.BucketCountThresholds getDefaultBucketCountThresholds() { - return new TermsAggregator.BucketCountThresholds(DEFAULT_BUCKET_COUNT_THRESHOLDS); - } - - static final ParseField BACKGROUND_FILTER = new ParseField("background_filter"); - - @Override - public void parseSpecial(String aggregationName, XContentParser parser, SearchContext context, XContentParser.Token token, String currentFieldName) throws IOException { - - if (token == XContentParser.Token.START_OBJECT) { - SignificanceHeuristicParser significanceHeuristicParser = significanceHeuristicParserMapper.get(currentFieldName); - if (significanceHeuristicParser != null) { - significanceHeuristic = significanceHeuristicParser.parse(parser, context.parseFieldMatcher(), context); - } else if (context.parseFieldMatcher().match(currentFieldName, BACKGROUND_FILTER)) { - filter = context.indexShard().getQueryShardContext().parseInnerFilter(parser).query(); - } else { - throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" - + currentFieldName + "].", parser.getTokenLocation()); - } - } else { - throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName - + "].", parser.getTokenLocation()); - } - } - - public SignificanceHeuristic getSignificanceHeuristic() { - return significanceHeuristic; - } -} diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsParser.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsParser.java index bcd9bdd4cb..6b44309ad8 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsParser.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsParser.java @@ -18,31 +18,43 @@ */ package org.elasticsearch.search.aggregations.bucket.significant; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.common.xcontent.XContentParser.Token; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryParseContext; +import org.elasticsearch.indices.query.IndicesQueriesRegistry; +import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; import org.elasticsearch.search.aggregations.AggregatorFactory; -import org.elasticsearch.search.aggregations.bucket.BucketUtils; -import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicParser; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicParserMapper; +import org.elasticsearch.search.aggregations.bucket.terms.AbstractTermsParser; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds; import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; -import org.elasticsearch.search.aggregations.support.ValuesSourceParser; -import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.search.aggregations.support.ValueType; +import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; import java.io.IOException; +import java.util.Map; /** * */ -public class SignificantTermsParser implements Aggregator.Parser { - +public class SignificantTermsParser extends AbstractTermsParser { private final SignificanceHeuristicParserMapper significanceHeuristicParserMapper; + private final IndicesQueriesRegistry queriesRegistry; @Inject - public SignificantTermsParser(SignificanceHeuristicParserMapper significanceHeuristicParserMapper) { + public SignificantTermsParser(SignificanceHeuristicParserMapper significanceHeuristicParserMapper, + IndicesQueriesRegistry queriesRegistry) { this.significanceHeuristicParserMapper = significanceHeuristicParserMapper; + this.queriesRegistry = queriesRegistry; } @Override @@ -51,39 +63,59 @@ public class SignificantTermsParser implements Aggregator.Parser { } @Override - public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException { - SignificantTermsParametersParser aggParser = new SignificantTermsParametersParser(significanceHeuristicParserMapper); - ValuesSourceParser vsParser = ValuesSourceParser.any(aggregationName, SignificantStringTerms.TYPE, context) - .scriptable(false) - .formattable(true) - .build(); - IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(); - aggParser.parse(aggregationName, parser, context, vsParser, incExcParser); - - TermsAggregator.BucketCountThresholds bucketCountThresholds = aggParser.getBucketCountThresholds(); - if (bucketCountThresholds.getShardSize() == aggParser.getDefaultBucketCountThresholds().getShardSize()) { - //The user has not made a shardSize selection . - //Use default heuristic to avoid any wrong-ranking caused by distributed counting - //but request double the usual amount. - //We typically need more than the number of "top" terms requested by other aggregations - //as the significance algorithm is in less of a position to down-select at shard-level - - //some of the things we want to find have only one occurrence on each shard and as - // such are impossible to differentiate from non-significant terms at that early stage. - bucketCountThresholds.setShardSize(2 * BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize(), context.numberOfShards())); + protected ValuesSourceAggregatorFactory<ValuesSource> doCreateFactory(String aggregationName, ValuesSourceType valuesSourceType, + ValueType targetValueType, BucketCountThresholds bucketCountThresholds, SubAggCollectionMode collectMode, String executionHint, + IncludeExclude incExc, Map<ParseField, Object> otherOptions) { + SignificantTermsAggregatorFactory factory = new SignificantTermsAggregatorFactory(aggregationName, valuesSourceType, + targetValueType); + if (bucketCountThresholds != null) { + factory.bucketCountThresholds(bucketCountThresholds); + } + if (executionHint != null) { + factory.executionHint(executionHint); + } + if (incExc != null) { + factory.includeExclude(incExc); + } + QueryBuilder<?> backgroundFilter = (QueryBuilder<?>) otherOptions.get(SignificantTermsAggregatorFactory.BACKGROUND_FILTER); + if (backgroundFilter != null) { + factory.backgroundFilter(backgroundFilter); + } + SignificanceHeuristic significanceHeuristic = (SignificanceHeuristic) otherOptions.get(SignificantTermsAggregatorFactory.HEURISTIC); + if (significanceHeuristic != null) { + factory.significanceHeuristic(significanceHeuristic); } + return factory; + } - bucketCountThresholds.ensureValidity(); - SignificanceHeuristic significanceHeuristic = aggParser.getSignificanceHeuristic(); - if (significanceHeuristic == null) { - significanceHeuristic = JLHScore.INSTANCE; + @Override + public boolean parseSpecial(String aggregationName, XContentParser parser, ParseFieldMatcher parseFieldMatcher, Token token, + String currentFieldName, Map<ParseField, Object> otherOptions) throws IOException { + if (token == XContentParser.Token.START_OBJECT) { + SignificanceHeuristicParser significanceHeuristicParser = significanceHeuristicParserMapper.get(currentFieldName); + if (significanceHeuristicParser != null) { + SignificanceHeuristic significanceHeuristic = significanceHeuristicParser.parse(parser, parseFieldMatcher); + otherOptions.put(SignificantTermsAggregatorFactory.HEURISTIC, significanceHeuristic); + return true; + } else if (parseFieldMatcher.match(currentFieldName, SignificantTermsAggregatorFactory.BACKGROUND_FILTER)) { + QueryParseContext queryParseContext = new QueryParseContext(queriesRegistry); + queryParseContext.reset(parser); + queryParseContext.parseFieldMatcher(parseFieldMatcher); + QueryBuilder<?> filter = queryParseContext.parseInnerQueryBuilder(); + otherOptions.put(SignificantTermsAggregatorFactory.BACKGROUND_FILTER, filter); + return true; + } } - return new SignificantTermsAggregatorFactory(aggregationName, vsParser.input(), bucketCountThresholds, - aggParser.getIncludeExclude(), aggParser.getExecutionHint(), aggParser.getFilter(), significanceHeuristic); + return false; } - // NORELEASE implement this method when refactoring this aggregation @Override public AggregatorFactory[] getFactoryPrototypes() { - return null; + return new AggregatorFactory[] { new SignificantTermsAggregatorFactory(null, null, null) }; + } + + @Override + protected BucketCountThresholds getDefaultBucketCountThresholds() { + return new TermsAggregator.BucketCountThresholds(SignificantTermsAggregatorFactory.DEFAULT_BUCKET_COUNT_THRESHOLDS); } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/UnmappedSignificantTerms.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/UnmappedSignificantTerms.java index c7569db612..bcad058f2e 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/UnmappedSignificantTerms.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/UnmappedSignificantTerms.java @@ -58,9 +58,9 @@ public class UnmappedSignificantTerms extends InternalSignificantTerms<UnmappedS UnmappedSignificantTerms() {} // for serialization public UnmappedSignificantTerms(String name, int requiredSize, long minDocCount, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) { - //We pass zero for index/subset sizes because for the purpose of significant term analysis - // we assume an unmapped index's size is irrelevant to the proceedings. - super(0, 0, name, requiredSize, minDocCount, JLHScore.INSTANCE, BUCKETS, pipelineAggregators, metaData); + //We pass zero for index/subset sizes because for the purpose of significant term analysis + // we assume an unmapped index's size is irrelevant to the proceedings. + super(0, 0, name, requiredSize, minDocCount, JLHScore.PROTOTYPE, BUCKETS, pipelineAggregators, metaData); } @Override diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/ChiSquare.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/ChiSquare.java index cc9303a635..c68e47acb7 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/ChiSquare.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/ChiSquare.java @@ -23,13 +23,14 @@ package org.elasticsearch.search.aggregations.bucket.significant.heuristics; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; public class ChiSquare extends NXYSignificanceHeuristic { + static final ChiSquare PROTOTYPE = new ChiSquare(false, false); + protected static final ParseField NAMES_FIELD = new ParseField("chi_square"); public ChiSquare(boolean includeNegatives, boolean backgroundIsSuperset) { @@ -51,18 +52,6 @@ public class ChiSquare extends NXYSignificanceHeuristic { return result; } - public static final SignificanceHeuristicStreams.Stream STREAM = new SignificanceHeuristicStreams.Stream() { - @Override - public SignificanceHeuristic readResult(StreamInput in) throws IOException { - return new ChiSquare(in.readBoolean(), in.readBoolean()); - } - - @Override - public String getName() { - return NAMES_FIELD.getPreferredName(); - } - }; - /** * Calculates Chi^2 * see "Information Retrieval", Manning et al., Eq. 13.19 @@ -80,9 +69,21 @@ public class ChiSquare extends NXYSignificanceHeuristic { } @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeString(STREAM.getName()); - super.writeTo(out); + public String getWriteableName() { + return NAMES_FIELD.getPreferredName(); + } + + @Override + public SignificanceHeuristic readFrom(StreamInput in) throws IOException { + return new ChiSquare(in.readBoolean(), in.readBoolean()); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAMES_FIELD.getPreferredName()); + super.build(builder); + builder.endObject(); + return builder; } public static class ChiSquareParser extends NXYParser { @@ -106,7 +107,7 @@ public class ChiSquare extends NXYSignificanceHeuristic { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(STREAM.getName()); + builder.startObject(NAMES_FIELD.getPreferredName()); super.build(builder); builder.endObject(); return builder; diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/GND.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/GND.java index 99ee7c73b2..6da05eddb5 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/GND.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/GND.java @@ -29,12 +29,13 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryShardException; -import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; public class GND extends NXYSignificanceHeuristic { + static final GND PROTOTYPE = new GND(false); + protected static final ParseField NAMES_FIELD = new ParseField("gnd"); public GND(boolean backgroundIsSuperset) { @@ -57,18 +58,6 @@ public class GND extends NXYSignificanceHeuristic { return result; } - public static final SignificanceHeuristicStreams.Stream STREAM = new SignificanceHeuristicStreams.Stream() { - @Override - public SignificanceHeuristic readResult(StreamInput in) throws IOException { - return new GND(in.readBoolean()); - } - - @Override - public String getName() { - return NAMES_FIELD.getPreferredName(); - } - }; - /** * Calculates Google Normalized Distance, as described in "The Google Similarity Distance", Cilibrasi and Vitanyi, 2007 * link: http://arxiv.org/pdf/cs/0412098v3.pdf @@ -98,11 +87,28 @@ public class GND extends NXYSignificanceHeuristic { } @Override + public String getWriteableName() { + return NAMES_FIELD.getPreferredName(); + } + + @Override + public SignificanceHeuristic readFrom(StreamInput in) throws IOException { + return new GND(in.readBoolean()); + } + + @Override public void writeTo(StreamOutput out) throws IOException { - out.writeString(STREAM.getName()); out.writeBoolean(backgroundIsSuperset); } + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAMES_FIELD.getPreferredName()); + builder.field(BACKGROUND_IS_SUPERSET.getPreferredName(), backgroundIsSuperset); + builder.endObject(); + return builder; + } + public static class GNDParser extends NXYParser { @Override @@ -116,7 +122,7 @@ public class GND extends NXYSignificanceHeuristic { } @Override - public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher, SearchContext context) + public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher) throws IOException, QueryShardException { String givenName = parser.currentName(); boolean backgroundIsSuperset = true; @@ -143,7 +149,7 @@ public class GND extends NXYSignificanceHeuristic { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(STREAM.getName()); + builder.startObject(NAMES_FIELD.getPreferredName()); builder.field(BACKGROUND_IS_SUPERSET.getPreferredName(), backgroundIsSuperset); builder.endObject(); return builder; diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/JLHScore.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/JLHScore.java index 97264e7d53..753c9ccb3e 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/JLHScore.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/JLHScore.java @@ -22,38 +22,42 @@ package org.elasticsearch.search.aggregations.bucket.significant.heuristics; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryShardException; -import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; public class JLHScore extends SignificanceHeuristic { - public static final JLHScore INSTANCE = new JLHScore(); + public static final JLHScore PROTOTYPE = new JLHScore(); - protected static final String[] NAMES = {"jlh"}; + protected static final ParseField NAMES_FIELD = new ParseField("jlh"); private JLHScore() {} - public static final SignificanceHeuristicStreams.Stream STREAM = new SignificanceHeuristicStreams.Stream() { - @Override - public SignificanceHeuristic readResult(StreamInput in) throws IOException { - return readFrom(in); - } + @Override + public String getWriteableName() { + return NAMES_FIELD.getPreferredName(); + } - @Override - public String getName() { - return NAMES[0]; - } - }; + @Override + public SignificanceHeuristic readFrom(StreamInput in) throws IOException { + return PROTOTYPE; + } - public static SignificanceHeuristic readFrom(StreamInput in) throws IOException { - return INSTANCE; + @Override + public void writeTo(StreamOutput out) throws IOException { + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAMES_FIELD.getPreferredName()).endObject(); + return builder; } /** @@ -101,26 +105,21 @@ public class JLHScore extends SignificanceHeuristic { return absoluteProbabilityChange * relativeProbabilityChange; } - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeString(STREAM.getName()); - } - public static class JLHScoreParser implements SignificanceHeuristicParser { @Override - public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher, SearchContext context) + public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher) throws IOException, QueryShardException { // move to the closing bracket if (!parser.nextToken().equals(XContentParser.Token.END_OBJECT)) { throw new ElasticsearchParseException("failed to parse [jhl] significance heuristic. expected an empty object, but found [{}] instead", parser.currentToken()); } - return new JLHScore(); + return PROTOTYPE; } @Override public String[] getNames() { - return NAMES; + return NAMES_FIELD.getAllNamesIncludedDeprecated(); } } @@ -128,7 +127,7 @@ public class JLHScore extends SignificanceHeuristic { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(STREAM.getName()).endObject(); + builder.startObject(NAMES_FIELD.getPreferredName()).endObject(); return builder; } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/MutualInformation.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/MutualInformation.java index b4529b8267..d20b5f3544 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/MutualInformation.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/MutualInformation.java @@ -23,13 +23,14 @@ package org.elasticsearch.search.aggregations.bucket.significant.heuristics; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; public class MutualInformation extends NXYSignificanceHeuristic { + static final MutualInformation PROTOTYPE = new MutualInformation(false, false); + protected static final ParseField NAMES_FIELD = new ParseField("mutual_information"); private static final double log2 = Math.log(2.0); @@ -53,18 +54,6 @@ public class MutualInformation extends NXYSignificanceHeuristic { return result; } - public static final SignificanceHeuristicStreams.Stream STREAM = new SignificanceHeuristicStreams.Stream() { - @Override - public SignificanceHeuristic readResult(StreamInput in) throws IOException { - return new MutualInformation(in.readBoolean(), in.readBoolean()); - } - - @Override - public String getName() { - return NAMES_FIELD.getPreferredName(); - } - }; - /** * Calculates mutual information * see "Information Retrieval", Manning et al., Eq. 13.17 @@ -113,9 +102,21 @@ public class MutualInformation extends NXYSignificanceHeuristic { } @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeString(STREAM.getName()); - super.writeTo(out); + public String getWriteableName() { + return NAMES_FIELD.getPreferredName(); + } + + @Override + public SignificanceHeuristic readFrom(StreamInput in) throws IOException { + return new MutualInformation(in.readBoolean(), in.readBoolean()); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAMES_FIELD.getPreferredName()); + super.build(builder); + builder.endObject(); + return builder; } public static class MutualInformationParser extends NXYParser { @@ -139,7 +140,7 @@ public class MutualInformation extends NXYSignificanceHeuristic { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(STREAM.getName()); + builder.startObject(NAMES_FIELD.getPreferredName()); super.build(builder); builder.endObject(); return builder; diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/NXYSignificanceHeuristic.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/NXYSignificanceHeuristic.java index c6a6924108..d3f98f23b9 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/NXYSignificanceHeuristic.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/NXYSignificanceHeuristic.java @@ -28,7 +28,6 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryShardException; -import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -136,10 +135,15 @@ public abstract class NXYSignificanceHeuristic extends SignificanceHeuristic { } } + protected void build(XContentBuilder builder) throws IOException { + builder.field(INCLUDE_NEGATIVES_FIELD.getPreferredName(), includeNegatives).field(BACKGROUND_IS_SUPERSET.getPreferredName(), + backgroundIsSuperset); + } + public static abstract class NXYParser implements SignificanceHeuristicParser { @Override - public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher, SearchContext context) + public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher) throws IOException, QueryShardException { String givenName = parser.currentName(); boolean includeNegatives = false; diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/PercentageScore.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/PercentageScore.java index aceae8c251..e6cfe9a9bf 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/PercentageScore.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/PercentageScore.java @@ -22,38 +22,42 @@ package org.elasticsearch.search.aggregations.bucket.significant.heuristics; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryShardException; -import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; public class PercentageScore extends SignificanceHeuristic { - public static final PercentageScore INSTANCE = new PercentageScore(); + public static final PercentageScore PROTOTYPE = new PercentageScore(); - protected static final String[] NAMES = {"percentage"}; + protected static final ParseField NAMES_FIELD = new ParseField("percentage"); private PercentageScore() {} - public static final SignificanceHeuristicStreams.Stream STREAM = new SignificanceHeuristicStreams.Stream() { - @Override - public SignificanceHeuristic readResult(StreamInput in) throws IOException { - return readFrom(in); - } + @Override + public String getWriteableName() { + return NAMES_FIELD.getPreferredName(); + } - @Override - public String getName() { - return NAMES[0]; - } - }; + @Override + public SignificanceHeuristic readFrom(StreamInput in) throws IOException { + return PROTOTYPE; + } - public static SignificanceHeuristic readFrom(StreamInput in) throws IOException { - return INSTANCE; + @Override + public void writeTo(StreamOutput out) throws IOException { + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAMES_FIELD.getPreferredName()).endObject(); + return builder; } /** @@ -70,26 +74,21 @@ public class PercentageScore extends SignificanceHeuristic { return (double) subsetFreq / (double) supersetFreq; } - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeString(STREAM.getName()); - } - public static class PercentageScoreParser implements SignificanceHeuristicParser { @Override - public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher, SearchContext context) + public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher) throws IOException, QueryShardException { // move to the closing bracket if (!parser.nextToken().equals(XContentParser.Token.END_OBJECT)) { throw new ElasticsearchParseException("failed to parse [percentage] significance heuristic. expected an empty object, but got [{}] instead", parser.currentToken()); } - return new PercentageScore(); + return PROTOTYPE; } @Override public String[] getNames() { - return NAMES; + return NAMES_FIELD.getAllNamesIncludedDeprecated(); } } @@ -97,7 +96,7 @@ public class PercentageScore extends SignificanceHeuristic { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(STREAM.getName()).endObject(); + builder.startObject(NAMES_FIELD.getPreferredName()).endObject(); return builder; } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/ScriptHeuristic.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/ScriptHeuristic.java index 9efea00051..9dd3d0796b 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/ScriptHeuristic.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/ScriptHeuristic.java @@ -22,6 +22,7 @@ package org.elasticsearch.search.aggregations.bucket.significant.heuristics; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.common.HasContextAndHeaders; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.io.stream.StreamInput; @@ -44,9 +45,12 @@ import java.io.IOException; import java.util.Collections; import java.util.HashMap; import java.util.Map; +import java.util.Objects; public class ScriptHeuristic extends SignificanceHeuristic { + static final ScriptHeuristic PROTOTYPE = new ScriptHeuristic(null); + protected static final ParseField NAMES_FIELD = new ParseField("script_heuristic"); private final LongAccessor subsetSizeHolder; private final LongAccessor supersetSizeHolder; @@ -55,31 +59,11 @@ public class ScriptHeuristic extends SignificanceHeuristic { ExecutableScript searchScript = null; Script script; - public static final SignificanceHeuristicStreams.Stream STREAM = new SignificanceHeuristicStreams.Stream() { - @Override - public SignificanceHeuristic readResult(StreamInput in) throws IOException { - Script script = Script.readScript(in); - return new ScriptHeuristic(null, script); - } - - @Override - public String getName() { - return NAMES_FIELD.getPreferredName(); - } - }; - - public ScriptHeuristic(ExecutableScript searchScript, Script script) { + public ScriptHeuristic(Script script) { subsetSizeHolder = new LongAccessor(); supersetSizeHolder = new LongAccessor(); subsetDfHolder = new LongAccessor(); supersetDfHolder = new LongAccessor(); - this.searchScript = searchScript; - if (searchScript != null) { - searchScript.setNextVar("_subset_freq", subsetDfHolder); - searchScript.setNextVar("_subset_size", subsetSizeHolder); - searchScript.setNextVar("_superset_freq", supersetDfHolder); - searchScript.setNextVar("_superset_size", supersetSizeHolder); - } this.script = script; @@ -87,7 +71,16 @@ public class ScriptHeuristic extends SignificanceHeuristic { @Override public void initialize(InternalAggregation.ReduceContext context) { - searchScript = context.scriptService().executable(script, ScriptContext.Standard.AGGS, context, Collections.emptyMap()); + initialize(context.scriptService(), context); + } + + @Override + public void initialize(SearchContext context) { + initialize(context.scriptService(), context); + } + + public void initialize(ScriptService scriptService, HasContextAndHeaders hasContextAndHeaders) { + searchScript = scriptService.executable(script, ScriptContext.Standard.AGGS, hasContextAndHeaders, Collections.emptyMap()); searchScript.setNextVar("_subset_freq", subsetDfHolder); searchScript.setNextVar("_subset_size", subsetSizeHolder); searchScript.setNextVar("_superset_freq", supersetDfHolder); @@ -121,11 +114,47 @@ public class ScriptHeuristic extends SignificanceHeuristic { } @Override + public String getWriteableName() { + return NAMES_FIELD.getPreferredName(); + } + + @Override + public SignificanceHeuristic readFrom(StreamInput in) throws IOException { + Script script = Script.readScript(in); + return new ScriptHeuristic(script); + } + + @Override public void writeTo(StreamOutput out) throws IOException { - out.writeString(STREAM.getName()); script.writeTo(out); } + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params builderParams) throws IOException { + builder.startObject(NAMES_FIELD.getPreferredName()); + builder.field(ScriptField.SCRIPT.getPreferredName()); + script.toXContent(builder, builderParams); + builder.endObject(); + return builder; + } + + @Override + public int hashCode() { + return Objects.hash(script); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + ScriptHeuristic other = (ScriptHeuristic) obj; + return Objects.equals(script, other.script); + } + public static class ScriptHeuristicParser implements SignificanceHeuristicParser { private final ScriptService scriptService; @@ -134,7 +163,7 @@ public class ScriptHeuristic extends SignificanceHeuristic { } @Override - public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher, SearchContext context) + public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher) throws IOException, QueryShardException { String heuristicName = parser.currentName(); Script script = null; @@ -173,13 +202,7 @@ public class ScriptHeuristic extends SignificanceHeuristic { if (script == null) { throw new ElasticsearchParseException("failed to parse [{}] significance heuristic. no script found in script_heuristic", heuristicName); } - ExecutableScript searchScript; - try { - searchScript = scriptService.executable(script, ScriptContext.Standard.AGGS, context, Collections.emptyMap()); - } catch (Exception e) { - throw new ElasticsearchParseException("failed to parse [{}] significance heuristic. the script [{}] could not be loaded", e, script, heuristicName); - } - return new ScriptHeuristic(searchScript, script); + return new ScriptHeuristic(script); } @Override @@ -199,7 +222,7 @@ public class ScriptHeuristic extends SignificanceHeuristic { @Override public XContentBuilder toXContent(XContentBuilder builder, Params builderParams) throws IOException { - builder.startObject(STREAM.getName()); + builder.startObject(NAMES_FIELD.getPreferredName()); builder.field(ScriptField.SCRIPT.getPreferredName()); script.toXContent(builder, builderParams); builder.endObject(); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristic.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristic.java index 4f12277ca0..972696ba99 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristic.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristic.java @@ -20,12 +20,12 @@ package org.elasticsearch.search.aggregations.bucket.significant.heuristics; -import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.NamedWriteable; +import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.internal.SearchContext; -import java.io.IOException; - -public abstract class SignificanceHeuristic { +public abstract class SignificanceHeuristic implements NamedWriteable<SignificanceHeuristic>, ToXContent { /** * @param subsetFreq The frequency of the term in the selected sample * @param subsetSize The size of the selected sample (typically number of docs) @@ -35,8 +35,6 @@ public abstract class SignificanceHeuristic { */ public abstract double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize); - abstract public void writeTo(StreamOutput out) throws IOException; - protected void checkFrequencyValidity(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize, String scoreFunctionName) { if (subsetFreq < 0 || subsetSize < 0 || supersetFreq < 0 || supersetSize < 0) { throw new IllegalArgumentException("Frequencies of subset and superset must be positive in " + scoreFunctionName + ".getScore()"); @@ -52,4 +50,8 @@ public abstract class SignificanceHeuristic { public void initialize(InternalAggregation.ReduceContext reduceContext) { } + + public void initialize(SearchContext context) { + + } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristicParser.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristicParser.java index 92baa43e6b..aa430dc251 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristicParser.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristicParser.java @@ -23,13 +23,12 @@ package org.elasticsearch.search.aggregations.bucket.significant.heuristics; import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; public interface SignificanceHeuristicParser { - SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher, SearchContext context) throws IOException, + SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher) throws IOException, ParsingException; String[] getNames(); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristicStreams.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristicStreams.java index 198f129c28..2ffe5ecc70 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristicStreams.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/SignificanceHeuristicStreams.java @@ -19,6 +19,7 @@ package org.elasticsearch.search.aggregations.bucket.significant.heuristics; import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; import java.io.IOException; import java.util.Collections; @@ -31,21 +32,26 @@ import java.util.Map; */ public class SignificanceHeuristicStreams { - private static Map<String, Stream> STREAMS = Collections.emptyMap(); + private static Map<String, SignificanceHeuristic> STREAMS = Collections.emptyMap(); static { - HashMap<String, Stream> map = new HashMap<>(); - map.put(JLHScore.STREAM.getName(), JLHScore.STREAM); - map.put(PercentageScore.STREAM.getName(), PercentageScore.STREAM); - map.put(MutualInformation.STREAM.getName(), MutualInformation.STREAM); - map.put(GND.STREAM.getName(), GND.STREAM); - map.put(ChiSquare.STREAM.getName(), ChiSquare.STREAM); - map.put(ScriptHeuristic.STREAM.getName(), ScriptHeuristic.STREAM); + HashMap<String, SignificanceHeuristic> map = new HashMap<>(); + map.put(JLHScore.NAMES_FIELD.getPreferredName(), JLHScore.PROTOTYPE); + map.put(PercentageScore.NAMES_FIELD.getPreferredName(), PercentageScore.PROTOTYPE); + map.put(MutualInformation.NAMES_FIELD.getPreferredName(), MutualInformation.PROTOTYPE); + map.put(GND.NAMES_FIELD.getPreferredName(), GND.PROTOTYPE); + map.put(ChiSquare.NAMES_FIELD.getPreferredName(), ChiSquare.PROTOTYPE); + map.put(ScriptHeuristic.NAMES_FIELD.getPreferredName(), ScriptHeuristic.PROTOTYPE); STREAMS = Collections.unmodifiableMap(map); } public static SignificanceHeuristic read(StreamInput in) throws IOException { - return stream(in.readString()).readResult(in); + return stream(in.readString()).readFrom(in); + } + + public static void writeTo(SignificanceHeuristic significanceHeuristic, StreamOutput out) throws IOException { + out.writeString(significanceHeuristic.getWriteableName()); + significanceHeuristic.writeTo(out); } /** @@ -59,17 +65,18 @@ public class SignificanceHeuristicStreams { } /** - * Registers the given stream and associate it with the given types. + * Registers the given prototype. * - * @param stream The stream to register + * @param prototype + * The prototype to register */ - public static synchronized void registerStream(Stream stream) { - if (STREAMS.containsKey(stream.getName())) { - throw new IllegalArgumentException("Can't register stream with name [" + stream.getName() + "] more than once"); + public static synchronized void registerPrototype(SignificanceHeuristic prototype) { + if (STREAMS.containsKey(prototype.getWriteableName())) { + throw new IllegalArgumentException("Can't register stream with name [" + prototype.getWriteableName() + "] more than once"); } - HashMap<String, Stream> map = new HashMap<>(); + HashMap<String, SignificanceHeuristic> map = new HashMap<>(); map.putAll(STREAMS); - map.put(stream.getName(), stream); + map.put(prototype.getWriteableName(), prototype); STREAMS = Collections.unmodifiableMap(map); } @@ -79,7 +86,7 @@ public class SignificanceHeuristicStreams { * @param name The given name * @return The associated stream */ - private static synchronized Stream stream(String name) { + private static synchronized SignificanceHeuristic stream(String name) { return STREAMS.get(name); } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParametersParser.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParametersParser.java deleted file mode 100644 index 891526c33c..0000000000 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParametersParser.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - -package org.elasticsearch.search.aggregations.bucket.terms; -import org.elasticsearch.common.ParseField; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; -import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; -import org.elasticsearch.search.aggregations.support.ValuesSourceParser; -import org.elasticsearch.search.internal.SearchContext; - -import java.io.IOException; - -public abstract class AbstractTermsParametersParser { - - public static final ParseField EXECUTION_HINT_FIELD_NAME = new ParseField("execution_hint"); - public static final ParseField SHARD_SIZE_FIELD_NAME = new ParseField("shard_size"); - public static final ParseField MIN_DOC_COUNT_FIELD_NAME = new ParseField("min_doc_count"); - public static final ParseField SHARD_MIN_DOC_COUNT_FIELD_NAME = new ParseField("shard_min_doc_count"); - public static final ParseField REQUIRED_SIZE_FIELD_NAME = new ParseField("size"); - public static final ParseField SHOW_TERM_DOC_COUNT_ERROR = new ParseField("show_term_doc_count_error"); - - - //These are the results of the parsing. - private TermsAggregator.BucketCountThresholds bucketCountThresholds = new TermsAggregator.BucketCountThresholds(); - - private String executionHint = null; - - private SubAggCollectionMode collectMode = SubAggCollectionMode.DEPTH_FIRST; - - - IncludeExclude includeExclude; - - public TermsAggregator.BucketCountThresholds getBucketCountThresholds() {return bucketCountThresholds;} - - //These are the results of the parsing. - - public String getExecutionHint() { - return executionHint; - } - - public IncludeExclude getIncludeExclude() { - return includeExclude; - } - - public SubAggCollectionMode getCollectionMode() { - return collectMode; - } - - public void parse(String aggregationName, XContentParser parser, SearchContext context, ValuesSourceParser vsParser, IncludeExclude.Parser incExcParser) throws IOException { - bucketCountThresholds = getDefaultBucketCountThresholds(); - XContentParser.Token token; - String currentFieldName = null; - - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - currentFieldName = parser.currentName(); - } else if (vsParser.token(currentFieldName, token, parser)) { - continue; - } else if (incExcParser.token(currentFieldName, token, parser)) { - continue; - } else if (token == XContentParser.Token.VALUE_STRING) { - if (context.parseFieldMatcher().match(currentFieldName, EXECUTION_HINT_FIELD_NAME)) { - executionHint = parser.text(); - } else if(context.parseFieldMatcher().match(currentFieldName, SubAggCollectionMode.KEY)){ - collectMode = SubAggCollectionMode.parse(parser.text(), context.parseFieldMatcher()); - } else if (context.parseFieldMatcher().match(currentFieldName, REQUIRED_SIZE_FIELD_NAME)) { - bucketCountThresholds.setRequiredSize(parser.intValue()); - } else { - parseSpecial(aggregationName, parser, context, token, currentFieldName); - } - } else if (token == XContentParser.Token.VALUE_NUMBER) { - if (context.parseFieldMatcher().match(currentFieldName, REQUIRED_SIZE_FIELD_NAME)) { - bucketCountThresholds.setRequiredSize(parser.intValue()); - } else if (context.parseFieldMatcher().match(currentFieldName, SHARD_SIZE_FIELD_NAME)) { - bucketCountThresholds.setShardSize(parser.intValue()); - } else if (context.parseFieldMatcher().match(currentFieldName, MIN_DOC_COUNT_FIELD_NAME)) { - bucketCountThresholds.setMinDocCount(parser.intValue()); - } else if (context.parseFieldMatcher().match(currentFieldName, SHARD_MIN_DOC_COUNT_FIELD_NAME)) { - bucketCountThresholds.setShardMinDocCount(parser.longValue()); - } else { - parseSpecial(aggregationName, parser, context, token, currentFieldName); - } - } else { - parseSpecial(aggregationName, parser, context, token, currentFieldName); - } - } - includeExclude = incExcParser.includeExclude(); - } - - public abstract void parseSpecial(String aggregationName, XContentParser parser, SearchContext context, XContentParser.Token token, String currentFieldName) throws IOException; - - protected abstract TermsAggregator.BucketCountThresholds getDefaultBucketCountThresholds(); -} diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParser.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParser.java new file mode 100644 index 0000000000..16ec4ab9de --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParser.java @@ -0,0 +1,130 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket.terms; + +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParseFieldMatcher; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentParser.Token; +import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds; +import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; +import org.elasticsearch.search.aggregations.support.AbstractValuesSourceParser.AnyValuesSourceParser; +import org.elasticsearch.search.aggregations.support.ValueType; +import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; + +import java.io.IOException; +import java.util.Map; + +public abstract class AbstractTermsParser extends AnyValuesSourceParser { + + public static final ParseField EXECUTION_HINT_FIELD_NAME = new ParseField("execution_hint"); + public static final ParseField SHARD_SIZE_FIELD_NAME = new ParseField("shard_size"); + public static final ParseField MIN_DOC_COUNT_FIELD_NAME = new ParseField("min_doc_count"); + public static final ParseField SHARD_MIN_DOC_COUNT_FIELD_NAME = new ParseField("shard_min_doc_count"); + public static final ParseField REQUIRED_SIZE_FIELD_NAME = new ParseField("size"); + + public IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(); + + protected AbstractTermsParser() { + super(true, true); + } + + @Override + protected final ValuesSourceAggregatorFactory<ValuesSource> createFactory(String aggregationName, ValuesSourceType valuesSourceType, + ValueType targetValueType, Map<ParseField, Object> otherOptions) { + BucketCountThresholds bucketCountThresholds = getDefaultBucketCountThresholds(); + Integer requiredSize = (Integer) otherOptions.get(REQUIRED_SIZE_FIELD_NAME); + if (requiredSize != null && requiredSize != -1) { + bucketCountThresholds.setRequiredSize(requiredSize); + } + Integer shardSize = (Integer) otherOptions.get(SHARD_SIZE_FIELD_NAME); + if (shardSize != null && shardSize != -1) { + bucketCountThresholds.setShardSize(shardSize); + } + Long minDocCount = (Long) otherOptions.get(MIN_DOC_COUNT_FIELD_NAME); + if (minDocCount != null && minDocCount != -1) { + bucketCountThresholds.setMinDocCount(minDocCount); + } + Long shardMinDocCount = (Long) otherOptions.get(SHARD_MIN_DOC_COUNT_FIELD_NAME); + if (shardMinDocCount != null && shardMinDocCount != -1) { + bucketCountThresholds.setShardMinDocCount(shardMinDocCount); + } + SubAggCollectionMode collectMode = (SubAggCollectionMode) otherOptions.get(SubAggCollectionMode.KEY); + String executionHint = (String) otherOptions.get(EXECUTION_HINT_FIELD_NAME); + IncludeExclude incExc = incExcParser.createIncludeExclude(otherOptions); + return doCreateFactory(aggregationName, valuesSourceType, targetValueType, bucketCountThresholds, collectMode, executionHint, + incExc, + otherOptions); + } + + protected abstract ValuesSourceAggregatorFactory<ValuesSource> doCreateFactory(String aggregationName, + ValuesSourceType valuesSourceType, + ValueType targetValueType, BucketCountThresholds bucketCountThresholds, SubAggCollectionMode collectMode, String executionHint, + IncludeExclude incExc, Map<ParseField, Object> otherOptions); + + @Override + protected boolean token(String aggregationName, String currentFieldName, Token token, XContentParser parser, + ParseFieldMatcher parseFieldMatcher, Map<ParseField, Object> otherOptions) throws IOException { + if (incExcParser.token(currentFieldName, token, parser, parseFieldMatcher, otherOptions)) { + return true; + } else if (token == XContentParser.Token.VALUE_STRING) { + if (parseFieldMatcher.match(currentFieldName, EXECUTION_HINT_FIELD_NAME)) { + otherOptions.put(EXECUTION_HINT_FIELD_NAME, parser.text()); + return true; + } else if (parseFieldMatcher.match(currentFieldName, SubAggCollectionMode.KEY)) { + otherOptions.put(SubAggCollectionMode.KEY, SubAggCollectionMode.parse(parser.text(), parseFieldMatcher)); + return true; + } else if (parseFieldMatcher.match(currentFieldName, REQUIRED_SIZE_FIELD_NAME)) { + otherOptions.put(REQUIRED_SIZE_FIELD_NAME, parser.intValue()); + return true; + } else if (parseSpecial(aggregationName, parser, parseFieldMatcher, token, currentFieldName, otherOptions)) { + return true; + } + } else if (token == XContentParser.Token.VALUE_NUMBER) { + if (parseFieldMatcher.match(currentFieldName, REQUIRED_SIZE_FIELD_NAME)) { + otherOptions.put(REQUIRED_SIZE_FIELD_NAME, parser.intValue()); + return true; + } else if (parseFieldMatcher.match(currentFieldName, SHARD_SIZE_FIELD_NAME)) { + otherOptions.put(SHARD_SIZE_FIELD_NAME, parser.intValue()); + return true; + } else if (parseFieldMatcher.match(currentFieldName, MIN_DOC_COUNT_FIELD_NAME)) { + otherOptions.put(MIN_DOC_COUNT_FIELD_NAME, parser.longValue()); + return true; + } else if (parseFieldMatcher.match(currentFieldName, SHARD_MIN_DOC_COUNT_FIELD_NAME)) { + otherOptions.put(SHARD_MIN_DOC_COUNT_FIELD_NAME, parser.longValue()); + return true; + } else if (parseSpecial(aggregationName, parser, parseFieldMatcher, token, currentFieldName, otherOptions)) { + return true; + } + } else if (parseSpecial(aggregationName, parser, parseFieldMatcher, token, currentFieldName, otherOptions)) { + return true; + } + return false; + } + + public abstract boolean parseSpecial(String aggregationName, XContentParser parser, ParseFieldMatcher parseFieldMatcher, + XContentParser.Token token, String currentFieldName, Map<ParseField, Object> otherOptions) throws IOException; + + protected abstract TermsAggregator.BucketCountThresholds getDefaultBucketCountThresholds(); + +} diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalOrder.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalOrder.java index 4e3e28a1b3..b5e1e81479 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalOrder.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalOrder.java @@ -38,6 +38,7 @@ import java.util.Comparator; import java.util.Iterator; import java.util.LinkedList; import java.util.List; +import java.util.Objects; /** * @@ -263,6 +264,23 @@ class InternalOrder extends Terms.Order { return new CompoundOrderComparator(orderElements, aggregator); } + @Override + public int hashCode() { + return Objects.hash(orderElements); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + CompoundOrder other = (CompoundOrder) obj; + return Objects.equals(orderElements, other.orderElements); + } + public static class CompoundOrderComparator implements Comparator<Terms.Bucket> { private List<Terms.Order> compoundOrder; @@ -306,7 +324,7 @@ class InternalOrder extends Terms.Order { } public static Terms.Order readOrder(StreamInput in) throws IOException { - return readOrder(in, true); + return readOrder(in, false); } public static Terms.Order readOrder(StreamInput in, boolean absoluteOrder) throws IOException { @@ -332,4 +350,22 @@ class InternalOrder extends Terms.Order { } } } + + @Override + public int hashCode() { + return Objects.hash(id, asc); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + InternalOrder other = (InternalOrder) obj; + return Objects.equals(id, other.id) + && Objects.equals(asc, other.asc); + } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/Terms.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/Terms.java index 224e42cb7f..16fb7f1d81 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/Terms.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/Terms.java @@ -82,7 +82,7 @@ public interface Terms extends MultiBucketsAggregation { * Get the bucket for the given term, or null if there is no such bucket. */ Bucket getBucketByKey(String term); - + /** * Get an upper bound of the error on document counts in this aggregation. */ @@ -166,5 +166,11 @@ public interface Terms extends MultiBucketsAggregation { abstract byte id(); + @Override + public abstract int hashCode(); + + @Override + public abstract boolean equals(Object obj); + } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregator.java index 7971d1f5ae..7ea88c9a10 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregator.java @@ -21,7 +21,10 @@ package org.elasticsearch.search.aggregations.bucket.terms; import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.common.Explicit; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; @@ -36,99 +39,136 @@ import java.io.IOException; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; public abstract class TermsAggregator extends BucketsAggregator { - public static class BucketCountThresholds { - private Explicit<Long> minDocCount; - private Explicit<Long> shardMinDocCount; - private Explicit<Integer> requiredSize; - private Explicit<Integer> shardSize; + public static class BucketCountThresholds implements Writeable<BucketCountThresholds>, ToXContent { - public BucketCountThresholds(long minDocCount, long shardMinDocCount, int requiredSize, int shardSize) { - this.minDocCount = new Explicit<>(minDocCount, false); - this.shardMinDocCount = new Explicit<>(shardMinDocCount, false); - this.requiredSize = new Explicit<>(requiredSize, false); - this.shardSize = new Explicit<>(shardSize, false); + private static final BucketCountThresholds PROTOTYPE = new BucketCountThresholds(-1, -1, -1, -1); + + private long minDocCount; + private long shardMinDocCount; + private int requiredSize; + private int shardSize; + + public static BucketCountThresholds readFromStream(StreamInput in) throws IOException { + return PROTOTYPE.readFrom(in); } - public BucketCountThresholds() { - this(-1, -1, -1, -1); + + public BucketCountThresholds(long minDocCount, long shardMinDocCount, int requiredSize, int shardSize) { + this.minDocCount = minDocCount; + this.shardMinDocCount = shardMinDocCount; + this.requiredSize = requiredSize; + this.shardSize = shardSize; } public BucketCountThresholds(BucketCountThresholds bucketCountThresholds) { - this(bucketCountThresholds.minDocCount.value(), bucketCountThresholds.shardMinDocCount.value(), bucketCountThresholds.requiredSize.value(), bucketCountThresholds.shardSize.value()); + this(bucketCountThresholds.minDocCount, bucketCountThresholds.shardMinDocCount, bucketCountThresholds.requiredSize, + bucketCountThresholds.shardSize); } public void ensureValidity() { - if (shardSize.value() == 0) { + if (shardSize == 0) { setShardSize(Integer.MAX_VALUE); } - if (requiredSize.value() == 0) { + if (requiredSize == 0) { setRequiredSize(Integer.MAX_VALUE); } // shard_size cannot be smaller than size as we need to at least fetch <size> entries from every shards in order to return <size> - if (shardSize.value() < requiredSize.value()) { - setShardSize(requiredSize.value()); + if (shardSize < requiredSize) { + setShardSize(requiredSize); } // shard_min_doc_count should not be larger than min_doc_count because this can cause buckets to be removed that would match the min_doc_count criteria - if (shardMinDocCount.value() > minDocCount.value()) { - setShardMinDocCount(minDocCount.value()); + if (shardMinDocCount > minDocCount) { + setShardMinDocCount(minDocCount); } - if (requiredSize.value() < 0 || minDocCount.value() < 0) { + if (requiredSize < 0 || minDocCount < 0) { throw new ElasticsearchException("parameters [requiredSize] and [minDocCount] must be >=0 in terms aggregation."); } } public long getShardMinDocCount() { - return shardMinDocCount.value(); + return shardMinDocCount; } public void setShardMinDocCount(long shardMinDocCount) { - this.shardMinDocCount = new Explicit<>(shardMinDocCount, true); + this.shardMinDocCount = shardMinDocCount; } public long getMinDocCount() { - return minDocCount.value(); + return minDocCount; } public void setMinDocCount(long minDocCount) { - this.minDocCount = new Explicit<>(minDocCount, true); + this.minDocCount = minDocCount; } public int getRequiredSize() { - return requiredSize.value(); + return requiredSize; } public void setRequiredSize(int requiredSize) { - this.requiredSize = new Explicit<>(requiredSize, true); + this.requiredSize = requiredSize; } public int getShardSize() { - return shardSize.value(); + return shardSize; } public void setShardSize(int shardSize) { - this.shardSize = new Explicit<>(shardSize, true); + this.shardSize = shardSize; } - public void toXContent(XContentBuilder builder) throws IOException { - if (requiredSize.explicit()) { - builder.field(AbstractTermsParametersParser.REQUIRED_SIZE_FIELD_NAME.getPreferredName(), requiredSize.value()); - } - if (shardSize.explicit()) { - builder.field(AbstractTermsParametersParser.SHARD_SIZE_FIELD_NAME.getPreferredName(), shardSize.value()); - } - if (minDocCount.explicit()) { - builder.field(AbstractTermsParametersParser.MIN_DOC_COUNT_FIELD_NAME.getPreferredName(), minDocCount.value()); + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.field(TermsAggregatorFactory.REQUIRED_SIZE_FIELD_NAME.getPreferredName(), requiredSize); + builder.field(TermsAggregatorFactory.SHARD_SIZE_FIELD_NAME.getPreferredName(), shardSize); + builder.field(TermsAggregatorFactory.MIN_DOC_COUNT_FIELD_NAME.getPreferredName(), minDocCount); + builder.field(TermsAggregatorFactory.SHARD_MIN_DOC_COUNT_FIELD_NAME.getPreferredName(), shardMinDocCount); + return builder; + } + + @Override + public BucketCountThresholds readFrom(StreamInput in) throws IOException { + int requiredSize = in.readInt(); + int shardSize = in.readInt(); + long minDocCount = in.readLong(); + long shardMinDocCount = in.readLong(); + return new BucketCountThresholds(minDocCount, shardMinDocCount, requiredSize, shardSize); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeInt(requiredSize); + out.writeInt(shardSize); + out.writeLong(minDocCount); + out.writeLong(shardMinDocCount); + } + + @Override + public int hashCode() { + return Objects.hash(requiredSize, shardSize, minDocCount, shardMinDocCount); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; } - if (shardMinDocCount.explicit()) { - builder.field(AbstractTermsParametersParser.SHARD_MIN_DOC_COUNT_FIELD_NAME.getPreferredName(), shardMinDocCount.value()); + if (getClass() != obj.getClass()) { + return false; } + BucketCountThresholds other = (BucketCountThresholds) obj; + return Objects.equals(requiredSize, other.requiredSize) + && Objects.equals(shardSize, other.shardSize) + && Objects.equals(minDocCount, other.minDocCount) + && Objects.equals(shardMinDocCount, other.shardMinDocCount); } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index f668e3ad99..29fbe3bae1 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -21,28 +21,52 @@ package org.elasticsearch.search.aggregations.bucket.terms; import org.apache.lucene.search.IndexSearcher; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseFieldMatcher; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.NonCollectingAggregator; +import org.elasticsearch.search.aggregations.bucket.BucketUtils; +import org.elasticsearch.search.aggregations.bucket.terms.Terms.Order; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds; import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.AggregationContext; +import org.elasticsearch.search.aggregations.support.ValueType; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; -import org.elasticsearch.search.aggregations.support.ValuesSourceParser; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Objects; /** * */ +/** + * + */ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource> { + public static final ParseField EXECUTION_HINT_FIELD_NAME = new ParseField("execution_hint"); + public static final ParseField SHARD_SIZE_FIELD_NAME = new ParseField("shard_size"); + public static final ParseField MIN_DOC_COUNT_FIELD_NAME = new ParseField("min_doc_count"); + public static final ParseField SHARD_MIN_DOC_COUNT_FIELD_NAME = new ParseField("shard_min_doc_count"); + public static final ParseField REQUIRED_SIZE_FIELD_NAME = new ParseField("size"); + + static final TermsAggregator.BucketCountThresholds DEFAULT_BUCKET_COUNT_THRESHOLDS = new TermsAggregator.BucketCountThresholds(1, 0, 10, + -1); + public static final ParseField SHOW_TERM_DOC_COUNT_ERROR = new ParseField("show_term_doc_count_error"); + public static final ParseField ORDER_FIELD = new ParseField("order"); + public enum ExecutionMode { MAP(new ParseField("map")) { @@ -155,28 +179,100 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values } } - private final Terms.Order order; - private final IncludeExclude includeExclude; - private final String executionHint; - private final SubAggCollectionMode collectMode; - private final TermsAggregator.BucketCountThresholds bucketCountThresholds; - private final boolean showTermDocCountError; - - public TermsAggregatorFactory(String name, ValuesSourceParser.Input input, Terms.Order order, - TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, String executionHint, - SubAggCollectionMode executionMode, boolean showTermDocCountError) { - super(name, StringTerms.TYPE, input); - this.order = order; - this.includeExclude = includeExclude; - this.executionHint = executionHint; + private List<Terms.Order> orders = Collections.singletonList(Terms.Order.count(false)); + private IncludeExclude includeExclude = null; + private String executionHint = null; + private SubAggCollectionMode collectMode = SubAggCollectionMode.DEPTH_FIRST; + private TermsAggregator.BucketCountThresholds bucketCountThresholds = new TermsAggregator.BucketCountThresholds( + DEFAULT_BUCKET_COUNT_THRESHOLDS); + private boolean showTermDocCountError = false; + + public TermsAggregatorFactory(String name, ValuesSourceType valuesSourceType, ValueType valueType) { + super(name, StringTerms.TYPE, valuesSourceType, valueType); + } + + public TermsAggregator.BucketCountThresholds bucketCountThresholds() { + return bucketCountThresholds; + } + + public void bucketCountThresholds(TermsAggregator.BucketCountThresholds bucketCountThresholds) { this.bucketCountThresholds = bucketCountThresholds; - this.collectMode = executionMode; + } + + /** + * Sets the order in which the buckets will be returned. + */ + public void order(List<Terms.Order> order) { + this.orders = order; + } + + /** + * Gets the order in which the buckets will be returned. + */ + public List<Terms.Order> order() { + return orders; + } + + /** + * Expert: sets an execution hint to the aggregation. + */ + public void executionHint(String executionHint) { + this.executionHint = executionHint; + } + + /** + * Expert: gets an execution hint to the aggregation. + */ + public String executionHint() { + return executionHint; + } + + /** + * Expert: set the collection mode. + */ + public void collectMode(SubAggCollectionMode mode) { + this.collectMode = mode; + } + + /** + * Expert: get the collection mode. + */ + public SubAggCollectionMode collectMode() { + return collectMode; + } + + /** + * Set terms to include and exclude from the aggregation results + */ + public void includeExclude(IncludeExclude includeExclude) { + this.includeExclude = includeExclude; + } + + /** + * Get terms to include and exclude from the aggregation results + */ + public IncludeExclude includeExclude() { + return includeExclude; + } + + /** + * Get whether doc count error will be return for individual terms + */ + public boolean showTermDocCountError() { + return showTermDocCountError; + } + + /** + * Set whether doc count error will be return for individual terms + */ + public void showTermDocCountError(boolean showTermDocCountError) { this.showTermDocCountError = showTermDocCountError; } @Override protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException { + Terms.Order order = resolveOrder(orders); final InternalAggregation aggregation = new UnmappedTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), pipelineAggregators, metaData); return new NonCollectingAggregator(name, aggregationContext, parent, factories, pipelineAggregators, metaData) { @@ -192,13 +288,38 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values }; } + private Order resolveOrder(List<Order> orders) { + Terms.Order order; + if (orders.size() == 1 && (orders.get(0) == InternalOrder.TERM_ASC || orders.get(0) == InternalOrder.TERM_DESC)) { + // If order is only terms order then we don't need compound + // ordering + order = orders.get(0); + } else { + // for all other cases we need compound order so term order asc + // can be added to make the order deterministic + order = Order.compound(orders); + } + return order; + } + @Override protected Aggregator doCreateInternal(ValuesSource valuesSource, AggregationContext aggregationContext, Aggregator parent, boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException { + Terms.Order order = resolveOrder(orders); if (collectsFromSingleBucket == false) { return asMultiBucketAggregator(this, aggregationContext, parent); } + BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(this.bucketCountThresholds); + if (!(order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC) + && bucketCountThresholds.getShardSize() == DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) { + // The user has not made a shardSize selection. Use default + // heuristic to avoid any wrong-ranking caused by distributed + // counting + bucketCountThresholds.setShardSize(BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize(), + aggregationContext.searchContext().numberOfShards())); + } + bucketCountThresholds.ensureValidity(); if (valuesSource instanceof ValuesSource.Bytes) { ExecutionMode execution = null; if (executionHint != null) { @@ -278,4 +399,76 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values + "]. It can only be applied to numeric or string fields."); } + @Override + protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { + bucketCountThresholds.toXContent(builder, params); + builder.field(SHOW_TERM_DOC_COUNT_ERROR.getPreferredName(), showTermDocCountError); + if (executionHint != null) { + builder.field(TermsAggregatorFactory.EXECUTION_HINT_FIELD_NAME.getPreferredName(), executionHint); + } + builder.startArray(ORDER_FIELD.getPreferredName()); + for (Terms.Order order : orders) { + order.toXContent(builder, params); + } + builder.endArray(); + builder.field(SubAggCollectionMode.KEY.getPreferredName(), collectMode.parseField().getPreferredName()); + if (includeExclude != null) { + includeExclude.toXContent(builder, params); + } + return builder; + } + + @Override + protected ValuesSourceAggregatorFactory<ValuesSource> innerReadFrom(String name, ValuesSourceType valuesSourceType, + ValueType targetValueType, StreamInput in) throws IOException { + TermsAggregatorFactory factory = new TermsAggregatorFactory(name, valuesSourceType, targetValueType); + factory.bucketCountThresholds = BucketCountThresholds.readFromStream(in); + factory.collectMode = SubAggCollectionMode.BREADTH_FIRST.readFrom(in); + factory.executionHint = in.readOptionalString(); + if (in.readBoolean()) { + factory.includeExclude = IncludeExclude.readFromStream(in); + } + int numOrders = in.readVInt(); + List<Terms.Order> orders = new ArrayList<>(numOrders); + for (int i = 0; i < numOrders; i++) { + orders.add(InternalOrder.Streams.readOrder(in)); + } + factory.orders = orders; + factory.showTermDocCountError = in.readBoolean(); + return factory; + } + + @Override + protected void innerWriteTo(StreamOutput out) throws IOException { + bucketCountThresholds.writeTo(out); + collectMode.writeTo(out); + out.writeOptionalString(executionHint); + boolean hasIncExc = includeExclude != null; + out.writeBoolean(hasIncExc); + if (hasIncExc) { + includeExclude.writeTo(out); + } + out.writeVInt(orders.size()); + for (Terms.Order order : orders) { + InternalOrder.Streams.writeOrder(order, out); + } + out.writeBoolean(showTermDocCountError); + } + + @Override + protected int innerHashCode() { + return Objects.hash(bucketCountThresholds, collectMode, executionHint, includeExclude, orders, showTermDocCountError); + } + + @Override + protected boolean innerEquals(Object obj) { + TermsAggregatorFactory other = (TermsAggregatorFactory) obj; + return Objects.equals(bucketCountThresholds, other.bucketCountThresholds) + && Objects.equals(collectMode, other.collectMode) + && Objects.equals(executionHint, other.executionHint) + && Objects.equals(includeExclude, other.includeExclude) + && Objects.equals(orders, other.orders) + && Objects.equals(showTermDocCountError, other.showTermDocCountError); + } + } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java index 9bc1f7a9a6..3d8aff9b24 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java @@ -97,7 +97,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> { this.includePattern = regex; return this; } - + /** * Define a set of terms that should be aggregated. */ @@ -107,8 +107,8 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> { } this.includeTerms = terms; return this; - } - + } + /** * Define a set of terms that should be aggregated. */ @@ -118,16 +118,16 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> { } this.includeTerms = longsArrToStringArr(terms); return this; - } - + } + private String[] longsArrToStringArr(long[] terms) { String[] termsAsString = new String[terms.length]; for (int i = 0; i < terms.length; i++) { termsAsString[i] = Long.toString(terms[i]); } return termsAsString; - } - + } + /** * Define a set of terms that should be aggregated. @@ -146,7 +146,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> { termsAsString[i] = Double.toString(terms[i]); } return termsAsString; - } + } /** * Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular @@ -161,7 +161,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> { this.excludePattern = regex; return this; } - + /** * Define a set of terms that should not be aggregated. */ @@ -171,9 +171,9 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> { } this.excludeTerms = terms; return this; - } - - + } + + /** * Define a set of terms that should not be aggregated. */ @@ -194,9 +194,9 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> { } this.excludeTerms = doubleArrToStringArr(terms); return this; - } - - + } + + /** * When using scripts, the value type indicates the types of the values the script is generating. @@ -241,13 +241,13 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> { @Override protected XContentBuilder doInternalXContent(XContentBuilder builder, Params params) throws IOException { - bucketCountThresholds.toXContent(builder); + bucketCountThresholds.toXContent(builder, params); if (showTermDocCountError != null) { - builder.field(AbstractTermsParametersParser.SHOW_TERM_DOC_COUNT_ERROR.getPreferredName(), showTermDocCountError); + builder.field(TermsAggregatorFactory.SHOW_TERM_DOC_COUNT_ERROR.getPreferredName(), showTermDocCountError); } if (executionHint != null) { - builder.field(AbstractTermsParametersParser.EXECUTION_HINT_FIELD_NAME.getPreferredName(), executionHint); + builder.field(TermsAggregatorFactory.EXECUTION_HINT_FIELD_NAME.getPreferredName(), executionHint); } if (valueType != null) { builder.field("value_type", valueType.name().toLowerCase(Locale.ROOT)); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParametersParser.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParametersParser.java deleted file mode 100644 index c8138b7b45..0000000000 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParametersParser.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - -package org.elasticsearch.search.aggregations.bucket.terms; - -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.search.SearchParseException; -import org.elasticsearch.search.internal.SearchContext; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - - -public class TermsParametersParser extends AbstractTermsParametersParser { - - private static final TermsAggregator.BucketCountThresholds DEFAULT_BUCKET_COUNT_THRESHOLDS = new TermsAggregator.BucketCountThresholds(1, 0, 10, -1); - - public List<OrderElement> getOrderElements() { - return orderElements; - } - - public boolean showTermDocCountError() { - return showTermDocCountError; - } - - List<OrderElement> orderElements; - private boolean showTermDocCountError = false; - - public TermsParametersParser() { - orderElements = new ArrayList<>(1); - orderElements.add(new OrderElement("_count", false)); - } - - @Override - public void parseSpecial(String aggregationName, XContentParser parser, SearchContext context, XContentParser.Token token, String currentFieldName) throws IOException { - if (token == XContentParser.Token.START_OBJECT) { - if ("order".equals(currentFieldName)) { - this.orderElements = Collections.singletonList(parseOrderParam(aggregationName, parser, context)); - } else { - throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" - + currentFieldName + "].", parser.getTokenLocation()); - } - } else if (token == XContentParser.Token.START_ARRAY) { - if ("order".equals(currentFieldName)) { - orderElements = new ArrayList<>(); - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - if (token == XContentParser.Token.START_OBJECT) { - OrderElement orderParam = parseOrderParam(aggregationName, parser, context); - orderElements.add(orderParam); - } else { - throw new SearchParseException(context, "Order elements must be of type object in [" + aggregationName + "].", - parser.getTokenLocation()); - } - } - } else { - throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" - + currentFieldName + "].", parser.getTokenLocation()); - } - } else if (token == XContentParser.Token.VALUE_BOOLEAN) { - if (context.parseFieldMatcher().match(currentFieldName, SHOW_TERM_DOC_COUNT_ERROR)) { - showTermDocCountError = parser.booleanValue(); - } - } else { - throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName - + "].", parser.getTokenLocation()); - } - } - - private OrderElement parseOrderParam(String aggregationName, XContentParser parser, SearchContext context) throws IOException { - XContentParser.Token token; - OrderElement orderParam = null; - String orderKey = null; - boolean orderAsc = false; - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - orderKey = parser.currentName(); - } else if (token == XContentParser.Token.VALUE_STRING) { - String dir = parser.text(); - if ("asc".equalsIgnoreCase(dir)) { - orderAsc = true; - } else if ("desc".equalsIgnoreCase(dir)) { - orderAsc = false; - } else { - throw new SearchParseException(context, "Unknown terms order direction [" + dir + "] in terms aggregation [" - + aggregationName + "]", parser.getTokenLocation()); - } - } else { - throw new SearchParseException(context, "Unexpected token " + token + " for [order] in [" + aggregationName + "].", - parser.getTokenLocation()); - } - } - if (orderKey == null) { - throw new SearchParseException(context, "Must specify at least one field for [order] in [" + aggregationName + "].", - parser.getTokenLocation()); - } else { - orderParam = new OrderElement(orderKey, orderAsc); - } - return orderParam; - } - - static class OrderElement { - private final String key; - private final boolean asc; - - public OrderElement(String key, boolean asc) { - this.key = key; - this.asc = asc; - } - - public String key() { - return key; - } - - public boolean asc() { - return asc; - } - - - } - - @Override - public TermsAggregator.BucketCountThresholds getDefaultBucketCountThresholds() { - return new TermsAggregator.BucketCountThresholds(DEFAULT_BUCKET_COUNT_THRESHOLDS); - } -} diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java index 4398ec655b..4a40816ab2 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java @@ -18,24 +18,32 @@ */ package org.elasticsearch.search.aggregations.bucket.terms; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParseFieldMatcher; +import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.common.xcontent.XContentParser.Token; +import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; import org.elasticsearch.search.aggregations.AggregatorFactory; -import org.elasticsearch.search.aggregations.bucket.BucketUtils; import org.elasticsearch.search.aggregations.bucket.terms.Terms.Order; -import org.elasticsearch.search.aggregations.bucket.terms.TermsParametersParser.OrderElement; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds; import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; -import org.elasticsearch.search.aggregations.support.ValuesSourceParser; -import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.search.aggregations.support.ValueType; +import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.Map; /** * */ -public class TermsParser implements Aggregator.Parser { +public class TermsParser extends AbstractTermsParser { + @Override public String type() { @@ -43,38 +51,123 @@ public class TermsParser implements Aggregator.Parser { } @Override - public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException { - TermsParametersParser aggParser = new TermsParametersParser(); - ValuesSourceParser vsParser = ValuesSourceParser.any(aggregationName, StringTerms.TYPE, context).scriptable(true).formattable(true).build(); - IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(); - aggParser.parse(aggregationName, parser, context, vsParser, incExcParser); - - List<OrderElement> orderElements = aggParser.getOrderElements(); - List<Terms.Order> orders = new ArrayList<>(orderElements.size()); - for (OrderElement orderElement : orderElements) { - orders.add(resolveOrder(orderElement.key(), orderElement.asc())); + protected ValuesSourceAggregatorFactory<ValuesSource> doCreateFactory(String aggregationName, ValuesSourceType valuesSourceType, + ValueType targetValueType, BucketCountThresholds bucketCountThresholds, SubAggCollectionMode collectMode, String executionHint, + IncludeExclude incExc, Map<ParseField, Object> otherOptions) { + TermsAggregatorFactory factory = new TermsAggregatorFactory(aggregationName, valuesSourceType, targetValueType); + List<OrderElement> orderElements = (List<OrderElement>) otherOptions.get(TermsAggregatorFactory.ORDER_FIELD); + if (orderElements != null) { + List<Terms.Order> orders = new ArrayList<>(orderElements.size()); + for (OrderElement orderElement : orderElements) { + orders.add(resolveOrder(orderElement.key(), orderElement.asc())); + } + factory.order(orders); + } + if (bucketCountThresholds != null) { + factory.bucketCountThresholds(bucketCountThresholds); + } + if (collectMode != null) { + factory.collectMode(collectMode); + } + if (executionHint != null) { + factory.executionHint(executionHint); + } + if (incExc != null) { + factory.includeExclude(incExc); + } + Boolean showTermDocCountError = (Boolean) otherOptions.get(TermsAggregatorFactory.SHOW_TERM_DOC_COUNT_ERROR); + if (showTermDocCountError != null) { + factory.showTermDocCountError(showTermDocCountError); } - Terms.Order order; - if (orders.size() == 1 && (orders.get(0) == InternalOrder.TERM_ASC || orders.get(0) == InternalOrder.TERM_DESC)) - { - // If order is only terms order then we don't need compound ordering - order = orders.get(0); + return factory; + } + + @Override + public boolean parseSpecial(String aggregationName, XContentParser parser, ParseFieldMatcher parseFieldMatcher, Token token, + String currentFieldName, Map<ParseField, Object> otherOptions) throws IOException { + if (token == XContentParser.Token.START_OBJECT) { + if (parseFieldMatcher.match(currentFieldName, TermsAggregatorFactory.ORDER_FIELD)) { + otherOptions.put(TermsAggregatorFactory.ORDER_FIELD, Collections.singletonList(parseOrderParam(aggregationName, parser))); + return true; + } + } else if (token == XContentParser.Token.START_ARRAY) { + if (parseFieldMatcher.match(currentFieldName, TermsAggregatorFactory.ORDER_FIELD)) { + List<OrderElement> orderElements = new ArrayList<>(); + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + if (token == XContentParser.Token.START_OBJECT) { + OrderElement orderParam = parseOrderParam(aggregationName, parser); + orderElements.add(orderParam); + } else { + throw new ParsingException(parser.getTokenLocation(), + "Order elements must be of type object in [" + aggregationName + "]."); + } + } + otherOptions.put(TermsAggregatorFactory.ORDER_FIELD, orderElements); + return true; + } + } else if (token == XContentParser.Token.VALUE_BOOLEAN) { + if (parseFieldMatcher.match(currentFieldName, TermsAggregatorFactory.SHOW_TERM_DOC_COUNT_ERROR)) { + otherOptions.put(TermsAggregatorFactory.SHOW_TERM_DOC_COUNT_ERROR, parser.booleanValue()); + return true; + } } - else - { - // for all other cases we need compound order so term order asc can be added to make the order deterministic - order = Order.compound(orders); + return false; + } + + private OrderElement parseOrderParam(String aggregationName, XContentParser parser) throws IOException { + XContentParser.Token token; + OrderElement orderParam = null; + String orderKey = null; + boolean orderAsc = false; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + orderKey = parser.currentName(); + } else if (token == XContentParser.Token.VALUE_STRING) { + String dir = parser.text(); + if ("asc".equalsIgnoreCase(dir)) { + orderAsc = true; + } else if ("desc".equalsIgnoreCase(dir)) { + orderAsc = false; + } else { + throw new ParsingException(parser.getTokenLocation(), + "Unknown terms order direction [" + dir + "] in terms aggregation [" + aggregationName + "]"); + } + } else { + throw new ParsingException(parser.getTokenLocation(), + "Unexpected token " + token + " for [order] in [" + aggregationName + "]."); + } } - TermsAggregator.BucketCountThresholds bucketCountThresholds = aggParser.getBucketCountThresholds(); - if (!(order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC) - && bucketCountThresholds.getShardSize() == aggParser.getDefaultBucketCountThresholds().getShardSize()) { - // The user has not made a shardSize selection. Use default heuristic to avoid any wrong-ranking caused by distributed counting - bucketCountThresholds.setShardSize(BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize(), - context.numberOfShards())); + if (orderKey == null) { + throw new ParsingException(parser.getTokenLocation(), + "Must specify at least one field for [order] in [" + aggregationName + "]."); + } else { + orderParam = new OrderElement(orderKey, orderAsc); } - bucketCountThresholds.ensureValidity(); - return new TermsAggregatorFactory(aggregationName, vsParser.input(), order, bucketCountThresholds, aggParser.getIncludeExclude(), - aggParser.getExecutionHint(), aggParser.getCollectionMode(), aggParser.showTermDocCountError()); + return orderParam; + } + + static class OrderElement { + private final String key; + private final boolean asc; + + public OrderElement(String key, boolean asc) { + this.key = key; + this.asc = asc; + } + + public String key() { + return key; + } + + public boolean asc() { + return asc; + } + + } + + @Override + public TermsAggregator.BucketCountThresholds getDefaultBucketCountThresholds() { + return new TermsAggregator.BucketCountThresholds(TermsAggregatorFactory.DEFAULT_BUCKET_COUNT_THRESHOLDS); } static Terms.Order resolveOrder(String key, boolean asc) { @@ -87,10 +180,9 @@ public class TermsParser implements Aggregator.Parser { return Order.aggregation(key, asc); } - // NORELEASE implement this method when refactoring this aggregation @Override public AggregatorFactory[] getFactoryPrototypes() { - return null; + return new AggregatorFactory[] { new TermsAggregatorFactory(null, null, null) }; } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java index 9c33a987a9..f6df150a4c 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java @@ -34,12 +34,22 @@ import org.apache.lucene.util.automaton.CompiledAutomaton; import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.RegExp; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParseFieldMatcher; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSource.Bytes.WithOrdinals; import java.io.IOException; +import java.util.Collections; import java.util.HashSet; +import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; @@ -48,7 +58,16 @@ import java.util.TreeSet; * Defines the include/exclude regular expression filtering for string terms aggregation. In this filtering logic, * exclusion has precedence, where the {@code include} is evaluated first and then the {@code exclude}. */ -public class IncludeExclude { +public class IncludeExclude implements Writeable<IncludeExclude>, ToXContent { + + private static final IncludeExclude PROTOTYPE = new IncludeExclude(Collections.emptySortedSet(), Collections.emptySortedSet()); + private static final ParseField INCLUDE_FIELD = new ParseField("include"); + private static final ParseField EXCLUDE_FIELD = new ParseField("exclude"); + private static final ParseField PATTERN_FIELD = new ParseField("pattern"); + + public static IncludeExclude readFromStream(StreamInput in) throws IOException { + return PROTOTYPE.readFrom(in); + } // The includeValue and excludeValue ByteRefs which are the result of the parsing // process are converted into a LongFilter when used on numeric fields @@ -283,18 +302,14 @@ public class IncludeExclude { public static class Parser { - String include = null; - String exclude = null; - SortedSet<BytesRef> includeValues; - SortedSet<BytesRef> excludeValues; - - public boolean token(String currentFieldName, XContentParser.Token token, XContentParser parser) throws IOException { + public boolean token(String currentFieldName, XContentParser.Token token, XContentParser parser, + ParseFieldMatcher parseFieldMatcher, Map<ParseField, Object> otherOptions) throws IOException { if (token == XContentParser.Token.VALUE_STRING) { - if ("include".equals(currentFieldName)) { - include = parser.text(); - } else if ("exclude".equals(currentFieldName)) { - exclude = parser.text(); + if (parseFieldMatcher.match(currentFieldName, INCLUDE_FIELD)) { + otherOptions.put(INCLUDE_FIELD, parser.text()); + } else if (parseFieldMatcher.match(currentFieldName, EXCLUDE_FIELD)) { + otherOptions.put(EXCLUDE_FIELD, parser.text()); } else { return false; } @@ -302,35 +317,35 @@ public class IncludeExclude { } if (token == XContentParser.Token.START_ARRAY) { - if ("include".equals(currentFieldName)) { - includeValues = new TreeSet<>(parseArrayToSet(parser)); + if (parseFieldMatcher.match(currentFieldName, INCLUDE_FIELD)) { + otherOptions.put(INCLUDE_FIELD, new TreeSet<>(parseArrayToSet(parser))); return true; } - if ("exclude".equals(currentFieldName)) { - excludeValues = new TreeSet<>(parseArrayToSet(parser)); + if (parseFieldMatcher.match(currentFieldName, EXCLUDE_FIELD)) { + otherOptions.put(EXCLUDE_FIELD, new TreeSet<>(parseArrayToSet(parser))); return true; } return false; } if (token == XContentParser.Token.START_OBJECT) { - if ("include".equals(currentFieldName)) { + if (parseFieldMatcher.match(currentFieldName, INCLUDE_FIELD)) { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); } else if (token == XContentParser.Token.VALUE_STRING) { - if ("pattern".equals(currentFieldName)) { - include = parser.text(); + if (parseFieldMatcher.match(currentFieldName, PATTERN_FIELD)) { + otherOptions.put(INCLUDE_FIELD, parser.text()); } } } - } else if ("exclude".equals(currentFieldName)) { + } else if (parseFieldMatcher.match(currentFieldName, EXCLUDE_FIELD)) { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); } else if (token == XContentParser.Token.VALUE_STRING) { - if ("pattern".equals(currentFieldName)) { - exclude = parser.text(); + if (parseFieldMatcher.match(currentFieldName, PATTERN_FIELD)) { + otherOptions.put(EXCLUDE_FIELD, parser.text()); } } } @@ -342,6 +357,7 @@ public class IncludeExclude { return false; } + private Set<BytesRef> parseArrayToSet(XContentParser parser) throws IOException { final Set<BytesRef> set = new HashSet<>(); if (parser.currentToken() != XContentParser.Token.START_ARRAY) { @@ -356,7 +372,27 @@ public class IncludeExclude { return set; } - public IncludeExclude includeExclude() { + public IncludeExclude createIncludeExclude(Map<ParseField, Object> otherOptions) { + Object includeObject = otherOptions.get(INCLUDE_FIELD); + String include = null; + SortedSet<BytesRef> includeValues = null; + if (includeObject != null) { + if (includeObject instanceof String) { + include = (String) includeObject; + } else if (includeObject instanceof SortedSet) { + includeValues = (SortedSet<BytesRef>) includeObject; + } + } + Object excludeObject = otherOptions.get(EXCLUDE_FIELD); + String exclude = null; + SortedSet<BytesRef> excludeValues = null; + if (excludeObject != null) { + if (excludeObject instanceof String) { + exclude = (String) excludeObject; + } else if (excludeObject instanceof SortedSet) { + excludeValues = (SortedSet<BytesRef>) excludeObject; + } + } RegExp includePattern = include != null ? new RegExp(include) : null; RegExp excludePattern = exclude != null ? new RegExp(exclude) : null; if (includePattern != null || excludePattern != null) { @@ -444,4 +480,111 @@ public class IncludeExclude { return result; } + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + if (include != null) { + builder.field(INCLUDE_FIELD.getPreferredName(), include.getOriginalString()); + } + if (includeValues != null) { + builder.startArray(INCLUDE_FIELD.getPreferredName()); + for (BytesRef value : includeValues) { + builder.value(value.utf8ToString()); + } + builder.endArray(); + } + if (exclude != null) { + builder.field(EXCLUDE_FIELD.getPreferredName(), exclude.getOriginalString()); + } + if (excludeValues != null) { + builder.startArray(EXCLUDE_FIELD.getPreferredName()); + for (BytesRef value : excludeValues) { + builder.value(value.utf8ToString()); + } + builder.endArray(); + } + return builder; + } + + @Override + public IncludeExclude readFrom(StreamInput in) throws IOException { + if (in.readBoolean()) { + String includeString = in.readOptionalString(); + RegExp include = null; + if (includeString != null) { + include = new RegExp(includeString); + } + String excludeString = in.readOptionalString(); + RegExp exclude = null; + if (excludeString != null) { + exclude = new RegExp(excludeString); + } + return new IncludeExclude(include, exclude); + } else { + SortedSet<BytesRef> includes = null; + if (in.readBoolean()) { + int size = in.readVInt(); + includes = new TreeSet<>(); + for (int i = 0; i < size; i++) { + includes.add(in.readBytesRef()); + } + } + SortedSet<BytesRef> excludes = null; + if (in.readBoolean()) { + int size = in.readVInt(); + excludes = new TreeSet<>(); + for (int i = 0; i < size; i++) { + excludes.add(in.readBytesRef()); + } + } + return new IncludeExclude(includes, excludes); + } + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + boolean regexBased = isRegexBased(); + out.writeBoolean(regexBased); + if (regexBased) { + out.writeOptionalString(include == null ? null : include.getOriginalString()); + out.writeOptionalString(exclude == null ? null : exclude.getOriginalString()); + } else { + boolean hasIncludes = includeValues != null; + out.writeBoolean(hasIncludes); + if (hasIncludes) { + out.writeVInt(includeValues.size()); + for (BytesRef value : includeValues) { + out.writeBytesRef(value); + } + } + boolean hasExcludes = excludeValues != null; + out.writeBoolean(hasExcludes); + if (hasExcludes) { + out.writeVInt(excludeValues.size()); + for (BytesRef value : excludeValues) { + out.writeBytesRef(value); + } + } + } + } + + @Override + public int hashCode() { + return Objects.hash(include == null ? null : include.getOriginalString(), exclude == null ? null : exclude.getOriginalString(), + includeValues, excludeValues); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } if (getClass() != obj.getClass()) { + return false; + } + IncludeExclude other = (IncludeExclude) obj; + return Objects.equals(include == null ? null : include.getOriginalString(), other.include == null ? null : other.include.getOriginalString()) + && Objects.equals(exclude == null ? null : exclude.getOriginalString(), other.exclude == null ? null : other.exclude.getOriginalString()) + && Objects.equals(includeValues, other.includeValues) + && Objects.equals(excludeValues, other.excludeValues); + } + } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregatorFactory.java index 0084e634a1..931863dcef 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregatorFactory.java @@ -343,7 +343,7 @@ public abstract class ValuesSourceAggregatorFactory<VS extends ValuesSource> ext } @Override - public void doWriteTo(StreamOutput out) throws IOException { + protected final void doWriteTo(StreamOutput out) throws IOException { valuesSourceType.writeTo(out); boolean hasTargetValueType = targetValueType != null; out.writeBoolean(hasTargetValueType); @@ -376,7 +376,7 @@ public abstract class ValuesSourceAggregatorFactory<VS extends ValuesSource> ext } @Override - protected ValuesSourceAggregatorFactory<VS> doReadFrom(String name, StreamInput in) throws IOException { + protected final ValuesSourceAggregatorFactory<VS> doReadFrom(String name, StreamInput in) throws IOException { ValuesSourceType valuesSourceType = ValuesSourceType.ANY.readFrom(in); ValueType targetValueType = null; if (in.readBoolean()) { @@ -433,7 +433,7 @@ public abstract class ValuesSourceAggregatorFactory<VS extends ValuesSource> ext } @Override - public int doHashCode() { + public final int doHashCode() { return Objects.hash(field, format, missing, script, targetValueType, timeZone, valueType, valuesSourceType, innerHashCode()); } @@ -446,7 +446,7 @@ public abstract class ValuesSourceAggregatorFactory<VS extends ValuesSource> ext } @Override - public boolean doEquals(Object obj) { + public final boolean doEquals(Object obj) { ValuesSourceAggregatorFactory<?> other = (ValuesSourceAggregatorFactory<?>) obj; if (!Objects.equals(field, other.field)) return false; diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/SubAggCollectionModeTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/SubAggCollectionModeTests.java new file mode 100644 index 0000000000..131144dc5d --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/SubAggCollectionModeTests.java @@ -0,0 +1,82 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations; + +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; + +public class SubAggCollectionModeTests extends ESTestCase { + + public void testValidOrdinals() { + assertThat(SubAggCollectionMode.DEPTH_FIRST.ordinal(), equalTo(0)); + assertThat(SubAggCollectionMode.BREADTH_FIRST.ordinal(), equalTo(1)); + } + + public void testwriteTo() throws Exception { + try (BytesStreamOutput out = new BytesStreamOutput()) { + SubAggCollectionMode.DEPTH_FIRST.writeTo(out); + try (StreamInput in = StreamInput.wrap(out.bytes())) { + assertThat(in.readVInt(), equalTo(0)); + } + } + + try (BytesStreamOutput out = new BytesStreamOutput()) { + SubAggCollectionMode.BREADTH_FIRST.writeTo(out); + try (StreamInput in = StreamInput.wrap(out.bytes())) { + assertThat(in.readVInt(), equalTo(1)); + } + } + } + + public void testReadFrom() throws Exception { + try (BytesStreamOutput out = new BytesStreamOutput()) { + out.writeVInt(0); + try (StreamInput in = StreamInput.wrap(out.bytes())) { + assertThat(SubAggCollectionMode.BREADTH_FIRST.readFrom(in), equalTo(SubAggCollectionMode.DEPTH_FIRST)); + } + } + try (BytesStreamOutput out = new BytesStreamOutput()) { + out.writeVInt(1); + try (StreamInput in = StreamInput.wrap(out.bytes())) { + assertThat(SubAggCollectionMode.BREADTH_FIRST.readFrom(in), equalTo(SubAggCollectionMode.BREADTH_FIRST)); + } + } + } + + public void testInvalidReadFrom() throws Exception { + try (BytesStreamOutput out = new BytesStreamOutput()) { + out.writeVInt(randomIntBetween(2, Integer.MAX_VALUE)); + try (StreamInput in = StreamInput.wrap(out.bytes())) { + SubAggCollectionMode.BREADTH_FIRST.readFrom(in); + fail("Expected IOException"); + } catch(IOException e) { + assertThat(e.getMessage(), containsString("Unknown SubAggCollectionMode ordinal [")); + } + + } + } +} diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java index 6c1e7dfcab..173f9ddfef 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.aggregations.bucket; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -52,7 +53,6 @@ import org.elasticsearch.search.aggregations.bucket.significant.heuristics.Signi import org.elasticsearch.search.aggregations.bucket.terms.StringTerms; import org.elasticsearch.search.aggregations.bucket.terms.Terms; import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder; -import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.search.aggregations.bucket.SharedSignificantTermsTestMethods; @@ -163,7 +163,7 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase { public static class CustomSignificanceHeuristicPlugin extends Plugin { static { - SignificanceHeuristicStreams.registerStream(SimpleHeuristic.STREAM); + SignificanceHeuristicStreams.registerPrototype(SimpleHeuristic.PROTOTYPE); } @Override @@ -187,24 +187,30 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase { public static class SimpleHeuristic extends SignificanceHeuristic { - protected static final String[] NAMES = {"simple"}; + static final SimpleHeuristic PROTOTYPE = new SimpleHeuristic(); - public static final SignificanceHeuristicStreams.Stream STREAM = new SignificanceHeuristicStreams.Stream() { - @Override - public SignificanceHeuristic readResult(StreamInput in) throws IOException { - return readFrom(in); - } + protected static final ParseField NAMES_FIELD = new ParseField("simple"); - @Override - public String getName() { - return NAMES[0]; - } - }; + @Override + public String getWriteableName() { + return NAMES_FIELD.getPreferredName(); + } - public static SignificanceHeuristic readFrom(StreamInput in) throws IOException { + @Override + public SignificanceHeuristic readFrom(StreamInput in) throws IOException { return new SimpleHeuristic(); } + @Override + public void writeTo(StreamOutput out) throws IOException { + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAMES_FIELD.getPreferredName()).endObject(); + return builder; + } + /** * @param subsetFreq The frequency of the term in the selected sample * @param subsetSize The size of the selected sample (typically number of docs) @@ -217,15 +223,10 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase { return subsetFreq / subsetSize > supersetFreq / supersetSize ? 2.0 : 1.0; } - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeString(STREAM.getName()); - } - public static class SimpleHeuristicParser implements SignificanceHeuristicParser { @Override - public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher, SearchContext context) + public SignificanceHeuristic parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher) throws IOException, QueryShardException { parser.nextToken(); return new SimpleHeuristic(); @@ -233,7 +234,7 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase { @Override public String[] getNames() { - return NAMES; + return NAMES_FIELD.getAllNamesIncludedDeprecated(); } } @@ -241,7 +242,7 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(STREAM.getName()).endObject(); + builder.startObject(NAMES_FIELD.getPreferredName()).endObject(); return builder; } } diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java new file mode 100644 index 0000000000..8ad928e5ed --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java @@ -0,0 +1,226 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.RegExp; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.script.Script; +import org.elasticsearch.search.aggregations.BaseAggregationTestCase; +import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.PercentageScore; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ScriptHeuristic; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory.ExecutionMode; +import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; + +import java.util.SortedSet; +import java.util.TreeSet; + +public class SignificantTermsTests extends BaseAggregationTestCase<SignificantTermsAggregatorFactory> { + + private static final String[] executionHints; + + static { + ExecutionMode[] executionModes = ExecutionMode.values(); + executionHints = new String[executionModes.length]; + for (int i = 0; i < executionModes.length; i++) { + executionHints[i] = executionModes[i].toString(); + } + } + + @Override + protected SignificantTermsAggregatorFactory createTestAggregatorFactory() { + String name = randomAsciiOfLengthBetween(3, 20); + SignificantTermsAggregatorFactory factory = new SignificantTermsAggregatorFactory(name, ValuesSourceType.ANY, null); + String field = randomAsciiOfLengthBetween(3, 20); + int randomFieldBranch = randomInt(2); + switch (randomFieldBranch) { + case 0: + factory.field(field); + break; + case 1: + factory.field(field); + factory.script(new Script("_value + 1")); + break; + case 2: + factory.script(new Script("doc[" + field + "] + 1")); + break; + default: + fail(); + } + if (randomBoolean()) { + factory.missing("MISSING"); + } + if (randomBoolean()) { + int size = randomInt(4); + switch (size) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + size = randomInt(); + break; + default: + fail(); + } + factory.bucketCountThresholds().setRequiredSize(size); + + } + if (randomBoolean()) { + int shardSize = randomInt(4); + switch (shardSize) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + shardSize = randomInt(); + break; + default: + fail(); + } + factory.bucketCountThresholds().setShardSize(shardSize); + } + if (randomBoolean()) { + int minDocCount = randomInt(4); + switch (minDocCount) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + minDocCount = randomInt(); + break; + } + factory.bucketCountThresholds().setMinDocCount(minDocCount); + } + if (randomBoolean()) { + int shardMinDocCount = randomInt(4); + switch (shardMinDocCount) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + shardMinDocCount = randomInt(); + break; + default: + fail(); + } + factory.bucketCountThresholds().setShardMinDocCount(shardMinDocCount); + } + if (randomBoolean()) { + factory.executionHint(randomFrom(executionHints)); + } + if (randomBoolean()) { + factory.format("###.##"); + } + if (randomBoolean()) { + IncludeExclude incExc = null; + switch (randomInt(5)) { + case 0: + incExc = new IncludeExclude(new RegExp("foobar"), null); + break; + case 1: + incExc = new IncludeExclude(null, new RegExp("foobaz")); + break; + case 2: + incExc = new IncludeExclude(new RegExp("foobar"), new RegExp("foobaz")); + break; + case 3: + SortedSet<BytesRef> includeValues = new TreeSet<>(); + int numIncs = randomIntBetween(1, 20); + for (int i = 0; i < numIncs; i++) { + includeValues.add(new BytesRef(randomAsciiOfLengthBetween(1, 30))); + } + SortedSet<BytesRef> excludeValues = null; + incExc = new IncludeExclude(includeValues, excludeValues); + break; + case 4: + SortedSet<BytesRef> includeValues2 = null; + SortedSet<BytesRef> excludeValues2 = new TreeSet<>(); + int numExcs2 = randomIntBetween(1, 20); + for (int i = 0; i < numExcs2; i++) { + excludeValues2.add(new BytesRef(randomAsciiOfLengthBetween(1, 30))); + } + incExc = new IncludeExclude(includeValues2, excludeValues2); + break; + case 5: + SortedSet<BytesRef> includeValues3 = new TreeSet<>(); + int numIncs3 = randomIntBetween(1, 20); + for (int i = 0; i < numIncs3; i++) { + includeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30))); + } + SortedSet<BytesRef> excludeValues3 = new TreeSet<>(); + int numExcs3 = randomIntBetween(1, 20); + for (int i = 0; i < numExcs3; i++) { + excludeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30))); + } + incExc = new IncludeExclude(includeValues3, excludeValues3); + break; + default: + fail(); + } + factory.includeExclude(incExc); + } + if (randomBoolean()) { + SignificanceHeuristic significanceHeuristic = null; + switch (randomInt(5)) { + case 0: + significanceHeuristic = PercentageScore.PROTOTYPE; + break; + case 1: + significanceHeuristic = new ChiSquare(randomBoolean(), randomBoolean()); + break; + case 2: + significanceHeuristic = new GND(randomBoolean()); + break; + case 3: + significanceHeuristic = new MutualInformation(randomBoolean(), randomBoolean()); + break; + case 4: + significanceHeuristic = new ScriptHeuristic(new Script("foo")); + break; + case 5: + significanceHeuristic = JLHScore.PROTOTYPE; + break; + default: + fail(); + } + factory.significanceHeuristic(significanceHeuristic); + } + if (randomBoolean()) { + factory.backgroundFilter(QueryBuilders.termsQuery("foo", "bar")); + } + return factory; + } + +} diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/TermsTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/TermsTests.java new file mode 100644 index 0000000000..fb57b6d1b2 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/TermsTests.java @@ -0,0 +1,232 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.RegExp; +import org.elasticsearch.script.Script; +import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; +import org.elasticsearch.search.aggregations.BaseAggregationTestCase; +import org.elasticsearch.search.aggregations.bucket.terms.Terms; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory.ExecutionMode; +import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; + +import java.util.ArrayList; +import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; + +public class TermsTests extends BaseAggregationTestCase<TermsAggregatorFactory> { + + private static final String[] executionHints; + + static { + ExecutionMode[] executionModes = ExecutionMode.values(); + executionHints = new String[executionModes.length]; + for (int i = 0; i < executionModes.length; i++) { + executionHints[i] = executionModes[i].toString(); + } + } + + @Override + protected TermsAggregatorFactory createTestAggregatorFactory() { + String name = randomAsciiOfLengthBetween(3, 20); + TermsAggregatorFactory factory = new TermsAggregatorFactory(name, ValuesSourceType.ANY, null); + String field = randomAsciiOfLengthBetween(3, 20); + int randomFieldBranch = randomInt(2); + switch (randomFieldBranch) { + case 0: + factory.field(field); + break; + case 1: + factory.field(field); + factory.script(new Script("_value + 1")); + break; + case 2: + factory.script(new Script("doc[" + field + "] + 1")); + break; + default: + fail(); + } + if (randomBoolean()) { + factory.missing("MISSING"); + } + if (randomBoolean()) { + int size = randomInt(4); + switch (size) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + size = randomInt(); + break; + default: + fail(); + } + factory.bucketCountThresholds().setRequiredSize(size); + + } + if (randomBoolean()) { + int shardSize = randomInt(4); + switch (shardSize) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + shardSize = randomInt(); + break; + default: + fail(); + } + factory.bucketCountThresholds().setShardSize(shardSize); + } + if (randomBoolean()) { + int minDocCount = randomInt(4); + switch (minDocCount) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + minDocCount = randomInt(); + break; + default: + fail(); + } + factory.bucketCountThresholds().setMinDocCount(minDocCount); + } + if (randomBoolean()) { + int shardMinDocCount = randomInt(4); + switch (shardMinDocCount) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + shardMinDocCount = randomInt(); + break; + default: + fail(); + } + factory.bucketCountThresholds().setShardMinDocCount(shardMinDocCount); + } + if (randomBoolean()) { + factory.collectMode(randomFrom(SubAggCollectionMode.values())); + } + if (randomBoolean()) { + factory.executionHint(randomFrom(executionHints)); + } + if (randomBoolean()) { + factory.format("###.##"); + } + if (randomBoolean()) { + IncludeExclude incExc = null; + switch (randomInt(5)) { + case 0: + incExc = new IncludeExclude(new RegExp("foobar"), null); + break; + case 1: + incExc = new IncludeExclude(null, new RegExp("foobaz")); + break; + case 2: + incExc = new IncludeExclude(new RegExp("foobar"), new RegExp("foobaz")); + break; + case 3: + SortedSet<BytesRef> includeValues = new TreeSet<>(); + int numIncs = randomIntBetween(1, 20); + for (int i = 0; i < numIncs; i++) { + includeValues.add(new BytesRef(randomAsciiOfLengthBetween(1, 30))); + } + SortedSet<BytesRef> excludeValues = null; + incExc = new IncludeExclude(includeValues, excludeValues); + break; + case 4: + SortedSet<BytesRef> includeValues2 = null; + SortedSet<BytesRef> excludeValues2 = new TreeSet<>(); + int numExcs2 = randomIntBetween(1, 20); + for (int i = 0; i < numExcs2; i++) { + excludeValues2.add(new BytesRef(randomAsciiOfLengthBetween(1, 30))); + } + incExc = new IncludeExclude(includeValues2, excludeValues2); + break; + case 5: + SortedSet<BytesRef> includeValues3 = new TreeSet<>(); + int numIncs3 = randomIntBetween(1, 20); + for (int i = 0; i < numIncs3; i++) { + includeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30))); + } + SortedSet<BytesRef> excludeValues3 = new TreeSet<>(); + int numExcs3 = randomIntBetween(1, 20); + for (int i = 0; i < numExcs3; i++) { + excludeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30))); + } + incExc = new IncludeExclude(includeValues3, excludeValues3); + break; + default: + fail(); + } + factory.includeExclude(incExc); + } + if (randomBoolean()) { + List<Terms.Order> order = randomOrder(); + factory.order(order); + } + if (randomBoolean()) { + factory.showTermDocCountError(randomBoolean()); + } + return factory; + } + + private List<Terms.Order> randomOrder() { + List<Terms.Order> orders = new ArrayList<>(); + switch (randomInt(4)) { + case 0: + orders.add(Terms.Order.term(randomBoolean())); + break; + case 1: + orders.add(Terms.Order.count(randomBoolean())); + break; + case 2: + orders.add(Terms.Order.aggregation(randomAsciiOfLengthBetween(3, 20), randomBoolean())); + break; + case 3: + orders.add(Terms.Order.aggregation(randomAsciiOfLengthBetween(3, 20), randomAsciiOfLengthBetween(3, 20), randomBoolean())); + break; + case 4: + int numOrders = randomIntBetween(1, 3); + for (int i = 0; i < numOrders; i++) { + orders.addAll(randomOrder()); + } + break; + default: + fail(); + } + return orders; + } + +} diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificanceHeuristicTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificanceHeuristicTests.java index 0fe9113e8f..166efe27a3 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificanceHeuristicTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificanceHeuristicTests.java @@ -22,11 +22,14 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.Version; import org.elasticsearch.common.io.stream.InputStreamStreamInput; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.OutputStreamStreamOutput; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.json.JsonXContent; +import org.elasticsearch.indices.query.IndicesQueriesRegistry; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.InternalAggregations; @@ -127,7 +130,7 @@ public class SignificanceHeuristicTests extends ESTestCase { SignificanceHeuristic getRandomSignificanceheuristic() { List<SignificanceHeuristic> heuristics = new ArrayList<>(); - heuristics.add(JLHScore.INSTANCE); + heuristics.add(JLHScore.PROTOTYPE); heuristics.add(new MutualInformation(randomBoolean(), randomBoolean())); heuristics.add(new GND(randomBoolean())); heuristics.add(new ChiSquare(randomBoolean(), randomBoolean())); @@ -227,11 +230,14 @@ public class SignificanceHeuristicTests extends ESTestCase { checkParseException(heuristicParserMapper, searchContext, faultyHeuristicdefinition, expectedError); } - protected void checkParseException(SignificanceHeuristicParserMapper heuristicParserMapper, SearchContext searchContext, String faultyHeuristicDefinition, String expectedError) throws IOException { + protected void checkParseException(SignificanceHeuristicParserMapper heuristicParserMapper, SearchContext searchContext, + String faultyHeuristicDefinition, String expectedError) throws IOException { + + IndicesQueriesRegistry registry = new IndicesQueriesRegistry(Settings.EMPTY, new HashSet<>(), new NamedWriteableRegistry()); try { XContentParser stParser = JsonXContent.jsonXContent.createParser("{\"field\":\"text\", " + faultyHeuristicDefinition + ",\"min_doc_count\":200}"); stParser.nextToken(); - new SignificantTermsParser(heuristicParserMapper).parse("testagg", stParser, searchContext); + new SignificantTermsParser(heuristicParserMapper, registry).parse("testagg", stParser, searchContext); fail(); } catch (ElasticsearchParseException e) { assertTrue(e.getMessage().contains(expectedError)); @@ -247,9 +253,12 @@ public class SignificanceHeuristicTests extends ESTestCase { return parseSignificanceHeuristic(heuristicParserMapper, searchContext, stParser); } - private SignificanceHeuristic parseSignificanceHeuristic(SignificanceHeuristicParserMapper heuristicParserMapper, SearchContext searchContext, XContentParser stParser) throws IOException { + private SignificanceHeuristic parseSignificanceHeuristic(SignificanceHeuristicParserMapper heuristicParserMapper, + SearchContext searchContext, XContentParser stParser) throws IOException { + IndicesQueriesRegistry registry = new IndicesQueriesRegistry(Settings.EMPTY, new HashSet<>(), new NamedWriteableRegistry()); stParser.nextToken(); - SignificantTermsAggregatorFactory aggregatorFactory = (SignificantTermsAggregatorFactory) new SignificantTermsParser(heuristicParserMapper).parse("testagg", stParser, searchContext); + SignificantTermsAggregatorFactory aggregatorFactory = (SignificantTermsAggregatorFactory) new SignificantTermsParser( + heuristicParserMapper, registry).parse("testagg", stParser, searchContext); stParser.nextToken(); assertThat(aggregatorFactory.getBucketCountThresholds().getMinDocCount(), equalTo(200l)); assertThat(stParser.currentToken(), equalTo(null)); @@ -365,14 +374,14 @@ public class SignificanceHeuristicTests extends ESTestCase { testBackgroundAssertions(new MutualInformation(true, true), new MutualInformation(true, false)); testBackgroundAssertions(new ChiSquare(true, true), new ChiSquare(true, false)); testBackgroundAssertions(new GND(true), new GND(false)); - testAssertions(PercentageScore.INSTANCE); - testAssertions(JLHScore.INSTANCE); + testAssertions(PercentageScore.PROTOTYPE); + testAssertions(JLHScore.PROTOTYPE); } public void testBasicScoreProperties() { - basicScoreProperties(JLHScore.INSTANCE, true); + basicScoreProperties(JLHScore.PROTOTYPE, true); basicScoreProperties(new GND(true), true); - basicScoreProperties(PercentageScore.INSTANCE, true); + basicScoreProperties(PercentageScore.PROTOTYPE, true); basicScoreProperties(new MutualInformation(true, true), false); basicScoreProperties(new ChiSquare(true, true), false); } diff --git a/core/src/test/java/org/elasticsearch/test/search/aggregations/bucket/SharedSignificantTermsTestMethods.java b/core/src/test/java/org/elasticsearch/test/search/aggregations/bucket/SharedSignificantTermsTestMethods.java index 1df965968a..a120b63c82 100644 --- a/core/src/test/java/org/elasticsearch/test/search/aggregations/bucket/SharedSignificantTermsTestMethods.java +++ b/core/src/test/java/org/elasticsearch/test/search/aggregations/bucket/SharedSignificantTermsTestMethods.java @@ -57,13 +57,9 @@ public class SharedSignificantTermsTestMethods { } private static void checkSignificantTermsAggregationCorrect(ESIntegTestCase testCase) { - - SearchResponse response = client().prepareSearch(INDEX_NAME).setTypes(DOC_TYPE) - .addAggregation(new TermsBuilder("class").field(CLASS_FIELD).subAggregation( - new SignificantTermsBuilder("sig_terms") - .field(TEXT_FIELD))) - .execute() - .actionGet(); + SearchResponse response = client().prepareSearch(INDEX_NAME).setTypes(DOC_TYPE).addAggregation( + new TermsBuilder("class").field(CLASS_FIELD).subAggregation(new SignificantTermsBuilder("sig_terms").field(TEXT_FIELD))) + .execute().actionGet(); assertSearchResponse(response); StringTerms classes = response.getAggregations().get("class"); Assert.assertThat(classes.getBuckets().size(), equalTo(2)); |