/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.search.aggregations.bucket.significant; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.ObjectParser; import org.elasticsearch.common.xcontent.ParseFieldRegistry; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.AbstractQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.search.aggregations.AbstractAggregationBuilder; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregationInitializationException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories.Builder; import org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicParser; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds; import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; import java.util.Arrays; import java.util.List; import java.util.Objects; public class SignificantTextAggregationBuilder extends AbstractAggregationBuilder { public static final String NAME = "significant_text"; static final ParseField FIELD_NAME = new ParseField("field"); static final ParseField SOURCE_FIELDS_NAME = new ParseField("source_fields"); static final ParseField FILTER_DUPLICATE_TEXT_FIELD_NAME = new ParseField( "filter_duplicate_text"); static final TermsAggregator.BucketCountThresholds DEFAULT_BUCKET_COUNT_THRESHOLDS = SignificantTermsAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS; static final SignificanceHeuristic DEFAULT_SIGNIFICANCE_HEURISTIC = SignificantTermsAggregationBuilder.DEFAULT_SIGNIFICANCE_HEURISTIC; private String fieldName = null; private String [] sourceFieldNames = null; private boolean filterDuplicateText = false; private IncludeExclude includeExclude = null; private QueryBuilder filterBuilder = null; private TermsAggregator.BucketCountThresholds bucketCountThresholds = new BucketCountThresholds( DEFAULT_BUCKET_COUNT_THRESHOLDS); private SignificanceHeuristic significanceHeuristic = DEFAULT_SIGNIFICANCE_HEURISTIC; public static Aggregator.Parser getParser( ParseFieldRegistry significanceHeuristicParserRegistry) { ObjectParser PARSER = new ObjectParser<>( SignificantTextAggregationBuilder.NAME); PARSER.declareInt(SignificantTextAggregationBuilder::shardSize, TermsAggregationBuilder.SHARD_SIZE_FIELD_NAME); PARSER.declareLong(SignificantTextAggregationBuilder::minDocCount, TermsAggregationBuilder.MIN_DOC_COUNT_FIELD_NAME); PARSER.declareLong(SignificantTextAggregationBuilder::shardMinDocCount, TermsAggregationBuilder.SHARD_MIN_DOC_COUNT_FIELD_NAME); PARSER.declareInt(SignificantTextAggregationBuilder::size, TermsAggregationBuilder.REQUIRED_SIZE_FIELD_NAME); PARSER.declareString(SignificantTextAggregationBuilder::fieldName, FIELD_NAME); PARSER.declareStringArray(SignificantTextAggregationBuilder::sourceFieldNames, SOURCE_FIELDS_NAME); PARSER.declareBoolean(SignificantTextAggregationBuilder::filterDuplicateText, FILTER_DUPLICATE_TEXT_FIELD_NAME); PARSER.declareObject(SignificantTextAggregationBuilder::backgroundFilter, (p, context) -> AbstractQueryBuilder.parseInnerQueryBuilder(p), SignificantTermsAggregationBuilder.BACKGROUND_FILTER); PARSER.declareField((b, v) -> b.includeExclude(IncludeExclude.merge(v, b.includeExclude())), IncludeExclude::parseInclude, IncludeExclude.INCLUDE_FIELD, ObjectParser.ValueType.OBJECT_ARRAY_OR_STRING); PARSER.declareField((b, v) -> b.includeExclude(IncludeExclude.merge(b.includeExclude(), v)), IncludeExclude::parseExclude, IncludeExclude.EXCLUDE_FIELD, ObjectParser.ValueType.STRING_ARRAY); for (String name : significanceHeuristicParserRegistry.getNames()) { PARSER.declareObject(SignificantTextAggregationBuilder::significanceHeuristic, (p, context) -> { SignificanceHeuristicParser significanceHeuristicParser = significanceHeuristicParserRegistry .lookupReturningNullIfNotFound(name); return significanceHeuristicParser.parse(p); }, new ParseField(name)); } return new Aggregator.Parser() { @Override public AggregationBuilder parse(String aggregationName, XContentParser parser) throws IOException { return PARSER.parse(parser, new SignificantTextAggregationBuilder(aggregationName, null), null); } }; } protected TermsAggregator.BucketCountThresholds getBucketCountThresholds() { return new TermsAggregator.BucketCountThresholds(bucketCountThresholds); } public TermsAggregator.BucketCountThresholds bucketCountThresholds() { return bucketCountThresholds; } @Override public SignificantTextAggregationBuilder subAggregations(Builder subFactories) { throw new AggregationInitializationException("Aggregator [" + name + "] of type [" + getType() + "] cannot accept sub-aggregations"); } @Override public SignificantTextAggregationBuilder subAggregation(AggregationBuilder aggregation) { throw new AggregationInitializationException("Aggregator [" + name + "] of type [" + getType() + "] cannot accept sub-aggregations"); } public SignificantTextAggregationBuilder bucketCountThresholds( TermsAggregator.BucketCountThresholds bucketCountThresholds) { if (bucketCountThresholds == null) { throw new IllegalArgumentException( "[bucketCountThresholds] must not be null: [" + name + "]"); } this.bucketCountThresholds = bucketCountThresholds; return this; } /** * Sets the size - indicating how many term buckets should be returned * (defaults to 10) */ public SignificantTextAggregationBuilder size(int size) { if (size <= 0) { throw new IllegalArgumentException( "[size] must be greater than 0. Found [" + size + "] in [" + name + "]"); } bucketCountThresholds.setRequiredSize(size); return this; } /** * Sets the shard_size - indicating the number of term buckets each shard * will return to the coordinating node (the node that coordinates the * search execution). The higher the shard size is, the more accurate the * results are. */ public SignificantTextAggregationBuilder shardSize(int shardSize) { if (shardSize <= 0) { throw new IllegalArgumentException("[shardSize] must be greater than 0. Found [" + shardSize + "] in [" + name + "]"); } bucketCountThresholds.setShardSize(shardSize); return this; } /** * Sets the name of the text field that will be the subject of this * aggregation. */ public SignificantTextAggregationBuilder fieldName(String fieldName) { this.fieldName = fieldName; return this; } /** * Selects the fields to load from _source JSON and analyze. * If none are specified, the indexed "fieldName" value is assumed * to also be the name of the JSON field holding the value */ public SignificantTextAggregationBuilder sourceFieldNames(List names) { this.sourceFieldNames = names.toArray(new String [names.size()]); return this; } /** * Control if duplicate paragraphs of text should try be filtered from the * statistical text analysis. Can improve results but slows down analysis. * Default is false. */ public SignificantTextAggregationBuilder filterDuplicateText(boolean filterDuplicateText) { this.filterDuplicateText = filterDuplicateText; return this; } /** * Set the minimum document count terms should have in order to appear in * the response. */ public SignificantTextAggregationBuilder minDocCount(long minDocCount) { if (minDocCount < 0) { throw new IllegalArgumentException( "[minDocCount] must be greater than or equal to 0. Found [" + minDocCount + "] in [" + name + "]"); } bucketCountThresholds.setMinDocCount(minDocCount); return this; } /** * Set the minimum document count terms should have on the shard in order to * appear in the response. */ public SignificantTextAggregationBuilder shardMinDocCount(long shardMinDocCount) { if (shardMinDocCount < 0) { throw new IllegalArgumentException( "[shardMinDocCount] must be greater than or equal to 0. Found [" + shardMinDocCount + "] in [" + name + "]"); } bucketCountThresholds.setShardMinDocCount(shardMinDocCount); return this; } public SignificantTextAggregationBuilder backgroundFilter(QueryBuilder backgroundFilter) { if (backgroundFilter == null) { throw new IllegalArgumentException( "[backgroundFilter] must not be null: [" + name + "]"); } this.filterBuilder = backgroundFilter; return this; } public QueryBuilder backgroundFilter() { return filterBuilder; } /** * Set terms to include and exclude from the aggregation results */ public SignificantTextAggregationBuilder includeExclude(IncludeExclude includeExclude) { this.includeExclude = includeExclude; return this; } /** * Get terms to include and exclude from the aggregation results */ public IncludeExclude includeExclude() { return includeExclude; } public SignificantTextAggregationBuilder significanceHeuristic( SignificanceHeuristic significanceHeuristic) { if (significanceHeuristic == null) { throw new IllegalArgumentException( "[significanceHeuristic] must not be null: [" + name + "]"); } this.significanceHeuristic = significanceHeuristic; return this; } public SignificanceHeuristic significanceHeuristic() { return significanceHeuristic; } /** * @param name * the name of this aggregation * @param fieldName * the name of the text field that will be the subject of this * aggregation * */ public SignificantTextAggregationBuilder(String name, String fieldName) { super(name); this.fieldName = fieldName; } /** * Read from a stream. */ public SignificantTextAggregationBuilder(StreamInput in) throws IOException { super(in); fieldName = in.readString(); filterDuplicateText = in.readBoolean(); bucketCountThresholds = new BucketCountThresholds(in); filterBuilder = in.readOptionalNamedWriteable(QueryBuilder.class); includeExclude = in.readOptionalWriteable(IncludeExclude::new); significanceHeuristic = in.readNamedWriteable(SignificanceHeuristic.class); sourceFieldNames = in.readOptionalStringArray(); } @Override protected void doWriteTo(StreamOutput out) throws IOException { out.writeString(fieldName); out.writeBoolean(filterDuplicateText); bucketCountThresholds.writeTo(out); out.writeOptionalNamedWriteable(filterBuilder); out.writeOptionalWriteable(includeExclude); out.writeNamedWriteable(significanceHeuristic); out.writeOptionalStringArray(sourceFieldNames); } @Override protected AggregatorFactory doBuild(SearchContext context, AggregatorFactory parent, Builder subFactoriesBuilder) throws IOException { SignificanceHeuristic executionHeuristic = this.significanceHeuristic.rewrite(context); String[] execFieldNames = sourceFieldNames; if (execFieldNames == null) { execFieldNames = new String[] { fieldName }; } return new SignificantTextAggregatorFactory(name, includeExclude, filterBuilder, bucketCountThresholds, executionHeuristic, context, parent, subFactoriesBuilder, fieldName, execFieldNames, filterDuplicateText, metaData); } @Override protected XContentBuilder internalXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); bucketCountThresholds.toXContent(builder, params); if (fieldName != null) { builder.field(FIELD_NAME.getPreferredName(), fieldName); } if (sourceFieldNames != null) { builder.array(SOURCE_FIELDS_NAME.getPreferredName(), sourceFieldNames); } if (filterDuplicateText) { builder.field(FILTER_DUPLICATE_TEXT_FIELD_NAME.getPreferredName(), filterDuplicateText); } if (filterBuilder != null) { builder.field(SignificantTermsAggregationBuilder.BACKGROUND_FILTER.getPreferredName(), filterBuilder); } if (includeExclude != null) { includeExclude.toXContent(builder, params); } significanceHeuristic.toXContent(builder, params); builder.endObject(); return builder; } @Override protected int doHashCode() { return Objects.hash(bucketCountThresholds, fieldName, filterDuplicateText, filterBuilder, includeExclude, significanceHeuristic, Arrays.hashCode(sourceFieldNames)); } @Override protected boolean doEquals(Object obj) { SignificantTextAggregationBuilder other = (SignificantTextAggregationBuilder) obj; return Objects.equals(bucketCountThresholds, other.bucketCountThresholds) && Objects.equals(fieldName, other.fieldName) && Arrays.equals(sourceFieldNames, other.sourceFieldNames) && filterDuplicateText == other.filterDuplicateText && Objects.equals(filterBuilder, other.filterBuilder) && Objects.equals(includeExclude, other.includeExclude) && Objects.equals(significanceHeuristic, other.significanceHeuristic); } @Override public String getType() { return NAME; } }