diff options
Diffstat (limited to 'core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight')
17 files changed, 2898 insertions, 0 deletions
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java new file mode 100644 index 0000000000..72bd436a88 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java @@ -0,0 +1,607 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.fetch.subphase.highlight; + +import org.apache.lucene.search.highlight.SimpleFragmenter; +import org.apache.lucene.search.highlight.SimpleSpanFragmenter; +import org.elasticsearch.action.support.ToXContentToBytes; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParsingException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryParseContext; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.Order; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Map; +import java.util.Objects; +import java.util.function.BiFunction; + +import static org.elasticsearch.common.xcontent.ObjectParser.fromList; + +/** + * This abstract class holds parameters shared by {@link HighlightBuilder} and {@link HighlightBuilder.Field} + * and provides the common setters, equality, hashCode calculation and common serialization + */ +public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterBuilder<?>> extends ToXContentToBytes implements Writeable { + public static final ParseField PRE_TAGS_FIELD = new ParseField("pre_tags"); + public static final ParseField POST_TAGS_FIELD = new ParseField("post_tags"); + public static final ParseField FIELDS_FIELD = new ParseField("fields"); + public static final ParseField ORDER_FIELD = new ParseField("order"); + public static final ParseField HIGHLIGHT_FILTER_FIELD = new ParseField("highlight_filter"); + public static final ParseField FRAGMENT_SIZE_FIELD = new ParseField("fragment_size"); + public static final ParseField FRAGMENT_OFFSET_FIELD 
= new ParseField("fragment_offset"); + public static final ParseField NUMBER_OF_FRAGMENTS_FIELD = new ParseField("number_of_fragments"); + public static final ParseField ENCODER_FIELD = new ParseField("encoder"); + public static final ParseField REQUIRE_FIELD_MATCH_FIELD = new ParseField("require_field_match"); + public static final ParseField BOUNDARY_MAX_SCAN_FIELD = new ParseField("boundary_max_scan"); + public static final ParseField BOUNDARY_CHARS_FIELD = new ParseField("boundary_chars"); + public static final ParseField TYPE_FIELD = new ParseField("type"); + public static final ParseField FRAGMENTER_FIELD = new ParseField("fragmenter"); + public static final ParseField NO_MATCH_SIZE_FIELD = new ParseField("no_match_size"); + public static final ParseField FORCE_SOURCE_FIELD = new ParseField("force_source"); + public static final ParseField PHRASE_LIMIT_FIELD = new ParseField("phrase_limit"); + public static final ParseField OPTIONS_FIELD = new ParseField("options"); + public static final ParseField HIGHLIGHT_QUERY_FIELD = new ParseField("highlight_query"); + public static final ParseField MATCHED_FIELDS_FIELD = new ParseField("matched_fields"); + + protected String[] preTags; + + protected String[] postTags; + + protected Integer fragmentSize; + + protected Integer numOfFragments; + + protected String highlighterType; + + protected String fragmenter; + + protected QueryBuilder highlightQuery; + + protected Order order; + + protected Boolean highlightFilter; + + protected Boolean forceSource; + + protected Integer boundaryMaxScan; + + protected char[] boundaryChars; + + protected Integer noMatchSize; + + protected Integer phraseLimit; + + protected Map<String, Object> options; + + protected Boolean requireFieldMatch; + + public AbstractHighlighterBuilder() { + } + + /** + * Read from a stream. 
+ */ + protected AbstractHighlighterBuilder(StreamInput in) throws IOException { + preTags(in.readOptionalStringArray()); + postTags(in.readOptionalStringArray()); + fragmentSize(in.readOptionalVInt()); + numOfFragments(in.readOptionalVInt()); + highlighterType(in.readOptionalString()); + fragmenter(in.readOptionalString()); + if (in.readBoolean()) { + highlightQuery(in.readNamedWriteable(QueryBuilder.class)); + } + order(in.readOptionalWriteable(Order::readFromStream)); + highlightFilter(in.readOptionalBoolean()); + forceSource(in.readOptionalBoolean()); + boundaryMaxScan(in.readOptionalVInt()); + if (in.readBoolean()) { + boundaryChars(in.readString().toCharArray()); + } + noMatchSize(in.readOptionalVInt()); + phraseLimit(in.readOptionalVInt()); + if (in.readBoolean()) { + options(in.readMap()); + } + requireFieldMatch(in.readOptionalBoolean()); + } + + /** + * write common parameters to {@link StreamOutput} + */ + @Override + public final void writeTo(StreamOutput out) throws IOException { + out.writeOptionalStringArray(preTags); + out.writeOptionalStringArray(postTags); + out.writeOptionalVInt(fragmentSize); + out.writeOptionalVInt(numOfFragments); + out.writeOptionalString(highlighterType); + out.writeOptionalString(fragmenter); + boolean hasQuery = highlightQuery != null; + out.writeBoolean(hasQuery); + if (hasQuery) { + out.writeNamedWriteable(highlightQuery); + } + out.writeOptionalWriteable(order); + out.writeOptionalBoolean(highlightFilter); + out.writeOptionalBoolean(forceSource); + out.writeOptionalVInt(boundaryMaxScan); + boolean hasBoundaryChars = boundaryChars != null; + out.writeBoolean(hasBoundaryChars); + if (hasBoundaryChars) { + out.writeString(String.valueOf(boundaryChars)); + } + out.writeOptionalVInt(noMatchSize); + out.writeOptionalVInt(phraseLimit); + boolean hasOptions = options != null; + out.writeBoolean(hasOptions); + if (hasOptions) { + out.writeMap(options); + } + out.writeOptionalBoolean(requireFieldMatch); + doWriteTo(out); + } + + 
protected abstract void doWriteTo(StreamOutput out) throws IOException; + + /** + * Set the pre tags that will be used for highlighting. + */ + @SuppressWarnings("unchecked") + public HB preTags(String... preTags) { + this.preTags = preTags; + return (HB) this; + } + + /** + * @return the value set by {@link #preTags(String...)} + */ + public String[] preTags() { + return this.preTags; + } + + /** + * Set the post tags that will be used for highlighting. + */ + @SuppressWarnings("unchecked") + public HB postTags(String... postTags) { + this.postTags = postTags; + return (HB) this; + } + + /** + * @return the value set by {@link #postTags(String...)} + */ + public String[] postTags() { + return this.postTags; + } + + /** + * Set the fragment size in characters, defaults to {@link HighlightBuilder#DEFAULT_FRAGMENT_CHAR_SIZE} + */ + @SuppressWarnings("unchecked") + public HB fragmentSize(Integer fragmentSize) { + this.fragmentSize = fragmentSize; + return (HB) this; + } + + /** + * @return the value set by {@link #fragmentSize(Integer)} + */ + public Integer fragmentSize() { + return this.fragmentSize; + } + + /** + * Set the number of fragments, defaults to {@link HighlightBuilder#DEFAULT_NUMBER_OF_FRAGMENTS} + */ + @SuppressWarnings("unchecked") + public HB numOfFragments(Integer numOfFragments) { + this.numOfFragments = numOfFragments; + return (HB) this; + } + + /** + * @return the value set by {@link #numOfFragments(Integer)} + */ + public Integer numOfFragments() { + return this.numOfFragments; + } + + /** + * Set type of highlighter to use. Out of the box supported types + * are <tt>plain</tt>, <tt>fvh</tt> and <tt>postings</tt>. + * The default option selected is dependent on the mappings defined for your index. + * Details of the different highlighter types are covered in the reference guide. 
+ */ + @SuppressWarnings("unchecked") + public HB highlighterType(String highlighterType) { + this.highlighterType = highlighterType; + return (HB) this; + } + + /** + * @return the value set by {@link #highlighterType(String)} + */ + public String highlighterType() { + return this.highlighterType; + } + + /** + * Sets what fragmenter to use to break up text that is eligible for highlighting. + * This option is only applicable when using the plain highlighterType <tt>highlighter</tt>. + * Permitted values are "simple" or "span" relating to {@link SimpleFragmenter} and + * {@link SimpleSpanFragmenter} implementations respectively with the default being "span" + */ + @SuppressWarnings("unchecked") + public HB fragmenter(String fragmenter) { + this.fragmenter = fragmenter; + return (HB) this; + } + + /** + * @return the value set by {@link #fragmenter(String)} + */ + public String fragmenter() { + return this.fragmenter; + } + + /** + * Sets a query to be used for highlighting instead of the search query. + */ + @SuppressWarnings("unchecked") + public HB highlightQuery(QueryBuilder highlightQuery) { + this.highlightQuery = highlightQuery; + return (HB) this; + } + + /** + * @return the value set by {@link #highlightQuery(QueryBuilder)} + */ + public QueryBuilder highlightQuery() { + return this.highlightQuery; + } + + /** + * The order of fragments per field. By default, ordered by the order in the + * highlighted text. Can be <tt>score</tt>, which then it will be ordered + * by score of the fragments, or <tt>none</TT>. + */ + public HB order(String order) { + return order(Order.fromString(order)); + } + + /** + * By default, fragments of a field are ordered by the order in the highlighted text. + * If set to {@link Order#SCORE}, this changes order to score of the fragments. 
+ */ + @SuppressWarnings("unchecked") + public HB order(Order scoreOrdered) { + this.order = scoreOrdered; + return (HB) this; + } + + /** + * @return the value set by {@link #order(Order)} + */ + public Order order() { + return this.order; + } + + /** + * Set this to true when using the highlighterType <tt>fvh</tt> + * and you want to provide highlighting on filter clauses in your + * query. Default is <tt>false</tt>. + */ + @SuppressWarnings("unchecked") + public HB highlightFilter(Boolean highlightFilter) { + this.highlightFilter = highlightFilter; + return (HB) this; + } + + /** + * @return the value set by {@link #highlightFilter(Boolean)} + */ + public Boolean highlightFilter() { + return this.highlightFilter; + } + + /** + * When using the highlighterType <tt>fvh</tt> this setting + * controls how far to look for boundary characters, and defaults to 20. + */ + @SuppressWarnings("unchecked") + public HB boundaryMaxScan(Integer boundaryMaxScan) { + this.boundaryMaxScan = boundaryMaxScan; + return (HB) this; + } + + /** + * @return the value set by {@link #boundaryMaxScan(Integer)} + */ + public Integer boundaryMaxScan() { + return this.boundaryMaxScan; + } + + /** + * When using the highlighterType <tt>fvh</tt> this setting + * defines what constitutes a boundary for highlighting. It’s a single string with + * each boundary character defined in it. It defaults to .,!? \t\n + */ + @SuppressWarnings("unchecked") + public HB boundaryChars(char[] boundaryChars) { + this.boundaryChars = boundaryChars; + return (HB) this; + } + + /** + * @return the value set by {@link #boundaryChars(char[])} + */ + public char[] boundaryChars() { + return this.boundaryChars; + } + + /** + * Allows to set custom options for custom highlighters. 
+ */ + @SuppressWarnings("unchecked") + public HB options(Map<String, Object> options) { + this.options = options; + return (HB) this; + } + + /** + * @return the value set by {@link #options(Map)} + */ + public Map<String, Object> options() { + return this.options; + } + + /** + * Set to true to cause a field to be highlighted only if a query matches that field. + * Default is false meaning that terms are highlighted on all requested fields regardless + * if the query matches specifically on them. + */ + @SuppressWarnings("unchecked") + public HB requireFieldMatch(Boolean requireFieldMatch) { + this.requireFieldMatch = requireFieldMatch; + return (HB) this; + } + + /** + * @return the value set by {@link #requireFieldMatch(Boolean)} + */ + public Boolean requireFieldMatch() { + return this.requireFieldMatch; + } + + /** + * Sets the size of the fragment to return from the beginning of the field if there are no matches to + * highlight and the field doesn't also define noMatchSize. + * @param noMatchSize integer to set or null to leave out of request. default is null. + * @return this for chaining + */ + @SuppressWarnings("unchecked") + public HB noMatchSize(Integer noMatchSize) { + this.noMatchSize = noMatchSize; + return (HB) this; + } + + /** + * @return the value set by {@link #noMatchSize(Integer)} + */ + public Integer noMatchSize() { + return this.noMatchSize; + } + + /** + * Sets the maximum number of phrases the fvh will consider if the field doesn't also define phraseLimit. 
+ * @param phraseLimit maximum number of phrases the fvh will consider + * @return this for chaining + */ + @SuppressWarnings("unchecked") + public HB phraseLimit(Integer phraseLimit) { + this.phraseLimit = phraseLimit; + return (HB) this; + } + + /** + * @return the value set by {@link #phraseLimit(Integer)} + */ + public Integer phraseLimit() { + return this.phraseLimit; + } + + /** + * Forces the highlighting to highlight fields based on the source even if fields are stored separately. + */ + @SuppressWarnings("unchecked") + public HB forceSource(Boolean forceSource) { + this.forceSource = forceSource; + return (HB) this; + } + + /** + * @return the value set by {@link #forceSource(Boolean)} + */ + public Boolean forceSource() { + return this.forceSource; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + innerXContent(builder); + builder.endObject(); + return builder; + } + + protected abstract void innerXContent(XContentBuilder builder) throws IOException; + + void commonOptionsToXContent(XContentBuilder builder) throws IOException { + if (preTags != null) { + builder.array(PRE_TAGS_FIELD.getPreferredName(), preTags); + } + if (postTags != null) { + builder.array(POST_TAGS_FIELD.getPreferredName(), postTags); + } + if (fragmentSize != null) { + builder.field(FRAGMENT_SIZE_FIELD.getPreferredName(), fragmentSize); + } + if (numOfFragments != null) { + builder.field(NUMBER_OF_FRAGMENTS_FIELD.getPreferredName(), numOfFragments); + } + if (highlighterType != null) { + builder.field(TYPE_FIELD.getPreferredName(), highlighterType); + } + if (fragmenter != null) { + builder.field(FRAGMENTER_FIELD.getPreferredName(), fragmenter); + } + if (highlightQuery != null) { + builder.field(HIGHLIGHT_QUERY_FIELD.getPreferredName(), highlightQuery); + } + if (order != null) { + builder.field(ORDER_FIELD.getPreferredName(), order.toString()); + } + if (highlightFilter != null) { + 
builder.field(HIGHLIGHT_FILTER_FIELD.getPreferredName(), highlightFilter); + } + if (boundaryMaxScan != null) { + builder.field(BOUNDARY_MAX_SCAN_FIELD.getPreferredName(), boundaryMaxScan); + } + if (boundaryChars != null) { + builder.field(BOUNDARY_CHARS_FIELD.getPreferredName(), new String(boundaryChars)); + } + if (options != null && options.size() > 0) { + builder.field(OPTIONS_FIELD.getPreferredName(), options); + } + if (forceSource != null) { + builder.field(FORCE_SOURCE_FIELD.getPreferredName(), forceSource); + } + if (requireFieldMatch != null) { + builder.field(REQUIRE_FIELD_MATCH_FIELD.getPreferredName(), requireFieldMatch); + } + if (noMatchSize != null) { + builder.field(NO_MATCH_SIZE_FIELD.getPreferredName(), noMatchSize); + } + if (phraseLimit != null) { + builder.field(PHRASE_LIMIT_FIELD.getPreferredName(), phraseLimit); + } + } + + static <HB extends AbstractHighlighterBuilder<HB>> BiFunction<QueryParseContext, HB, HB> setupParser( + ObjectParser<HB, QueryParseContext> parser) { + parser.declareStringArray(fromList(String.class, HB::preTags), PRE_TAGS_FIELD); + parser.declareStringArray(fromList(String.class, HB::postTags), POST_TAGS_FIELD); + parser.declareString(HB::order, ORDER_FIELD); + parser.declareBoolean(HB::highlightFilter, HIGHLIGHT_FILTER_FIELD); + parser.declareInt(HB::fragmentSize, FRAGMENT_SIZE_FIELD); + parser.declareInt(HB::numOfFragments, NUMBER_OF_FRAGMENTS_FIELD); + parser.declareBoolean(HB::requireFieldMatch, REQUIRE_FIELD_MATCH_FIELD); + parser.declareInt(HB::boundaryMaxScan, BOUNDARY_MAX_SCAN_FIELD); + parser.declareString((HB hb, String bc) -> hb.boundaryChars(bc.toCharArray()) , BOUNDARY_CHARS_FIELD); + parser.declareString(HB::highlighterType, TYPE_FIELD); + parser.declareString(HB::fragmenter, FRAGMENTER_FIELD); + parser.declareInt(HB::noMatchSize, NO_MATCH_SIZE_FIELD); + parser.declareBoolean(HB::forceSource, FORCE_SOURCE_FIELD); + parser.declareInt(HB::phraseLimit, PHRASE_LIMIT_FIELD); + parser.declareObject(HB::options, 
(XContentParser p, QueryParseContext c) -> { + try { + return p.map(); + } catch (IOException e) { + throw new RuntimeException("Error parsing options", e); + } + }, OPTIONS_FIELD); + parser.declareObject(HB::highlightQuery, (XContentParser p, QueryParseContext c) -> { + try { + return c.parseInnerQueryBuilder().orElse(null); + } catch (IOException e) { + throw new RuntimeException("Error parsing query", e); + } + }, HIGHLIGHT_QUERY_FIELD); + return (QueryParseContext c, HB hb) -> { + try { + parser.parse(c.parser(), hb, c); + if (hb.preTags() != null && hb.postTags() == null) { + throw new ParsingException(c.parser().getTokenLocation(), + "pre_tags are set but post_tags are not set"); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + return hb; + }; + } + + @Override + public final int hashCode() { + return Objects.hash(getClass(), Arrays.hashCode(preTags), Arrays.hashCode(postTags), fragmentSize, + numOfFragments, highlighterType, fragmenter, highlightQuery, order, highlightFilter, + forceSource, boundaryMaxScan, Arrays.hashCode(boundaryChars), noMatchSize, + phraseLimit, options, requireFieldMatch, doHashCode()); + } + + /** + * fields only present in subclass should contribute to hashCode in the implementation + */ + protected abstract int doHashCode(); + + @Override + public final boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + @SuppressWarnings("unchecked") + HB other = (HB) obj; + return Arrays.equals(preTags, other.preTags) && + Arrays.equals(postTags, other.postTags) && + Objects.equals(fragmentSize, other.fragmentSize) && + Objects.equals(numOfFragments, other.numOfFragments) && + Objects.equals(highlighterType, other.highlighterType) && + Objects.equals(fragmenter, other.fragmenter) && + Objects.equals(highlightQuery, other.highlightQuery) && + Objects.equals(order, other.order) && + Objects.equals(highlightFilter, other.highlightFilter) 
&& + Objects.equals(forceSource, other.forceSource) && + Objects.equals(boundaryMaxScan, other.boundaryMaxScan) && + Arrays.equals(boundaryChars, other.boundaryChars) && + Objects.equals(noMatchSize, other.noMatchSize) && + Objects.equals(phraseLimit, other.phraseLimit) && + Objects.equals(options, other.options) && + Objects.equals(requireFieldMatch, other.requireFieldMatch) && + doEquals(other); + } + + /** + * fields only present in subclass should be checked for equality in the implementation + */ + protected abstract boolean doEquals(HB other); +} diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/CustomQueryScorer.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/CustomQueryScorer.java new file mode 100644 index 0000000000..b62d28f8ab --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/CustomQueryScorer.java @@ -0,0 +1,105 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.fetch.subphase.highlight; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.WeightedSpanTerm; +import org.apache.lucene.search.highlight.WeightedSpanTermExtractor; +import org.apache.lucene.spatial.geopoint.search.GeoPointInBBoxQuery; +import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery; +import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; +import org.elasticsearch.index.query.HasChildQueryBuilder; + +import java.io.IOException; +import java.util.Map; + +public final class CustomQueryScorer extends QueryScorer { + + public CustomQueryScorer(Query query, IndexReader reader, String field, + String defaultField) { + super(query, reader, field, defaultField); + } + + public CustomQueryScorer(Query query, IndexReader reader, String field) { + super(query, reader, field); + } + + public CustomQueryScorer(Query query, String field, String defaultField) { + super(query, field, defaultField); + } + + public CustomQueryScorer(Query query, String field) { + super(query, field); + } + + public CustomQueryScorer(Query query) { + super(query); + } + + public CustomQueryScorer(WeightedSpanTerm[] weightedTerms) { + super(weightedTerms); + } + + @Override + protected WeightedSpanTermExtractor newTermExtractor(String defaultField) { + return defaultField == null ? 
new CustomWeightedSpanTermExtractor() + : new CustomWeightedSpanTermExtractor(defaultField); + } + + private static class CustomWeightedSpanTermExtractor extends WeightedSpanTermExtractor { + + public CustomWeightedSpanTermExtractor() { + super(); + } + + public CustomWeightedSpanTermExtractor(String defaultField) { + super(defaultField); + } + + @Override + protected void extractUnknownQuery(Query query, + Map<String, WeightedSpanTerm> terms) throws IOException { + if (query instanceof FunctionScoreQuery) { + query = ((FunctionScoreQuery) query).getSubQuery(); + extract(query, 1F, terms); + } else if (query instanceof FiltersFunctionScoreQuery) { + query = ((FiltersFunctionScoreQuery) query).getSubQuery(); + extract(query, 1F, terms); + } else if (terms.isEmpty()) { + extractWeightedTerms(terms, query, 1F); + } + } + + protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException { + if (query instanceof GeoPointInBBoxQuery) { + // skip all geo queries, see https://issues.apache.org/jira/browse/LUCENE-7293 and + // https://github.com/elastic/elasticsearch/issues/17537 + return; + } else if (query instanceof HasChildQueryBuilder.LateParsingQuery) { + // skip has_child or has_parent queries, see: https://github.com/elastic/elasticsearch/issues/14999 + return; + } + + super.extract(query, boost, terms); + } + } +} diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java new file mode 100644 index 0000000000..8110780a9b --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java @@ -0,0 +1,199 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.fetch.subphase.highlight; + +import org.apache.lucene.search.highlight.Encoder; +import org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder; +import org.apache.lucene.search.vectorhighlight.BoundaryScanner; +import org.apache.lucene.search.vectorhighlight.CustomFieldQuery; +import org.apache.lucene.search.vectorhighlight.FieldFragList; +import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo; +import org.apache.lucene.search.vectorhighlight.FieldQuery; +import org.apache.lucene.search.vectorhighlight.FragListBuilder; +import org.apache.lucene.search.vectorhighlight.FragmentsBuilder; +import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder; +import org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner; +import org.apache.lucene.search.vectorhighlight.SimpleFieldFragList; +import org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder; +import org.apache.lucene.search.vectorhighlight.SingleFragListBuilder; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.text.Text; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.search.fetch.FetchPhaseExecutionException; +import org.elasticsearch.search.fetch.FetchSubPhase; +import org.elasticsearch.search.internal.SearchContext; + +import java.util.Collections; +import 
java.util.HashMap; +import java.util.Map; + +/** + * + */ +public class FastVectorHighlighter implements Highlighter { + + private static final SimpleBoundaryScanner DEFAULT_BOUNDARY_SCANNER = new SimpleBoundaryScanner(); + + private static final String CACHE_KEY = "highlight-fsv"; + private final Boolean termVectorMultiValue; + + public FastVectorHighlighter(Settings settings) { + this.termVectorMultiValue = settings.getAsBoolean("search.highlight.term_vector_multi_value", true); + } + + @Override + public HighlightField highlight(HighlighterContext highlighterContext) { + SearchContextHighlight.Field field = highlighterContext.field; + SearchContext context = highlighterContext.context; + FetchSubPhase.HitContext hitContext = highlighterContext.hitContext; + FieldMapper mapper = highlighterContext.mapper; + + if (canHighlight(mapper) == false) { + throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with term vector with position offsets to be used with fast vector highlighter"); + } + + Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT; + + if (!hitContext.cache().containsKey(CACHE_KEY)) { + hitContext.cache().put(CACHE_KEY, new HighlighterEntry()); + } + HighlighterEntry cache = (HighlighterEntry) hitContext.cache().get(CACHE_KEY); + + try { + FieldQuery fieldQuery; + if (field.fieldOptions().requireFieldMatch()) { + if (cache.fieldMatchFieldQuery == null) { + // we use top level reader to rewrite the query against all readers, with use caching it across hits (and across readers...) 
+ cache.fieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query, hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch()); + } + fieldQuery = cache.fieldMatchFieldQuery; + } else { + if (cache.noFieldMatchFieldQuery == null) { + // we use top level reader to rewrite the query against all readers, with use caching it across hits (and across readers...) + cache.noFieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query, hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch()); + } + fieldQuery = cache.noFieldMatchFieldQuery; + } + + MapperHighlightEntry entry = cache.mappers.get(mapper); + if (entry == null) { + FragListBuilder fragListBuilder; + BaseFragmentsBuilder fragmentsBuilder; + + BoundaryScanner boundaryScanner = DEFAULT_BOUNDARY_SCANNER; + if (field.fieldOptions().boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN || field.fieldOptions().boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) { + boundaryScanner = new SimpleBoundaryScanner(field.fieldOptions().boundaryMaxScan(), field.fieldOptions().boundaryChars()); + } + boolean forceSource = context.highlight().forceSource(field); + if (field.fieldOptions().numberOfFragments() == 0) { + fragListBuilder = new SingleFragListBuilder(); + + if (!forceSource && mapper.fieldType().stored()) { + fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); + } else { + fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); + } + } else { + fragListBuilder = field.fieldOptions().fragmentOffset() == -1 ? 
new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fieldOptions().fragmentOffset()); + if (field.fieldOptions().scoreOrdered()) { + if (!forceSource && mapper.fieldType().stored()) { + fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); + } else { + fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(mapper, context, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); + } + } else { + if (!forceSource && mapper.fieldType().stored()) { + fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); + } else { + fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); + } + } + } + fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue); + entry = new MapperHighlightEntry(); + entry.fragListBuilder = fragListBuilder; + entry.fragmentsBuilder = fragmentsBuilder; + if (cache.fvh == null) { + // parameters to FVH are not requires since: + // first two booleans are not relevant since they are set on the CustomFieldQuery (phrase and fieldMatch) + // fragment builders are used explicitly + cache.fvh = new org.apache.lucene.search.vectorhighlight.FastVectorHighlighter(); + } + CustomFieldQuery.highlightFilters.set(field.fieldOptions().highlightFilter()); + cache.mappers.put(mapper, entry); + } + cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit()); + + String[] fragments; + + // a HACK to make highlighter do highlighting, even though its using the single frag list builder + int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? Integer.MAX_VALUE : field.fieldOptions().numberOfFragments(); + int fragmentCharSize = field.fieldOptions().numberOfFragments() == 0 ? 
Integer.MAX_VALUE : field.fieldOptions().fragmentCharSize(); + // we highlight against the low level reader and docId, because if we load source, we want to reuse it if possible + // Only send matched fields if they were requested to save time. + if (field.fieldOptions().matchedFields() != null && !field.fieldOptions().matchedFields().isEmpty()) { + fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), mapper.fieldType().name(), field.fieldOptions().matchedFields(), fragmentCharSize, + numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder); + } else { + fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), mapper.fieldType().name(), fragmentCharSize, + numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder); + } + + if (fragments != null && fragments.length > 0) { + return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments)); + } + + int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize(); + if (noMatchSize > 0) { + // Essentially we just request that a fragment is built from 0 to noMatchSize using the normal fragmentsBuilder + FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/); + fieldFragList.add(0, noMatchSize, Collections.<WeightedPhraseInfo>emptyList()); + fragments = entry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(), mapper.fieldType().name(), + fieldFragList, 1, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder); + if (fragments != null && fragments.length > 0) { + return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments)); + } + } + + return null; + + } catch (Exception e) { + throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + 
highlighterContext.fieldName + "]", e);
        }
    }

    /**
     * The FVH can only operate on fields indexed with full term vectors: the field type must
     * store term vectors together with both positions and offsets.
     */
    @Override
    public boolean canHighlight(FieldMapper fieldMapper) {
        return fieldMapper.fieldType().storeTermVectors() && fieldMapper.fieldType().storeTermVectorOffsets() && fieldMapper.fieldType().storeTermVectorPositions();
    }

    // Per-mapper cache entry: the frag list / fragments builders are created once per field mapper
    // and reused across hits (see the cache.mappers lookups in highlight()).
    private class MapperHighlightEntry {
        public FragListBuilder fragListBuilder;
        public FragmentsBuilder fragmentsBuilder;

        // NOTE(review): never assigned in the code visible here — confirm this field is still needed.
        public org.apache.lucene.search.highlight.Highlighter highlighter;
    }

    // Cache shared across hits: the FVH instance, the two rewritten field queries
    // (with and without field matching) and the per-mapper entries above.
    private class HighlighterEntry {
        public org.apache.lucene.search.vectorhighlight.FastVectorHighlighter fvh;
        public FieldQuery noFieldMatchFieldQuery;
        public FieldQuery fieldMatchFieldQuery;
        public Map<FieldMapper, MapperHighlightEntry> mappers = new HashMap<>();
    }
}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FragmentBuilderHelper.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FragmentBuilderHelper.java new file mode 100644 index 0000000000..ac0dab3a63 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FragmentBuilderHelper.java @@ -0,0 +1,102 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.elasticsearch.search.fetch.subphase.highlight; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter; +import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo; +import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo; +import org.apache.lucene.search.vectorhighlight.FragmentsBuilder; +import org.apache.lucene.util.CollectionUtil; +import org.apache.lucene.util.Version; +import org.elasticsearch.index.analysis.CustomAnalyzer; +import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory; +import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory; +import org.elasticsearch.index.analysis.NGramTokenFilterFactory; +import org.elasticsearch.index.analysis.NGramTokenizerFactory; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.WordDelimiterTokenFilterFactory; +import org.elasticsearch.index.mapper.FieldMapper; + +import java.util.Comparator; +import java.util.List; + +/** + * Simple helper class for {@link FastVectorHighlighter} {@link FragmentsBuilder} implementations. 
+ */ +public final class FragmentBuilderHelper { + + private FragmentBuilderHelper() { + // no instance + } + + /** + * Fixes problems with broken analysis chains if positions and offsets are messed up that can lead to + * {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter} + */ + public static WeightedFragInfo fixWeightedFragInfo(FieldMapper mapper, Field[] values, WeightedFragInfo fragInfo) { + assert fragInfo != null : "FragInfo must not be null"; + assert mapper.fieldType().name().equals(values[0].name()) : "Expected FieldMapper for field " + values[0].name(); + if (!fragInfo.getSubInfos().isEmpty() && (containsBrokenAnalysis(mapper.fieldType().indexAnalyzer()))) { + /* This is a special case where broken analysis like WDF is used for term-vector creation at index-time + * which can potentially mess up the offsets. To prevent a SAIIOBException we need to resort + * the fragments based on their offsets rather than using soley the positions as it is done in + * the FastVectorHighlighter. Yet, this is really a lucene problem and should be fixed in lucene rather + * than in this hack... aka. "we are are working on in!" */ + final List<SubInfo> subInfos = fragInfo.getSubInfos(); + CollectionUtil.introSort(subInfos, new Comparator<SubInfo>() { + @Override + public int compare(SubInfo o1, SubInfo o2) { + int startOffset = o1.getTermsOffsets().get(0).getStartOffset(); + int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset(); + return FragmentBuilderHelper.compare(startOffset, startOffset2); + } + }); + return new WeightedFragInfo(Math.min(fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset(), + fragInfo.getStartOffset()), fragInfo.getEndOffset(), subInfos, fragInfo.getTotalBoost()); + } else { + return fragInfo; + } + } + + private static int compare(int x, int y) { + return (x < y) ? -1 : ((x == y) ? 
0 : 1); + } + + private static boolean containsBrokenAnalysis(Analyzer analyzer) { + // TODO maybe we need a getter on Namedanalyzer that tells if this uses broken Analysis + if (analyzer instanceof NamedAnalyzer) { + analyzer = ((NamedAnalyzer) analyzer).analyzer(); + } + if (analyzer instanceof CustomAnalyzer) { + final CustomAnalyzer a = (CustomAnalyzer) analyzer; + TokenFilterFactory[] tokenFilters = a.tokenFilters(); + for (TokenFilterFactory tokenFilterFactory : tokenFilters) { + if (tokenFilterFactory instanceof WordDelimiterTokenFilterFactory + || tokenFilterFactory instanceof EdgeNGramTokenFilterFactory) { + return true; + } + } + } + return false; + } +} diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilder.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilder.java new file mode 100644 index 0000000000..fe4587826c --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilder.java @@ -0,0 +1,525 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.fetch.subphase.highlight; + +import org.apache.lucene.search.Query; +import org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.ObjectParser.NamedObjectParser; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryParseContext; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.FieldOptions; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Objects; +import java.util.Set; +import java.util.function.BiFunction; + +import static org.elasticsearch.common.xcontent.ObjectParser.fromList; + +/** + * A builder for search highlighting. Settings can control how large fields + * are summarized to show only selected snippets ("fragments") containing search terms. 
 *
 * @see org.elasticsearch.search.builder.SearchSourceBuilder#highlight()
 */
public class HighlightBuilder extends AbstractHighlighterBuilder<HighlightBuilder> {
    /** default for whether to highlight fields based on the source even if stored separately */
    public static final boolean DEFAULT_FORCE_SOURCE = false;
    /** default for whether a field should be highlighted only if a query matches that field */
    public static final boolean DEFAULT_REQUIRE_FIELD_MATCH = true;
    /** default for whether <tt>fvh</tt> should provide highlighting on filter clauses */
    public static final boolean DEFAULT_HIGHLIGHT_FILTER = false;
    /** default for highlight fragments being ordered by score */
    public static final boolean DEFAULT_SCORE_ORDERED = false;
    /** the default encoder setting */
    public static final String DEFAULT_ENCODER = "default";
    /** default for the maximum number of phrases the fvh will consider */
    public static final int DEFAULT_PHRASE_LIMIT = 256;
    /** default for fragment size when there are no matches */
    public static final int DEFAULT_NO_MATCH_SIZE = 0;
    /** the default number of fragments for highlighting */
    public static final int DEFAULT_NUMBER_OF_FRAGMENTS = 5;
    /** the default number of fragments size in characters */
    public static final int DEFAULT_FRAGMENT_CHAR_SIZE = 100;
    /** the default opening tag */
    public static final String[] DEFAULT_PRE_TAGS = new String[]{"<em>"};
    /** the default closing tag */
    public static final String[] DEFAULT_POST_TAGS = new String[]{"</em>"};

    /** the default opening tags when <tt>tag_schema = "styled"</tt> */
    public static final String[] DEFAULT_STYLED_PRE_TAG = {
            "<em class=\"hlt1\">", "<em class=\"hlt2\">", "<em class=\"hlt3\">",
            "<em class=\"hlt4\">", "<em class=\"hlt5\">", "<em class=\"hlt6\">",
            "<em class=\"hlt7\">", "<em class=\"hlt8\">", "<em class=\"hlt9\">",
            "<em class=\"hlt10\">"
    };
    /** the default closing tags when <tt>tag_schema = "styled"</tt> */
    public static final String[] DEFAULT_STYLED_POST_TAGS = {"</em>"};

    /**
     * a {@link FieldOptions} with default settings, used as the global template that
     * per-field options are merged into
     */
    static final FieldOptions defaultOptions = new SearchContextHighlight.FieldOptions.Builder()
            .preTags(DEFAULT_PRE_TAGS).postTags(DEFAULT_POST_TAGS).scoreOrdered(DEFAULT_SCORE_ORDERED)
            .highlightFilter(DEFAULT_HIGHLIGHT_FILTER).requireFieldMatch(DEFAULT_REQUIRE_FIELD_MATCH)
            .forceSource(DEFAULT_FORCE_SOURCE).fragmentCharSize(DEFAULT_FRAGMENT_CHAR_SIZE)
            .numberOfFragments(DEFAULT_NUMBER_OF_FRAGMENTS).encoder(DEFAULT_ENCODER)
            .boundaryMaxScan(SimpleBoundaryScanner.DEFAULT_MAX_SCAN).boundaryChars(SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS)
            .noMatchSize(DEFAULT_NO_MATCH_SIZE).phraseLimit(DEFAULT_PHRASE_LIMIT).build();

    // the fields to highlight, in the order they were added
    private final List<Field> fields = new ArrayList<>();

    // optional fragment encoder name; null means "not set"
    private String encoder;

    // when true, fields are serialized as an array so their highlighting order is explicit
    private boolean useExplicitFieldOrder = false;

    public HighlightBuilder() {
    }

    /**
     * Read from a stream.
     * NOTE: the read order below must mirror the write order in {@code doWriteTo}.
     */
    public HighlightBuilder(StreamInput in) throws IOException {
        super(in);
        encoder(in.readOptionalString());
        useExplicitFieldOrder(in.readBoolean());
        int fields = in.readVInt();
        for (int i = 0; i < fields; i++) {
            field(new Field(in));
        }
    }

    @Override
    protected void doWriteTo(StreamOutput out) throws IOException {
        // keep in sync with the stream constructor above
        out.writeOptionalString(encoder);
        out.writeBoolean(useExplicitFieldOrder);
        out.writeVInt(fields.size());
        for (int i = 0; i < fields.size(); i++) {
            fields.get(i).writeTo(out);
        }
    }

    /**
     * Adds a field to be highlighted with default fragment size of 100 characters, and
     * default number of fragments of 5 using the default encoder
     *
     * @param name The field to highlight
     */
    public HighlightBuilder field(String name) {
        return field(new Field(name));
    }

    /**
     * Adds a field to be highlighted with a provided fragment size (in characters), and
     * default number of fragments of 5.
+ * + * @param name The field to highlight + * @param fragmentSize The size of a fragment in characters + */ + public HighlightBuilder field(String name, int fragmentSize) { + return field(new Field(name).fragmentSize(fragmentSize)); + } + + + /** + * Adds a field to be highlighted with a provided fragment size (in characters), and + * a provided (maximum) number of fragments. + * + * @param name The field to highlight + * @param fragmentSize The size of a fragment in characters + * @param numberOfFragments The (maximum) number of fragments + */ + public HighlightBuilder field(String name, int fragmentSize, int numberOfFragments) { + return field(new Field(name).fragmentSize(fragmentSize).numOfFragments(numberOfFragments)); + } + + /** + * Adds a field to be highlighted with a provided fragment size (in characters), and + * a provided (maximum) number of fragments. + * + * @param name The field to highlight + * @param fragmentSize The size of a fragment in characters + * @param numberOfFragments The (maximum) number of fragments + * @param fragmentOffset The offset from the start of the fragment to the start of the highlight + */ + public HighlightBuilder field(String name, int fragmentSize, int numberOfFragments, int fragmentOffset) { + return field(new Field(name).fragmentSize(fragmentSize).numOfFragments(numberOfFragments) + .fragmentOffset(fragmentOffset)); + } + + public HighlightBuilder field(Field field) { + fields.add(field); + return this; + } + + void fields(List<Field> fields) { + this.fields.addAll(fields); + } + + public List<Field> fields() { + return this.fields; + } + + /** + * Set a tag scheme that encapsulates a built in pre and post tags. The allowed schemes + * are <tt>styled</tt> and <tt>default</tt>. 
     *
     * @param schemaName The tag scheme name
     */
    public HighlightBuilder tagsSchema(String schemaName) {
        switch (schemaName) {
        case "default":
            preTags(DEFAULT_PRE_TAGS);
            postTags(DEFAULT_POST_TAGS);
            break;
        case "styled":
            preTags(DEFAULT_STYLED_PRE_TAG);
            postTags(DEFAULT_STYLED_POST_TAGS);
            break;
        default:
            throw new IllegalArgumentException("Unknown tag schema ["+ schemaName +"]");
        }
        return this;
    }

    /**
     * Set the encoder to use for the highlighted fragments; {@link #DEFAULT_ENCODER} is used
     * when none is set.
     *
     * @param encoder name of the encoder
     */
    public HighlightBuilder encoder(String encoder) {
        this.encoder = encoder;
        return this;
    }

    /**
     * Getter for {@link #encoder(String)}
     */
    public String encoder() {
        return this.encoder;
    }

    /**
     * Send the fields to be highlighted using a syntax that is specific about the order in which they should be highlighted.
     * @return this for chaining
     */
    public HighlightBuilder useExplicitFieldOrder(boolean useExplicitFieldOrder) {
        this.useExplicitFieldOrder = useExplicitFieldOrder;
        return this;
    }

    /**
     * Gets value set with {@link #useExplicitFieldOrder(boolean)}
     */
    public Boolean useExplicitFieldOrder() {
        return this.useExplicitFieldOrder;
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        innerXContent(builder);
        builder.endObject();
        return builder;
    }

    // parser for the top-level "highlight" element; the options shared with Field are wired in by setupParser
    private static final BiFunction<QueryParseContext, HighlightBuilder, HighlightBuilder> PARSER;
    static {
        ObjectParser<HighlightBuilder, QueryParseContext> parser = new ObjectParser<>("highlight");
        parser.declareString(HighlightBuilder::tagsSchema, new ParseField("tags_schema"));
        parser.declareString(HighlightBuilder::encoder, ENCODER_FIELD);
        // fields can arrive as an array (explicit order, flips useExplicitFieldOrder) or as an object
        parser.declareNamedObjects(HighlightBuilder::fields, Field.PARSER, (HighlightBuilder hb) -> hb.useExplicitFieldOrder(true),
                FIELDS_FIELD);
        PARSER = setupParser(parser);
    }
    public static
HighlightBuilder fromXContent(QueryParseContext c) {
        return PARSER.apply(c, new HighlightBuilder());
    }

    /**
     * Creates the search-time {@link SearchContextHighlight} representation of this builder.
     * Global options act as a template merged into each field's partial options; unset global
     * options fall back to {@link #defaultOptions}.
     */
    public SearchContextHighlight build(QueryShardContext context) throws IOException {
        // create template global options that are later merged with any partial field options
        final SearchContextHighlight.FieldOptions.Builder globalOptionsBuilder = new SearchContextHighlight.FieldOptions.Builder();
        globalOptionsBuilder.encoder(this.encoder);
        transferOptions(this, globalOptionsBuilder, context);

        // overwrite unset global options by default values
        globalOptionsBuilder.merge(defaultOptions);

        // create field options
        Collection<org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field> fieldOptions = new ArrayList<>();
        for (Field field : this.fields) {
            final SearchContextHighlight.FieldOptions.Builder fieldOptionsBuilder = new SearchContextHighlight.FieldOptions.Builder();
            fieldOptionsBuilder.fragmentOffset(field.fragmentOffset);
            if (field.matchedFields != null) {
                Set<String> matchedFields = new HashSet<String>(field.matchedFields.length);
                Collections.addAll(matchedFields, field.matchedFields);
                fieldOptionsBuilder.matchedFields(matchedFields);
            }
            transferOptions(field, fieldOptionsBuilder, context);
            fieldOptions.add(new SearchContextHighlight.Field(field.name(), fieldOptionsBuilder
                    .merge(globalOptionsBuilder.build()).build()));
        }
        return new SearchContextHighlight(fieldOptions);
    }

    /**
     * Transfers field options present in the input {@link AbstractHighlighterBuilder} to the receiving
     * {@link FieldOptions.Builder}, effectively overwriting existing settings
     * @param highlighterBuilder highlight builder with the input options
     * @param targetOptionsBuilder the receiving options builder
     * @param context needed to convert {@link QueryBuilder} to {@link Query}
     * @throws IOException on errors parsing any optional nested highlight query
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    private static void transferOptions(AbstractHighlighterBuilder highlighterBuilder,
            SearchContextHighlight.FieldOptions.Builder targetOptionsBuilder, QueryShardContext context) throws IOException {
        // only options that were explicitly set (non-null) are transferred; unset ones keep the target's values
        if (highlighterBuilder.preTags != null) {
            targetOptionsBuilder.preTags(highlighterBuilder.preTags);
        }
        if (highlighterBuilder.postTags != null) {
            targetOptionsBuilder.postTags(highlighterBuilder.postTags);
        }
        if (highlighterBuilder.order != null) {
            targetOptionsBuilder.scoreOrdered(highlighterBuilder.order == Order.SCORE);
        }
        if (highlighterBuilder.highlightFilter != null) {
            targetOptionsBuilder.highlightFilter(highlighterBuilder.highlightFilter);
        }
        if (highlighterBuilder.fragmentSize != null) {
            targetOptionsBuilder.fragmentCharSize(highlighterBuilder.fragmentSize);
        }
        if (highlighterBuilder.numOfFragments != null) {
            targetOptionsBuilder.numberOfFragments(highlighterBuilder.numOfFragments);
        }
        if (highlighterBuilder.requireFieldMatch != null) {
            targetOptionsBuilder.requireFieldMatch(highlighterBuilder.requireFieldMatch);
        }
        if (highlighterBuilder.boundaryMaxScan != null) {
            targetOptionsBuilder.boundaryMaxScan(highlighterBuilder.boundaryMaxScan);
        }
        if (highlighterBuilder.boundaryChars != null) {
            targetOptionsBuilder.boundaryChars(convertCharArray(highlighterBuilder.boundaryChars));
        }
        if (highlighterBuilder.highlighterType != null) {
            targetOptionsBuilder.highlighterType(highlighterBuilder.highlighterType);
        }
        if (highlighterBuilder.fragmenter != null) {
            targetOptionsBuilder.fragmenter(highlighterBuilder.fragmenter);
        }
        if (highlighterBuilder.noMatchSize != null) {
            targetOptionsBuilder.noMatchSize(highlighterBuilder.noMatchSize);
        }
        if (highlighterBuilder.forceSource != null) {
            targetOptionsBuilder.forceSource(highlighterBuilder.forceSource);
        }
        if (highlighterBuilder.phraseLimit != null) {
            targetOptionsBuilder.phraseLimit(highlighterBuilder.phraseLimit);
        }
        if (highlighterBuilder.options != null) {
targetOptionsBuilder.options(highlighterBuilder.options);
        }
        if (highlighterBuilder.highlightQuery != null) {
            // rewrite the optional highlight query against the shard context before converting it to a lucene query
            targetOptionsBuilder.highlightQuery(QueryBuilder.rewriteQuery(highlighterBuilder.highlightQuery, context).toQuery(context));
        }
    }

    /**
     * Converts a primitive char[] into its boxed Character[] equivalent.
     *
     * @param array the primitive array, may be null
     * @return a boxed copy, or null when the input is null
     */
    static Character[] convertCharArray(char[] array) {
        if (array == null) {
            return null;
        }
        Character[] charArray = new Character[array.length];
        for (int i = 0; i < array.length; i++) {
            charArray[i] = array[i];
        }
        return charArray;
    }

    @Override
    public void innerXContent(XContentBuilder builder) throws IOException {
        // first write common options
        commonOptionsToXContent(builder);
        // special options for top-level highlighter
        if (encoder != null) {
            builder.field(ENCODER_FIELD.getPreferredName(), encoder);
        }
        if (fields.size() > 0) {
            // an array preserves the explicit highlighting order, an object does not
            if (useExplicitFieldOrder) {
                builder.startArray(FIELDS_FIELD.getPreferredName());
            } else {
                builder.startObject(FIELDS_FIELD.getPreferredName());
            }
            for (Field field : fields) {
                if (useExplicitFieldOrder) {
                    builder.startObject();
                }
                field.innerXContent(builder);
                if (useExplicitFieldOrder) {
                    builder.endObject();
                }
            }
            if (useExplicitFieldOrder) {
                builder.endArray();
            } else {
                builder.endObject();
            }
        }
    }

    @Override
    protected int doHashCode() {
        return Objects.hash(encoder, useExplicitFieldOrder, fields);
    }

    @Override
    protected boolean doEquals(HighlightBuilder other) {
        return Objects.equals(encoder, other.encoder) &&
                Objects.equals(useExplicitFieldOrder, other.useExplicitFieldOrder) &&
                Objects.equals(fields, other.fields);
    }

    /** Per-field highlighting options; inherits all common options from {@link AbstractHighlighterBuilder}. */
    public static class Field extends AbstractHighlighterBuilder<Field> {
        static final NamedObjectParser<Field, QueryParseContext> PARSER;
        static {
            ObjectParser<Field, QueryParseContext> parser = new ObjectParser<>("highlight_field");
            parser.declareInt(Field::fragmentOffset, FRAGMENT_OFFSET_FIELD);
            parser.declareStringArray(fromList(String.class, Field::matchedFields), MATCHED_FIELDS_FIELD);
            BiFunction<QueryParseContext, Field, Field> decoratedParser = setupParser(parser);
            PARSER = (XContentParser p, QueryParseContext c, String name) -> decoratedParser.apply(c, new Field(name));
        }

        private final String name;

        // offset from the start of the fragment to the start of the highlight; -1 means "not set"
        int fragmentOffset = -1;

        // additional fields whose matches count towards this field's highlighting; null means just the named field
        String[] matchedFields;

        public Field(String name) {
            this.name = name;
        }

        /**
         * Read from a stream.
         */
        public Field(StreamInput in) throws IOException {
            super(in);
            name = in.readString();
            // NOTE(review): fragmentOffset defaults to -1 but is round-tripped with readVInt/writeVInt;
            // confirm negative values survive the vint encoding used by StreamInput/StreamOutput.
            fragmentOffset(in.readVInt());
            matchedFields(in.readOptionalStringArray());
        }

        @Override
        protected void doWriteTo(StreamOutput out) throws IOException {
            // keep in sync with the stream constructor above
            out.writeString(name);
            out.writeVInt(fragmentOffset);
            out.writeOptionalStringArray(matchedFields);
        }

        public String name() {
            return name;
        }

        public Field fragmentOffset(int fragmentOffset) {
            this.fragmentOffset = fragmentOffset;
            return this;
        }

        /**
         * Set the matched fields to highlight against this field data. Default to null, meaning just
         * the named field. If you provide a list of fields here then don't forget to include name as
         * it is not automatically included.
         */
        public Field matchedFields(String... matchedFields) {
            this.matchedFields = matchedFields;
            return this;
        }

        @Override
        public void innerXContent(XContentBuilder builder) throws IOException {
            builder.startObject(name);
            // write common options
            commonOptionsToXContent(builder);
            // write special field-highlighter options
            if (fragmentOffset != -1) {
                builder.field(FRAGMENT_OFFSET_FIELD.getPreferredName(), fragmentOffset);
            }
            if (matchedFields != null) {
                builder.field(MATCHED_FIELDS_FIELD.getPreferredName(), matchedFields);
            }
            builder.endObject();
        }

        @Override
        protected int doHashCode() {
            return Objects.hash(name, fragmentOffset, Arrays.hashCode(matchedFields));
        }

        @Override
        protected boolean doEquals(Field other) {
            return Objects.equals(name, other.name) &&
                    Objects.equals(fragmentOffset, other.fragmentOffset) &&
                    Arrays.equals(matchedFields, other.matchedFields);
        }
    }

    /** Fragment ordering: NONE keeps the order produced by the highlighter, SCORE orders fragments by score. */
    public enum Order implements Writeable {
        NONE, SCORE;

        public static Order readFromStream(StreamInput in) throws IOException {
            int ordinal = in.readVInt();
            if (ordinal < 0 || ordinal >= values().length) {
                throw new IOException("Unknown Order ordinal [" + ordinal + "]");
            }
            return values()[ordinal];
        }

        @Override
        public void writeTo(StreamOutput out) throws IOException {
            out.writeVInt(this.ordinal());
        }

        // NOTE(review): any string other than "score" silently maps to NONE instead of being rejected — confirm intended.
        public static Order fromString(String order) {
            if (order.toUpperCase(Locale.ROOT).equals(SCORE.name())) {
                return Order.SCORE;
            }
            return NONE;
        }

        @Override
        public String toString() {
            return name().toLowerCase(Locale.ROOT);
        }
    }
}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightField.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightField.java new file mode 100644 index 0000000000..91fde32c88 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightField.java @@ -0,0 +1,115 @@ +/* + * Licensed to Elasticsearch under one or more
contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.fetch.subphase.highlight; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Streamable; +import org.elasticsearch.common.text.Text; + +import java.io.IOException; +import java.util.Arrays; + +/** + * A field highlighted with its highlighted fragments. + */ +public class HighlightField implements Streamable { + + private String name; + + private Text[] fragments; + + HighlightField() { + } + + public HighlightField(String name, Text[] fragments) { + this.name = name; + this.fragments = fragments; + } + + /** + * The name of the field highlighted. + */ + public String name() { + return name; + } + + /** + * The name of the field highlighted. + */ + public String getName() { + return name(); + } + + /** + * The highlighted fragments. <tt>null</tt> if failed to highlight (for example, the field is not stored). + */ + public Text[] fragments() { + return fragments; + } + + /** + * The highlighted fragments. <tt>null</tt> if failed to highlight (for example, the field is not stored). 
+ */ + public Text[] getFragments() { + return fragments(); + } + + @Override + public String toString() { + return "[" + name + "], fragments[" + Arrays.toString(fragments) + "]"; + } + + public static HighlightField readHighlightField(StreamInput in) throws IOException { + HighlightField field = new HighlightField(); + field.readFrom(in); + return field; + } + + @Override + public void readFrom(StreamInput in) throws IOException { + name = in.readString(); + if (in.readBoolean()) { + int size = in.readVInt(); + if (size == 0) { + fragments = Text.EMPTY_ARRAY; + } else { + fragments = new Text[size]; + for (int i = 0; i < size; i++) { + fragments[i] = in.readText(); + } + } + } + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(name); + if (fragments == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + out.writeVInt(fragments.length); + for (Text fragment : fragments) { + out.writeText(fragment); + } + } + } +} diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java new file mode 100644 index 0000000000..2909e71445 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java @@ -0,0 +1,132 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.search.fetch.subphase.highlight;

import org.apache.lucene.search.Query;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.SourceFieldMapper;
import org.elasticsearch.index.mapper.StringFieldMapper;
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.internal.SearchContext;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Fetch sub phase that runs the configured highlighters for each hit.
 */
public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
    // when no highlighter type is configured for a field, the first of these that canHighlight() the field wins
    private static final List<String> STANDARD_HIGHLIGHTERS_BY_PRECEDENCE = Arrays.asList("fvh", "postings", "plain");

    // registered highlighters keyed by type name
    private final Map<String, Highlighter> highlighters;

    public HighlightPhase(Settings settings, Map<String, Highlighter> highlighters) {
        super(settings);
        this.highlighters = highlighters;
    }

    @Override
    public void hitExecute(SearchContext context, HitContext hitContext) {
        if (context.highlight() == null) {
            return;
        }
        Map<String, HighlightField> highlightFields = new HashMap<>();
        for (SearchContextHighlight.Field field : context.highlight().fields()) {
            // a field pattern may expand to several concrete field names
            Collection<String> fieldNamesToHighlight;
            if (Regex.isSimpleMatchPattern(field.field())) {
                DocumentMapper documentMapper = context.mapperService().documentMapper(hitContext.hit().type());
                fieldNamesToHighlight = documentMapper.mappers().simpleMatchToFullName(field.field());
            } else {
                fieldNamesToHighlight = Collections.singletonList(field.field());
            }

            // highlighting from _source only works when _source is enabled for the type
            if (context.highlight().forceSource(field)) {
                SourceFieldMapper sourceFieldMapper = context.mapperService().documentMapper(hitContext.hit().type()).sourceMapper();
                if (!sourceFieldMapper.enabled()) {
                    throw new IllegalArgumentException("source is forced for fields " + fieldNamesToHighlight + " but type [" + hitContext.hit().type() + "] has disabled _source");
                }
            }

            boolean fieldNameContainsWildcards = field.field().contains("*");
            for (String fieldName : fieldNamesToHighlight) {
                FieldMapper fieldMapper = getMapperForField(fieldName, context, hitContext);
                if (fieldMapper == null) {
                    continue;
                }

                // We should prevent highlighting if a field is anything but a text or keyword field.
                // However, someone might implement a custom field type that has text and still want to
                // highlight on that. We cannot know in advance if the highlighter will be able to
                // highlight such a field and so we do the following:
                // If the field is only highlighted because the field matches a wildcard we assume
                // it was a mistake and do not process it.
                // If the field was explicitly given we assume that whoever issued the query knew
                // what they were doing and try to highlight anyway.
                if (fieldNameContainsWildcards) {
                    if (fieldMapper.fieldType().typeName().equals(TextFieldMapper.CONTENT_TYPE) == false &&
                        fieldMapper.fieldType().typeName().equals(KeywordFieldMapper.CONTENT_TYPE) == false &&
                        fieldMapper.fieldType().typeName().equals(StringFieldMapper.CONTENT_TYPE) == false) {
                        continue;
                    }
                }
                String highlighterType = field.fieldOptions().highlighterType();
                if (highlighterType == null) {
                    // NOTE(review): assumes every standard highlighter is registered in the map;
                    // a missing one would NPE on the canHighlight call below — confirm.
                    for(String highlighterCandidate : STANDARD_HIGHLIGHTERS_BY_PRECEDENCE) {
                        if (highlighters.get(highlighterCandidate).canHighlight(fieldMapper)) {
                            highlighterType = highlighterCandidate;
                            break;
                        }
                    }
                    assert highlighterType != null;
                }
                Highlighter highlighter = highlighters.get(highlighterType);
                if (highlighter == null) {
                    throw new IllegalArgumentException("unknown highlighter type [" + highlighterType + "] for the field [" + fieldName + "]");
                }

                // fall back to the main query when no dedicated highlight query was given
                Query highlightQuery = field.fieldOptions().highlightQuery() == null ? context.parsedQuery().query() : field.fieldOptions().highlightQuery();
                HighlighterContext highlighterContext = new HighlighterContext(fieldName, field, fieldMapper, context, hitContext, highlightQuery);

                if ((highlighter.canHighlight(fieldMapper) == false) && fieldNameContainsWildcards) {
                    // if several fieldnames matched the wildcard then we want to skip those that we cannot highlight
                    continue;
                }
                HighlightField highlightField = highlighter.highlight(highlighterContext);
                if (highlightField != null) {
                    highlightFields.put(highlightField.name(), highlightField);
                }
            }
        }
        hitContext.hit().highlightFields(highlightFields);
    }

    private FieldMapper getMapperForField(String fieldName, SearchContext searchContext, HitContext hitContext) {
        DocumentMapper documentMapper = searchContext.mapperService().documentMapper(hitContext.hit().type());
        // TODO: no need to lookup the doc mapper with unambiguous field names?
just look at the mapper service + return documentMapper.mappers().smartNameFieldMapper(fieldName); + } +} diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java new file mode 100644 index 0000000000..dc805ea8d8 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java @@ -0,0 +1,71 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.search.fetch.subphase.highlight; + +import org.apache.lucene.search.highlight.DefaultEncoder; +import org.apache.lucene.search.highlight.Encoder; +import org.apache.lucene.search.highlight.SimpleHTMLEncoder; +import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.search.fetch.FetchSubPhase; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.search.lookup.SourceLookup; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; + +import static java.util.Collections.singleton; + +public final class HighlightUtils { + + //U+2029 PARAGRAPH SEPARATOR (PS): each value holds a discrete passage for highlighting (postings highlighter) + public static final char PARAGRAPH_SEPARATOR = 8233; + public static final char NULL_SEPARATOR = '\u0000'; + + private HighlightUtils() { + + } + + static List<Object> loadFieldValues(SearchContextHighlight.Field field, FieldMapper mapper, SearchContext searchContext, FetchSubPhase.HitContext hitContext) throws IOException { + //percolator needs to always load from source, thus it sets the global force source to true + boolean forceSource = searchContext.highlight().forceSource(field); + List<Object> textsToHighlight; + if (!forceSource && mapper.fieldType().stored()) { + CustomFieldsVisitor fieldVisitor = new CustomFieldsVisitor(singleton(mapper.fieldType().name()), false); + hitContext.reader().document(hitContext.docId(), fieldVisitor); + textsToHighlight = fieldVisitor.fields().get(mapper.fieldType().name()); + if (textsToHighlight == null) { + // Can happen if the document doesn't have the field to highlight + textsToHighlight = Collections.emptyList(); + } + } else { + SourceLookup sourceLookup = searchContext.lookup().source(); + sourceLookup.setSegmentAndDocument(hitContext.readerContext(), hitContext.docId()); + textsToHighlight = 
sourceLookup.extractRawValues(mapper.fieldType().name()); + } + assert textsToHighlight != null; + return textsToHighlight; + } + + static class Encoders { + static Encoder DEFAULT = new DefaultEncoder(); + static Encoder HTML = new SimpleHTMLEncoder(); + } +} diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/Highlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/Highlighter.java new file mode 100644 index 0000000000..ab76da6e72 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/Highlighter.java @@ -0,0 +1,31 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.fetch.subphase.highlight; + +import org.elasticsearch.index.mapper.FieldMapper; + +/** + * Highlights a search result. 
 */
public interface Highlighter {

    // Produces the highlighted fragments for the field described by the context,
    // or null when nothing was highlighted.
    HighlightField highlight(HighlighterContext highlighterContext);

    // Whether this implementation supports the given field; callers check this before
    // invoking highlight (e.g. the postings highlighter needs offsets in the postings list).
    boolean canHighlight(FieldMapper fieldMapper);
}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterContext.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterContext.java
new file mode 100644
index 0000000000..7b9526d152
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterContext.java
@@ -0,0 +1,47 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.search.fetch.subphase.highlight;

import org.apache.lucene.search.Query;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.internal.SearchContext;

/**
 * Immutable parameter object passed to a {@link Highlighter}: everything needed
 * to highlight one field of one hit.
 */
public class HighlighterContext {

    public final String fieldName;           // concrete field name (wildcards already expanded)
    public final SearchContextHighlight.Field field;  // the per-field highlight options from the request
    public final FieldMapper mapper;         // mapper of fieldName for the hit's type
    public final SearchContext context;
    public final FetchSubPhase.HitContext hitContext;
    public final Query query;                // highlight query (explicit one, or the main search query)

    public HighlighterContext(String fieldName, SearchContextHighlight.Field field, FieldMapper mapper, SearchContext context,
            FetchSubPhase.HitContext hitContext, Query query) {
        this.fieldName = fieldName;
        this.field = field;
        this.mapper = mapper;
        this.context = context;
        this.hitContext = hitContext;
        this.query = query;
    }
}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java
new file mode 100644
index 0000000000..01f70d4b27
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java
@@ -0,0 +1,208 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.fetch.subphase.highlight; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.search.highlight.Encoder; +import org.apache.lucene.search.highlight.Formatter; +import org.apache.lucene.search.highlight.Fragmenter; +import org.apache.lucene.search.highlight.NullFragmenter; +import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.SimpleFragmenter; +import org.apache.lucene.search.highlight.SimpleHTMLFormatter; +import org.apache.lucene.search.highlight.SimpleSpanFragmenter; +import org.apache.lucene.search.highlight.TextFragment; +import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.CollectionUtil; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.common.text.Text; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.search.fetch.FetchPhaseExecutionException; +import org.elasticsearch.search.fetch.FetchSubPhase; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * + */ +public class PlainHighlighter implements Highlighter { + + private static final String CACHE_KEY = "highlight-plain"; + + @Override + public HighlightField highlight(HighlighterContext highlighterContext) { + SearchContextHighlight.Field field = highlighterContext.field; + SearchContext context = highlighterContext.context; + FetchSubPhase.HitContext hitContext = highlighterContext.hitContext; + FieldMapper mapper = highlighterContext.mapper; + + Encoder encoder = 
field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT; + + if (!hitContext.cache().containsKey(CACHE_KEY)) { + Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter> mappers = new HashMap<>(); + hitContext.cache().put(CACHE_KEY, mappers); + } + @SuppressWarnings("unchecked") + Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter> cache = (Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter>) hitContext.cache().get(CACHE_KEY); + + org.apache.lucene.search.highlight.Highlighter entry = cache.get(mapper); + if (entry == null) { + QueryScorer queryScorer = new CustomQueryScorer(highlighterContext.query, field.fieldOptions().requireFieldMatch() ? mapper.fieldType().name() : null); + queryScorer.setExpandMultiTermQuery(true); + Fragmenter fragmenter; + if (field.fieldOptions().numberOfFragments() == 0) { + fragmenter = new NullFragmenter(); + } else if (field.fieldOptions().fragmenter() == null) { + fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize()); + } else if ("simple".equals(field.fieldOptions().fragmenter())) { + fragmenter = new SimpleFragmenter(field.fieldOptions().fragmentCharSize()); + } else if ("span".equals(field.fieldOptions().fragmenter())) { + fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize()); + } else { + throw new IllegalArgumentException("unknown fragmenter option [" + field.fieldOptions().fragmenter() + "] for the field [" + highlighterContext.fieldName + "]"); + } + Formatter formatter = new SimpleHTMLFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0]); + + entry = new org.apache.lucene.search.highlight.Highlighter(formatter, encoder, queryScorer); + entry.setTextFragmenter(fragmenter); + // always highlight across all data + entry.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); + + cache.put(mapper, entry); + } + + // a HACK to make highlighter do 
highlighting, even though its using the single frag list builder + int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1 : field.fieldOptions().numberOfFragments(); + ArrayList<TextFragment> fragsList = new ArrayList<>(); + List<Object> textsToHighlight; + Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer(); + + try { + textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext); + + for (Object textToHighlight : textsToHighlight) { + String text = textToHighlight.toString(); + + try (TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().name(), text)) { + if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) { + // can't perform highlighting if the stream has no terms (binary token stream) or no offsets + continue; + } + TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false, numberOfFragments); + for (TextFragment bestTextFragment : bestTextFragments) { + if (bestTextFragment != null && bestTextFragment.getScore() > 0) { + fragsList.add(bestTextFragment); + } + } + } + } + } catch (Exception e) { + if (ExceptionsHelper.unwrap(e, BytesRefHash.MaxBytesLengthExceededException.class) != null) { + // this can happen if for example a field is not_analyzed and ignore_above option is set. + // the field will be ignored when indexing but the huge term is still in the source and + // the plain highlighter will parse the source and try to analyze it. 
+ return null; + } else { + throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e); + } + } + if (field.fieldOptions().scoreOrdered()) { + CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() { + @Override + public int compare(TextFragment o1, TextFragment o2) { + return Math.round(o2.getScore() - o1.getScore()); + } + }); + } + String[] fragments; + // number_of_fragments is set to 0 but we have a multivalued field + if (field.fieldOptions().numberOfFragments() == 0 && textsToHighlight.size() > 1 && fragsList.size() > 0) { + fragments = new String[fragsList.size()]; + for (int i = 0; i < fragsList.size(); i++) { + fragments[i] = fragsList.get(i).toString(); + } + } else { + // refine numberOfFragments if needed + numberOfFragments = fragsList.size() < numberOfFragments ? fragsList.size() : numberOfFragments; + fragments = new String[numberOfFragments]; + for (int i = 0; i < fragments.length; i++) { + fragments[i] = fragsList.get(i).toString(); + } + } + + if (fragments.length > 0) { + return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments)); + } + + int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize(); + if (noMatchSize > 0 && textsToHighlight.size() > 0) { + // Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary. 
+ String fieldContents = textsToHighlight.get(0).toString(); + int end; + try { + end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer, mapper.fieldType().name(), fieldContents); + } catch (Exception e) { + throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e); + } + if (end > 0) { + return new HighlightField(highlighterContext.fieldName, new Text[] { new Text(fieldContents.substring(0, end)) }); + } + } + return null; + } + + @Override + public boolean canHighlight(FieldMapper fieldMapper) { + return true; + } + + private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer analyzer, String fieldName, String contents) throws IOException { + try (TokenStream tokenStream = analyzer.tokenStream(fieldName, contents)) { + if (!tokenStream.hasAttribute(OffsetAttribute.class)) { + // Can't split on term boundaries without offsets + return -1; + } + int end = -1; + tokenStream.reset(); + while (tokenStream.incrementToken()) { + OffsetAttribute attr = tokenStream.getAttribute(OffsetAttribute.class); + if (attr.endOffset() >= noMatchSize) { + // Jump to the end of this token if it wouldn't put us past the boundary + if (attr.endOffset() == noMatchSize) { + end = noMatchSize; + } + return end; + } + end = attr.endOffset(); + } + tokenStream.end(); + // We've exhausted the token stream so we should just highlight everything. + return end; + } + } +} diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PostingsHighlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PostingsHighlighter.java new file mode 100644 index 0000000000..b2b08edaca --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PostingsHighlighter.java @@ -0,0 +1,189 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.fetch.subphase.highlight; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.highlight.Encoder; +import org.apache.lucene.search.postingshighlight.CustomPassageFormatter; +import org.apache.lucene.search.postingshighlight.CustomPostingsHighlighter; +import org.apache.lucene.search.postingshighlight.CustomSeparatorBreakIterator; +import org.apache.lucene.search.postingshighlight.Snippet; +import org.apache.lucene.util.CollectionUtil; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.text.Text; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.search.fetch.FetchPhaseExecutionException; +import org.elasticsearch.search.fetch.FetchSubPhase; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.text.BreakIterator; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +public class PostingsHighlighter implements Highlighter { + + private static final String CACHE_KEY = "highlight-postings"; + + @Override + public 
HighlightField highlight(HighlighterContext highlighterContext) { + + FieldMapper fieldMapper = highlighterContext.mapper; + SearchContextHighlight.Field field = highlighterContext.field; + if (canHighlight(fieldMapper) == false) { + throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with positions and offsets in the postings list to be used with postings highlighter"); + } + + SearchContext context = highlighterContext.context; + FetchSubPhase.HitContext hitContext = highlighterContext.hitContext; + + if (!hitContext.cache().containsKey(CACHE_KEY)) { + hitContext.cache().put(CACHE_KEY, new HighlighterEntry()); + } + + HighlighterEntry highlighterEntry = (HighlighterEntry) hitContext.cache().get(CACHE_KEY); + MapperHighlighterEntry mapperHighlighterEntry = highlighterEntry.mappers.get(fieldMapper); + + if (mapperHighlighterEntry == null) { + Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT; + CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0], encoder); + mapperHighlighterEntry = new MapperHighlighterEntry(passageFormatter); + } + + List<Snippet> snippets = new ArrayList<>(); + int numberOfFragments; + try { + Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer(); + List<Object> fieldValues = HighlightUtils.loadFieldValues(field, fieldMapper, context, hitContext); + CustomPostingsHighlighter highlighter; + if (field.fieldOptions().numberOfFragments() == 0) { + //we use a control char to separate values, which is the only char that the custom break iterator breaks the text on, + //so we don't lose the distinction between the different values of a field and we get back a snippet per value + String fieldValue = mergeFieldValues(fieldValues, HighlightUtils.NULL_SEPARATOR); + 
CustomSeparatorBreakIterator breakIterator = new CustomSeparatorBreakIterator(HighlightUtils.NULL_SEPARATOR); + highlighter = new CustomPostingsHighlighter(analyzer, mapperHighlighterEntry.passageFormatter, breakIterator, fieldValue, field.fieldOptions().noMatchSize() > 0); + numberOfFragments = fieldValues.size(); //we are highlighting the whole content, one snippet per value + } else { + //using paragraph separator we make sure that each field value holds a discrete passage for highlighting + String fieldValue = mergeFieldValues(fieldValues, HighlightUtils.PARAGRAPH_SEPARATOR); + highlighter = new CustomPostingsHighlighter(analyzer, mapperHighlighterEntry.passageFormatter, fieldValue, field.fieldOptions().noMatchSize() > 0); + numberOfFragments = field.fieldOptions().numberOfFragments(); + } + + IndexSearcher searcher = new IndexSearcher(hitContext.reader()); + Snippet[] fieldSnippets = highlighter.highlightField(fieldMapper.fieldType().name(), highlighterContext.query, searcher, hitContext.docId(), numberOfFragments); + for (Snippet fieldSnippet : fieldSnippets) { + if (Strings.hasText(fieldSnippet.getText())) { + snippets.add(fieldSnippet); + } + } + + } catch(IOException e) { + throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e); + } + + snippets = filterSnippets(snippets, field.fieldOptions().numberOfFragments()); + + if (field.fieldOptions().scoreOrdered()) { + //let's sort the snippets by score if needed + CollectionUtil.introSort(snippets, new Comparator<Snippet>() { + @Override + public int compare(Snippet o1, Snippet o2) { + return (int) Math.signum(o2.getScore() - o1.getScore()); + } + }); + } + + String[] fragments = new String[snippets.size()]; + for (int i = 0; i < fragments.length; i++) { + fragments[i] = snippets.get(i).getText(); + } + + if (fragments.length > 0) { + return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments)); + } + + return 
null; + } + + @Override + public boolean canHighlight(FieldMapper fieldMapper) { + return fieldMapper.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; + } + + private static String mergeFieldValues(List<Object> fieldValues, char valuesSeparator) { + //postings highlighter accepts all values in a single string, as offsets etc. need to match with content + //loaded from stored fields, we merge all values using a proper separator + String rawValue = Strings.collectionToDelimitedString(fieldValues, String.valueOf(valuesSeparator)); + return rawValue.substring(0, Math.min(rawValue.length(), Integer.MAX_VALUE - 1)); + } + + private static List<Snippet> filterSnippets(List<Snippet> snippets, int numberOfFragments) { + + //We need to filter the snippets as due to no_match_size we could have + //either highlighted snippets or non highlighted ones and we don't want to mix those up + List<Snippet> filteredSnippets = new ArrayList<>(snippets.size()); + for (Snippet snippet : snippets) { + if (snippet.isHighlighted()) { + filteredSnippets.add(snippet); + } + } + + //if there's at least one highlighted snippet, we return all the highlighted ones + //otherwise we return the first non highlighted one if available + if (filteredSnippets.size() == 0) { + if (snippets.size() > 0) { + Snippet snippet = snippets.get(0); + //if we tried highlighting the whole content using whole break iterator (as number_of_fragments was 0) + //we need to return the first sentence of the content rather than the whole content + if (numberOfFragments == 0) { + BreakIterator bi = BreakIterator.getSentenceInstance(Locale.ROOT); + String text = snippet.getText(); + bi.setText(text); + int next = bi.next(); + if (next != BreakIterator.DONE) { + String newText = text.substring(0, next).trim(); + snippet = new Snippet(newText, snippet.getScore(), snippet.isHighlighted()); + } + } + filteredSnippets.add(snippet); + } + } + + return filteredSnippets; + } + + private static 
class HighlighterEntry { + Map<FieldMapper, MapperHighlighterEntry> mappers = new HashMap<>(); + } + + private static class MapperHighlighterEntry { + final CustomPassageFormatter passageFormatter; + + private MapperHighlighterEntry(CustomPassageFormatter passageFormatter) { + this.passageFormatter = passageFormatter; + } + } +} diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchContextHighlight.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchContextHighlight.java new file mode 100644 index 0000000000..9f2074d741 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchContextHighlight.java @@ -0,0 +1,361 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.fetch.subphase.highlight; + +import org.apache.lucene.search.Query; + +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + +/** + * + */ +public class SearchContextHighlight { + + private final Map<String, Field> fields; + + private boolean globalForceSource = false; + + public SearchContextHighlight(Collection<Field> fields) { + assert fields != null; + this.fields = new LinkedHashMap<String, Field>(fields.size()); + for (Field field : fields) { + this.fields.put(field.field, field); + } + } + + public Collection<Field> fields() { + return fields.values(); + } + + public void globalForceSource(boolean globalForceSource) { + this.globalForceSource = globalForceSource; + } + + boolean globalForceSource() { + return this.globalForceSource; + } + + public boolean forceSource(Field field) { + if (globalForceSource) { + return true; + } + + Field _field = fields.get(field.field); + return _field == null ? 
false : _field.fieldOptions.forceSource; + } + + public static class Field { + private final String field; + private final FieldOptions fieldOptions; + + Field(String field, FieldOptions fieldOptions) { + assert field != null; + assert fieldOptions != null; + this.field = field; + this.fieldOptions = fieldOptions; + } + + public String field() { + return field; + } + + public FieldOptions fieldOptions() { + return fieldOptions; + } + } + + public static class FieldOptions { + + // Field options that default to null or -1 are often set to their real default in HighlighterParseElement#parse + private int fragmentCharSize = -1; + + private int numberOfFragments = -1; + + private int fragmentOffset = -1; + + private String encoder; + + private String[] preTags; + + private String[] postTags; + + private Boolean scoreOrdered; + + private Boolean highlightFilter; + + private Boolean requireFieldMatch; + + private String highlighterType; + + private Boolean forceSource; + + private String fragmenter; + + private int boundaryMaxScan = -1; + + private Character[] boundaryChars = null; + + private Query highlightQuery; + + private int noMatchSize = -1; + + private Set<String> matchedFields; + + private Map<String, Object> options; + + private int phraseLimit = -1; + + public int fragmentCharSize() { + return fragmentCharSize; + } + + public int numberOfFragments() { + return numberOfFragments; + } + + public int fragmentOffset() { + return fragmentOffset; + } + + public String encoder() { + return encoder; + } + + public String[] preTags() { + return preTags; + } + + public String[] postTags() { + return postTags; + } + + public Boolean scoreOrdered() { + return scoreOrdered; + } + + public Boolean highlightFilter() { + return highlightFilter; + } + + public Boolean requireFieldMatch() { + return requireFieldMatch; + } + + public String highlighterType() { + return highlighterType; + } + + public String fragmenter() { + return fragmenter; + } + + public int boundaryMaxScan() 
{ + return boundaryMaxScan; + } + + public Character[] boundaryChars() { + return boundaryChars; + } + + public Query highlightQuery() { + return highlightQuery; + } + + public int noMatchSize() { + return noMatchSize; + } + + public int phraseLimit() { + return phraseLimit; + } + + public Set<String> matchedFields() { + return matchedFields; + } + + public Map<String, Object> options() { + return options; + } + + static class Builder { + + private final FieldOptions fieldOptions = new FieldOptions(); + + Builder fragmentCharSize(int fragmentCharSize) { + fieldOptions.fragmentCharSize = fragmentCharSize; + return this; + } + + Builder numberOfFragments(int numberOfFragments) { + fieldOptions.numberOfFragments = numberOfFragments; + return this; + } + + Builder fragmentOffset(int fragmentOffset) { + fieldOptions.fragmentOffset = fragmentOffset; + return this; + } + + Builder encoder(String encoder) { + fieldOptions.encoder = encoder; + return this; + } + + Builder preTags(String[] preTags) { + fieldOptions.preTags = preTags; + return this; + } + + Builder postTags(String[] postTags) { + fieldOptions.postTags = postTags; + return this; + } + + Builder scoreOrdered(boolean scoreOrdered) { + fieldOptions.scoreOrdered = scoreOrdered; + return this; + } + + Builder highlightFilter(boolean highlightFilter) { + fieldOptions.highlightFilter = highlightFilter; + return this; + } + + Builder requireFieldMatch(boolean requireFieldMatch) { + fieldOptions.requireFieldMatch = requireFieldMatch; + return this; + } + + Builder highlighterType(String type) { + fieldOptions.highlighterType = type; + return this; + } + + Builder forceSource(boolean forceSource) { + fieldOptions.forceSource = forceSource; + return this; + } + + Builder fragmenter(String fragmenter) { + fieldOptions.fragmenter = fragmenter; + return this; + } + + Builder boundaryMaxScan(int boundaryMaxScan) { + fieldOptions.boundaryMaxScan = boundaryMaxScan; + return this; + } + + Builder boundaryChars(Character[] 
boundaryChars) { + fieldOptions.boundaryChars = boundaryChars; + return this; + } + + Builder highlightQuery(Query highlightQuery) { + fieldOptions.highlightQuery = highlightQuery; + return this; + } + + Builder noMatchSize(int noMatchSize) { + fieldOptions.noMatchSize = noMatchSize; + return this; + } + + Builder phraseLimit(int phraseLimit) { + fieldOptions.phraseLimit = phraseLimit; + return this; + } + + Builder matchedFields(Set<String> matchedFields) { + fieldOptions.matchedFields = matchedFields; + return this; + } + + Builder options(Map<String, Object> options) { + fieldOptions.options = options; + return this; + } + + FieldOptions build() { + return fieldOptions; + } + + Builder merge(FieldOptions globalOptions) { + if (fieldOptions.preTags == null && globalOptions.preTags != null) { + fieldOptions.preTags = Arrays.copyOf(globalOptions.preTags, globalOptions.preTags.length); + } + if (fieldOptions.postTags == null && globalOptions.postTags != null) { + fieldOptions.postTags = Arrays.copyOf(globalOptions.postTags, globalOptions.postTags.length); + } + if (fieldOptions.highlightFilter == null) { + fieldOptions.highlightFilter = globalOptions.highlightFilter; + } + if (fieldOptions.scoreOrdered == null) { + fieldOptions.scoreOrdered = globalOptions.scoreOrdered; + } + if (fieldOptions.fragmentCharSize == -1) { + fieldOptions.fragmentCharSize = globalOptions.fragmentCharSize; + } + if (fieldOptions.numberOfFragments == -1) { + fieldOptions.numberOfFragments = globalOptions.numberOfFragments; + } + if (fieldOptions.encoder == null) { + fieldOptions.encoder = globalOptions.encoder; + } + if (fieldOptions.requireFieldMatch == null) { + fieldOptions.requireFieldMatch = globalOptions.requireFieldMatch; + } + if (fieldOptions.boundaryMaxScan == -1) { + fieldOptions.boundaryMaxScan = globalOptions.boundaryMaxScan; + } + if (fieldOptions.boundaryChars == null && globalOptions.boundaryChars != null) { + fieldOptions.boundaryChars = 
Arrays.copyOf(globalOptions.boundaryChars, globalOptions.boundaryChars.length); + } + if (fieldOptions.highlighterType == null) { + fieldOptions.highlighterType = globalOptions.highlighterType; + } + if (fieldOptions.fragmenter == null) { + fieldOptions.fragmenter = globalOptions.fragmenter; + } + if ((fieldOptions.options == null || fieldOptions.options.size() == 0) && globalOptions.options != null) { + fieldOptions.options = new HashMap<>(globalOptions.options); + } + if (fieldOptions.highlightQuery == null && globalOptions.highlightQuery != null) { + fieldOptions.highlightQuery = globalOptions.highlightQuery; + } + if (fieldOptions.noMatchSize == -1) { + fieldOptions.noMatchSize = globalOptions.noMatchSize; + } + if (fieldOptions.forceSource == null) { + fieldOptions.forceSource = globalOptions.forceSource; + } + if (fieldOptions.phraseLimit == -1) { + fieldOptions.phraseLimit = globalOptions.phraseLimit; + } + return this; + } + } + } +} diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SimpleFragmentsBuilder.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SimpleFragmentsBuilder.java new file mode 100644 index 0000000000..68c40ad846 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SimpleFragmentsBuilder.java @@ -0,0 +1,45 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.search.fetch.subphase.highlight;

import org.apache.lucene.document.Field;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
import org.elasticsearch.index.mapper.FieldMapper;

/**
 * Direct Subclass of Lucene's org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder
 * that corrects offsets for broken analysis chains.
 */
public class SimpleFragmentsBuilder extends org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder {
    // mapper of the field being highlighted; used by subclasses and for offset correction
    protected final FieldMapper mapper;

    public SimpleFragmentsBuilder(FieldMapper mapper,
                                  String[] preTags, String[] postTags, BoundaryScanner boundaryScanner) {
        super(preTags, postTags, boundaryScanner);
        this.mapper = mapper;
    }

    /**
     * Delegates to the Lucene implementation after running the fragment info through
     * {@code FragmentBuilderHelper.fixWeightedFragInfo} to repair offsets produced by
     * broken analysis chains.
     */
    @Override
    protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
                                   String[] preTags, String[] postTags, Encoder encoder ){
        return super.makeFragment(buffer, index, values, FragmentBuilderHelper.fixWeightedFragInfo(mapper, values, fragInfo), preTags, postTags, encoder);
    }
}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SourceScoreOrderFragmentsBuilder.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SourceScoreOrderFragmentsBuilder.java
new file mode 100644
index 0000000000..dabe3b48ba
--- /dev/null
+++ 
b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SourceScoreOrderFragmentsBuilder.java
@@ -0,0 +1,71 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.search.fetch.subphase.highlight;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.List;

/**
 * A {@link ScoreOrderFragmentsBuilder} that loads the values to highlight from the
 * document source (rather than from stored fields) and corrects fragment offsets
 * for broken analysis chains.
 */
public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder {

    private final FieldMapper mapper;

    private final SearchContext searchContext;

    public SourceScoreOrderFragmentsBuilder(FieldMapper mapper, SearchContext searchContext, String[] preTags, String[] postTags,
                                            BoundaryScanner boundaryScanner) {
        super(preTags, postTags, boundaryScanner);
        this.mapper = mapper;
        this.searchContext = searchContext;
    }

    /**
     * Builds the {@link Field} values to highlight by extracting the raw values of
     * the mapped field from the document source.
     */
    @Override
    protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
        // we know its low level reader, and matching docId, since that's how we call the highlighter with
        SourceLookup sourceLookup = searchContext.lookup().source();
        sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId);

        List<Object> values = sourceLookup.extractRawValues(mapper.fieldType().name());
        Field[] fields = new Field[values.size()];
        for (int i = 0; i < values.size(); i++) {
            // fields are not indexed/stored here; they only carry the text for the highlighter
            fields[i] = new Field(mapper.fieldType().name(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
        }
        return fields;
    }

    /**
     * Delegates to the Lucene implementation after running the fragment info through
     * {@code FragmentBuilderHelper.fixWeightedFragInfo} to repair offsets produced by
     * broken analysis chains.
     */
    @Override
    protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
                                   String[] preTags, String[] postTags, Encoder encoder ){
        return super.makeFragment(buffer, index, values, FragmentBuilderHelper.fixWeightedFragInfo(mapper, values, fragInfo), preTags, postTags, encoder);
    }
}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SourceSimpleFragmentsBuilder.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SourceSimpleFragmentsBuilder.java
new file mode 100644
index 0000000000..4ff52547c7
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SourceSimpleFragmentsBuilder.java
@@ -0,0 +1,65 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.search.fetch.subphase.highlight;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.List;

/**
 * A {@link SimpleFragmentsBuilder} that loads the values to highlight from the
 * document source rather than from stored fields.
 */
public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder {

    private final SearchContext searchContext;

    public SourceSimpleFragmentsBuilder(FieldMapper mapper, SearchContext searchContext, String[] preTags, String[] postTags,
                                        BoundaryScanner boundaryScanner) {
        super(mapper, preTags, postTags, boundaryScanner);
        this.searchContext = searchContext;
    }

    // shared empty result returned when the source holds no values for the field
    public static final Field[] EMPTY_FIELDS = new Field[0];

    /**
     * Builds the {@link Field} values to highlight by extracting the raw values of
     * the mapped field from the document source; returns {@link #EMPTY_FIELDS} when
     * the source holds no values for the field.
     */
    @Override
    protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
        // we know its low level reader, and matching docId, since that's how we call the highlighter with
        SourceLookup sourceLookup = searchContext.lookup().source();
        sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId);

        List<Object> values = sourceLookup.extractRawValues(mapper.fieldType().name());
        if (values.isEmpty()) {
            return EMPTY_FIELDS;
        }
        Field[] fields = new Field[values.size()];
        for (int i = 0; i < values.size(); i++) {
            // fields are not indexed/stored here; they only carry the text for the highlighter
            fields[i] = new Field(mapper.fieldType().name(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
        }
        return fields;
    }

}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/package-info.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/package-info.java
new file mode 100644
index 0000000000..0e0daf6670
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/package-info.java
@@ -0,0 +1,25 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/**
 * Fetch sub phase that extracts significant portions of string fields, marking the matches. Pluggable by implementing
 * {@link org.elasticsearch.search.fetch.subphase.highlight.Highlighter} and
 * {@link org.elasticsearch.plugins.SearchPlugin#getHighlighters()}.
 */
package org.elasticsearch.search.fetch.subphase.highlight;