Diffstat (limited to 'core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight')
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java         607
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/CustomQueryScorer.java                  105
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java              199
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FragmentBuilderHelper.java              102
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilder.java                   525
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightField.java                     115
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java                     132
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java                      71
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/Highlighter.java                         31
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterContext.java                  47
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java                   208
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PostingsHighlighter.java                189
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchContextHighlight.java             361
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SimpleFragmentsBuilder.java              45
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SourceScoreOrderFragmentsBuilder.java    71
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SourceSimpleFragmentsBuilder.java        65
-rw-r--r--  core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/package-info.java                        25
17 files changed, 2898 insertions, 0 deletions
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java
new file mode 100644
index 0000000000..72bd436a88
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java
@@ -0,0 +1,607 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.search.highlight.SimpleFragmenter;
+import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
+import org.elasticsearch.action.support.ToXContentToBytes;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.ParsingException;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.xcontent.ObjectParser;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.query.QueryParseContext;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.Order;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Objects;
+import java.util.function.BiFunction;
+
+import static org.elasticsearch.common.xcontent.ObjectParser.fromList;
+
+/**
+ * This abstract class holds parameters shared by {@link HighlightBuilder} and {@link HighlightBuilder.Field}
+ * and provides the common setters, equality, hashCode calculation and common serialization
+ */
+public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterBuilder<?>> extends ToXContentToBytes implements Writeable {
+ public static final ParseField PRE_TAGS_FIELD = new ParseField("pre_tags");
+ public static final ParseField POST_TAGS_FIELD = new ParseField("post_tags");
+ public static final ParseField FIELDS_FIELD = new ParseField("fields");
+ public static final ParseField ORDER_FIELD = new ParseField("order");
+ public static final ParseField HIGHLIGHT_FILTER_FIELD = new ParseField("highlight_filter");
+ public static final ParseField FRAGMENT_SIZE_FIELD = new ParseField("fragment_size");
+ public static final ParseField FRAGMENT_OFFSET_FIELD = new ParseField("fragment_offset");
+ public static final ParseField NUMBER_OF_FRAGMENTS_FIELD = new ParseField("number_of_fragments");
+ public static final ParseField ENCODER_FIELD = new ParseField("encoder");
+ public static final ParseField REQUIRE_FIELD_MATCH_FIELD = new ParseField("require_field_match");
+ public static final ParseField BOUNDARY_MAX_SCAN_FIELD = new ParseField("boundary_max_scan");
+ public static final ParseField BOUNDARY_CHARS_FIELD = new ParseField("boundary_chars");
+ public static final ParseField TYPE_FIELD = new ParseField("type");
+ public static final ParseField FRAGMENTER_FIELD = new ParseField("fragmenter");
+ public static final ParseField NO_MATCH_SIZE_FIELD = new ParseField("no_match_size");
+ public static final ParseField FORCE_SOURCE_FIELD = new ParseField("force_source");
+ public static final ParseField PHRASE_LIMIT_FIELD = new ParseField("phrase_limit");
+ public static final ParseField OPTIONS_FIELD = new ParseField("options");
+ public static final ParseField HIGHLIGHT_QUERY_FIELD = new ParseField("highlight_query");
+ public static final ParseField MATCHED_FIELDS_FIELD = new ParseField("matched_fields");
+
+ protected String[] preTags;
+
+ protected String[] postTags;
+
+ protected Integer fragmentSize;
+
+ protected Integer numOfFragments;
+
+ protected String highlighterType;
+
+ protected String fragmenter;
+
+ protected QueryBuilder highlightQuery;
+
+ protected Order order;
+
+ protected Boolean highlightFilter;
+
+ protected Boolean forceSource;
+
+ protected Integer boundaryMaxScan;
+
+ protected char[] boundaryChars;
+
+ protected Integer noMatchSize;
+
+ protected Integer phraseLimit;
+
+ protected Map<String, Object> options;
+
+ protected Boolean requireFieldMatch;
+
+ public AbstractHighlighterBuilder() {
+ }
+
+ /**
+ * Read from a stream.
+ */
+ protected AbstractHighlighterBuilder(StreamInput in) throws IOException {
+ preTags(in.readOptionalStringArray());
+ postTags(in.readOptionalStringArray());
+ fragmentSize(in.readOptionalVInt());
+ numOfFragments(in.readOptionalVInt());
+ highlighterType(in.readOptionalString());
+ fragmenter(in.readOptionalString());
+ if (in.readBoolean()) {
+ highlightQuery(in.readNamedWriteable(QueryBuilder.class));
+ }
+ order(in.readOptionalWriteable(Order::readFromStream));
+ highlightFilter(in.readOptionalBoolean());
+ forceSource(in.readOptionalBoolean());
+ boundaryMaxScan(in.readOptionalVInt());
+ if (in.readBoolean()) {
+ boundaryChars(in.readString().toCharArray());
+ }
+ noMatchSize(in.readOptionalVInt());
+ phraseLimit(in.readOptionalVInt());
+ if (in.readBoolean()) {
+ options(in.readMap());
+ }
+ requireFieldMatch(in.readOptionalBoolean());
+ }
+
+ /**
+ * write common parameters to {@link StreamOutput}
+ */
+ @Override
+ public final void writeTo(StreamOutput out) throws IOException {
+ out.writeOptionalStringArray(preTags);
+ out.writeOptionalStringArray(postTags);
+ out.writeOptionalVInt(fragmentSize);
+ out.writeOptionalVInt(numOfFragments);
+ out.writeOptionalString(highlighterType);
+ out.writeOptionalString(fragmenter);
+ boolean hasQuery = highlightQuery != null;
+ out.writeBoolean(hasQuery);
+ if (hasQuery) {
+ out.writeNamedWriteable(highlightQuery);
+ }
+ out.writeOptionalWriteable(order);
+ out.writeOptionalBoolean(highlightFilter);
+ out.writeOptionalBoolean(forceSource);
+ out.writeOptionalVInt(boundaryMaxScan);
+ boolean hasBoundaryChars = boundaryChars != null;
+ out.writeBoolean(hasBoundaryChars);
+ if (hasBoundaryChars) {
+ out.writeString(String.valueOf(boundaryChars));
+ }
+ out.writeOptionalVInt(noMatchSize);
+ out.writeOptionalVInt(phraseLimit);
+ boolean hasOptions = options != null;
+ out.writeBoolean(hasOptions);
+ if (hasOptions) {
+ out.writeMap(options);
+ }
+ out.writeOptionalBoolean(requireFieldMatch);
+ doWriteTo(out);
+ }
+
+ protected abstract void doWriteTo(StreamOutput out) throws IOException;
+
+ /**
+ * Set the pre tags that will be used for highlighting.
+ */
+ @SuppressWarnings("unchecked")
+ public HB preTags(String... preTags) {
+ this.preTags = preTags;
+ return (HB) this;
+ }
+
+ /**
+ * @return the value set by {@link #preTags(String...)}
+ */
+ public String[] preTags() {
+ return this.preTags;
+ }
+
+ /**
+ * Set the post tags that will be used for highlighting.
+ */
+ @SuppressWarnings("unchecked")
+ public HB postTags(String... postTags) {
+ this.postTags = postTags;
+ return (HB) this;
+ }
+
+ /**
+ * @return the value set by {@link #postTags(String...)}
+ */
+ public String[] postTags() {
+ return this.postTags;
+ }
+
+ /**
+ * Set the fragment size in characters, defaults to {@link HighlightBuilder#DEFAULT_FRAGMENT_CHAR_SIZE}
+ */
+ @SuppressWarnings("unchecked")
+ public HB fragmentSize(Integer fragmentSize) {
+ this.fragmentSize = fragmentSize;
+ return (HB) this;
+ }
+
+ /**
+ * @return the value set by {@link #fragmentSize(Integer)}
+ */
+ public Integer fragmentSize() {
+ return this.fragmentSize;
+ }
+
+ /**
+ * Set the number of fragments, defaults to {@link HighlightBuilder#DEFAULT_NUMBER_OF_FRAGMENTS}
+ */
+ @SuppressWarnings("unchecked")
+ public HB numOfFragments(Integer numOfFragments) {
+ this.numOfFragments = numOfFragments;
+ return (HB) this;
+ }
+
+ /**
+ * @return the value set by {@link #numOfFragments(Integer)}
+ */
+ public Integer numOfFragments() {
+ return this.numOfFragments;
+ }
+
+ /**
+ * Set type of highlighter to use. Out of the box supported types
+ * are <tt>plain</tt>, <tt>fvh</tt> and <tt>postings</tt>.
+ * The default option selected is dependent on the mappings defined for your index.
+ * Details of the different highlighter types are covered in the reference guide.
+ */
+ @SuppressWarnings("unchecked")
+ public HB highlighterType(String highlighterType) {
+ this.highlighterType = highlighterType;
+ return (HB) this;
+ }
+
+ /**
+ * @return the value set by {@link #highlighterType(String)}
+ */
+ public String highlighterType() {
+ return this.highlighterType;
+ }
+
+ /**
+ * Sets what fragmenter to use to break up text that is eligible for highlighting.
+ * This option is only applicable when using the <tt>plain</tt> highlighterType.
+ * Permitted values are "simple" or "span", relating to the {@link SimpleFragmenter} and
+ * {@link SimpleSpanFragmenter} implementations respectively, with the default being "span".
+ */
+ @SuppressWarnings("unchecked")
+ public HB fragmenter(String fragmenter) {
+ this.fragmenter = fragmenter;
+ return (HB) this;
+ }
+
+ /**
+ * @return the value set by {@link #fragmenter(String)}
+ */
+ public String fragmenter() {
+ return this.fragmenter;
+ }
+
+ /**
+ * Sets a query to be used for highlighting instead of the search query.
+ */
+ @SuppressWarnings("unchecked")
+ public HB highlightQuery(QueryBuilder highlightQuery) {
+ this.highlightQuery = highlightQuery;
+ return (HB) this;
+ }
+
+ /**
+ * @return the value set by {@link #highlightQuery(QueryBuilder)}
+ */
+ public QueryBuilder highlightQuery() {
+ return this.highlightQuery;
+ }
+
+ /**
+ * The order of fragments per field. By default, fragments are ordered by their position in the
+ * highlighted text. Can be <tt>score</tt>, in which case fragments are ordered
+ * by their score, or <tt>none</tt>.
+ */
+ public HB order(String order) {
+ return order(Order.fromString(order));
+ }
+
+ /**
+ * By default, fragments of a field are ordered by the order in the highlighted text.
+ * If set to {@link Order#SCORE}, this changes order to score of the fragments.
+ */
+ @SuppressWarnings("unchecked")
+ public HB order(Order scoreOrdered) {
+ this.order = scoreOrdered;
+ return (HB) this;
+ }
+
+ /**
+ * @return the value set by {@link #order(Order)}
+ */
+ public Order order() {
+ return this.order;
+ }
+
+ /**
+ * Set this to true when using the highlighterType <tt>fvh</tt>
+ * and you want to provide highlighting on filter clauses in your
+ * query. Default is <tt>false</tt>.
+ */
+ @SuppressWarnings("unchecked")
+ public HB highlightFilter(Boolean highlightFilter) {
+ this.highlightFilter = highlightFilter;
+ return (HB) this;
+ }
+
+ /**
+ * @return the value set by {@link #highlightFilter(Boolean)}
+ */
+ public Boolean highlightFilter() {
+ return this.highlightFilter;
+ }
+
+ /**
+ * When using the highlighterType <tt>fvh</tt> this setting
+ * controls how far to look for boundary characters, and defaults to 20.
+ */
+ @SuppressWarnings("unchecked")
+ public HB boundaryMaxScan(Integer boundaryMaxScan) {
+ this.boundaryMaxScan = boundaryMaxScan;
+ return (HB) this;
+ }
+
+ /**
+ * @return the value set by {@link #boundaryMaxScan(Integer)}
+ */
+ public Integer boundaryMaxScan() {
+ return this.boundaryMaxScan;
+ }
+
+ /**
+ * When using the highlighterType <tt>fvh</tt> this setting
+ * defines what constitutes a boundary for highlighting. It’s a single string with
+ * each boundary character defined in it. It defaults to .,!? \t\n
+ */
+ @SuppressWarnings("unchecked")
+ public HB boundaryChars(char[] boundaryChars) {
+ this.boundaryChars = boundaryChars;
+ return (HB) this;
+ }
+
+ /**
+ * @return the value set by {@link #boundaryChars(char[])}
+ */
+ public char[] boundaryChars() {
+ return this.boundaryChars;
+ }
+
+ /**
+ * Allows setting custom options for custom highlighters.
+ */
+ @SuppressWarnings("unchecked")
+ public HB options(Map<String, Object> options) {
+ this.options = options;
+ return (HB) this;
+ }
+
+ /**
+ * @return the value set by {@link #options(Map)}
+ */
+ public Map<String, Object> options() {
+ return this.options;
+ }
+
+ /**
+ * Set to true to cause a field to be highlighted only if a query matches that field.
+ * Default is false, meaning that terms are highlighted on all requested fields regardless
+ * of whether the query matches them specifically.
+ */
+ @SuppressWarnings("unchecked")
+ public HB requireFieldMatch(Boolean requireFieldMatch) {
+ this.requireFieldMatch = requireFieldMatch;
+ return (HB) this;
+ }
+
+ /**
+ * @return the value set by {@link #requireFieldMatch(Boolean)}
+ */
+ public Boolean requireFieldMatch() {
+ return this.requireFieldMatch;
+ }
+
+ /**
+ * Sets the size of the fragment to return from the beginning of the field if there are no matches to
+ * highlight and the field doesn't also define noMatchSize.
+ * @param noMatchSize integer to set or null to leave out of request. default is null.
+ * @return this for chaining
+ */
+ @SuppressWarnings("unchecked")
+ public HB noMatchSize(Integer noMatchSize) {
+ this.noMatchSize = noMatchSize;
+ return (HB) this;
+ }
+
+ /**
+ * @return the value set by {@link #noMatchSize(Integer)}
+ */
+ public Integer noMatchSize() {
+ return this.noMatchSize;
+ }
+
+ /**
+ * Sets the maximum number of phrases the fvh will consider if the field doesn't also define phraseLimit.
+ * @param phraseLimit maximum number of phrases the fvh will consider
+ * @return this for chaining
+ */
+ @SuppressWarnings("unchecked")
+ public HB phraseLimit(Integer phraseLimit) {
+ this.phraseLimit = phraseLimit;
+ return (HB) this;
+ }
+
+ /**
+ * @return the value set by {@link #phraseLimit(Integer)}
+ */
+ public Integer phraseLimit() {
+ return this.phraseLimit;
+ }
+
+ /**
+ * Forces the highlighting to highlight fields based on the source even if fields are stored separately.
+ */
+ @SuppressWarnings("unchecked")
+ public HB forceSource(Boolean forceSource) {
+ this.forceSource = forceSource;
+ return (HB) this;
+ }
+
+ /**
+ * @return the value set by {@link #forceSource(Boolean)}
+ */
+ public Boolean forceSource() {
+ return this.forceSource;
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject();
+ innerXContent(builder);
+ builder.endObject();
+ return builder;
+ }
+
+ protected abstract void innerXContent(XContentBuilder builder) throws IOException;
+
+ void commonOptionsToXContent(XContentBuilder builder) throws IOException {
+ if (preTags != null) {
+ builder.array(PRE_TAGS_FIELD.getPreferredName(), preTags);
+ }
+ if (postTags != null) {
+ builder.array(POST_TAGS_FIELD.getPreferredName(), postTags);
+ }
+ if (fragmentSize != null) {
+ builder.field(FRAGMENT_SIZE_FIELD.getPreferredName(), fragmentSize);
+ }
+ if (numOfFragments != null) {
+ builder.field(NUMBER_OF_FRAGMENTS_FIELD.getPreferredName(), numOfFragments);
+ }
+ if (highlighterType != null) {
+ builder.field(TYPE_FIELD.getPreferredName(), highlighterType);
+ }
+ if (fragmenter != null) {
+ builder.field(FRAGMENTER_FIELD.getPreferredName(), fragmenter);
+ }
+ if (highlightQuery != null) {
+ builder.field(HIGHLIGHT_QUERY_FIELD.getPreferredName(), highlightQuery);
+ }
+ if (order != null) {
+ builder.field(ORDER_FIELD.getPreferredName(), order.toString());
+ }
+ if (highlightFilter != null) {
+ builder.field(HIGHLIGHT_FILTER_FIELD.getPreferredName(), highlightFilter);
+ }
+ if (boundaryMaxScan != null) {
+ builder.field(BOUNDARY_MAX_SCAN_FIELD.getPreferredName(), boundaryMaxScan);
+ }
+ if (boundaryChars != null) {
+ builder.field(BOUNDARY_CHARS_FIELD.getPreferredName(), new String(boundaryChars));
+ }
+ if (options != null && options.size() > 0) {
+ builder.field(OPTIONS_FIELD.getPreferredName(), options);
+ }
+ if (forceSource != null) {
+ builder.field(FORCE_SOURCE_FIELD.getPreferredName(), forceSource);
+ }
+ if (requireFieldMatch != null) {
+ builder.field(REQUIRE_FIELD_MATCH_FIELD.getPreferredName(), requireFieldMatch);
+ }
+ if (noMatchSize != null) {
+ builder.field(NO_MATCH_SIZE_FIELD.getPreferredName(), noMatchSize);
+ }
+ if (phraseLimit != null) {
+ builder.field(PHRASE_LIMIT_FIELD.getPreferredName(), phraseLimit);
+ }
+ }
+
+ static <HB extends AbstractHighlighterBuilder<HB>> BiFunction<QueryParseContext, HB, HB> setupParser(
+ ObjectParser<HB, QueryParseContext> parser) {
+ parser.declareStringArray(fromList(String.class, HB::preTags), PRE_TAGS_FIELD);
+ parser.declareStringArray(fromList(String.class, HB::postTags), POST_TAGS_FIELD);
+ parser.declareString(HB::order, ORDER_FIELD);
+ parser.declareBoolean(HB::highlightFilter, HIGHLIGHT_FILTER_FIELD);
+ parser.declareInt(HB::fragmentSize, FRAGMENT_SIZE_FIELD);
+ parser.declareInt(HB::numOfFragments, NUMBER_OF_FRAGMENTS_FIELD);
+ parser.declareBoolean(HB::requireFieldMatch, REQUIRE_FIELD_MATCH_FIELD);
+ parser.declareInt(HB::boundaryMaxScan, BOUNDARY_MAX_SCAN_FIELD);
+ parser.declareString((HB hb, String bc) -> hb.boundaryChars(bc.toCharArray()) , BOUNDARY_CHARS_FIELD);
+ parser.declareString(HB::highlighterType, TYPE_FIELD);
+ parser.declareString(HB::fragmenter, FRAGMENTER_FIELD);
+ parser.declareInt(HB::noMatchSize, NO_MATCH_SIZE_FIELD);
+ parser.declareBoolean(HB::forceSource, FORCE_SOURCE_FIELD);
+ parser.declareInt(HB::phraseLimit, PHRASE_LIMIT_FIELD);
+ parser.declareObject(HB::options, (XContentParser p, QueryParseContext c) -> {
+ try {
+ return p.map();
+ } catch (IOException e) {
+ throw new RuntimeException("Error parsing options", e);
+ }
+ }, OPTIONS_FIELD);
+ parser.declareObject(HB::highlightQuery, (XContentParser p, QueryParseContext c) -> {
+ try {
+ return c.parseInnerQueryBuilder().orElse(null);
+ } catch (IOException e) {
+ throw new RuntimeException("Error parsing query", e);
+ }
+ }, HIGHLIGHT_QUERY_FIELD);
+ return (QueryParseContext c, HB hb) -> {
+ try {
+ parser.parse(c.parser(), hb, c);
+ if (hb.preTags() != null && hb.postTags() == null) {
+ throw new ParsingException(c.parser().getTokenLocation(),
+ "pre_tags are set but post_tags are not set");
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ return hb;
+ };
+ }
+
+ @Override
+ public final int hashCode() {
+ return Objects.hash(getClass(), Arrays.hashCode(preTags), Arrays.hashCode(postTags), fragmentSize,
+ numOfFragments, highlighterType, fragmenter, highlightQuery, order, highlightFilter,
+ forceSource, boundaryMaxScan, Arrays.hashCode(boundaryChars), noMatchSize,
+ phraseLimit, options, requireFieldMatch, doHashCode());
+ }
+
+ /**
+ * fields only present in subclass should contribute to hashCode in the implementation
+ */
+ protected abstract int doHashCode();
+
+ @Override
+ public final boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null || getClass() != obj.getClass()) {
+ return false;
+ }
+ @SuppressWarnings("unchecked")
+ HB other = (HB) obj;
+ return Arrays.equals(preTags, other.preTags) &&
+ Arrays.equals(postTags, other.postTags) &&
+ Objects.equals(fragmentSize, other.fragmentSize) &&
+ Objects.equals(numOfFragments, other.numOfFragments) &&
+ Objects.equals(highlighterType, other.highlighterType) &&
+ Objects.equals(fragmenter, other.fragmenter) &&
+ Objects.equals(highlightQuery, other.highlightQuery) &&
+ Objects.equals(order, other.order) &&
+ Objects.equals(highlightFilter, other.highlightFilter) &&
+ Objects.equals(forceSource, other.forceSource) &&
+ Objects.equals(boundaryMaxScan, other.boundaryMaxScan) &&
+ Arrays.equals(boundaryChars, other.boundaryChars) &&
+ Objects.equals(noMatchSize, other.noMatchSize) &&
+ Objects.equals(phraseLimit, other.phraseLimit) &&
+ Objects.equals(options, other.options) &&
+ Objects.equals(requireFieldMatch, other.requireFieldMatch) &&
+ doEquals(other);
+ }
+
+ /**
+ * fields only present in subclass should be checked for equality in the implementation
+ */
+ protected abstract boolean doEquals(HB other);
+}
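The fluent setters above return the concrete builder type (HB), so the common options are meant to be chained from the subclasses. A minimal usage sketch, assuming the HighlightBuilder and HighlightBuilder.Field classes added later in this commit; tag values and field names are illustrative only:

    HighlightBuilder highlight = new HighlightBuilder()
            .preTags("<mark>")                  // inherited from AbstractHighlighterBuilder
            .postTags("</mark>")
            .fragmentSize(150)
            .numOfFragments(3)
            .order("score")                     // delegates to HighlightBuilder.Order.fromString(...)
            .requireFieldMatch(false)
            .field(new HighlightBuilder.Field("title").numOfFragments(0));  // 0 fragments -> whole field content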
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/CustomQueryScorer.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/CustomQueryScorer.java
new file mode 100644
index 0000000000..b62d28f8ab
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/CustomQueryScorer.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.WeightedSpanTerm;
+import org.apache.lucene.search.highlight.WeightedSpanTermExtractor;
+import org.apache.lucene.spatial.geopoint.search.GeoPointInBBoxQuery;
+import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery;
+import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
+import org.elasticsearch.index.query.HasChildQueryBuilder;
+
+import java.io.IOException;
+import java.util.Map;
+
+public final class CustomQueryScorer extends QueryScorer {
+
+ public CustomQueryScorer(Query query, IndexReader reader, String field,
+ String defaultField) {
+ super(query, reader, field, defaultField);
+ }
+
+ public CustomQueryScorer(Query query, IndexReader reader, String field) {
+ super(query, reader, field);
+ }
+
+ public CustomQueryScorer(Query query, String field, String defaultField) {
+ super(query, field, defaultField);
+ }
+
+ public CustomQueryScorer(Query query, String field) {
+ super(query, field);
+ }
+
+ public CustomQueryScorer(Query query) {
+ super(query);
+ }
+
+ public CustomQueryScorer(WeightedSpanTerm[] weightedTerms) {
+ super(weightedTerms);
+ }
+
+ @Override
+ protected WeightedSpanTermExtractor newTermExtractor(String defaultField) {
+ return defaultField == null ? new CustomWeightedSpanTermExtractor()
+ : new CustomWeightedSpanTermExtractor(defaultField);
+ }
+
+ private static class CustomWeightedSpanTermExtractor extends WeightedSpanTermExtractor {
+
+ public CustomWeightedSpanTermExtractor() {
+ super();
+ }
+
+ public CustomWeightedSpanTermExtractor(String defaultField) {
+ super(defaultField);
+ }
+
+ @Override
+ protected void extractUnknownQuery(Query query,
+ Map<String, WeightedSpanTerm> terms) throws IOException {
+ if (query instanceof FunctionScoreQuery) {
+ query = ((FunctionScoreQuery) query).getSubQuery();
+ extract(query, 1F, terms);
+ } else if (query instanceof FiltersFunctionScoreQuery) {
+ query = ((FiltersFunctionScoreQuery) query).getSubQuery();
+ extract(query, 1F, terms);
+ } else if (terms.isEmpty()) {
+ extractWeightedTerms(terms, query, 1F);
+ }
+ }
+
+ protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
+ if (query instanceof GeoPointInBBoxQuery) {
+ // skip all geo queries, see https://issues.apache.org/jira/browse/LUCENE-7293 and
+ // https://github.com/elastic/elasticsearch/issues/17537
+ return;
+ } else if (query instanceof HasChildQueryBuilder.LateParsingQuery) {
+ // skip has_child or has_parent queries, see: https://github.com/elastic/elasticsearch/issues/14999
+ return;
+ }
+
+ super.extract(query, boost, terms);
+ }
+ }
+}
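This scorer is what the plain highlighter in this package feeds into Lucene's standard Highlighter. A hedged sketch of that pipeline, assuming a surrounding method that propagates the checked Lucene exceptions; the field name, text, and tags are illustrative, and TermQuery, SimpleHTMLFormatter, Highlighter, and StandardAnalyzer are plain Lucene classes, not part of this commit:

    Query query = new TermQuery(new Term("body", "highlight"));
    CustomQueryScorer scorer = new CustomQueryScorer(query, "body");
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<em>", "</em>"), scorer);
    // term extraction goes through CustomWeightedSpanTermExtractor above,
    // which unwraps function_score queries and skips geo / has_child queries
    String fragment = highlighter.getBestFragment(new StandardAnalyzer(), "body", "text to highlight goes here");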
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java
new file mode 100644
index 0000000000..8110780a9b
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.search.highlight.Encoder;
+import org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder;
+import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
+import org.apache.lucene.search.vectorhighlight.CustomFieldQuery;
+import org.apache.lucene.search.vectorhighlight.FieldFragList;
+import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
+import org.apache.lucene.search.vectorhighlight.FieldQuery;
+import org.apache.lucene.search.vectorhighlight.FragListBuilder;
+import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
+import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
+import org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner;
+import org.apache.lucene.search.vectorhighlight.SimpleFieldFragList;
+import org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder;
+import org.apache.lucene.search.vectorhighlight.SingleFragListBuilder;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.text.Text;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
+import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.internal.SearchContext;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ *
+ */
+public class FastVectorHighlighter implements Highlighter {
+
+ private static final SimpleBoundaryScanner DEFAULT_BOUNDARY_SCANNER = new SimpleBoundaryScanner();
+
+ private static final String CACHE_KEY = "highlight-fsv";
+ private final Boolean termVectorMultiValue;
+
+ public FastVectorHighlighter(Settings settings) {
+ this.termVectorMultiValue = settings.getAsBoolean("search.highlight.term_vector_multi_value", true);
+ }
+
+ @Override
+ public HighlightField highlight(HighlighterContext highlighterContext) {
+ SearchContextHighlight.Field field = highlighterContext.field;
+ SearchContext context = highlighterContext.context;
+ FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
+ FieldMapper mapper = highlighterContext.mapper;
+
+ if (canHighlight(mapper) == false) {
+ throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with term vector with position offsets to be used with fast vector highlighter");
+ }
+
+ Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
+
+ if (!hitContext.cache().containsKey(CACHE_KEY)) {
+ hitContext.cache().put(CACHE_KEY, new HighlighterEntry());
+ }
+ HighlighterEntry cache = (HighlighterEntry) hitContext.cache().get(CACHE_KEY);
+
+ try {
+ FieldQuery fieldQuery;
+ if (field.fieldOptions().requireFieldMatch()) {
+ if (cache.fieldMatchFieldQuery == null) {
+ // we use the top level reader to rewrite the query against all readers, and we cache it across hits (and across readers...)
+ cache.fieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query, hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
+ }
+ fieldQuery = cache.fieldMatchFieldQuery;
+ } else {
+ if (cache.noFieldMatchFieldQuery == null) {
+ // we use the top level reader to rewrite the query against all readers, and we cache it across hits (and across readers...)
+ cache.noFieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query, hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
+ }
+ fieldQuery = cache.noFieldMatchFieldQuery;
+ }
+
+ MapperHighlightEntry entry = cache.mappers.get(mapper);
+ if (entry == null) {
+ FragListBuilder fragListBuilder;
+ BaseFragmentsBuilder fragmentsBuilder;
+
+ BoundaryScanner boundaryScanner = DEFAULT_BOUNDARY_SCANNER;
+ if (field.fieldOptions().boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN || field.fieldOptions().boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) {
+ boundaryScanner = new SimpleBoundaryScanner(field.fieldOptions().boundaryMaxScan(), field.fieldOptions().boundaryChars());
+ }
+ boolean forceSource = context.highlight().forceSource(field);
+ if (field.fieldOptions().numberOfFragments() == 0) {
+ fragListBuilder = new SingleFragListBuilder();
+
+ if (!forceSource && mapper.fieldType().stored()) {
+ fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
+ } else {
+ fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
+ }
+ } else {
+ fragListBuilder = field.fieldOptions().fragmentOffset() == -1 ? new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fieldOptions().fragmentOffset());
+ if (field.fieldOptions().scoreOrdered()) {
+ if (!forceSource && mapper.fieldType().stored()) {
+ fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
+ } else {
+ fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(mapper, context, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
+ }
+ } else {
+ if (!forceSource && mapper.fieldType().stored()) {
+ fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
+ } else {
+ fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
+ }
+ }
+ }
+ fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
+ entry = new MapperHighlightEntry();
+ entry.fragListBuilder = fragListBuilder;
+ entry.fragmentsBuilder = fragmentsBuilder;
+ if (cache.fvh == null) {
+ // parameters to FVH are not required since:
+ // first two booleans are not relevant since they are set on the CustomFieldQuery (phrase and fieldMatch)
+ // fragment builders are used explicitly
+ cache.fvh = new org.apache.lucene.search.vectorhighlight.FastVectorHighlighter();
+ }
+ CustomFieldQuery.highlightFilters.set(field.fieldOptions().highlightFilter());
+ cache.mappers.put(mapper, entry);
+ }
+ cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit());
+
+ String[] fragments;
+
+ // a HACK to make the highlighter do highlighting, even though it is using the single frag list builder
+ int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? Integer.MAX_VALUE : field.fieldOptions().numberOfFragments();
+ int fragmentCharSize = field.fieldOptions().numberOfFragments() == 0 ? Integer.MAX_VALUE : field.fieldOptions().fragmentCharSize();
+ // we highlight against the low level reader and docId, because if we load source, we want to reuse it if possible
+ // Only send matched fields if they were requested to save time.
+ if (field.fieldOptions().matchedFields() != null && !field.fieldOptions().matchedFields().isEmpty()) {
+ fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), mapper.fieldType().name(), field.fieldOptions().matchedFields(), fragmentCharSize,
+ numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
+ } else {
+ fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), mapper.fieldType().name(), fragmentCharSize,
+ numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
+ }
+
+ if (fragments != null && fragments.length > 0) {
+ return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
+ }
+
+ int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize();
+ if (noMatchSize > 0) {
+ // Essentially we just request that a fragment is built from 0 to noMatchSize using the normal fragmentsBuilder
+ FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/);
+ fieldFragList.add(0, noMatchSize, Collections.<WeightedPhraseInfo>emptyList());
+ fragments = entry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(), mapper.fieldType().name(),
+ fieldFragList, 1, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
+ if (fragments != null && fragments.length > 0) {
+ return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
+ }
+ }
+
+ return null;
+
+ } catch (Exception e) {
+ throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
+ }
+ }
+
+ @Override
+ public boolean canHighlight(FieldMapper fieldMapper) {
+ return fieldMapper.fieldType().storeTermVectors() && fieldMapper.fieldType().storeTermVectorOffsets() && fieldMapper.fieldType().storeTermVectorPositions();
+ }
+
+ private class MapperHighlightEntry {
+ public FragListBuilder fragListBuilder;
+ public FragmentsBuilder fragmentsBuilder;
+
+ public org.apache.lucene.search.highlight.Highlighter highlighter;
+ }
+
+ private class HighlighterEntry {
+ public org.apache.lucene.search.vectorhighlight.FastVectorHighlighter fvh;
+ public FieldQuery noFieldMatchFieldQuery;
+ public FieldQuery fieldMatchFieldQuery;
+ public Map<FieldMapper, MapperHighlightEntry> mappers = new HashMap<>();
+ }
+}
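canHighlight() above spells out the precondition: the fast vector highlighter only works on fields whose mapping stores term vectors with both positions and offsets. A minimal sketch of requesting it through the builder API from this commit (field name and option values are illustrative):

    HighlightBuilder highlight = new HighlightBuilder()
            .field(new HighlightBuilder.Field("body")
                    .highlighterType("fvh")      // select the fast vector highlighter
                    .boundaryMaxScan(20)         // SimpleBoundaryScanner settings, see above
                    .phraseLimit(128));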
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FragmentBuilderHelper.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FragmentBuilderHelper.java
new file mode 100644
index 0000000000..ac0dab3a63
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FragmentBuilderHelper.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
+import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
+import org.apache.lucene.util.CollectionUtil;
+import org.apache.lucene.util.Version;
+import org.elasticsearch.index.analysis.CustomAnalyzer;
+import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory;
+import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
+import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
+import org.elasticsearch.index.analysis.NGramTokenizerFactory;
+import org.elasticsearch.index.analysis.NamedAnalyzer;
+import org.elasticsearch.index.analysis.TokenFilterFactory;
+import org.elasticsearch.index.analysis.WordDelimiterTokenFilterFactory;
+import org.elasticsearch.index.mapper.FieldMapper;
+
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * Simple helper class for {@link FastVectorHighlighter} {@link FragmentsBuilder} implementations.
+ */
+public final class FragmentBuilderHelper {
+
+ private FragmentBuilderHelper() {
+ // no instance
+ }
+
+ /**
+ * Fixes problems with broken analysis chains where positions and offsets are messed up, which can lead to
+ * a {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter}
+ */
+ public static WeightedFragInfo fixWeightedFragInfo(FieldMapper mapper, Field[] values, WeightedFragInfo fragInfo) {
+ assert fragInfo != null : "FragInfo must not be null";
+ assert mapper.fieldType().name().equals(values[0].name()) : "Expected FieldMapper for field " + values[0].name();
+ if (!fragInfo.getSubInfos().isEmpty() && (containsBrokenAnalysis(mapper.fieldType().indexAnalyzer()))) {
+ /* This is a special case where broken analysis like WDF is used for term-vector creation at index-time
+ * which can potentially mess up the offsets. To prevent a StringIndexOutOfBoundsException we need to re-sort
+ * the fragments based on their offsets rather than using solely the positions as is done in
+ * the FastVectorHighlighter. Yet, this is really a Lucene problem and should be fixed in Lucene rather
+ * than in this hack... aka. "we are working on it!" */
+ final List<SubInfo> subInfos = fragInfo.getSubInfos();
+ CollectionUtil.introSort(subInfos, new Comparator<SubInfo>() {
+ @Override
+ public int compare(SubInfo o1, SubInfo o2) {
+ int startOffset = o1.getTermsOffsets().get(0).getStartOffset();
+ int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset();
+ return FragmentBuilderHelper.compare(startOffset, startOffset2);
+ }
+ });
+ return new WeightedFragInfo(Math.min(fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset(),
+ fragInfo.getStartOffset()), fragInfo.getEndOffset(), subInfos, fragInfo.getTotalBoost());
+ } else {
+ return fragInfo;
+ }
+ }
+
+ private static int compare(int x, int y) {
+ return (x < y) ? -1 : ((x == y) ? 0 : 1);
+ }
+
+ private static boolean containsBrokenAnalysis(Analyzer analyzer) {
+ // TODO maybe we need a getter on NamedAnalyzer that tells if this uses broken analysis
+ if (analyzer instanceof NamedAnalyzer) {
+ analyzer = ((NamedAnalyzer) analyzer).analyzer();
+ }
+ if (analyzer instanceof CustomAnalyzer) {
+ final CustomAnalyzer a = (CustomAnalyzer) analyzer;
+ TokenFilterFactory[] tokenFilters = a.tokenFilters();
+ for (TokenFilterFactory tokenFilterFactory : tokenFilters) {
+ if (tokenFilterFactory instanceof WordDelimiterTokenFilterFactory
+ || tokenFilterFactory instanceof EdgeNGramTokenFilterFactory) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+}
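fixWeightedFragInfo is intended to be called from a FragmentsBuilder right before a fragment is rendered. A hedged sketch of such a call site, overriding Lucene's BaseFragmentsBuilder hook; the class name is illustrative and presumably mirrors what the fragments builders added later in this commit do (imports as in the file above, plus Lucene's SimpleFragmentsBuilder and Encoder):

    class OffsetFixingFragmentsBuilder extends org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder {
        private final FieldMapper mapper;

        OffsetFixingFragmentsBuilder(FieldMapper mapper, String[] preTags, String[] postTags) {
            super(preTags, postTags);
            this.mapper = mapper;
        }

        @Override
        protected String makeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
                String[] preTags, String[] postTags, Encoder encoder) {
            // re-sort sub infos coming from "broken" analysis chains before Lucene assembles the fragment text
            WeightedFragInfo fixed = FragmentBuilderHelper.fixWeightedFragInfo(mapper, values, fragInfo);
            return super.makeFragment(buffer, index, values, fixed, preTags, postTags, encoder);
        }
    }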
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilder.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilder.java
new file mode 100644
index 0000000000..fe4587826c
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilder.java
@@ -0,0 +1,525 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.xcontent.ObjectParser;
+import org.elasticsearch.common.xcontent.ObjectParser.NamedObjectParser;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.query.QueryParseContext;
+import org.elasticsearch.index.query.QueryShardContext;
+import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.FieldOptions;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.Set;
+import java.util.function.BiFunction;
+
+import static org.elasticsearch.common.xcontent.ObjectParser.fromList;
+
+/**
+ * A builder for search highlighting. Settings can control how large fields
+ * are summarized to show only selected snippets ("fragments") containing search terms.
+ *
+ * @see org.elasticsearch.search.builder.SearchSourceBuilder#highlight()
+ */
+public class HighlightBuilder extends AbstractHighlighterBuilder<HighlightBuilder> {
+ /** default for whether to highlight fields based on the source even if stored separately */
+ public static final boolean DEFAULT_FORCE_SOURCE = false;
+ /** default for whether a field should be highlighted only if a query matches that field */
+ public static final boolean DEFAULT_REQUIRE_FIELD_MATCH = true;
+ /** default for whether <tt>fvh</tt> should provide highlighting on filter clauses */
+ public static final boolean DEFAULT_HIGHLIGHT_FILTER = false;
+ /** default for highlight fragments being ordered by score */
+ public static final boolean DEFAULT_SCORE_ORDERED = false;
+ /** the default encoder setting */
+ public static final String DEFAULT_ENCODER = "default";
+ /** default for the maximum number of phrases the fvh will consider */
+ public static final int DEFAULT_PHRASE_LIMIT = 256;
+ /** default for fragment size when there are no matches */
+ public static final int DEFAULT_NO_MATCH_SIZE = 0;
+ /** the default number of fragments for highlighting */
+ public static final int DEFAULT_NUMBER_OF_FRAGMENTS = 5;
+ /** the default fragment size in characters */
+ public static final int DEFAULT_FRAGMENT_CHAR_SIZE = 100;
+ /** the default opening tag */
+ public static final String[] DEFAULT_PRE_TAGS = new String[]{"<em>"};
+ /** the default closing tag */
+ public static final String[] DEFAULT_POST_TAGS = new String[]{"</em>"};
+
+ /** the default opening tags when <tt>tag_schema = "styled"</tt> */
+ public static final String[] DEFAULT_STYLED_PRE_TAG = {
+ "<em class=\"hlt1\">", "<em class=\"hlt2\">", "<em class=\"hlt3\">",
+ "<em class=\"hlt4\">", "<em class=\"hlt5\">", "<em class=\"hlt6\">",
+ "<em class=\"hlt7\">", "<em class=\"hlt8\">", "<em class=\"hlt9\">",
+ "<em class=\"hlt10\">"
+ };
+ /** the default closing tags when <tt>tag_schema = "styled"</tt> */
+ public static final String[] DEFAULT_STYLED_POST_TAGS = {"</em>"};
+
+ /**
+ * a {@link FieldOptions} with default settings
+ */
+ static final FieldOptions defaultOptions = new SearchContextHighlight.FieldOptions.Builder()
+ .preTags(DEFAULT_PRE_TAGS).postTags(DEFAULT_POST_TAGS).scoreOrdered(DEFAULT_SCORE_ORDERED)
+ .highlightFilter(DEFAULT_HIGHLIGHT_FILTER).requireFieldMatch(DEFAULT_REQUIRE_FIELD_MATCH)
+ .forceSource(DEFAULT_FORCE_SOURCE).fragmentCharSize(DEFAULT_FRAGMENT_CHAR_SIZE)
+ .numberOfFragments(DEFAULT_NUMBER_OF_FRAGMENTS).encoder(DEFAULT_ENCODER)
+ .boundaryMaxScan(SimpleBoundaryScanner.DEFAULT_MAX_SCAN).boundaryChars(SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS)
+ .noMatchSize(DEFAULT_NO_MATCH_SIZE).phraseLimit(DEFAULT_PHRASE_LIMIT).build();
+
+ private final List<Field> fields = new ArrayList<>();
+
+ private String encoder;
+
+ private boolean useExplicitFieldOrder = false;
+
+ public HighlightBuilder() {
+ }
+
+ /**
+ * Read from a stream.
+ */
+ public HighlightBuilder(StreamInput in) throws IOException {
+ super(in);
+ encoder(in.readOptionalString());
+ useExplicitFieldOrder(in.readBoolean());
+ int fields = in.readVInt();
+ for (int i = 0; i < fields; i++) {
+ field(new Field(in));
+ }
+ }
+
+ @Override
+ protected void doWriteTo(StreamOutput out) throws IOException {
+ out.writeOptionalString(encoder);
+ out.writeBoolean(useExplicitFieldOrder);
+ out.writeVInt(fields.size());
+ for (int i = 0; i < fields.size(); i++) {
+ fields.get(i).writeTo(out);
+ }
+ }
+
+ /**
+ * Adds a field to be highlighted with a default fragment size of 100 characters and
+ * a default number of fragments of 5, using the default encoder
+ *
+ * @param name The field to highlight
+ */
+ public HighlightBuilder field(String name) {
+ return field(new Field(name));
+ }
+
+ /**
+ * Adds a field to be highlighted with a provided fragment size (in characters), and
+ * default number of fragments of 5.
+ *
+ * @param name The field to highlight
+ * @param fragmentSize The size of a fragment in characters
+ */
+ public HighlightBuilder field(String name, int fragmentSize) {
+ return field(new Field(name).fragmentSize(fragmentSize));
+ }
+
+
+ /**
+ * Adds a field to be highlighted with a provided fragment size (in characters), and
+ * a provided (maximum) number of fragments.
+ *
+ * @param name The field to highlight
+ * @param fragmentSize The size of a fragment in characters
+ * @param numberOfFragments The (maximum) number of fragments
+ */
+ public HighlightBuilder field(String name, int fragmentSize, int numberOfFragments) {
+ return field(new Field(name).fragmentSize(fragmentSize).numOfFragments(numberOfFragments));
+ }
+
+ /**
+ * Adds a field to be highlighted with a provided fragment size (in characters), and
+ * a provided (maximum) number of fragments.
+ *
+ * @param name The field to highlight
+ * @param fragmentSize The size of a fragment in characters
+ * @param numberOfFragments The (maximum) number of fragments
+ * @param fragmentOffset The offset from the start of the fragment to the start of the highlight
+ */
+ public HighlightBuilder field(String name, int fragmentSize, int numberOfFragments, int fragmentOffset) {
+ return field(new Field(name).fragmentSize(fragmentSize).numOfFragments(numberOfFragments)
+ .fragmentOffset(fragmentOffset));
+ }
+
+ public HighlightBuilder field(Field field) {
+ fields.add(field);
+ return this;
+ }
+
+ void fields(List<Field> fields) {
+ this.fields.addAll(fields);
+ }
+
+ public List<Field> fields() {
+ return this.fields;
+ }
+
+ /**
+ * Set a tag scheme that encapsulates built-in pre and post tags. The allowed schemes
+ * are <tt>styled</tt> and <tt>default</tt>.
+ *
+ * @param schemaName The tag scheme name
+ */
+ public HighlightBuilder tagsSchema(String schemaName) {
+ switch (schemaName) {
+ case "default":
+ preTags(DEFAULT_PRE_TAGS);
+ postTags(DEFAULT_POST_TAGS);
+ break;
+ case "styled":
+ preTags(DEFAULT_STYLED_PRE_TAG);
+ postTags(DEFAULT_STYLED_POST_TAGS);
+ break;
+ default:
+ throw new IllegalArgumentException("Unknown tag schema ["+ schemaName +"]");
+ }
+ return this;
+ }
+
+ /**
+ * Set the encoder to use for the highlighting. The supported encoders
+ * are <tt>default</tt> and <tt>html</tt>.
+ *
+ * @param encoder the encoder name
+ */
+ public HighlightBuilder encoder(String encoder) {
+ this.encoder = encoder;
+ return this;
+ }
+
+ /**
+ * Getter for {@link #encoder(String)}
+ */
+ public String encoder() {
+ return this.encoder;
+ }
+
+ /**
+ * Send the fields to be highlighted using a syntax that is specific about the order in which they should be highlighted.
+ * @return this for chaining
+ */
+ public HighlightBuilder useExplicitFieldOrder(boolean useExplicitFieldOrder) {
+ this.useExplicitFieldOrder = useExplicitFieldOrder;
+ return this;
+ }
+
+ /**
+ * Gets value set with {@link #useExplicitFieldOrder(boolean)}
+ */
+ public Boolean useExplicitFieldOrder() {
+ return this.useExplicitFieldOrder;
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject();
+ innerXContent(builder);
+ builder.endObject();
+ return builder;
+ }
+
+ private static final BiFunction<QueryParseContext, HighlightBuilder, HighlightBuilder> PARSER;
+ static {
+ ObjectParser<HighlightBuilder, QueryParseContext> parser = new ObjectParser<>("highlight");
+ parser.declareString(HighlightBuilder::tagsSchema, new ParseField("tags_schema"));
+ parser.declareString(HighlightBuilder::encoder, ENCODER_FIELD);
+ parser.declareNamedObjects(HighlightBuilder::fields, Field.PARSER, (HighlightBuilder hb) -> hb.useExplicitFieldOrder(true),
+ FIELDS_FIELD);
+ PARSER = setupParser(parser);
+ }
+ public static HighlightBuilder fromXContent(QueryParseContext c) {
+ return PARSER.apply(c, new HighlightBuilder());
+ }
+
+ public SearchContextHighlight build(QueryShardContext context) throws IOException {
+ // create template global options that are later merged with any partial field options
+ final SearchContextHighlight.FieldOptions.Builder globalOptionsBuilder = new SearchContextHighlight.FieldOptions.Builder();
+ globalOptionsBuilder.encoder(this.encoder);
+ transferOptions(this, globalOptionsBuilder, context);
+
+ // overwrite unset global options by default values
+ globalOptionsBuilder.merge(defaultOptions);
+
+ // create field options
+ Collection<org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field> fieldOptions = new ArrayList<>();
+ for (Field field : this.fields) {
+ final SearchContextHighlight.FieldOptions.Builder fieldOptionsBuilder = new SearchContextHighlight.FieldOptions.Builder();
+ fieldOptionsBuilder.fragmentOffset(field.fragmentOffset);
+ if (field.matchedFields != null) {
+ Set<String> matchedFields = new HashSet<String>(field.matchedFields.length);
+ Collections.addAll(matchedFields, field.matchedFields);
+ fieldOptionsBuilder.matchedFields(matchedFields);
+ }
+ transferOptions(field, fieldOptionsBuilder, context);
+ fieldOptions.add(new SearchContextHighlight.Field(field.name(), fieldOptionsBuilder
+ .merge(globalOptionsBuilder.build()).build()));
+ }
+ return new SearchContextHighlight(fieldOptions);
+ }
+
+ /**
+ * Transfers field options present in the input {@link AbstractHighlighterBuilder} to the receiving
+ * {@link FieldOptions.Builder}, effectively overwriting existing settings
+ * @param targetOptionsBuilder the receiving options builder
+ * @param highlighterBuilder highlight builder with the input options
+ * @param context needed to convert {@link QueryBuilder} to {@link Query}
+ * @throws IOException on errors parsing any optional nested highlight query
+ */
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ private static void transferOptions(AbstractHighlighterBuilder highlighterBuilder,
+ SearchContextHighlight.FieldOptions.Builder targetOptionsBuilder, QueryShardContext context) throws IOException {
+ if (highlighterBuilder.preTags != null) {
+ targetOptionsBuilder.preTags(highlighterBuilder.preTags);
+ }
+ if (highlighterBuilder.postTags != null) {
+ targetOptionsBuilder.postTags(highlighterBuilder.postTags);
+ }
+ if (highlighterBuilder.order != null) {
+ targetOptionsBuilder.scoreOrdered(highlighterBuilder.order == Order.SCORE);
+ }
+ if (highlighterBuilder.highlightFilter != null) {
+ targetOptionsBuilder.highlightFilter(highlighterBuilder.highlightFilter);
+ }
+ if (highlighterBuilder.fragmentSize != null) {
+ targetOptionsBuilder.fragmentCharSize(highlighterBuilder.fragmentSize);
+ }
+ if (highlighterBuilder.numOfFragments != null) {
+ targetOptionsBuilder.numberOfFragments(highlighterBuilder.numOfFragments);
+ }
+ if (highlighterBuilder.requireFieldMatch != null) {
+ targetOptionsBuilder.requireFieldMatch(highlighterBuilder.requireFieldMatch);
+ }
+ if (highlighterBuilder.boundaryMaxScan != null) {
+ targetOptionsBuilder.boundaryMaxScan(highlighterBuilder.boundaryMaxScan);
+ }
+ if (highlighterBuilder.boundaryChars != null) {
+ targetOptionsBuilder.boundaryChars(convertCharArray(highlighterBuilder.boundaryChars));
+ }
+ if (highlighterBuilder.highlighterType != null) {
+ targetOptionsBuilder.highlighterType(highlighterBuilder.highlighterType);
+ }
+ if (highlighterBuilder.fragmenter != null) {
+ targetOptionsBuilder.fragmenter(highlighterBuilder.fragmenter);
+ }
+ if (highlighterBuilder.noMatchSize != null) {
+ targetOptionsBuilder.noMatchSize(highlighterBuilder.noMatchSize);
+ }
+ if (highlighterBuilder.forceSource != null) {
+ targetOptionsBuilder.forceSource(highlighterBuilder.forceSource);
+ }
+ if (highlighterBuilder.phraseLimit != null) {
+ targetOptionsBuilder.phraseLimit(highlighterBuilder.phraseLimit);
+ }
+ if (highlighterBuilder.options != null) {
+ targetOptionsBuilder.options(highlighterBuilder.options);
+ }
+ if (highlighterBuilder.highlightQuery != null) {
+ targetOptionsBuilder.highlightQuery(QueryBuilder.rewriteQuery(highlighterBuilder.highlightQuery, context).toQuery(context));
+ }
+ }
+
+ static Character[] convertCharArray(char[] array) {
+ if (array == null) {
+ return null;
+ }
+ Character[] charArray = new Character[array.length];
+ for (int i = 0; i < array.length; i++) {
+ charArray[i] = array[i];
+ }
+ return charArray;
+ }
+
+ @Override
+ public void innerXContent(XContentBuilder builder) throws IOException {
+ // first write common options
+ commonOptionsToXContent(builder);
+ // special options for top-level highlighter
+ if (encoder != null) {
+ builder.field(ENCODER_FIELD.getPreferredName(), encoder);
+ }
+ if (fields.size() > 0) {
+ if (useExplicitFieldOrder) {
+ builder.startArray(FIELDS_FIELD.getPreferredName());
+ } else {
+ builder.startObject(FIELDS_FIELD.getPreferredName());
+ }
+ for (Field field : fields) {
+ if (useExplicitFieldOrder) {
+ builder.startObject();
+ }
+ field.innerXContent(builder);
+ if (useExplicitFieldOrder) {
+ builder.endObject();
+ }
+ }
+ if (useExplicitFieldOrder) {
+ builder.endArray();
+ } else {
+ builder.endObject();
+ }
+ }
+ }
+
+ @Override
+ protected int doHashCode() {
+ return Objects.hash(encoder, useExplicitFieldOrder, fields);
+ }
+
+ @Override
+ protected boolean doEquals(HighlightBuilder other) {
+ return Objects.equals(encoder, other.encoder) &&
+ Objects.equals(useExplicitFieldOrder, other.useExplicitFieldOrder) &&
+ Objects.equals(fields, other.fields);
+ }
+
+ public static class Field extends AbstractHighlighterBuilder<Field> {
+ static final NamedObjectParser<Field, QueryParseContext> PARSER;
+ static {
+ ObjectParser<Field, QueryParseContext> parser = new ObjectParser<>("highlight_field");
+ parser.declareInt(Field::fragmentOffset, FRAGMENT_OFFSET_FIELD);
+ parser.declareStringArray(fromList(String.class, Field::matchedFields), MATCHED_FIELDS_FIELD);
+ BiFunction<QueryParseContext, Field, Field> decoratedParser = setupParser(parser);
+ PARSER = (XContentParser p, QueryParseContext c, String name) -> decoratedParser.apply(c, new Field(name));
+ }
+
+ private final String name;
+
+ int fragmentOffset = -1;
+
+ String[] matchedFields;
+
+ public Field(String name) {
+ this.name = name;
+ }
+
+ /**
+ * Read from a stream.
+ */
+ public Field(StreamInput in) throws IOException {
+ super(in);
+ name = in.readString();
+ fragmentOffset(in.readVInt());
+ matchedFields(in.readOptionalStringArray());
+ }
+
+ @Override
+ protected void doWriteTo(StreamOutput out) throws IOException {
+ out.writeString(name);
+ out.writeVInt(fragmentOffset);
+ out.writeOptionalStringArray(matchedFields);
+ }
+
+ public String name() {
+ return name;
+ }
+
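+ /**
+ * Set the fragment_offset option for this field, i.e. the margin from which highlighting should start.
+ * Only honored by the fast vector highlighter.
+ */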
+ public Field fragmentOffset(int fragmentOffset) {
+ this.fragmentOffset = fragmentOffset;
+ return this;
+ }
+
+ /**
+ * Set the matched fields to highlight against this field's data. Defaults to null, meaning just
+ * the named field. If you provide a list of fields here, don't forget to include this field's name,
+ * as it is not automatically included.
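+ * For example, to highlight a {@code content} field using matches from both the field itself and a
+ * differently analyzed sub-field, use {@code matchedFields("content", "content.plain")}.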
+ */
+ public Field matchedFields(String... matchedFields) {
+ this.matchedFields = matchedFields;
+ return this;
+ }
+
+ @Override
+ public void innerXContent(XContentBuilder builder) throws IOException {
+ builder.startObject(name);
+ // write common options
+ commonOptionsToXContent(builder);
+ // write special field-highlighter options
+ if (fragmentOffset != -1) {
+ builder.field(FRAGMENT_OFFSET_FIELD.getPreferredName(), fragmentOffset);
+ }
+ if (matchedFields != null) {
+ builder.field(MATCHED_FIELDS_FIELD.getPreferredName(), matchedFields);
+ }
+ builder.endObject();
+ }
+
+ @Override
+ protected int doHashCode() {
+ return Objects.hash(name, fragmentOffset, Arrays.hashCode(matchedFields));
+ }
+
+ @Override
+ protected boolean doEquals(Field other) {
+ return Objects.equals(name, other.name) &&
+ Objects.equals(fragmentOffset, other.fragmentOffset) &&
+ Arrays.equals(matchedFields, other.matchedFields);
+ }
+ }
+
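+ /**
+ * Controls how highlighted fragments are ordered: as they occur in the field ({@code none}) or
+ * sorted by score ({@code score}).
+ */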
+ public enum Order implements Writeable {
+ NONE, SCORE;
+
+ public static Order readFromStream(StreamInput in) throws IOException {
+ int ordinal = in.readVInt();
+ if (ordinal < 0 || ordinal >= values().length) {
+ throw new IOException("Unknown Order ordinal [" + ordinal + "]");
+ }
+ return values()[ordinal];
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ out.writeVInt(this.ordinal());
+ }
+
+ public static Order fromString(String order) {
+ if (order.toUpperCase(Locale.ROOT).equals(SCORE.name())) {
+ return Order.SCORE;
+ }
+ return NONE;
+ }
+
+ @Override
+ public String toString() {
+ return name().toLowerCase(Locale.ROOT);
+ }
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightField.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightField.java
new file mode 100644
index 0000000000..91fde32c88
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightField.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Streamable;
+import org.elasticsearch.common.text.Text;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * A field highlighted with its highlighted fragments.
+ */
+public class HighlightField implements Streamable {
+
+ private String name;
+
+ private Text[] fragments;
+
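+ // no-arg constructor used by readHighlightField to create an instance before reading it from a stream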
+ HighlightField() {
+ }
+
+ public HighlightField(String name, Text[] fragments) {
+ this.name = name;
+ this.fragments = fragments;
+ }
+
+ /**
+ * The name of the field highlighted.
+ */
+ public String name() {
+ return name;
+ }
+
+ /**
+ * The name of the field highlighted.
+ */
+ public String getName() {
+ return name();
+ }
+
+ /**
+ * The highlighted fragments, or <tt>null</tt> if highlighting failed (for example, because the field is not stored).
+ */
+ public Text[] fragments() {
+ return fragments;
+ }
+
+ /**
+ * The highlighted fragments, or <tt>null</tt> if highlighting failed (for example, because the field is not stored).
+ */
+ public Text[] getFragments() {
+ return fragments();
+ }
+
+ @Override
+ public String toString() {
+ return "[" + name + "], fragments[" + Arrays.toString(fragments) + "]";
+ }
+
+ public static HighlightField readHighlightField(StreamInput in) throws IOException {
+ HighlightField field = new HighlightField();
+ field.readFrom(in);
+ return field;
+ }
+
+ @Override
+ public void readFrom(StreamInput in) throws IOException {
+ name = in.readString();
+ if (in.readBoolean()) {
+ int size = in.readVInt();
+ if (size == 0) {
+ fragments = Text.EMPTY_ARRAY;
+ } else {
+ fragments = new Text[size];
+ for (int i = 0; i < size; i++) {
+ fragments[i] = in.readText();
+ }
+ }
+ }
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ out.writeString(name);
+ if (fragments == null) {
+ out.writeBoolean(false);
+ } else {
+ out.writeBoolean(true);
+ out.writeVInt(fragments.length);
+ for (Text fragment : fragments) {
+ out.writeText(fragment);
+ }
+ }
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java
new file mode 100644
index 0000000000..2909e71445
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.search.Query;
+import org.elasticsearch.common.component.AbstractComponent;
+import org.elasticsearch.common.regex.Regex;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.mapper.DocumentMapper;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
+import org.elasticsearch.index.mapper.SourceFieldMapper;
+import org.elasticsearch.index.mapper.StringFieldMapper;
+import org.elasticsearch.index.mapper.TextFieldMapper;
+import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.internal.SearchContext;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
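+ // highlighters tried in this order when no highlighter type has been configured for a field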
+ private static final List<String> STANDARD_HIGHLIGHTERS_BY_PRECEDENCE = Arrays.asList("fvh", "postings", "plain");
+
+ private final Map<String, Highlighter> highlighters;
+
+ public HighlightPhase(Settings settings, Map<String, Highlighter> highlighters) {
+ super(settings);
+ this.highlighters = highlighters;
+ }
+
+ @Override
+ public void hitExecute(SearchContext context, HitContext hitContext) {
+ if (context.highlight() == null) {
+ return;
+ }
+ Map<String, HighlightField> highlightFields = new HashMap<>();
+ for (SearchContextHighlight.Field field : context.highlight().fields()) {
+ Collection<String> fieldNamesToHighlight;
+ if (Regex.isSimpleMatchPattern(field.field())) {
+ DocumentMapper documentMapper = context.mapperService().documentMapper(hitContext.hit().type());
+ fieldNamesToHighlight = documentMapper.mappers().simpleMatchToFullName(field.field());
+ } else {
+ fieldNamesToHighlight = Collections.singletonList(field.field());
+ }
+
+ if (context.highlight().forceSource(field)) {
+ SourceFieldMapper sourceFieldMapper = context.mapperService().documentMapper(hitContext.hit().type()).sourceMapper();
+ if (!sourceFieldMapper.enabled()) {
+ throw new IllegalArgumentException("source is forced for fields " + fieldNamesToHighlight + " but type [" + hitContext.hit().type() + "] has disabled _source");
+ }
+ }
+
+ boolean fieldNameContainsWildcards = field.field().contains("*");
+ for (String fieldName : fieldNamesToHighlight) {
+ FieldMapper fieldMapper = getMapperForField(fieldName, context, hitContext);
+ if (fieldMapper == null) {
+ continue;
+ }
+
+ // We should prevent highlighting if a field is anything but a text or keyword field.
+ // However, someone might implement a custom field type that has text and still want to
+ // highlight on that. We cannot know in advance if the highlighter will be able to
+ // highlight such a field and so we do the following:
+ // If the field is only highlighted because the field matches a wildcard we assume
+ // it was a mistake and do not process it.
+ // If the field was explicitly given we assume that whoever issued the query knew
+ // what they were doing and try to highlight anyway.
+ if (fieldNameContainsWildcards) {
+ if (fieldMapper.fieldType().typeName().equals(TextFieldMapper.CONTENT_TYPE) == false &&
+ fieldMapper.fieldType().typeName().equals(KeywordFieldMapper.CONTENT_TYPE) == false &&
+ fieldMapper.fieldType().typeName().equals(StringFieldMapper.CONTENT_TYPE) == false) {
+ continue;
+ }
+ }
+ String highlighterType = field.fieldOptions().highlighterType();
+ if (highlighterType == null) {
+ for (String highlighterCandidate : STANDARD_HIGHLIGHTERS_BY_PRECEDENCE) {
+ if (highlighters.get(highlighterCandidate).canHighlight(fieldMapper)) {
+ highlighterType = highlighterCandidate;
+ break;
+ }
+ }
+ assert highlighterType != null;
+ }
+ Highlighter highlighter = highlighters.get(highlighterType);
+ if (highlighter == null) {
+ throw new IllegalArgumentException("unknown highlighter type [" + highlighterType + "] for the field [" + fieldName + "]");
+ }
+
+ Query highlightQuery = field.fieldOptions().highlightQuery() == null ? context.parsedQuery().query() : field.fieldOptions().highlightQuery();
+ HighlighterContext highlighterContext = new HighlighterContext(fieldName, field, fieldMapper, context, hitContext, highlightQuery);
+
+ if ((highlighter.canHighlight(fieldMapper) == false) && fieldNameContainsWildcards) {
+ // if several fieldnames matched the wildcard then we want to skip those that we cannot highlight
+ continue;
+ }
+ HighlightField highlightField = highlighter.highlight(highlighterContext);
+ if (highlightField != null) {
+ highlightFields.put(highlightField.name(), highlightField);
+ }
+ }
+ }
+ hitContext.hit().highlightFields(highlightFields);
+ }
+
+ private FieldMapper getMapperForField(String fieldName, SearchContext searchContext, HitContext hitContext) {
+ DocumentMapper documentMapper = searchContext.mapperService().documentMapper(hitContext.hit().type());
+ // TODO: no need to lookup the doc mapper with unambiguous field names? just look at the mapper service
+ return documentMapper.mappers().smartNameFieldMapper(fieldName);
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java
new file mode 100644
index 0000000000..dc805ea8d8
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.search.highlight.DefaultEncoder;
+import org.apache.lucene.search.highlight.Encoder;
+import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
+import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.internal.SearchContext;
+import org.elasticsearch.search.lookup.SourceLookup;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+
+import static java.util.Collections.singleton;
+
+public final class HighlightUtils {
+
+ //U+2029 PARAGRAPH SEPARATOR (PS): each value holds a discrete passage for highlighting (postings highlighter)
+ public static final char PARAGRAPH_SEPARATOR = 8233;
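+ //U+0000 NULL: separates the values of a multi-valued field so that each value gets its own snippet (postings highlighter)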
+ public static final char NULL_SEPARATOR = '\u0000';
+
+ private HighlightUtils() {
+
+ }
+
+ static List<Object> loadFieldValues(SearchContextHighlight.Field field, FieldMapper mapper, SearchContext searchContext, FetchSubPhase.HitContext hitContext) throws IOException {
+ //percolator needs to always load from source, thus it sets the global force source to true
+ boolean forceSource = searchContext.highlight().forceSource(field);
+ List<Object> textsToHighlight;
+ if (!forceSource && mapper.fieldType().stored()) {
+ CustomFieldsVisitor fieldVisitor = new CustomFieldsVisitor(singleton(mapper.fieldType().name()), false);
+ hitContext.reader().document(hitContext.docId(), fieldVisitor);
+ textsToHighlight = fieldVisitor.fields().get(mapper.fieldType().name());
+ if (textsToHighlight == null) {
+ // Can happen if the document doesn't have the field to highlight
+ textsToHighlight = Collections.emptyList();
+ }
+ } else {
+ SourceLookup sourceLookup = searchContext.lookup().source();
+ sourceLookup.setSegmentAndDocument(hitContext.readerContext(), hitContext.docId());
+ textsToHighlight = sourceLookup.extractRawValues(mapper.fieldType().name());
+ }
+ assert textsToHighlight != null;
+ return textsToHighlight;
+ }
+
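+ // shared encoder instances: DEFAULT leaves fragments as-is, HTML escapes them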
+ static class Encoders {
+ static Encoder DEFAULT = new DefaultEncoder();
+ static Encoder HTML = new SimpleHTMLEncoder();
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/Highlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/Highlighter.java
new file mode 100644
index 0000000000..ab76da6e72
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/Highlighter.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.elasticsearch.index.mapper.FieldMapper;
+
+/**
+ * Highlights a search result.
+ */
+public interface Highlighter {
+
+ HighlightField highlight(HighlighterContext highlighterContext);
+
+ boolean canHighlight(FieldMapper fieldMapper);
+}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterContext.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterContext.java
new file mode 100644
index 0000000000..7b9526d152
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterContext.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.search.Query;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.internal.SearchContext;
+
+/**
+ * Holds everything a {@link Highlighter} needs to highlight a single field of a single hit.
+ */
+public class HighlighterContext {
+
+ public final String fieldName;
+ public final SearchContextHighlight.Field field;
+ public final FieldMapper mapper;
+ public final SearchContext context;
+ public final FetchSubPhase.HitContext hitContext;
+ public final Query query;
+
+ public HighlighterContext(String fieldName, SearchContextHighlight.Field field, FieldMapper mapper, SearchContext context,
+ FetchSubPhase.HitContext hitContext, Query query) {
+ this.fieldName = fieldName;
+ this.field = field;
+ this.mapper = mapper;
+ this.context = context;
+ this.hitContext = hitContext;
+ this.query = query;
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java
new file mode 100644
index 0000000000..01f70d4b27
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java
@@ -0,0 +1,208 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.search.highlight.Encoder;
+import org.apache.lucene.search.highlight.Formatter;
+import org.apache.lucene.search.highlight.Fragmenter;
+import org.apache.lucene.search.highlight.NullFragmenter;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.SimpleFragmenter;
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
+import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
+import org.apache.lucene.search.highlight.TextFragment;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.CollectionUtil;
+import org.elasticsearch.ExceptionsHelper;
+import org.elasticsearch.common.text.Text;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
+import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.internal.SearchContext;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Highlighter that uses Lucene's plain {@link org.apache.lucene.search.highlight.Highlighter},
+ * re-analyzing the field values to find the best fragments.
+ */
+public class PlainHighlighter implements Highlighter {
+
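+ // key under which the per-field-mapper Lucene highlighters are cached on the hit context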
+ private static final String CACHE_KEY = "highlight-plain";
+
+ @Override
+ public HighlightField highlight(HighlighterContext highlighterContext) {
+ SearchContextHighlight.Field field = highlighterContext.field;
+ SearchContext context = highlighterContext.context;
+ FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
+ FieldMapper mapper = highlighterContext.mapper;
+
+ Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
+
+ if (!hitContext.cache().containsKey(CACHE_KEY)) {
+ Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter> mappers = new HashMap<>();
+ hitContext.cache().put(CACHE_KEY, mappers);
+ }
+ @SuppressWarnings("unchecked")
+ Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter> cache = (Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter>) hitContext.cache().get(CACHE_KEY);
+
+ org.apache.lucene.search.highlight.Highlighter entry = cache.get(mapper);
+ if (entry == null) {
+ QueryScorer queryScorer = new CustomQueryScorer(highlighterContext.query, field.fieldOptions().requireFieldMatch() ? mapper.fieldType().name() : null);
+ queryScorer.setExpandMultiTermQuery(true);
+ Fragmenter fragmenter;
+ if (field.fieldOptions().numberOfFragments() == 0) {
+ fragmenter = new NullFragmenter();
+ } else if (field.fieldOptions().fragmenter() == null) {
+ fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
+ } else if ("simple".equals(field.fieldOptions().fragmenter())) {
+ fragmenter = new SimpleFragmenter(field.fieldOptions().fragmentCharSize());
+ } else if ("span".equals(field.fieldOptions().fragmenter())) {
+ fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
+ } else {
+ throw new IllegalArgumentException("unknown fragmenter option [" + field.fieldOptions().fragmenter() + "] for the field [" + highlighterContext.fieldName + "]");
+ }
+ Formatter formatter = new SimpleHTMLFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0]);
+
+ entry = new org.apache.lucene.search.highlight.Highlighter(formatter, encoder, queryScorer);
+ entry.setTextFragmenter(fragmenter);
+ // always highlight across all data
+ entry.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
+
+ cache.put(mapper, entry);
+ }
+
+ // a HACK to make the highlighter do highlighting, even though it's using the single frag list builder
+ int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1 : field.fieldOptions().numberOfFragments();
+ ArrayList<TextFragment> fragsList = new ArrayList<>();
+ List<Object> textsToHighlight;
+ Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer();
+
+ try {
+ textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext);
+
+ for (Object textToHighlight : textsToHighlight) {
+ String text = textToHighlight.toString();
+
+ try (TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().name(), text)) {
+ if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) {
+ // can't perform highlighting if the stream has no terms (binary token stream) or no offsets
+ continue;
+ }
+ TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false, numberOfFragments);
+ for (TextFragment bestTextFragment : bestTextFragments) {
+ if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
+ fragsList.add(bestTextFragment);
+ }
+ }
+ }
+ }
+ } catch (Exception e) {
+ if (ExceptionsHelper.unwrap(e, BytesRefHash.MaxBytesLengthExceededException.class) != null) {
+ // this can happen if, for example, a field is not_analyzed and the ignore_above option is set.
+ // the field will be ignored when indexing but the huge term is still in the source and
+ // the plain highlighter will parse the source and try to analyze it.
+ return null;
+ } else {
+ throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
+ }
+ }
+ if (field.fieldOptions().scoreOrdered()) {
+ CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() {
+ @Override
+ public int compare(TextFragment o1, TextFragment o2) {
+ return (int) Math.signum(o2.getScore() - o1.getScore());
+ }
+ });
+ }
+ String[] fragments;
+ // number_of_fragments is set to 0 but we have a multivalued field
+ if (field.fieldOptions().numberOfFragments() == 0 && textsToHighlight.size() > 1 && fragsList.size() > 0) {
+ fragments = new String[fragsList.size()];
+ for (int i = 0; i < fragsList.size(); i++) {
+ fragments[i] = fragsList.get(i).toString();
+ }
+ } else {
+ // refine numberOfFragments if needed
+ numberOfFragments = fragsList.size() < numberOfFragments ? fragsList.size() : numberOfFragments;
+ fragments = new String[numberOfFragments];
+ for (int i = 0; i < fragments.length; i++) {
+ fragments[i] = fragsList.get(i).toString();
+ }
+ }
+
+ if (fragments.length > 0) {
+ return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
+ }
+
+ int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize();
+ if (noMatchSize > 0 && textsToHighlight.size() > 0) {
+ // Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary.
+ String fieldContents = textsToHighlight.get(0).toString();
+ int end;
+ try {
+ end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer, mapper.fieldType().name(), fieldContents);
+ } catch (Exception e) {
+ throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
+ }
+ if (end > 0) {
+ return new HighlightField(highlighterContext.fieldName, new Text[] { new Text(fieldContents.substring(0, end)) });
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public boolean canHighlight(FieldMapper fieldMapper) {
+ return true;
+ }
+
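+ /**
+ * Returns the end offset of the last token that still fits within {@code noMatchSize} characters,
+ * or -1 if the analyzer emits no offsets or the first token already exceeds the limit.
+ */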
+ private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer analyzer, String fieldName, String contents) throws IOException {
+ try (TokenStream tokenStream = analyzer.tokenStream(fieldName, contents)) {
+ if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
+ // Can't split on term boundaries without offsets
+ return -1;
+ }
+ int end = -1;
+ tokenStream.reset();
+ while (tokenStream.incrementToken()) {
+ OffsetAttribute attr = tokenStream.getAttribute(OffsetAttribute.class);
+ if (attr.endOffset() >= noMatchSize) {
+ // Jump to the end of this token if it wouldn't put us past the boundary
+ if (attr.endOffset() == noMatchSize) {
+ end = noMatchSize;
+ }
+ return end;
+ }
+ end = attr.endOffset();
+ }
+ tokenStream.end();
+ // We've exhausted the token stream so we should just highlight everything.
+ return end;
+ }
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PostingsHighlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PostingsHighlighter.java
new file mode 100644
index 0000000000..b2b08edaca
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PostingsHighlighter.java
@@ -0,0 +1,189 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.highlight.Encoder;
+import org.apache.lucene.search.postingshighlight.CustomPassageFormatter;
+import org.apache.lucene.search.postingshighlight.CustomPostingsHighlighter;
+import org.apache.lucene.search.postingshighlight.CustomSeparatorBreakIterator;
+import org.apache.lucene.search.postingshighlight.Snippet;
+import org.apache.lucene.util.CollectionUtil;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.text.Text;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
+import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.internal.SearchContext;
+
+import java.io.IOException;
+import java.text.BreakIterator;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+public class PostingsHighlighter implements Highlighter {
+
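+ // key under which the per-field-mapper passage formatters are cached on the hit context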
+ private static final String CACHE_KEY = "highlight-postings";
+
+ @Override
+ public HighlightField highlight(HighlighterContext highlighterContext) {
+
+ FieldMapper fieldMapper = highlighterContext.mapper;
+ SearchContextHighlight.Field field = highlighterContext.field;
+ if (canHighlight(fieldMapper) == false) {
+ throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with positions and offsets in the postings list to be used with postings highlighter");
+ }
+
+ SearchContext context = highlighterContext.context;
+ FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
+
+ if (!hitContext.cache().containsKey(CACHE_KEY)) {
+ hitContext.cache().put(CACHE_KEY, new HighlighterEntry());
+ }
+
+ HighlighterEntry highlighterEntry = (HighlighterEntry) hitContext.cache().get(CACHE_KEY);
+ MapperHighlighterEntry mapperHighlighterEntry = highlighterEntry.mappers.get(fieldMapper);
+
+ if (mapperHighlighterEntry == null) {
+ Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
+ CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0], encoder);
+ mapperHighlighterEntry = new MapperHighlighterEntry(passageFormatter);
+ }
+
+ List<Snippet> snippets = new ArrayList<>();
+ int numberOfFragments;
+ try {
+ Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer();
+ List<Object> fieldValues = HighlightUtils.loadFieldValues(field, fieldMapper, context, hitContext);
+ CustomPostingsHighlighter highlighter;
+ if (field.fieldOptions().numberOfFragments() == 0) {
+ //we use a control char to separate values, which is the only char that the custom break iterator breaks the text on,
+ //so we don't lose the distinction between the different values of a field and we get back a snippet per value
+ String fieldValue = mergeFieldValues(fieldValues, HighlightUtils.NULL_SEPARATOR);
+ CustomSeparatorBreakIterator breakIterator = new CustomSeparatorBreakIterator(HighlightUtils.NULL_SEPARATOR);
+ highlighter = new CustomPostingsHighlighter(analyzer, mapperHighlighterEntry.passageFormatter, breakIterator, fieldValue, field.fieldOptions().noMatchSize() > 0);
+ numberOfFragments = fieldValues.size(); //we are highlighting the whole content, one snippet per value
+ } else {
+ //using paragraph separator we make sure that each field value holds a discrete passage for highlighting
+ String fieldValue = mergeFieldValues(fieldValues, HighlightUtils.PARAGRAPH_SEPARATOR);
+ highlighter = new CustomPostingsHighlighter(analyzer, mapperHighlighterEntry.passageFormatter, fieldValue, field.fieldOptions().noMatchSize() > 0);
+ numberOfFragments = field.fieldOptions().numberOfFragments();
+ }
+
+ IndexSearcher searcher = new IndexSearcher(hitContext.reader());
+ Snippet[] fieldSnippets = highlighter.highlightField(fieldMapper.fieldType().name(), highlighterContext.query, searcher, hitContext.docId(), numberOfFragments);
+ for (Snippet fieldSnippet : fieldSnippets) {
+ if (Strings.hasText(fieldSnippet.getText())) {
+ snippets.add(fieldSnippet);
+ }
+ }
+
+ } catch(IOException e) {
+ throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
+ }
+
+ snippets = filterSnippets(snippets, field.fieldOptions().numberOfFragments());
+
+ if (field.fieldOptions().scoreOrdered()) {
+ //let's sort the snippets by score if needed
+ CollectionUtil.introSort(snippets, new Comparator<Snippet>() {
+ @Override
+ public int compare(Snippet o1, Snippet o2) {
+ return (int) Math.signum(o2.getScore() - o1.getScore());
+ }
+ });
+ }
+
+ String[] fragments = new String[snippets.size()];
+ for (int i = 0; i < fragments.length; i++) {
+ fragments[i] = snippets.get(i).getText();
+ }
+
+ if (fragments.length > 0) {
+ return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
+ }
+
+ return null;
+ }
+
+ @Override
+ public boolean canHighlight(FieldMapper fieldMapper) {
+ return fieldMapper.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+ }
+
+ private static String mergeFieldValues(List<Object> fieldValues, char valuesSeparator) {
+ //the postings highlighter accepts all values in a single string; as offsets etc. need to match the content
+ //loaded from stored fields, we merge all values using a proper separator
+ String rawValue = Strings.collectionToDelimitedString(fieldValues, String.valueOf(valuesSeparator));
+ return rawValue.substring(0, Math.min(rawValue.length(), Integer.MAX_VALUE - 1));
+ }
+
+ private static List<Snippet> filterSnippets(List<Snippet> snippets, int numberOfFragments) {
+
+ //We need to filter the snippets as due to no_match_size we could have
+ //either highlighted snippets or non highlighted ones and we don't want to mix those up
+ List<Snippet> filteredSnippets = new ArrayList<>(snippets.size());
+ for (Snippet snippet : snippets) {
+ if (snippet.isHighlighted()) {
+ filteredSnippets.add(snippet);
+ }
+ }
+
+ //if there's at least one highlighted snippet, we return all the highlighted ones
+ //otherwise we return the first non highlighted one if available
+ if (filteredSnippets.size() == 0) {
+ if (snippets.size() > 0) {
+ Snippet snippet = snippets.get(0);
+ //if we tried highlighting the whole content using whole break iterator (as number_of_fragments was 0)
+ //we need to return the first sentence of the content rather than the whole content
+ if (numberOfFragments == 0) {
+ BreakIterator bi = BreakIterator.getSentenceInstance(Locale.ROOT);
+ String text = snippet.getText();
+ bi.setText(text);
+ int next = bi.next();
+ if (next != BreakIterator.DONE) {
+ String newText = text.substring(0, next).trim();
+ snippet = new Snippet(newText, snippet.getScore(), snippet.isHighlighted());
+ }
+ }
+ filteredSnippets.add(snippet);
+ }
+ }
+
+ return filteredSnippets;
+ }
+
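+ // cache entry stored on the hit context: maps each field mapper to the passage formatter created for it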
+ private static class HighlighterEntry {
+ Map<FieldMapper, MapperHighlighterEntry> mappers = new HashMap<>();
+ }
+
+ private static class MapperHighlighterEntry {
+ final CustomPassageFormatter passageFormatter;
+
+ private MapperHighlighterEntry(CustomPassageFormatter passageFormatter) {
+ this.passageFormatter = passageFormatter;
+ }
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchContextHighlight.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchContextHighlight.java
new file mode 100644
index 0000000000..9f2074d741
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchContextHighlight.java
@@ -0,0 +1,361 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.search.Query;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Highlighting setup for a search request: the fields to highlight together with their resolved {@link FieldOptions}.
+ */
+public class SearchContextHighlight {
+
+ private final Map<String, Field> fields;
+
+ private boolean globalForceSource = false;
+
+ public SearchContextHighlight(Collection<Field> fields) {
+ assert fields != null;
+ this.fields = new LinkedHashMap<String, Field>(fields.size());
+ for (Field field : fields) {
+ this.fields.put(field.field, field);
+ }
+ }
+
+ public Collection<Field> fields() {
+ return fields.values();
+ }
+
+ public void globalForceSource(boolean globalForceSource) {
+ this.globalForceSource = globalForceSource;
+ }
+
+ boolean globalForceSource() {
+ return this.globalForceSource;
+ }
+
+ public boolean forceSource(Field field) {
+ if (globalForceSource) {
+ return true;
+ }
+
+ Field fieldInfo = fields.get(field.field);
+ return fieldInfo == null ? false : fieldInfo.fieldOptions.forceSource;
+ }
+
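+ /**
+ * A single field to highlight together with its resolved {@link FieldOptions}.
+ */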
+ public static class Field {
+ private final String field;
+ private final FieldOptions fieldOptions;
+
+ Field(String field, FieldOptions fieldOptions) {
+ assert field != null;
+ assert fieldOptions != null;
+ this.field = field;
+ this.fieldOptions = fieldOptions;
+ }
+
+ public String field() {
+ return field;
+ }
+
+ public FieldOptions fieldOptions() {
+ return fieldOptions;
+ }
+ }
+
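+ /**
+ * Highlighting options resolved for a single field.
+ */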
+ public static class FieldOptions {
+
+ // Field options that default to null or -1 are often set to their real default in HighlighterParseElement#parse
+ private int fragmentCharSize = -1;
+
+ private int numberOfFragments = -1;
+
+ private int fragmentOffset = -1;
+
+ private String encoder;
+
+ private String[] preTags;
+
+ private String[] postTags;
+
+ private Boolean scoreOrdered;
+
+ private Boolean highlightFilter;
+
+ private Boolean requireFieldMatch;
+
+ private String highlighterType;
+
+ private Boolean forceSource;
+
+ private String fragmenter;
+
+ private int boundaryMaxScan = -1;
+
+ private Character[] boundaryChars = null;
+
+ private Query highlightQuery;
+
+ private int noMatchSize = -1;
+
+ private Set<String> matchedFields;
+
+ private Map<String, Object> options;
+
+ private int phraseLimit = -1;
+
+ public int fragmentCharSize() {
+ return fragmentCharSize;
+ }
+
+ public int numberOfFragments() {
+ return numberOfFragments;
+ }
+
+ public int fragmentOffset() {
+ return fragmentOffset;
+ }
+
+ public String encoder() {
+ return encoder;
+ }
+
+ public String[] preTags() {
+ return preTags;
+ }
+
+ public String[] postTags() {
+ return postTags;
+ }
+
+ public Boolean scoreOrdered() {
+ return scoreOrdered;
+ }
+
+ public Boolean highlightFilter() {
+ return highlightFilter;
+ }
+
+ public Boolean requireFieldMatch() {
+ return requireFieldMatch;
+ }
+
+ public String highlighterType() {
+ return highlighterType;
+ }
+
+ public String fragmenter() {
+ return fragmenter;
+ }
+
+ public int boundaryMaxScan() {
+ return boundaryMaxScan;
+ }
+
+ public Character[] boundaryChars() {
+ return boundaryChars;
+ }
+
+ public Query highlightQuery() {
+ return highlightQuery;
+ }
+
+ public int noMatchSize() {
+ return noMatchSize;
+ }
+
+ public int phraseLimit() {
+ return phraseLimit;
+ }
+
+ public Set<String> matchedFields() {
+ return matchedFields;
+ }
+
+ public Map<String, Object> options() {
+ return options;
+ }
+
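+ /**
+ * Builder for {@link FieldOptions} that can also merge in global defaults via {@link #merge(FieldOptions)}.
+ */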
+ static class Builder {
+
+ private final FieldOptions fieldOptions = new FieldOptions();
+
+ Builder fragmentCharSize(int fragmentCharSize) {
+ fieldOptions.fragmentCharSize = fragmentCharSize;
+ return this;
+ }
+
+ Builder numberOfFragments(int numberOfFragments) {
+ fieldOptions.numberOfFragments = numberOfFragments;
+ return this;
+ }
+
+ Builder fragmentOffset(int fragmentOffset) {
+ fieldOptions.fragmentOffset = fragmentOffset;
+ return this;
+ }
+
+ Builder encoder(String encoder) {
+ fieldOptions.encoder = encoder;
+ return this;
+ }
+
+ Builder preTags(String[] preTags) {
+ fieldOptions.preTags = preTags;
+ return this;
+ }
+
+ Builder postTags(String[] postTags) {
+ fieldOptions.postTags = postTags;
+ return this;
+ }
+
+ Builder scoreOrdered(boolean scoreOrdered) {
+ fieldOptions.scoreOrdered = scoreOrdered;
+ return this;
+ }
+
+ Builder highlightFilter(boolean highlightFilter) {
+ fieldOptions.highlightFilter = highlightFilter;
+ return this;
+ }
+
+ Builder requireFieldMatch(boolean requireFieldMatch) {
+ fieldOptions.requireFieldMatch = requireFieldMatch;
+ return this;
+ }
+
+ Builder highlighterType(String type) {
+ fieldOptions.highlighterType = type;
+ return this;
+ }
+
+ Builder forceSource(boolean forceSource) {
+ fieldOptions.forceSource = forceSource;
+ return this;
+ }
+
+ Builder fragmenter(String fragmenter) {
+ fieldOptions.fragmenter = fragmenter;
+ return this;
+ }
+
+ Builder boundaryMaxScan(int boundaryMaxScan) {
+ fieldOptions.boundaryMaxScan = boundaryMaxScan;
+ return this;
+ }
+
+ Builder boundaryChars(Character[] boundaryChars) {
+ fieldOptions.boundaryChars = boundaryChars;
+ return this;
+ }
+
+ Builder highlightQuery(Query highlightQuery) {
+ fieldOptions.highlightQuery = highlightQuery;
+ return this;
+ }
+
+ Builder noMatchSize(int noMatchSize) {
+ fieldOptions.noMatchSize = noMatchSize;
+ return this;
+ }
+
+ Builder phraseLimit(int phraseLimit) {
+ fieldOptions.phraseLimit = phraseLimit;
+ return this;
+ }
+
+ Builder matchedFields(Set<String> matchedFields) {
+ fieldOptions.matchedFields = matchedFields;
+ return this;
+ }
+
+ Builder options(Map<String, Object> options) {
+ fieldOptions.options = options;
+ return this;
+ }
+
+ FieldOptions build() {
+ return fieldOptions;
+ }
+
+ Builder merge(FieldOptions globalOptions) {
+ if (fieldOptions.preTags == null && globalOptions.preTags != null) {
+ fieldOptions.preTags = Arrays.copyOf(globalOptions.preTags, globalOptions.preTags.length);
+ }
+ if (fieldOptions.postTags == null && globalOptions.postTags != null) {
+ fieldOptions.postTags = Arrays.copyOf(globalOptions.postTags, globalOptions.postTags.length);
+ }
+ if (fieldOptions.highlightFilter == null) {
+ fieldOptions.highlightFilter = globalOptions.highlightFilter;
+ }
+ if (fieldOptions.scoreOrdered == null) {
+ fieldOptions.scoreOrdered = globalOptions.scoreOrdered;
+ }
+ if (fieldOptions.fragmentCharSize == -1) {
+ fieldOptions.fragmentCharSize = globalOptions.fragmentCharSize;
+ }
+ if (fieldOptions.numberOfFragments == -1) {
+ fieldOptions.numberOfFragments = globalOptions.numberOfFragments;
+ }
+ if (fieldOptions.encoder == null) {
+ fieldOptions.encoder = globalOptions.encoder;
+ }
+ if (fieldOptions.requireFieldMatch == null) {
+ fieldOptions.requireFieldMatch = globalOptions.requireFieldMatch;
+ }
+ if (fieldOptions.boundaryMaxScan == -1) {
+ fieldOptions.boundaryMaxScan = globalOptions.boundaryMaxScan;
+ }
+ if (fieldOptions.boundaryChars == null && globalOptions.boundaryChars != null) {
+ fieldOptions.boundaryChars = Arrays.copyOf(globalOptions.boundaryChars, globalOptions.boundaryChars.length);
+ }
+ if (fieldOptions.highlighterType == null) {
+ fieldOptions.highlighterType = globalOptions.highlighterType;
+ }
+ if (fieldOptions.fragmenter == null) {
+ fieldOptions.fragmenter = globalOptions.fragmenter;
+ }
+ if ((fieldOptions.options == null || fieldOptions.options.size() == 0) && globalOptions.options != null) {
+ fieldOptions.options = new HashMap<>(globalOptions.options);
+ }
+ if (fieldOptions.highlightQuery == null && globalOptions.highlightQuery != null) {
+ fieldOptions.highlightQuery = globalOptions.highlightQuery;
+ }
+ if (fieldOptions.noMatchSize == -1) {
+ fieldOptions.noMatchSize = globalOptions.noMatchSize;
+ }
+ if (fieldOptions.forceSource == null) {
+ fieldOptions.forceSource = globalOptions.forceSource;
+ }
+ if (fieldOptions.phraseLimit == -1) {
+ fieldOptions.phraseLimit = globalOptions.phraseLimit;
+ }
+ return this;
+ }
+ }
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SimpleFragmentsBuilder.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SimpleFragmentsBuilder.java
new file mode 100644
index 0000000000..68c40ad846
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SimpleFragmentsBuilder.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.highlight.Encoder;
+import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
+import org.elasticsearch.index.mapper.FieldMapper;
+
+/**
+ * Direct subclass of Lucene's org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder
+ * that corrects offsets for broken analysis chains.
+ */
+public class SimpleFragmentsBuilder extends org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder {
+ protected final FieldMapper mapper;
+
+ public SimpleFragmentsBuilder(FieldMapper mapper,
+ String[] preTags, String[] postTags, BoundaryScanner boundaryScanner) {
+ super(preTags, postTags, boundaryScanner);
+ this.mapper = mapper;
+ }
+
+ @Override
+ protected String makeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
+ String[] preTags, String[] postTags, Encoder encoder) {
+ return super.makeFragment(buffer, index, values, FragmentBuilderHelper.fixWeightedFragInfo(mapper, values, fragInfo), preTags, postTags, encoder);
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SourceScoreOrderFragmentsBuilder.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SourceScoreOrderFragmentsBuilder.java
new file mode 100644
index 0000000000..dabe3b48ba
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SourceScoreOrderFragmentsBuilder.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.highlight.Encoder;
+import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
+import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.search.internal.SearchContext;
+import org.elasticsearch.search.lookup.SourceLookup;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * A {@link ScoreOrderFragmentsBuilder} that loads the field values to highlight from the document source
+ * instead of from stored fields.
+ */
+public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder {
+
+ private final FieldMapper mapper;
+
+ private final SearchContext searchContext;
+
+ public SourceScoreOrderFragmentsBuilder(FieldMapper mapper, SearchContext searchContext, String[] preTags, String[] postTags,
+ BoundaryScanner boundaryScanner) {
+ super(preTags, postTags, boundaryScanner);
+ this.mapper = mapper;
+ this.searchContext = searchContext;
+ }
+
+ @Override
+ protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
+ // we know it's a low-level reader with a matching docId, since that's how we call the highlighter
+ SourceLookup sourceLookup = searchContext.lookup().source();
+ sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId);
+
+ List<Object> values = sourceLookup.extractRawValues(mapper.fieldType().name());
+ Field[] fields = new Field[values.size()];
+ for (int i = 0; i < values.size(); i++) {
+ fields[i] = new Field(mapper.fieldType().name(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
+ }
+ return fields;
+ }
+
+ @Override
+ protected String makeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
+ String[] preTags, String[] postTags, Encoder encoder) {
+ return super.makeFragment(buffer, index, values, FragmentBuilderHelper.fixWeightedFragInfo(mapper, values, fragInfo), preTags, postTags, encoder);
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SourceSimpleFragmentsBuilder.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SourceSimpleFragmentsBuilder.java
new file mode 100644
index 0000000000..4ff52547c7
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SourceSimpleFragmentsBuilder.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.search.internal.SearchContext;
+import org.elasticsearch.search.lookup.SourceLookup;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * A {@link SimpleFragmentsBuilder} that loads the field values to highlight from the document source
+ * instead of from stored fields.
+ */
+public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder {
+
+ private final SearchContext searchContext;
+
+ public SourceSimpleFragmentsBuilder(FieldMapper mapper, SearchContext searchContext, String[] preTags, String[] postTags,
+ BoundaryScanner boundaryScanner) {
+ super(mapper, preTags, postTags, boundaryScanner);
+ this.searchContext = searchContext;
+ }
+
+ public static final Field[] EMPTY_FIELDS = new Field[0];
+
+ @Override
+ protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
+ // we know it's a low-level reader with a matching docId, since that's how we call the highlighter
+ SourceLookup sourceLookup = searchContext.lookup().source();
+ sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId);
+
+ List<Object> values = sourceLookup.extractRawValues(mapper.fieldType().name());
+ if (values.isEmpty()) {
+ return EMPTY_FIELDS;
+ }
+ Field[] fields = new Field[values.size()];
+ for (int i = 0; i < values.size(); i++) {
+ fields[i] = new Field(mapper.fieldType().name(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
+ }
+ return fields;
+ }
+
+}
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/package-info.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/package-info.java
new file mode 100644
index 0000000000..0e0daf6670
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/package-info.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Fetch sub phase that extracts significant portions of string fields, marking the matches. Pluggable by implementing
+ * {@link org.elasticsearch.search.fetch.subphase.highlight.Highlighter} and
+ * {@link org.elasticsearch.plugins.SearchPlugin#getHighlighters()}.
+ */
+package org.elasticsearch.search.fetch.subphase.highlight;