1 files changed, 208 insertions, 0 deletions
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java
new file mode 100644
index 0000000000..01f70d4b27
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java
@@ -0,0 +1,208 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.search.highlight.Encoder;
+import org.apache.lucene.search.highlight.Formatter;
+import org.apache.lucene.search.highlight.Fragmenter;
+import org.apache.lucene.search.highlight.NullFragmenter;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.SimpleFragmenter;
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
+import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
+import org.apache.lucene.search.highlight.TextFragment;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.CollectionUtil;
+import org.elasticsearch.ExceptionsHelper;
+import org.elasticsearch.common.text.Text;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
+import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.internal.SearchContext;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * {@link Highlighter} that re-analyzes the loaded field values and runs Lucene's standard highlighter over them.
+ */
+public class PlainHighlighter implements Highlighter {
+
+    private static final String CACHE_KEY = "highlight-plain";
+
+    /**
+     * Highlights the field described by {@code highlighterContext} for the current hit.
+     *
+     * @return the scored fragments; otherwise, when {@code noMatchSize} is positive, an excerpt from the start of the first value; otherwise {@code null}
+     * @throws IllegalArgumentException if the configured fragmenter option is unknown
+     * @throws FetchPhaseExecutionException if loading or analyzing the field values fails
+     */
+    @Override
+    public HighlightField highlight(HighlighterContext highlighterContext) {
+        SearchContextHighlight.Field field = highlighterContext.field;
+        SearchContext context = highlighterContext.context;
+        FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
+        FieldMapper mapper = highlighterContext.mapper;
+        // constant-first equals: a missing encoder option selects the default encoder instead of throwing
+        Encoder encoder = "html".equals(field.fieldOptions().encoder()) ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
+
+        @SuppressWarnings("unchecked")
+        Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter> cache = (Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter>) hitContext.cache().get(CACHE_KEY);
+        if (cache == null) {
+            cache = new HashMap<>();
+            hitContext.cache().put(CACHE_KEY, cache);
+        }
+
+        org.apache.lucene.search.highlight.Highlighter entry = cache.get(mapper);
+        if (entry == null) {
+            QueryScorer queryScorer = new CustomQueryScorer(highlighterContext.query, field.fieldOptions().requireFieldMatch() ? mapper.fieldType().name() : null);
+            queryScorer.setExpandMultiTermQuery(true);
+            Fragmenter fragmenter;
+            if (field.fieldOptions().numberOfFragments() == 0) {
+                fragmenter = new NullFragmenter();
+            } else if (field.fieldOptions().fragmenter() == null) {
+                fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
+            } else if ("simple".equals(field.fieldOptions().fragmenter())) {
+                fragmenter = new SimpleFragmenter(field.fieldOptions().fragmentCharSize());
+            } else if ("span".equals(field.fieldOptions().fragmenter())) {
+                fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
+            } else {
+                throw new IllegalArgumentException("unknown fragmenter option [" + field.fieldOptions().fragmenter() + "] for the field [" + highlighterContext.fieldName + "]");
+            }
+            Formatter formatter = new SimpleHTMLFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0]);
+            entry = new org.apache.lucene.search.highlight.Highlighter(formatter, encoder, queryScorer);
+            entry.setTextFragmenter(fragmenter);
+            // always highlight across all data
+            entry.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
+
+            cache.put(mapper, entry);
+        }
+
+        // a HACK to make highlighter do highlighting, even though it's using the single frag list builder
+        int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1 : field.fieldOptions().numberOfFragments();
+        List<TextFragment> fragsList = new ArrayList<>();
+        List<Object> textsToHighlight;
+        Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer();
+
+        try {
+            textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext);
+            for (Object textToHighlight : textsToHighlight) {
+                String text = textToHighlight.toString();
+                try (TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().name(), text)) {
+                    if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) {
+                        // can't perform highlighting if the stream has no terms (binary token stream) or no offsets
+                        continue;
+                    }
+                    TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false, numberOfFragments);
+                    for (TextFragment bestTextFragment : bestTextFragments) {
+                        if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
+                            fragsList.add(bestTextFragment);
+                        }
+                    }
+                }
+            }
+        } catch (Exception e) {
+            if (ExceptionsHelper.unwrap(e, BytesRefHash.MaxBytesLengthExceededException.class) != null) {
+                // this can happen if for example a field is not_analyzed and ignore_above option is set.
+                // the field will be ignored when indexing but the huge term is still in the source and
+                // the plain highlighter will parse the source and try to analyze it.
+                return null;
+            }
+            throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
+        }
+        if (field.fieldOptions().scoreOrdered()) {
+            CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() {
+                @Override
+                public int compare(TextFragment o1, TextFragment o2) {
+                    // highest score first; rounding the difference would treat scores less than 0.5 apart as equal
+                    return Float.compare(o2.getScore(), o1.getScore());
+                }
+            });
+        }
+        // number_of_fragments is set to 0 but we have a multivalued field: keep every fragment, otherwise cap the count
+        boolean returnAll = field.fieldOptions().numberOfFragments() == 0 && textsToHighlight.size() > 1;
+        int fragmentCount = returnAll ? fragsList.size() : Math.min(fragsList.size(), numberOfFragments);
+        String[] fragments = new String[fragmentCount];
+        for (int i = 0; i < fragmentCount; i++) {
+            fragments[i] = fragsList.get(i).toString();
+        }
+
+        if (fragments.length > 0) {
+            return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
+        }
+
+        int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize();
+        if (noMatchSize > 0 && textsToHighlight.size() > 0) {
+            // Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary.
+            String fieldContents = textsToHighlight.get(0).toString();
+            int end;
+            try {
+                end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer, mapper.fieldType().name(), fieldContents);
+            } catch (Exception e) {
+                throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
+            }
+            if (end > 0) {
+                return new HighlightField(highlighterContext.fieldName, new Text[] { new Text(fieldContents.substring(0, end)) });
+            }
+        }
+        return null;
+    }
+
+    /** Accepts every field mapper: this implementation always returns {@code true}. */
+    @Override
+    public boolean canHighlight(FieldMapper fieldMapper) {
+        return true;
+    }
+
+    /**
+     * Finds where to cut {@code contents} for a no-match excerpt of at most {@code noMatchSize} characters without splitting a token.
+     * @return the end offset of the last token ending at or before {@code noMatchSize}, or {@code -1} when the stream has no offsets or no token fits
+     * @throws IOException if the token stream cannot be consumed
+     */
+    private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer analyzer, String fieldName, String contents) throws IOException {
+        try (TokenStream tokenStream = analyzer.tokenStream(fieldName, contents)) {
+            if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
+                // Can't split on term boundaries without offsets
+                return -1;
+            }
+            OffsetAttribute offsets = tokenStream.getAttribute(OffsetAttribute.class);
+            int end = -1;
+            tokenStream.reset();
+            while (tokenStream.incrementToken()) {
+                int tokenEnd = offsets.endOffset();
+                if (tokenEnd >= noMatchSize) {
+                    // Jump to the end of this token only if it wouldn't put us past the boundary
+                    return tokenEnd == noMatchSize ? noMatchSize : end;
+                }
+                end = tokenEnd;
+            }
+            tokenStream.end();
+            // We've exhausted the token stream so we should just highlight everything.
+            return end;
+        }
+    }
+}