summaryrefslogtreecommitdiff
path: root/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java
diff options
context:
space:
mode:
Diffstat (limited to 'core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java')
-rw-r--r--core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java208
1 files changed, 208 insertions, 0 deletions
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java
new file mode 100644
index 0000000000..01f70d4b27
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java
@@ -0,0 +1,208 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.search.highlight.Encoder;
+import org.apache.lucene.search.highlight.Formatter;
+import org.apache.lucene.search.highlight.Fragmenter;
+import org.apache.lucene.search.highlight.NullFragmenter;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.SimpleFragmenter;
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
+import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
+import org.apache.lucene.search.highlight.TextFragment;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.CollectionUtil;
+import org.elasticsearch.ExceptionsHelper;
+import org.elasticsearch.common.text.Text;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
+import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.internal.SearchContext;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * {@link Highlighter} implementation that delegates to Lucene's
+ * {@link org.apache.lucene.search.highlight.Highlighter}: the values of the field are loaded for
+ * the hit, re-analyzed with the index analyzer and the best scoring fragments are returned.
+ */
+public class PlainHighlighter implements Highlighter {
+
+    /** Key under which the per-mapper Lucene highlighters are stored in the hit context cache map. */
+    private static final String CACHE_KEY = "highlight-plain";
+
+    /**
+     * Highlights the field described by the given context.
+     *
+     * @param highlighterContext field, mapper, query, search context and hit context to highlight
+     * @return the highlighted fragments; otherwise, when {@code no_match_size} is positive, an
+     *         excerpt from the start of the first value cut on a token boundary; {@code null} if
+     *         neither is available or if analysis hit a
+     *         {@link BytesRefHash.MaxBytesLengthExceededException}
+     * @throws IllegalArgumentException if the configured fragmenter name is unknown
+     * @throws FetchPhaseExecutionException if loading or analyzing the field values fails
+     */
+    @Override
+    public HighlightField highlight(HighlighterContext highlighterContext) {
+        SearchContextHighlight.Field field = highlighterContext.field;
+        SearchContextHighlight.FieldOptions options = field.fieldOptions();
+        SearchContext context = highlighterContext.context;
+        FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
+        FieldMapper mapper = highlighterContext.mapper;
+
+        // constant-first comparison, same form as the fragmenter name checks
+        Encoder encoder = "html".equals(options.encoder()) ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
+
+        @SuppressWarnings("unchecked")
+        Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter> cache =
+                (Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter>) hitContext.cache().get(CACHE_KEY);
+        if (cache == null) {
+            cache = new HashMap<>();
+            hitContext.cache().put(CACHE_KEY, cache);
+        }
+
+        // NOTE(review): a cached entry keeps the encoder and options it was first built with and is
+        // reused for every later call with the same mapper.
+        org.apache.lucene.search.highlight.Highlighter entry = cache.get(mapper);
+        if (entry == null) {
+            entry = createLuceneHighlighter(highlighterContext, options, mapper, encoder);
+            cache.put(mapper, entry);
+        }
+
+        // a HACK to make the highlighter do highlighting when number_of_fragments is 0: still ask for one fragment
+        int numberOfFragments = options.numberOfFragments() == 0 ? 1 : options.numberOfFragments();
+        List<TextFragment> fragsList = new ArrayList<>();
+        List<Object> textsToHighlight;
+        Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer();
+
+        try {
+            textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext);
+
+            for (Object textToHighlight : textsToHighlight) {
+                String text = textToHighlight.toString();
+
+                try (TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().name(), text)) {
+                    if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) {
+                        // can't perform highlighting if the stream has no terms (binary token stream) or no offsets
+                        continue;
+                    }
+                    TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false, numberOfFragments);
+                    for (TextFragment bestTextFragment : bestTextFragments) {
+                        // keep only fragments that actually matched something
+                        if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
+                            fragsList.add(bestTextFragment);
+                        }
+                    }
+                }
+            }
+        } catch (Exception e) {
+            if (ExceptionsHelper.unwrap(e, BytesRefHash.MaxBytesLengthExceededException.class) != null) {
+                // this can happen if for example a field is not_analyzed and ignore_above option is set.
+                // the field will be ignored when indexing but the huge term is still in the source and
+                // the plain highlighter will parse the source and try to analyze it.
+                return null;
+            } else {
+                throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
+            }
+        }
+
+        if (options.scoreOrdered()) {
+            CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() {
+                @Override
+                public int compare(TextFragment o1, TextFragment o2) {
+                    // Highest score first. Compare the floats directly: rounding the difference would
+                    // report scores less than 0.5 apart as equal and break the Comparator contract.
+                    return Float.compare(o2.getScore(), o1.getScore());
+                }
+            });
+        }
+
+        // number_of_fragments is set to 0 but we have a multivalued field: return every fragment found,
+        // otherwise cap the result at the requested number of fragments
+        boolean wholeMultiValuedField = options.numberOfFragments() == 0 && textsToHighlight.size() > 1;
+        int fragmentCount = wholeMultiValuedField ? fragsList.size() : Math.min(fragsList.size(), numberOfFragments);
+        if (fragmentCount > 0) {
+            String[] fragments = new String[fragmentCount];
+            for (int i = 0; i < fragmentCount; i++) {
+                fragments[i] = fragsList.get(i).toString();
+            }
+            return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
+        }
+
+        int noMatchSize = options.noMatchSize();
+        if (noMatchSize > 0 && textsToHighlight.size() > 0) {
+            // Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary.
+            String fieldContents = textsToHighlight.get(0).toString();
+            int end;
+            try {
+                end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer, mapper.fieldType().name(), fieldContents);
+            } catch (Exception e) {
+                throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
+            }
+            if (end > 0) {
+                return new HighlightField(highlighterContext.fieldName, new Text[] { new Text(fieldContents.substring(0, end)) });
+            }
+        }
+        return null;
+    }
+
+    /**
+     * This highlighter accepts every field mapper.
+     *
+     * @param fieldMapper ignored
+     * @return always {@code true}
+     */
+    @Override
+    public boolean canHighlight(FieldMapper fieldMapper) {
+        return true;
+    }
+
+    /**
+     * Builds the Lucene highlighter for a field: query scorer, fragmenter, formatter and encoder.
+     *
+     * @throws IllegalArgumentException if the configured fragmenter name is neither unset, "simple" nor "span"
+     */
+    private static org.apache.lucene.search.highlight.Highlighter createLuceneHighlighter(HighlighterContext highlighterContext,
+            SearchContextHighlight.FieldOptions options, FieldMapper mapper, Encoder encoder) {
+        // restrict scoring to this field only when require_field_match is set
+        QueryScorer queryScorer = new CustomQueryScorer(highlighterContext.query,
+                options.requireFieldMatch() ? mapper.fieldType().name() : null);
+        queryScorer.setExpandMultiTermQuery(true);
+
+        String fragmenterName = options.fragmenter();
+        Fragmenter fragmenter;
+        if (options.numberOfFragments() == 0) {
+            fragmenter = new NullFragmenter();
+        } else if (fragmenterName == null || "span".equals(fragmenterName)) {
+            // "span" is also what an unset fragmenter falls back to
+            fragmenter = new SimpleSpanFragmenter(queryScorer, options.fragmentCharSize());
+        } else if ("simple".equals(fragmenterName)) {
+            fragmenter = new SimpleFragmenter(options.fragmentCharSize());
+        } else {
+            throw new IllegalArgumentException("unknown fragmenter option [" + fragmenterName + "] for the field [" + highlighterContext.fieldName + "]");
+        }
+        // only the first pre/post tag pair is used
+        Formatter formatter = new SimpleHTMLFormatter(options.preTags()[0], options.postTags()[0]);
+
+        org.apache.lucene.search.highlight.Highlighter highlighter =
+                new org.apache.lucene.search.highlight.Highlighter(formatter, encoder, queryScorer);
+        highlighter.setTextFragmenter(fragmenter);
+        // always highlight across all data
+        highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
+        return highlighter;
+    }
+
+    /**
+     * Finds the character offset at which to cut {@code contents} for a no-match excerpt, so that
+     * the cut falls on the end of a token and does not exceed {@code noMatchSize}.
+     *
+     * @param noMatchSize maximum excerpt length in characters
+     * @param analyzer    analyzer used to tokenize {@code contents}
+     * @param fieldName   field name handed to the analyzer
+     * @param contents    text to take the excerpt from
+     * @return the end offset of the last token ending at or before {@code noMatchSize}, or
+     *         {@code -1} if the stream has no offsets or no token ends within the limit
+     * @throws IOException if the token stream fails
+     */
+    private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer analyzer, String fieldName, String contents) throws IOException {
+        try (TokenStream tokenStream = analyzer.tokenStream(fieldName, contents)) {
+            if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
+                // Can't split on term boundaries without offsets
+                return -1;
+            }
+            // look the attribute up once instead of on every token
+            OffsetAttribute offsets = tokenStream.getAttribute(OffsetAttribute.class);
+            int end = -1;
+            tokenStream.reset();
+            while (tokenStream.incrementToken()) {
+                int tokenEnd = offsets.endOffset();
+                if (tokenEnd > noMatchSize) {
+                    // This token would put us past the boundary: stop at the previous token
+                    return end;
+                }
+                if (tokenEnd == noMatchSize) {
+                    // This token ends exactly on the boundary: jump to its end
+                    return noMatchSize;
+                }
+                end = tokenEnd;
+            }
+            tokenStream.end();
+            // We've exhausted the token stream so we should just highlight everything.
+            return end;
+        }
+    }
+}