4 files changed, 366 insertions, 0 deletions
diff --git a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java
index fcddc58f77..2d43a1ca64 100644
--- a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java
+++ b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java
@@ -19,6 +19,7 @@
 
 package org.apache.lucene.search.postingshighlight;
 
+import org.apache.lucene.search.highlight.Snippet;
 import org.apache.lucene.search.highlight.DefaultEncoder;
 import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
 import org.apache.lucene.util.BytesRef;
diff --git a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java
index c95819a0e5..315e38d12f 100644
--- a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java
+++ b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java
@@ -31,6 +31,7 @@ import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.highlight.Snippet;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
diff --git a/core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java
new file mode 100644
index 0000000000..4e664c3e24
--- /dev/null
+++ b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search.uhighlight;
+
+import org.apache.lucene.search.highlight.Snippet;
+import org.apache.lucene.search.highlight.DefaultEncoder;
+import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.test.ESTestCase;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.notNullValue;
+
+
+public class CustomPassageFormatterTests extends ESTestCase {
+    /** Formats three adjacent passages with the plain encoder and checks trimming, tagging and the highlighted flag. */
+    public void testSimpleFormat() {
+        String content = "This is a really cool highlighter. Unified highlighter gives nice snippets back. No matches here.";
+        CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<em>", "</em>", new DefaultEncoder());
+        Passage[] passages = new Passage[3];
+        String match = "highlighter";
+        BytesRef matchBytesRef = new BytesRef(match);
+
+        Passage passage1 = new Passage();
+        int start = content.indexOf(match);
+        int end = start + match.length();
+        passage1.setStartOffset(0);
+        passage1.setEndOffset(end + 2); // let's include the whitespace at the end to make sure we trim it
+        passage1.addMatch(start, end, matchBytesRef);
+        passages[0] = passage1;
+
+        Passage passage2 = new Passage();
+        start = content.lastIndexOf(match);
+        end = start + match.length();
+        passage2.setStartOffset(passage1.getEndOffset());
+        passage2.setEndOffset(end + 26); // 26 == " gives nice snippets back.".length(), i.e. up to the end of the sentence
+        passage2.addMatch(start, end, matchBytesRef);
+        passages[1] = passage2;
+
+        Passage passage3 = new Passage(); // no match is added: expected to come back as a non-highlighted snippet
+        passage3.setStartOffset(passage2.getEndOffset());
+        passage3.setEndOffset(content.length());
+        passages[2] = passage3;
+
+        Snippet[] fragments = passageFormatter.format(passages, content);
+        assertThat(fragments, notNullValue());
+        assertThat(fragments.length, equalTo(3));
+        assertThat(fragments[0].getText(), equalTo("This is a really cool <em>highlighter</em>."));
+        assertThat(fragments[0].isHighlighted(), equalTo(true));
+        assertThat(fragments[1].getText(), equalTo("Unified <em>highlighter</em> gives nice snippets back."));
+        assertThat(fragments[1].isHighlighted(), equalTo(true));
+        assertThat(fragments[2].getText(), equalTo("No matches here."));
+        assertThat(fragments[2].isHighlighted(), equalTo(false));
+    }
+
+    /** Same as the simple case but with the HTML encoder: the content is escaped while the highlight tags are not. */
+    public void testHtmlEncodeFormat() {
+        String content = "<b>This is a really cool highlighter.</b> Unified highlighter gives nice snippets back.";
+        CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<em>", "</em>", new SimpleHTMLEncoder());
+        Passage[] passages = new Passage[2];
+        String match = "highlighter";
+        BytesRef matchBytesRef = new BytesRef(match);
+
+        Passage passage1 = new Passage();
+        int start = content.indexOf(match);
+        int end = start + match.length();
+        passage1.setStartOffset(0);
+        passage1.setEndOffset(end + 6); // ".</b> " follows the match; include the whitespace to make sure we trim it
+        passage1.addMatch(start, end, matchBytesRef);
+        passages[0] = passage1;
+
+        Passage passage2 = new Passage();
+        start = content.lastIndexOf(match);
+        end = start + match.length();
+        passage2.setStartOffset(passage1.getEndOffset());
+        passage2.setEndOffset(content.length());
+        passage2.addMatch(start, end, matchBytesRef);
+        passages[1] = passage2;
+
+        Snippet[] fragments = passageFormatter.format(passages, content);
+        assertThat(fragments, notNullValue());
+        assertThat(fragments.length, equalTo(2));
+        assertThat(fragments[0].getText(), equalTo("&lt;b&gt;This is a really cool <em>highlighter</em>.&lt;&#x2F;b&gt;"));
+        assertThat(fragments[0].isHighlighted(), equalTo(true));
+        assertThat(fragments[1].getText(), equalTo("Unified <em>highlighter</em> gives nice snippets back."));
+        assertThat(fragments[1].isHighlighted(), equalTo(true));
+    }
+}
diff --git a/core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java
new file mode 100644
index 0000000000..83b42750f9
--- /dev/null
+++ b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java
@@ -0,0 +1,259 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search.uhighlight;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.highlight.DefaultEncoder;
+import org.apache.lucene.search.highlight.Snippet;
+import org.apache.lucene.store.Directory;
+import org.elasticsearch.common.lucene.all.AllTermQuery;
+import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightUtils;
+import org.elasticsearch.test.ESTestCase;
+
+import java.io.IOException;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+
+public class CustomUnifiedHighlighterTests extends ESTestCase {
+    /**
+     * Highlights a multi-valued field: the highlighter is handed the four values joined with
+     * {@link HighlightUtils#PARAGRAPH_SEPARATOR} and one snippet per value is expected, in the order the values were added.
+     */
+    public void testCustomUnifiedHighlighter() throws Exception {
+        FieldType offsetsType = newOffsetsFieldType();
+
+        //good position but only one match
+        final String firstValue = "This is a test. Just a test1 highlighting from unified highlighter.";
+
+        //two matches, not the best snippet due to its length though
+        final String secondValue = "This is the second highlighting value to perform highlighting on a longer text " +
+            "that gets scored lower.";
+
+        //two matches and short, will be scored highest
+        final String thirdValue = "This is highlighting the third short highlighting value.";
+
+        //one match, same as first but at the end, will be scored lower due to its position
+        final String fourthValue = "Just a test4 highlighting from unified highlighter.";
+
+        Document doc = new Document();
+        doc.add(new Field("body", firstValue, offsetsType));
+        doc.add(new Field("body", secondValue, offsetsType));
+        doc.add(new Field("body", thirdValue, offsetsType));
+        doc.add(new Field("body", fourthValue, offsetsType));
+
+        String firstHlValue = "Just a test1 <b>highlighting</b> from unified highlighter.";
+        String secondHlValue = "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a" +
+            " longer text that gets scored lower.";
+        String thirdHlValue = "This is <b>highlighting</b> the third short <b>highlighting</b> value.";
+        String fourthHlValue = "Just a test4 <b>highlighting</b> from unified highlighter.";
+
+        String fieldValue = firstValue + HighlightUtils.PARAGRAPH_SEPARATOR + secondValue +
+            HighlightUtils.PARAGRAPH_SEPARATOR + thirdValue + HighlightUtils.PARAGRAPH_SEPARATOR + fourthValue;
+
+        // closed in reverse order even when an assertion fails: the reader first, then the analyzer, the directory last
+        try (Directory dir = newDirectory();
+             Analyzer analyzer = new MockAnalyzer(random());
+             IndexReader ir = indexDoc(dir, analyzer, doc)) {
+            IndexSearcher searcher = newSearcher(ir);
+            Query query = new TermQuery(new Term("body", "highlighting"));
+
+            TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+            assertThat(topDocs.totalHits, equalTo(1));
+
+            int docId = topDocs.scoreDocs[0].doc;
+
+            CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
+                new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), null, fieldValue, true);
+            Snippet[] snippets = highlighter.highlightField("body", query, docId, 5);
+
+            assertThat(snippets.length, equalTo(4));
+
+            assertThat(snippets[0].getText(), equalTo(firstHlValue));
+            assertThat(snippets[1].getText(), equalTo(secondHlValue));
+            assertThat(snippets[2].getText(), equalTo(thirdHlValue));
+            assertThat(snippets[3].getText(), equalTo(fourthHlValue));
+        }
+    }
+
+    /**
+     * Searches on the {@code none} field but highlights {@code body}, so no passage contains a match.
+     * When the last constructor argument is {@code false} no snippet is returned, when it is {@code true}
+     * the first sentence comes back without tags.
+     */
+    public void testNoMatchSize() throws Exception {
+        FieldType offsetsType = newOffsetsFieldType();
+        String firstValue = "This is a test. Just a test highlighting from unified. Feel free to ignore.";
+        Document doc = new Document();
+        doc.add(new Field("body", firstValue, offsetsType));
+        doc.add(new Field("none", firstValue, offsetsType));
+
+        try (Directory dir = newDirectory();
+             Analyzer analyzer = new StandardAnalyzer();
+             IndexReader ir = indexDoc(dir, analyzer, doc)) {
+            Query query = new TermQuery(new Term("none", "highlighting"));
+
+            IndexSearcher searcher = newSearcher(ir);
+            TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+            assertThat(topDocs.totalHits, equalTo(1));
+            int docId = topDocs.scoreDocs[0].doc;
+
+            CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
+            CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer, passageFormatter,
+                null, firstValue, false);
+            Snippet[] snippets = highlighter.highlightField("body", query, docId, 5);
+            assertThat(snippets.length, equalTo(0));
+
+            highlighter = new CustomUnifiedHighlighter(searcher, analyzer, passageFormatter, null, firstValue, true);
+            snippets = highlighter.highlightField("body", query, docId, 5);
+            assertThat(snippets.length, equalTo(1));
+            assertThat(snippets[0].getText(), equalTo("This is a test."));
+        }
+    }
+
+    /**
+     * Creates a field type based on {@link TextField#TYPE_STORED} that indexes offsets in the postings
+     * and additionally enables term vectors with positions and offsets.
+     */
+    private static FieldType newOffsetsFieldType() {
+        FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+        offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+        offsetsType.setStoreTermVectorOffsets(true);
+        offsetsType.setStoreTermVectorPositions(true);
+        offsetsType.setStoreTermVectors(true);
+        return offsetsType;
+    }
+
+    /**
+     * Indexes {@code doc} into {@code dir} through a {@link RandomIndexWriter} and returns a reader on it.
+     * The writer is closed before returning, also when indexing fails; closing the reader is up to the caller.
+     */
+    private IndexReader indexDoc(Directory dir, Analyzer analyzer, Document doc) throws IOException {
+        IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
+        iwc.setMergePolicy(newLogMergePolicy());
+        try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc)) {
+            iw.addDocument(doc);
+            return iw.getReader();
+        }
+    }
+
+    /**
+     * Indexes a single document holding {@code value} in {@code field}, with offsets in the postings
+     * (term vectors are not enabled here), and returns a reader that the caller has to close.
+     */
+    private IndexReader indexOneDoc(Directory dir, String field, String value, Analyzer analyzer) throws IOException {
+        FieldType ft = new FieldType(TextField.TYPE_STORED);
+        ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+        Document doc = new Document();
+        doc.add(new Field(field, value, ft));
+        return indexDoc(dir, analyzer, doc);
+    }
+
+    /**
+     * A {@link MultiPhrasePrefixQuery} whose last term is only a prefix: the whole matching word is expected to be tagged.
+     */
+    public void testMultiPhrasePrefixQuery() throws Exception {
+        String value = "The quick brown fox.";
+        MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery();
+        query.add(new Term("text", "quick"));
+        query.add(new Term("text", "brown"));
+        // "fo" is only a prefix of the word "fox" in the indexed value
+        query.add(new Term("text", "fo"));
+        try (Directory dir = newDirectory();
+             Analyzer analyzer = new StandardAnalyzer();
+             IndexReader ir = indexOneDoc(dir, "text", value, analyzer)) {
+            IndexSearcher searcher = newSearcher(ir);
+            TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+            assertThat(topDocs.totalHits, equalTo(1));
+            int docId = topDocs.scoreDocs[0].doc;
+            CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
+            CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
+                passageFormatter, null, value, false);
+            Snippet[] snippets = highlighter.highlightField("text", query, docId, 5);
+            assertThat(snippets.length, equalTo(1));
+            assertThat(snippets[0].getText(), equalTo("The <b>quick</b> <b>brown</b> <b>fox</b>."));
+        }
+    }
+
+    /**
+     * An {@link AllTermQuery} on a single term: only that term is expected to be tagged.
+     */
+    public void testAllTermQuery() throws IOException {
+        String value = "The quick brown fox.";
+        AllTermQuery query = new AllTermQuery(new Term("all", "fox"));
+        try (Directory dir = newDirectory();
+             Analyzer analyzer = new StandardAnalyzer();
+             IndexReader ir = indexOneDoc(dir, "all", value, analyzer)) {
+            IndexSearcher searcher = newSearcher(ir);
+            TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+            assertThat(topDocs.totalHits, equalTo(1));
+            int docId = topDocs.scoreDocs[0].doc;
+            CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
+            CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
+                passageFormatter, null, value, false);
+            Snippet[] snippets = highlighter.highlightField("all", query, docId, 5);
+            assertThat(snippets.length, equalTo(1));
+            assertThat(snippets[0].getText(), equalTo("The quick brown <b>fox</b>."));
+        }
+    }
+
+    /**
+     * A {@link CommonTermsQuery} over three terms: each of them is expected to be tagged.
+     */
+    public void testCommonTermsQuery() throws IOException {
+        String value = "The quick brown fox.";
+        CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 128);
+        query.add(new Term("text", "quick"));
+        query.add(new Term("text", "brown"));
+        query.add(new Term("text", "fox"));
+        try (Directory dir = newDirectory();
+             Analyzer analyzer = new StandardAnalyzer();
+             IndexReader ir = indexOneDoc(dir, "text", value, analyzer)) {
+            IndexSearcher searcher = newSearcher(ir);
+            TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+            assertThat(topDocs.totalHits, equalTo(1));
+            int docId = topDocs.scoreDocs[0].doc;
+            CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
+            CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
+                passageFormatter, null, value, false);
+            Snippet[] snippets = highlighter.highlightField("text", query, docId, 5);
+            assertThat(snippets.length, equalTo(1));
+            assertThat(snippets[0].getText(), equalTo("The <b>quick</b> <b>brown</b> <b>fox</b>."));
+        }
+    }
+}