summary refs log tree commit diff
path: root/core/src/test/java/org/apache
diff options
context:
space:
mode:
Diffstat (limited to 'core/src/test/java/org/apache')
-rw-r--r-- core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java 1
-rw-r--r-- core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java 1
-rw-r--r-- core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java 105
-rw-r--r-- core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java 259
4 files changed, 366 insertions, 0 deletions
diff --git a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java
index fcddc58f77..2d43a1ca64 100644
--- a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java
+++ b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java
@@ -19,6 +19,7 @@
package org.apache.lucene.search.postingshighlight;
+import org.apache.lucene.search.highlight.Snippet;
import org.apache.lucene.search.highlight.DefaultEncoder;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.util.BytesRef;
diff --git a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java
index c95819a0e5..315e38d12f 100644
--- a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java
+++ b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java
@@ -31,6 +31,7 @@ import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.highlight.Snippet;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
diff --git a/core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java
new file mode 100644
index 0000000000..4e664c3e24
--- /dev/null
+++ b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search.uhighlight;
+
+import org.apache.lucene.search.highlight.Snippet;
+import org.apache.lucene.search.highlight.DefaultEncoder;
+import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.test.ESTestCase;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.notNullValue;
+
+
+public class CustomPassageFormatterTests extends ESTestCase {
+    /**
+     * Builds three consecutive passages over a plain-text paragraph by hand and checks the
+     * snippets the formatter returns for them: the two passages that carry a match come back
+     * with the match wrapped in the configured tags and flagged as highlighted, the last one
+     * comes back unmarked.
+     */
+    public void testSimpleFormat() {
+        final String text = "This is a really cool highlighter. Unified highlighter gives nice snippets back. No matches here.";
+        final String term = "highlighter";
+        final BytesRef termBytes = new BytesRef(term);
+
+        // First sentence. The passage ends two characters past the match (the full stop plus the
+        // blank that follows it); the expected text further down has no trailing whitespace.
+        final int firstMatchStart = text.indexOf(term);
+        final int firstMatchEnd = firstMatchStart + term.length();
+        final Passage first = new Passage();
+        first.setStartOffset(0);
+        first.setEndOffset(firstMatchEnd + 2);
+        first.addMatch(firstMatchStart, firstMatchEnd, termBytes);
+
+        // Second sentence. Starts where the first passage stops and ends 26 characters past the
+        // last occurrence of the term, i.e. right after "back.".
+        final int secondMatchStart = text.lastIndexOf(term);
+        final int secondMatchEnd = secondMatchStart + term.length();
+        final Passage second = new Passage();
+        second.setStartOffset(first.getEndOffset());
+        second.setEndOffset(secondMatchEnd + 26);
+        second.addMatch(secondMatchStart, secondMatchEnd, termBytes);
+
+        // Remainder of the text, no match registered on this passage.
+        final Passage third = new Passage();
+        third.setStartOffset(second.getEndOffset());
+        third.setEndOffset(text.length());
+
+        final Passage[] passages = {first, second, third};
+        final CustomPassageFormatter formatter = new CustomPassageFormatter("<em>", "</em>", new DefaultEncoder());
+
+        final Snippet[] snippets = formatter.format(passages, text);
+        assertThat(snippets, notNullValue());
+        assertThat(snippets.length, equalTo(3));
+
+        assertThat(snippets[0].getText(), equalTo("This is a really cool <em>highlighter</em>."));
+        assertThat(snippets[0].isHighlighted(), equalTo(true));
+
+        assertThat(snippets[1].getText(), equalTo("Unified <em>highlighter</em> gives nice snippets back."));
+        assertThat(snippets[1].isHighlighted(), equalTo(true));
+
+        assertThat(snippets[2].getText(), equalTo("No matches here."));
+        assertThat(snippets[2].isHighlighted(), equalTo(false));
+    }
+
+    /**
+     * Formats two hand-built passages with a {@link SimpleHTMLEncoder} and checks that markup
+     * contained in the source text shows up escaped in the snippet, while the highlight tags
+     * passed to the formatter are emitted literally.
+     */
+    public void testHtmlEncodeFormat() {
+        final String text = "<b>This is a really cool highlighter.</b> Unified highlighter gives nice snippets back.";
+        final String term = "highlighter";
+        final BytesRef termBytes = new BytesRef(term);
+
+        // First passage ends six characters past the match: the full stop, the closing bold tag
+        // and the blank that follows it. The expected text below has no trailing whitespace.
+        final int firstMatchStart = text.indexOf(term);
+        final int firstMatchEnd = firstMatchStart + term.length();
+        final Passage first = new Passage();
+        first.setStartOffset(0);
+        first.setEndOffset(firstMatchEnd + 6);
+        first.addMatch(firstMatchStart, firstMatchEnd, termBytes);
+
+        // Second passage covers everything from the end of the first one to the end of the text.
+        final int secondMatchStart = text.lastIndexOf(term);
+        final int secondMatchEnd = secondMatchStart + term.length();
+        final Passage second = new Passage();
+        second.setStartOffset(first.getEndOffset());
+        second.setEndOffset(text.length());
+        second.addMatch(secondMatchStart, secondMatchEnd, termBytes);
+
+        final Passage[] passages = {first, second};
+        final CustomPassageFormatter formatter = new CustomPassageFormatter("<em>", "</em>", new SimpleHTMLEncoder());
+
+        final Snippet[] snippets = formatter.format(passages, text);
+        assertThat(snippets, notNullValue());
+        assertThat(snippets.length, equalTo(2));
+        assertThat(snippets[0].getText(), equalTo("&lt;b&gt;This is a really cool <em>highlighter</em>.&lt;&#x2F;b&gt;"));
+        assertThat(snippets[1].getText(), equalTo("Unified <em>highlighter</em> gives nice snippets back."));
+    }
+}
diff --git a/core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java
new file mode 100644
index 0000000000..83b42750f9
--- /dev/null
+++ b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java
@@ -0,0 +1,259 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search.uhighlight;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.highlight.DefaultEncoder;
+import org.apache.lucene.search.highlight.Snippet;
+import org.apache.lucene.store.Directory;
+import org.elasticsearch.common.lucene.all.AllTermQuery;
+import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightUtils;
+import org.elasticsearch.test.ESTestCase;
+
+import java.io.IOException;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+
+public class CustomUnifiedHighlighterTests extends ESTestCase {
+    /**
+     * Indexes one document holding four values of the "body" field, highlights a term query
+     * against those values joined with {@link HighlightUtils#PARAGRAPH_SEPARATOR} and expects
+     * one snippet per value, in the order in which the values were added to the document.
+     */
+    public void testCustomUnifiedHighlighter() throws Exception {
+        Directory dir = newDirectory();
+        IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
+        iwc.setMergePolicy(newLogMergePolicy());
+        RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+
+        // stored text field with offsets in the postings and full term vectors
+        FieldType withOffsets = new FieldType(TextField.TYPE_STORED);
+        withOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+        withOffsets.setStoreTermVectorOffsets(true);
+        withOffsets.setStoreTermVectorPositions(true);
+        withOffsets.setStoreTermVectors(true);
+
+        // a single occurrence of the query term, preceded by a sentence without one
+        final String firstValue = "This is a test. Just a test1 highlighting from unified highlighter.";
+        // two occurrences inside a longer text
+        final String secondValue = "This is the second highlighting value to perform highlighting on a longer text " +
+            "that gets scored lower.";
+        // two occurrences inside a short text
+        final String thirdValue = "This is highlighting the third short highlighting value.";
+        // a single occurrence, worded like the tail of the first value
+        final String fourthValue = "Just a test4 highlighting from unified highlighter.";
+
+        final String[] values = {firstValue, secondValue, thirdValue, fourthValue};
+        final String[] expectedSnippets = {
+            "Just a test1 <b>highlighting</b> from unified highlighter.",
+            "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a" +
+                " longer text that gets scored lower.",
+            "This is <b>highlighting</b> the third short <b>highlighting</b> value.",
+            "Just a test4 <b>highlighting</b> from unified highlighter."
+        };
+
+        // every value becomes its own "body" field instance on the same document
+        Document doc = new Document();
+        for (String value : values) {
+            Field bodyField = new Field("body", "", withOffsets);
+            bodyField.setStringValue(value);
+            doc.add(bodyField);
+        }
+        writer.addDocument(doc);
+
+        IndexReader reader = writer.getReader();
+        writer.close();
+
+        IndexSearcher searcher = newSearcher(reader);
+        Query query = new TermQuery(new Term("body", "highlighting"));
+
+        TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
+        assertThat(hits.totalHits, equalTo(1));
+        int docId = hits.scoreDocs[0].doc;
+
+        // the highlighter is handed the field content explicitly: all values, separator-joined
+        String joinedValues = firstValue + HighlightUtils.PARAGRAPH_SEPARATOR + secondValue +
+            HighlightUtils.PARAGRAPH_SEPARATOR + thirdValue + HighlightUtils.PARAGRAPH_SEPARATOR + fourthValue;
+
+        CustomPassageFormatter formatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
+        CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, iwc.getAnalyzer(),
+            formatter, null, joinedValues, true);
+        Snippet[] snippets = highlighter.highlightField("body", query, docId, 5);
+
+        assertThat(snippets.length, equalTo(4));
+        for (int i = 0; i < expectedSnippets.length; i++) {
+            assertThat(snippets[i].getText(), equalTo(expectedSnippets[i]));
+        }
+
+        reader.close();
+        dir.close();
+    }
+
+    /**
+     * Highlights the "body" field with a query that targets the "none" field, so no term of the
+     * query can be marked in "body". With the last constructor argument set to {@code false} no
+     * snippet is expected; with {@code true} exactly one unmarked snippet, the first sentence,
+     * is expected (presumably the flag enables returning non-highlighted snippets - confirm
+     * against CustomUnifiedHighlighter).
+     */
+    public void testNoMatchSize() throws Exception {
+        Directory dir = newDirectory();
+        Analyzer analyzer = new StandardAnalyzer();
+        IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
+        iwc.setMergePolicy(newLogMergePolicy());
+        RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+
+        FieldType withOffsets = new FieldType(TextField.TYPE_STORED);
+        withOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+        withOffsets.setStoreTermVectorOffsets(true);
+        withOffsets.setStoreTermVectorPositions(true);
+        withOffsets.setStoreTermVectors(true);
+
+        // both fields carry the very same text
+        final String text = "This is a test. Just a test highlighting from unified. Feel free to ignore.";
+        Field bodyField = new Field("body", "", withOffsets);
+        Field noneField = new Field("none", "", withOffsets);
+        bodyField.setStringValue(text);
+        noneField.setStringValue(text);
+
+        Document doc = new Document();
+        doc.add(bodyField);
+        doc.add(noneField);
+        writer.addDocument(doc);
+
+        IndexReader reader = writer.getReader();
+        writer.close();
+
+        // the query matches the document through "none", not through the highlighted field
+        Query query = new TermQuery(new Term("none", "highlighting"));
+
+        IndexSearcher searcher = newSearcher(reader);
+        TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
+        assertThat(hits.totalHits, equalTo(1));
+        int docId = hits.scoreDocs[0].doc;
+
+        CustomPassageFormatter formatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
+
+        // flag off: nothing comes back
+        CustomUnifiedHighlighter withoutFallback = new CustomUnifiedHighlighter(searcher, analyzer, formatter,
+            null, text, false);
+        Snippet[] noSnippets = withoutFallback.highlightField("body", query, docId, 5);
+        assertThat(noSnippets.length, equalTo(0));
+
+        // flag on: the first sentence comes back without any tags
+        CustomUnifiedHighlighter withFallback = new CustomUnifiedHighlighter(searcher, analyzer, formatter,
+            null, text, true);
+        Snippet[] fallbackSnippets = withFallback.highlightField("body", query, docId, 5);
+        assertThat(fallbackSnippets.length, equalTo(1));
+        assertThat(fallbackSnippets[0].getText(), equalTo("This is a test."));
+
+        reader.close();
+        dir.close();
+    }
+
+
+    /**
+     * Writes a single document with one stored text field (offsets indexed in the postings) into
+     * the given directory and hands back a reader obtained from the writer, which is closed
+     * before returning. The tests in this class close the returned reader themselves.
+     *
+     * @param dir      directory the index is written to
+     * @param field    name of the only field of the document
+     * @param value    text stored and indexed in that field
+     * @param analyzer analyzer the index writer is configured with
+     * @return reader over the freshly written document
+     */
+    private IndexReader indexOneDoc(Directory dir, String field, String value, Analyzer analyzer) throws IOException {
+        IndexWriterConfig writerConfig = newIndexWriterConfig(analyzer);
+        writerConfig.setMergePolicy(newLogMergePolicy());
+        RandomIndexWriter writer = new RandomIndexWriter(random(), dir, writerConfig);
+
+        FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
+        storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+
+        Field onlyField = new Field(field, "", storedWithOffsets);
+        onlyField.setStringValue(value);
+
+        Document document = new Document();
+        document.add(onlyField);
+        writer.addDocument(document);
+
+        // grab the reader first, then release the writer
+        IndexReader reader = writer.getReader();
+        writer.close();
+        return reader;
+    }
+
+    /**
+     * Highlights a {@link MultiPhrasePrefixQuery} made of "quick", "brown" and the prefix "fo"
+     * and expects all three words of the indexed sentence, including "fox", to be tagged.
+     */
+    public void testMultiPhrasePrefixQuery() throws Exception {
+        final String fieldName = "text";
+        final String sentence = "The quick brown fox.";
+
+        Analyzer analyzer = new StandardAnalyzer();
+        Directory dir = newDirectory();
+        IndexReader reader = indexOneDoc(dir, fieldName, sentence, analyzer);
+
+        MultiPhrasePrefixQuery phrasePrefix = new MultiPhrasePrefixQuery();
+        phrasePrefix.add(new Term(fieldName, "quick"));
+        phrasePrefix.add(new Term(fieldName, "brown"));
+        phrasePrefix.add(new Term(fieldName, "fo"));
+
+        IndexSearcher searcher = newSearcher(reader);
+        TopDocs hits = searcher.search(phrasePrefix, 10, Sort.INDEXORDER);
+        assertThat(hits.totalHits, equalTo(1));
+        int docId = hits.scoreDocs[0].doc;
+
+        CustomPassageFormatter formatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
+        CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
+            formatter, null, sentence, false);
+
+        Snippet[] snippets = highlighter.highlightField(fieldName, phrasePrefix, docId, 5);
+        assertThat(snippets.length, equalTo(1));
+        assertThat(snippets[0].getText(), equalTo("The <b>quick</b> <b>brown</b> <b>fox</b>."));
+
+        reader.close();
+        dir.close();
+    }
+
+    /**
+     * Highlights an {@link AllTermQuery} for "fox" on the "all" field and expects only that
+     * word of the indexed sentence to be tagged.
+     */
+    public void testAllTermQuery() throws IOException {
+        final String fieldName = "all";
+        final String sentence = "The quick brown fox.";
+
+        Directory dir = newDirectory();
+        Analyzer analyzer = new StandardAnalyzer();
+        IndexReader reader = indexOneDoc(dir, fieldName, sentence, analyzer);
+
+        AllTermQuery allTerm = new AllTermQuery(new Term(fieldName, "fox"));
+
+        IndexSearcher searcher = newSearcher(reader);
+        TopDocs hits = searcher.search(allTerm, 10, Sort.INDEXORDER);
+        assertThat(hits.totalHits, equalTo(1));
+        int docId = hits.scoreDocs[0].doc;
+
+        CustomPassageFormatter formatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
+        CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
+            formatter, null, sentence, false);
+
+        Snippet[] snippets = highlighter.highlightField(fieldName, allTerm, docId, 5);
+        assertThat(snippets.length, equalTo(1));
+        assertThat(snippets[0].getText(), equalTo("The quick brown <b>fox</b>."));
+
+        reader.close();
+        dir.close();
+    }
+
+    /**
+     * Highlights a {@link CommonTermsQuery} over "quick", "brown" and "fox" (both occurs set to
+     * SHOULD, third constructor argument 128) and expects all three words of the indexed
+     * sentence to be tagged.
+     */
+    public void testCommonTermsQuery() throws IOException {
+        final String fieldName = "text";
+        final String sentence = "The quick brown fox.";
+
+        Directory dir = newDirectory();
+        Analyzer analyzer = new StandardAnalyzer();
+        IndexReader reader = indexOneDoc(dir, fieldName, sentence, analyzer);
+
+        CommonTermsQuery commonTerms = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 128);
+        commonTerms.add(new Term(fieldName, "quick"));
+        commonTerms.add(new Term(fieldName, "brown"));
+        commonTerms.add(new Term(fieldName, "fox"));
+
+        IndexSearcher searcher = newSearcher(reader);
+        TopDocs hits = searcher.search(commonTerms, 10, Sort.INDEXORDER);
+        assertThat(hits.totalHits, equalTo(1));
+        int docId = hits.scoreDocs[0].doc;
+
+        CustomPassageFormatter formatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
+        CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
+            formatter, null, sentence, false);
+
+        Snippet[] snippets = highlighter.highlightField(fieldName, commonTerms, docId, 5);
+        assertThat(snippets.length, equalTo(1));
+        assertThat(snippets[0].getText(), equalTo("The <b>quick</b> <b>brown</b> <b>fox</b>."));
+
+        reader.close();
+        dir.close();
+    }
+}