diff options
Diffstat (limited to 'core/src/test/java/org/apache')
4 files changed, 366 insertions, 0 deletions
diff --git a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java
index fcddc58f77..2d43a1ca64 100644
--- a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java
+++ b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java
@@ -19,6 +19,7 @@
 
 package org.apache.lucene.search.postingshighlight;
 
+import org.apache.lucene.search.highlight.Snippet;
 import org.apache.lucene.search.highlight.DefaultEncoder;
 import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
 import org.apache.lucene.util.BytesRef;
diff --git a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java
index c95819a0e5..315e38d12f 100644
--- a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java
+++ b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java
@@ -31,6 +31,7 @@ import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.highlight.Snippet;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
diff --git a/core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java
new file mode 100644
index 0000000000..4e664c3e24
--- /dev/null
+++ b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search.uhighlight;
+
+import org.apache.lucene.search.highlight.Snippet;
+import org.apache.lucene.search.highlight.DefaultEncoder;
+import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.test.ESTestCase;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.notNullValue;
+
+
+public class CustomPassageFormatterTests extends ESTestCase {
+    public void testSimpleFormat() {
+        String content = "This is a really cool highlighter. Unified highlighter gives nice snippets back. No matches here.";
+
+        CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<em>", "</em>", new DefaultEncoder());
+
+        Passage[] passages = new Passage[3];
+        String match = "highlighter";
+        BytesRef matchBytesRef = new BytesRef(match);
+
+        Passage passage1 = new Passage();
+        int start = content.indexOf(match);
+        int end = start + match.length();
+        passage1.setStartOffset(0);
+        passage1.setEndOffset(end + 2); //lets include the whitespace at the end to make sure we trim it
+        passage1.addMatch(start, end, matchBytesRef);
+        passages[0] = passage1;
+
+        Passage passage2 = new Passage();
+        start = content.lastIndexOf(match);
+        end = start + match.length();
+        passage2.setStartOffset(passage1.getEndOffset());
+        passage2.setEndOffset(end + 26);
+        passage2.addMatch(start, end, matchBytesRef);
+        passages[1] = passage2;
+
+        Passage passage3 = new Passage();
+        passage3.setStartOffset(passage2.getEndOffset());
+        passage3.setEndOffset(content.length());
+        passages[2] = passage3;
+
+        Snippet[] fragments = passageFormatter.format(passages, content);
+        assertThat(fragments, notNullValue());
+        assertThat(fragments.length, equalTo(3));
+        assertThat(fragments[0].getText(), equalTo("This is a really cool <em>highlighter</em>."));
+        assertThat(fragments[0].isHighlighted(), equalTo(true));
+        assertThat(fragments[1].getText(), equalTo("Unified <em>highlighter</em> gives nice snippets back."));
+        assertThat(fragments[1].isHighlighted(), equalTo(true));
+        assertThat(fragments[2].getText(), equalTo("No matches here."));
+        assertThat(fragments[2].isHighlighted(), equalTo(false));
+    }
+
+    public void testHtmlEncodeFormat() {
+        String content = "<b>This is a really cool highlighter.</b> Unified highlighter gives nice snippets back.";
+
+        CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<em>", "</em>", new SimpleHTMLEncoder());
+
+        Passage[] passages = new Passage[2];
+        String match = "highlighter";
+        BytesRef matchBytesRef = new BytesRef(match);
+
+        Passage passage1 = new Passage();
+        int start = content.indexOf(match);
+        int end = start + match.length();
+        passage1.setStartOffset(0);
+        passage1.setEndOffset(end + 6); //lets include the whitespace at the end to make sure we trim it
+        passage1.addMatch(start, end, matchBytesRef);
+        passages[0] = passage1;
+
+        Passage passage2 = new Passage();
+        start = content.lastIndexOf(match);
+        end = start + match.length();
+        passage2.setStartOffset(passage1.getEndOffset());
+        passage2.setEndOffset(content.length());
+        passage2.addMatch(start, end, matchBytesRef);
+        passages[1] = passage2;
+
+        Snippet[] fragments = passageFormatter.format(passages, content);
+        assertThat(fragments, notNullValue());
+        assertThat(fragments.length, equalTo(2));
+        assertThat(fragments[0].getText(), equalTo("&lt;b&gt;This is a really cool <em>highlighter</em>.&lt;&#x2F;b&gt;"));
+        assertThat(fragments[1].getText(), equalTo("Unified <em>highlighter</em> gives nice snippets back."));
+    }
+}
diff --git a/core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java
new file mode 100644
index 0000000000..83b42750f9
--- /dev/null
+++ b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java
@@ -0,0 +1,259 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search.uhighlight;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.highlight.DefaultEncoder;
+import org.apache.lucene.search.highlight.Snippet;
+import org.apache.lucene.store.Directory;
+import org.elasticsearch.common.lucene.all.AllTermQuery;
+import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightUtils;
+import org.elasticsearch.test.ESTestCase;
+
+import java.io.IOException;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+
+public class CustomUnifiedHighlighterTests extends ESTestCase {
+    public void testCustomUnifiedHighlighter() throws Exception {
+        Directory dir = newDirectory();
+        IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
+        iwc.setMergePolicy(newLogMergePolicy());
+        RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+        FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+        offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+        offsetsType.setStoreTermVectorOffsets(true);
+        offsetsType.setStoreTermVectorPositions(true);
+        offsetsType.setStoreTermVectors(true);
+
+        //good position but only one match
+        final String firstValue = "This is a test. Just a test1 highlighting from unified highlighter.";
+        Field body = new Field("body", "", offsetsType);
+        Document doc = new Document();
+        doc.add(body);
+        body.setStringValue(firstValue);
+
+        //two matches, not the best snippet due to its length though
+        final String secondValue = "This is the second highlighting value to perform highlighting on a longer text " +
+            "that gets scored lower.";
+        Field body2 = new Field("body", "", offsetsType);
+        doc.add(body2);
+        body2.setStringValue(secondValue);
+
+        //two matches and short, will be scored highest
+        final String thirdValue = "This is highlighting the third short highlighting value.";
+        Field body3 = new Field("body", "", offsetsType);
+        doc.add(body3);
+        body3.setStringValue(thirdValue);
+
+        //one match, same as first but at the end, will be scored lower due to its position
+        final String fourthValue = "Just a test4 highlighting from unified highlighter.";
+        Field body4 = new Field("body", "", offsetsType);
+        doc.add(body4);
+        body4.setStringValue(fourthValue);
+
+        iw.addDocument(doc);
+
+        IndexReader ir = iw.getReader();
+        iw.close();
+
+        String firstHlValue = "Just a test1 <b>highlighting</b> from unified highlighter.";
+        String secondHlValue = "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a" +
+            " longer text that gets scored lower.";
+        String thirdHlValue = "This is <b>highlighting</b> the third short <b>highlighting</b> value.";
+        String fourthHlValue = "Just a test4 <b>highlighting</b> from unified highlighter.";
+
+        IndexSearcher searcher = newSearcher(ir);
+        Query query = new TermQuery(new Term("body", "highlighting"));
+
+        TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+        assertThat(topDocs.totalHits, equalTo(1));
+
+        int docId = topDocs.scoreDocs[0].doc;
+
+        String fieldValue = firstValue + HighlightUtils.PARAGRAPH_SEPARATOR + secondValue +
+            HighlightUtils.PARAGRAPH_SEPARATOR + thirdValue + HighlightUtils.PARAGRAPH_SEPARATOR + fourthValue;
+
+        CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, iwc.getAnalyzer(),
+            new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), null, fieldValue, true);
+        Snippet[] snippets = highlighter.highlightField("body", query, docId, 5);
+
+        assertThat(snippets.length, equalTo(4));
+
+        assertThat(snippets[0].getText(), equalTo(firstHlValue));
+        assertThat(snippets[1].getText(), equalTo(secondHlValue));
+        assertThat(snippets[2].getText(), equalTo(thirdHlValue));
+        assertThat(snippets[3].getText(), equalTo(fourthHlValue));
+        ir.close();
+        dir.close();
+    }
+
+    public void testNoMatchSize() throws Exception {
+        Directory dir = newDirectory();
+        Analyzer analyzer = new StandardAnalyzer();
+        IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
+        iwc.setMergePolicy(newLogMergePolicy());
+        RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+        FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+        offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+        offsetsType.setStoreTermVectorOffsets(true);
+        offsetsType.setStoreTermVectorPositions(true);
+        offsetsType.setStoreTermVectors(true);
+        Field body = new Field("body", "", offsetsType);
+        Field none = new Field("none", "", offsetsType);
+        Document doc = new Document();
+        doc.add(body);
+        doc.add(none);
+
+        String firstValue = "This is a test. Just a test highlighting from unified. Feel free to ignore.";
+        body.setStringValue(firstValue);
+        none.setStringValue(firstValue);
+        iw.addDocument(doc);
+
+        IndexReader ir = iw.getReader();
+        iw.close();
+
+        Query query = new TermQuery(new Term("none", "highlighting"));
+
+        IndexSearcher searcher = newSearcher(ir);
+        TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+        assertThat(topDocs.totalHits, equalTo(1));
+        int docId = topDocs.scoreDocs[0].doc;
+
+        CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
+        CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer, passageFormatter,
+            null, firstValue, false);
+        Snippet[] snippets = highlighter.highlightField("body", query, docId, 5);
+        assertThat(snippets.length, equalTo(0));
+
+        highlighter = new CustomUnifiedHighlighter(searcher, analyzer, passageFormatter, null, firstValue, true);
+        snippets = highlighter.highlightField("body", query, docId, 5);
+        assertThat(snippets.length, equalTo(1));
+        assertThat(snippets[0].getText(), equalTo("This is a test."));
+        ir.close();
+        dir.close();
+    }
+
+
+    private IndexReader indexOneDoc(Directory dir, String field, String value, Analyzer analyzer) throws IOException {
+        IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
+        iwc.setMergePolicy(newLogMergePolicy());
+        RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+        FieldType ft = new FieldType(TextField.TYPE_STORED);
+        ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+        Field textField = new Field(field, "", ft);
+        Document doc = new Document();
+        doc.add(textField);
+
+        textField.setStringValue(value);
+        iw.addDocument(doc);
+        IndexReader ir = iw.getReader();
+        iw.close();
+        return ir;
+    }
+
+    public void testMultiPhrasePrefixQuery() throws Exception {
+        Analyzer analyzer = new StandardAnalyzer();
+        Directory dir = newDirectory();
+        String value = "The quick brown fox.";
+        IndexReader ir = indexOneDoc(dir, "text", value, analyzer);
+        MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery();
+        query.add(new Term("text", "quick"));
+        query.add(new Term("text", "brown"));
+        query.add(new Term("text", "fo"));
+        IndexSearcher searcher = newSearcher(ir);
+        TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+        assertThat(topDocs.totalHits, equalTo(1));
+        int docId = topDocs.scoreDocs[0].doc;
+        CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
+        CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
+            passageFormatter, null, value, false);
+        Snippet[] snippets = highlighter.highlightField("text", query, docId, 5);
+        assertThat(snippets.length, equalTo(1));
+        assertThat(snippets[0].getText(), equalTo("The <b>quick</b> <b>brown</b> <b>fox</b>."));
+        ir.close();
+        dir.close();
+    }
+
+    public void testAllTermQuery() throws IOException {
+        Directory dir = newDirectory();
+        String value = "The quick brown fox.";
+        Analyzer analyzer = new StandardAnalyzer();
+        IndexReader ir = indexOneDoc(dir, "all", value, analyzer);
+        AllTermQuery query = new AllTermQuery(new Term("all", "fox"));
+        IndexSearcher searcher = newSearcher(ir);
+        TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+        assertThat(topDocs.totalHits, equalTo(1));
+        int docId = topDocs.scoreDocs[0].doc;
+        CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
+        CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
+            passageFormatter, null, value, false);
+        Snippet[] snippets = highlighter.highlightField("all", query, docId, 5);
+        assertThat(snippets.length, equalTo(1));
+        assertThat(snippets[0].getText(), equalTo("The quick brown <b>fox</b>."));
+        ir.close();
+        dir.close();
+    }
+
+    public void testCommonTermsQuery() throws IOException {
+        Directory dir = newDirectory();
+        String value = "The quick brown fox.";
+        Analyzer analyzer = new StandardAnalyzer();
+        IndexReader ir = indexOneDoc(dir, "text", value, analyzer);
+        CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 128);
+        query.add(new Term("text", "quick"));
+        query.add(new Term("text", "brown"));
+        query.add(new Term("text", "fox"));
+        IndexSearcher searcher = newSearcher(ir);
+        TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+        assertThat(topDocs.totalHits, equalTo(1));
+        int docId = topDocs.scoreDocs[0].doc;
+        CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
+        CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
+            passageFormatter, null, value, false);
+        Snippet[] snippets = highlighter.highlightField("text", query, docId, 5);
+        assertThat(snippets.length, equalTo(1));
+        assertThat(snippets[0].getText(), equalTo("The <b>quick</b> <b>brown</b> <b>fox</b>."));
+        ir.close();
+        dir.close();
+    }
+}