diff options
author | Martijn van Groningen <martijn.v.groningen@gmail.com> | 2017-06-23 21:22:14 +0200 |
---|---|---|
committer | Martijn van Groningen <martijn.v.groningen@gmail.com> | 2017-06-26 09:02:16 +0200 |
commit | a34f5fa8127595534d919646d73dd7a88c21fa65 (patch) | |
tree | cefbd73eee7bffa5d2e88df3378c9e501c8c9cdd /core/src/test/java | |
parent | 1583f8104725eca4779a0a0fd9886839c4c615a3 (diff) |
Move more token filters to analysis-common module
The following token filters were moved: stemmer, stemmer_override, kstem, dictionary_decompounder, hyphenation_decompounder, reverse, elision and truncate.
Relates to #23658
Diffstat (limited to 'core/src/test/java')
6 files changed, 8 insertions, 297 deletions
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/CompoundAnalysisTests.java b/core/src/test/java/org/elasticsearch/index/analysis/CompoundAnalysisTests.java deleted file mode 100644 index e873433116..0000000000 --- a/core/src/test/java/org/elasticsearch/index/analysis/CompoundAnalysisTests.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.elasticsearch.Version; -import org.elasticsearch.cluster.metadata.IndexMetaData; -import org.elasticsearch.common.lucene.all.AllEntries; -import org.elasticsearch.common.lucene.all.AllTokenStream; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory; -import org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory; -import org.elasticsearch.indices.analysis.AnalysisModule; -import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; -import org.elasticsearch.plugins.AnalysisPlugin; -import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.IndexSettingsModule; -import org.hamcrest.MatcherAssert; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import static java.util.Collections.singletonList; -import static java.util.Collections.singletonMap; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.hasItems; -import static org.hamcrest.Matchers.instanceOf; - -public class CompoundAnalysisTests extends ESTestCase { - public void testDefaultsCompoundAnalysis() throws Exception { - Settings settings = getJsonSettings(); - IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings); - AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() { - @Override - public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() { - return singletonMap("myfilter", MyFilterTokenFilterFactory::new); - } - })); - TokenFilterFactory filterFactory = analysisModule.getAnalysisRegistry().buildTokenFilterFactories(idxSettings).get("dict_dec"); - MatcherAssert.assertThat(filterFactory, instanceOf(DictionaryCompoundWordTokenFilterFactory.class)); - } - - public void testDictionaryDecompounder() throws Exception { - Settings[] settingsArr = new Settings[]{getJsonSettings(), getYamlSettings()}; - for (Settings settings : settingsArr) { - List<String> terms = analyze(settings, "decompoundingAnalyzer", "donaudampfschiff spargelcremesuppe"); - MatcherAssert.assertThat(terms.size(), equalTo(8)); - MatcherAssert.assertThat(terms, - hasItems("donau", "dampf", "schiff", "donaudampfschiff", "spargel", "creme", "suppe", "spargelcremesuppe")); - } - } - - private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException { - IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings); - AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() { - @Override - public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() { - return singletonMap("myfilter", MyFilterTokenFilterFactory::new); - } - })); - IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings); - Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer(); - - AllEntries allEntries = new AllEntries(); - allEntries.addText("field1", text, 1.0f); - - TokenStream stream = AllTokenStream.allTokenStream("_all", text, 1.0f, analyzer); - stream.reset(); - CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); - - List<String> terms = new ArrayList<>(); - while (stream.incrementToken()) { - String tokText = termAtt.toString(); - terms.add(tokText); - } - return terms; - } - - private Settings getJsonSettings() throws IOException { - String json = "/org/elasticsearch/index/analysis/test1.json"; - return Settings.builder() - .loadFromStream(json, getClass().getResourceAsStream(json)) - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .build(); - } - - private Settings getYamlSettings() throws IOException { - String yaml = "/org/elasticsearch/index/analysis/test1.yml"; - return Settings.builder() - .loadFromStream(yaml, getClass().getResourceAsStream(yaml)) - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .build(); - } -} diff --git a/core/src/test/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactoryTests.java b/core/src/test/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactoryTests.java deleted file mode 100644 index c4632e5749..0000000000 --- a/core/src/test/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactoryTests.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.core.WhitespaceTokenizer; -import org.apache.lucene.analysis.en.PorterStemFilter; -import org.apache.lucene.analysis.snowball.SnowballFilter; -import org.elasticsearch.Version; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.ESTokenStreamTestCase; -import org.elasticsearch.test.VersionUtils; - -import java.io.IOException; -import java.io.StringReader; - -import static com.carrotsearch.randomizedtesting.RandomizedTest.scaledRandomIntBetween; -import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_VERSION_CREATED; -import static org.hamcrest.Matchers.instanceOf; - -public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase { - public void testEnglishFilterFactory() throws IOException { - int iters = scaledRandomIntBetween(20, 100); - for (int i = 0; i < iters; i++) { - Version v = VersionUtils.randomVersion(random()); - Settings settings = Settings.builder() - .put("index.analysis.filter.my_english.type", "stemmer") - .put("index.analysis.filter.my_english.language", "english") - .put("index.analysis.analyzer.my_english.tokenizer","whitespace") - .put("index.analysis.analyzer.my_english.filter","my_english") - .put(SETTING_VERSION_CREATED,v) - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .build(); - - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); - TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_english"); - assertThat(tokenFilter, instanceOf(StemmerTokenFilterFactory.class)); - Tokenizer tokenizer = new WhitespaceTokenizer(); - tokenizer.setReader(new StringReader("foo bar")); - TokenStream create = tokenFilter.create(tokenizer); - IndexAnalyzers indexAnalyzers = analysis.indexAnalyzers; - NamedAnalyzer analyzer = indexAnalyzers.get("my_english"); - assertThat(create, instanceOf(PorterStemFilter.class)); - assertAnalyzesTo(analyzer, "consolingly", new String[]{"consolingli"}); - } - - } - - public void testPorter2FilterFactory() throws IOException { - int iters = scaledRandomIntBetween(20, 100); - for (int i = 0; i < iters; i++) { - - Version v = VersionUtils.randomVersion(random()); - Settings settings = Settings.builder() - .put("index.analysis.filter.my_porter2.type", "stemmer") - .put("index.analysis.filter.my_porter2.language", "porter2") - .put("index.analysis.analyzer.my_porter2.tokenizer","whitespace") - .put("index.analysis.analyzer.my_porter2.filter","my_porter2") - .put(SETTING_VERSION_CREATED,v) - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .build(); - - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); - TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_porter2"); - assertThat(tokenFilter, instanceOf(StemmerTokenFilterFactory.class)); - Tokenizer tokenizer = new WhitespaceTokenizer(); - tokenizer.setReader(new StringReader("foo bar")); - TokenStream create = tokenFilter.create(tokenizer); - IndexAnalyzers indexAnalyzers = analysis.indexAnalyzers; - NamedAnalyzer analyzer = indexAnalyzers.get("my_porter2"); - assertThat(create, instanceOf(SnowballFilter.class)); - assertAnalyzesTo(analyzer, "possibly", new String[]{"possibl"}); - } - - } - -} diff --git a/core/src/test/java/org/elasticsearch/index/analysis/filter1/MyFilterTokenFilterFactory.java b/core/src/test/java/org/elasticsearch/index/analysis/filter1/MyFilterTokenFilterFactory.java deleted file mode 100644 index 1c9a479813..0000000000 --- a/core/src/test/java/org/elasticsearch/index/analysis/filter1/MyFilterTokenFilterFactory.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.elasticsearch.index.analysis.filter1; - -import org.apache.lucene.analysis.StopFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.core.StopAnalyzer; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; - -public class MyFilterTokenFilterFactory extends AbstractTokenFilterFactory { - - public MyFilterTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { - super(indexSettings, name, Settings.Builder.EMPTY_SETTINGS); - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return new StopFilter(tokenStream, StopAnalyzer.ENGLISH_STOP_WORDS_SET); - } -} diff --git a/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java b/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java index b3394d4f4f..a740f96cdd 100644 --- a/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java +++ b/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java @@ -47,7 +47,7 @@ import org.elasticsearch.index.analysis.PreConfiguredTokenizer; import org.elasticsearch.index.analysis.StandardTokenizerFactory; import org.elasticsearch.index.analysis.StopTokenFilterFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; -import org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory; +import org.elasticsearch.index.analysis.MyFilterTokenFilterFactory; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.test.ESTestCase; @@ -196,18 +196,6 @@ public class AnalysisModuleTests extends ESTestCase { // assertThat(czechstemmeranalyzer.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class)); // assertThat(czechstemmeranalyzer.tokenFilters().length, equalTo(4)); // assertThat(czechstemmeranalyzer.tokenFilters()[3], instanceOf(CzechStemTokenFilterFactory.class)); -// -// // check dictionary decompounder -// analyzer = analysisService.analyzer("decompoundingAnalyzer").analyzer(); -// assertThat(analyzer, instanceOf(CustomAnalyzer.class)); -// CustomAnalyzer dictionaryDecompounderAnalyze = (CustomAnalyzer) analyzer; -// assertThat(dictionaryDecompounderAnalyze.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class)); -// assertThat(dictionaryDecompounderAnalyze.tokenFilters().length, equalTo(1)); -// assertThat(dictionaryDecompounderAnalyze.tokenFilters()[0], instanceOf(DictionaryCompoundWordTokenFilterFactory.class)); - - Set<?> wordList = Analysis.getWordSet(null, Version.CURRENT, settings, "index.analysis.filter.dict_dec.word_list"); - MatcherAssert.assertThat(wordList.size(), equalTo(6)); -// MatcherAssert.assertThat(wordList, hasItems("donau", "dampf", "schiff", "spargel", "creme", "suppe")); } public void testWordListPath() throws Exception { diff --git a/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java b/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java index dd556c56e3..6e0c61c154 100644 --- a/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java +++ b/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java @@ -93,16 +93,16 @@ public class AnalyzeActionIT extends ESIntegTestCase { assertThat(analyzeResponse.getTokens().size(), equalTo(1)); assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test")); - analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("standard").addTokenFilter("lowercase").addTokenFilter("reverse").get(); + analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("standard").addTokenFilter("lowercase").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(4)); AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0); - assertThat(token.getTerm(), equalTo("siht")); + assertThat(token.getTerm(), equalTo("this")); token = analyzeResponse.getTokens().get(1); - assertThat(token.getTerm(), equalTo("si")); + assertThat(token.getTerm(), equalTo("is")); token = analyzeResponse.getTokens().get(2); assertThat(token.getTerm(), equalTo("a")); token = analyzeResponse.getTokens().get(3); - assertThat(token.getTerm(), equalTo("tset")); + assertThat(token.getTerm(), equalTo("test")); analyzeResponse = client().admin().indices().prepareAnalyze("of course").setTokenizer("standard").addTokenFilter("stop").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(1)); diff --git a/core/src/test/java/org/elasticsearch/search/suggest/SuggestSearchIT.java b/core/src/test/java/org/elasticsearch/search/suggest/SuggestSearchIT.java index 035fd847ad..5142c25229 100644 --- a/core/src/test/java/org/elasticsearch/search/suggest/SuggestSearchIT.java +++ b/core/src/test/java/org/elasticsearch/search/suggest/SuggestSearchIT.java @@ -445,8 +445,6 @@ public class SuggestSearchIT extends ESIntegTestCase { public void testPrefixLength() throws IOException { CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder() .put(SETTING_NUMBER_OF_SHARDS, 1) - .put("index.analysis.analyzer.reverse.tokenizer", "standard") - .putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse") .put("index.analysis.analyzer.body.tokenizer", "standard") .putArray("index.analysis.analyzer.body.filter", "lowercase") .put("index.analysis.analyzer.bigram.tokenizer", "standard") @@ -458,7 +456,6 @@ public class SuggestSearchIT extends ESIntegTestCase { XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") .startObject("properties") .startObject("body").field("type", "text").field("analyzer", "body").endObject() - .startObject("body_reverse").field("type", "text").field("analyzer", "reverse").endObject() .startObject("bigram").field("type", "text").field("analyzer", "bigram").endObject() .endObject() .endObject().endObject(); @@ -486,8 +483,6 @@ public class SuggestSearchIT extends ESIntegTestCase { public void testBasicPhraseSuggest() throws IOException, URISyntaxException { CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder() .put(indexSettings()) - .put("index.analysis.analyzer.reverse.tokenizer", "standard") - .putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse") .put("index.analysis.analyzer.body.tokenizer", "standard") .putArray("index.analysis.analyzer.body.filter", "lowercase") .put("index.analysis.analyzer.bigram.tokenizer", "standard") @@ -503,10 +498,6 @@ public class SuggestSearchIT extends ESIntegTestCase { field("type", "text"). field("analyzer", "body") .endObject() - .startObject("body_reverse"). - field("type", "text"). - field("analyzer", "reverse") - .endObject() .startObject("bigram"). field("type", "text"). field("analyzer", "bigram") @@ -536,7 +527,7 @@ public class SuggestSearchIT extends ESIntegTestCase { "Police sergeant who stops the film", }; for (String line : strings) { - index("test", "type1", line, "body", line, "body_reverse", line, "bigram", line); + index("test", "type1", line, "body", line, "bigram", line); } refresh(); @@ -576,14 +567,6 @@ public class SuggestSearchIT extends ESIntegTestCase { searchSuggest = searchSuggest( "Arthur, King of the Britons", "simple_phrase", phraseSuggest); assertSuggestion(searchSuggest, 0, "simple_phrase", "arthur king of the britons"); - //test reverse suggestions with pre & post filter - phraseSuggest - .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always")) - .addCandidateGenerator(candidateGenerator("body_reverse").minWordLength(1).suggestMode("always").preFilter("reverse") - .postFilter("reverse")); - searchSuggest = searchSuggest( "Artur, Ging of the Britons", "simple_phrase", phraseSuggest); - assertSuggestion(searchSuggest, 0, "simple_phrase", "arthur king of the britons"); - // set all mass to trigrams (not indexed) phraseSuggest.clearCandidateGenerators() .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always")) @@ -633,8 +616,6 @@ public class SuggestSearchIT extends ESIntegTestCase { public void testSizeParam() throws IOException { CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder() .put(SETTING_NUMBER_OF_SHARDS, 1) - .put("index.analysis.analyzer.reverse.tokenizer", "standard") - .putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse") .put("index.analysis.analyzer.body.tokenizer", "standard") .putArray("index.analysis.analyzer.body.filter", "lowercase") .put("index.analysis.analyzer.bigram.tokenizer", "standard") @@ -652,10 +633,6 @@ public class SuggestSearchIT extends ESIntegTestCase { .field("type", "text") .field("analyzer", "body") .endObject() - .startObject("body_reverse") - .field("type", "text") - .field("analyzer", "reverse") - .endObject() .startObject("bigram") .field("type", "text") .field("analyzer", "bigram") @@ -667,9 +644,9 @@ public class SuggestSearchIT extends ESIntegTestCase { ensureGreen(); String line = "xorr the god jewel"; - index("test", "type1", "1", "body", line, "body_reverse", line, "bigram", line); + index("test", "type1", "1", "body", line, "bigram", line); line = "I got it this time"; - index("test", "type1", "2", "body", line, "body_reverse", line, "bigram", line); + index("test", "type1", "2", "body", line, "bigram", line); refresh(); PhraseSuggestionBuilder phraseSuggestion = phraseSuggestion("bigram") |