diff options
author | Martijn van Groningen <martijn.v.groningen@gmail.com> | 2017-06-23 21:22:14 +0200 |
---|---|---|
committer | Martijn van Groningen <martijn.v.groningen@gmail.com> | 2017-06-26 09:02:16 +0200 |
commit | a34f5fa8127595534d919646d73dd7a88c21fa65 (patch) | |
tree | cefbd73eee7bffa5d2e88df3378c9e501c8c9cdd /core/src/test/java/org/elasticsearch/index | |
parent | 1583f8104725eca4779a0a0fd9886839c4c615a3 (diff) |
Move more token filters to analysis-common module
The following token filters were moved: stemmer, stemmer_override, kstem, dictionary_decompounder, hyphenation_decompounder, reverse, elision and truncate.
Relates to #23658
Diffstat (limited to 'core/src/test/java/org/elasticsearch/index')
3 files changed, 0 insertions, 254 deletions
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/CompoundAnalysisTests.java b/core/src/test/java/org/elasticsearch/index/analysis/CompoundAnalysisTests.java deleted file mode 100644 index e873433116..0000000000 --- a/core/src/test/java/org/elasticsearch/index/analysis/CompoundAnalysisTests.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.elasticsearch.Version; -import org.elasticsearch.cluster.metadata.IndexMetaData; -import org.elasticsearch.common.lucene.all.AllEntries; -import org.elasticsearch.common.lucene.all.AllTokenStream; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory; -import org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory; -import org.elasticsearch.indices.analysis.AnalysisModule; -import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; -import org.elasticsearch.plugins.AnalysisPlugin; -import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.IndexSettingsModule; -import org.hamcrest.MatcherAssert; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import static java.util.Collections.singletonList; -import static java.util.Collections.singletonMap; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.hasItems; -import static org.hamcrest.Matchers.instanceOf; - -public class CompoundAnalysisTests extends ESTestCase { - public void testDefaultsCompoundAnalysis() throws Exception { - Settings settings = getJsonSettings(); - IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings); - AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() { - @Override - public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() { - return singletonMap("myfilter", MyFilterTokenFilterFactory::new); - } - })); - TokenFilterFactory filterFactory = analysisModule.getAnalysisRegistry().buildTokenFilterFactories(idxSettings).get("dict_dec"); - MatcherAssert.assertThat(filterFactory, instanceOf(DictionaryCompoundWordTokenFilterFactory.class)); - } - - public void testDictionaryDecompounder() throws Exception { - Settings[] settingsArr = new Settings[]{getJsonSettings(), getYamlSettings()}; - for (Settings settings : settingsArr) { - List<String> terms = analyze(settings, "decompoundingAnalyzer", "donaudampfschiff spargelcremesuppe"); - MatcherAssert.assertThat(terms.size(), equalTo(8)); - MatcherAssert.assertThat(terms, - hasItems("donau", "dampf", "schiff", "donaudampfschiff", "spargel", "creme", "suppe", "spargelcremesuppe")); - } - } - - private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException { - IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings); - AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() { - @Override - public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() { - return singletonMap("myfilter", MyFilterTokenFilterFactory::new); - } - })); - IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings); - Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer(); - - AllEntries allEntries = new AllEntries(); - allEntries.addText("field1", text, 1.0f); - - TokenStream stream = AllTokenStream.allTokenStream("_all", text, 1.0f, analyzer); - stream.reset(); - CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); - - List<String> terms = new ArrayList<>(); - while (stream.incrementToken()) { - String tokText = termAtt.toString(); - terms.add(tokText); - } - return terms; - } - - private Settings getJsonSettings() throws IOException { - String json = "/org/elasticsearch/index/analysis/test1.json"; - return Settings.builder() - .loadFromStream(json, getClass().getResourceAsStream(json)) - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .build(); - } - - private Settings getYamlSettings() throws IOException { - String yaml = "/org/elasticsearch/index/analysis/test1.yml"; - return Settings.builder() - .loadFromStream(yaml, getClass().getResourceAsStream(yaml)) - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .build(); - } -} diff --git a/core/src/test/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactoryTests.java b/core/src/test/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactoryTests.java deleted file mode 100644 index c4632e5749..0000000000 --- a/core/src/test/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactoryTests.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.core.WhitespaceTokenizer; -import org.apache.lucene.analysis.en.PorterStemFilter; -import org.apache.lucene.analysis.snowball.SnowballFilter; -import org.elasticsearch.Version; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.ESTokenStreamTestCase; -import org.elasticsearch.test.VersionUtils; - -import java.io.IOException; -import java.io.StringReader; - -import static com.carrotsearch.randomizedtesting.RandomizedTest.scaledRandomIntBetween; -import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_VERSION_CREATED; -import static org.hamcrest.Matchers.instanceOf; - -public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase { - public void testEnglishFilterFactory() throws IOException { - int iters = scaledRandomIntBetween(20, 100); - for (int i = 0; i < iters; i++) { - Version v = VersionUtils.randomVersion(random()); - Settings settings = Settings.builder() - .put("index.analysis.filter.my_english.type", "stemmer") - .put("index.analysis.filter.my_english.language", "english") - .put("index.analysis.analyzer.my_english.tokenizer","whitespace") - .put("index.analysis.analyzer.my_english.filter","my_english") - .put(SETTING_VERSION_CREATED,v) - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .build(); - - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); - TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_english"); - assertThat(tokenFilter, instanceOf(StemmerTokenFilterFactory.class)); - Tokenizer tokenizer = new WhitespaceTokenizer(); - tokenizer.setReader(new StringReader("foo bar")); - TokenStream create = tokenFilter.create(tokenizer); - IndexAnalyzers indexAnalyzers = analysis.indexAnalyzers; - NamedAnalyzer analyzer = indexAnalyzers.get("my_english"); - assertThat(create, instanceOf(PorterStemFilter.class)); - assertAnalyzesTo(analyzer, "consolingly", new String[]{"consolingli"}); - } - - } - - public void testPorter2FilterFactory() throws IOException { - int iters = scaledRandomIntBetween(20, 100); - for (int i = 0; i < iters; i++) { - - Version v = VersionUtils.randomVersion(random()); - Settings settings = Settings.builder() - .put("index.analysis.filter.my_porter2.type", "stemmer") - .put("index.analysis.filter.my_porter2.language", "porter2") - .put("index.analysis.analyzer.my_porter2.tokenizer","whitespace") - .put("index.analysis.analyzer.my_porter2.filter","my_porter2") - .put(SETTING_VERSION_CREATED,v) - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .build(); - - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); - TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_porter2"); - assertThat(tokenFilter, instanceOf(StemmerTokenFilterFactory.class)); - Tokenizer tokenizer = new WhitespaceTokenizer(); - tokenizer.setReader(new StringReader("foo bar")); - TokenStream create = tokenFilter.create(tokenizer); - IndexAnalyzers indexAnalyzers = analysis.indexAnalyzers; - NamedAnalyzer analyzer = indexAnalyzers.get("my_porter2"); - assertThat(create, instanceOf(SnowballFilter.class)); - assertAnalyzesTo(analyzer, "possibly", new String[]{"possibl"}); - } - - } - -} diff --git a/core/src/test/java/org/elasticsearch/index/analysis/filter1/MyFilterTokenFilterFactory.java b/core/src/test/java/org/elasticsearch/index/analysis/filter1/MyFilterTokenFilterFactory.java deleted file mode 100644 index 1c9a479813..0000000000 --- a/core/src/test/java/org/elasticsearch/index/analysis/filter1/MyFilterTokenFilterFactory.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.elasticsearch.index.analysis.filter1; - -import org.apache.lucene.analysis.StopFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.core.StopAnalyzer; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; - -public class MyFilterTokenFilterFactory extends AbstractTokenFilterFactory { - - public MyFilterTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { - super(indexSettings, name, Settings.Builder.EMPTY_SETTINGS); - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return new StopFilter(tokenStream, StopAnalyzer.ENGLISH_STOP_WORDS_SET); - } -} |