diff options
author | Martijn van Groningen <martijn.v.groningen@gmail.com> | 2017-06-23 21:22:14 +0200 |
---|---|---|
committer | Martijn van Groningen <martijn.v.groningen@gmail.com> | 2017-06-26 09:02:16 +0200 |
commit | a34f5fa8127595534d919646d73dd7a88c21fa65 (patch) | |
tree | cefbd73eee7bffa5d2e88df3378c9e501c8c9cdd /test/framework | |
parent | 1583f8104725eca4779a0a0fd9886839c4c615a3 (diff) |
Move more token filters to analysis-common module
The following token filters were moved: stemmer, stemmer_override, kstem, dictionary_decompounder, hyphenation_decompounder, reverse, elision and truncate.
Relates to #23658
Diffstat (limited to 'test/framework')
4 files changed, 164 insertions, 40 deletions
diff --git a/test/framework/src/main/java/org/elasticsearch/index/analysis/MyFilterTokenFilterFactory.java b/test/framework/src/main/java/org/elasticsearch/index/analysis/MyFilterTokenFilterFactory.java new file mode 100644 index 0000000000..921a09e98e --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/index/analysis/MyFilterTokenFilterFactory.java @@ -0,0 +1,39 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.StopFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.StopAnalyzer; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; + +public class MyFilterTokenFilterFactory extends AbstractTokenFilterFactory { + + public MyFilterTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { + super(indexSettings, name, Settings.Builder.EMPTY_SETTINGS); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new StopFilter(tokenStream, StopAnalyzer.ENGLISH_STOP_WORDS_SET); + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java index 76d170f7c2..97035623a6 100644 --- a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java @@ -36,13 +36,11 @@ import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory; import org.elasticsearch.index.analysis.DecimalDigitFilterFactory; import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory; import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory; -import org.elasticsearch.index.analysis.ElisionTokenFilterFactory; import org.elasticsearch.index.analysis.GermanNormalizationFilterFactory; import org.elasticsearch.index.analysis.GermanStemTokenFilterFactory; import org.elasticsearch.index.analysis.HindiNormalizationFilterFactory; import org.elasticsearch.index.analysis.HunspellTokenFilterFactory; import org.elasticsearch.index.analysis.IndicNormalizationFilterFactory; -import org.elasticsearch.index.analysis.KStemTokenFilterFactory; import org.elasticsearch.index.analysis.KeepTypesFilterFactory; import org.elasticsearch.index.analysis.KeepWordFilterFactory; import org.elasticsearch.index.analysis.KeywordTokenizerFactory; @@ -60,7 +58,6 @@ import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory; import org.elasticsearch.index.analysis.PreConfiguredCharFilter; import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; import org.elasticsearch.index.analysis.PreConfiguredTokenizer; -import org.elasticsearch.index.analysis.ReverseTokenFilterFactory; import org.elasticsearch.index.analysis.ScandinavianFoldingFilterFactory; import org.elasticsearch.index.analysis.ScandinavianNormalizationFilterFactory; import org.elasticsearch.index.analysis.SerbianNormalizationFilterFactory; @@ -68,17 +65,12 @@ import org.elasticsearch.index.analysis.ShingleTokenFilterFactory; import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory; import org.elasticsearch.index.analysis.StandardTokenFilterFactory; import org.elasticsearch.index.analysis.StandardTokenizerFactory; -import org.elasticsearch.index.analysis.StemmerOverrideTokenFilterFactory; -import org.elasticsearch.index.analysis.StemmerTokenFilterFactory; import org.elasticsearch.index.analysis.StopTokenFilterFactory; import org.elasticsearch.index.analysis.SynonymGraphTokenFilterFactory; import org.elasticsearch.index.analysis.SynonymTokenFilterFactory; import org.elasticsearch.index.analysis.ThaiTokenizerFactory; -import org.elasticsearch.index.analysis.TruncateTokenFilterFactory; import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory; import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory; -import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory; -import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory; import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.test.ESTestCase; @@ -147,7 +139,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase { .put("arabicstem", ArabicStemTokenFilterFactory.class) .put("asciifolding", MovedToAnalysisCommon.class) .put("brazilianstem", BrazilianStemTokenFilterFactory.class) - .put("bulgarianstem", StemmerTokenFilterFactory.class) + .put("bulgarianstem", MovedToAnalysisCommon.class) .put("cjkbigram", CJKBigramFilterFactory.class) .put("cjkwidth", CJKWidthFilterFactory.class) .put("classic", ClassicFilterFactory.class) @@ -156,50 +148,50 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase { .put("czechstem", CzechStemTokenFilterFactory.class) .put("decimaldigit", DecimalDigitFilterFactory.class) .put("delimitedpayload", DelimitedPayloadTokenFilterFactory.class) - .put("dictionarycompoundword", DictionaryCompoundWordTokenFilterFactory.class) + .put("dictionarycompoundword", MovedToAnalysisCommon.class) .put("edgengram", MovedToAnalysisCommon.class) - .put("elision", ElisionTokenFilterFactory.class) - .put("englishminimalstem", StemmerTokenFilterFactory.class) - .put("englishpossessive", StemmerTokenFilterFactory.class) - .put("finnishlightstem", StemmerTokenFilterFactory.class) - .put("frenchlightstem", StemmerTokenFilterFactory.class) - .put("frenchminimalstem", StemmerTokenFilterFactory.class) - .put("galicianminimalstem", StemmerTokenFilterFactory.class) - .put("galicianstem", StemmerTokenFilterFactory.class) + .put("elision", MovedToAnalysisCommon.class) + .put("englishminimalstem", MovedToAnalysisCommon.class) + .put("englishpossessive", MovedToAnalysisCommon.class) + .put("finnishlightstem", MovedToAnalysisCommon.class) + .put("frenchlightstem", MovedToAnalysisCommon.class) + .put("frenchminimalstem", MovedToAnalysisCommon.class) + .put("galicianminimalstem", MovedToAnalysisCommon.class) + .put("galicianstem", MovedToAnalysisCommon.class) .put("germanstem", GermanStemTokenFilterFactory.class) - .put("germanlightstem", StemmerTokenFilterFactory.class) - .put("germanminimalstem", StemmerTokenFilterFactory.class) + .put("germanlightstem", MovedToAnalysisCommon.class) + .put("germanminimalstem", MovedToAnalysisCommon.class) .put("germannormalization", GermanNormalizationFilterFactory.class) .put("greeklowercase", MovedToAnalysisCommon.class) - .put("greekstem", StemmerTokenFilterFactory.class) + .put("greekstem", MovedToAnalysisCommon.class) .put("hindinormalization", HindiNormalizationFilterFactory.class) - .put("hindistem", StemmerTokenFilterFactory.class) - .put("hungarianlightstem", StemmerTokenFilterFactory.class) + .put("hindistem", MovedToAnalysisCommon.class) + .put("hungarianlightstem", MovedToAnalysisCommon.class) .put("hunspellstem", HunspellTokenFilterFactory.class) - .put("hyphenationcompoundword", HyphenationCompoundWordTokenFilterFactory.class) + .put("hyphenationcompoundword", MovedToAnalysisCommon.class) .put("indicnormalization", IndicNormalizationFilterFactory.class) .put("irishlowercase", MovedToAnalysisCommon.class) - .put("indonesianstem", StemmerTokenFilterFactory.class) - .put("italianlightstem", StemmerTokenFilterFactory.class) + .put("indonesianstem", MovedToAnalysisCommon.class) + .put("italianlightstem", MovedToAnalysisCommon.class) .put("keepword", KeepWordFilterFactory.class) .put("keywordmarker", MovedToAnalysisCommon.class) - .put("kstem", KStemTokenFilterFactory.class) - .put("latvianstem", StemmerTokenFilterFactory.class) + .put("kstem", MovedToAnalysisCommon.class) + .put("latvianstem", MovedToAnalysisCommon.class) .put("length", MovedToAnalysisCommon.class) .put("limittokencount", LimitTokenCountFilterFactory.class) .put("lowercase", MovedToAnalysisCommon.class) .put("ngram", MovedToAnalysisCommon.class) - .put("norwegianlightstem", StemmerTokenFilterFactory.class) - .put("norwegianminimalstem", StemmerTokenFilterFactory.class) + .put("norwegianlightstem", MovedToAnalysisCommon.class) + .put("norwegianminimalstem", MovedToAnalysisCommon.class) .put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class) .put("patternreplace", PatternReplaceTokenFilterFactory.class) .put("persiannormalization", PersianNormalizationFilterFactory.class) .put("porterstem", MovedToAnalysisCommon.class) - .put("portuguesestem", StemmerTokenFilterFactory.class) - .put("portugueselightstem", StemmerTokenFilterFactory.class) - .put("portugueseminimalstem", StemmerTokenFilterFactory.class) - .put("reversestring", ReverseTokenFilterFactory.class) - .put("russianlightstem", StemmerTokenFilterFactory.class) + .put("portuguesestem", MovedToAnalysisCommon.class) + .put("portugueselightstem", MovedToAnalysisCommon.class) + .put("portugueseminimalstem", MovedToAnalysisCommon.class) + .put("reversestring", MovedToAnalysisCommon.class) + .put("russianlightstem", MovedToAnalysisCommon.class) .put("scandinavianfolding", ScandinavianFoldingFilterFactory.class) .put("scandinaviannormalization", ScandinavianNormalizationFilterFactory.class) .put("serbiannormalization", SerbianNormalizationFilterFactory.class) @@ -207,16 +199,16 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase { .put("minhash", MinHashTokenFilterFactory.class) .put("snowballporter", MovedToAnalysisCommon.class) .put("soraninormalization", SoraniNormalizationFilterFactory.class) - .put("soranistem", StemmerTokenFilterFactory.class) - .put("spanishlightstem", StemmerTokenFilterFactory.class) + .put("soranistem", MovedToAnalysisCommon.class) + .put("spanishlightstem", MovedToAnalysisCommon.class) .put("standard", StandardTokenFilterFactory.class) - .put("stemmeroverride", StemmerOverrideTokenFilterFactory.class) + .put("stemmeroverride", MovedToAnalysisCommon.class) .put("stop", StopTokenFilterFactory.class) - .put("swedishlightstem", StemmerTokenFilterFactory.class) + .put("swedishlightstem", MovedToAnalysisCommon.class) .put("synonym", SynonymTokenFilterFactory.class) .put("synonymgraph", SynonymGraphTokenFilterFactory.class) .put("trim", MovedToAnalysisCommon.class) - .put("truncate", TruncateTokenFilterFactory.class) + .put("truncate", MovedToAnalysisCommon.class) .put("turkishlowercase", MovedToAnalysisCommon.class) .put("type", KeepTypesFilterFactory.class) .put("uppercase", MovedToAnalysisCommon.class) diff --git a/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.json b/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.json new file mode 100644 index 0000000000..38937a9b5a --- /dev/null +++ b/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.json @@ -0,0 +1,54 @@ +{ + "index":{ + "analysis":{ + "tokenizer":{ + "standard":{ + "type":"standard" + } + }, + "filter":{ + "stop":{ + "type":"stop", + "stopwords":["test-stop"] + }, + "stop2":{ + "type":"stop", + "stopwords":["stop2-1", "stop2-2"] + }, + "my":{ + "type":"myfilter" + }, + "dict_dec":{ + "type":"dictionary_decompounder", + "word_list":["donau", "dampf", "schiff", "spargel", "creme", "suppe"] + } + }, + "analyzer":{ + "standard":{ + "type":"standard", + "stopwords":["test1", "test2", "test3"] + }, + "custom1":{ + "tokenizer":"standard", + "filter":["stop", "stop2"] + }, + "custom4":{ + "tokenizer":"standard", + "filter":["my"] + }, + "custom6":{ + "tokenizer":"standard", + "position_increment_gap": 256 + }, + "czechAnalyzerWithStemmer":{ + "tokenizer":"standard", + "filter":["standard", "lowercase", "stop", "czech_stem"] + }, + "decompoundingAnalyzer":{ + "tokenizer":"standard", + "filter":["dict_dec"] + } + } + } + } +} diff --git a/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.yml b/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.yml new file mode 100644 index 0000000000..f7a57d14db --- /dev/null +++ b/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.yml @@ -0,0 +1,39 @@ +index : + analysis : + tokenizer : + standard : + type : standard + filter : + stop : + type : stop + stopwords : [test-stop] + stop2 : + type : stop + stopwords : [stop2-1, stop2-2] + my : + type : myfilter + dict_dec : + type : dictionary_decompounder + word_list : [donau, dampf, schiff, spargel, creme, suppe] + analyzer : + standard : + type : standard + stopwords : [test1, test2, test3] + custom1 : + tokenizer : standard + filter : [stop, stop2] + custom4 : + tokenizer : standard + filter : [my] + custom6 : + tokenizer : standard + position_increment_gap: 256 + custom7 : + type : standard + version: 3.6 + czechAnalyzerWithStemmer : + tokenizer : standard + filter : [standard, lowercase, stop, czech_stem] + decompoundingAnalyzer : + tokenizer : standard + filter : [dict_dec] |