diff options
author | Martijn van Groningen <martijn.v.groningen@gmail.com> | 2017-05-30 00:54:46 +0200 |
---|---|---|
committer | Martijn van Groningen <martijn.v.groningen@gmail.com> | 2017-05-31 09:34:08 +0200 |
commit | 258be2b135e49df263a546a67bcea9aa2c5ef283 (patch) | |
tree | b6bb6bf8db8e2b893e9287309be9cdda6f66489b /core/src/main/java/org/elasticsearch | |
parent | a089dc9dcd769191f8a69d7922960bc565dbbf29 (diff) |
Moved `keyword_marker`, `trim`, `snowball` and `porter_stem` token filter factories from core to common-analysis module.
Relates to #23658
Diffstat (limited to 'core/src/main/java/org/elasticsearch')
5 files changed, 0 insertions, 227 deletions
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/KeywordMarkerTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/KeywordMarkerTokenFilterFactory.java deleted file mode 100644 index a4cd4c41c9..0000000000 --- a/core/src/main/java/org/elasticsearch/index/analysis/KeywordMarkerTokenFilterFactory.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.CharArraySet; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter; -import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; - -import java.util.Set; -import java.util.regex.Pattern; - -/** - * A factory for creating keyword marker token filters that prevent tokens from - * being modified by stemmers. Two types of keyword marker filters are available: - * the {@link SetKeywordMarkerFilter} and the {@link PatternKeywordMarkerFilter}. 
- * - * The {@link SetKeywordMarkerFilter} uses a set of keywords to denote which tokens - * should be excluded from stemming. This filter is created if the settings include - * {@code keywords}, which contains the list of keywords, or {@code `keywords_path`}, - * which contains a path to a file in the config directory with the keywords. - * - * The {@link PatternKeywordMarkerFilter} uses a regular expression pattern to match - * against tokens that should be excluded from stemming. This filter is created if - * the settings include {@code keywords_pattern}, which contains the regular expression - * to match against. - */ -public class KeywordMarkerTokenFilterFactory extends AbstractTokenFilterFactory { - - private final CharArraySet keywordLookup; - private final Pattern keywordPattern; - - public KeywordMarkerTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { - super(indexSettings, name, settings); - - boolean ignoreCase = - settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), "ignore_case", false, deprecationLogger); - String patternString = settings.get("keywords_pattern"); - if (patternString != null) { - // a pattern for matching keywords is specified, as opposed to a - // set of keyword strings to match against - if (settings.get("keywords") != null || settings.get("keywords_path") != null) { - throw new IllegalArgumentException( - "cannot specify both `keywords_pattern` and `keywords` or `keywords_path`"); - } - keywordPattern = Pattern.compile(patternString); - keywordLookup = null; - } else { - Set<?> rules = Analysis.getWordSet(env, indexSettings.getIndexVersionCreated(), settings, "keywords"); - if (rules == null) { - throw new IllegalArgumentException( - "keyword filter requires either `keywords`, `keywords_path`, " + - "or `keywords_pattern` to be configured"); - } - // a set of keywords (or a path to them) is specified - keywordLookup = new CharArraySet(rules, 
ignoreCase); - keywordPattern = null; - } - } - - @Override - public TokenStream create(TokenStream tokenStream) { - if (keywordPattern != null) { - return new PatternKeywordMarkerFilter(tokenStream, keywordPattern); - } else { - return new SetKeywordMarkerFilter(tokenStream, keywordLookup); - } - } - -} diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java deleted file mode 100644 index 82d3d7633a..0000000000 --- a/core/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.en.PorterStemFilter; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; - -public class PorterStemTokenFilterFactory extends AbstractTokenFilterFactory { - - public PorterStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { - super(indexSettings, name, settings); - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return new PorterStemFilter(tokenStream); - } -} - - diff --git a/core/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java deleted file mode 100644 index ba1c3a2a88..0000000000 --- a/core/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.snowball.SnowballFilter; -import org.elasticsearch.common.Strings; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; - -/** - * Real work actually done here by Sebastian on the Elasticsearch mailing list - * http://elasticsearch-users.115913.n3.nabble.com/Using-the-Snowball-stemmers-tp2126106p2127111.html - */ -public class SnowballTokenFilterFactory extends AbstractTokenFilterFactory { - - private String language; - - public SnowballTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { - super(indexSettings, name, settings); - this.language = Strings.capitalize(settings.get("language", settings.get("name", "English"))); - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return new SnowballFilter(tokenStream, language); - } - -} diff --git a/core/src/main/java/org/elasticsearch/index/analysis/TrimTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/TrimTokenFilterFactory.java deleted file mode 100644 index 4239f2444b..0000000000 --- a/core/src/main/java/org/elasticsearch/index/analysis/TrimTokenFilterFactory.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.TrimFilter; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; - -public class TrimTokenFilterFactory extends AbstractTokenFilterFactory { - - private static final String UPDATE_OFFSETS_KEY = "update_offsets"; - - public TrimTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { - super(indexSettings, name, settings); - if (settings.get(UPDATE_OFFSETS_KEY) != null) { - throw new IllegalArgumentException(UPDATE_OFFSETS_KEY + " is not supported anymore. 
Please fix your analysis chain"); - } - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return new TrimFilter(tokenStream); - } -} diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java index 4dd146599c..9e378f6679 100644 --- a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java +++ b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java @@ -81,7 +81,6 @@ import org.elasticsearch.index.analysis.KStemTokenFilterFactory; import org.elasticsearch.index.analysis.KeepTypesFilterFactory; import org.elasticsearch.index.analysis.KeepWordFilterFactory; import org.elasticsearch.index.analysis.KeywordAnalyzerProvider; -import org.elasticsearch.index.analysis.KeywordMarkerTokenFilterFactory; import org.elasticsearch.index.analysis.KeywordTokenizerFactory; import org.elasticsearch.index.analysis.LatvianAnalyzerProvider; import org.elasticsearch.index.analysis.LengthTokenFilterFactory; @@ -101,7 +100,6 @@ import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory; import org.elasticsearch.index.analysis.PatternTokenizerFactory; import org.elasticsearch.index.analysis.PersianAnalyzerProvider; import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory; -import org.elasticsearch.index.analysis.PorterStemTokenFilterFactory; import org.elasticsearch.index.analysis.PortugueseAnalyzerProvider; import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; import org.elasticsearch.index.analysis.PreConfiguredTokenizer; @@ -115,7 +113,6 @@ import org.elasticsearch.index.analysis.SerbianNormalizationFilterFactory; import org.elasticsearch.index.analysis.ShingleTokenFilterFactory; import org.elasticsearch.index.analysis.SimpleAnalyzerProvider; import org.elasticsearch.index.analysis.SnowballAnalyzerProvider; -import org.elasticsearch.index.analysis.SnowballTokenFilterFactory; import 
org.elasticsearch.index.analysis.SoraniAnalyzerProvider; import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory; import org.elasticsearch.index.analysis.SpanishAnalyzerProvider; @@ -132,7 +129,6 @@ import org.elasticsearch.index.analysis.ThaiAnalyzerProvider; import org.elasticsearch.index.analysis.ThaiTokenizerFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.analysis.TokenizerFactory; -import org.elasticsearch.index.analysis.TrimTokenFilterFactory; import org.elasticsearch.index.analysis.TruncateTokenFilterFactory; import org.elasticsearch.index.analysis.TurkishAnalyzerProvider; import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory; @@ -212,7 +208,6 @@ public final class AnalysisModule { tokenFilters.register("length", LengthTokenFilterFactory::new); tokenFilters.register("lowercase", LowerCaseTokenFilterFactory::new); tokenFilters.register("uppercase", UpperCaseTokenFilterFactory::new); - tokenFilters.register("porter_stem", PorterStemTokenFilterFactory::new); tokenFilters.register("kstem", KStemTokenFilterFactory::new); tokenFilters.register("standard", StandardTokenFilterFactory::new); tokenFilters.register("nGram", NGramTokenFilterFactory::new); @@ -223,10 +218,8 @@ public final class AnalysisModule { tokenFilters.register("min_hash", MinHashTokenFilterFactory::new); tokenFilters.register("unique", UniqueTokenFilterFactory::new); tokenFilters.register("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new)); - tokenFilters.register("trim", TrimTokenFilterFactory::new); tokenFilters.register("limit", LimitTokenCountFilterFactory::new); tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new)); - tokenFilters.register("snowball", SnowballTokenFilterFactory::new); tokenFilters.register("stemmer", StemmerTokenFilterFactory::new); tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new); 
tokenFilters.register("elision", ElisionTokenFilterFactory::new); @@ -244,7 +237,6 @@ public final class AnalysisModule { tokenFilters.register("french_stem", FrenchStemTokenFilterFactory::new); tokenFilters.register("german_stem", GermanStemTokenFilterFactory::new); tokenFilters.register("russian_stem", RussianStemTokenFilterFactory::new); - tokenFilters.register("keyword_marker", requriesAnalysisSettings(KeywordMarkerTokenFilterFactory::new)); tokenFilters.register("stemmer_override", requriesAnalysisSettings(StemmerOverrideTokenFilterFactory::new)); tokenFilters.register("arabic_normalization", ArabicNormalizationFilterFactory::new); tokenFilters.register("german_normalization", GermanNormalizationFilterFactory::new); |