Moved `keyword_marker`, `trim`, `snowball` and `porter_stem` token filter factories from core to common-analysis module.

Relates to #23658
author: Martijn van Groningen <martijn.v.groningen@gmail.com> 2017-05-30 00:54:46 +0200
committer: Martijn van Groningen <martijn.v.groningen@gmail.com> 2017-05-31 09:34:08 +0200
commit: 258be2b135e49df263a546a67bcea9aa2c5ef283 (patch)
tree: b6bb6bf8db8e2b893e9287309be9cdda6f66489b /core/src/main/java/org/elasticsearch
parent: a089dc9dcd769191f8a69d7922960bc565dbbf29 (diff)
5 files changed, 0 insertions, 227 deletions
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/KeywordMarkerTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/KeywordMarkerTokenFilterFactory.java
deleted file mode 100644
index a4cd4c41c9..0000000000
--- a/core/src/main/java/org/elasticsearch/index/analysis/KeywordMarkerTokenFilterFactory.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.analysis;
-
-import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-
-import java.util.Set;
-import java.util.regex.Pattern;
-
-/**
- * A factory for creating keyword marker token filters that prevent tokens from
- * being modified by stemmers.  Two types of keyword marker filters are available:
- * the {@link SetKeywordMarkerFilter} and the {@link PatternKeywordMarkerFilter}.
- *
- * The {@link SetKeywordMarkerFilter} uses a set of keywords to denote which tokens
- * should be excluded from stemming.  This filter is created if the settings include
- * {@code keywords}, which contains the list of keywords, or {@code `keywords_path`},
- * which contains a path to a file in the config directory with the keywords.
- *
- * The {@link PatternKeywordMarkerFilter} uses a regular expression pattern to match
- * against tokens that should be excluded from stemming.  This filter is created if
- * the settings include {@code keywords_pattern}, which contains the regular expression
- * to match against.
- */
-public class KeywordMarkerTokenFilterFactory extends AbstractTokenFilterFactory {
-
-    private final CharArraySet keywordLookup;
-    private final Pattern keywordPattern;
-
-    public KeywordMarkerTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
-        super(indexSettings, name, settings);
-
-        boolean ignoreCase =
-            settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), "ignore_case", false, deprecationLogger);
-        String patternString = settings.get("keywords_pattern");
-        if (patternString != null) {
-            // a pattern for matching keywords is specified, as opposed to a
-            // set of keyword strings to match against
-            if (settings.get("keywords") != null || settings.get("keywords_path") != null) {
-                throw new IllegalArgumentException(
-                    "cannot specify both `keywords_pattern` and `keywords` or `keywords_path`");
-            }
-            keywordPattern = Pattern.compile(patternString);
-            keywordLookup = null;
-        } else {
-            Set<?> rules = Analysis.getWordSet(env, indexSettings.getIndexVersionCreated(), settings, "keywords");
-            if (rules == null) {
-                throw new IllegalArgumentException(
-                    "keyword filter requires either `keywords`, `keywords_path`, " +
-                    "or `keywords_pattern` to be configured");
-            }
-            // a set of keywords (or a path to them) is specified
-            keywordLookup = new CharArraySet(rules, ignoreCase);
-            keywordPattern = null;
-        }
-    }
-
-    @Override
-    public TokenStream create(TokenStream tokenStream) {
-        if (keywordPattern != null) {
-            return new PatternKeywordMarkerFilter(tokenStream, keywordPattern);
-        } else {
-            return new SetKeywordMarkerFilter(tokenStream, keywordLookup);
-        }
-    }
-
-}
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java
deleted file mode 100644
index 82d3d7633a..0000000000
--- a/core/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.analysis;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.en.PorterStemFilter;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-
-public class PorterStemTokenFilterFactory extends AbstractTokenFilterFactory {
-
-    public PorterStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
-    }
-
-    @Override
-    public TokenStream create(TokenStream tokenStream) {
-        return new PorterStemFilter(tokenStream);
-    }
-}
-
-
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java
deleted file mode 100644
index ba1c3a2a88..0000000000
--- a/core/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.elasticsearch.index.analysis;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.elasticsearch.common.Strings;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-
-/**
- * Real work actually done here by Sebastian on the Elasticsearch mailing list
- * http://elasticsearch-users.115913.n3.nabble.com/Using-the-Snowball-stemmers-tp2126106p2127111.html
- */
-public class SnowballTokenFilterFactory extends AbstractTokenFilterFactory {
-
-    private String language;
-
-    public SnowballTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
-        this.language = Strings.capitalize(settings.get("language", settings.get("name", "English")));
-    }
-
-    @Override
-    public TokenStream create(TokenStream tokenStream) {
-        return new SnowballFilter(tokenStream, language);
-    }
-
-}
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/TrimTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/TrimTokenFilterFactory.java
deleted file mode 100644
index 4239f2444b..0000000000
--- a/core/src/main/java/org/elasticsearch/index/analysis/TrimTokenFilterFactory.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.analysis;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.TrimFilter;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-
-public class TrimTokenFilterFactory extends AbstractTokenFilterFactory {
-
-    private static final String UPDATE_OFFSETS_KEY = "update_offsets";
-
-    public TrimTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
-        super(indexSettings, name, settings);
-        if (settings.get(UPDATE_OFFSETS_KEY) != null) {
-            throw new IllegalArgumentException(UPDATE_OFFSETS_KEY +  " is not supported anymore. Please fix your analysis chain");
-        }
-    }
-
-    @Override
-    public TokenStream create(TokenStream tokenStream) {
-        return new TrimFilter(tokenStream);
-    }
-}
diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
index 4dd146599c..9e378f6679 100644
--- a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
+++ b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
@@ -81,7 +81,6 @@ import org.elasticsearch.index.analysis.KStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
 import org.elasticsearch.index.analysis.KeepWordFilterFactory;
 import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
-import org.elasticsearch.index.analysis.KeywordMarkerTokenFilterFactory;
 import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
 import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
 import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
@@ -101,7 +100,6 @@ import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
 import org.elasticsearch.index.analysis.PatternTokenizerFactory;
 import org.elasticsearch.index.analysis.PersianAnalyzerProvider;
 import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
-import org.elasticsearch.index.analysis.PorterStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.PortugueseAnalyzerProvider;
 import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
@@ -115,7 +113,6 @@ import org.elasticsearch.index.analysis.SerbianNormalizationFilterFactory;
 import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
 import org.elasticsearch.index.analysis.SimpleAnalyzerProvider;
 import org.elasticsearch.index.analysis.SnowballAnalyzerProvider;
-import org.elasticsearch.index.analysis.SnowballTokenFilterFactory;
 import org.elasticsearch.index.analysis.SoraniAnalyzerProvider;
 import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory;
 import org.elasticsearch.index.analysis.SpanishAnalyzerProvider;
@@ -132,7 +129,6 @@ import org.elasticsearch.index.analysis.ThaiAnalyzerProvider;
 import org.elasticsearch.index.analysis.ThaiTokenizerFactory;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
 import org.elasticsearch.index.analysis.TokenizerFactory;
-import org.elasticsearch.index.analysis.TrimTokenFilterFactory;
 import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
 import org.elasticsearch.index.analysis.TurkishAnalyzerProvider;
 import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
@@ -212,7 +208,6 @@ public final class AnalysisModule {
         tokenFilters.register("length", LengthTokenFilterFactory::new);
         tokenFilters.register("lowercase", LowerCaseTokenFilterFactory::new);
         tokenFilters.register("uppercase", UpperCaseTokenFilterFactory::new);
-        tokenFilters.register("porter_stem", PorterStemTokenFilterFactory::new);
         tokenFilters.register("kstem", KStemTokenFilterFactory::new);
         tokenFilters.register("standard", StandardTokenFilterFactory::new);
         tokenFilters.register("nGram", NGramTokenFilterFactory::new);
@@ -223,10 +218,8 @@ public final class AnalysisModule {
         tokenFilters.register("min_hash", MinHashTokenFilterFactory::new);
         tokenFilters.register("unique", UniqueTokenFilterFactory::new);
         tokenFilters.register("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new));
-        tokenFilters.register("trim", TrimTokenFilterFactory::new);
         tokenFilters.register("limit", LimitTokenCountFilterFactory::new);
         tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
-        tokenFilters.register("snowball", SnowballTokenFilterFactory::new);
         tokenFilters.register("stemmer", StemmerTokenFilterFactory::new);
         tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
         tokenFilters.register("elision", ElisionTokenFilterFactory::new);
@@ -244,7 +237,6 @@ public final class AnalysisModule {
         tokenFilters.register("french_stem", FrenchStemTokenFilterFactory::new);
         tokenFilters.register("german_stem", GermanStemTokenFilterFactory::new);
         tokenFilters.register("russian_stem", RussianStemTokenFilterFactory::new);
-        tokenFilters.register("keyword_marker", requriesAnalysisSettings(KeywordMarkerTokenFilterFactory::new));
         tokenFilters.register("stemmer_override", requriesAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
         tokenFilters.register("arabic_normalization", ArabicNormalizationFilterFactory::new);
         tokenFilters.register("german_normalization", GermanNormalizationFilterFactory::new);
author	Martijn van Groningen <martijn.v.groningen@gmail.com>	2017-05-30 00:54:46 +0200
committer	Martijn van Groningen <martijn.v.groningen@gmail.com>	2017-05-31 09:34:08 +0200
commit	258be2b135e49df263a546a67bcea9aa2c5ef283 (patch)
tree	b6bb6bf8db8e2b893e9287309be9cdda6f66489b /core/src/main/java/org/elasticsearch
parent	a089dc9dcd769191f8a69d7922960bc565dbbf29 (diff)