summary | refs | log | tree | commit | diff
path: root/core/src/main/java
diff options
context:
space:
mode:
author: Martijn van Groningen <martijn.v.groningen@gmail.com> 2017-05-30 00:54:46 +0200
committer: Martijn van Groningen <martijn.v.groningen@gmail.com> 2017-05-31 09:34:08 +0200
commit: 258be2b135e49df263a546a67bcea9aa2c5ef283 (patch)
tree: b6bb6bf8db8e2b893e9287309be9cdda6f66489b /core/src/main/java
parent: a089dc9dcd769191f8a69d7922960bc565dbbf29 (diff)
Moved `keyword_marker`, `trim`, `snowball` and `porter_stem` token filter factories from core to common-analysis module.
Relates to #23658
Diffstat (limited to 'core/src/main/java')
-rw-r--r-- core/src/main/java/org/elasticsearch/index/analysis/KeywordMarkerTokenFilterFactory.java 90
-rw-r--r-- core/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java 40
-rw-r--r-- core/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java 46
-rw-r--r-- core/src/main/java/org/elasticsearch/index/analysis/TrimTokenFilterFactory.java 43
-rw-r--r-- core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java 8
5 files changed, 0 insertions, 227 deletions
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/KeywordMarkerTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/KeywordMarkerTokenFilterFactory.java
deleted file mode 100644
index a4cd4c41c9..0000000000
--- a/core/src/main/java/org/elasticsearch/index/analysis/KeywordMarkerTokenFilterFactory.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.analysis;
-
-import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-
-import java.util.Set;
-import java.util.regex.Pattern;
-
-/**
- * A factory for creating keyword marker token filters that prevent tokens from
- * being modified by stemmers. Two types of keyword marker filters are available:
- * the {@link SetKeywordMarkerFilter} and the {@link PatternKeywordMarkerFilter}.
- *
- * The {@link SetKeywordMarkerFilter} uses a set of keywords to denote which tokens
- * should be excluded from stemming. This filter is created if the settings include
- * {@code keywords}, which contains the list of keywords, or {@code `keywords_path`},
- * which contains a path to a file in the config directory with the keywords.
- *
- * The {@link PatternKeywordMarkerFilter} uses a regular expression pattern to match
- * against tokens that should be excluded from stemming. This filter is created if
- * the settings include {@code keywords_pattern}, which contains the regular expression
- * to match against.
- */
-public class KeywordMarkerTokenFilterFactory extends AbstractTokenFilterFactory {
-
- private final CharArraySet keywordLookup;
- private final Pattern keywordPattern;
-
- public KeywordMarkerTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
- super(indexSettings, name, settings);
-
- boolean ignoreCase =
- settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), "ignore_case", false, deprecationLogger);
- String patternString = settings.get("keywords_pattern");
- if (patternString != null) {
- // a pattern for matching keywords is specified, as opposed to a
- // set of keyword strings to match against
- if (settings.get("keywords") != null || settings.get("keywords_path") != null) {
- throw new IllegalArgumentException(
- "cannot specify both `keywords_pattern` and `keywords` or `keywords_path`");
- }
- keywordPattern = Pattern.compile(patternString);
- keywordLookup = null;
- } else {
- Set<?> rules = Analysis.getWordSet(env, indexSettings.getIndexVersionCreated(), settings, "keywords");
- if (rules == null) {
- throw new IllegalArgumentException(
- "keyword filter requires either `keywords`, `keywords_path`, " +
- "or `keywords_pattern` to be configured");
- }
- // a set of keywords (or a path to them) is specified
- keywordLookup = new CharArraySet(rules, ignoreCase);
- keywordPattern = null;
- }
- }
-
- @Override
- public TokenStream create(TokenStream tokenStream) {
- if (keywordPattern != null) {
- return new PatternKeywordMarkerFilter(tokenStream, keywordPattern);
- } else {
- return new SetKeywordMarkerFilter(tokenStream, keywordLookup);
- }
- }
-
-}
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java
deleted file mode 100644
index 82d3d7633a..0000000000
--- a/core/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.analysis;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.en.PorterStemFilter;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-
-public class PorterStemTokenFilterFactory extends AbstractTokenFilterFactory {
-
- public PorterStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
- super(indexSettings, name, settings);
- }
-
- @Override
- public TokenStream create(TokenStream tokenStream) {
- return new PorterStemFilter(tokenStream);
- }
-}
-
-
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java
deleted file mode 100644
index ba1c3a2a88..0000000000
--- a/core/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.elasticsearch.index.analysis;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.elasticsearch.common.Strings;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-
-/**
- * Real work actually done here by Sebastian on the Elasticsearch mailing list
- * http://elasticsearch-users.115913.n3.nabble.com/Using-the-Snowball-stemmers-tp2126106p2127111.html
- */
-public class SnowballTokenFilterFactory extends AbstractTokenFilterFactory {
-
- private String language;
-
- public SnowballTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
- super(indexSettings, name, settings);
- this.language = Strings.capitalize(settings.get("language", settings.get("name", "English")));
- }
-
- @Override
- public TokenStream create(TokenStream tokenStream) {
- return new SnowballFilter(tokenStream, language);
- }
-
-}
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/TrimTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/TrimTokenFilterFactory.java
deleted file mode 100644
index 4239f2444b..0000000000
--- a/core/src/main/java/org/elasticsearch/index/analysis/TrimTokenFilterFactory.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.analysis;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.TrimFilter;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-
-public class TrimTokenFilterFactory extends AbstractTokenFilterFactory {
-
- private static final String UPDATE_OFFSETS_KEY = "update_offsets";
-
- public TrimTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
- super(indexSettings, name, settings);
- if (settings.get(UPDATE_OFFSETS_KEY) != null) {
- throw new IllegalArgumentException(UPDATE_OFFSETS_KEY + " is not supported anymore. Please fix your analysis chain");
- }
- }
-
- @Override
- public TokenStream create(TokenStream tokenStream) {
- return new TrimFilter(tokenStream);
- }
-}
diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
index 4dd146599c..9e378f6679 100644
--- a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
+++ b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
@@ -81,7 +81,6 @@ import org.elasticsearch.index.analysis.KStemTokenFilterFactory;
import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
import org.elasticsearch.index.analysis.KeepWordFilterFactory;
import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
-import org.elasticsearch.index.analysis.KeywordMarkerTokenFilterFactory;
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
@@ -101,7 +100,6 @@ import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
import org.elasticsearch.index.analysis.PatternTokenizerFactory;
import org.elasticsearch.index.analysis.PersianAnalyzerProvider;
import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
-import org.elasticsearch.index.analysis.PorterStemTokenFilterFactory;
import org.elasticsearch.index.analysis.PortugueseAnalyzerProvider;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
@@ -115,7 +113,6 @@ import org.elasticsearch.index.analysis.SerbianNormalizationFilterFactory;
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
import org.elasticsearch.index.analysis.SimpleAnalyzerProvider;
import org.elasticsearch.index.analysis.SnowballAnalyzerProvider;
-import org.elasticsearch.index.analysis.SnowballTokenFilterFactory;
import org.elasticsearch.index.analysis.SoraniAnalyzerProvider;
import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory;
import org.elasticsearch.index.analysis.SpanishAnalyzerProvider;
@@ -132,7 +129,6 @@ import org.elasticsearch.index.analysis.ThaiAnalyzerProvider;
import org.elasticsearch.index.analysis.ThaiTokenizerFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
-import org.elasticsearch.index.analysis.TrimTokenFilterFactory;
import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
import org.elasticsearch.index.analysis.TurkishAnalyzerProvider;
import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
@@ -212,7 +208,6 @@ public final class AnalysisModule {
tokenFilters.register("length", LengthTokenFilterFactory::new);
tokenFilters.register("lowercase", LowerCaseTokenFilterFactory::new);
tokenFilters.register("uppercase", UpperCaseTokenFilterFactory::new);
- tokenFilters.register("porter_stem", PorterStemTokenFilterFactory::new);
tokenFilters.register("kstem", KStemTokenFilterFactory::new);
tokenFilters.register("standard", StandardTokenFilterFactory::new);
tokenFilters.register("nGram", NGramTokenFilterFactory::new);
@@ -223,10 +218,8 @@ public final class AnalysisModule {
tokenFilters.register("min_hash", MinHashTokenFilterFactory::new);
tokenFilters.register("unique", UniqueTokenFilterFactory::new);
tokenFilters.register("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new));
- tokenFilters.register("trim", TrimTokenFilterFactory::new);
tokenFilters.register("limit", LimitTokenCountFilterFactory::new);
tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
- tokenFilters.register("snowball", SnowballTokenFilterFactory::new);
tokenFilters.register("stemmer", StemmerTokenFilterFactory::new);
tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
tokenFilters.register("elision", ElisionTokenFilterFactory::new);
@@ -244,7 +237,6 @@ public final class AnalysisModule {
tokenFilters.register("french_stem", FrenchStemTokenFilterFactory::new);
tokenFilters.register("german_stem", GermanStemTokenFilterFactory::new);
tokenFilters.register("russian_stem", RussianStemTokenFilterFactory::new);
- tokenFilters.register("keyword_marker", requriesAnalysisSettings(KeywordMarkerTokenFilterFactory::new));
tokenFilters.register("stemmer_override", requriesAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
tokenFilters.register("arabic_normalization", ArabicNormalizationFilterFactory::new);
tokenFilters.register("german_normalization", GermanNormalizationFilterFactory::new);