summary | refs | log | tree | commit | diff
path: root/test/framework
diff options
context:
space:
mode:
author: Martijn van Groningen <martijn.v.groningen@gmail.com> 2017-06-23 21:22:14 +0200
committer: Martijn van Groningen <martijn.v.groningen@gmail.com> 2017-06-26 09:02:16 +0200
commit: a34f5fa8127595534d919646d73dd7a88c21fa65 (patch)
tree: cefbd73eee7bffa5d2e88df3378c9e501c8c9cdd /test/framework
parent: 1583f8104725eca4779a0a0fd9886839c4c615a3 (diff)
Move more token filters to analysis-common module
The following token filters were moved: stemmer, stemmer_override, kstem, dictionary_decompounder, hyphenation_decompounder, reverse, elision and truncate. Relates to #23658
Diffstat (limited to 'test/framework')
-rw-r--r-- test/framework/src/main/java/org/elasticsearch/index/analysis/MyFilterTokenFilterFactory.java 39
-rw-r--r-- test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java 72
-rw-r--r-- test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.json 54
-rw-r--r-- test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.yml 39
4 files changed, 164 insertions, 40 deletions
diff --git a/test/framework/src/main/java/org/elasticsearch/index/analysis/MyFilterTokenFilterFactory.java b/test/framework/src/main/java/org/elasticsearch/index/analysis/MyFilterTokenFilterFactory.java
new file mode 100644
index 0000000000..921a09e98e
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/index/analysis/MyFilterTokenFilterFactory.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+
+public class MyFilterTokenFilterFactory extends AbstractTokenFilterFactory {
+
+ public MyFilterTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+ super(indexSettings, name, Settings.Builder.EMPTY_SETTINGS);
+ }
+
+ @Override
+ public TokenStream create(TokenStream tokenStream) {
+ return new StopFilter(tokenStream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+ }
+}
diff --git a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
index 76d170f7c2..97035623a6 100644
--- a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
@@ -36,13 +36,11 @@ import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
import org.elasticsearch.index.analysis.DecimalDigitFilterFactory;
import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
-import org.elasticsearch.index.analysis.ElisionTokenFilterFactory;
import org.elasticsearch.index.analysis.GermanNormalizationFilterFactory;
import org.elasticsearch.index.analysis.GermanStemTokenFilterFactory;
import org.elasticsearch.index.analysis.HindiNormalizationFilterFactory;
import org.elasticsearch.index.analysis.HunspellTokenFilterFactory;
import org.elasticsearch.index.analysis.IndicNormalizationFilterFactory;
-import org.elasticsearch.index.analysis.KStemTokenFilterFactory;
import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
import org.elasticsearch.index.analysis.KeepWordFilterFactory;
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
@@ -60,7 +58,6 @@ import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
-import org.elasticsearch.index.analysis.ReverseTokenFilterFactory;
import org.elasticsearch.index.analysis.ScandinavianFoldingFilterFactory;
import org.elasticsearch.index.analysis.ScandinavianNormalizationFilterFactory;
import org.elasticsearch.index.analysis.SerbianNormalizationFilterFactory;
@@ -68,17 +65,12 @@ import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory;
import org.elasticsearch.index.analysis.StandardTokenFilterFactory;
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
-import org.elasticsearch.index.analysis.StemmerOverrideTokenFilterFactory;
-import org.elasticsearch.index.analysis.StemmerTokenFilterFactory;
import org.elasticsearch.index.analysis.StopTokenFilterFactory;
import org.elasticsearch.index.analysis.SynonymGraphTokenFilterFactory;
import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
import org.elasticsearch.index.analysis.ThaiTokenizerFactory;
-import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
-import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
-import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.test.ESTestCase;
@@ -147,7 +139,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.put("arabicstem", ArabicStemTokenFilterFactory.class)
.put("asciifolding", MovedToAnalysisCommon.class)
.put("brazilianstem", BrazilianStemTokenFilterFactory.class)
- .put("bulgarianstem", StemmerTokenFilterFactory.class)
+ .put("bulgarianstem", MovedToAnalysisCommon.class)
.put("cjkbigram", CJKBigramFilterFactory.class)
.put("cjkwidth", CJKWidthFilterFactory.class)
.put("classic", ClassicFilterFactory.class)
@@ -156,50 +148,50 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.put("czechstem", CzechStemTokenFilterFactory.class)
.put("decimaldigit", DecimalDigitFilterFactory.class)
.put("delimitedpayload", DelimitedPayloadTokenFilterFactory.class)
- .put("dictionarycompoundword", DictionaryCompoundWordTokenFilterFactory.class)
+ .put("dictionarycompoundword", MovedToAnalysisCommon.class)
.put("edgengram", MovedToAnalysisCommon.class)
- .put("elision", ElisionTokenFilterFactory.class)
- .put("englishminimalstem", StemmerTokenFilterFactory.class)
- .put("englishpossessive", StemmerTokenFilterFactory.class)
- .put("finnishlightstem", StemmerTokenFilterFactory.class)
- .put("frenchlightstem", StemmerTokenFilterFactory.class)
- .put("frenchminimalstem", StemmerTokenFilterFactory.class)
- .put("galicianminimalstem", StemmerTokenFilterFactory.class)
- .put("galicianstem", StemmerTokenFilterFactory.class)
+ .put("elision", MovedToAnalysisCommon.class)
+ .put("englishminimalstem", MovedToAnalysisCommon.class)
+ .put("englishpossessive", MovedToAnalysisCommon.class)
+ .put("finnishlightstem", MovedToAnalysisCommon.class)
+ .put("frenchlightstem", MovedToAnalysisCommon.class)
+ .put("frenchminimalstem", MovedToAnalysisCommon.class)
+ .put("galicianminimalstem", MovedToAnalysisCommon.class)
+ .put("galicianstem", MovedToAnalysisCommon.class)
.put("germanstem", GermanStemTokenFilterFactory.class)
- .put("germanlightstem", StemmerTokenFilterFactory.class)
- .put("germanminimalstem", StemmerTokenFilterFactory.class)
+ .put("germanlightstem", MovedToAnalysisCommon.class)
+ .put("germanminimalstem", MovedToAnalysisCommon.class)
.put("germannormalization", GermanNormalizationFilterFactory.class)
.put("greeklowercase", MovedToAnalysisCommon.class)
- .put("greekstem", StemmerTokenFilterFactory.class)
+ .put("greekstem", MovedToAnalysisCommon.class)
.put("hindinormalization", HindiNormalizationFilterFactory.class)
- .put("hindistem", StemmerTokenFilterFactory.class)
- .put("hungarianlightstem", StemmerTokenFilterFactory.class)
+ .put("hindistem", MovedToAnalysisCommon.class)
+ .put("hungarianlightstem", MovedToAnalysisCommon.class)
.put("hunspellstem", HunspellTokenFilterFactory.class)
- .put("hyphenationcompoundword", HyphenationCompoundWordTokenFilterFactory.class)
+ .put("hyphenationcompoundword", MovedToAnalysisCommon.class)
.put("indicnormalization", IndicNormalizationFilterFactory.class)
.put("irishlowercase", MovedToAnalysisCommon.class)
- .put("indonesianstem", StemmerTokenFilterFactory.class)
- .put("italianlightstem", StemmerTokenFilterFactory.class)
+ .put("indonesianstem", MovedToAnalysisCommon.class)
+ .put("italianlightstem", MovedToAnalysisCommon.class)
.put("keepword", KeepWordFilterFactory.class)
.put("keywordmarker", MovedToAnalysisCommon.class)
- .put("kstem", KStemTokenFilterFactory.class)
- .put("latvianstem", StemmerTokenFilterFactory.class)
+ .put("kstem", MovedToAnalysisCommon.class)
+ .put("latvianstem", MovedToAnalysisCommon.class)
.put("length", MovedToAnalysisCommon.class)
.put("limittokencount", LimitTokenCountFilterFactory.class)
.put("lowercase", MovedToAnalysisCommon.class)
.put("ngram", MovedToAnalysisCommon.class)
- .put("norwegianlightstem", StemmerTokenFilterFactory.class)
- .put("norwegianminimalstem", StemmerTokenFilterFactory.class)
+ .put("norwegianlightstem", MovedToAnalysisCommon.class)
+ .put("norwegianminimalstem", MovedToAnalysisCommon.class)
.put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class)
.put("patternreplace", PatternReplaceTokenFilterFactory.class)
.put("persiannormalization", PersianNormalizationFilterFactory.class)
.put("porterstem", MovedToAnalysisCommon.class)
- .put("portuguesestem", StemmerTokenFilterFactory.class)
- .put("portugueselightstem", StemmerTokenFilterFactory.class)
- .put("portugueseminimalstem", StemmerTokenFilterFactory.class)
- .put("reversestring", ReverseTokenFilterFactory.class)
- .put("russianlightstem", StemmerTokenFilterFactory.class)
+ .put("portuguesestem", MovedToAnalysisCommon.class)
+ .put("portugueselightstem", MovedToAnalysisCommon.class)
+ .put("portugueseminimalstem", MovedToAnalysisCommon.class)
+ .put("reversestring", MovedToAnalysisCommon.class)
+ .put("russianlightstem", MovedToAnalysisCommon.class)
.put("scandinavianfolding", ScandinavianFoldingFilterFactory.class)
.put("scandinaviannormalization", ScandinavianNormalizationFilterFactory.class)
.put("serbiannormalization", SerbianNormalizationFilterFactory.class)
@@ -207,16 +199,16 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.put("minhash", MinHashTokenFilterFactory.class)
.put("snowballporter", MovedToAnalysisCommon.class)
.put("soraninormalization", SoraniNormalizationFilterFactory.class)
- .put("soranistem", StemmerTokenFilterFactory.class)
- .put("spanishlightstem", StemmerTokenFilterFactory.class)
+ .put("soranistem", MovedToAnalysisCommon.class)
+ .put("spanishlightstem", MovedToAnalysisCommon.class)
.put("standard", StandardTokenFilterFactory.class)
- .put("stemmeroverride", StemmerOverrideTokenFilterFactory.class)
+ .put("stemmeroverride", MovedToAnalysisCommon.class)
.put("stop", StopTokenFilterFactory.class)
- .put("swedishlightstem", StemmerTokenFilterFactory.class)
+ .put("swedishlightstem", MovedToAnalysisCommon.class)
.put("synonym", SynonymTokenFilterFactory.class)
.put("synonymgraph", SynonymGraphTokenFilterFactory.class)
.put("trim", MovedToAnalysisCommon.class)
- .put("truncate", TruncateTokenFilterFactory.class)
+ .put("truncate", MovedToAnalysisCommon.class)
.put("turkishlowercase", MovedToAnalysisCommon.class)
.put("type", KeepTypesFilterFactory.class)
.put("uppercase", MovedToAnalysisCommon.class)
diff --git a/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.json b/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.json
new file mode 100644
index 0000000000..38937a9b5a
--- /dev/null
+++ b/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.json
@@ -0,0 +1,54 @@
+{
+ "index":{
+ "analysis":{
+ "tokenizer":{
+ "standard":{
+ "type":"standard"
+ }
+ },
+ "filter":{
+ "stop":{
+ "type":"stop",
+ "stopwords":["test-stop"]
+ },
+ "stop2":{
+ "type":"stop",
+ "stopwords":["stop2-1", "stop2-2"]
+ },
+ "my":{
+ "type":"myfilter"
+ },
+ "dict_dec":{
+ "type":"dictionary_decompounder",
+ "word_list":["donau", "dampf", "schiff", "spargel", "creme", "suppe"]
+ }
+ },
+ "analyzer":{
+ "standard":{
+ "type":"standard",
+ "stopwords":["test1", "test2", "test3"]
+ },
+ "custom1":{
+ "tokenizer":"standard",
+ "filter":["stop", "stop2"]
+ },
+ "custom4":{
+ "tokenizer":"standard",
+ "filter":["my"]
+ },
+ "custom6":{
+ "tokenizer":"standard",
+ "position_increment_gap": 256
+ },
+ "czechAnalyzerWithStemmer":{
+ "tokenizer":"standard",
+ "filter":["standard", "lowercase", "stop", "czech_stem"]
+ },
+ "decompoundingAnalyzer":{
+ "tokenizer":"standard",
+ "filter":["dict_dec"]
+ }
+ }
+ }
+ }
+}
diff --git a/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.yml b/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.yml
new file mode 100644
index 0000000000..f7a57d14db
--- /dev/null
+++ b/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.yml
@@ -0,0 +1,39 @@
+index :
+ analysis :
+ tokenizer :
+ standard :
+ type : standard
+ filter :
+ stop :
+ type : stop
+ stopwords : [test-stop]
+ stop2 :
+ type : stop
+ stopwords : [stop2-1, stop2-2]
+ my :
+ type : myfilter
+ dict_dec :
+ type : dictionary_decompounder
+ word_list : [donau, dampf, schiff, spargel, creme, suppe]
+ analyzer :
+ standard :
+ type : standard
+ stopwords : [test1, test2, test3]
+ custom1 :
+ tokenizer : standard
+ filter : [stop, stop2]
+ custom4 :
+ tokenizer : standard
+ filter : [my]
+ custom6 :
+ tokenizer : standard
+ position_increment_gap: 256
+ custom7 :
+ type : standard
+ version: 3.6
+ czechAnalyzerWithStemmer :
+ tokenizer : standard
+ filter : [standard, lowercase, stop, czech_stem]
+ decompoundingAnalyzer :
+ tokenizer : standard
+ filter : [dict_dec]