author    Nik Everett <nik9000@gmail.com>  2017-05-09 14:50:49 -0400
committer GitHub <noreply@github.com>  2017-05-09 14:50:49 -0400
commit    bb06d8ec4f3290c05ca50e19649e5ebd1f8840fa (patch)
tree      41c9eaa4dad0e4618d4e61d45db667501476b024
parent    428390865c9c6de7497170236a80f0a62a73b924 (diff)
Allow plugins to build pre-configured token filters (#24223)
This changes the way we register pre-configured token filters so that plugins can declare them, and starts to move all of the pre-configured token filters out of core. It doesn't finish the job because doing so would make the change unreviewably large, so this PR includes a shim that keeps the "old" way of registering pre-configured token filters around.

The lowercase token filter is special because of how it interacts with the lowercase tokenizer. I'm not sure exactly what to do about it, so for now I'm leaving it alone with the intent of figuring out what to do with it in a followup.

This also renames these token filters from "pre-built" to "pre-configured" because that seemed like a more descriptive name.

This is part of #23658.
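As a rough sketch of what this enables: a plugin can now override AnalysisPlugin#getPreConfiguredTokenFilters() and return filters built with the new PreConfiguredTokenFilter class. The plugin class and filter name below are hypothetical, invented for illustration; only the API itself comes from this change.

import java.util.List;

import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;

import static java.util.Collections.singletonList;

public class MyAnalysisPlugin extends Plugin implements AnalysisPlugin {
    @Override
    public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
        // Arguments: API name, whether the filter is multi-term aware,
        // caching strategy, and a factory wrapping the incoming TokenStream.
        return singletonList(new PreConfiguredTokenFilter("my_ascii_folding", true,
                CachingStrategy.ONE, ASCIIFoldingFilter::new));
    }
}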
-rw-r--r--core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java37
-rw-r--r--core/src/main/java/org/elasticsearch/index/analysis/PreBuiltTokenFilterFactoryFactory.java50
-rw-r--r--core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java123
-rw-r--r--core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java46
-rw-r--r--core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltCacheFactory.java2
-rw-r--r--core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java176
-rw-r--r--core/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java14
-rw-r--r--core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java35
-rw-r--r--core/src/test/java/org/elasticsearch/index/IndexModuleTests.java41
-rw-r--r--core/src/test/java/org/elasticsearch/index/analysis/AnalysisFactoryTests.java26
-rw-r--r--core/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java71
-rw-r--r--core/src/test/java/org/elasticsearch/index/analysis/CoreAnalysisFactoryTests.java37
-rw-r--r--core/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java58
-rw-r--r--core/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java23
-rw-r--r--modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java72
-rw-r--r--modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java36
-rw-r--r--modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yaml12
-rw-r--r--plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/AnalysisICUFactoryTests.java6
-rw-r--r--plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/AnalysisKuromojiFactoryTests.java6
-rw-r--r--plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/AnalysisPhoneticFactoryTests.java6
-rw-r--r--plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/AnalysisSmartChineseFactoryTests.java7
-rw-r--r--plugins/analysis-stempel/src/test/java/org/elasticsearch/index/analysis/AnalysisPolishFactoryTests.java6
-rw-r--r--test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java (renamed from test/framework/src/main/java/org/elasticsearch/AnalysisFactoryTestCase.java)116
23 files changed, 579 insertions, 427 deletions
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java
index 36357afe67..b438cd5af4 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java
+++ b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java
@@ -36,7 +36,6 @@ import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
import org.elasticsearch.indices.analysis.PreBuiltCharFilters;
-import org.elasticsearch.indices.analysis.PreBuiltTokenFilters;
import org.elasticsearch.indices.analysis.PreBuiltTokenizers;
import java.io.Closeable;
@@ -59,7 +58,7 @@ public final class AnalysisRegistry implements Closeable {
public static final String INDEX_ANALYSIS_CHAR_FILTER = "index.analysis.char_filter";
public static final String INDEX_ANALYSIS_FILTER = "index.analysis.filter";
public static final String INDEX_ANALYSIS_TOKENIZER = "index.analysis.tokenizer";
- private final PrebuiltAnalysis prebuiltAnalysis = new PrebuiltAnalysis();
+ private final PrebuiltAnalysis prebuiltAnalysis;
private final Map<String, Analyzer> cachedAnalyzer = new ConcurrentHashMap<>();
private final Environment environment;
@@ -74,13 +73,15 @@ public final class AnalysisRegistry implements Closeable {
Map<String, AnalysisProvider<TokenFilterFactory>> tokenFilters,
Map<String, AnalysisProvider<TokenizerFactory>> tokenizers,
Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers,
- Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers) {
+ Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers,
+ Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters) {
this.environment = environment;
this.charFilters = unmodifiableMap(charFilters);
this.tokenFilters = unmodifiableMap(tokenFilters);
this.tokenizers = unmodifiableMap(tokenizers);
this.analyzers = unmodifiableMap(analyzers);
this.normalizers = unmodifiableMap(normalizers);
+ prebuiltAnalysis = new PrebuiltAnalysis(preConfiguredTokenFilters);
}
/**
@@ -305,8 +306,8 @@ public final class AnalysisRegistry implements Closeable {
}
private <T> Map<String, T> buildMapping(Component component, IndexSettings settings, Map<String, Settings> settingsMap,
- Map<String, AnalysisModule.AnalysisProvider<T>> providerMap, Map<String, AnalysisModule.AnalysisProvider<T>> defaultInstance)
- throws IOException {
+ Map<String, ? extends AnalysisModule.AnalysisProvider<T>> providerMap,
+ Map<String, ? extends AnalysisModule.AnalysisProvider<T>> defaultInstance) throws IOException {
Settings defaultSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, settings.getIndexVersionCreated()).build();
Map<String, T> factories = new HashMap<>();
for (Map.Entry<String, Settings> entry : settingsMap.entrySet()) {
@@ -344,7 +345,7 @@ public final class AnalysisRegistry implements Closeable {
}
// go over the char filters in the bindings and register the ones that are not configured
- for (Map.Entry<String, AnalysisModule.AnalysisProvider<T>> entry : providerMap.entrySet()) {
+ for (Map.Entry<String, ? extends AnalysisModule.AnalysisProvider<T>> entry : providerMap.entrySet()) {
String name = entry.getKey();
AnalysisModule.AnalysisProvider<T> provider = entry.getValue();
// we don't want to re-register one that already exists
@@ -365,7 +366,7 @@ public final class AnalysisRegistry implements Closeable {
factories.put(name, instance);
}
- for (Map.Entry<String, AnalysisModule.AnalysisProvider<T>> entry : defaultInstance.entrySet()) {
+ for (Map.Entry<String, ? extends AnalysisModule.AnalysisProvider<T>> entry : defaultInstance.entrySet()) {
final String name = entry.getKey();
final AnalysisModule.AnalysisProvider<T> provider = entry.getValue();
if (factories.containsKey(name) == false) {
@@ -378,7 +379,8 @@ public final class AnalysisRegistry implements Closeable {
return factories;
}
- private <T> AnalysisProvider<T> getAnalysisProvider(Component component, Map<String, AnalysisProvider<T>> providerMap, String name, String typeName) {
+ private <T> AnalysisProvider<T> getAnalysisProvider(Component component, Map<String, ? extends AnalysisProvider<T>> providerMap,
+ String name, String typeName) {
if (typeName == null) {
throw new IllegalArgumentException(component + " [" + name + "] must specify either an analyzer type, or a tokenizer");
}
@@ -393,13 +395,12 @@ public final class AnalysisRegistry implements Closeable {
final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> analyzerProviderFactories;
final Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizerFactories;
- final Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> tokenFilterFactories;
+ final Map<String, ? extends AnalysisProvider<TokenFilterFactory>> tokenFilterFactories;
final Map<String, AnalysisModule.AnalysisProvider<CharFilterFactory>> charFilterFactories;
- private PrebuiltAnalysis() {
+ private PrebuiltAnalysis(Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters) {
Map<String, PreBuiltAnalyzerProviderFactory> analyzerProviderFactories = new HashMap<>();
Map<String, PreBuiltTokenizerFactoryFactory> tokenizerFactories = new HashMap<>();
- Map<String, PreBuiltTokenFilterFactoryFactory> tokenFilterFactories = new HashMap<>();
Map<String, PreBuiltCharFilterFactoryFactory> charFilterFactories = new HashMap<>();
// Analyzers
for (PreBuiltAnalyzers preBuiltAnalyzerEnum : PreBuiltAnalyzers.values()) {
@@ -418,17 +419,6 @@ public final class AnalysisRegistry implements Closeable {
tokenizerFactories.put("edgeNGram", new PreBuiltTokenizerFactoryFactory(PreBuiltTokenizers.EDGE_NGRAM.getTokenizerFactory(Version.CURRENT)));
tokenizerFactories.put("PathHierarchy", new PreBuiltTokenizerFactoryFactory(PreBuiltTokenizers.PATH_HIERARCHY.getTokenizerFactory(Version.CURRENT)));
-
- // Token filters
- for (PreBuiltTokenFilters preBuiltTokenFilter : PreBuiltTokenFilters.values()) {
- String name = preBuiltTokenFilter.name().toLowerCase(Locale.ROOT);
- tokenFilterFactories.put(name, new PreBuiltTokenFilterFactoryFactory(preBuiltTokenFilter.getTokenFilterFactory(Version.CURRENT)));
- }
- // Token filter aliases
- tokenFilterFactories.put("nGram", new PreBuiltTokenFilterFactoryFactory(PreBuiltTokenFilters.NGRAM.getTokenFilterFactory(Version.CURRENT)));
- tokenFilterFactories.put("edgeNGram", new PreBuiltTokenFilterFactoryFactory(PreBuiltTokenFilters.EDGE_NGRAM.getTokenFilterFactory(Version.CURRENT)));
-
-
// Char Filters
for (PreBuiltCharFilters preBuiltCharFilter : PreBuiltCharFilters.values()) {
String name = preBuiltCharFilter.name().toLowerCase(Locale.ROOT);
@@ -436,10 +426,11 @@ public final class AnalysisRegistry implements Closeable {
}
// Char filter aliases
charFilterFactories.put("htmlStrip", new PreBuiltCharFilterFactoryFactory(PreBuiltCharFilters.HTML_STRIP.getCharFilterFactory(Version.CURRENT)));
+
this.analyzerProviderFactories = Collections.unmodifiableMap(analyzerProviderFactories);
this.charFilterFactories = Collections.unmodifiableMap(charFilterFactories);
- this.tokenFilterFactories = Collections.unmodifiableMap(tokenFilterFactories);
this.tokenizerFactories = Collections.unmodifiableMap(tokenizerFactories);
+ tokenFilterFactories = preConfiguredTokenFilters;
}
public AnalysisModule.AnalysisProvider<CharFilterFactory> getCharFilterFactory(String name) {
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PreBuiltTokenFilterFactoryFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/PreBuiltTokenFilterFactoryFactory.java
deleted file mode 100644
index 52c9f2851a..0000000000
--- a/core/src/main/java/org/elasticsearch/index/analysis/PreBuiltTokenFilterFactoryFactory.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.analysis;
-
-import org.elasticsearch.Version;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.indices.analysis.AnalysisModule;
-import org.elasticsearch.indices.analysis.PreBuiltTokenFilters;
-
-import java.io.IOException;
-
-public class PreBuiltTokenFilterFactoryFactory implements AnalysisModule.AnalysisProvider<TokenFilterFactory> {
-
- private final TokenFilterFactory tokenFilterFactory;
-
- public PreBuiltTokenFilterFactoryFactory(TokenFilterFactory tokenFilterFactory) {
- this.tokenFilterFactory = tokenFilterFactory;
- }
-
- @Override
- public TokenFilterFactory get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
- Version indexVersion = Version.indexCreated(settings);
- if (!Version.CURRENT.equals(indexVersion)) {
- PreBuiltTokenFilters preBuiltTokenFilters = PreBuiltTokenFilters.getOrDefault(name, null);
- if (preBuiltTokenFilters != null) {
- return preBuiltTokenFilters.getTokenFilterFactory(indexVersion);
- }
- }
- return tokenFilterFactory;
- }
-}
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java b/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java
new file mode 100644
index 0000000000..b410e8fb70
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.elasticsearch.Version;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.indices.analysis.AnalysisModule;
+import org.elasticsearch.indices.analysis.PreBuiltCacheFactory;
+
+import java.io.IOException;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+
+/**
+ * Provides pre-configured, shared {@link TokenFilter}s.
+ */
+public final class PreConfiguredTokenFilter implements AnalysisModule.AnalysisProvider<TokenFilterFactory> {
+ private final String name;
+ private final boolean useFilterForMultitermQueries;
+ private final PreBuiltCacheFactory.PreBuiltCache<TokenFilterFactory> cache;
+ private final BiFunction<TokenStream, Version, TokenStream> create;
+
+ /**
+ * Standard ctor with all the power.
+ */
+ public PreConfiguredTokenFilter(String name, boolean useFilterForMultitermQueries,
+ PreBuiltCacheFactory.CachingStrategy cachingStrategy, BiFunction<TokenStream, Version, TokenStream> create) {
+ this.name = name;
+ this.useFilterForMultitermQueries = useFilterForMultitermQueries;
+ cache = PreBuiltCacheFactory.getCache(cachingStrategy);
+ this.create = create;
+ }
+
+ /**
+ * Convenience ctor for token streams that don't vary based on version.
+ */
+ public PreConfiguredTokenFilter(String name, boolean useFilterForMultitermQueries,
+ PreBuiltCacheFactory.CachingStrategy cachingStrategy, Function<TokenStream, TokenStream> create) {
+ this(name, useFilterForMultitermQueries, cachingStrategy, (input, version) -> create.apply(input));
+ // TODO why oh why aren't these all CachingStrategy.ONE? They *can't* vary based on version because they don't get it, right?!
+ }
+
+ @Override
+ public TokenFilterFactory get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
+ return getTokenFilterFactory(Version.indexCreated(settings));
+ }
+
+ /**
+ * The name of the {@link TokenFilter} in the API.
+ */
+ public String getName() {
+ return name;
+ }
+
+ /**
+ * Can this {@link TokenFilter} be used in multi-term queries?
+ */
+ public boolean shouldUseFilterForMultitermQueries() {
+ return useFilterForMultitermQueries;
+ }
+
+ private interface MultiTermAwareTokenFilterFactory extends TokenFilterFactory, MultiTermAwareComponent {}
+
+ private synchronized TokenFilterFactory getTokenFilterFactory(final Version version) {
+ TokenFilterFactory factory = cache.get(version);
+ if (factory == null) {
+ if (useFilterForMultitermQueries) {
+ factory = new MultiTermAwareTokenFilterFactory() {
+ @Override
+ public String name() {
+ return name;
+ }
+
+ @Override
+ public TokenStream create(TokenStream tokenStream) {
+ return create.apply(tokenStream, version);
+ }
+
+ @Override
+ public Object getMultiTermComponent() {
+ return this;
+ }
+ };
+ } else {
+ factory = new TokenFilterFactory() {
+ @Override
+ public String name() {
+ return name;
+ }
+
+ @Override
+ public TokenStream create(TokenStream tokenStream) {
+ return create.apply(tokenStream, version);
+ }
+ };
+ }
+ cache.put(version, factory);
+ }
+
+ return factory;
+ }
+}
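For filters whose output depends on the version of the index, the BiFunction constructor above receives the index's created version. A contrived sketch (the filter name and version cutoff are illustrative only; CachingStrategy.ELASTICSEARCH caches one factory per Elasticsearch version, which fits version-dependent filters):

new PreConfiguredTokenFilter("my_versioned_filter", false, CachingStrategy.ELASTICSEARCH,
        (tokenStream, version) -> version.onOrAfter(Version.V_5_0_0)
                ? new LowerCaseFilter(tokenStream)   // org.apache.lucene.analysis.LowerCaseFilter
                : new UpperCaseFilter(tokenStream)); // org.apache.lucene.analysis.core.UpperCaseFilter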
diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
index 26a4e4c1c5..06ef3e315c 100644
--- a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
+++ b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
@@ -19,6 +19,8 @@
package org.elasticsearch.indices.analysis;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.NamedRegistry;
@@ -101,6 +103,7 @@ import org.elasticsearch.index.analysis.PersianAnalyzerProvider;
import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
import org.elasticsearch.index.analysis.PorterStemTokenFilterFactory;
import org.elasticsearch.index.analysis.PortugueseAnalyzerProvider;
+import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.ReverseTokenFilterFactory;
import org.elasticsearch.index.analysis.RomanianAnalyzerProvider;
import org.elasticsearch.index.analysis.RussianAnalyzerProvider;
@@ -138,11 +141,15 @@ import org.elasticsearch.index.analysis.WhitespaceAnalyzerProvider;
import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
+import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import org.elasticsearch.plugins.AnalysisPlugin;
import java.io.IOException;
import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import static java.util.Collections.unmodifiableMap;
import static org.elasticsearch.plugins.AnalysisPlugin.requriesAnalysisSettings;
/**
@@ -169,8 +176,11 @@ public final class AnalysisModule {
NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizers = setupTokenizers(plugins);
NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzers = setupAnalyzers(plugins);
NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizers = setupNormalizers(plugins);
+
+ Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters = setupPreConfiguredTokenFilters(plugins);
+
analysisRegistry = new AnalysisRegistry(environment, charFilters.getRegistry(), tokenFilters.getRegistry(), tokenizers
- .getRegistry(), analyzers.getRegistry(), normalizers.getRegistry());
+ .getRegistry(), analyzers.getRegistry(), normalizers.getRegistry(), preConfiguredTokenFilters);
}
HunspellService getHunspellService() {
@@ -258,6 +268,40 @@ public final class AnalysisModule {
return tokenFilters;
}
+ static Map<String, PreConfiguredTokenFilter> setupPreConfiguredTokenFilters(List<AnalysisPlugin> plugins) {
+ NamedRegistry<PreConfiguredTokenFilter> preConfiguredTokenFilters = new NamedRegistry<>("pre-configured token_filter");
+
+ // Add filters available in lucene-core
+ preConfiguredTokenFilters.register("lowercase",
+ new PreConfiguredTokenFilter("lowercase", true, CachingStrategy.LUCENE, LowerCaseFilter::new));
+ preConfiguredTokenFilters.register("standard",
+ new PreConfiguredTokenFilter("standard", false, CachingStrategy.LUCENE, StandardFilter::new));
+ /* Note that "stop" is available in lucene-core but its pre-built
+ * version uses a set of English stop words that are in
+ * lucene-analyzers-common so "stop" is defined in the analysis-common
+ * module. */
+
+ // Add token filters declared in PreBuiltTokenFilters until they have all been migrated
+ for (PreBuiltTokenFilters preBuilt : PreBuiltTokenFilters.values()) {
+ switch (preBuilt) {
+ case LOWERCASE:
+ // This has been migrated but has to stick around until PreBuiltTokenizers is removed.
+ continue;
+ default:
+ String name = preBuilt.name().toLowerCase(Locale.ROOT);
+ preConfiguredTokenFilters.register(name,
+ new PreConfiguredTokenFilter(name, preBuilt.isMultiTermAware(), preBuilt.getCachingStrategy(), preBuilt::create));
+ }
+ }
+
+ for (AnalysisPlugin plugin: plugins) {
+ for (PreConfiguredTokenFilter filter : plugin.getPreConfiguredTokenFilters()) {
+ preConfiguredTokenFilters.register(filter.getName(), filter);
+ }
+ }
+ return unmodifiableMap(preConfiguredTokenFilters.getRegistry());
+ }
+
private NamedRegistry<AnalysisProvider<TokenizerFactory>> setupTokenizers(List<AnalysisPlugin> plugins) {
NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizers = new NamedRegistry<>("tokenizer");
tokenizers.register("standard", StandardTokenizerFactory::new);
diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltCacheFactory.java b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltCacheFactory.java
index 823152e6d9..8636e04f20 100644
--- a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltCacheFactory.java
+++ b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltCacheFactory.java
@@ -42,7 +42,7 @@ public class PreBuiltCacheFactory {
private PreBuiltCacheFactory() {}
- static <T> PreBuiltCache<T> getCache(CachingStrategy cachingStrategy) {
+ public static <T> PreBuiltCache<T> getCache(CachingStrategy cachingStrategy) {
switch (cachingStrategy) {
case ONE:
return new PreBuiltCacheStrategyOne<>();
diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java
index 6c58ab884d..02f6d8aadc 100644
--- a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java
+++ b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java
@@ -18,9 +18,7 @@
*/
package org.elasticsearch.indices.analysis;
-import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.ar.ArabicStemFilter;
@@ -28,39 +26,23 @@ import org.apache.lucene.analysis.br.BrazilianStemFilter;
import org.apache.lucene.analysis.cjk.CJKBigramFilter;
import org.apache.lucene.analysis.cjk.CJKWidthFilter;
import org.apache.lucene.analysis.ckb.SoraniNormalizationFilter;
-import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
-import org.apache.lucene.analysis.core.StopAnalyzer;
-import org.apache.lucene.analysis.core.UpperCaseFilter;
import org.apache.lucene.analysis.cz.CzechStemFilter;
import org.apache.lucene.analysis.de.GermanNormalizationFilter;
import org.apache.lucene.analysis.de.GermanStemFilter;
-import org.apache.lucene.analysis.en.KStemFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
-import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter;
-import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
import org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilter;
import org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilter;
-import org.apache.lucene.analysis.miscellaneous.TrimFilter;
-import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter;
-import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;
-import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
-import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
-import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
-import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
-import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.ClassicFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.tr.ApostropheFilter;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.elasticsearch.Version;
@@ -75,77 +57,7 @@ import org.tartarus.snowball.ext.FrenchStemmer;
import java.util.Locale;
public enum PreBuiltTokenFilters {
-
- WORD_DELIMITER(CachingStrategy.ONE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new WordDelimiterFilter(tokenStream,
- WordDelimiterFilter.GENERATE_WORD_PARTS |
- WordDelimiterFilter.GENERATE_NUMBER_PARTS |
- WordDelimiterFilter.SPLIT_ON_CASE_CHANGE |
- WordDelimiterFilter.SPLIT_ON_NUMERICS |
- WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
- }
- },
-
- WORD_DELIMITER_GRAPH(CachingStrategy.ONE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new WordDelimiterGraphFilter(tokenStream,
- WordDelimiterGraphFilter.GENERATE_WORD_PARTS |
- WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS |
- WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE |
- WordDelimiterGraphFilter.SPLIT_ON_NUMERICS |
- WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE, null);
- }
- },
-
- STOP(CachingStrategy.LUCENE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new StopFilter(tokenStream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
- }
- },
-
- TRIM(CachingStrategy.LUCENE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new TrimFilter(tokenStream);
- }
- },
-
- REVERSE(CachingStrategy.LUCENE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new ReverseStringFilter(tokenStream);
- }
- },
-
- ASCIIFOLDING(CachingStrategy.ONE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new ASCIIFoldingFilter(tokenStream);
- }
- @Override
- protected boolean isMultiTermAware() {
- return true;
- }
- },
-
- LENGTH(CachingStrategy.LUCENE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new LengthFilter(tokenStream, 0, Integer.MAX_VALUE);
- }
- },
-
- COMMON_GRAMS(CachingStrategy.LUCENE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new CommonGramsFilter(tokenStream, CharArraySet.EMPTY_SET);
- }
- },
-
+ // TODO remove this entire class when PreBuiltTokenizers no longer needs it.....
LOWERCASE(CachingStrategy.LUCENE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
@@ -157,73 +69,6 @@ public enum PreBuiltTokenFilters {
}
},
- UPPERCASE(CachingStrategy.LUCENE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new UpperCaseFilter(tokenStream);
- }
- @Override
- protected boolean isMultiTermAware() {
- return true;
- }
- },
-
- KSTEM(CachingStrategy.ONE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new KStemFilter(tokenStream);
- }
- },
-
- PORTER_STEM(CachingStrategy.ONE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new PorterStemFilter(tokenStream);
- }
- },
-
- STANDARD(CachingStrategy.LUCENE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new StandardFilter(tokenStream);
- }
- },
-
- CLASSIC(CachingStrategy.ONE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new ClassicFilter(tokenStream);
- }
- },
-
- NGRAM(CachingStrategy.LUCENE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new NGramTokenFilter(tokenStream);
- }
- },
-
- EDGE_NGRAM(CachingStrategy.LUCENE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new EdgeNGramTokenFilter(tokenStream, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
- }
- },
-
- UNIQUE(CachingStrategy.ONE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new UniqueTokenFilter(tokenStream);
- }
- },
-
- TRUNCATE(CachingStrategy.ONE) {
- @Override
- public TokenStream create(TokenStream tokenStream, Version version) {
- return new TruncateTokenFilter(tokenStream, 10);
- }
- },
-
// Extended Token Filters
SNOWBALL(CachingStrategy.ONE) {
@Override
@@ -469,10 +314,16 @@ public enum PreBuiltTokenFilters {
protected final PreBuiltCacheFactory.PreBuiltCache<TokenFilterFactory> cache;
+ private final CachingStrategy cachingStrategy;
PreBuiltTokenFilters(CachingStrategy cachingStrategy) {
+ this.cachingStrategy = cachingStrategy;
cache = PreBuiltCacheFactory.getCache(cachingStrategy);
}
+ public CachingStrategy getCachingStrategy() {
+ return cachingStrategy;
+ }
+
private interface MultiTermAwareTokenFilterFactory extends TokenFilterFactory, MultiTermAwareComponent {}
public synchronized TokenFilterFactory getTokenFilterFactory(final Version version) {
@@ -514,17 +365,4 @@ public enum PreBuiltTokenFilters {
return factory;
}
-
- /**
- * Get a pre built TokenFilter by its name or fallback to the default one
- * @param name TokenFilter name
- * @param defaultTokenFilter default TokenFilter if name not found
- */
- public static PreBuiltTokenFilters getOrDefault(String name, PreBuiltTokenFilters defaultTokenFilter) {
- try {
- return valueOf(name.toUpperCase(Locale.ROOT));
- } catch (IllegalArgumentException e) {
- return defaultTokenFilter;
- }
- }
}
diff --git a/core/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java b/core/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java
index 5e7e1053ad..c248c706f2 100644
--- a/core/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java
+++ b/core/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java
@@ -22,19 +22,26 @@ package org.elasticsearch.plugins;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.elasticsearch.Version;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.CharFilterFactory;
+import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
+import org.elasticsearch.indices.analysis.PreBuiltCacheFactory;
import java.io.IOException;
+import java.util.List;
import java.util.Map;
+import java.util.function.BiFunction;
+import static java.util.Collections.emptyList;
import static java.util.Collections.emptyMap;
/**
@@ -88,6 +95,13 @@ public interface AnalysisPlugin {
}
/**
+ * Override to add additional pre-configured token filters.
+ */
+ default List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
+ return emptyList();
+ }
+
+ /**
* Override to add additional hunspell {@link org.apache.lucene.analysis.hunspell.Dictionary}s.
*/
default Map<String, org.apache.lucene.analysis.hunspell.Dictionary> getHunspellDictionaries() {
diff --git a/core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java b/core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java
index 57a83b2c68..0e1414bdbe 100644
--- a/core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java
+++ b/core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java
@@ -48,8 +48,8 @@ import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
/**
- * Tests for {@link TransportAnalyzeAction}. See the more "intense" version of this test in the
- * {@code common-analysis} module.
+ * Tests for {@link TransportAnalyzeAction}. See the rest tests in the {@code analysis-common} module for places where this code gets a ton
+ * more exercise.
*/
public class TransportAnalyzeActionTests extends ESTestCase {
@@ -90,7 +90,11 @@ public class TransportAnalyzeActionTests extends ESTestCase {
indexAnalyzers = registry.build(idxSettings);
}
+ /**
+ * Test behavior when the named analysis component isn't defined on the index. In that case we should build with defaults.
+ */
public void testNoIndexAnalyzers() throws IOException {
+ // Refer to an analyzer by its type so we get its default configuration
AnalyzeRequest request = new AnalyzeRequest();
request.analyzer("standard");
request.text("the quick brown fox");
@@ -98,33 +102,30 @@ public class TransportAnalyzeActionTests extends ESTestCase {
List<AnalyzeResponse.AnalyzeToken> tokens = analyze.getTokens();
assertEquals(4, tokens.size());
+ // Refer to a token filter by its type so we get its default configuration
request.analyzer(null);
request.tokenizer("whitespace");
- request.addTokenFilter("lowercase");
- request.addTokenFilter("word_delimiter");
+ request.addTokenFilter("mock");
request.text("the qu1ck brown fox");
analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, randomBoolean() ? indexAnalyzers : null, registry, environment);
tokens = analyze.getTokens();
- assertEquals(6, tokens.size());
- assertEquals("qu", tokens.get(1).getTerm());
- assertEquals("1", tokens.get(2).getTerm());
- assertEquals("ck", tokens.get(3).getTerm());
+ assertEquals(3, tokens.size());
+ assertEquals("qu1ck", tokens.get(0).getTerm());
+ assertEquals("brown", tokens.get(1).getTerm());
+ assertEquals("fox", tokens.get(2).getTerm());
+ // Refer to a char filter by its type so we get its default configuration
request.analyzer(null);
request.tokenizer("whitespace");
request.addCharFilter("html_strip");
- request.addTokenFilter("lowercase");
- request.addTokenFilter("word_delimiter");
+ request.addTokenFilter("mock");
request.text("<p>the qu1ck brown fox</p>");
analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, randomBoolean() ? indexAnalyzers : null, registry, environment);
tokens = analyze.getTokens();
- assertEquals(6, tokens.size());
- assertEquals("the", tokens.get(0).getTerm());
- assertEquals("qu", tokens.get(1).getTerm());
- assertEquals("1", tokens.get(2).getTerm());
- assertEquals("ck", tokens.get(3).getTerm());
- assertEquals("brown", tokens.get(4).getTerm());
- assertEquals("fox", tokens.get(5).getTerm());
+ assertEquals(3, tokens.size());
+ assertEquals("qu1ck", tokens.get(0).getTerm());
+ assertEquals("brown", tokens.get(1).getTerm());
+ assertEquals("fox", tokens.get(2).getTerm());
}
public void testFillsAttributes() throws IOException {
diff --git a/core/src/test/java/org/elasticsearch/index/IndexModuleTests.java b/core/src/test/java/org/elasticsearch/index/IndexModuleTests.java
index 1ae125cecd..209bd1648b 100644
--- a/core/src/test/java/org/elasticsearch/index/IndexModuleTests.java
+++ b/core/src/test/java/org/elasticsearch/index/IndexModuleTests.java
@@ -94,6 +94,7 @@ public class IndexModuleTests extends ESTestCase {
private Settings settings;
private IndexSettings indexSettings;
private Environment environment;
+ private AnalysisRegistry emptyAnalysisRegistry;
private NodeEnvironment nodeEnvironment;
private IndicesQueryCache indicesQueryCache;
@@ -123,6 +124,7 @@ public class IndexModuleTests extends ESTestCase {
indexSettings = IndexSettingsModule.newIndexSettings("foo", settings);
index = indexSettings.getIndex();
environment = new Environment(settings);
+ emptyAnalysisRegistry = new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap());
threadPool = new TestThreadPool("test");
circuitBreakerService = new NoneCircuitBreakerService();
bigArrays = new BigArrays(settings, circuitBreakerService);
@@ -150,8 +152,7 @@ public class IndexModuleTests extends ESTestCase {
}
public void testWrapperIsBound() throws IOException {
- IndexModule module = new IndexModule(indexSettings,
- new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+ IndexModule module = new IndexModule(indexSettings, emptyAnalysisRegistry);
module.setSearcherWrapper((s) -> new Wrapper());
module.engineFactory.set(new MockEngineFactory(AssertingDirectoryReader.class));
@@ -170,8 +171,7 @@ public class IndexModuleTests extends ESTestCase {
.put(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), "foo_store")
.build();
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(index, settings);
- IndexModule module = new IndexModule(indexSettings,
- new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+ IndexModule module = new IndexModule(indexSettings, emptyAnalysisRegistry);
module.addIndexStore("foo_store", FooStore::new);
try {
module.addIndexStore("foo_store", FooStore::new);
@@ -195,8 +195,7 @@ public class IndexModuleTests extends ESTestCase {
}
};
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(index, settings);
- IndexModule module = new IndexModule(indexSettings,
- new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+ IndexModule module = new IndexModule(indexSettings, emptyAnalysisRegistry);
module.addIndexEventListener(eventListener);
IndexService indexService = newIndexService(module);
IndexSettings x = indexService.getIndexSettings();
@@ -210,8 +209,7 @@ public class IndexModuleTests extends ESTestCase {
public void testListener() throws IOException {
Setting<Boolean> booleanSetting = Setting.boolSetting("index.foo.bar", false, Property.Dynamic, Property.IndexScope);
- IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings, booleanSetting),
- new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+ IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings, booleanSetting), emptyAnalysisRegistry);
Setting<Boolean> booleanSetting2 = Setting.boolSetting("index.foo.bar.baz", false, Property.Dynamic, Property.IndexScope);
AtomicBoolean atomicBoolean = new AtomicBoolean(false);
module.addSettingsUpdateConsumer(booleanSetting, atomicBoolean::set);
@@ -230,8 +228,7 @@ public class IndexModuleTests extends ESTestCase {
}
public void testAddIndexOperationListener() throws IOException {
- IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings),
- new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+ IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings), emptyAnalysisRegistry);
AtomicBoolean executed = new AtomicBoolean(false);
IndexingOperationListener listener = new IndexingOperationListener() {
@Override
@@ -261,8 +258,7 @@ public class IndexModuleTests extends ESTestCase {
}
public void testAddSearchOperationListener() throws IOException {
- IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings),
- new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+ IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings), emptyAnalysisRegistry);
AtomicBoolean executed = new AtomicBoolean(false);
SearchOperationListener listener = new SearchOperationListener() {
@@ -295,8 +291,7 @@ public class IndexModuleTests extends ESTestCase {
.put("index.similarity.my_similarity.key", "there is a key")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
- IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
- new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+ IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings), emptyAnalysisRegistry);
module.addSimilarity("test_similarity", (string, providerSettings, indexLevelSettings) -> new SimilarityProvider() {
@Override
public String name() {
@@ -319,8 +314,7 @@ public class IndexModuleTests extends ESTestCase {
}
public void testFrozen() {
- IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings),
- new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+ IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings(index, settings), emptyAnalysisRegistry);
module.freeze();
String msg = "Can't modify IndexModule once the index service has been created";
assertEquals(msg, expectThrows(IllegalStateException.class, () -> module.addSearchOperationListener(null)).getMessage());
@@ -338,8 +332,7 @@ public class IndexModuleTests extends ESTestCase {
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
- IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
- new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+ IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings), emptyAnalysisRegistry);
Exception ex = expectThrows(IllegalArgumentException.class, () -> newIndexService(module));
assertEquals("Unknown Similarity type [test_similarity] for [my_similarity]", ex.getMessage());
}
@@ -350,8 +343,7 @@ public class IndexModuleTests extends ESTestCase {
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.build();
- IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
- new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+ IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings), emptyAnalysisRegistry);
Exception ex = expectThrows(IllegalArgumentException.class, () -> newIndexService(module));
assertEquals("Similarity [my_similarity] must have an associated type", ex.getMessage());
}
@@ -360,8 +352,7 @@ public class IndexModuleTests extends ESTestCase {
Settings indexSettings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
- IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
- new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+ IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings), emptyAnalysisRegistry);
module.forceQueryCacheProvider((a, b) -> new CustomQueryCache());
expectThrows(AlreadySetException.class, () -> module.forceQueryCacheProvider((a, b) -> new CustomQueryCache()));
IndexService indexService = newIndexService(module);
@@ -373,8 +364,7 @@ public class IndexModuleTests extends ESTestCase {
Settings indexSettings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
- IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
- new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+ IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings), emptyAnalysisRegistry);
IndexService indexService = newIndexService(module);
assertTrue(indexService.cache().query() instanceof IndexQueryCache);
indexService.close("simon says", false);
@@ -385,8 +375,7 @@ public class IndexModuleTests extends ESTestCase {
.put(IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING.getKey(), false)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
- IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
- new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
+ IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings), emptyAnalysisRegistry);
module.forceQueryCacheProvider((a, b) -> new CustomQueryCache());
IndexService indexService = newIndexService(module);
assertTrue(indexService.cache().query() instanceof DisabledQueryCache);
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisFactoryTests.java b/core/src/test/java/org/elasticsearch/index/analysis/AnalysisFactoryTests.java
deleted file mode 100644
index 0a62e8c491..0000000000
--- a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisFactoryTests.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.analysis;
-
-import org.elasticsearch.AnalysisFactoryTestCase;
-
-public class AnalysisFactoryTests extends AnalysisFactoryTestCase {
- // tests are inherited and nothing needs to be defined here
-}
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java b/core/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java
index 6033186c81..471d6f9ccc 100644
--- a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java
+++ b/core/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java
@@ -34,6 +34,7 @@ import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
+import org.elasticsearch.indices.analysis.PreBuiltCacheFactory;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.IndexSettingsModule;
@@ -41,6 +42,7 @@ import org.elasticsearch.test.VersionUtils;
import java.io.IOException;
import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
import static java.util.Collections.emptyMap;
import static java.util.Collections.singletonList;
@@ -50,7 +52,9 @@ import static org.hamcrest.Matchers.instanceOf;
public class AnalysisRegistryTests extends ESTestCase {
- private AnalysisRegistry registry;
+ private Environment emptyEnvironment;
+ private AnalysisRegistry emptyRegistry;
+ private IndexSettings emptyIndexSettingsOfCurrentVersion;
private static AnalyzerProvider<?> analyzerProvider(final String name) {
return new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDEX, new EnglishAnalyzer());
@@ -59,12 +63,13 @@ public class AnalysisRegistryTests extends ESTestCase {
@Override
public void setUp() throws Exception {
super.setUp();
- Settings settings = Settings
- .builder()
- .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
- .build();
- registry = new AnalysisRegistry(new Environment(settings),
- emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap());
+ emptyEnvironment = new Environment(Settings.builder()
+ .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+ .build());
+ emptyRegistry = new AnalysisRegistry(emptyEnvironment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap());
+ emptyIndexSettingsOfCurrentVersion = IndexSettingsModule.newIndexSettings("index", Settings.builder()
+ .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
+ .build());
}
public void testDefaultAnalyzers() throws IOException {
@@ -75,9 +80,7 @@ public class AnalysisRegistryTests extends ESTestCase {
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
- IndexAnalyzers indexAnalyzers = new AnalysisRegistry(new Environment(settings),
- emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap())
- .build(idxSettings);
+ IndexAnalyzers indexAnalyzers = emptyRegistry.build(idxSettings);
assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
@@ -86,7 +89,7 @@ public class AnalysisRegistryTests extends ESTestCase {
public void testOverrideDefaultAnalyzer() throws IOException {
Version version = VersionUtils.randomVersion(random());
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
- IndexAnalyzers indexAnalyzers = registry.build(IndexSettingsModule.newIndexSettings("index", settings),
+ IndexAnalyzers indexAnalyzers = emptyRegistry.build(IndexSettingsModule.newIndexSettings("index", settings),
singletonMap("default", analyzerProvider("default"))
, emptyMap(), emptyMap(), emptyMap(), emptyMap());
assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
@@ -99,7 +102,7 @@ public class AnalysisRegistryTests extends ESTestCase {
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
AnalyzerProvider<?> defaultIndex = new PreBuiltAnalyzerProvider("default_index", AnalyzerScope.INDEX, new EnglishAnalyzer());
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
- () -> registry.build(IndexSettingsModule.newIndexSettings("index", settings),
+ () -> emptyRegistry.build(IndexSettingsModule.newIndexSettings("index", settings),
singletonMap("default_index", defaultIndex), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
assertTrue(e.getMessage().contains("[index.analysis.analyzer.default_index] is not supported"));
}
@@ -107,7 +110,7 @@ public class AnalysisRegistryTests extends ESTestCase {
public void testOverrideDefaultSearchAnalyzer() {
Version version = VersionUtils.randomVersion(random());
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
- IndexAnalyzers indexAnalyzers = registry.build(IndexSettingsModule.newIndexSettings("index", settings),
+ IndexAnalyzers indexAnalyzers = emptyRegistry.build(IndexSettingsModule.newIndexSettings("index", settings),
singletonMap("default_search", analyzerProvider("default_search")), emptyMap(), emptyMap(), emptyMap(), emptyMap());
assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
@@ -189,11 +192,12 @@ public class AnalysisRegistryTests extends ESTestCase {
Settings indexSettings = Settings.builder()
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
- IndexAnalyzers indexAnalyzers = new AnalysisRegistry(new Environment(settings),
- emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap())
- .build(idxSettings);
- IndexAnalyzers otherIndexAnalyzers = new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(),
- emptyMap(), emptyMap()).build(idxSettings);
+ IndexAnalyzers indexAnalyzers =
+ new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap())
+ .build(idxSettings);
+ IndexAnalyzers otherIndexAnalyzers =
+ new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap())
+ .build(idxSettings);
final int numIters = randomIntBetween(5, 20);
for (int i = 0; i < numIters; i++) {
PreBuiltAnalyzers preBuiltAnalyzers = RandomPicks.randomFrom(random(), PreBuiltAnalyzers.values());
@@ -201,6 +205,23 @@ public class AnalysisRegistryTests extends ESTestCase {
}
}
+ public void testPreConfiguredTokenFiltersAreCached() throws IOException {
+ AtomicBoolean built = new AtomicBoolean(false);
+ PreConfiguredTokenFilter assertsBuiltOnce = new PreConfiguredTokenFilter("asserts_built_once", false,
+ PreBuiltCacheFactory.CachingStrategy.ONE, (tokens, version) -> {
+ if (false == built.compareAndSet(false, true)) {
+ fail("Attempted to build the token filter twice when it should have been cached");
+ }
+ return new MockTokenFilter(tokens, MockTokenFilter.EMPTY_STOPSET);
+ });
+ try (AnalysisRegistry registryWithPreBuiltTokenFilter = new AnalysisRegistry(emptyEnvironment, emptyMap(), emptyMap(), emptyMap(),
+ emptyMap(), emptyMap(), singletonMap("asserts_built_once", assertsBuiltOnce))) {
+ IndexAnalyzers indexAnalyzers = registryWithPreBuiltTokenFilter.build(emptyIndexSettingsOfCurrentVersion);
+ IndexAnalyzers otherIndexAnalyzers = registryWithPreBuiltTokenFilter.build(emptyIndexSettingsOfCurrentVersion);
+ assertSame(indexAnalyzers.get("asserts_built_once"), otherIndexAnalyzers.get("asserts_built_once"));
+ }
+ }
+
public void testNoTypeOrTokenizerErrorMessage() throws IOException {
Version version = VersionUtils.randomVersion(random());
Settings settings = Settings
@@ -212,20 +233,14 @@ public class AnalysisRegistryTests extends ESTestCase {
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
- IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
- () -> new AnalysisRegistry(new Environment(settings),
- emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()).build(idxSettings));
+ IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () ->
+ new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap())
+ .build(idxSettings));
assertThat(e.getMessage(), equalTo("analyzer [test_analyzer] must specify either an analyzer type, or a tokenizer"));
}
public void testCloseIndexAnalyzersMultipleTimes() throws IOException {
- Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
- Settings indexSettings = Settings.builder()
- .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
- IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
- IndexAnalyzers indexAnalyzers = new AnalysisRegistry(new Environment(settings),
- emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap())
- .build(idxSettings);
+ IndexAnalyzers indexAnalyzers = emptyRegistry.build(emptyIndexSettingsOfCurrentVersion);
indexAnalyzers.close();
indexAnalyzers.close();
}
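
For context, a minimal sketch of the caching contract the new testPreConfiguredTokenFiltersAreCached test exercises. This is not part of the patch; the class and the "demo_uppercase" name are invented, and Lucene's UpperCaseFilter (already used elsewhere in this change) stands in for an arbitrary filter:

    import org.apache.lucene.analysis.core.UpperCaseFilter;
    import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
    import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;

    public class PreConfiguredCachingSketch {
        // With CachingStrategy.ONE the registry caches a single factory and hands
        // it to every index, so UpperCaseFilter::new runs at most once per node;
        // CachingStrategy.LUCENE instead keeps one cached copy per Lucene version.
        static final PreConfiguredTokenFilter DEMO = new PreConfiguredTokenFilter(
                "demo_uppercase", false, CachingStrategy.ONE, UpperCaseFilter::new);
    }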
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/CoreAnalysisFactoryTests.java b/core/src/test/java/org/elasticsearch/index/analysis/CoreAnalysisFactoryTests.java
new file mode 100644
index 0000000000..3b4897b588
--- /dev/null
+++ b/core/src/test/java/org/elasticsearch/index/analysis/CoreAnalysisFactoryTests.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.elasticsearch.indices.analysis.AnalysisFactoryTestCase;
+import org.elasticsearch.plugins.AnalysisPlugin;
+
+/**
+ * Checks on the analysis components that are part of core to make sure that any that are added
+ * to lucene are either enabled or explicitly not enabled. During the migration of analysis
+ * components to the {@code analysis-common} module this test ignores many components that are
+ * available to es-core but are mapped in {@code analysis-common}. When the migration is complete
+ * no such ignoring will be needed because the analysis components won't be available to core.
+ */
+public class CoreAnalysisFactoryTests extends AnalysisFactoryTestCase {
+ public CoreAnalysisFactoryTests() {
+ // Use an empty plugin that doesn't define anything so the test doesn't need a ton of null checks.
+ super(new AnalysisPlugin() {});
+ }
+}
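
The `new AnalysisPlugin() {}` trick in the constructor works because AnalysisPlugin is an interface whose extension points carry empty defaults. A sketch of the assumed shape of the new default follows; the body is an assumption inferred from how the plugin is used in this patch, not quoted from it:

    import static java.util.Collections.emptyList;

    import java.util.List;

    import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;

    // Assumed sketch: with an empty default like this, an anonymous
    // "new AnalysisPlugin() {}" pre-configures nothing and needs no null checks.
    public interface AnalysisPluginSketch {
        default List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
            return emptyList();
        }
    }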
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java b/core/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java
index c6b5806099..5cdc589405 100644
--- a/core/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java
+++ b/core/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java
@@ -19,33 +19,39 @@
package org.elasticsearch.index.analysis;
+import org.apache.lucene.analysis.MockLowerCaseFilter;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
+import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.ESTokenStreamTestCase;
import java.io.IOException;
import java.io.Reader;
+import java.util.List;
import java.util.Map;
+import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
public class CustomNormalizerTests extends ESTokenStreamTestCase {
+ private static final AnalysisPlugin MOCK_ANALYSIS_PLUGIN = new MockAnalysisPlugin();
+
public void testBasics() throws IOException {
Settings settings = Settings.builder()
- .putArray("index.analysis.normalizer.my_normalizer.filter", "lowercase", "asciifolding")
+ .putArray("index.analysis.normalizer.my_normalizer.filter", "lowercase")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
- ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+ ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, MOCK_ANALYSIS_PLUGIN);
assertNull(analysis.indexAnalyzers.get("my_normalizer"));
NamedAnalyzer normalizer = analysis.indexAnalyzers.getNormalizer("my_normalizer");
assertNotNull(normalizer);
assertEquals("my_normalizer", normalizer.name());
- assertTokenStreamContents(normalizer.tokenStream("foo", "Cet été-là"), new String[] {"cet ete-la"});
- assertEquals(new BytesRef("cet ete-la"), normalizer.normalize("foo", "Cet été-là"));
+ assertTokenStreamContents(normalizer.tokenStream("foo", "Cet été-là"), new String[] {"cet été-là"});
+ assertEquals(new BytesRef("cet été-là"), normalizer.normalize("foo", "Cet été-là"));
}
public void testUnknownType() {
@@ -75,7 +81,7 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
.putArray("index.analysis.normalizer.my_normalizer.char_filter", "my_mapping")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
- ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new MockCharFilterPlugin());
+ ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, MOCK_ANALYSIS_PLUGIN);
assertNull(analysis.indexAnalyzers.get("my_normalizer"));
NamedAnalyzer normalizer = analysis.indexAnalyzers.getNormalizer("my_normalizer");
assertNotNull(normalizer);
@@ -86,12 +92,12 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
public void testIllegalFilters() throws IOException {
Settings settings = Settings.builder()
- .putArray("index.analysis.normalizer.my_normalizer.filter", "porter_stem")
+ .putArray("index.analysis.normalizer.my_normalizer.filter", "mock_forbidden")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
- () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings));
- assertEquals("Custom normalizer [my_normalizer] may not use filter [porter_stem]", e.getMessage());
+ () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, MOCK_ANALYSIS_PLUGIN));
+ assertEquals("Custom normalizer [my_normalizer] may not use filter [mock_forbidden]", e.getMessage());
}
public void testIllegalCharFilters() throws IOException {
@@ -104,7 +110,12 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
assertEquals("Custom normalizer [my_normalizer] may not use char filter [html_strip]", e.getMessage());
}
- private class MockCharFilterPlugin implements AnalysisPlugin {
+ private static class MockAnalysisPlugin implements AnalysisPlugin {
+ @Override
+ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
+ return singletonList(new PreConfiguredTokenFilter("mock_forbidden", false, CachingStrategy.ONE, MockLowerCaseFilter::new));
+ }
+
@Override
public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
return singletonMap("mock_char_filter", (indexSettings, env, name, settings) -> {
@@ -116,22 +127,21 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
@Override
public Reader create(Reader reader) {
return new Reader() {
+ @Override
+ public int read(char[] cbuf, int off, int len) throws IOException {
+ int result = reader.read(cbuf, off, len);
+ for (int i = off; i < result; i++) {
+ if (cbuf[i] == 'a') {
+ cbuf[i] = 'z';
+ }
+ }
+ return result;
+ }
- @Override
- public int read(char[] cbuf, int off, int len) throws IOException {
- int result = reader.read(cbuf, off, len);
- for (int i = off; i < result; i++) {
- if (cbuf[i] == 'a') {
- cbuf[i] = 'z';
- }
- }
- return result;
- }
-
- @Override
- public void close() throws IOException {
- reader.close();
- }
+ @Override
+ public void close() throws IOException {
+ reader.close();
+ }
};
}
@Override
diff --git a/core/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java b/core/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java
index 2da44d57f0..518f669f81 100644
--- a/core/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java
+++ b/core/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java
@@ -19,6 +19,7 @@
package org.elasticsearch.index.mapper;
+import org.apache.lucene.analysis.MockLowerCaseFilter;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
@@ -29,7 +30,10 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
+import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.mapper.MapperService.MergeReason;
+import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
+import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.InternalSettingsPlugin;
@@ -38,15 +42,26 @@ import org.junit.Before;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
+import java.util.List;
+import static java.util.Collections.singletonList;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
+ /**
+ * Creates a copy of the lowercase token filter which we use for testing merge errors.
+ */
+ public static class MockAnalysisPlugin extends Plugin implements AnalysisPlugin {
+ @Override
+ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
+ return singletonList(new PreConfiguredTokenFilter("mock_other_lowercase", true, CachingStrategy.ONE, MockLowerCaseFilter::new));
+ }
+ }
@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
- return pluginList(InternalSettingsPlugin.class);
+ return pluginList(InternalSettingsPlugin.class, MockAnalysisPlugin.class);
}
IndexService indexService;
@@ -57,8 +72,8 @@ public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
indexService = createIndex("test", Settings.builder()
.put("index.analysis.normalizer.my_lowercase.type", "custom")
.putArray("index.analysis.normalizer.my_lowercase.filter", "lowercase")
- .put("index.analysis.normalizer.my_asciifolding.type", "custom")
- .putArray("index.analysis.normalizer.my_asciifolding.filter", "asciifolding").build());
+ .put("index.analysis.normalizer.my_other_lowercase.type", "custom")
+ .putArray("index.analysis.normalizer.my_other_lowercase.filter", "mock_other_lowercase").build());
parser = indexService.mapperService().documentMapperParser();
}
@@ -348,7 +363,7 @@ public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
String mapping2 = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
- .field("type", "keyword").field("normalizer", "my_asciifolding").endObject().endObject()
+ .field("type", "keyword").field("normalizer", "my_other_lowercase").endObject().endObject()
.endObject().endObject().string();
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> indexService.mapperService().merge("type",
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
index e17df4b446..afe235ac8a 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
@@ -19,14 +19,35 @@
package org.elasticsearch.analysis.common;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
+import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.core.UpperCaseFilter;
+import org.apache.lucene.analysis.en.KStemFilter;
+import org.apache.lucene.analysis.en.PorterStemFilter;
+import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
+import org.apache.lucene.analysis.miscellaneous.LengthFilter;
+import org.apache.lucene.analysis.miscellaneous.TrimFilter;
+import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter;
+import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;
+import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
+import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
+import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
+import org.apache.lucene.analysis.ngram.NGramTokenFilter;
+import org.apache.lucene.analysis.reverse.ReverseStringFilter;
+import org.apache.lucene.analysis.standard.ClassicFilter;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.HtmlStripCharFilterFactory;
+import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
+import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;
-import java.util.HashMap;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Map;
import java.util.TreeMap;
@@ -35,14 +56,13 @@ import static org.elasticsearch.plugins.AnalysisPlugin.requriesAnalysisSettings;
public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
@Override
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
- Map<String, AnalysisProvider<TokenFilterFactory>> filters = new HashMap<>();
+ Map<String, AnalysisProvider<TokenFilterFactory>> filters = new TreeMap<>();
filters.put("asciifolding", ASCIIFoldingTokenFilterFactory::new);
filters.put("word_delimiter", WordDelimiterTokenFilterFactory::new);
filters.put("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new);
return filters;
}
- @Override
public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
Map<String, AnalysisProvider<CharFilterFactory>> filters = new TreeMap<>();
filters.put("html_strip", HtmlStripCharFilterFactory::new);
@@ -50,4 +70,50 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
filters.put("mapping", requriesAnalysisSettings(MappingCharFilterFactory::new));
return filters;
}
+
+ @Override
+ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
+ // TODO we should revisit the caching strategies.
+ List<PreConfiguredTokenFilter> filters = new ArrayList<>();
+ filters.add(new PreConfiguredTokenFilter("asciifolding", true, CachingStrategy.ONE, input -> new ASCIIFoldingFilter(input)));
+ filters.add(new PreConfiguredTokenFilter("classic", false, CachingStrategy.ONE, ClassicFilter::new));
+ filters.add(new PreConfiguredTokenFilter("common_grams", false, CachingStrategy.LUCENE, input ->
+ new CommonGramsFilter(input, CharArraySet.EMPTY_SET)));
+ filters.add(new PreConfiguredTokenFilter("edge_ngram", false, CachingStrategy.LUCENE, input ->
+ new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE)));
+ // TODO deprecate edgeNGram
+ filters.add(new PreConfiguredTokenFilter("edgeNGram", false, CachingStrategy.LUCENE, input ->
+ new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE)));
+ filters.add(new PreConfiguredTokenFilter("kstem", false, CachingStrategy.ONE, KStemFilter::new));
+ filters.add(new PreConfiguredTokenFilter("length", false, CachingStrategy.LUCENE, input ->
+ new LengthFilter(input, 0, Integer.MAX_VALUE))); // TODO this one seems useless
+ filters.add(new PreConfiguredTokenFilter("ngram", false, CachingStrategy.LUCENE, NGramTokenFilter::new));
+ // TODO deprecate nGram
+ filters.add(new PreConfiguredTokenFilter("nGram", false, CachingStrategy.LUCENE, NGramTokenFilter::new));
+ filters.add(new PreConfiguredTokenFilter("porter_stem", false, CachingStrategy.ONE, PorterStemFilter::new));
+ filters.add(new PreConfiguredTokenFilter("reverse", false, CachingStrategy.LUCENE, input -> new ReverseStringFilter(input)));
+ // The stop filter is in lucene-core but the English stop words set is in lucene-analyzers-common
+ filters.add(new PreConfiguredTokenFilter("stop", false, CachingStrategy.LUCENE, input ->
+ new StopFilter(input, StopAnalyzer.ENGLISH_STOP_WORDS_SET)));
+ filters.add(new PreConfiguredTokenFilter("trim", false, CachingStrategy.LUCENE, TrimFilter::new));
+ filters.add(new PreConfiguredTokenFilter("truncate", false, CachingStrategy.ONE, input ->
+ new TruncateTokenFilter(input, 10)));
+ filters.add(new PreConfiguredTokenFilter("unique", false, CachingStrategy.ONE, input -> new UniqueTokenFilter(input)));
+ filters.add(new PreConfiguredTokenFilter("uppercase", true, CachingStrategy.LUCENE, UpperCaseFilter::new));
+ filters.add(new PreConfiguredTokenFilter("word_delimiter", false, CachingStrategy.ONE, input ->
+ new WordDelimiterFilter(input,
+ WordDelimiterFilter.GENERATE_WORD_PARTS
+ | WordDelimiterFilter.GENERATE_NUMBER_PARTS
+ | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
+ | WordDelimiterFilter.SPLIT_ON_NUMERICS
+ | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null)));
+ filters.add(new PreConfiguredTokenFilter("word_delimiter_graph", false, CachingStrategy.ONE, input ->
+ new WordDelimiterGraphFilter(input,
+ WordDelimiterGraphFilter.GENERATE_WORD_PARTS
+ | WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS
+ | WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE
+ | WordDelimiterGraphFilter.SPLIT_ON_NUMERICS
+ | WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE, null)));
+ return filters;
+ }
}
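
The same extension point is open to plugins outside the analysis-common module. Below is a minimal sketch of a third-party registration following the pattern above; MyAnalysisPlugin and the "my_uppercase" name are hypothetical, with Lucene's UpperCaseFilter standing in for a real filter:

    import static java.util.Collections.singletonList;

    import java.util.List;

    import org.apache.lucene.analysis.core.UpperCaseFilter;
    import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
    import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
    import org.elasticsearch.plugins.AnalysisPlugin;
    import org.elasticsearch.plugins.Plugin;

    public class MyAnalysisPlugin extends Plugin implements AnalysisPlugin {
        @Override
        public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
            // "true" marks the filter as usable for multi-term queries, the same
            // flag the pre-configured filters above pass as their second argument.
            return singletonList(new PreConfiguredTokenFilter(
                    "my_uppercase", true, CachingStrategy.ONE, UpperCaseFilter::new));
        }
    }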
diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java
index 78522f3b6f..73a6c3d273 100644
--- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java
@@ -19,10 +19,10 @@
package org.elasticsearch.analysis.common;
-import org.elasticsearch.AnalysisFactoryTestCase;
+import org.apache.lucene.analysis.reverse.ReverseStringFilterFactory;
import org.elasticsearch.index.analysis.HtmlStripCharFilterFactory;
+import org.elasticsearch.indices.analysis.AnalysisFactoryTestCase;
-import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
@@ -31,15 +31,19 @@ import static java.util.Collections.emptyList;
import static java.util.stream.Collectors.toList;
public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
+ public CommonAnalysisFactoryTests() {
+ super(new CommonAnalysisPlugin());
+ }
+
@Override
protected Map<String, Class<?>> getTokenizers() {
- Map<String, Class<?>> tokenizers = new HashMap<>(super.getTokenizers());
+ Map<String, Class<?>> tokenizers = new TreeMap<>(super.getTokenizers());
return tokenizers;
}
@Override
protected Map<String, Class<?>> getTokenFilters() {
- Map<String, Class<?>> filters = new HashMap<>(super.getTokenFilters());
+ Map<String, Class<?>> filters = new TreeMap<>(super.getTokenFilters());
filters.put("asciifolding", ASCIIFoldingTokenFilterFactory.class);
filters.put("worddelimiter", WordDelimiterTokenFilterFactory.class);
filters.put("worddelimitergraph", WordDelimiterGraphTokenFilterFactory.class);
@@ -59,6 +63,30 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
return filters;
}
+ @Override
+ protected Map<String, Class<?>> getPreConfiguredTokenFilters() {
+ Map<String, Class<?>> filters = new TreeMap<>(super.getPreConfiguredTokenFilters());
+ filters.put("asciifolding", null);
+ filters.put("classic", null);
+ filters.put("common_grams", null);
+ filters.put("edge_ngram", null);
+ filters.put("edgeNGram", null);
+ filters.put("kstem", null);
+ filters.put("length", null);
+ filters.put("ngram", null);
+ filters.put("nGram", null);
+ filters.put("porter_stem", null);
+ filters.put("reverse", ReverseStringFilterFactory.class);
+ filters.put("stop", null);
+ filters.put("trim", null);
+ filters.put("truncate", null);
+ filters.put("unique", Void.class);
+ filters.put("uppercase", null);
+ filters.put("word_delimiter", null);
+ filters.put("word_delimiter_graph", null);
+ return filters;
+ }
+
/**
* Fails if a tokenizer is marked in the superclass with {@link MovedToAnalysisCommon} but
* hasn't been marked in this class with its proper factory.
diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yaml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yaml
index 0666a31623..39d55c15ac 100644
--- a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yaml
+++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yaml
@@ -10,6 +10,18 @@
- length: { tokens: 1 }
- match: { tokens.0.token: Musee d'Orsay }
+ - do:
+ indices.analyze:
+ body:
+ text: Musée d'Orsay
+ tokenizer: keyword
+ filter:
+ - type: asciifolding
+ preserve_original: true
+ - length: { tokens: 2 }
+ - match: { tokens.0.token: Musee d'Orsay }
+ - match: { tokens.1.token: Musée d'Orsay }
+
---
"lowercase":
- do:
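
The REST test added above exercises ASCIIFoldingFilter's preserveOriginal flag. As a sketch of the same behaviour in plain Lucene (standard lucene-analyzers-common classes; the wrapper class name is invented):

    import java.io.IOException;
    import java.io.StringReader;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.KeywordTokenizer;
    import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class AsciiFoldingSketch {
        public static void main(String[] args) throws IOException {
            KeywordTokenizer tokenizer = new KeywordTokenizer();
            tokenizer.setReader(new StringReader("Musée d'Orsay"));
            // preserveOriginal=true emits the folded token and then the original,
            // matching the two tokens the YAML test above asserts.
            TokenStream stream = new ASCIIFoldingFilter(tokenizer, true);
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println(term.toString()); // "Musee d'Orsay" then "Musée d'Orsay"
            }
            stream.end();
            stream.close();
        }
    }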
diff --git a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/AnalysisICUFactoryTests.java b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/AnalysisICUFactoryTests.java
index 704ca61985..d222189651 100644
--- a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/AnalysisICUFactoryTests.java
+++ b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/AnalysisICUFactoryTests.java
@@ -19,12 +19,16 @@
package org.elasticsearch.index.analysis;
-import org.elasticsearch.AnalysisFactoryTestCase;
+import org.elasticsearch.indices.analysis.AnalysisFactoryTestCase;
+import org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin;
import java.util.HashMap;
import java.util.Map;
public class AnalysisICUFactoryTests extends AnalysisFactoryTestCase {
+ public AnalysisICUFactoryTests() {
+ super(new AnalysisICUPlugin());
+ }
@Override
protected Map<String, Class<?>> getTokenizers() {
diff --git a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/AnalysisKuromojiFactoryTests.java b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/AnalysisKuromojiFactoryTests.java
index 9db7def101..dbdc5795b3 100644
--- a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/AnalysisKuromojiFactoryTests.java
+++ b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/AnalysisKuromojiFactoryTests.java
@@ -20,12 +20,16 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.ja.JapaneseTokenizerFactory;
-import org.elasticsearch.AnalysisFactoryTestCase;
+import org.elasticsearch.indices.analysis.AnalysisFactoryTestCase;
+import org.elasticsearch.plugin.analysis.kuromoji.AnalysisKuromojiPlugin;
import java.util.HashMap;
import java.util.Map;
public class AnalysisKuromojiFactoryTests extends AnalysisFactoryTestCase {
+ public AnalysisKuromojiFactoryTests() {
+ super(new AnalysisKuromojiPlugin());
+ }
@Override
protected Map<String, Class<?>> getTokenizers() {
diff --git a/plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/AnalysisPhoneticFactoryTests.java b/plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/AnalysisPhoneticFactoryTests.java
index 0546fb468c..8c551aee91 100644
--- a/plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/AnalysisPhoneticFactoryTests.java
+++ b/plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/AnalysisPhoneticFactoryTests.java
@@ -19,12 +19,16 @@
package org.elasticsearch.index.analysis;
-import org.elasticsearch.AnalysisFactoryTestCase;
+import org.elasticsearch.indices.analysis.AnalysisFactoryTestCase;
+import org.elasticsearch.plugin.analysis.AnalysisPhoneticPlugin;
import java.util.HashMap;
import java.util.Map;
public class AnalysisPhoneticFactoryTests extends AnalysisFactoryTestCase {
+ public AnalysisPhoneticFactoryTests() {
+ super(new AnalysisPhoneticPlugin());
+ }
@Override
protected Map<String, Class<?>> getTokenFilters() {
diff --git a/plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/AnalysisSmartChineseFactoryTests.java b/plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/AnalysisSmartChineseFactoryTests.java
index d8aad322dc..53652c55f0 100644
--- a/plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/AnalysisSmartChineseFactoryTests.java
+++ b/plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/AnalysisSmartChineseFactoryTests.java
@@ -19,13 +19,16 @@
package org.elasticsearch.index.analysis;
-import org.elasticsearch.AnalysisFactoryTestCase;
+import org.elasticsearch.indices.analysis.AnalysisFactoryTestCase;
+import org.elasticsearch.plugin.analysis.smartcn.AnalysisSmartChinesePlugin;
import java.util.HashMap;
import java.util.Map;
public class AnalysisSmartChineseFactoryTests extends AnalysisFactoryTestCase {
-
+ public AnalysisSmartChineseFactoryTests() {
+ super(new AnalysisSmartChinesePlugin());
+ }
@Override
protected Map<String, Class<?>> getTokenizers() {
Map<String, Class<?>> tokenizers = new HashMap<>(super.getTokenizers());
diff --git a/plugins/analysis-stempel/src/test/java/org/elasticsearch/index/analysis/AnalysisPolishFactoryTests.java b/plugins/analysis-stempel/src/test/java/org/elasticsearch/index/analysis/AnalysisPolishFactoryTests.java
index 8301529627..ae78b9c01b 100644
--- a/plugins/analysis-stempel/src/test/java/org/elasticsearch/index/analysis/AnalysisPolishFactoryTests.java
+++ b/plugins/analysis-stempel/src/test/java/org/elasticsearch/index/analysis/AnalysisPolishFactoryTests.java
@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.elasticsearch.AnalysisFactoryTestCase;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.UUIDs;
@@ -31,12 +30,17 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.pl.PolishStemTokenFilterFactory;
+import org.elasticsearch.indices.analysis.AnalysisFactoryTestCase;
+import org.elasticsearch.plugin.analysis.stempel.AnalysisStempelPlugin;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
public class AnalysisPolishFactoryTests extends AnalysisFactoryTestCase {
+ public AnalysisPolishFactoryTests() {
+ super(new AnalysisStempelPlugin());
+ }
@Override
protected Map<String, Class<?>> getTokenFilters() {
diff --git a/test/framework/src/main/java/org/elasticsearch/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
index d14f81c61d..534db0be39 100644
--- a/test/framework/src/main/java/org/elasticsearch/AnalysisFactoryTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
@@ -17,14 +17,14 @@
* under the License.
*/
-package org.elasticsearch;
+package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.en.PorterStemFilterFactory;
-import org.apache.lucene.analysis.reverse.ReverseStringFilterFactory;
import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.elasticsearch.Version;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.index.analysis.ApostropheFilterFactory;
import org.elasticsearch.index.analysis.ArabicNormalizationFilterFactory;
@@ -67,6 +67,7 @@ import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
import org.elasticsearch.index.analysis.PatternTokenizerFactory;
import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
import org.elasticsearch.index.analysis.PorterStemTokenFilterFactory;
+import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.ReverseTokenFilterFactory;
import org.elasticsearch.index.analysis.ScandinavianFoldingFilterFactory;
import org.elasticsearch.index.analysis.ScandinavianNormalizationFilterFactory;
@@ -89,21 +90,23 @@ import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
-import org.elasticsearch.indices.analysis.PreBuiltCharFilters;
-import org.elasticsearch.indices.analysis.PreBuiltTokenFilters;
-import org.elasticsearch.indices.analysis.PreBuiltTokenizers;
+import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.test.ESTestCase;
import java.util.Collection;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.Locale;
import java.util.Map;
+import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import static java.util.Collections.singletonList;
+
/**
* Alerts us if new analysis components are added to Lucene, so we don't miss them.
* <p>
@@ -285,41 +288,6 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.immutableMap();
- static final Map<PreBuiltTokenFilters, Class<?>> PREBUILT_TOKENFILTERS;
- static {
- PREBUILT_TOKENFILTERS = new EnumMap<>(PreBuiltTokenFilters.class);
- for (PreBuiltTokenFilters tokenizer : PreBuiltTokenFilters.values()) {
- Class<?> luceneFactoryClazz;
- switch (tokenizer) {
- case REVERSE:
- luceneFactoryClazz = ReverseStringFilterFactory.class;
- break;
- case UNIQUE:
- luceneFactoryClazz = Void.class;
- break;
- case SNOWBALL:
- case DUTCH_STEM:
- case FRENCH_STEM:
- case RUSSIAN_STEM:
- luceneFactoryClazz = SnowballPorterFilterFactory.class;
- break;
- case STEMMER:
- luceneFactoryClazz = PorterStemFilterFactory.class;
- break;
- case DELIMITED_PAYLOAD_FILTER:
- luceneFactoryClazz = org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilterFactory.class;
- break;
- case LIMIT:
- luceneFactoryClazz = org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory.class;
- break;
- default:
- luceneFactoryClazz = org.apache.lucene.analysis.util.TokenFilterFactory.lookupClass(
- toCamelCase(tokenizer.getTokenFilterFactory(Version.CURRENT).name()));
- }
- PREBUILT_TOKENFILTERS.put(tokenizer, luceneFactoryClazz);
- }
- }
-
static final Map<String,Class<?>> KNOWN_CHARFILTERS = new MapBuilder<String,Class<?>>()
// exposed in ES
.put("htmlstrip", MovedToAnalysisCommon.class)
@@ -345,6 +313,15 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
}
}
+ /**
+ * The plugin being tested. Core uses an "empty" plugin so we don't need null checks all over the place.
+ */
+ private final AnalysisPlugin plugin;
+
+ public AnalysisFactoryTestCase(AnalysisPlugin plugin) {
+ this.plugin = Objects.requireNonNull(plugin, "plugin is required. use an empty plugin for core");
+ }
+
protected Map<String, Class<?>> getTokenizers() {
return KNOWN_TOKENIZERS;
}
@@ -353,6 +330,49 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
return KNOWN_TOKENFILTERS;
}
+ /**
+ * Map containing pre-configured token filters that should be available
+ * after installing this plugin. The map is from the name of the token
+ * filter to the class of the Lucene {@link TokenFilterFactory} that it
+ * is emulating. If the Lucene filter factory is {@code null} then the
+ * test will look it up for you from the name. If there is no Lucene
+ * {@linkplain TokenFilterFactory} then the right hand side should
+ * be {@link Void}.
+ */
+ protected Map<String, Class<?>> getPreConfiguredTokenFilters() {
+ Map<String, Class<?>> filters = new HashMap<>();
+ filters.put("standard", null);
+ filters.put("lowercase", null);
+ // TODO remove the loop below once all the token filters are migrated out of PreBuiltTokenFilters
+ for (PreBuiltTokenFilters tokenizer : PreBuiltTokenFilters.values()) {
+ Class<?> luceneFactoryClass;
+ switch (tokenizer) {
+ case LOWERCASE:
+ // This has been migrated but has to stick around until PreBuiltTokenizers is removed.
+ continue;
+ case SNOWBALL:
+ case DUTCH_STEM:
+ case FRENCH_STEM:
+ case RUSSIAN_STEM:
+ luceneFactoryClass = SnowballPorterFilterFactory.class;
+ break;
+ case STEMMER:
+ luceneFactoryClass = PorterStemFilterFactory.class;
+ break;
+ case DELIMITED_PAYLOAD_FILTER:
+ luceneFactoryClass = org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilterFactory.class;
+ break;
+ case LIMIT:
+ luceneFactoryClass = org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory.class;
+ break;
+ default:
+ luceneFactoryClass = null;
+ }
+ filters.put(tokenizer.name().toLowerCase(Locale.ROOT), luceneFactoryClass);
+ }
+ return filters;
+ }
+
protected Map<String, Class<?>> getCharFilters() {
return KNOWN_CHARFILTERS;
}
@@ -445,18 +465,24 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
expected.add(tokenizer);
}
}
- for (Map.Entry<PreBuiltTokenFilters, Class<?>> entry : PREBUILT_TOKENFILTERS.entrySet()) {
- PreBuiltTokenFilters tokenFilter = entry.getKey();
+ Map<String, PreConfiguredTokenFilter> preBuiltTokenFilters = AnalysisModule.setupPreConfiguredTokenFilters(singletonList(plugin));
+ for (Map.Entry<String, Class<?>> entry : getPreConfiguredTokenFilters().entrySet()) {
+ String name = entry.getKey();
Class<?> luceneFactory = entry.getValue();
if (luceneFactory == Void.class) {
continue;
}
+ if (luceneFactory == null) {
+ luceneFactory = TokenFilterFactory.lookupClass(toCamelCase(name));
+ }
assertTrue(TokenFilterFactory.class.isAssignableFrom(luceneFactory));
- if (tokenFilter.getTokenFilterFactory(Version.CURRENT) instanceof MultiTermAwareComponent) {
- actual.add(tokenFilter);
+ PreConfiguredTokenFilter filter = preBuiltTokenFilters.get(name);
+ assertNotNull("test claims pre built token filter [" + name + "] should be available but it wasn't", filter);
+ if (filter.shouldUseFilterForMultitermQueries()) {
+ actual.add("token filter [" + name + "]");
}
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory)) {
- expected.add(tokenFilter);
+ expected.add("token filter [" + name + "]");
}
}
for (Map.Entry<PreBuiltCharFilters, Class<?>> entry : PREBUILT_CHARFILTERS.entrySet()) {