summary | refs | log | tree | commit | diff
path: root/test/framework
diff options
context:
space:
mode:
author: Nik Everett <nik9000@gmail.com> 2017-06-05 09:25:15 -0400
committer: GitHub <noreply@github.com> 2017-06-05 09:25:15 -0400
commit: 73307a2144fbdf63c551eeccdc7e93dc30e59a92 (patch)
tree: 834342811e4daaf6bf2adf7c40d674f80731e929 /test/framework
parent: 66007078d4beb27aa427d356ba926448705b0f04 (diff)
Plugins can register pre-configured char filters (#25000)
Fixes the plumbing so plugins can register char filters and moves the `html_strip` char filter into analysis-common. Relates to #23658
Diffstat (limited to 'test/framework')
-rw-r--r--  test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java | 61
1 file changed, 36 insertions(+), 25 deletions(-)
diff --git a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
index 35f79a26ac..fd8a5e7cd9 100644
--- a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
@@ -63,6 +63,7 @@ import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
import org.elasticsearch.index.analysis.PatternTokenizerFactory;
import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
+import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
import org.elasticsearch.index.analysis.ReverseTokenFilterFactory;
@@ -100,7 +101,9 @@ import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import static java.util.Collections.emptyMap;
import static java.util.Collections.singletonList;
+import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.typeCompatibleWith;
/**
@@ -275,20 +278,6 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.put("persian", Void.class)
.immutableMap();
- static final Map<PreBuiltCharFilters, Class<?>> PREBUILT_CHARFILTERS;
- static {
- PREBUILT_CHARFILTERS = new EnumMap<>(PreBuiltCharFilters.class);
- for (PreBuiltCharFilters tokenizer : PreBuiltCharFilters.values()) {
- Class<?> luceneFactoryClazz;
- switch (tokenizer) {
- default:
- luceneFactoryClazz = org.apache.lucene.analysis.util.CharFilterFactory.lookupClass(
- toCamelCase(tokenizer.getCharFilterFactory(Version.CURRENT).name()));
- }
- PREBUILT_CHARFILTERS.put(tokenizer, luceneFactoryClazz);
- }
- }
-
/**
* The plugin being tested. Core uses an "empty" plugin so we don't have to throw null checks all over the place.
*/
@@ -352,9 +341,17 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
}
tokenizers.put(tokenizer.name().toLowerCase(Locale.ROOT), luceneFactoryClazz);
}
+ // TODO drop aliases once they are moved to module
+ tokenizers.put("nGram", tokenizers.get("ngram"));
+ tokenizers.put("edgeNGram", tokenizers.get("edge_ngram"));
+ tokenizers.put("PathHierarchy", tokenizers.get("path_hierarchy"));
return tokenizers;
}
+ public Map<String, Class<?>> getPreConfiguredCharFilters() {
+ return emptyMap();
+ }
+
public void testTokenizers() {
Set<String> missing = new TreeSet<String>(org.apache.lucene.analysis.util.TokenizerFactory.availableTokenizers());
missing.removeAll(getTokenizers().keySet());
@@ -430,10 +427,12 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
Collection<Object> actual = new HashSet<>();
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters =
- AnalysisModule.setupPreConfiguredTokenFilters(singletonList(plugin));
+ new HashMap<>(AnalysisModule.setupPreConfiguredTokenFilters(singletonList(plugin)));
for (Map.Entry<String, Class<?>> entry : getPreConfiguredTokenFilters().entrySet()) {
String name = entry.getKey();
Class<?> luceneFactory = entry.getValue();
+ PreConfiguredTokenFilter filter = preConfiguredTokenFilters.remove(name);
+ assertNotNull("test claims pre built token filter [" + name + "] should be available but it wasn't", filter);
if (luceneFactory == Void.class) {
continue;
}
@@ -441,8 +440,6 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
luceneFactory = TokenFilterFactory.lookupClass(toCamelCase(name));
}
assertThat(luceneFactory, typeCompatibleWith(TokenFilterFactory.class));
- PreConfiguredTokenFilter filter = preConfiguredTokenFilters.get(name);
- assertNotNull("test claims pre built token filter [" + name + "] should be available but it wasn't", filter);
if (filter.shouldUseFilterForMultitermQueries()) {
actual.add("token filter [" + name + "]");
}
@@ -450,10 +447,15 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
expected.add("token filter [" + name + "]");
}
}
- Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = AnalysisModule.setupPreConfiguredTokenizers(singletonList(plugin));
+ assertThat("pre configured token filter not registered with test", preConfiguredTokenFilters.keySet(), empty());
+
+ Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = new HashMap<>(
+ AnalysisModule.setupPreConfiguredTokenizers(singletonList(plugin)));
for (Map.Entry<String, Class<?>> entry : getPreConfiguredTokenizers().entrySet()) {
String name = entry.getKey();
Class<?> luceneFactory = entry.getValue();
+ PreConfiguredTokenizer tokenizer = preConfiguredTokenizers.remove(name);
+ assertNotNull("test claims pre built tokenizer [" + name + "] should be available but it wasn't", tokenizer);
if (luceneFactory == Void.class) {
continue;
}
@@ -461,7 +463,6 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
luceneFactory = TokenizerFactory.lookupClass(toCamelCase(name));
}
assertThat(luceneFactory, typeCompatibleWith(TokenizerFactory.class));
- PreConfiguredTokenizer tokenizer = preConfiguredTokenizers.get(name);
if (tokenizer.hasMultiTermComponent()) {
actual.add(tokenizer);
}
@@ -469,20 +470,30 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
expected.add(tokenizer);
}
}
- for (Map.Entry<PreBuiltCharFilters, Class<?>> entry : PREBUILT_CHARFILTERS.entrySet()) {
- PreBuiltCharFilters charFilter = entry.getKey();
+ assertThat("pre configured tokenizer not registered with test", preConfiguredTokenizers.keySet(), empty());
+
+ Map<String, PreConfiguredCharFilter> preConfiguredCharFilters = new HashMap<>(
+ AnalysisModule.setupPreConfiguredCharFilters(singletonList(plugin)));
+ for (Map.Entry<String, Class<?>> entry : getPreConfiguredCharFilters().entrySet()) {
+ String name = entry.getKey();
Class<?> luceneFactory = entry.getValue();
+ PreConfiguredCharFilter filter = preConfiguredCharFilters.remove(name);
+ assertNotNull("test claims pre built char filter [" + name + "] should be available but it wasn't", filter);
if (luceneFactory == Void.class) {
continue;
}
- assertTrue(CharFilterFactory.class.isAssignableFrom(luceneFactory));
- if (charFilter.getCharFilterFactory(Version.CURRENT) instanceof MultiTermAwareComponent) {
- actual.add(charFilter);
+ if (luceneFactory == null) {
+ luceneFactory = TokenFilterFactory.lookupClass(toCamelCase(name));
+ }
+ assertThat(luceneFactory, typeCompatibleWith(CharFilterFactory.class));
+ if (filter.shouldUseFilterForMultitermQueries()) {
+ actual.add(filter);
}
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory)) {
- expected.add(charFilter);
+ expected.add("token filter [" + name + "]");
}
}
+ assertThat("pre configured char filter not registered with test", preConfiguredCharFilters.keySet(), empty());
Set<Object> classesMissingMultiTermSupport = new HashSet<>(expected);
classesMissingMultiTermSupport.removeAll(actual);