diff options
author | Nik Everett <nik9000@gmail.com> | 2017-06-05 09:25:15 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-06-05 09:25:15 -0400 |
commit | 73307a2144fbdf63c551eeccdc7e93dc30e59a92 (patch) | |
tree | 834342811e4daaf6bf2adf7c40d674f80731e929 /test/framework | |
parent | 66007078d4beb27aa427d356ba926448705b0f04 (diff) |
Plugins can register pre-configured char filters (#25000)
Fixes the plumbing so plugins can register char filters and moves
the `html_strip` char filter into analysis-common.
Relates to #23658
Diffstat (limited to 'test/framework')
-rw-r--r-- | test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java | 61 |
1 file changed, 36 insertions, 25 deletions
diff --git a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
index 35f79a26ac..fd8a5e7cd9 100644
--- a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
@@ -63,6 +63,7 @@ import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
 import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
 import org.elasticsearch.index.analysis.PatternTokenizerFactory;
 import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
+import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
 import org.elasticsearch.index.analysis.ReverseTokenFilterFactory;
@@ -100,7 +101,9 @@ import java.util.TreeSet;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import static java.util.Collections.emptyMap;
 import static java.util.Collections.singletonList;
+import static org.hamcrest.Matchers.empty;
 import static org.hamcrest.Matchers.typeCompatibleWith;
 
 /**
@@ -275,20 +278,6 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
         .put("persian", Void.class)
         .immutableMap();
 
-    static final Map<PreBuiltCharFilters, Class<?>> PREBUILT_CHARFILTERS;
-    static {
-        PREBUILT_CHARFILTERS = new EnumMap<>(PreBuiltCharFilters.class);
-        for (PreBuiltCharFilters tokenizer : PreBuiltCharFilters.values()) {
-            Class<?> luceneFactoryClazz;
-            switch (tokenizer) {
-            default:
-                luceneFactoryClazz = org.apache.lucene.analysis.util.CharFilterFactory.lookupClass(
-                        toCamelCase(tokenizer.getCharFilterFactory(Version.CURRENT).name()));
-            }
-            PREBUILT_CHARFILTERS.put(tokenizer, luceneFactoryClazz);
-        }
-    }
-
     /**
      * The plugin being tested. Core uses an "empty" plugin so we don't have to throw null checks all over the place.
      */
@@ -352,9 +341,17 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
             }
             tokenizers.put(tokenizer.name().toLowerCase(Locale.ROOT), luceneFactoryClazz);
         }
+        // TODO drop aliases once they are moved to module
+        tokenizers.put("nGram", tokenizers.get("ngram"));
+        tokenizers.put("edgeNGram", tokenizers.get("edge_ngram"));
+        tokenizers.put("PathHierarchy", tokenizers.get("path_hierarchy"));
         return tokenizers;
     }
 
+    public Map<String, Class<?>> getPreConfiguredCharFilters() {
+        return emptyMap();
+    }
+
     public void testTokenizers() {
         Set<String> missing = new TreeSet<String>(org.apache.lucene.analysis.util.TokenizerFactory.availableTokenizers());
         missing.removeAll(getTokenizers().keySet());
@@ -430,10 +427,12 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
         Collection<Object> actual = new HashSet<>();
         Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters =
-                AnalysisModule.setupPreConfiguredTokenFilters(singletonList(plugin));
+                new HashMap<>(AnalysisModule.setupPreConfiguredTokenFilters(singletonList(plugin)));
         for (Map.Entry<String, Class<?>> entry : getPreConfiguredTokenFilters().entrySet()) {
             String name = entry.getKey();
             Class<?> luceneFactory = entry.getValue();
+            PreConfiguredTokenFilter filter = preConfiguredTokenFilters.remove(name);
+            assertNotNull("test claims pre built token filter [" + name + "] should be available but it wasn't", filter);
             if (luceneFactory == Void.class) {
                 continue;
             }
@@ -441,8 +440,6 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
                 luceneFactory = TokenFilterFactory.lookupClass(toCamelCase(name));
             }
             assertThat(luceneFactory, typeCompatibleWith(TokenFilterFactory.class));
-            PreConfiguredTokenFilter filter = preConfiguredTokenFilters.get(name);
-            assertNotNull("test claims pre built token filter [" + name + "] should be available but it wasn't", filter);
             if (filter.shouldUseFilterForMultitermQueries()) {
                 actual.add("token filter [" + name + "]");
             }
@@ -450,10 +447,15 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
                 expected.add("token filter [" + name + "]");
             }
         }
-        Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = AnalysisModule.setupPreConfiguredTokenizers(singletonList(plugin));
+        assertThat("pre configured token filter not registered with test", preConfiguredTokenFilters.keySet(), empty());
+
+        Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = new HashMap<>(
+                AnalysisModule.setupPreConfiguredTokenizers(singletonList(plugin)));
         for (Map.Entry<String, Class<?>> entry : getPreConfiguredTokenizers().entrySet()) {
             String name = entry.getKey();
             Class<?> luceneFactory = entry.getValue();
+            PreConfiguredTokenizer tokenizer = preConfiguredTokenizers.remove(name);
+            assertNotNull("test claims pre built tokenizer [" + name + "] should be available but it wasn't", tokenizer);
             if (luceneFactory == Void.class) {
                 continue;
             }
@@ -461,7 +463,6 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
                 luceneFactory = TokenizerFactory.lookupClass(toCamelCase(name));
             }
             assertThat(luceneFactory, typeCompatibleWith(TokenizerFactory.class));
-            PreConfiguredTokenizer tokenizer = preConfiguredTokenizers.get(name);
             if (tokenizer.hasMultiTermComponent()) {
                 actual.add(tokenizer);
             }
@@ -469,20 +470,30 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
                 expected.add(tokenizer);
             }
         }
-        for (Map.Entry<PreBuiltCharFilters, Class<?>> entry : PREBUILT_CHARFILTERS.entrySet()) {
-            PreBuiltCharFilters charFilter = entry.getKey();
+        assertThat("pre configured tokenizer not registered with test", preConfiguredTokenizers.keySet(), empty());
+
+        Map<String, PreConfiguredCharFilter> preConfiguredCharFilters = new HashMap<>(
+                AnalysisModule.setupPreConfiguredCharFilters(singletonList(plugin)));
+        for (Map.Entry<String, Class<?>> entry : getPreConfiguredCharFilters().entrySet()) {
+            String name = entry.getKey();
             Class<?> luceneFactory = entry.getValue();
+            PreConfiguredCharFilter filter = preConfiguredCharFilters.remove(name);
+            assertNotNull("test claims pre built char filter [" + name + "] should be available but it wasn't", filter);
             if (luceneFactory == Void.class) {
                 continue;
             }
-            assertTrue(CharFilterFactory.class.isAssignableFrom(luceneFactory));
-            if (charFilter.getCharFilterFactory(Version.CURRENT) instanceof MultiTermAwareComponent) {
-                actual.add(charFilter);
+            if (luceneFactory == null) {
+                luceneFactory = TokenFilterFactory.lookupClass(toCamelCase(name));
+            }
+            assertThat(luceneFactory, typeCompatibleWith(CharFilterFactory.class));
+            if (filter.shouldUseFilterForMultitermQueries()) {
+                actual.add(filter);
             }
             if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory)) {
-                expected.add(charFilter);
+                expected.add("token filter [" + name + "]");
             }
         }
+        assertThat("pre configured char filter not registered with test", preConfiguredCharFilters.keySet(), empty());
 
         Set<Object> classesMissingMultiTermSupport = new HashSet<>(expected);
         classesMissingMultiTermSupport.removeAll(actual);