diff options
author | Jun Ohtani <johtani@gmail.com> | 2017-07-04 19:16:56 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-07-04 19:16:56 +0900 |
commit | 6894ef6057127cab078ae659cc842b42d2f6f7d1 (patch) | |
tree | 91f2c75b3903ce8e48af16ca38babeafb1347eba /core/src/main | |
parent | 5200665295146b4738c0a848e43ef259e78b18fb (diff) |
[Analysis] Support normalizer in request param (#24767)
* [Analysis] Support normalizer in request param
Support normalizer param
Support custom normalizer with char_filter/filter param
Closes #23347
Diffstat (limited to 'core/src/main')
4 files changed, 98 insertions, 10 deletions
diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java index 08f220e019..b44fa67183 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java @@ -18,6 +18,7 @@ */ package org.elasticsearch.action.admin.indices.analyze; +import org.elasticsearch.Version; import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.support.single.shard.SingleShardRequest; import org.elasticsearch.common.Strings; @@ -59,6 +60,8 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> { private String[] attributes = Strings.EMPTY_ARRAY; + private String normalizer; + public static class NameOrDefinition implements Writeable { // exactly one of these two members is not null public final String name; @@ -202,12 +205,27 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> { return this.attributes; } + public String normalizer() { + return this.normalizer; + } + + public AnalyzeRequest normalizer(String normalizer) { + this.normalizer = normalizer; + return this; + } + @Override public ActionRequestValidationException validate() { ActionRequestValidationException validationException = null; if (text == null || text.length == 0) { validationException = addValidationError("text is missing", validationException); } + if ((index == null || index.length() == 0) && normalizer != null) { + validationException = addValidationError("index is required if normalizer is specified", validationException); + } + if (normalizer != null && (tokenizer != null || analyzer != null)) { + validationException = addValidationError("tokenizer/analyze should be null if normalizer is specified", validationException); + } return validationException; } @@ -222,6 +240,9 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> { field = in.readOptionalString(); explain = in.readBoolean(); attributes = in.readStringArray(); + if (in.getVersion().onOrAfter(Version.V_6_0_0_alpha3)) { + normalizer = in.readOptionalString(); + } } @Override @@ -235,5 +256,8 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> { out.writeOptionalString(field); out.writeBoolean(explain); out.writeStringArray(attributes); + if (out.getVersion().onOrAfter(Version.V_6_0_0_alpha3)) { + out.writeOptionalString(normalizer); + } } } diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java index 5070862ed6..3893cb25d9 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java @@ -125,4 +125,13 @@ public class AnalyzeRequestBuilder extends SingleShardOperationRequestBuilder<An request.text(texts); return this; } + + /** + * Instead of setting the analyzer and tokenizer, sets the normalizer as name + */ + public AnalyzeRequestBuilder setNormalizer(String normalizer) { + request.normalizer(normalizer); + return this; + } + } diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java index b7da50139b..ffa4a73d87 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java @@ -51,6 +51,7 @@ import org.elasticsearch.index.analysis.CharFilterFactory; import org.elasticsearch.index.analysis.CustomAnalyzer; import org.elasticsearch.index.analysis.CustomAnalyzerProvider; import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.analysis.MultiTermAwareComponent; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.analysis.TokenizerFactory; @@ -60,6 +61,7 @@ import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.indices.IndicesService; import org.elasticsearch.indices.analysis.AnalysisModule; +import org.elasticsearch.indices.analysis.PreBuiltTokenizers; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; @@ -178,21 +180,46 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe throw new IllegalArgumentException("failed to find analyzer [" + request.analyzer() + "]"); } } - } else if (request.tokenizer() != null) { final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings(); Tuple<String, TokenizerFactory> tokenizerFactory = parseTokenizerFactory(request, indexAnalyzers, analysisRegistry, environment); - List<CharFilterFactory> charFilterFactoryList = parseCharFilterFactories(request, indexSettings, analysisRegistry, environment); + List<CharFilterFactory> charFilterFactoryList = + parseCharFilterFactories(request, indexSettings, analysisRegistry, environment, false); List<TokenFilterFactory> tokenFilterFactoryList = parseTokenFilterFactories(request, indexSettings, analysisRegistry, - environment, tokenizerFactory, charFilterFactoryList); + environment, tokenizerFactory, charFilterFactoryList, false); analyzer = new CustomAnalyzer(tokenizerFactory.v1(), tokenizerFactory.v2(), charFilterFactoryList.toArray(new CharFilterFactory[charFilterFactoryList.size()]), tokenFilterFactoryList.toArray(new TokenFilterFactory[tokenFilterFactoryList.size()])); closeAnalyzer = true; + } else if (request.normalizer() != null) { + // Get normalizer from indexAnalyzers + analyzer = indexAnalyzers.getNormalizer(request.normalizer()); + if (analyzer == null) { + throw new IllegalArgumentException("failed to find normalizer under [" + request.normalizer() + "]"); + } + } else if (((request.tokenFilters() != null && request.tokenFilters().size() > 0) + || (request.charFilters() != null && request.charFilters().size() > 0))) { + final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings(); + // custom normalizer = if normalizer == null but filter or char_filter is not null and tokenizer/analyzer is null + // get charfilter and filter from request + List<CharFilterFactory> charFilterFactoryList = + parseCharFilterFactories(request, indexSettings, analysisRegistry, environment, true); + + final String keywordTokenizerName = "keyword"; + TokenizerFactory keywordTokenizerFactory = getTokenizerFactory(analysisRegistry, environment, keywordTokenizerName); + + List<TokenFilterFactory> tokenFilterFactoryList = + parseTokenFilterFactories(request, indexSettings, analysisRegistry, environment, new Tuple<>(keywordTokenizerName, keywordTokenizerFactory), charFilterFactoryList, true); + + analyzer = new CustomAnalyzer("keyword_for_normalizer", + keywordTokenizerFactory, + charFilterFactoryList.toArray(new CharFilterFactory[charFilterFactoryList.size()]), + tokenFilterFactoryList.toArray(new TokenFilterFactory[tokenFilterFactoryList.size()])); + closeAnalyzer = true; } else if (analyzer == null) { if (indexAnalyzers == null) { analyzer = analysisRegistry.getAnalyzer("standard"); @@ -465,7 +492,7 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe } private static List<CharFilterFactory> parseCharFilterFactories(AnalyzeRequest request, IndexSettings indexSettings, AnalysisRegistry analysisRegistry, - Environment environment) throws IOException { + Environment environment, boolean normalizer) throws IOException { List<CharFilterFactory> charFilterFactoryList = new ArrayList<>(); if (request.charFilters() != null && request.charFilters().size() > 0) { List<AnalyzeRequest.NameOrDefinition> charFilters = request.charFilters(); @@ -506,6 +533,13 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe if (charFilterFactory == null) { throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]"); } + if (normalizer) { + if (charFilterFactory instanceof MultiTermAwareComponent == false) { + throw new IllegalArgumentException("Custom normalizer may not use char filter [" + + charFilterFactory.name() + "]"); + } + charFilterFactory = (CharFilterFactory) ((MultiTermAwareComponent) charFilterFactory).getMultiTermComponent(); + } charFilterFactoryList.add(charFilterFactory); } } @@ -514,7 +548,7 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe private static List<TokenFilterFactory> parseTokenFilterFactories(AnalyzeRequest request, IndexSettings indexSettings, AnalysisRegistry analysisRegistry, Environment environment, Tuple<String, TokenizerFactory> tokenizerFactory, - List<CharFilterFactory> charFilterFactoryList) throws IOException { + List<CharFilterFactory> charFilterFactoryList, boolean normalizer) throws IOException { List<TokenFilterFactory> tokenFilterFactoryList = new ArrayList<>(); if (request.tokenFilters() != null && request.tokenFilters().size() > 0) { List<AnalyzeRequest.NameOrDefinition> tokenFilters = request.tokenFilters(); @@ -561,6 +595,13 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe if (tokenFilterFactory == null) { throw new IllegalArgumentException("failed to find or create token filter under [" + tokenFilter.name + "]"); } + if (normalizer) { + if (tokenFilterFactory instanceof MultiTermAwareComponent == false) { + throw new IllegalArgumentException("Custom normalizer may not use filter [" + + tokenFilterFactory.name() + "]"); + } + tokenFilterFactory = (TokenFilterFactory) ((MultiTermAwareComponent) tokenFilterFactory).getMultiTermComponent(); + } tokenFilterFactoryList.add(tokenFilterFactory); } } @@ -590,12 +631,8 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe } else { AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory; if (indexAnalzyers == null) { - tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name); - if (tokenizerFactoryFactory == null) { - throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]"); - } + tokenizerFactory = getTokenizerFactory(analysisRegistry, environment, tokenizer.name); name = tokenizer.name; - tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name); } else { tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, indexAnalzyers.getIndexSettings()); if (tokenizerFactoryFactory == null) { @@ -610,6 +647,17 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe return new Tuple<>(name, tokenizerFactory); } + private static TokenizerFactory getTokenizerFactory(AnalysisRegistry analysisRegistry, Environment environment, String name) throws IOException { + AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory; + TokenizerFactory tokenizerFactory; + tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(name); + if (tokenizerFactoryFactory == null) { + throw new IllegalArgumentException("failed to find global tokenizer under [" + name + "]"); + } + tokenizerFactory = tokenizerFactoryFactory.get(environment, name); + return tokenizerFactory; + } + private static IndexSettings getNaIndexSettings(Settings settings) { IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build(); return new IndexSettings(metaData, Settings.EMPTY); diff --git a/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeAction.java b/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeAction.java index 44ff79c4d9..62c0e97c03 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeAction.java +++ b/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeAction.java @@ -46,6 +46,7 @@ public class RestAnalyzeAction extends BaseRestHandler { public static final ParseField CHAR_FILTERS = new ParseField("char_filter"); public static final ParseField EXPLAIN = new ParseField("explain"); public static final ParseField ATTRIBUTES = new ParseField("attributes"); + public static final ParseField NORMALIZER = new ParseField("normalizer"); } public RestAnalyzeAction(Settings settings, RestController controller) { @@ -147,6 +148,12 @@ public class RestAnalyzeAction extends BaseRestHandler { attributes.add(parser.text()); } analyzeRequest.attributes(attributes.toArray(new String[attributes.size()])); + } else if (Fields.NORMALIZER.match(currentFieldName)) { + if (token == XContentParser.Token.VALUE_STRING) { + analyzeRequest.normalizer(parser.text()); + } else { + throw new IllegalArgumentException(currentFieldName + " should be normalizer's name"); + } } else { throw new IllegalArgumentException("Unknown parameter [" + currentFieldName + "] in request body or parameter is of the wrong type[" + token + "] "); |