path: root/core/src/main/java
author    Jun Ohtani <johtani@gmail.com>    2017-07-04 19:16:56 +0900
committer GitHub <noreply@github.com>       2017-07-04 19:16:56 +0900
commit    6894ef6057127cab078ae659cc842b42d2f6f7d1 (patch)
tree      91f2c75b3903ce8e48af16ca38babeafb1347eba /core/src/main/java
parent    5200665295146b4738c0a848e43ef259e78b18fb (diff)
[Analysis] Support normalizer in request param (#24767)
* [Analysis] Support normalizer in request param

  Support normalizer param
  Support custom normalizer with char_filter/filter param

  Closes #23347
Diffstat (limited to 'core/src/main/java')
-rw-r--r--  core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java          24
-rw-r--r--  core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java    9
-rw-r--r--  core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java  68
-rw-r--r--  core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeAction.java           7
4 files changed, 98 insertions(+), 10 deletions(-)
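The patch lets the _analyze API run a normalizer instead of an analyzer or tokenizer. A minimal sketch of the new request-level API (the index name "my_index" and normalizer name "my_normalizer" are hypothetical; the normalizer must be defined in the index settings):

    // A named normalizer is resolved against index settings, so an index is
    // required, and analyzer/tokenizer must be left unset (see validate() below).
    AnalyzeRequest request = new AnalyzeRequest("my_index")
        .text("BÀR")
        .normalizer("my_normalizer");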
diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java
index 08f220e019..b44fa67183 100644
--- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java
+++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java
@@ -18,6 +18,7 @@
*/
package org.elasticsearch.action.admin.indices.analyze;
+import org.elasticsearch.Version;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.support.single.shard.SingleShardRequest;
import org.elasticsearch.common.Strings;
@@ -59,6 +60,8 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
private String[] attributes = Strings.EMPTY_ARRAY;
+ private String normalizer;
+
public static class NameOrDefinition implements Writeable {
// exactly one of these two members is not null
public final String name;
@@ -202,12 +205,27 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
return this.attributes;
}
+ public String normalizer() {
+ return this.normalizer;
+ }
+
+ public AnalyzeRequest normalizer(String normalizer) {
+ this.normalizer = normalizer;
+ return this;
+ }
+
@Override
public ActionRequestValidationException validate() {
ActionRequestValidationException validationException = null;
if (text == null || text.length == 0) {
validationException = addValidationError("text is missing", validationException);
}
+ if ((index == null || index.length() == 0) && normalizer != null) {
+ validationException = addValidationError("index is required if normalizer is specified", validationException);
+ }
+ if (normalizer != null && (tokenizer != null || analyzer != null)) {
+ validationException = addValidationError("tokenizer/analyzer should be null if normalizer is specified", validationException);
+ }
return validationException;
}
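A hedged sketch of the two rules added to validate() above, using the same hypothetical names: a normalizer needs an index to resolve against, and it is mutually exclusive with analyzer/tokenizer.

    AnalyzeRequest ok = new AnalyzeRequest("my_index")
        .text("Foo Bar")
        .normalizer("my_normalizer");
    assert ok.validate() == null;                  // index + normalizer: valid

    AnalyzeRequest noIndex = new AnalyzeRequest()  // no index set
        .text("Foo Bar")
        .normalizer("my_normalizer");
    assert noIndex.validate() != null;             // "index is required if normalizer is specified"

    AnalyzeRequest conflict = new AnalyzeRequest("my_index")
        .text("Foo Bar")
        .analyzer("standard")
        .normalizer("my_normalizer");
    assert conflict.validate() != null;            // normalizer excludes analyzer/tokenizer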
@@ -222,6 +240,9 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
field = in.readOptionalString();
explain = in.readBoolean();
attributes = in.readStringArray();
+ if (in.getVersion().onOrAfter(Version.V_6_0_0_alpha3)) {
+ normalizer = in.readOptionalString();
+ }
}
@Override
@@ -235,5 +256,8 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
out.writeOptionalString(field);
out.writeBoolean(explain);
out.writeStringArray(attributes);
+ if (out.getVersion().onOrAfter(Version.V_6_0_0_alpha3)) {
+ out.writeOptionalString(normalizer);
+ }
}
}
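The new field is version-gated on the wire, so nodes before V_6_0_0_alpha3 never see it. A round-trip sketch, assuming org.elasticsearch.common.io.stream.BytesStreamOutput/StreamInput and with IOException handling omitted:

    AnalyzeRequest request = new AnalyzeRequest("my_index")
        .text("BÀR").normalizer("my_normalizer");

    BytesStreamOutput out = new BytesStreamOutput();
    out.setVersion(Version.V_6_0_0_alpha3);        // both sides on the new version
    request.writeTo(out);

    StreamInput in = out.bytes().streamInput();
    in.setVersion(Version.V_6_0_0_alpha3);
    AnalyzeRequest copy = new AnalyzeRequest();
    copy.readFrom(in);                             // copy.normalizer() mirrors the original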
diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java
index 5070862ed6..3893cb25d9 100644
--- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java
+++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java
@@ -125,4 +125,13 @@ public class AnalyzeRequestBuilder extends SingleShardOperationRequestBuilder<An
request.text(texts);
return this;
}
+
+ /**
+ * Instead of setting the analyzer or tokenizer, sets the normalizer by name
+ */
+ public AnalyzeRequestBuilder setNormalizer(String normalizer) {
+ request.normalizer(normalizer);
+ return this;
+ }
+
}
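A usage sketch of the new builder method, assuming a connected transport `client` and the hypothetical index/normalizer names from above:

    AnalyzeResponse response = client.admin().indices()
        .prepareAnalyze("my_index", "BÀR")    // prepareAnalyze(index, text)
        .setNormalizer("my_normalizer")       // instead of setAnalyzer()/setTokenizer()
        .get();
    String normalized = response.getTokens().get(0).getTerm();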
diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java
index b7da50139b..ffa4a73d87 100644
--- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java
+++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java
@@ -51,6 +51,7 @@ import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.CustomAnalyzerProvider;
import org.elasticsearch.index.analysis.IndexAnalyzers;
+import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
@@ -60,6 +61,7 @@ import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.indices.analysis.AnalysisModule;
+import org.elasticsearch.indices.analysis.PreBuiltTokenizers;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
@@ -178,21 +180,46 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
throw new IllegalArgumentException("failed to find analyzer [" + request.analyzer() + "]");
}
}
-
} else if (request.tokenizer() != null) {
final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings();
Tuple<String, TokenizerFactory> tokenizerFactory = parseTokenizerFactory(request, indexAnalyzers,
analysisRegistry, environment);
- List<CharFilterFactory> charFilterFactoryList = parseCharFilterFactories(request, indexSettings, analysisRegistry, environment);
+ List<CharFilterFactory> charFilterFactoryList =
+ parseCharFilterFactories(request, indexSettings, analysisRegistry, environment, false);
List<TokenFilterFactory> tokenFilterFactoryList = parseTokenFilterFactories(request, indexSettings, analysisRegistry,
- environment, tokenizerFactory, charFilterFactoryList);
+ environment, tokenizerFactory, charFilterFactoryList, false);
analyzer = new CustomAnalyzer(tokenizerFactory.v1(), tokenizerFactory.v2(),
charFilterFactoryList.toArray(new CharFilterFactory[charFilterFactoryList.size()]),
tokenFilterFactoryList.toArray(new TokenFilterFactory[tokenFilterFactoryList.size()]));
closeAnalyzer = true;
+ } else if (request.normalizer() != null) {
+ // Get normalizer from indexAnalyzers
+ analyzer = indexAnalyzers.getNormalizer(request.normalizer());
+ if (analyzer == null) {
+ throw new IllegalArgumentException("failed to find normalizer under [" + request.normalizer() + "]");
+ }
+ } else if (((request.tokenFilters() != null && request.tokenFilters().size() > 0)
+ || (request.charFilters() != null && request.charFilters().size() > 0))) {
+ final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings();
+ // Custom normalizer: normalizer is null, but filter and/or char_filter are set while tokenizer/analyzer are null
+ // get the char_filter and filter factories from the request
+ List<CharFilterFactory> charFilterFactoryList =
+ parseCharFilterFactories(request, indexSettings, analysisRegistry, environment, true);
+
+ final String keywordTokenizerName = "keyword";
+ TokenizerFactory keywordTokenizerFactory = getTokenizerFactory(analysisRegistry, environment, keywordTokenizerName);
+
+ List<TokenFilterFactory> tokenFilterFactoryList =
+ parseTokenFilterFactories(request, indexSettings, analysisRegistry, environment,
+ new Tuple<>(keywordTokenizerName, keywordTokenizerFactory), charFilterFactoryList, true);
+
+ analyzer = new CustomAnalyzer("keyword_for_normalizer",
+ keywordTokenizerFactory,
+ charFilterFactoryList.toArray(new CharFilterFactory[charFilterFactoryList.size()]),
+ tokenFilterFactoryList.toArray(new TokenFilterFactory[tokenFilterFactoryList.size()]));
+ closeAnalyzer = true;
} else if (analyzer == null) {
if (indexAnalyzers == null) {
analyzer = analysisRegistry.getAnalyzer("standard");
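A usage sketch of the custom-normalizer branch above: with no analyzer, tokenizer, or normalizer set, the requested filters are wrapped around a keyword tokenizer, so the whole input stays one token. "lowercase" and "asciifolding" are multi-term aware and therefore pass the checks further down; `client` is again an assumed transport client:

    AnalyzeResponse response = client.admin().indices()
        .prepareAnalyze("Jôhn DÖE")           // no index needed on this path
        .addTokenFilter("lowercase")
        .addTokenFilter("asciifolding")
        .get();
    // yields the single token "john doe": the keyword tokenizer never splits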
@@ -465,7 +492,7 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
}
private static List<CharFilterFactory> parseCharFilterFactories(AnalyzeRequest request, IndexSettings indexSettings, AnalysisRegistry analysisRegistry,
- Environment environment) throws IOException {
+ Environment environment, boolean normalizer) throws IOException {
List<CharFilterFactory> charFilterFactoryList = new ArrayList<>();
if (request.charFilters() != null && request.charFilters().size() > 0) {
List<AnalyzeRequest.NameOrDefinition> charFilters = request.charFilters();
@@ -506,6 +533,13 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
if (charFilterFactory == null) {
throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]");
}
+ if (normalizer) {
+ if (charFilterFactory instanceof MultiTermAwareComponent == false) {
+ throw new IllegalArgumentException("Custom normalizer may not use char filter ["
+ + charFilterFactory.name() + "]");
+ }
+ charFilterFactory = (CharFilterFactory) ((MultiTermAwareComponent) charFilterFactory).getMultiTermComponent();
+ }
charFilterFactoryList.add(charFilterFactory);
}
}
@@ -514,7 +548,7 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
private static List<TokenFilterFactory> parseTokenFilterFactories(AnalyzeRequest request, IndexSettings indexSettings, AnalysisRegistry analysisRegistry,
Environment environment, Tuple<String, TokenizerFactory> tokenizerFactory,
- List<CharFilterFactory> charFilterFactoryList) throws IOException {
+ List<CharFilterFactory> charFilterFactoryList, boolean normalizer) throws IOException {
List<TokenFilterFactory> tokenFilterFactoryList = new ArrayList<>();
if (request.tokenFilters() != null && request.tokenFilters().size() > 0) {
List<AnalyzeRequest.NameOrDefinition> tokenFilters = request.tokenFilters();
@@ -561,6 +595,13 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
if (tokenFilterFactory == null) {
throw new IllegalArgumentException("failed to find or create token filter under [" + tokenFilter.name + "]");
}
+ if (normalizer) {
+ if (tokenFilterFactory instanceof MultiTermAwareComponent == false) {
+ throw new IllegalArgumentException("Custom normalizer may not use filter ["
+ + tokenFilterFactory.name() + "]");
+ }
+ tokenFilterFactory = (TokenFilterFactory) ((MultiTermAwareComponent) tokenFilterFactory).getMultiTermComponent();
+ }
tokenFilterFactoryList.add(tokenFilterFactory);
}
}
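Both parse methods now unwrap each factory to its multi-term variant and reject anything that does not implement MultiTermAwareComponent. A hedged failure sketch (a stemmer is a typical non-multi-term-aware filter; the exact set depends on the registered factories):

    client.admin().indices()
        .prepareAnalyze("running")
        .addTokenFilter("porter_stem")        // assumed not multi-term aware
        .get();
    // throws IllegalArgumentException: Custom normalizer may not use filter [porter_stem]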
@@ -590,12 +631,8 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
} else {
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
if (indexAnalzyers == null) {
- tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
- if (tokenizerFactoryFactory == null) {
- throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]");
- }
+ tokenizerFactory = getTokenizerFactory(analysisRegistry, environment, tokenizer.name);
name = tokenizer.name;
- tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name);
} else {
tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, indexAnalzyers.getIndexSettings());
if (tokenizerFactoryFactory == null) {
@@ -610,6 +647,17 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
return new Tuple<>(name, tokenizerFactory);
}
+ private static TokenizerFactory getTokenizerFactory(AnalysisRegistry analysisRegistry, Environment environment, String name) throws IOException {
+ AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
+ TokenizerFactory tokenizerFactory;
+ tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(name);
+ if (tokenizerFactoryFactory == null) {
+ throw new IllegalArgumentException("failed to find global tokenizer under [" + name + "]");
+ }
+ tokenizerFactory = tokenizerFactoryFactory.get(environment, name);
+ return tokenizerFactory;
+ }
+
private static IndexSettings getNaIndexSettings(Settings settings) {
IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build();
return new IndexSettings(metaData, Settings.EMPTY);
diff --git a/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeAction.java b/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeAction.java
index 44ff79c4d9..62c0e97c03 100644
--- a/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeAction.java
+++ b/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeAction.java
@@ -46,6 +46,7 @@ public class RestAnalyzeAction extends BaseRestHandler {
public static final ParseField CHAR_FILTERS = new ParseField("char_filter");
public static final ParseField EXPLAIN = new ParseField("explain");
public static final ParseField ATTRIBUTES = new ParseField("attributes");
+ public static final ParseField NORMALIZER = new ParseField("normalizer");
}
public RestAnalyzeAction(Settings settings, RestController controller) {
@@ -147,6 +148,12 @@ public class RestAnalyzeAction extends BaseRestHandler {
attributes.add(parser.text());
}
analyzeRequest.attributes(attributes.toArray(new String[attributes.size()]));
+ } else if (Fields.NORMALIZER.match(currentFieldName)) {
+ if (token == XContentParser.Token.VALUE_STRING) {
+ analyzeRequest.normalizer(parser.text());
+ } else {
+ throw new IllegalArgumentException(currentFieldName + " should be the normalizer's name");
+ }
} else {
throw new IllegalArgumentException("Unknown parameter ["
+ currentFieldName + "] in request body or parameter is of the wrong type[" + token + "] ");
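On the REST side the new field must be a string value; anything else falls into the IllegalArgumentException branch above. A sketch of a request body the parser accepts, with hypothetical names:

    // POST /my_index/_analyze
    String body = "{ \"text\": \"BÀR\", \"normalizer\": \"my_normalizer\" }";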