summary | refs | log | tree | commit | diff
path: root/core/src/main/java/org/elasticsearch/index
diff options
context:
space:
mode:
author: Areek Zillur <areek.zillur@elasticsearch.com> 2015-10-31 01:59:21 -0400
committer: Areek Zillur <areek.zillur@elasticsearch.com> 2015-11-07 17:46:27 -0500
commit: dd1c687ace953c82f3671363dd077c1c99b1fc8e (patch)
tree: 383402288e296ca292926a1813c51e3d334c2f4f /core/src/main/java/org/elasticsearch/index
parent: d0f5950c80044bd174629cf9da8cf8e1b97084dd (diff)
Completion Suggester V2
The completion suggester provides auto-complete/search-as-you-type functionality. This is a navigational feature to guide users to relevant results as they are typing, improving search precision. It is not meant for spell correction or did-you-mean functionality like the term or phrase suggesters. The completions are indexed as a weighted FST (finite state transducer) to provide fast Top N prefix-based searches suitable for serving relevant results as a user types. closes #10746
Diffstat (limited to 'core/src/main/java/org/elasticsearch/index')
-rw-r--r-- core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java | 5
-rw-r--r-- core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java | 712
-rw-r--r-- core/src/main/java/org/elasticsearch/index/query/RegexpFlag.java | 2
-rw-r--r-- core/src/main/java/org/elasticsearch/index/shard/IndexShard.java | 14
4 files changed, 371 insertions, 362 deletions
diff --git a/core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java b/core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java
index b504c4c21c..2c23f94747 100644
--- a/core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java
+++ b/core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java
@@ -58,10 +58,7 @@ public class PerFieldMappingPostingFormatCodec extends Lucene54Codec {
if (indexName == null) {
logger.warn("no index mapper found for field: [{}] returning default postings format", field);
} else if (indexName instanceof CompletionFieldMapper.CompletionFieldType) {
- // CompletionFieldMapper needs a special postings format
- final CompletionFieldMapper.CompletionFieldType fieldType = (CompletionFieldMapper.CompletionFieldType) indexName;
- final PostingsFormat defaultFormat = super.getPostingsFormatForField(field);
- return fieldType.postingsFormat(defaultFormat);
+ return CompletionFieldMapper.CompletionFieldType.postingsFormat();
}
return super.getPostingsFormatForField(field);
}
diff --git a/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java
index 3a3a854915..7f793ab616 100644
--- a/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java
+++ b/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java
@@ -18,144 +18,91 @@
*/
package org.elasticsearch.index.mapper.core;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.document.Field;
-import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
-import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.search.suggest.document.Completion50PostingsFormat;
+import org.apache.lucene.search.suggest.document.CompletionAnalyzer;
+import org.apache.lucene.search.suggest.document.CompletionQuery;
+import org.apache.lucene.search.suggest.document.FuzzyCompletionQuery;
+import org.apache.lucene.search.suggest.document.PrefixCompletionQuery;
+import org.apache.lucene.search.suggest.document.RegexCompletionQuery;
+import org.apache.lucene.search.suggest.document.SuggestField;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentParser.NumberType;
import org.elasticsearch.common.xcontent.XContentParser.Token;
import org.elasticsearch.index.analysis.NamedAnalyzer;
-import org.elasticsearch.index.mapper.FieldMapper;
-import org.elasticsearch.index.mapper.MappedFieldType;
-import org.elasticsearch.index.mapper.Mapper;
-import org.elasticsearch.index.mapper.MapperException;
-import org.elasticsearch.index.mapper.MapperParsingException;
-import org.elasticsearch.index.mapper.MergeMappingException;
-import org.elasticsearch.index.mapper.MergeResult;
-import org.elasticsearch.index.mapper.ParseContext;
-import org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider;
-import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat;
-import org.elasticsearch.search.suggest.completion.CompletionTokenStream;
-import org.elasticsearch.search.suggest.context.ContextBuilder;
-import org.elasticsearch.search.suggest.context.ContextMapping;
-import org.elasticsearch.search.suggest.context.ContextMapping.ContextConfig;
+import org.elasticsearch.index.mapper.*;
+import org.elasticsearch.index.mapper.object.ArrayValueMapperParser;
+import org.elasticsearch.search.suggest.completion.CompletionSuggester;
+import org.elasticsearch.search.suggest.completion.context.ContextMapping;
+import org.elasticsearch.search.suggest.completion.context.ContextMappings;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Objects;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
+import java.util.*;
import static org.elasticsearch.index.mapper.MapperBuilders.completionField;
import static org.elasticsearch.index.mapper.core.TypeParsers.parseMultiField;
/**
+ * Mapper for completion field. The field values are indexed as a weighted FST for
+ * fast auto-completion/search-as-you-type functionality.<br>
*
+ * Type properties:<br>
+ * <ul>
+ * <li>"analyzer": "simple", (default)</li>
+ * <li>"search_analyzer": "simple", (default)</li>
+ * <li>"preserve_separators" : true, (default)</li>
+ * <li>"preserve_position_increments" : true (default)</li>
+ * <li>"max_input_length": 50 (default)</li>
+ * <li>"contexts" : CONTEXTS</li>
+ * </ul>
+ * see {@link ContextMappings#load(Object, Version)} for CONTEXTS<br>
+ * see {@link #parse(ParseContext)} for acceptable inputs for indexing<br>
+ * <p>
+ * This field type constructs completion queries that are run
+ * against the weighted FST index by the {@link CompletionSuggester}.
+ * This field can also be extended to add search criteria to suggestions
+ * for query-time filtering and boosting (see {@link ContextMappings}).
*/
-public class CompletionFieldMapper extends FieldMapper {
+public class CompletionFieldMapper extends FieldMapper implements ArrayValueMapperParser {
public static final String CONTENT_TYPE = "completion";
public static class Defaults {
- public static final CompletionFieldType FIELD_TYPE = new CompletionFieldType();
-
+ public static final MappedFieldType FIELD_TYPE = new CompletionFieldType();
static {
FIELD_TYPE.setOmitNorms(true);
FIELD_TYPE.freeze();
}
-
public static final boolean DEFAULT_PRESERVE_SEPARATORS = true;
public static final boolean DEFAULT_POSITION_INCREMENTS = true;
- public static final boolean DEFAULT_HAS_PAYLOADS = false;
public static final int DEFAULT_MAX_INPUT_LENGTH = 50;
}
public static class Fields {
// Mapping field names
- public static final String ANALYZER = "analyzer";
+ public static final ParseField ANALYZER = new ParseField("analyzer");
public static final ParseField SEARCH_ANALYZER = new ParseField("search_analyzer");
public static final ParseField PRESERVE_SEPARATORS = new ParseField("preserve_separators");
public static final ParseField PRESERVE_POSITION_INCREMENTS = new ParseField("preserve_position_increments");
- public static final String PAYLOADS = "payloads";
- public static final String TYPE = "type";
+ public static final ParseField TYPE = new ParseField("type");
+ public static final ParseField CONTEXTS = new ParseField("contexts");
public static final ParseField MAX_INPUT_LENGTH = new ParseField("max_input_length", "max_input_len");
// Content field names
public static final String CONTENT_FIELD_NAME_INPUT = "input";
- public static final String CONTENT_FIELD_NAME_OUTPUT = "output";
- public static final String CONTENT_FIELD_NAME_PAYLOAD = "payload";
public static final String CONTENT_FIELD_NAME_WEIGHT = "weight";
- public static final String CONTEXT = "context";
+ public static final String CONTENT_FIELD_NAME_CONTEXTS = "contexts";
}
public static final Set<String> ALLOWED_CONTENT_FIELD_NAMES = Sets.newHashSet(Fields.CONTENT_FIELD_NAME_INPUT,
- Fields.CONTENT_FIELD_NAME_OUTPUT, Fields.CONTENT_FIELD_NAME_PAYLOAD, Fields.CONTENT_FIELD_NAME_WEIGHT, Fields.CONTEXT);
-
- public static class Builder extends FieldMapper.Builder<Builder, CompletionFieldMapper> {
-
- private boolean preserveSeparators = Defaults.DEFAULT_PRESERVE_SEPARATORS;
- private boolean payloads = Defaults.DEFAULT_HAS_PAYLOADS;
- private boolean preservePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS;
- private int maxInputLength = Defaults.DEFAULT_MAX_INPUT_LENGTH;
- private SortedMap<String, ContextMapping> contextMapping = ContextMapping.EMPTY_MAPPING;
-
- public Builder(String name) {
- super(name, Defaults.FIELD_TYPE);
- builder = this;
- }
-
- public Builder payloads(boolean payloads) {
- this.payloads = payloads;
- return this;
- }
-
- public Builder preserveSeparators(boolean preserveSeparators) {
- this.preserveSeparators = preserveSeparators;
- return this;
- }
-
- public Builder preservePositionIncrements(boolean preservePositionIncrements) {
- this.preservePositionIncrements = preservePositionIncrements;
- return this;
- }
-
- public Builder maxInputLength(int maxInputLength) {
- if (maxInputLength <= 0) {
- throw new IllegalArgumentException(Fields.MAX_INPUT_LENGTH.getPreferredName() + " must be > 0 but was [" + maxInputLength + "]");
- }
- this.maxInputLength = maxInputLength;
- return this;
- }
-
- public Builder contextMapping(SortedMap<String, ContextMapping> contextMapping) {
- this.contextMapping = contextMapping;
- return this;
- }
-
- @Override
- public CompletionFieldMapper build(Mapper.BuilderContext context) {
- setupFieldType(context);
- CompletionFieldType completionFieldType = (CompletionFieldType)fieldType;
- completionFieldType.setProvider(new AnalyzingCompletionLookupProvider(preserveSeparators, false, preservePositionIncrements, payloads));
- completionFieldType.setContextMapping(contextMapping);
- return new CompletionFieldMapper(name, fieldType, maxInputLength, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
- }
-
- }
+ Fields.CONTENT_FIELD_NAME_WEIGHT, Fields.CONTENT_FIELD_NAME_CONTEXTS);
public static class TypeParser implements Mapper.TypeParser {
@@ -171,17 +118,12 @@ public class CompletionFieldMapper extends FieldMapper {
if (fieldName.equals("type")) {
continue;
}
- if (Fields.ANALYZER.equals(fieldName) || // index_analyzer is for backcompat, remove for v3.0
- fieldName.equals("index_analyzer") && parserContext.indexVersionCreated().before(Version.V_2_0_0_beta1)) {
-
+ if (parserContext.parseFieldMatcher().match(fieldName, Fields.ANALYZER)) {
indexAnalyzer = getNamedAnalyzer(parserContext, fieldNode.toString());
iterator.remove();
} else if (parserContext.parseFieldMatcher().match(fieldName, Fields.SEARCH_ANALYZER)) {
searchAnalyzer = getNamedAnalyzer(parserContext, fieldNode.toString());
iterator.remove();
- } else if (fieldName.equals(Fields.PAYLOADS)) {
- builder.payloads(Boolean.parseBoolean(fieldNode.toString()));
- iterator.remove();
} else if (parserContext.parseFieldMatcher().match(fieldName, Fields.PRESERVE_SEPARATORS)) {
builder.preserveSeparators(Boolean.parseBoolean(fieldNode.toString()));
iterator.remove();
@@ -191,14 +133,14 @@ public class CompletionFieldMapper extends FieldMapper {
} else if (parserContext.parseFieldMatcher().match(fieldName, Fields.MAX_INPUT_LENGTH)) {
builder.maxInputLength(Integer.parseInt(fieldNode.toString()));
iterator.remove();
- } else if (parseMultiField(builder, name, parserContext, fieldName, fieldNode)) {
+ } else if (parserContext.parseFieldMatcher().match(fieldName, Fields.CONTEXTS)) {
+ builder.contextMappings(ContextMappings.load(fieldNode, parserContext.indexVersionCreated()));
iterator.remove();
- } else if (fieldName.equals(Fields.CONTEXT)) {
- builder.contextMapping(ContextBuilder.loadMappings(fieldNode, parserContext.indexVersionCreated()));
+ } else if (parseMultiField(builder, name, parserContext, fieldName, fieldNode)) {
iterator.remove();
}
}
-
+
if (indexAnalyzer == null) {
if (searchAnalyzer != null) {
throw new MapperParsingException("analyzer on completion field [" + name + "] must be set when search_analyzer is set");
@@ -207,9 +149,9 @@ public class CompletionFieldMapper extends FieldMapper {
} else if (searchAnalyzer == null) {
searchAnalyzer = indexAnalyzer;
}
+
builder.indexAnalyzer(indexAnalyzer);
builder.searchAnalyzer(searchAnalyzer);
-
return builder;
}
@@ -223,40 +165,138 @@ public class CompletionFieldMapper extends FieldMapper {
}
public static final class CompletionFieldType extends MappedFieldType {
- private PostingsFormat postingsFormat;
- private AnalyzingCompletionLookupProvider analyzingSuggestLookupProvider;
- private SortedMap<String, ContextMapping> contextMapping = ContextMapping.EMPTY_MAPPING;
+
+ private static PostingsFormat postingsFormat;
+
+ private boolean preserveSep = Defaults.DEFAULT_PRESERVE_SEPARATORS;
+ private boolean preservePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS;
+ private ContextMappings contextMappings = null;
public CompletionFieldType() {
setFieldDataType(null);
}
- protected CompletionFieldType(CompletionFieldType ref) {
+ private CompletionFieldType(CompletionFieldType ref) {
super(ref);
- this.postingsFormat = ref.postingsFormat;
- this.analyzingSuggestLookupProvider = ref.analyzingSuggestLookupProvider;
- this.contextMapping = ref.contextMapping;
+ this.contextMappings = ref.contextMappings;
+ this.preserveSep = ref.preserveSep;
+ this.preservePositionIncrements = ref.preservePositionIncrements;
+ }
+
+ public void setPreserveSep(boolean preserveSep) {
+ checkIfFrozen();
+ this.preserveSep = preserveSep;
+ }
+
+ public void setPreservePositionIncrements(boolean preservePositionIncrements) {
+ checkIfFrozen();
+ this.preservePositionIncrements = preservePositionIncrements;
+ }
+
+ public void setContextMappings(ContextMappings contextMappings) {
+ checkIfFrozen();
+ this.contextMappings = contextMappings;
+ }
+
+ @Override
+ public NamedAnalyzer indexAnalyzer() {
+ final NamedAnalyzer indexAnalyzer = super.indexAnalyzer();
+ if (indexAnalyzer != null && !(indexAnalyzer.analyzer() instanceof CompletionAnalyzer)) {
+ return new NamedAnalyzer(indexAnalyzer.name(),
+ new CompletionAnalyzer(indexAnalyzer, preserveSep, preservePositionIncrements));
+
+ }
+ return indexAnalyzer;
+ }
+
+ @Override
+ public NamedAnalyzer searchAnalyzer() {
+ final NamedAnalyzer searchAnalyzer = super.searchAnalyzer();
+ if (searchAnalyzer != null && !(searchAnalyzer.analyzer() instanceof CompletionAnalyzer)) {
+ return new NamedAnalyzer(searchAnalyzer.name(),
+ new CompletionAnalyzer(searchAnalyzer, preserveSep, preservePositionIncrements));
+ }
+ return searchAnalyzer;
+ }
+
+ /**
+ * @return true if there are one or more context mappings defined
+ * for this field type
+ */
+ public boolean hasContextMappings() {
+ return contextMappings != null;
+ }
+
+ /**
+ * @return associated context mappings for this field type
+ */
+ public ContextMappings getContextMappings() {
+ return contextMappings;
+ }
+
+ public boolean preserveSep() {
+ return preserveSep;
+ }
+
+ public boolean preservePositionIncrements() {
+ return preservePositionIncrements;
+ }
+
+ /**
+ * @return postings format to use for this field-type
+ */
+ public static synchronized PostingsFormat postingsFormat() {
+ if (postingsFormat == null) {
+ postingsFormat = new Completion50PostingsFormat();
+ }
+ return postingsFormat;
+ }
+
+ /**
+ * Completion prefix query
+ */
+ public CompletionQuery prefixQuery(Object value) {
+ return new PrefixCompletionQuery(searchAnalyzer().analyzer(), createTerm(value));
+ }
+
+ /**
+ * Completion prefix regular expression query
+ */
+ public CompletionQuery regexpQuery(Object value, int flags, int maxDeterminizedStates) {
+ return new RegexCompletionQuery(createTerm(value), flags, maxDeterminizedStates);
+ }
+
+ /**
+ * Completion prefix fuzzy query
+ */
+ public CompletionQuery fuzzyQuery(String value, Fuzziness fuzziness, int nonFuzzyPrefixLength,
+ int minFuzzyPrefixLength, int maxExpansions, boolean transpositions,
+ boolean unicodeAware) {
+ return new FuzzyCompletionQuery(searchAnalyzer().analyzer(), createTerm(value), null,
+ fuzziness.asDistance(), transpositions, nonFuzzyPrefixLength, minFuzzyPrefixLength,
+ unicodeAware, maxExpansions);
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
- if (!(o instanceof CompletionFieldType)) return false;
+ if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
- CompletionFieldType fieldType = (CompletionFieldType) o;
- return analyzingSuggestLookupProvider.getPreserveSep() == fieldType.analyzingSuggestLookupProvider.getPreserveSep() &&
- analyzingSuggestLookupProvider.getPreservePositionsIncrements() == fieldType.analyzingSuggestLookupProvider.getPreservePositionsIncrements() &&
- analyzingSuggestLookupProvider.hasPayloads() == fieldType.analyzingSuggestLookupProvider.hasPayloads() &&
- Objects.equals(getContextMapping(), fieldType.getContextMapping());
+
+ CompletionFieldType that = (CompletionFieldType) o;
+
+ if (preserveSep != that.preserveSep) return false;
+ if (preservePositionIncrements != that.preservePositionIncrements) return false;
+ return !(contextMappings != null ? !contextMappings.equals(that.contextMappings) : that.contextMappings != null);
+
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(),
- analyzingSuggestLookupProvider.getPreserveSep(),
- analyzingSuggestLookupProvider.getPreservePositionsIncrements(),
- analyzingSuggestLookupProvider.hasPayloads(),
- getContextMapping());
+ preserveSep,
+ preservePositionIncrements,
+ contextMappings);
}
@Override
@@ -273,69 +313,99 @@ public class CompletionFieldMapper extends FieldMapper {
public void checkCompatibility(MappedFieldType fieldType, List<String> conflicts, boolean strict) {
super.checkCompatibility(fieldType, conflicts, strict);
CompletionFieldType other = (CompletionFieldType)fieldType;
- if (analyzingSuggestLookupProvider.hasPayloads() != other.analyzingSuggestLookupProvider.hasPayloads()) {
- conflicts.add("mapper [" + names().fullName() + "] has different [payload] values");
- }
- if (analyzingSuggestLookupProvider.getPreservePositionsIncrements() != other.analyzingSuggestLookupProvider.getPreservePositionsIncrements()) {
+
+ if (preservePositionIncrements != other.preservePositionIncrements) {
conflicts.add("mapper [" + names().fullName() + "] has different [preserve_position_increments] values");
}
- if (analyzingSuggestLookupProvider.getPreserveSep() != other.analyzingSuggestLookupProvider.getPreserveSep()) {
+ if (preserveSep != other.preserveSep) {
conflicts.add("mapper [" + names().fullName() + "] has different [preserve_separators] values");
}
- if(!ContextMapping.mappingsAreEqual(getContextMapping(), other.getContextMapping())) {
- conflicts.add("mapper [" + names().fullName() + "] has different [context_mapping] values");
+ if (hasContextMappings() != other.hasContextMappings()) {
+ conflicts.add("mapper [" + names().fullName() + "] has different [context_mappings] values");
+ } else if (hasContextMappings() && contextMappings.equals(other.contextMappings) == false) {
+ conflicts.add("mapper [" + names().fullName() + "] has different [context_mappings] values");
}
}
- public void setProvider(AnalyzingCompletionLookupProvider provider) {
- checkIfFrozen();
- this.analyzingSuggestLookupProvider = provider;
+ @Override
+ public String value(Object value) {
+ if (value == null) {
+ return null;
+ }
+ return value.toString();
}
- public synchronized PostingsFormat postingsFormat(PostingsFormat in) {
- if (in instanceof Completion090PostingsFormat) {
- throw new IllegalStateException("Double wrapping of " + Completion090PostingsFormat.class);
- }
- if (postingsFormat == null) {
- postingsFormat = new Completion090PostingsFormat(in, analyzingSuggestLookupProvider);
- }
- return postingsFormat;
+ @Override
+ public boolean isSortable() {
+ return false;
}
- public void setContextMapping(SortedMap<String, ContextMapping> contextMapping) {
- checkIfFrozen();
- this.contextMapping = contextMapping;
+ }
+
+ /**
+ * Builder for {@link CompletionFieldMapper}
+ */
+ public static class Builder extends FieldMapper.Builder<Builder, CompletionFieldMapper> {
+
+ private int maxInputLength = Defaults.DEFAULT_MAX_INPUT_LENGTH;
+ private ContextMappings contextMappings = null;
+ private boolean preserveSeparators = Defaults.DEFAULT_PRESERVE_SEPARATORS;
+ private boolean preservePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS;
+
+ /**
+ * @param name of the completion field to build
+ */
+ public Builder(String name) {
+ super(name, new CompletionFieldType());
+ builder = this;
}
- /** Get the context mapping associated with this completion field */
- public SortedMap<String, ContextMapping> getContextMapping() {
- return contextMapping;
+ /**
+ * @param maxInputLength maximum expected prefix length
+ * NOTE: prefixes longer than this will
+ * be truncated
+ */
+ public Builder maxInputLength(int maxInputLength) {
+ if (maxInputLength <= 0) {
+ throw new IllegalArgumentException(Fields.MAX_INPUT_LENGTH.getPreferredName() + " must be > 0 but was [" + maxInputLength + "]");
+ }
+ this.maxInputLength = maxInputLength;
+ return this;
}
- /** @return true if a context mapping has been defined */
- public boolean requiresContext() {
- return contextMapping.isEmpty() == false;
+ /**
+ * Add context mapping to this field
+ * @param contextMappings see {@link ContextMappings#load(Object, Version)}
+ */
+ public Builder contextMappings(ContextMappings contextMappings) {
+ this.contextMappings = contextMappings;
+ return this;
}
- @Override
- public String value(Object value) {
- if (value == null) {
- return null;
- }
- return value.toString();
+ public Builder preserveSeparators(boolean preserveSeparators) {
+ this.preserveSeparators = preserveSeparators;
+ return this;
+ }
+
+ public Builder preservePositionIncrements(boolean preservePositionIncrements) {
+ this.preservePositionIncrements = preservePositionIncrements;
+ return this;
}
@Override
- public boolean isSortable() {
- return false;
+ public CompletionFieldMapper build(BuilderContext context) {
+ setupFieldType(context);
+ CompletionFieldType completionFieldType = (CompletionFieldType) this.fieldType;
+ completionFieldType.setContextMappings(contextMappings);
+ completionFieldType.setPreservePositionIncrements(preservePositionIncrements);
+ completionFieldType.setPreserveSep(preserveSeparators);
+ return new CompletionFieldMapper(name, this.fieldType, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo, maxInputLength);
}
}
- private static final BytesRef EMPTY = new BytesRef();
-
private int maxInputLength;
- public CompletionFieldMapper(String simpleName, MappedFieldType fieldType, int maxInputLength, Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
+ public CompletionFieldMapper(String simpleName, MappedFieldType fieldType, Settings indexSettings, MultiFields multiFields, CopyTo copyTo, int maxInputLength) {
super(simpleName, fieldType, Defaults.FIELD_TYPE, indexSettings, multiFields, copyTo);
this.maxInputLength = maxInputLength;
}
@@ -345,216 +415,188 @@ public class CompletionFieldMapper extends FieldMapper {
return (CompletionFieldType) super.fieldType();
}
+ /**
+ * Parses and indexes inputs
+ *
+ * Parsing:
+ * Acceptable format:
+ * "STRING" - interpreted as field value (input)
+ * "ARRAY" - each element can be one of {@link #parse(ParseContext, Token, XContentParser, Map)}
+ * "OBJECT" - see {@link #parse(ParseContext, Token, XContentParser, Map)}
+ *
+ * Indexing:
+ * if context mappings are defined, delegates to {@link ContextMappings#addField(ParseContext.Document, String, String, int, Map)}
+ * else adds inputs as a {@link org.apache.lucene.search.suggest.document.SuggestField}
+ */
@Override
public Mapper parse(ParseContext context) throws IOException {
+ // parse
XContentParser parser = context.parser();
- XContentParser.Token token = parser.currentToken();
- if (token == XContentParser.Token.VALUE_NULL) {
+ Token token = parser.currentToken();
+ Map<String, CompletionInputMetaData> inputMap = new HashMap<>(1);
+ if (token == Token.VALUE_NULL) {
throw new MapperParsingException("completion field [" + fieldType().names().fullName() + "] does not support null values");
+ } else if (token == Token.START_ARRAY) {
+ while ((token = parser.nextToken()) != Token.END_ARRAY) {
+ parse(context, token, parser, inputMap);
+ }
+ } else {
+ parse(context, token, parser, inputMap);
+ }
+
+ // index
+ for (Map.Entry<String, CompletionInputMetaData> completionInput : inputMap.entrySet()) {
+ String input = completionInput.getKey();
+ // truncate input
+ if (input.length() > maxInputLength) {
+ int len = Math.min(maxInputLength, input.length());
+ if (Character.isHighSurrogate(input.charAt(len - 1))) {
+ assert input.length() >= len + 1 && Character.isLowSurrogate(input.charAt(len));
+ len += 1;
+ }
+ input = input.substring(0, len);
+ }
+ CompletionInputMetaData metaData = completionInput.getValue();
+ if (fieldType().hasContextMappings()) {
+ fieldType().getContextMappings().addField(context.doc(), fieldType().names().indexName(),
+ input, metaData.weight, metaData.contexts);
+ } else {
+ context.doc().add(new SuggestField(fieldType().names().indexName(), input, metaData.weight));
+ }
}
+ multiFields.parse(this, context);
+ return null;
+ }
- String surfaceForm = null;
- BytesRef payload = null;
- long weight = -1;
- List<String> inputs = new ArrayList<>(4);
-
- SortedMap<String, ContextConfig> contextConfig = null;
-
- if (token == XContentParser.Token.VALUE_STRING) {
- inputs.add(parser.text());
- multiFields.parse(this, context);
- } else {
- String currentFieldName = null;
- while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
- if (token == XContentParser.Token.FIELD_NAME) {
+ /**
+ * Acceptable inputs:
+ * "STRING" - interpreted as the field value (input)
+ * "OBJECT" - { "input": STRING|ARRAY, "weight": STRING|INT, "contexts": ARRAY|OBJECT }
+ */
+ private void parse(ParseContext parseContext, Token token, XContentParser parser, Map<String, CompletionInputMetaData> inputMap) throws IOException {
+ String currentFieldName = null;
+ if (token == Token.VALUE_STRING) {
+ inputMap.put(parser.text(), new CompletionInputMetaData(Collections.<String, Set<CharSequence>>emptyMap(), 1));
+ } else if (token == Token.START_OBJECT) {
+ Set<String> inputs = new HashSet<>();
+ int weight = 1;
+ Map<String, Set<CharSequence>> contextsMap = new HashMap<>();
+ while ((token = parser.nextToken()) != Token.END_OBJECT) {
+ if (token == Token.FIELD_NAME) {
currentFieldName = parser.currentName();
if (!ALLOWED_CONTENT_FIELD_NAMES.contains(currentFieldName)) {
- throw new IllegalArgumentException("Unknown field name[" + currentFieldName + "], must be one of " + ALLOWED_CONTENT_FIELD_NAMES);
+ throw new IllegalArgumentException("unknown field name [" + currentFieldName + "], must be one of " + ALLOWED_CONTENT_FIELD_NAMES);
}
- } else if (Fields.CONTEXT.equals(currentFieldName)) {
- SortedMap<String, ContextConfig> configs = new TreeMap<>();
-
- if (token == Token.START_OBJECT) {
- while ((token = parser.nextToken()) != Token.END_OBJECT) {
- String name = parser.text();
- ContextMapping mapping = fieldType().getContextMapping().get(name);
- if (mapping == null) {
- throw new ElasticsearchParseException("context [{}] is not defined", name);
- } else {
- token = parser.nextToken();
- configs.put(name, mapping.parseContext(context, parser));
+ } else if (currentFieldName != null) {
+ if (Fields.CONTENT_FIELD_NAME_INPUT.equals(currentFieldName)) {
+ if (token == Token.VALUE_STRING) {
+ inputs.add(parser.text());
+ } else if (token == Token.START_ARRAY) {
+ while ((token = parser.nextToken()) != Token.END_ARRAY) {
+ if (token == Token.VALUE_STRING) {
+ inputs.add(parser.text());
+ } else {
+ throw new IllegalArgumentException("input array must have string values, but was [" + token.name() + "]");
+ }
}
+ } else {
+ throw new IllegalArgumentException("input must be a string or array, but was [" + token.name() + "]");
}
- contextConfig = new TreeMap<>();
- for (ContextMapping mapping : fieldType().getContextMapping().values()) {
- ContextConfig config = configs.get(mapping.name());
- contextConfig.put(mapping.name(), config==null ? mapping.defaultConfig() : config);
+ } else if (Fields.CONTENT_FIELD_NAME_WEIGHT.equals(currentFieldName)) {
+ final Number weightValue;
+ if (token == Token.VALUE_STRING) {
+ try {
+ weightValue = Long.parseLong(parser.text());
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("weight must be an integer, but was [" + parser.text() + "]");
+ }
+ } else if (token == Token.VALUE_NUMBER) {
+ NumberType numberType = parser.numberType();
+ if (NumberType.LONG != numberType && NumberType.INT != numberType) {
+ throw new IllegalArgumentException("weight must be an integer, but was [" + parser.numberValue() + "]");
+ }
+ weightValue = parser.numberValue();
+ } else {
+ throw new IllegalArgumentException("weight must be a number or string, but was [" + token.name() + "]");
}
- } else {
- throw new ElasticsearchParseException("context must be an object");
- }
- } else if (Fields.CONTENT_FIELD_NAME_PAYLOAD.equals(currentFieldName)) {
- if (!isStoringPayloads()) {
- throw new MapperException("Payloads disabled in mapping");
- }
- if (token == XContentParser.Token.START_OBJECT) {
- XContentBuilder payloadBuilder = XContentFactory.contentBuilder(parser.contentType()).copyCurrentStructure(parser);
- payload = payloadBuilder.bytes().toBytesRef();
- payloadBuilder.close();
- } else if (token.isValue()) {
- payload = parser.utf8BytesOrNull();
- } else {
- throw new MapperException("payload doesn't support type " + token);
- }
- } else if (token == XContentParser.Token.VALUE_STRING) {
- if (Fields.CONTENT_FIELD_NAME_OUTPUT.equals(currentFieldName)) {
- surfaceForm = parser.text();
- }
- if (Fields.CONTENT_FIELD_NAME_INPUT.equals(currentFieldName)) {
- inputs.add(parser.text());
- }
- if (Fields.CONTENT_FIELD_NAME_WEIGHT.equals(currentFieldName)) {
- Number weightValue;
- try {
- weightValue = Long.parseLong(parser.text());
- } catch (NumberFormatException e) {
- throw new IllegalArgumentException("Weight must be a string representing a numeric value, but was [" + parser.text() + "]");
+ if (weightValue.longValue() < 0 || weightValue.longValue() > Integer.MAX_VALUE) { // always parse a long to make sure we don't get overflow
+ throw new IllegalArgumentException("weight must be in the interval [0..2147483647], but was [" + weightValue.longValue() + "]");
}
- weight = weightValue.longValue(); // always parse a long to make sure we don't get overflow
- checkWeight(weight);
- }
- } else if (token == XContentParser.Token.VALUE_NUMBER) {
- if (Fields.CONTENT_FIELD_NAME_WEIGHT.equals(currentFieldName)) {
- NumberType numberType = parser.numberType();
- if (NumberType.LONG != numberType && NumberType.INT != numberType) {
- throw new IllegalArgumentException("Weight must be an integer, but was [" + parser.numberValue() + "]");
+ weight = weightValue.intValue();
+ } else if (Fields.CONTENT_FIELD_NAME_CONTEXTS.equals(currentFieldName)) {
+ if (fieldType().hasContextMappings() == false) {
+ throw new IllegalArgumentException("contexts field is not supported for field: [" + fieldType().names().fullName() + "]");
}
- weight = parser.longValue(); // always parse a long to make sure we don't get overflow
- checkWeight(weight);
- }
- } else if (token == XContentParser.Token.START_ARRAY) {
- if (Fields.CONTENT_FIELD_NAME_INPUT.equals(currentFieldName)) {
- while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
- inputs.add(parser.text());
+ ContextMappings contextMappings = fieldType().getContextMappings();
+ XContentParser.Token currentToken = parser.currentToken();
+ if (currentToken == XContentParser.Token.START_OBJECT) {
+ ContextMapping contextMapping = null;
+ String fieldName = null;
+ while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+ if (currentToken == XContentParser.Token.FIELD_NAME) {
+ fieldName = parser.currentName();
+ contextMapping = contextMappings.get(fieldName);
+ } else if (currentToken == XContentParser.Token.VALUE_STRING
+ || currentToken == XContentParser.Token.START_ARRAY
+ || currentToken == XContentParser.Token.START_OBJECT) {
+ assert fieldName != null;
+ assert !contextsMap.containsKey(fieldName);
+ contextsMap.put(fieldName, contextMapping.parseContext(parseContext, parser));
+ } else {
+ throw new IllegalArgumentException("contexts must be an object or an array , but was [" + currentToken + "]");
+ }
+ }
+ } else {
+ throw new IllegalArgumentException("contexts must be an object or an array , but was [" + currentToken + "]");
}
}
}
}
- }
-
- if(contextConfig == null) {
- contextConfig = new TreeMap<>();
- for (ContextMapping mapping : fieldType().getContextMapping().values()) {
- contextConfig.put(mapping.name(), mapping.defaultConfig());
- }
- }
-
- final ContextMapping.Context ctx = new ContextMapping.Context(contextConfig, context.doc());
-
- payload = payload == null ? EMPTY : payload;
- if (surfaceForm == null) { // no surface form use the input
for (String input : inputs) {
- if (input.length() == 0) {
- continue;
+ if (inputMap.containsKey(input) == false || inputMap.get(input).weight < weight) {
+ inputMap.put(input, new CompletionInputMetaData(contextsMap, weight));
}
- BytesRef suggestPayload = fieldType().analyzingSuggestLookupProvider.buildPayload(new BytesRef(
- input), weight, payload);
- context.doc().add(getCompletionField(ctx, input, suggestPayload));
}
} else {
- BytesRef suggestPayload = fieldType().analyzingSuggestLookupProvider.buildPayload(new BytesRef(
- surfaceForm), weight, payload);
- for (String input : inputs) {
- if (input.length() == 0) {
- continue;
- }
- context.doc().add(getCompletionField(ctx, input, suggestPayload));
- }
+ throw new ElasticsearchParseException("failed to parse expected text or object got" + token.name());
}
- return null;
}
- private void checkWeight(long weight) {
- if (weight < 0 || weight > Integer.MAX_VALUE) {
- throw new IllegalArgumentException("Weight must be in the interval [0..2147483647], but was [" + weight + "]");
- }
- }
+ static class CompletionInputMetaData {
+ public final Map<String, Set<CharSequence>> contexts;
+ public final int weight;
- public Field getCompletionField(ContextMapping.Context ctx, String input, BytesRef payload) {
- final String originalInput = input;
- if (input.length() > maxInputLength) {
- final int len = correctSubStringLen(input, Math.min(maxInputLength, input.length()));
- input = input.substring(0, len);
- }
- for (int i = 0; i < input.length(); i++) {
- if (isReservedChar(input.charAt(i))) {
- throw new IllegalArgumentException("Illegal input [" + originalInput + "] UTF-16 codepoint [0x"
- + Integer.toHexString((int) input.charAt(i)).toUpperCase(Locale.ROOT)
- + "] at position " + i + " is a reserved character");
- }
+ CompletionInputMetaData(Map<String, Set<CharSequence>> contexts, int weight) {
+ this.contexts = contexts;
+ this.weight = weight;
}
- return new SuggestField(fieldType().names().indexName(), ctx, input, fieldType(), payload, fieldType().analyzingSuggestLookupProvider);
}
- public static int correctSubStringLen(String input, int len) {
- if (Character.isHighSurrogate(input.charAt(len - 1))) {
- assert input.length() >= len + 1 && Character.isLowSurrogate(input.charAt(len));
- return len + 1;
- }
- return len;
- }
-
- public BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException {
- return fieldType().analyzingSuggestLookupProvider.buildPayload(surfaceForm, weight, payload);
- }
-
- private static final class SuggestField extends Field {
- private final BytesRef payload;
- private final CompletionTokenStream.ToFiniteStrings toFiniteStrings;
- private final ContextMapping.Context ctx;
-
- public SuggestField(String name, ContextMapping.Context ctx, String value, MappedFieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) {
- super(name, value, type);
- this.payload = payload;
- this.toFiniteStrings = toFiniteStrings;
- this.ctx = ctx;
- }
-
- @Override
- public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
- TokenStream ts = ctx.wrapTokenStream(super.tokenStream(analyzer, previous));
- return new CompletionTokenStream(ts, payload, toFiniteStrings);
- }
- }
-
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(simpleName())
- .field(Fields.TYPE, CONTENT_TYPE);
-
- builder.field(Fields.ANALYZER, fieldType().indexAnalyzer().name());
+ .field(Fields.TYPE.getPreferredName(), CONTENT_TYPE);
+ builder.field(Fields.ANALYZER.getPreferredName(), fieldType().indexAnalyzer().name());
if (fieldType().indexAnalyzer().name().equals(fieldType().searchAnalyzer().name()) == false) {
builder.field(Fields.SEARCH_ANALYZER.getPreferredName(), fieldType().searchAnalyzer().name());
}
- builder.field(Fields.PAYLOADS, fieldType().analyzingSuggestLookupProvider.hasPayloads());
- builder.field(Fields.PRESERVE_SEPARATORS.getPreferredName(), fieldType().analyzingSuggestLookupProvider.getPreserveSep());
- builder.field(Fields.PRESERVE_POSITION_INCREMENTS.getPreferredName(), fieldType().analyzingSuggestLookupProvider.getPreservePositionsIncrements());
+ builder.field(Fields.PRESERVE_SEPARATORS.getPreferredName(), fieldType().preserveSep());
+ builder.field(Fields.PRESERVE_POSITION_INCREMENTS.getPreferredName(), fieldType().preservePositionIncrements());
builder.field(Fields.MAX_INPUT_LENGTH.getPreferredName(), this.maxInputLength);
- multiFields.toXContent(builder, params);
- if(fieldType().requiresContext()) {
- builder.startObject(Fields.CONTEXT);
- for (ContextMapping mapping : fieldType().getContextMapping().values()) {
- builder.value(mapping);
- }
- builder.endObject();
+ if (fieldType().hasContextMappings()) {
+ builder.startArray(Fields.CONTEXTS.getPreferredName());
+ fieldType().getContextMappings().toXContent(builder, params);
+ builder.endArray();
}
+ multiFields.toXContent(builder, params);
return builder.endObject();
}
@Override
protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
+ // no-op
}
@Override
@@ -562,10 +604,6 @@ public class CompletionFieldMapper extends FieldMapper {
return CONTENT_TYPE;
}
- public boolean isStoringPayloads() {
- return fieldType().analyzingSuggestLookupProvider.hasPayloads();
- }
-
@Override
public void merge(Mapper mergeWith, MergeResult mergeResult) throws MergeMappingException {
super.merge(mergeWith, mergeResult);
@@ -574,22 +612,4 @@ public class CompletionFieldMapper extends FieldMapper {
this.maxInputLength = fieldMergeWith.maxInputLength;
}
}
-
- // this should be package private but our tests don't allow it.
- public static boolean isReservedChar(char character) {
- /* we use 0x001F as a SEP_LABEL in the suggester but we can use the UTF-16 representation since they
- * are equivalent. We also don't need to convert the input character to UTF-8 here to check for
- * the 0x00 end label since all multi-byte UTF-8 chars start with 0x10 binary so if the UTF-16 CP is == 0x00
- * it's the single byte UTF-8 CP */
- assert XAnalyzingSuggester.PAYLOAD_SEP == XAnalyzingSuggester.SEP_LABEL; // ensure they are the same!
- switch(character) {
- case XAnalyzingSuggester.END_BYTE:
- case XAnalyzingSuggester.SEP_LABEL:
- case XAnalyzingSuggester.HOLE_CHARACTER:
- case ContextMapping.SEPARATOR:
- return true;
- default:
- return false;
- }
- }
}
diff --git a/core/src/main/java/org/elasticsearch/index/query/RegexpFlag.java b/core/src/main/java/org/elasticsearch/index/query/RegexpFlag.java
index 45f58c47da..1ebf44e23f 100644
--- a/core/src/main/java/org/elasticsearch/index/query/RegexpFlag.java
+++ b/core/src/main/java/org/elasticsearch/index/query/RegexpFlag.java
@@ -108,7 +108,7 @@ public enum RegexpFlag {
* @param flags A string representing a list of regular expression flags
* @return The combined OR'ed value for all the flags
*/
- static int resolveValue(String flags) {
+ public static int resolveValue(String flags) {
if (flags == null || flags.isEmpty()) {
return RegExp.ALL;
}
diff --git a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java
index 6e7893df98..aad1497c9d 100644
--- a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java
+++ b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java
@@ -19,7 +19,6 @@
package org.elasticsearch.index.shard;
-import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
@@ -106,8 +105,8 @@ import org.elasticsearch.indices.memory.IndexingMemoryController;
import org.elasticsearch.indices.recovery.RecoveryFailedException;
import org.elasticsearch.indices.recovery.RecoveryState;
import org.elasticsearch.percolator.PercolatorService;
-import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat;
import org.elasticsearch.search.suggest.completion.CompletionStats;
+import org.elasticsearch.search.suggest.completion.CompletionFieldStats;
import org.elasticsearch.threadpool.ThreadPool;
import java.io.IOException;
@@ -618,15 +617,8 @@ public class IndexShard extends AbstractIndexShardComponent {
public CompletionStats completionStats(String... fields) {
CompletionStats completionStats = new CompletionStats();
- final Engine.Searcher currentSearcher = acquireSearcher("completion_stats");
- try {
- PostingsFormat postingsFormat = PostingsFormat.forName(Completion090PostingsFormat.CODEC_NAME);
- if (postingsFormat instanceof Completion090PostingsFormat) {
- Completion090PostingsFormat completionPostingsFormat = (Completion090PostingsFormat) postingsFormat;
- completionStats.add(completionPostingsFormat.completionStats(currentSearcher.reader(), fields));
- }
- } finally {
- currentSearcher.close();
+ try (final Engine.Searcher currentSearcher = acquireSearcher("completion_stats")) {
+ completionStats.add(CompletionFieldStats.completionStats(currentSearcher.reader(), fields));
}
return completionStats;
}