author     Simon Willnauer <simonw@apache.org>    2015-06-05 13:12:03 +0200
committer  Simon Willnauer <simonw@apache.org>    2015-06-05 13:12:03 +0200
commit     15a62448343fd24f8e63f43b1e4b16f50005e4a5 (patch)
tree       7d04660f3f7aef0d679da3e6185af9cf378bf1d0 /core/src/main/java/org/elasticsearch/search/suggest/completion
parent     7ccc193a666e2ae888e7ac93d677a2143e5e07c3 (diff)
create core module
Diffstat (limited to 'core/src/main/java/org/elasticsearch/search/suggest/completion')
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java | 389
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion/Completion090PostingsFormat.java      | 347
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionStats.java                  | 154
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java          | 122
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java              | 120
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestion.java             | 136
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java      |  42
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java      | 111
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionFuzzyBuilder.java | 135
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java            | 173
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion/PayloadProcessor.java                 |  38
11 files changed, 1767 insertions(+), 0 deletions(-)
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java
new file mode 100644
index 0000000000..879b51a6fe
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java
@@ -0,0 +1,389 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.suggest.completion;
+
+import com.carrotsearch.hppc.ObjectLongHashMap;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
+import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.Accountables;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.fst.ByteSequenceOutputs;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.PairOutputs;
+import org.apache.lucene.util.fst.PairOutputs.Pair;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.elasticsearch.common.regex.Regex;
+import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
+import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.CompletionLookupProvider;
+import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.LookupFactory;
+import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+
+public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider {
+
+ // for serialization
+ public static final int SERIALIZE_PRESERVE_SEPARATORS = 1;
+ public static final int SERIALIZE_HAS_PAYLOADS = 2;
+ public static final int SERIALIZE_PRESERVE_POSITION_INCREMENTS = 4;
+
+ private static final int MAX_SURFACE_FORMS_PER_ANALYZED_FORM = 256;
+ private static final int MAX_GRAPH_EXPANSIONS = -1;
+
+ public static final String CODEC_NAME = "analyzing";
+ public static final int CODEC_VERSION_START = 1;
+ public static final int CODEC_VERSION_SERIALIZED_LABELS = 2;
+ public static final int CODEC_VERSION_CHECKSUMS = 3;
+ public static final int CODEC_VERSION_LATEST = CODEC_VERSION_CHECKSUMS;
+
+ private boolean preserveSep;
+ private boolean preservePositionIncrements;
+ private int maxSurfaceFormsPerAnalyzedForm;
+ private int maxGraphExpansions;
+ private boolean hasPayloads;
+ private final XAnalyzingSuggester prototype;
+
+ public AnalyzingCompletionLookupProvider(boolean preserveSep, boolean exactFirst, boolean preservePositionIncrements, boolean hasPayloads) {
+ this.preserveSep = preserveSep;
+ this.preservePositionIncrements = preservePositionIncrements;
+ this.hasPayloads = hasPayloads;
+ this.maxSurfaceFormsPerAnalyzedForm = MAX_SURFACE_FORMS_PER_ANALYZED_FORM;
+ this.maxGraphExpansions = MAX_GRAPH_EXPANSIONS;
+ int options = preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0;
+ // needs to be fixed in the suggester before it can be supported
+ //options |= exactFirst ? XAnalyzingSuggester.EXACT_FIRST : 0;
+ prototype = new XAnalyzingSuggester(null, null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);
+ }
+
+ @Override
+ public String getName() {
+ return "analyzing";
+ }
+
+ @Override
+ public FieldsConsumer consumer(final IndexOutput output) throws IOException {
+ CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST);
+ return new FieldsConsumer() {
+ private Map<String, Long> fieldOffsets = new HashMap<>();
+
+ @Override
+ public void close() throws IOException {
+ try {
+ /*
+ * Write the per-field offsets so that we know
+ * where to load each field's FST from.
+ */
+ long pointer = output.getFilePointer();
+ output.writeVInt(fieldOffsets.size());
+ for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
+ output.writeString(entry.getKey());
+ output.writeVLong(entry.getValue());
+ }
+ output.writeLong(pointer);
+ CodecUtil.writeFooter(output);
+ } finally {
+ IOUtils.close(output);
+ }
+ }
+
+ @Override
+ public void write(Fields fields) throws IOException {
+ for(String field : fields) {
+ Terms terms = fields.terms(field);
+ if (terms == null) {
+ continue;
+ }
+ TermsEnum termsEnum = terms.iterator();
+ PostingsEnum docsEnum = null;
+ final SuggestPayload spare = new SuggestPayload();
+ int maxAnalyzedPathsForOneInput = 0;
+ final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
+ int docCount = 0;
+ while (true) {
+ BytesRef term = termsEnum.next();
+ if (term == null) {
+ break;
+ }
+ docsEnum = termsEnum.postings(null, docsEnum, PostingsEnum.PAYLOADS);
+ builder.startTerm(term);
+ int docFreq = 0;
+ while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+ for (int i = 0; i < docsEnum.freq(); i++) {
+ final int position = docsEnum.nextPosition();
+ AnalyzingCompletionLookupProvider.this.parsePayload(docsEnum.getPayload(), spare);
+ builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
+ // the position encodes the index of the analyzed path, so position + 1 bounds the number of paths per input
+ maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
+ }
+ docFreq++;
+ docCount = Math.max(docCount, docsEnum.docID()+1);
+ }
+ builder.finishTerm(docFreq);
+ }
+ /*
+ * Here we are done processing the field and we can
+ * build the FST and write it to disk.
+ */
+ FST<Pair<Long, BytesRef>> build = builder.build();
+ assert build != null || docCount == 0: "the FST is null but docCount is != 0 actual value: [" + docCount + "]";
+ /*
+ * It's possible that the FST is null: if two segments are merged
+ * and all docs that had a value in this field were deleted, a
+ * consumer is created but never consumes any values, causing the
+ * FST builder to return null.
+ */
+ if (build != null) {
+ fieldOffsets.put(field, output.getFilePointer());
+ build.save(output);
+ /* write some more meta-info */
+ output.writeVInt(maxAnalyzedPathsForOneInput);
+ output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
+ output.writeInt(maxGraphExpansions); // can be negative
+ int options = 0;
+ options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0;
+ options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
+ options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
+ output.writeVInt(options);
+ output.writeVInt(XAnalyzingSuggester.SEP_LABEL);
+ output.writeVInt(XAnalyzingSuggester.END_BYTE);
+ output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP);
+ output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER);
+ }
+ }
+ }
+ };
+ }
+
+
+ @Override
+ public LookupFactory load(IndexInput input) throws IOException {
+ long sizeInBytes = 0;
+ int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST);
+ if (version >= CODEC_VERSION_CHECKSUMS) {
+ CodecUtil.checksumEntireFile(input);
+ }
+ final long metaPointerPosition = input.length() - (version >= CODEC_VERSION_CHECKSUMS? 8 + CodecUtil.footerLength() : 8);
+ final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>();
+ input.seek(metaPointerPosition);
+ long metaPointer = input.readLong();
+ input.seek(metaPointer);
+ int numFields = input.readVInt();
+
+ Map<Long, String> meta = new TreeMap<>();
+ for (int i = 0; i < numFields; i++) {
+ String name = input.readString();
+ long offset = input.readVLong();
+ meta.put(offset, name);
+ }
+
+ for (Map.Entry<Long, String> entry : meta.entrySet()) {
+ input.seek(entry.getKey());
+ FST<Pair<Long, BytesRef>> fst = new FST<>(input, new PairOutputs<>(
+ PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
+ int maxAnalyzedPathsForOneInput = input.readVInt();
+ int maxSurfaceFormsPerAnalyzedForm = input.readVInt();
+ int maxGraphExpansions = input.readInt();
+ int options = input.readVInt();
+ boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPARATORS) != 0;
+ boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0;
+ boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;
+
+ // the first version did not include these four fields, so fall back to the old defaults (before the
+ // AnalyzingSuggester was updated in Lucene, so we cannot use the suggester's defaults)
+ int sepLabel, payloadSep, endByte, holeCharacter;
+ switch (version) {
+ case CODEC_VERSION_START:
+ sepLabel = 0xFF;
+ payloadSep = '\u001f';
+ endByte = 0x0;
+ holeCharacter = '\u001E';
+ break;
+ default:
+ sepLabel = input.readVInt();
+ endByte = input.readVInt();
+ payloadSep = input.readVInt();
+ holeCharacter = input.readVInt();
+ }
+
+ AnalyzingSuggestHolder holder = new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions,
+ hasPayloads, maxAnalyzedPathsForOneInput, fst, sepLabel, payloadSep, endByte, holeCharacter);
+ sizeInBytes += fst.ramBytesUsed();
+ lookupMap.put(entry.getValue(), holder);
+ }
+ final long ramBytesUsed = sizeInBytes;
+ return new LookupFactory() {
+ @Override
+ public Lookup getLookup(CompletionFieldMapper mapper, CompletionSuggestionContext suggestionContext) {
+ AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.fieldType().names().indexName());
+ if (analyzingSuggestHolder == null) {
+ return null;
+ }
+ int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0;
+
+ final XAnalyzingSuggester suggester;
+ final Automaton queryPrefix = mapper.requiresContext() ? ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(), suggestionContext.getContextQueries()) : null;
+
+ if (suggestionContext.isFuzzy()) {
+ suggester = new XFuzzySuggester(mapper.fieldType().indexAnalyzer(), queryPrefix, mapper.fieldType().searchAnalyzer(), flags,
+ analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions,
+ suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(),
+ suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), suggestionContext.isFuzzyUnicodeAware(),
+ analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
+ analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte,
+ analyzingSuggestHolder.holeCharacter);
+ } else {
+ suggester = new XAnalyzingSuggester(mapper.fieldType().indexAnalyzer(), queryPrefix, mapper.fieldType().searchAnalyzer(), flags,
+ analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions,
+ analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
+ analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte,
+ analyzingSuggestHolder.holeCharacter);
+ }
+ return suggester;
+ }
+
+ @Override
+ public CompletionStats stats(String... fields) {
+ long sizeInBytes = 0;
+ ObjectLongHashMap<String> completionFields = null;
+ if (fields != null && fields.length > 0) {
+ completionFields = new ObjectLongHashMap<>(fields.length);
+ }
+
+ for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) {
+ sizeInBytes += entry.getValue().fst.ramBytesUsed();
+ if (fields == null || fields.length == 0) {
+ continue;
+ }
+ if (Regex.simpleMatch(fields, entry.getKey())) {
+ long fstSize = entry.getValue().fst.ramBytesUsed();
+ completionFields.addTo(entry.getKey(), fstSize);
+ }
+ }
+
+ return new CompletionStats(sizeInBytes, completionFields);
+ }
+
+ @Override
+ AnalyzingSuggestHolder getAnalyzingSuggestHolder(CompletionFieldMapper mapper) {
+ return lookupMap.get(mapper.fieldType().names().indexName());
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return ramBytesUsed;
+ }
+
+ @Override
+ public Collection<Accountable> getChildResources() {
+ return Accountables.namedAccountables("field", lookupMap);
+ }
+ };
+ }
+
+ static class AnalyzingSuggestHolder implements Accountable {
+ final boolean preserveSep;
+ final boolean preservePositionIncrements;
+ final int maxSurfaceFormsPerAnalyzedForm;
+ final int maxGraphExpansions;
+ final boolean hasPayloads;
+ final int maxAnalyzedPathsForOneInput;
+ final FST<Pair<Long, BytesRef>> fst;
+ final int sepLabel;
+ final int payloadSep;
+ final int endByte;
+ final int holeCharacter;
+
+ public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
+ boolean hasPayloads, int maxAnalyzedPathsForOneInput, FST<Pair<Long, BytesRef>> fst) {
+ this(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput, fst, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);
+ }
+
+ public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean hasPayloads, int maxAnalyzedPathsForOneInput, FST<Pair<Long, BytesRef>> fst, int sepLabel, int payloadSep, int endByte, int holeCharacter) {
+ this.preserveSep = preserveSep;
+ this.preservePositionIncrements = preservePositionIncrements;
+ this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
+ this.maxGraphExpansions = maxGraphExpansions;
+ this.hasPayloads = hasPayloads;
+ this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput;
+ this.fst = fst;
+ this.sepLabel = sepLabel;
+ this.payloadSep = payloadSep;
+ this.endByte = endByte;
+ this.holeCharacter = holeCharacter;
+ }
+
+ public boolean getPreserveSeparator() {
+ return preserveSep;
+ }
+
+ public boolean getPreservePositionIncrements() {
+ return preservePositionIncrements;
+ }
+
+ public boolean hasPayloads() {
+ return hasPayloads;
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ if (fst != null) {
+ return fst.ramBytesUsed();
+ } else {
+ return 0;
+ }
+ }
+
+ @Override
+ public Collection<Accountable> getChildResources() {
+ if (fst != null) {
+ return Collections.singleton(Accountables.namedAccountable("fst", fst));
+ } else {
+ return Collections.emptyList();
+ }
+ }
+ }
+
+ @Override
+ public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
+ return prototype.toFiniteStrings(prototype.getTokenStreamToAutomaton(), stream);
+ }
+}
\ No newline at end of file
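
Note on the file layout used by consumer() and load() above: each field's FST is written first, then a table mapping field names to their offsets, then the absolute position of that table as a fixed-width long, and finally the codec footer. A reader can therefore find the table by seeking relative to the end of the file. A minimal read-side sketch, assuming a Lucene IndexInput already positioned past the checked header (the helper name is hypothetical; it mirrors load() above):

    // Layout: [header][FST blocks ...][VInt count, (String name, VLong offset)*][long metaPointer][footer]
    static Map<String, Long> readFieldOffsets(IndexInput in, int version) throws IOException {
        long footerLength = version >= CODEC_VERSION_CHECKSUMS ? CodecUtil.footerLength() : 0;
        in.seek(in.length() - footerLength - 8); // the meta pointer is a fixed 8-byte long
        long metaPointer = in.readLong();
        in.seek(metaPointer);                    // jump to the field table
        int numFields = in.readVInt();
        Map<String, Long> offsets = new HashMap<>();
        for (int i = 0; i < numFields; i++) {
            offsets.put(in.readString(), in.readVLong());
        }
        return offsets;                          // seek to each offset to load that field's FST
    }
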
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/Completion090PostingsFormat.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/Completion090PostingsFormat.java
new file mode 100644
index 0000000000..5ffe9501dc
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/Completion090PostingsFormat.java
@@ -0,0 +1,347 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.suggest.completion;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableMap.Builder;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterLeafReader.FilterTerms;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.store.IOContext.Context;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.store.OutputStreamDataOutput;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.Accountables;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.elasticsearch.common.logging.ESLogger;
+import org.elasticsearch.common.logging.Loggers;
+import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
+import org.elasticsearch.search.suggest.completion.CompletionTokenStream.ToFiniteStrings;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * This {@link PostingsFormat} is essentially a tee-sink around a delegate
+ * postings format: it stores postings on disk through the standard Lucene
+ * APIs and additionally builds a suggest FST as an auxiliary data structure
+ * next to the actual postings. For simplicity, all merge operations are
+ * handled by the delegate postings format. The auxiliary suggest FST is only
+ * loaded when a FieldsProducer is requested for reading; for merging, the
+ * low-memory delegate postings format is used on its own.
+ */
+public class Completion090PostingsFormat extends PostingsFormat {
+
+ public static final String CODEC_NAME = "completion090";
+ public static final int SUGGEST_CODEC_VERSION = 1;
+ public static final int SUGGEST_VERSION_CURRENT = SUGGEST_CODEC_VERSION;
+ public static final String EXTENSION = "cmp";
+
+ private final static ESLogger logger = Loggers.getLogger(Completion090PostingsFormat.class);
+ private PostingsFormat delegatePostingsFormat;
+ private final static Map<String, CompletionLookupProvider> providers;
+ private CompletionLookupProvider writeProvider;
+
+
+ static {
+ final CompletionLookupProvider provider = new AnalyzingCompletionLookupProvider(true, false, true, false);
+ final Builder<String, CompletionLookupProvider> builder = ImmutableMap.builder();
+ providers = builder.put(provider.getName(), provider).build();
+ }
+
+ public Completion090PostingsFormat(PostingsFormat delegatePostingsFormat, CompletionLookupProvider provider) {
+ super(CODEC_NAME);
+ this.delegatePostingsFormat = delegatePostingsFormat;
+ this.writeProvider = provider;
+ assert delegatePostingsFormat != null && writeProvider != null;
+ }
+
+ /*
+ * Used only by core Lucene at read time via service provider instantiation;
+ * do not use at write time in application code.
+ */
+ public Completion090PostingsFormat() {
+ super(CODEC_NAME);
+ }
+
+ @Override
+ public CompletionFieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+ if (delegatePostingsFormat == null) {
+ throw new UnsupportedOperationException("Error - " + getClass().getName()
+ + " has been constructed without a choice of PostingsFormat");
+ }
+ assert writeProvider != null;
+ return new CompletionFieldsConsumer(state);
+ }
+
+ @Override
+ public CompletionFieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+ return new CompletionFieldsProducer(state);
+ }
+
+ private class CompletionFieldsConsumer extends FieldsConsumer {
+
+ private FieldsConsumer delegatesFieldsConsumer;
+ private FieldsConsumer suggestFieldsConsumer;
+
+ public CompletionFieldsConsumer(SegmentWriteState state) throws IOException {
+ this.delegatesFieldsConsumer = delegatePostingsFormat.fieldsConsumer(state);
+ String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
+ IndexOutput output = null;
+ boolean success = false;
+ try {
+ output = state.directory.createOutput(suggestFSTFile, state.context);
+ CodecUtil.writeHeader(output, CODEC_NAME, SUGGEST_VERSION_CURRENT);
+ /*
+ * we write the delegate postings format name so we can load it
+ * without getting an instance in the ctor
+ */
+ output.writeString(delegatePostingsFormat.getName());
+ output.writeString(writeProvider.getName());
+ this.suggestFieldsConsumer = writeProvider.consumer(output);
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(output);
+ }
+ }
+ }
+
+ @Override
+ public void write(Fields fields) throws IOException {
+ delegatesFieldsConsumer.write(fields);
+ suggestFieldsConsumer.write(fields);
+ }
+
+ @Override
+ public void close() throws IOException {
+ IOUtils.close(delegatesFieldsConsumer, suggestFieldsConsumer);
+ }
+ }
+
+ private static class CompletionFieldsProducer extends FieldsProducer {
+ // TODO make this class lazy-load everything in order to take advantage of the new merge-instance API;
+ // today we just load everything up-front
+ private final FieldsProducer delegateProducer;
+ private final LookupFactory lookupFactory;
+ private final int version;
+
+ public CompletionFieldsProducer(SegmentReadState state) throws IOException {
+ String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
+ IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
+ version = CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
+ FieldsProducer delegateProducer = null;
+ boolean success = false;
+ try {
+ PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
+ String providerName = input.readString();
+ CompletionLookupProvider completionLookupProvider = providers.get(providerName);
+ if (completionLookupProvider == null) {
+ throw new IllegalStateException("no provider with name [" + providerName + "] registered");
+ }
+ // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent unnecessary heap usage?
+ delegateProducer = delegatePostingsFormat.fieldsProducer(state);
+ /*
+ * If we are merging we don't load the FSTs at all, so that we
+ * don't consume extra memory during the merge
+ */
+ */
+ if (state.context.context != Context.MERGE) {
+ // TODO: maybe we can do this in a fully lazy fashion based on some configuration
+ // eventually we should have some kind of circuit breaker that prevents us
+ // from going OOM here with some configuration
+ this.lookupFactory = completionLookupProvider.load(input);
+ } else {
+ this.lookupFactory = null;
+ }
+ this.delegateProducer = delegateProducer;
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(delegateProducer, input);
+ } else {
+ IOUtils.close(input);
+ }
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ IOUtils.close(delegateProducer);
+ }
+
+ @Override
+ public Iterator<String> iterator() {
+ return delegateProducer.iterator();
+ }
+
+ @Override
+ public Terms terms(String field) throws IOException {
+ final Terms terms = delegateProducer.terms(field);
+ if (terms == null || lookupFactory == null) {
+ return terms;
+ }
+ return new CompletionTerms(terms, lookupFactory);
+ }
+
+ @Override
+ public int size() {
+ return delegateProducer.size();
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return (lookupFactory == null ? 0 : lookupFactory.ramBytesUsed()) + delegateProducer.ramBytesUsed();
+ }
+
+ @Override
+ public Collection<Accountable> getChildResources() {
+ List<Accountable> resources = new ArrayList<>();
+ if (lookupFactory != null) {
+ resources.add(Accountables.namedAccountable("lookup", lookupFactory));
+ }
+ resources.add(Accountables.namedAccountable("delegate", delegateProducer));
+ return Collections.unmodifiableList(resources);
+ }
+
+ @Override
+ public void checkIntegrity() throws IOException {
+ delegateProducer.checkIntegrity();
+ }
+
+ @Override
+ public FieldsProducer getMergeInstance() throws IOException {
+ return delegateProducer.getMergeInstance();
+ }
+ }
+
+ public static final class CompletionTerms extends FilterTerms {
+ private final LookupFactory lookup;
+
+ public CompletionTerms(Terms delegate, LookupFactory lookup) {
+ super(delegate);
+ this.lookup = lookup;
+ }
+
+ public Lookup getLookup(CompletionFieldMapper mapper, CompletionSuggestionContext suggestionContext) {
+ return lookup.getLookup(mapper, suggestionContext);
+ }
+
+ public CompletionStats stats(String ... fields) {
+ return lookup.stats(fields);
+ }
+ }
+
+ public static abstract class CompletionLookupProvider implements PayloadProcessor, ToFiniteStrings {
+
+ public static final char UNIT_SEPARATOR = '\u001f';
+
+ public abstract FieldsConsumer consumer(IndexOutput output) throws IOException;
+
+ public abstract String getName();
+
+ public abstract LookupFactory load(IndexInput input) throws IOException;
+
+ @Override
+ public BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException {
+ if (weight < -1 || weight > Integer.MAX_VALUE) {
+ throw new IllegalArgumentException("weight must be >= -1 && <= Integer.MAX_VALUE");
+ }
+ for (int i = 0; i < surfaceForm.length; i++) {
+ if (surfaceForm.bytes[i] == UNIT_SEPARATOR) {
+ throw new IllegalArgumentException(
+ "surface form cannot contain unit separator character U+001F; this character is reserved");
+ }
+ }
+ ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+ OutputStreamDataOutput output = new OutputStreamDataOutput(byteArrayOutputStream);
+ output.writeVLong(weight + 1);
+ output.writeVInt(surfaceForm.length);
+ output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length);
+ output.writeVInt(payload.length);
+ output.writeBytes(payload.bytes, 0, payload.length);
+
+ output.close();
+ return new BytesRef(byteArrayOutputStream.toByteArray());
+ }
+
+ @Override
+ public void parsePayload(BytesRef payload, SuggestPayload ref) throws IOException {
+ ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(payload.bytes, payload.offset, payload.length);
+ InputStreamDataInput input = new InputStreamDataInput(byteArrayInputStream);
+ ref.weight = input.readVLong() - 1;
+ int len = input.readVInt();
+ ref.surfaceForm.grow(len);
+ ref.surfaceForm.setLength(len);
+ input.readBytes(ref.surfaceForm.bytes(), 0, ref.surfaceForm.length());
+ len = input.readVInt();
+ ref.payload.grow(len);
+ ref.payload.setLength(len);
+ input.readBytes(ref.payload.bytes(), 0, ref.payload.length());
+ input.close();
+ }
+ }
+
+ public CompletionStats completionStats(IndexReader indexReader, String ... fields) {
+ CompletionStats completionStats = new CompletionStats();
+ for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
+ LeafReader atomicReader = atomicReaderContext.reader();
+ try {
+ for (String fieldName : atomicReader.fields()) {
+ Terms terms = atomicReader.fields().terms(fieldName);
+ if (terms instanceof CompletionTerms) {
+ CompletionTerms completionTerms = (CompletionTerms) terms;
+ completionStats.add(completionTerms.stats(fields));
+ }
+ }
+ } catch (IOException e) {
+ logger.error("Could not get completion stats: {}", e, e.getMessage());
+ }
+ }
+
+ return completionStats;
+ }
+
+ public static abstract class LookupFactory implements Accountable {
+ public abstract Lookup getLookup(CompletionFieldMapper mapper, CompletionSuggestionContext suggestionContext);
+ public abstract CompletionStats stats(String ... fields);
+ abstract AnalyzingCompletionLookupProvider.AnalyzingSuggestHolder getAnalyzingSuggestHolder(CompletionFieldMapper mapper);
+ }
+}
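
The buildPayload()/parsePayload() pair in CompletionLookupProvider above fixes the per-suggestion wire format: the weight shifted up by one as a VLong (so a weight of -1 is still encodable), then the length-prefixed surface form, then the length-prefixed payload. A standalone round-trip sketch using only classes already imported in this file (the values are made up for illustration):

    BytesRef surface = new BytesRef("nirvana");
    BytesRef payload = new BytesRef("{\"band\":true}");
    long weight = 42;

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    OutputStreamDataOutput out = new OutputStreamDataOutput(bytes);
    out.writeVLong(weight + 1);                                    // shifted so weight -1 encodes as 0
    out.writeVInt(surface.length);
    out.writeBytes(surface.bytes, surface.offset, surface.length);
    out.writeVInt(payload.length);
    out.writeBytes(payload.bytes, payload.offset, payload.length);
    out.close();

    InputStreamDataInput in = new InputStreamDataInput(new ByteArrayInputStream(bytes.toByteArray()));
    long decodedWeight = in.readVLong() - 1;                       // undo the shift on the read side
    assert decodedWeight == weight;
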
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionStats.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionStats.java
new file mode 100644
index 0000000000..5ec517dd8d
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionStats.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.suggest.completion;
+
+import com.carrotsearch.hppc.ObjectLongHashMap;
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Streamable;
+import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.xcontent.ToXContent;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentBuilderString;
+
+import java.io.IOException;
+
+/**
+ *
+ */
+public class CompletionStats implements Streamable, ToXContent {
+
+ private long sizeInBytes;
+
+ @Nullable
+ private ObjectLongHashMap<String> fields;
+
+ public CompletionStats() {
+ }
+
+ public CompletionStats(long size, @Nullable ObjectLongHashMap<String> fields) {
+ this.sizeInBytes = size;
+ this.fields = fields;
+ }
+
+ public long getSizeInBytes() {
+ return sizeInBytes;
+ }
+
+ public ByteSizeValue getSize() {
+ return new ByteSizeValue(sizeInBytes);
+ }
+
+ public ObjectLongHashMap<String> getFields() {
+ return fields;
+ }
+
+ @Override
+ public void readFrom(StreamInput in) throws IOException {
+ sizeInBytes = in.readVLong();
+ if (in.readBoolean()) {
+ int size = in.readVInt();
+ fields = new ObjectLongHashMap<>(size);
+ for (int i = 0; i < size; i++) {
+ fields.put(in.readString(), in.readVLong());
+ }
+ }
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ out.writeVLong(sizeInBytes);
+ if (fields == null) {
+ out.writeBoolean(false);
+ } else {
+ out.writeBoolean(true);
+ out.writeVInt(fields.size());
+
+ assert !fields.containsKey(null);
+ final Object[] keys = fields.keys;
+ final long[] values = fields.values;
+ for (int i = 0; i < keys.length; i++) {
+ if (keys[i] != null) {
+ out.writeString((String) keys[i]);
+ out.writeVLong(values[i]);
+ }
+ }
+ }
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject(Fields.COMPLETION);
+ builder.byteSizeField(Fields.SIZE_IN_BYTES, Fields.SIZE, sizeInBytes);
+ if (fields != null) {
+ builder.startObject(Fields.FIELDS);
+
+ assert !fields.containsKey(null);
+ final Object[] keys = fields.keys;
+ final long[] values = fields.values;
+ for (int i = 0; i < keys.length; i++) {
+ if (keys[i] != null) {
+ builder.startObject((String) keys[i], XContentBuilder.FieldCaseConversion.NONE);
+ builder.byteSizeField(Fields.SIZE_IN_BYTES, Fields.SIZE, values[i]);
+ builder.endObject();
+ }
+ }
+ builder.endObject();
+ }
+ builder.endObject();
+ return builder;
+ }
+
+ public static CompletionStats readCompletionStats(StreamInput in) throws IOException {
+ CompletionStats stats = new CompletionStats();
+ stats.readFrom(in);
+ return stats;
+ }
+
+ static final class Fields {
+ static final XContentBuilderString COMPLETION = new XContentBuilderString("completion");
+ static final XContentBuilderString SIZE_IN_BYTES = new XContentBuilderString("size_in_bytes");
+ static final XContentBuilderString SIZE = new XContentBuilderString("size");
+ static final XContentBuilderString FIELDS = new XContentBuilderString("fields");
+ }
+
+ public void add(CompletionStats completion) {
+ if (completion == null) {
+ return;
+ }
+
+ sizeInBytes += completion.getSizeInBytes();
+
+ if (completion.fields != null) {
+ if (fields == null) {
+ fields = completion.fields.clone();
+ } else {
+ assert !completion.fields.containsKey(null);
+ final Object[] keys = completion.fields.keys;
+ final long[] values = completion.fields.values;
+ for (int i = 0; i < keys.length; i++) {
+ if (keys[i] != null) {
+ fields.addTo((String) keys[i], values[i]);
+ }
+ }
+ }
+ }
+ }
+}
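
The raw keys/values array walk used in writeTo(), toXContent(), and add() above is an hppc idiom: ObjectLongHashMap is open-addressed, so its backing arrays contain empty slots whose key is null, and skipping those yields exactly the live entries. A hedged sketch of the two equivalent iteration styles (ObjectLongCursor lives in com.carrotsearch.hppc.cursors; the field names are made up):

    ObjectLongHashMap<String> fields = new ObjectLongHashMap<>();
    fields.put("title_suggest", 1024L);
    fields.put("tag_suggest", 2048L);

    // Idiomatic cursor-based iteration:
    for (ObjectLongCursor<String> cursor : fields) {
        System.out.println(cursor.key + " => " + cursor.value);
    }

    // The raw-array walk used above; null keys mark empty hash slots and must be skipped:
    final Object[] keys = fields.keys;
    final long[] values = fields.values;
    for (int i = 0; i < keys.length; i++) {
        if (keys[i] != null) {
            System.out.println(keys[i] + " => " + values[i]);
        }
    }
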
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java
new file mode 100644
index 0000000000..8ef271fb76
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.suggest.completion;
+
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.unit.Fuzziness;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
+import org.elasticsearch.index.query.IndexQueryParserService;
+import org.elasticsearch.search.suggest.SuggestContextParser;
+import org.elasticsearch.search.suggest.SuggestionSearchContext;
+import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery;
+
+import java.io.IOException;
+import java.util.List;
+
+import static org.elasticsearch.search.suggest.SuggestUtils.parseSuggestContext;
+
+/**
+ *
+ */
+public class CompletionSuggestParser implements SuggestContextParser {
+
+ private CompletionSuggester completionSuggester;
+ private static final ParseField FUZZINESS = Fuzziness.FIELD.withDeprecation("edit_distance");
+
+ public CompletionSuggestParser(CompletionSuggester completionSuggester) {
+ this.completionSuggester = completionSuggester;
+ }
+
+ @Override
+ public SuggestionSearchContext.SuggestionContext parse(XContentParser parser, MapperService mapperService, IndexQueryParserService queryParserService) throws IOException {
+ XContentParser.Token token;
+ String fieldName = null;
+ CompletionSuggestionContext suggestion = new CompletionSuggestionContext(completionSuggester);
+
+ XContentParser contextParser = null;
+
+ while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+ if (token == XContentParser.Token.FIELD_NAME) {
+ fieldName = parser.currentName();
+ } else if (token.isValue()) {
+ if (!parseSuggestContext(parser, mapperService, fieldName, suggestion)) {
+ if (token == XContentParser.Token.VALUE_BOOLEAN && "fuzzy".equals(fieldName)) {
+ suggestion.setFuzzy(parser.booleanValue());
+ }
+ }
+ } else if (token == XContentParser.Token.START_OBJECT) {
+ if("fuzzy".equals(fieldName)) {
+ suggestion.setFuzzy(true);
+ String fuzzyConfigName = null;
+ while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+ if (token == XContentParser.Token.FIELD_NAME) {
+ fuzzyConfigName = parser.currentName();
+ } else if (token.isValue()) {
+ if (FUZZINESS.match(fuzzyConfigName, ParseField.EMPTY_FLAGS)) {
+ suggestion.setFuzzyEditDistance(Fuzziness.parse(parser).asDistance());
+ } else if ("transpositions".equals(fuzzyConfigName)) {
+ suggestion.setFuzzyTranspositions(parser.booleanValue());
+ } else if ("min_length".equals(fuzzyConfigName) || "minLength".equals(fuzzyConfigName)) {
+ suggestion.setFuzzyMinLength(parser.intValue());
+ } else if ("prefix_length".equals(fuzzyConfigName) || "prefixLength".equals(fuzzyConfigName)) {
+ suggestion.setFuzzyPrefixLength(parser.intValue());
+ } else if ("unicode_aware".equals(fuzzyConfigName) || "unicodeAware".equals(fuzzyConfigName)) {
+ suggestion.setFuzzyUnicodeAware(parser.booleanValue());
+ }
+ }
+ }
+ } else if("context".equals(fieldName)) {
+ // Copy the current structure; we will parse it once the mapping is provided
+ XContentBuilder builder = XContentFactory.contentBuilder(parser.contentType());
+ builder.copyCurrentStructure(parser);
+ BytesReference bytes = builder.bytes();
+ contextParser = parser.contentType().xContent().createParser(bytes);
+ } else {
+ throw new IllegalArgumentException("suggester [completion] doesn't support field [" + fieldName + "]");
+ }
+ } else {
+ throw new IllegalArgumentException("suggester [completion] doesn't support field [" + fieldName + "]");
+ }
+ }
+
+ suggestion.mapper((CompletionFieldMapper)mapperService.smartNameFieldMapper(suggestion.getField()));
+
+ CompletionFieldMapper mapper = suggestion.mapper();
+ if (mapper != null) {
+ if (mapper.requiresContext()) {
+ if (contextParser == null) {
+ throw new IllegalArgumentException("suggester [completion] requires context to be setup");
+ } else {
+ contextParser.nextToken();
+ List<ContextQuery> contextQueries = ContextQuery.parseQueries(mapper.getContextMapping(), contextParser);
+ suggestion.setContextQuery(contextQueries);
+ }
+ } else if (contextParser != null) {
+ throw new IllegalArgumentException("suggester [completion] doesn't expect any context");
+ }
+ }
+ return suggestion;
+ }
+
+}
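
To make the accepted request shape concrete: the parser above handles the common suggest fields via parseSuggestContext (field, size, analyzer, ...), a fuzzy entry that may be a boolean or an object, and an opaque context object that is buffered and re-parsed once the field's mapping is known. A sketch of a suggestion source this parser would accept, built with the XContentBuilder already imported in this file (the completion field name is hypothetical):

    XContentBuilder source = XContentFactory.jsonBuilder()
        .startObject()
            .field("field", "title_suggest")    // hypothetical field mapped as type "completion"
            .field("size", 5)
            .startObject("fuzzy")
                .field("fuzziness", 1)          // the FUZZINESS ParseField; "edit_distance" is the deprecated alias
                .field("transpositions", true)
                .field("min_length", 3)
                .field("prefix_length", 1)
                .field("unicode_aware", false)
            .endObject()
        .endObject();
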
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java
new file mode 100644
index 0000000000..ee1cc70bc4
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.suggest.completion;
+
+import com.google.common.collect.Maps;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.lucene.util.CollectionUtil;
+import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.common.bytes.BytesArray;
+import org.elasticsearch.common.text.StringText;
+import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
+import org.elasticsearch.search.suggest.Suggest;
+import org.elasticsearch.search.suggest.SuggestContextParser;
+import org.elasticsearch.search.suggest.Suggester;
+import org.elasticsearch.search.suggest.completion.CompletionSuggestion.Entry.Option;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+
+public class CompletionSuggester extends Suggester<CompletionSuggestionContext> {
+
+ private static final ScoreComparator scoreComparator = new ScoreComparator();
+
+
+ @Override
+ protected Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>> innerExecute(String name,
+ CompletionSuggestionContext suggestionContext, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
+ if (suggestionContext.mapper() == null || !(suggestionContext.mapper() instanceof CompletionFieldMapper)) {
+ throw new ElasticsearchException("Field [" + suggestionContext.getField() + "] is not a completion suggest field");
+ }
+ final IndexReader indexReader = searcher.getIndexReader();
+ CompletionSuggestion completionSuggestion = new CompletionSuggestion(name, suggestionContext.getSize());
+ spare.copyUTF8Bytes(suggestionContext.getText());
+
+ CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(new StringText(spare.toString()), 0, spare.length());
+ completionSuggestion.addTerm(completionSuggestEntry);
+
+ String fieldName = suggestionContext.getField();
+ Map<String, CompletionSuggestion.Entry.Option> results = Maps.newHashMapWithExpectedSize(indexReader.leaves().size() * suggestionContext.getSize());
+ for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
+ LeafReader atomicReader = atomicReaderContext.reader();
+ Terms terms = atomicReader.fields().terms(fieldName);
+ if (terms instanceof Completion090PostingsFormat.CompletionTerms) {
+ final Completion090PostingsFormat.CompletionTerms lookupTerms = (Completion090PostingsFormat.CompletionTerms) terms;
+ final Lookup lookup = lookupTerms.getLookup(suggestionContext.mapper(), suggestionContext);
+ if (lookup == null) {
+ // we don't have a lookup for this segment; this can happen if a merge
+ // dropped all docs from the segment that had a value in this field.
+ continue;
+ }
+ List<Lookup.LookupResult> lookupResults = lookup.lookup(spare.get(), false, suggestionContext.getSize());
+ for (Lookup.LookupResult res : lookupResults) {
+
+ final String key = res.key.toString();
+ final float score = res.value;
+ final Option value = results.get(key);
+ if (value == null) {
+ final Option option = new CompletionSuggestion.Entry.Option(new StringText(key), score, res.payload == null ? null
+ : new BytesArray(res.payload));
+ results.put(key, option);
+ } else if (value.getScore() < score) {
+ value.setScore(score);
+ value.setPayload(res.payload == null ? null : new BytesArray(res.payload));
+ }
+ }
+ }
+ }
+ final List<CompletionSuggestion.Entry.Option> options = new ArrayList<>(results.values());
+ CollectionUtil.introSort(options, scoreComparator);
+
+ int optionCount = Math.min(suggestionContext.getSize(), options.size());
+ for (int i = 0 ; i < optionCount ; i++) {
+ completionSuggestEntry.addOption(options.get(i));
+ }
+
+ return completionSuggestion;
+ }
+
+ @Override
+ public String[] names() {
+ return new String[] { "completion" };
+ }
+
+ @Override
+ public SuggestContextParser getContextParser() {
+ return new CompletionSuggestParser(this);
+ }
+
+ public static class ScoreComparator implements Comparator<CompletionSuggestion.Entry.Option> {
+ @Override
+ public int compare(Option o1, Option o2) {
+ return Float.compare(o2.getScore(), o1.getScore());
+ }
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestion.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestion.java
new file mode 100644
index 0000000000..83515ff74f
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestion.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.suggest.completion;
+
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.text.Text;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentHelper;
+import org.elasticsearch.search.suggest.Suggest;
+
+import java.io.IOException;
+import java.util.Map;
+
+/**
+ *
+ */
+public class CompletionSuggestion extends Suggest.Suggestion<CompletionSuggestion.Entry> {
+
+ public static final int TYPE = 2;
+
+ public CompletionSuggestion() {
+ }
+
+ public CompletionSuggestion(String name, int size) {
+ super(name, size);
+ }
+
+ @Override
+ public int getType() {
+ return TYPE;
+ }
+
+ @Override
+ protected Entry newEntry() {
+ return new Entry();
+ }
+
+ public static class Entry extends org.elasticsearch.search.suggest.Suggest.Suggestion.Entry<CompletionSuggestion.Entry.Option> {
+
+ public Entry(Text text, int offset, int length) {
+ super(text, offset, length);
+ }
+
+ protected Entry() {
+ super();
+ }
+
+ @Override
+ protected Option newOption() {
+ return new Option();
+ }
+
+ public static class Option extends org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option {
+ private BytesReference payload;
+
+ public Option(Text text, float score, BytesReference payload) {
+ super(text, score);
+ this.payload = payload;
+ }
+
+
+ protected Option() {
+ super();
+ }
+
+ public void setPayload(BytesReference payload) {
+ this.payload = payload;
+ }
+
+ public BytesReference getPayload() {
+ return payload;
+ }
+
+ public String getPayloadAsString() {
+ return payload.toUtf8();
+ }
+
+ public long getPayloadAsLong() {
+ return Long.parseLong(payload.toUtf8());
+ }
+
+ public double getPayloadAsDouble() {
+ return Double.parseDouble(payload.toUtf8());
+ }
+
+ public Map<String, Object> getPayloadAsMap() {
+ return XContentHelper.convertToMap(payload, false).v2();
+ }
+
+ @Override
+ public void setScore(float score) {
+ super.setScore(score);
+ }
+
+ @Override
+ protected XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
+ super.innerToXContent(builder, params);
+ if (payload != null && payload.length() > 0) {
+ builder.rawField("payload", payload);
+ }
+ return builder;
+ }
+
+ @Override
+ public void readFrom(StreamInput in) throws IOException {
+ super.readFrom(in);
+ payload = in.readBytesReference();
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ super.writeTo(out);
+ out.writeBytesReference(payload);
+ }
+ }
+ }
+
+}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java
new file mode 100644
index 0000000000..15d04e845e
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.suggest.completion;
+
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.search.suggest.SuggestBuilder;
+
+import java.io.IOException;
+
+/**
+ * Defines a suggest command based on a prefix, typically to provide "auto-complete" functionality
+ * for users as they type search terms. The implementation of the completion service uses FSTs that
+ * are created at index-time and so must be defined in the mapping with the type "completion" before
+ * indexing.
+ */
+public class CompletionSuggestionBuilder extends SuggestBuilder.SuggestionBuilder<CompletionSuggestionBuilder> {
+
+ public CompletionSuggestionBuilder(String name) {
+ super(name, "completion");
+ }
+
+ @Override
+ protected XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
+ return builder;
+ }
+}
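
A hedged usage sketch for this builder (the suggestion name, field name, and prefix are made up; text(), field(), and size() are inherited fluent setters from SuggestBuilder.SuggestionBuilder):

    CompletionSuggestionBuilder suggest = new CompletionSuggestionBuilder("song-suggest")
        .text("nir")               // the prefix the user has typed so far
        .field("title_suggest")    // must be mapped with type "completion" at index time
        .size(5);                  // maximum number of options to return
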
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java
new file mode 100644
index 0000000000..a60ad16dd8
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.suggest.completion;
+
+import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester;
+import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
+import org.elasticsearch.search.suggest.Suggester;
+import org.elasticsearch.search.suggest.SuggestionSearchContext;
+import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ *
+ */
+public class CompletionSuggestionContext extends SuggestionSearchContext.SuggestionContext {
+
+ private CompletionFieldMapper mapper;
+ private int fuzzyEditDistance = XFuzzySuggester.DEFAULT_MAX_EDITS;
+ private boolean fuzzyTranspositions = XFuzzySuggester.DEFAULT_TRANSPOSITIONS;
+ private int fuzzyMinLength = XFuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH;
+ private int fuzzyPrefixLength = XFuzzySuggester.DEFAULT_NON_FUZZY_PREFIX;
+ private boolean fuzzy = false;
+ private boolean fuzzyUnicodeAware = XFuzzySuggester.DEFAULT_UNICODE_AWARE;
+ private List<ContextQuery> contextQueries = Collections.emptyList();
+
+ public CompletionSuggestionContext(Suggester suggester) {
+ super(suggester);
+ }
+
+ public CompletionFieldMapper mapper() {
+ return this.mapper;
+ }
+
+ public void mapper(CompletionFieldMapper mapper) {
+ this.mapper = mapper;
+ }
+
+ public void setFuzzyEditDistance(int fuzzyEditDistance) {
+ this.fuzzyEditDistance = fuzzyEditDistance;
+ }
+
+ public int getFuzzyEditDistance() {
+ return fuzzyEditDistance;
+ }
+
+ public void setFuzzyTranspositions(boolean fuzzyTranspositions) {
+ this.fuzzyTranspositions = fuzzyTranspositions;
+ }
+
+ public boolean isFuzzyTranspositions() {
+ return fuzzyTranspositions;
+ }
+
+ public void setFuzzyMinLength(int fuzzyMinPrefixLength) {
+ this.fuzzyMinLength = fuzzyMinPrefixLength;
+ }
+
+ public int getFuzzyMinLength() {
+ return fuzzyMinLength;
+ }
+
+ public void setFuzzyPrefixLength(int fuzzyNonPrefixLength) {
+ this.fuzzyPrefixLength = fuzzyNonPrefixLength;
+ }
+
+ public int getFuzzyPrefixLength() {
+ return fuzzyPrefixLength;
+ }
+
+ public void setFuzzy(boolean fuzzy) {
+ this.fuzzy = fuzzy;
+ }
+
+ public boolean isFuzzy() {
+ return fuzzy;
+ }
+
+ public void setFuzzyUnicodeAware(boolean fuzzyUnicodeAware) {
+ this.fuzzyUnicodeAware = fuzzyUnicodeAware;
+ }
+
+ public boolean isFuzzyUnicodeAware() {
+ return fuzzyUnicodeAware;
+ }
+
+ public void setContextQuery(List<ContextQuery> queries) {
+ this.contextQueries = queries;
+ }
+
+ public List<ContextQuery> getContextQueries() {
+ return this.contextQueries;
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionFuzzyBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionFuzzyBuilder.java
new file mode 100644
index 0000000000..de6bf1365d
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionFuzzyBuilder.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.suggest.completion;
+
+import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester;
+import org.elasticsearch.common.unit.Fuzziness;
+import org.elasticsearch.common.xcontent.ToXContent;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.search.suggest.SuggestBuilder;
+
+import java.io.IOException;
+
+/**
+ * A form of {@link CompletionSuggestionBuilder} that supports fuzzy queries,
+ * allowing matches despite typos in the input.
+ * Various settings control when and how the fuzziness is measured and applied.
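+ * <p>
+ * A minimal usage sketch; the suggestion name, field and input text below are
+ * hypothetical:
+ * <pre>{@code
+ * CompletionSuggestionFuzzyBuilder suggestion = new CompletionSuggestionFuzzyBuilder("song-suggest")
+ *         .setFuzziness(Fuzziness.TWO)
+ *         .setUnicodeAware(true);
+ * suggestion.field("suggest").text("nrvana");
+ * }</pre>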
+ */
+public class CompletionSuggestionFuzzyBuilder extends SuggestBuilder.SuggestionBuilder<CompletionSuggestionFuzzyBuilder> {
+
+ public CompletionSuggestionFuzzyBuilder(String name) {
+ super(name, "completion");
+ }
+
+ private Fuzziness fuzziness = Fuzziness.ONE;
+ private boolean fuzzyTranspositions = XFuzzySuggester.DEFAULT_TRANSPOSITIONS;
+ private int fuzzyMinLength = XFuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH;
+ private int fuzzyPrefixLength = XFuzzySuggester.DEFAULT_NON_FUZZY_PREFIX;
+ private boolean unicodeAware = XFuzzySuggester.DEFAULT_UNICODE_AWARE;
+
+ public Fuzziness getFuzziness() {
+ return fuzziness;
+ }
+
+ /**
+ * Sets the level of fuzziness used to create suggestions, as a {@link Fuzziness} instance.
+ * The default value is {@link Fuzziness#ONE}, which allows for an edit distance of one.
+ */
+ public CompletionSuggestionFuzzyBuilder setFuzziness(Fuzziness fuzziness) {
+ this.fuzziness = fuzziness;
+ return this;
+ }
+
+ public boolean isFuzzyTranspositions() {
+ return fuzzyTranspositions;
+ }
+
+ /**
+ * Sets whether a transposition (swapping two adjacent characters) counts as a
+ * single character change or as two.
+ * Defaults to true, the fuzzier option, which counts a transposition as a
+ * single change.
+ */
+ public CompletionSuggestionFuzzyBuilder setFuzzyTranspositions(boolean fuzzyTranspositions) {
+ this.fuzzyTranspositions = fuzzyTranspositions;
+ return this;
+ }
+
+ public int getFuzzyMinLength() {
+ return fuzzyMinLength;
+ }
+
+ /**
+ * Sets the minimum length the input string must have before fuzzy suggestions
+ * are returned. Defaults to 3.
+ */
+ public CompletionSuggestionFuzzyBuilder setFuzzyMinLength(int fuzzyMinLength) {
+ this.fuzzyMinLength = fuzzyMinLength;
+ return this;
+ }
+
+ public int getFuzzyPrefixLength() {
+ return fuzzyPrefixLength;
+ }
+
+ /**
+ * Sets the length of the input prefix that is not checked for fuzzy alternatives. Defaults to 1.
+ */
+ public CompletionSuggestionFuzzyBuilder setFuzzyPrefixLength(int fuzzyPrefixLength) {
+ this.fuzzyPrefixLength = fuzzyPrefixLength;
+ return this;
+ }
+
+ public boolean isUnicodeAware() {
+ return unicodeAware;
+ }
+
+ /**
+ * Set to true to measure edit distance, transpositions and lengths in Unicode
+ * code points (actual letters) instead of bytes. Defaults to false.
+ */
+ public CompletionSuggestionFuzzyBuilder setUnicodeAware(boolean unicodeAware) {
+ this.unicodeAware = unicodeAware;
+ return this;
+ }
+
+ @Override
+ protected XContentBuilder innerToXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
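+ // render only the non-default fuzzy settings under the "fuzzy" object, e.g.
+ // "fuzzy" : { "min_length" : 4, "unicode_aware" : true }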
+ builder.startObject("fuzzy");
+
+ if (fuzziness != Fuzziness.ONE) {
+ fuzziness.toXContent(builder, params);
+ }
+ if (fuzzyTranspositions != XFuzzySuggester.DEFAULT_TRANSPOSITIONS) {
+ builder.field("transpositions", fuzzyTranspositions);
+ }
+ if (fuzzyMinLength != XFuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH) {
+ builder.field("min_length", fuzzyMinLength);
+ }
+ if (fuzzyPrefixLength != XFuzzySuggester.DEFAULT_NON_FUZZY_PREFIX) {
+ builder.field("prefix_length", fuzzyPrefixLength);
+ }
+ if (unicodeAware != XFuzzySuggester.DEFAULT_UNICODE_AWARE) {
+ builder.field("unicode_aware", unicodeAware);
+ }
+
+ builder.endObject();
+ return builder;
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java
new file mode 100644
index 0000000000..103fd0dcf0
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.suggest.completion;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.util.*;
+import org.apache.lucene.util.fst.Util;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Set;
+
+/**
+ * A {@link TokenStream} that expands its input into all analyzed paths (finite
+ * strings) and emits one token per path, attaching the given payload to each.
+ */
+public final class CompletionTokenStream extends TokenStream {
+
+ private final PayloadAttribute payloadAttr = addAttribute(PayloadAttribute.class);
+ private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class);
+ private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
+
+ private final TokenStream input;
+ private BytesRef payload;
+ private Iterator<IntsRef> finiteStrings;
+ private ToFiniteStrings toFiniteStrings;
+ private int posInc = -1;
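+ // hard upper bound on the number of finite strings a single input may expand to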
+ private static final int MAX_PATHS = 256;
+ private CharTermAttribute charTermAttribute;
+
+ public CompletionTokenStream(TokenStream input, BytesRef payload, ToFiniteStrings toFiniteStrings) throws IOException {
+ // Don't call the super(input) ctor - this is a true delegate and has a new attribute source since we consume
+ // the input stream entirely in toFiniteStrings(input)
+ this.input = input;
+ this.payload = payload;
+ this.toFiniteStrings = toFiniteStrings;
+ }
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ clearAttributes();
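+ // lazily expand the input stream into its finite strings on the first call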
+ if (finiteStrings == null) {
+ Set<IntsRef> strings = toFiniteStrings.toFiniteStrings(input);
+
+ if (strings.size() > MAX_PATHS) {
+ throw new IllegalArgumentException("TokenStream expanded to " + strings.size() + " finite strings. Only <= " + MAX_PATHS
+ + " finite strings are supported");
+ }
+ posInc = strings.size();
+ finiteStrings = strings.iterator();
+ }
+ if (finiteStrings.hasNext()) {
+ posAttr.setPositionIncrement(posInc);
+ /*
+ * this posInc encodes the number of paths that this surface form
+ * produced. Multi-fields share the same surface form, so their path
+ * counts sum up.
+ */
+ posInc = 0;
+ Util.toBytesRef(finiteStrings.next(), bytesAtt.builder()); // now we have UTF-8
+ if (charTermAttribute != null) {
+ charTermAttribute.setLength(0);
+ charTermAttribute.append(bytesAtt.toUTF16());
+ }
+ if (payload != null) {
+ payloadAttr.setPayload(this.payload);
+ }
+ return true;
+ }
+
+ return false;
+ }
+
+ @Override
+ public void end() throws IOException {
+ super.end();
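+ // posInc == -1 means the delegate was never consumed by toFiniteStrings(input),
+ // so we still own its lifecycle here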
+ if (posInc == -1) {
+ input.end();
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (posInc == -1) {
+ input.close();
+ }
+ }
+
+ public interface ToFiniteStrings {
+ Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException;
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ if (hasAttribute(CharTermAttribute.class)) {
+ // we only create this if we really need it, to spare the UTF-8 to UTF-16 conversion otherwise
+ charTermAttribute = getAttribute(CharTermAttribute.class);
+ }
+ finiteStrings = null;
+ posInc = -1;
+ }
+
+ public interface ByteTermAttribute extends TermToBytesRefAttribute {
+ /**
+ * Return the builder from which the term is derived.
+ */
+ BytesRefBuilder builder();
+
+ /**
+ * Return the term as a UTF-16 character sequence.
+ */
+ CharSequence toUTF16();
+ }
+
+ public static final class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute {
+ private final BytesRefBuilder bytes = new BytesRefBuilder();
+ private CharsRefBuilder charsRef;
+
+ @Override
+ public void fillBytesRef() {
+ // does nothing - we change in place
+ }
+
+ @Override
+ public BytesRefBuilder builder() {
+ return bytes;
+ }
+
+ @Override
+ public BytesRef getBytesRef() {
+ return bytes.get();
+ }
+
+ @Override
+ public void clear() {
+ bytes.clear();
+ }
+
+ @Override
+ public void copyTo(AttributeImpl target) {
+ ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
+ other.bytes.copyBytes(bytes);
+ }
+
+ @Override
+ public CharSequence toUTF16() {
+ if (charsRef == null) {
+ charsRef = new CharsRefBuilder();
+ }
+ charsRef.copyUTF8Bytes(getBytesRef());
+ return charsRef.get();
+ }
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/PayloadProcessor.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/PayloadProcessor.java
new file mode 100644
index 0000000000..544d9052a0
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/PayloadProcessor.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.suggest.completion;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+
+import java.io.IOException;
+
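+/**
+ * Builds the combined byte payload stored with each suggestion (surface form,
+ * weight and user payload) and parses it back into its parts.
+ */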
+interface PayloadProcessor {
+
+ BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException;
+
+ void parsePayload(BytesRef payload, SuggestPayload ref) throws IOException;
+
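+ /** Mutable holder for the parts decoded by {@link #parsePayload}. */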
+ static class SuggestPayload {
+ final BytesRefBuilder payload = new BytesRefBuilder();
+ long weight = 0;
+ final BytesRefBuilder surfaceForm = new BytesRefBuilder();
+ }
+}