author     Areek Zillur <areek.zillur@elasticsearch.com>  2016-04-07 15:34:39 -0400
committer  Areek Zillur <areek.zillur@elasticsearch.com>  2016-04-25 21:21:56 -0400
commit     4a1a03428d7884c185b05c364c6f220237a9567e (patch)
tree       dce7bfb76bfa033c0961f60a7654426f2f288d26 /core/src/main/java/org
parent     d39eb2d69131d7a6a8bda29dcac31342bb1a712e (diff)
Add bwc support for pre-5.0 completion index
This commit adds support for reading and querying completion fields that were indexed in 2.x
Diffstat (limited to 'core/src/main/java/org')
-rw-r--r--  core/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java | 1214
-rw-r--r--  core/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java | 267
-rw-r--r--  core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java | 10
-rw-r--r--  core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java | 3
-rw-r--r--  core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper2x.java | 611
-rw-r--r--  core/src/main/java/org/elasticsearch/index/shard/IndexShard.java | 5
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/Suggest.java | 13
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java | 158
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java | 136
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java | 20
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion2x/AnalyzingCompletionLookupProvider.java | 413
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion2x/Completion090PostingsFormat.java | 353
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion2x/CompletionSuggestion.java | 144
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion2x/CompletionTokenStream.java | 176
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion2x/PayloadProcessor.java | 38
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion2x/context/CategoryContextMapping.java | 374
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion2x/context/ContextBuilder.java | 135
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion2x/context/ContextMapping.java | 319
-rw-r--r--  core/src/main/java/org/elasticsearch/search/suggest/completion2x/context/GeolocationContextMapping.java | 750
19 files changed, 5087 insertions, 52 deletions
diff --git a/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java b/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java
new file mode 100644
index 0000000000..a9327d785e
--- /dev/null
+++ b/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java
@@ -0,0 +1,1214 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.lucene.search.suggest.analyzing;
+
+import com.carrotsearch.hppc.ObjectIntHashMap;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.TokenStreamToAutomaton;
+import org.apache.lucene.search.suggest.InputIterator;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.store.OutputStreamDataOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.IntsRefBuilder;
+import org.apache.lucene.util.OfflineSorter;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.LimitedFiniteStringsIterator;
+import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.fst.Builder;
+import org.apache.lucene.util.fst.ByteSequenceOutputs;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.FST.BytesReader;
+import org.apache.lucene.util.fst.PairOutputs;
+import org.apache.lucene.util.fst.PairOutputs.Pair;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.util.fst.Util.Result;
+import org.apache.lucene.util.fst.Util.TopResults;
+import org.elasticsearch.common.SuppressForbidden;
+import org.elasticsearch.common.collect.HppcMaps;
+import org.elasticsearch.common.io.PathUtils;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Suggester that first analyzes the surface form, adds the
+ * analyzed form to a weighted FST, and then does the same
+ * thing at lookup time. This means lookup is based on the
+ * analyzed form while suggestions are still the surface
+ * form(s).
+ *
+ * <p>
+ * This can result in powerful suggester functionality. For
+ * example, if you use an analyzer removing stop words,
+ * then the partial text "ghost chr..." could see the
+ * suggestion "The Ghost of Christmas Past". Note that
+ * position increments MUST NOT be preserved for this example
+ * to work, so you should call the constructor with the
+ * <code>preservePositionIncrements</code> parameter set to
+ * <code>false</code>.
+ *
+ * <p>
+ * If SynonymFilter is used to map wifi and wireless network to
+ * hotspot then the partial text "wirele..." could suggest
+ * "wifi router". Token normalization like stemmers, accent
+ * removal, etc., would allow suggestions to ignore such
+ * variations.
+ *
+ * <p>
+ * When two matching suggestions have the same weight, they
+ * are tie-broken by the analyzed form. If their analyzed
+ * form is the same then the order is undefined.
+ *
+ * <p>
+ * There are some limitations:
+ * <ul>
+ *
+ * <li> A lookup from a query like "net" in English won't
+ * be any different than "net " (ie, user added a
+ * trailing space) because analyzers don't reflect
+ * when they've seen a token separator and when they
+ * haven't.
+ *
+ * <li> If you're using {@code StopFilter}, and the user will
+ * type "fast apple", but so far all they've typed is
+ * "fast a", again because the analyzer doesn't convey whether
+ * it's seen a token separator after the "a",
+ * {@code StopFilter} will remove that "a" causing
+ * far more matches than you'd expect.
+ *
+ * <li> Lookups with the empty string return no results
+ * instead of all results.
+ * </ul>
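+ *
+ * <p>
+ * A minimal usage sketch (hypothetical variables; assumes an
+ * {@link InputIterator} over the surface forms to index):
+ * <pre>
+ *   XAnalyzingSuggester suggester = new XAnalyzingSuggester(analyzer);
+ *   suggester.build(inputIterator);                       // analyze and index the surface forms
+ *   List&lt;LookupResult&gt; top = suggester.lookup("ghost chr", null, false, 5);
+ * </pre>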
+ */
+public class XAnalyzingSuggester extends Lookup {
+
+ /**
+ * FST&lt;Weight,Surface&gt;:
+ * input is the analyzed form, with a null byte between terms
+ * weights are encoded as costs: (Integer.MAX_VALUE-weight)
+ * surface is the original, unanalyzed form.
+ */
+ private FST<Pair<Long,BytesRef>> fst = null;
+
+ /**
+ * Analyzer that will be used for analyzing suggestions at
+ * index time.
+ */
+ private final Analyzer indexAnalyzer;
+
+ /**
+ * Analyzer that will be used for analyzing suggestions at
+ * query time.
+ */
+ private final Analyzer queryAnalyzer;
+
+ /**
+ * True if exact match suggestions should always be returned first.
+ */
+ private final boolean exactFirst;
+
+ /**
+ * True if separator between tokens should be preserved.
+ */
+ private final boolean preserveSep;
+
+ /** Include this flag in the options parameter to {@code
+ * #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)} to always
+ * return the exact match first, regardless of score. This
+ * has no performance impact but could result in
+ * low-quality suggestions. */
+ public static final int EXACT_FIRST = 1;
+
+ /** Include this flag in the options parameter to {@code
+ * #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)} to preserve
+ * token separators when matching. */
+ public static final int PRESERVE_SEP = 2;
+
+ /** Represents the separation between tokens, if
+ * PRESERVE_SEP was specified */
+ public static final int SEP_LABEL = '\u001F';
+
+ /** Marks end of the analyzed input and start of dedup
+ * byte. */
+ public static final int END_BYTE = 0x0;
+
+ /** Maximum number of dup surface forms (different surface
+ * forms for the same analyzed form). */
+ private final int maxSurfaceFormsPerAnalyzedForm;
+
+ /** Maximum graph paths to index for a single analyzed
+ * surface form. This only matters if your analyzer
+ * makes lots of alternate paths (e.g. contains
+ * SynonymFilter). */
+ private final int maxGraphExpansions;
+
+ /** Highest number of analyzed paths we saw for any single
+ * input surface form. For analyzers that never create
+ * graphs this will always be 1. */
+ private int maxAnalyzedPathsForOneInput;
+
+ private boolean hasPayloads;
+
+ private final int sepLabel;
+ private final int payloadSep;
+ private final int endByte;
+ private final int holeCharacter;
+
+ public static final int PAYLOAD_SEP = '\u001F';
+ public static final int HOLE_CHARACTER = '\u001E';
+
+ private final Automaton queryPrefix;
+
+ /** Whether position holes should appear in the automaton. */
+ private boolean preservePositionIncrements;
+
+ /** Number of entries the lookup was built with */
+ private long count = 0;
+
+ /**
+ * Calls {@code #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)
+ * AnalyzingSuggester(analyzer, analyzer, EXACT_FIRST |
+ * PRESERVE_SEP, 256, -1)}
+ *
+ * @param analyzer Analyzer that will be used for analyzing suggestions while building the index.
+ */
+ public XAnalyzingSuggester(Analyzer analyzer) {
+ this(analyzer, null, analyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0,
+ SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER);
+ }
+
+ /**
+ * Calls {@code #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)
+ * AnalyzingSuggester(indexAnalyzer, queryAnalyzer, EXACT_FIRST |
+ * PRESERVE_SEP, 256, -1)}
+ *
+ * @param indexAnalyzer Analyzer that will be used for analyzing suggestions while building the index.
+ * @param queryAnalyzer Analyzer that will be used for analyzing query text during lookup
+ */
+ public XAnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) {
+ this(indexAnalyzer, null, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0,
+ SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER);
+ }
+
+ /**
+ * Creates a new suggester.
+ *
+ * @param indexAnalyzer Analyzer that will be used for
+ * analyzing suggestions while building the index.
+ * @param queryAnalyzer Analyzer that will be used for
+ * analyzing query text during lookup
+ * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
+ * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
+ * surface forms to keep for a single analyzed form.
+ * When there are too many surface forms we discard the
+ * lowest weighted ones.
+ * @param maxGraphExpansions Maximum number of graph paths
+ * to expand from the analyzed form. Set this to -1 for
+ * no limit.
+ */
+ public XAnalyzingSuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer,
+ int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
+ boolean preservePositionIncrements, FST<Pair<Long, BytesRef>> fst,
+ boolean hasPayloads, int maxAnalyzedPathsForOneInput,
+ int sepLabel, int payloadSep, int endByte, int holeCharacter) {
+ // SIMON EDIT: I added fst, hasPayloads and maxAnalyzedPathsForOneInput
+ this.indexAnalyzer = indexAnalyzer;
+ this.queryAnalyzer = queryAnalyzer;
+ this.fst = fst;
+ this.hasPayloads = hasPayloads;
+ if ((options & ~(EXACT_FIRST | PRESERVE_SEP)) != 0) {
+ throw new IllegalArgumentException("options should only contain EXACT_FIRST and PRESERVE_SEP; got " + options);
+ }
+ this.exactFirst = (options & EXACT_FIRST) != 0;
+ this.preserveSep = (options & PRESERVE_SEP) != 0;
+
+ // FLORIAN EDIT: I added <code>queryPrefix</code> for context dependent suggestions
+ this.queryPrefix = queryPrefix;
+
+ // NOTE: this is just an implementation limitation; if
+ // somehow this is a problem we could fix it by using
+ // more than one byte to disambiguate ... but 256 seems
+    // like it should be way more than enough.
+ if (maxSurfaceFormsPerAnalyzedForm <= 0 || maxSurfaceFormsPerAnalyzedForm > 256) {
+ throw new IllegalArgumentException(
+        "maxSurfaceFormsPerAnalyzedForm must be > 0 and <= 256 (got: " + maxSurfaceFormsPerAnalyzedForm + ")");
+ }
+ this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
+
+ if (maxGraphExpansions < 1 && maxGraphExpansions != -1) {
+ throw new IllegalArgumentException(
+        "maxGraphExpansions must be -1 (no limit) or > 0 (got: " + maxGraphExpansions + ")");
+ }
+ this.maxGraphExpansions = maxGraphExpansions;
+ this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput;
+ this.preservePositionIncrements = preservePositionIncrements;
+ this.sepLabel = sepLabel;
+ this.payloadSep = payloadSep;
+ this.endByte = endByte;
+ this.holeCharacter = holeCharacter;
+ }
+
+ /** Returns byte size of the underlying FST. */
+ @Override
+  public long ramBytesUsed() {
+ return fst == null ? 0 : fst.ramBytesUsed();
+ }
+
+ public int getMaxAnalyzedPathsForOneInput() {
+ return maxAnalyzedPathsForOneInput;
+ }
+
+ // Replaces SEP with epsilon or remaps them if
+ // we were asked to preserve them:
+ private Automaton replaceSep(Automaton a) {
+
+ Automaton result = new Automaton();
+
+ // Copy all states over
+ int numStates = a.getNumStates();
+ for(int s=0;s<numStates;s++) {
+ result.createState();
+ result.setAccept(s, a.isAccept(s));
+ }
+
+ // Go in reverse topo sort so we know we only have to
+ // make one pass:
+ Transition t = new Transition();
+ int[] topoSortStates = topoSortStates(a);
+ for(int i=0;i<topoSortStates.length;i++) {
+ int state = topoSortStates[topoSortStates.length-1-i];
+ int count = a.initTransition(state, t);
+ for(int j=0;j<count;j++) {
+ a.getNextTransition(t);
+ if (t.min == TokenStreamToAutomaton.POS_SEP) {
+ assert t.max == TokenStreamToAutomaton.POS_SEP;
+ if (preserveSep) {
+ // Remap to SEP_LABEL:
+ result.addTransition(state, t.dest, SEP_LABEL);
+ } else {
+ result.addEpsilon(state, t.dest);
+ }
+ } else if (t.min == TokenStreamToAutomaton.HOLE) {
+ assert t.max == TokenStreamToAutomaton.HOLE;
+
+ // Just remove the hole: there will then be two
+ // SEP tokens next to each other, which will only
+ // match another hole at search time. Note that
+ // it will also match an empty-string token ... if
+ // that's somehow a problem we can always map HOLE
+ // to a dedicated byte (and escape it in the
+ // input).
+ result.addEpsilon(state, t.dest);
+ } else {
+ result.addTransition(state, t.dest, t.min, t.max);
+ }
+ }
+ }
+
+ result.finishState();
+
+ return result;
+ }
+
+ protected Automaton convertAutomaton(Automaton a) {
+ if (queryPrefix != null) {
+ a = Operations.concatenate(Arrays.asList(queryPrefix, a));
+ // This automaton should not blow up during determinize:
+ a = Operations.determinize(a, Integer.MAX_VALUE);
+ }
+ return a;
+ }
+
+ private int[] topoSortStates(Automaton a) {
+ int[] states = new int[a.getNumStates()];
+ final Set<Integer> visited = new HashSet<>();
+ final LinkedList<Integer> worklist = new LinkedList<>();
+ worklist.add(0);
+ visited.add(0);
+ int upto = 0;
+ states[upto] = 0;
+ upto++;
+ Transition t = new Transition();
+ while (worklist.size() > 0) {
+ int s = worklist.removeFirst();
+ int count = a.initTransition(s, t);
+ for (int i=0;i<count;i++) {
+ a.getNextTransition(t);
+ if (!visited.contains(t.dest)) {
+ visited.add(t.dest);
+ worklist.add(t.dest);
+ states[upto++] = t.dest;
+ }
+ }
+ }
+ return states;
+ }
+
+  /** Just escapes the 0xff byte (which we still use for SEP). */
+ private static final class EscapingTokenStreamToAutomaton extends TokenStreamToAutomaton {
+
+ final BytesRefBuilder spare = new BytesRefBuilder();
+ private char sepLabel;
+
+ public EscapingTokenStreamToAutomaton(char sepLabel) {
+ this.sepLabel = sepLabel;
+ }
+
+ @Override
+ protected BytesRef changeToken(BytesRef in) {
+ int upto = 0;
+ for(int i=0;i<in.length;i++) {
+ byte b = in.bytes[in.offset+i];
+ if (b == (byte) sepLabel) {
+ spare.grow(upto+2);
+ spare.setByteAt(upto++, (byte) sepLabel);
+ spare.setByteAt(upto++, b);
+ } else {
+ spare.grow(upto+1);
+ spare.setByteAt(upto++, b);
+ }
+ }
+ spare.setLength(upto);
+ return spare.get();
+ }
+ }
+
+ public TokenStreamToAutomaton getTokenStreamToAutomaton() {
+ final TokenStreamToAutomaton tsta;
+ if (preserveSep) {
+ tsta = new EscapingTokenStreamToAutomaton((char) sepLabel);
+ } else {
+      // When we're not preserving sep, we don't steal the 0xff
+ // byte, so we don't need to do any escaping:
+ tsta = new TokenStreamToAutomaton();
+ }
+ tsta.setPreservePositionIncrements(preservePositionIncrements);
+ return tsta;
+ }
+
+ private static class AnalyzingComparator implements Comparator<BytesRef> {
+
+ private final boolean hasPayloads;
+
+ public AnalyzingComparator(boolean hasPayloads) {
+ this.hasPayloads = hasPayloads;
+ }
+
+ private final ByteArrayDataInput readerA = new ByteArrayDataInput();
+ private final ByteArrayDataInput readerB = new ByteArrayDataInput();
+ private final BytesRef scratchA = new BytesRef();
+ private final BytesRef scratchB = new BytesRef();
+
+ @Override
+ public int compare(BytesRef a, BytesRef b) {
+
+ // First by analyzed form:
+ readerA.reset(a.bytes, a.offset, a.length);
+ scratchA.length = readerA.readShort();
+ scratchA.bytes = a.bytes;
+ scratchA.offset = readerA.getPosition();
+
+ readerB.reset(b.bytes, b.offset, b.length);
+ scratchB.bytes = b.bytes;
+ scratchB.length = readerB.readShort();
+ scratchB.offset = readerB.getPosition();
+
+ int cmp = scratchA.compareTo(scratchB);
+ if (cmp != 0) {
+ return cmp;
+ }
+ readerA.skipBytes(scratchA.length);
+ readerB.skipBytes(scratchB.length);
+ // Next by cost:
+ long aCost = readerA.readInt();
+ long bCost = readerB.readInt();
+ if (aCost < bCost) {
+ return -1;
+ } else if (aCost > bCost) {
+ return 1;
+ }
+
+ // Finally by surface form:
+ if (hasPayloads) {
+ scratchA.length = readerA.readShort();
+ scratchA.offset = readerA.getPosition();
+ scratchB.length = readerB.readShort();
+ scratchB.offset = readerB.getPosition();
+ } else {
+ scratchA.offset = readerA.getPosition();
+ scratchA.length = a.length - scratchA.offset;
+ scratchB.offset = readerB.getPosition();
+ scratchB.length = b.length - scratchB.offset;
+ }
+ return scratchA.compareTo(scratchB);
+ }
+ }
+
+  /** Non-null if this suggester created a temp dir, needed only during build */
+ private static FSDirectory tmpBuildDir;
+
+ @SuppressForbidden(reason = "access temp directory for building index")
+ protected static synchronized FSDirectory getTempDir() {
+ if (tmpBuildDir == null) {
+ // Lazy init
+ String tempDirPath = System.getProperty("java.io.tmpdir");
+ if (tempDirPath == null) {
+ throw new RuntimeException("Java has no temporary folder property (java.io.tmpdir)?");
+ }
+ try {
+ tmpBuildDir = FSDirectory.open(PathUtils.get(tempDirPath));
+ } catch (IOException ioe) {
+ throw new RuntimeException(ioe);
+ }
+ }
+ return tmpBuildDir;
+ }
+
+ @Override
+ public void build(InputIterator iterator) throws IOException {
+ String prefix = getClass().getSimpleName();
+ Directory tempDir = getTempDir();
+ OfflineSorter sorter = new OfflineSorter(tempDir, prefix, new AnalyzingComparator(hasPayloads));
+
+ IndexOutput tempInput = tempDir.createTempOutput(prefix, "input", IOContext.DEFAULT);
+
+ OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
+ OfflineSorter.ByteSequencesReader reader = null;
+
+ hasPayloads = iterator.hasPayloads();
+
+ BytesRefBuilder scratch = new BytesRefBuilder();
+
+ TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
+ String tempSortedFileName = null;
+
+ count = 0;
+ byte buffer[] = new byte[8];
+ try {
+ ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
+
+ for (BytesRef surfaceForm; (surfaceForm = iterator.next()) != null;) {
+ LimitedFiniteStringsIterator finiteStrings =
+ new LimitedFiniteStringsIterator(toAutomaton(surfaceForm, ts2a), maxGraphExpansions);
+ for (IntsRef string; (string = finiteStrings.next()) != null; count++) {
+ Util.toBytesRef(string, scratch);
+
+ // length of the analyzed text (FST input)
+ if (scratch.length() > Short.MAX_VALUE-2) {
+ throw new IllegalArgumentException(
+ "cannot handle analyzed forms > " + (Short.MAX_VALUE-2) + " in length (got " + scratch.length() + ")");
+ }
+ short analyzedLength = (short) scratch.length();
+
+ // compute the required length:
+ // analyzed sequence + weight (4) + surface + analyzedLength (short)
+ int requiredLength = analyzedLength + 4 + surfaceForm.length + 2;
+
+ BytesRef payload;
+
+ if (hasPayloads) {
+ if (surfaceForm.length > (Short.MAX_VALUE-2)) {
+ throw new IllegalArgumentException(
+ "cannot handle surface form > " + (Short.MAX_VALUE-2) + " in length (got " + surfaceForm.length + ")");
+ }
+ payload = iterator.payload();
+ // payload + surfaceLength (short)
+ requiredLength += payload.length + 2;
+ } else {
+ payload = null;
+ }
+
+ buffer = ArrayUtil.grow(buffer, requiredLength);
+
+ output.reset(buffer);
+
+ output.writeShort(analyzedLength);
+
+ output.writeBytes(scratch.bytes(), 0, scratch.length());
+
+ output.writeInt(encodeWeight(iterator.weight()));
+
+ if (hasPayloads) {
+ for(int i=0;i<surfaceForm.length;i++) {
+ if (surfaceForm.bytes[i] == payloadSep) {
+ throw new IllegalArgumentException(
+ "surface form cannot contain unit separator character U+001F; this character is reserved");
+ }
+ }
+ output.writeShort((short) surfaceForm.length);
+ output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length);
+ output.writeBytes(payload.bytes, payload.offset, payload.length);
+ } else {
+ output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length);
+ }
+
+ assert output.getPosition() == requiredLength: output.getPosition() + " vs " + requiredLength;
+
+ writer.write(buffer, 0, output.getPosition());
+ }
+ maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, finiteStrings.size());
+ }
+ writer.close();
+
+ // Sort all input/output pairs (required by FST.Builder):
+ tempSortedFileName = sorter.sort(tempInput.getName());
+
+ // Free disk space:
+ tempDir.deleteFile(tempInput.getName());
+
+ reader = new OfflineSorter.ByteSequencesReader(
+ tempDir.openChecksumInput(tempSortedFileName, IOContext.READONCE), prefix);
+
+ PairOutputs<Long,BytesRef> outputs = new PairOutputs<>(
+ PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
+ Builder<Pair<Long,BytesRef>> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
+
+ // Build FST:
+ BytesRefBuilder previousAnalyzed = null;
+ BytesRefBuilder analyzed = new BytesRefBuilder();
+ BytesRef surface = new BytesRef();
+ IntsRefBuilder scratchInts = new IntsRefBuilder();
+ ByteArrayDataInput input = new ByteArrayDataInput();
+
+ // Used to remove duplicate surface forms (but we
+      // still index the highest-weight one). We clear
+ // this when we see a new analyzed form, so it cannot
+ // grow unbounded (at most 256 entries):
+ Set<BytesRef> seenSurfaceForms = new HashSet<>();
+
+ int dedup = 0;
+ while (reader.read(scratch)) {
+ input.reset(scratch.bytes(), 0, scratch.length());
+ short analyzedLength = input.readShort();
+ analyzed.grow(analyzedLength+2);
+ input.readBytes(analyzed.bytes(), 0, analyzedLength);
+ analyzed.setLength(analyzedLength);
+
+ long cost = input.readInt();
+
+ surface.bytes = scratch.bytes();
+ if (hasPayloads) {
+ surface.length = input.readShort();
+ surface.offset = input.getPosition();
+ } else {
+ surface.offset = input.getPosition();
+ surface.length = scratch.length() - surface.offset;
+ }
+
+ if (previousAnalyzed == null) {
+ previousAnalyzed = new BytesRefBuilder();
+ previousAnalyzed.copyBytes(analyzed);
+ seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
+ } else if (analyzed.get().equals(previousAnalyzed.get())) {
+ dedup++;
+ if (dedup >= maxSurfaceFormsPerAnalyzedForm) {
+ // More than maxSurfaceFormsPerAnalyzedForm
+ // dups: skip the rest:
+ continue;
+ }
+ if (seenSurfaceForms.contains(surface)) {
+ continue;
+ }
+ seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
+ } else {
+ dedup = 0;
+ previousAnalyzed.copyBytes(analyzed);
+ seenSurfaceForms.clear();
+ seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
+ }
+
+ // TODO: I think we can avoid the extra 2 bytes when
+ // there is no dup (dedup==0), but we'd have to fix
+ // the exactFirst logic ... which would be sort of
+ // hairy because we'd need to special case the two
+ // (dup/not dup)...
+
+ // NOTE: must be byte 0 so we sort before whatever
+ // is next
+ analyzed.append((byte) 0);
+ analyzed.append((byte) dedup);
+
+ Util.toIntsRef(analyzed.get(), scratchInts);
+ //System.out.println("ADD: " + scratchInts + " -> " + cost + ": " + surface.utf8ToString());
+ if (!hasPayloads) {
+ builder.add(scratchInts.get(), outputs.newPair(cost, BytesRef.deepCopyOf(surface)));
+ } else {
+ int payloadOffset = input.getPosition() + surface.length;
+ int payloadLength = scratch.length() - payloadOffset;
+ BytesRef br = new BytesRef(surface.length + 1 + payloadLength);
+ System.arraycopy(surface.bytes, surface.offset, br.bytes, 0, surface.length);
+ br.bytes[surface.length] = (byte) payloadSep;
+ System.arraycopy(scratch.bytes(), payloadOffset, br.bytes, surface.length+1, payloadLength);
+ br.length = br.bytes.length;
+ builder.add(scratchInts.get(), outputs.newPair(cost, br));
+ }
+ }
+ fst = builder.finish();
+
+ //PrintWriter pw = new PrintWriter("/tmp/out.dot");
+ //Util.toDot(fst, pw, true, true);
+ //pw.close();
+
+ } finally {
+ IOUtils.closeWhileHandlingException(reader, writer);
+ IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName(), tempSortedFileName);
+ }
+ }
+
+ @Override
+ public boolean store(OutputStream output) throws IOException {
+ DataOutput dataOut = new OutputStreamDataOutput(output);
+ try {
+ if (fst == null) {
+ return false;
+ }
+
+ fst.save(dataOut);
+ dataOut.writeVInt(maxAnalyzedPathsForOneInput);
+ dataOut.writeByte((byte) (hasPayloads ? 1 : 0));
+ } finally {
+ IOUtils.close(output);
+ }
+ return true;
+ }
+
+ @Override
+ public long getCount() {
+ return count;
+ }
+
+ @Override
+ public boolean load(InputStream input) throws IOException {
+ DataInput dataIn = new InputStreamDataInput(input);
+ try {
+ this.fst = new FST<>(dataIn, new PairOutputs<>(
+ PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
+ maxAnalyzedPathsForOneInput = dataIn.readVInt();
+ hasPayloads = dataIn.readByte() == 1;
+ } finally {
+ IOUtils.close(input);
+ }
+ return true;
+ }
+
+ private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRefBuilder spare) {
+ LookupResult result;
+ if (hasPayloads) {
+ int sepIndex = -1;
+ for(int i=0;i<output2.length;i++) {
+ if (output2.bytes[output2.offset+i] == payloadSep) {
+ sepIndex = i;
+ break;
+ }
+ }
+ assert sepIndex != -1;
+ final int payloadLen = output2.length - sepIndex - 1;
+ spare.copyUTF8Bytes(output2.bytes, output2.offset, sepIndex);
+ BytesRef payload = new BytesRef(payloadLen);
+ System.arraycopy(output2.bytes, sepIndex+1, payload.bytes, 0, payloadLen);
+ payload.length = payloadLen;
+ result = new LookupResult(spare.toString(), decodeWeight(output1), payload);
+ } else {
+ spare.copyUTF8Bytes(output2);
+ result = new LookupResult(spare.toString(), decodeWeight(output1));
+ }
+
+ return result;
+ }
+
+ private boolean sameSurfaceForm(BytesRef key, BytesRef output2) {
+ if (hasPayloads) {
+ // output2 has at least PAYLOAD_SEP byte:
+ if (key.length >= output2.length) {
+ return false;
+ }
+ for(int i=0;i<key.length;i++) {
+ if (key.bytes[key.offset+i] != output2.bytes[output2.offset+i]) {
+ return false;
+ }
+ }
+ return output2.bytes[output2.offset + key.length] == payloadSep;
+ } else {
+ return key.bytesEquals(output2);
+ }
+ }
+
+ @Override
+ public List<LookupResult> lookup(final CharSequence key, Set<BytesRef> contexts, boolean onlyMorePopular, int num) {
+ assert num > 0;
+
+ if (onlyMorePopular) {
+ throw new IllegalArgumentException("this suggester only works with onlyMorePopular=false");
+ }
+ if (fst == null) {
+ return Collections.emptyList();
+ }
+
+ //System.out.println("lookup key=" + key + " num=" + num);
+ for (int i = 0; i < key.length(); i++) {
+ if (key.charAt(i) == holeCharacter) {
+ throw new IllegalArgumentException(
+ "lookup key cannot contain HOLE character U+001E; this character is reserved");
+ }
+ if (key.charAt(i) == sepLabel) {
+ throw new IllegalArgumentException(
+ "lookup key cannot contain unit separator character U+001F; this character is reserved");
+ }
+ }
+ final BytesRef utf8Key = new BytesRef(key);
+ try {
+
+ Automaton lookupAutomaton = toLookupAutomaton(key);
+
+ final CharsRefBuilder spare = new CharsRefBuilder();
+
+ //System.out.println(" now intersect exactFirst=" + exactFirst);
+
+ // Intersect automaton w/ suggest wFST and get all
+ // prefix starting nodes & their outputs:
+ //final PathIntersector intersector = getPathIntersector(lookupAutomaton, fst);
+
+ //System.out.println(" prefixPaths: " + prefixPaths.size());
+
+ BytesReader bytesReader = fst.getBytesReader();
+
+ FST.Arc<Pair<Long,BytesRef>> scratchArc = new FST.Arc<>();
+
+ final List<LookupResult> results = new ArrayList<>();
+
+ List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths = FSTUtil.intersectPrefixPaths(convertAutomaton(lookupAutomaton), fst);
+
+ if (exactFirst) {
+
+ int count = 0;
+ for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
+ if (fst.findTargetArc(endByte, path.fstNode, scratchArc, bytesReader) != null) {
+ // This node has END_BYTE arc leaving, meaning it's an
+ // "exact" match:
+ count++;
+ }
+ }
+
+ // Searcher just to find the single exact only
+ // match, if present:
+ Util.TopNSearcher<Pair<Long,BytesRef>> searcher;
+ searcher = new Util.TopNSearcher<>(
+ fst, count * maxSurfaceFormsPerAnalyzedForm, count * maxSurfaceFormsPerAnalyzedForm, weightComparator);
+
+ // NOTE: we could almost get away with only using
+ // the first start node. The only catch is if
+ // maxSurfaceFormsPerAnalyzedForm had kicked in and
+ // pruned our exact match from one of these nodes
+ // ...:
+ for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
+ if (fst.findTargetArc(endByte, path.fstNode, scratchArc, bytesReader) != null) {
+ // This node has END_BYTE arc leaving, meaning it's an
+ // "exact" match:
+ searcher.addStartPaths(scratchArc, fst.outputs.add(path.output, scratchArc.output), false, path.input);
+ }
+ }
+
+ Util.TopResults<Pair<Long,BytesRef>> completions = searcher.search();
+
+ // NOTE: this is rather inefficient: we enumerate
+ // every matching "exactly the same analyzed form"
+ // path, and then do linear scan to see if one of
+ // these exactly matches the input. It should be
+ // possible (though hairy) to do something similar
+ // to getByOutput, since the surface form is encoded
+ // into the FST output, so we more efficiently hone
+ // in on the exact surface-form match. Still, I
+ // suspect very little time is spent in this linear
+        // search: it's bounded by how many prefix start
+ // nodes we have and the
+ // maxSurfaceFormsPerAnalyzedForm:
+ for(Result<Pair<Long,BytesRef>> completion : completions) {
+ BytesRef output2 = completion.output.output2;
+ if (sameSurfaceForm(utf8Key, output2)) {
+ results.add(getLookupResult(completion.output.output1, output2, spare));
+ break;
+ }
+ }
+
+ if (results.size() == num) {
+ // That was quick:
+ return results;
+ }
+ }
+
+ Util.TopNSearcher<Pair<Long,BytesRef>> searcher;
+ searcher = new Util.TopNSearcher<Pair<Long,BytesRef>>(fst,
+ num - results.size(),
+ num * maxAnalyzedPathsForOneInput,
+ weightComparator) {
+ private final Set<BytesRef> seen = new HashSet<>();
+
+ @Override
+ protected boolean acceptResult(IntsRef input, Pair<Long,BytesRef> output) {
+
+ // Dedup: when the input analyzes to a graph we
+ // can get duplicate surface forms:
+ if (seen.contains(output.output2)) {
+ return false;
+ }
+ seen.add(output.output2);
+
+ if (!exactFirst) {
+ return true;
+ } else {
+ // In exactFirst mode, don't accept any paths
+ // matching the surface form since that will
+ // create duplicate results:
+ if (sameSurfaceForm(utf8Key, output.output2)) {
+ // We found exact match, which means we should
+ // have already found it in the first search:
+ assert results.size() == 1;
+ return false;
+ } else {
+ return true;
+ }
+ }
+ }
+ };
+
+ prefixPaths = getFullPrefixPaths(prefixPaths, lookupAutomaton, fst);
+
+ for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
+ searcher.addStartPaths(path.fstNode, path.output, true, path.input);
+ }
+
+ TopResults<Pair<Long,BytesRef>> completions = searcher.search();
+
+ for(Result<Pair<Long,BytesRef>> completion : completions) {
+
+ LookupResult result = getLookupResult(completion.output.output1, completion.output.output2, spare);
+
+ // TODO: for fuzzy case would be nice to return
+ // how many edits were required
+
+ //System.out.println(" result=" + result);
+ results.add(result);
+
+ if (results.size() == num) {
+ // In the exactFirst=true case the search may
+ // produce one extra path
+ break;
+ }
+ }
+
+ return results;
+ } catch (IOException bogus) {
+ throw new RuntimeException(bogus);
+ }
+ }
+
+ @Override
+ public boolean store(DataOutput output) throws IOException {
+ output.writeVLong(count);
+ if (fst == null) {
+ return false;
+ }
+
+ fst.save(output);
+ output.writeVInt(maxAnalyzedPathsForOneInput);
+ output.writeByte((byte) (hasPayloads ? 1 : 0));
+ return true;
+ }
+
+ @Override
+ public boolean load(DataInput input) throws IOException {
+ count = input.readVLong();
+ this.fst = new FST<>(input, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
+ maxAnalyzedPathsForOneInput = input.readVInt();
+ hasPayloads = input.readByte() == 1;
+ return true;
+ }
+
+ /** Returns all completion paths to initialize the search. */
+ protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
+ Automaton lookupAutomaton,
+ FST<Pair<Long,BytesRef>> fst)
+ throws IOException {
+ return prefixPaths;
+ }
+
+ final Automaton toAutomaton(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
+ try (TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString())) {
+ return toAutomaton(ts, ts2a);
+ }
+ }
+
+ final Automaton toAutomaton(TokenStream ts, final TokenStreamToAutomaton ts2a) throws IOException {
+ // Create corresponding automaton: labels are bytes
+ // from each analyzed token, with byte 0 used as
+ // separator between tokens:
+ Automaton automaton = ts2a.toAutomaton(ts);
+
+ automaton = replaceSep(automaton);
+ automaton = convertAutomaton(automaton);
+
+ // TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings
+ // assert SpecialOperations.isFinite(automaton);
+
+ // Get all paths from the automaton (there can be
+ // more than one path, eg if the analyzer created a
+ // graph using SynFilter or WDF):
+
+ return automaton;
+ }
+
+ // EDIT: Adrien, needed by lookup providers
+ // NOTE: these XForks are unmaintainable, we need to get rid of them...
+ public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
+ final TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
+ Automaton automaton;
+ try (TokenStream ts = stream) {
+ automaton = toAutomaton(ts, ts2a);
+ }
+ LimitedFiniteStringsIterator finiteStrings =
+ new LimitedFiniteStringsIterator(automaton, maxGraphExpansions);
+ Set<IntsRef> set = new HashSet<>();
+ for (IntsRef string = finiteStrings.next(); string != null; string = finiteStrings.next()) {
+ set.add(IntsRef.deepCopyOf(string));
+ }
+ return Collections.unmodifiableSet(set);
+ }
+
+ final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
+ // TODO: is there a Reader from a CharSequence?
+ // Turn tokenstream into automaton:
+ Automaton automaton = null;
+
+ try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
+ automaton = getTokenStreamToAutomaton().toAutomaton(ts);
+ }
+
+ automaton = replaceSep(automaton);
+
+ // TODO: we can optimize this somewhat by determinizing
+ // while we convert
+
+ // This automaton should not blow up during determinize:
+ automaton = Operations.determinize(automaton, Integer.MAX_VALUE);
+ return automaton;
+ }
+
+
+
+ /**
+ * Returns the weight associated with an input string, or null if it does not exist.
+ *
+ * Unsupported in this implementation (and will throw an {@link UnsupportedOperationException}).
+ *
+ * @param key input string
+ * @return the weight associated with the input string, or {@code null} if it does not exist.
+ */
+ public Object get(CharSequence key) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * cost -&gt; weight
+ *
+ * @param encoded Cost
+ * @return Weight
+ */
+ public static int decodeWeight(long encoded) {
+ return (int)(Integer.MAX_VALUE - encoded);
+ }
+
+ /**
+ * weight -&gt; cost
+ *
+ * @param value Weight
+ * @return Cost
+ */
+ public static int encodeWeight(long value) {
+ if (value < 0 || value > Integer.MAX_VALUE) {
+ throw new UnsupportedOperationException("cannot encode value: " + value);
+ }
+ return Integer.MAX_VALUE - (int)value;
+ }
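+
+  // Example (assuming a suggestion weight of 10): encodeWeight(10) stores the cost
+  // Integer.MAX_VALUE - 10, so higher weights become lower costs and surface first
+  // in the FST's min-cost search, and decodeWeight(encodeWeight(10)) returns 10.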
+
+ static final Comparator<Pair<Long,BytesRef>> weightComparator = new Comparator<Pair<Long,BytesRef>> () {
+ @Override
+ public int compare(Pair<Long,BytesRef> left, Pair<Long,BytesRef> right) {
+ return left.output1.compareTo(right.output1);
+ }
+ };
+
+
+ public static class XBuilder {
+ private Builder<Pair<Long, BytesRef>> builder;
+ private int maxSurfaceFormsPerAnalyzedForm;
+ private IntsRefBuilder scratchInts = new IntsRefBuilder();
+ private final PairOutputs<Long, BytesRef> outputs;
+ private boolean hasPayloads;
+ private BytesRefBuilder analyzed = new BytesRefBuilder();
+ private final SurfaceFormAndPayload[] surfaceFormsAndPayload;
+ private int count;
+ private ObjectIntHashMap<BytesRef> seenSurfaceForms = HppcMaps.Object.Integer.ensureNoNullKeys(256, 0.75f);
+ private int payloadSep;
+
+ public XBuilder(int maxSurfaceFormsPerAnalyzedForm, boolean hasPayloads, int payloadSep) {
+ this.payloadSep = payloadSep;
+ this.outputs = new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
+ this.builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
+ this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
+ this.hasPayloads = hasPayloads;
+ surfaceFormsAndPayload = new SurfaceFormAndPayload[maxSurfaceFormsPerAnalyzedForm];
+
+ }
+ public void startTerm(BytesRef analyzed) {
+ this.analyzed.grow(analyzed.length+2);
+ this.analyzed.copyBytes(analyzed);
+ }
+
+ private final static class SurfaceFormAndPayload implements Comparable<SurfaceFormAndPayload> {
+ BytesRef payload;
+ long weight;
+
+ public SurfaceFormAndPayload(BytesRef payload, long cost) {
+ super();
+ this.payload = payload;
+ this.weight = cost;
+ }
+
+ @Override
+ public int compareTo(SurfaceFormAndPayload o) {
+ int res = compare(weight, o.weight);
+            if (res == 0) {
+ return payload.compareTo(o.payload);
+ }
+ return res;
+ }
+ public static int compare(long x, long y) {
+ return (x < y) ? -1 : ((x == y) ? 0 : 1);
+ }
+ }
+
+ public void addSurface(BytesRef surface, BytesRef payload, long cost) throws IOException {
+ int surfaceIndex = -1;
+ long encodedWeight = cost == -1 ? cost : encodeWeight(cost);
+ /*
+         * we need to check if we have seen this surface form; if so, only use
+         * the surface form with the highest weight and drop the rest, no matter
+         * whether the payload differs.
+ */
+ if (count >= maxSurfaceFormsPerAnalyzedForm) {
+ // More than maxSurfaceFormsPerAnalyzedForm
+ // dups: skip the rest:
+ return;
+ }
+
+ BytesRef surfaceCopy;
+ final int keySlot;
+ if (count > 0 && (keySlot = seenSurfaceForms.indexOf(surface)) >= 0) {
+ surfaceIndex = seenSurfaceForms.indexGet(keySlot);
+ SurfaceFormAndPayload surfaceFormAndPayload = surfaceFormsAndPayload[surfaceIndex];
+ if (encodedWeight >= surfaceFormAndPayload.weight) {
+ return;
+ }
+ surfaceCopy = BytesRef.deepCopyOf(surface);
+ } else {
+ surfaceIndex = count++;
+ surfaceCopy = BytesRef.deepCopyOf(surface);
+ seenSurfaceForms.put(surfaceCopy, surfaceIndex);
+ }
+
+ BytesRef payloadRef;
+ if (!hasPayloads) {
+ payloadRef = surfaceCopy;
+ } else {
+ int len = surface.length + 1 + payload.length;
+ final BytesRef br = new BytesRef(len);
+ System.arraycopy(surface.bytes, surface.offset, br.bytes, 0, surface.length);
+ br.bytes[surface.length] = (byte) payloadSep;
+ System.arraycopy(payload.bytes, payload.offset, br.bytes, surface.length + 1, payload.length);
+ br.length = len;
+ payloadRef = br;
+ }
+ if (surfaceFormsAndPayload[surfaceIndex] == null) {
+ surfaceFormsAndPayload[surfaceIndex] = new SurfaceFormAndPayload(payloadRef, encodedWeight);
+ } else {
+ surfaceFormsAndPayload[surfaceIndex].payload = payloadRef;
+ surfaceFormsAndPayload[surfaceIndex].weight = encodedWeight;
+ }
+ }
+
+ public void finishTerm(long defaultWeight) throws IOException {
+ ArrayUtil.timSort(surfaceFormsAndPayload, 0, count);
+ int deduplicator = 0;
+ analyzed.append((byte) 0);
+ analyzed.setLength(analyzed.length() + 1);
+ analyzed.grow(analyzed.length());
+ for (int i = 0; i < count; i++) {
+ analyzed.setByteAt(analyzed.length() - 1, (byte) deduplicator++);
+ Util.toIntsRef(analyzed.get(), scratchInts);
+                SurfaceFormAndPayload candidate = surfaceFormsAndPayload[i];
+                long cost = candidate.weight == -1 ? encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight)) : candidate.weight;
+                builder.add(scratchInts.get(), outputs.newPair(cost, candidate.payload));
+ }
+ seenSurfaceForms.clear();
+ count = 0;
+ }
+
+ public FST<Pair<Long, BytesRef>> build() throws IOException {
+ return builder.finish();
+ }
+
+ public boolean hasPayloads() {
+ return hasPayloads;
+ }
+
+ public int maxSurfaceFormsPerAnalyzedForm() {
+ return maxSurfaceFormsPerAnalyzedForm;
+ }
+
+ }
+}
diff --git a/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java b/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java
new file mode 100644
index 0000000000..31a04b6fa6
--- /dev/null
+++ b/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java
@@ -0,0 +1,267 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.lucene.search.suggest.analyzing;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStreamToAutomaton;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.automaton.Automata;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.FiniteStringsIterator;
+import org.apache.lucene.util.automaton.LevenshteinAutomata;
+import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.util.automaton.UTF32ToUTF8;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.PairOutputs;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+
+/**
+ * Implements a fuzzy {@link AnalyzingSuggester}. The similarity measurement is
+ * based on the Damerau-Levenshtein (optimal string alignment) algorithm, though
+ * you can explicitly choose classic Levenshtein by passing <code>false</code>
+ * for the <code>transpositions</code> parameter.
+ * <p>
+ * At most, this query will match terms up to
+ * {@value org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
+ * edits. Higher distances are not supported. Note that the
+ * fuzzy distance is measured in "byte space" on the bytes
+ * returned by the {@link org.apache.lucene.analysis.TokenStream}'s {@link
+ * org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute}, usually UTF8. By default
+ * the analyzed bytes must be at least 3 {@link
+ * #DEFAULT_MIN_FUZZY_LENGTH} bytes before any edits are
+ * considered. Furthermore, the first 1 {@link
+ * #DEFAULT_NON_FUZZY_PREFIX} byte is not allowed to be
+ * edited. We allow up to 1 {@link
+ * #DEFAULT_MAX_EDITS} edit.
+ * If the {@link #unicodeAware} parameter in the constructor is set to true, maxEdits,
+ * minFuzzyLength, transpositions and nonFuzzyPrefix are measured in Unicode code
+ * points (actual letters) instead of bytes.
+ *
+ * <p>
+ * NOTE: This suggester does not boost suggestions that
+ * required no edits over suggestions that did require
+ * edits. This is a known limitation.
+ *
+ * <p>
+ * Note: complex query analyzers can have a significant impact on the lookup
+ * performance. It's recommended to not use analyzers that drop or inject terms
+ * like synonyms to keep the complexity of the prefix intersection low for good
+ * lookup performance. At index time, complex analyzers can safely be used.
+ * </p>
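+ * <p>
+ * A minimal usage sketch (hypothetical variables; with the defaults, one edit is
+ * allowed once the analyzed key is at least {@link #DEFAULT_MIN_FUZZY_LENGTH} bytes long):
+ * <pre>
+ *   XFuzzySuggester suggester = new XFuzzySuggester(analyzer);
+ *   suggester.build(inputIterator);
+ *   List&lt;LookupResult&gt; top = suggester.lookup("gost chr", null, false, 5); // tolerates the missing 'h'
+ * </pre>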
+ */
+public final class XFuzzySuggester extends XAnalyzingSuggester {
+ private final int maxEdits;
+ private final boolean transpositions;
+ private final int nonFuzzyPrefix;
+ private final int minFuzzyLength;
+ private final boolean unicodeAware;
+
+ /**
+ * Measure maxEdits, minFuzzyLength, transpositions and nonFuzzyPrefix
+ * parameters in Unicode code points (actual letters)
+ * instead of bytes.
+ */
+ public static final boolean DEFAULT_UNICODE_AWARE = false;
+
+ /**
+ * The default minimum length of the key passed to {@link
+ * #lookup} before any edits are allowed.
+ */
+ public static final int DEFAULT_MIN_FUZZY_LENGTH = 3;
+
+ /**
+ * The default prefix length where edits are not allowed.
+ */
+ public static final int DEFAULT_NON_FUZZY_PREFIX = 1;
+
+ /**
+ * The default maximum number of edits for fuzzy
+ * suggestions.
+ */
+ public static final int DEFAULT_MAX_EDITS = 1;
+
+ /**
+ * The default transposition value passed to {@link org.apache.lucene.util.automaton.LevenshteinAutomata}
+ */
+ public static final boolean DEFAULT_TRANSPOSITIONS = true;
+
+ /**
+ * Creates a {@link FuzzySuggester} instance initialized with default values.
+ *
+ * @param analyzer the analyzer used for this suggester
+ */
+ public XFuzzySuggester(Analyzer analyzer) {
+ this(analyzer, analyzer);
+ }
+
+ /**
+ * Creates a {@link FuzzySuggester} instance with an index &amp; a query analyzer initialized with default values.
+ *
+ * @param indexAnalyzer
+ * Analyzer that will be used for analyzing suggestions while building the index.
+ * @param queryAnalyzer
+ * Analyzer that will be used for analyzing query text during lookup
+ */
+ public XFuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) {
+ this(indexAnalyzer, null, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1,
+ DEFAULT_MAX_EDITS, DEFAULT_TRANSPOSITIONS,
+ DEFAULT_NON_FUZZY_PREFIX, DEFAULT_MIN_FUZZY_LENGTH, DEFAULT_UNICODE_AWARE,
+ null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER);
+
+ }
+
+ /**
+ * Creates a {@link FuzzySuggester} instance.
+ *
+ * @param indexAnalyzer Analyzer that will be used for
+ * analyzing suggestions while building the index.
+ * @param queryAnalyzer Analyzer that will be used for
+ * analyzing query text during lookup
+ * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
+ * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
+ * surface forms to keep for a single analyzed form.
+ * When there are too many surface forms we discard the
+ * lowest weighted ones.
+ * @param maxGraphExpansions Maximum number of graph paths
+ * to expand from the analyzed form. Set this to -1 for
+ * no limit.
+ * @param maxEdits must be &gt;= 0 and &lt;= {@link org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} .
+ * @param transpositions <code>true</code> if transpositions should be treated as a primitive
+ * edit operation. If this is false, comparisons will implement the classic
+ * Levenshtein algorithm.
+   * @param nonFuzzyPrefix length of common (non-fuzzy) prefix (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
+ * @param minFuzzyLength minimum length of lookup key before any edits are allowed (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
+ * @param sepLabel separation label
+ * @param payloadSep payload separator byte
+ * @param endByte end byte marker byte
+ */
+ public XFuzzySuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer,
+ int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
+ int maxEdits, boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength,
+ boolean unicodeAware, FST<PairOutputs.Pair<Long, BytesRef>> fst, boolean hasPayloads,
+ int maxAnalyzedPathsForOneInput, int sepLabel, int payloadSep, int endByte, int holeCharacter) {
+ super(indexAnalyzer, queryPrefix, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions,
+ true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte, holeCharacter);
+ if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
+ throw new IllegalArgumentException(
+ "maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
+ }
+ if (nonFuzzyPrefix < 0) {
+            throw new IllegalArgumentException("nonFuzzyPrefix must not be < 0 (got " + nonFuzzyPrefix + ")");
+ }
+ if (minFuzzyLength < 0) {
+            throw new IllegalArgumentException("minFuzzyLength must not be < 0 (got " + minFuzzyLength + ")");
+ }
+
+ this.maxEdits = maxEdits;
+ this.transpositions = transpositions;
+ this.nonFuzzyPrefix = nonFuzzyPrefix;
+ this.minFuzzyLength = minFuzzyLength;
+ this.unicodeAware = unicodeAware;
+ }
+
+ @Override
+ protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths(
+ List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths, Automaton lookupAutomaton,
+ FST<PairOutputs.Pair<Long,BytesRef>> fst)
+ throws IOException {
+
+ // TODO: right now there's no penalty for fuzzy/edits,
+ // ie a completion whose prefix matched exactly what the
+ // user typed gets no boost over completions that
+ // required an edit, which get no boost over completions
+ // requiring two edits. I suspect a multiplicative
+ // factor is appropriate (eg, say a fuzzy match must be at
+ // least 2X better weight than the non-fuzzy match to
+ // "compete") ... in which case I think the wFST needs
+ // to be log weights or something ...
+
+ Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
+ /*
+ Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
+ w.write(levA.toDot());
+ w.close();
+ System.out.println("Wrote LevA to out.dot");
+ */
+ return FSTUtil.intersectPrefixPaths(levA, fst);
+ }
+
+ @Override
+ protected Automaton convertAutomaton(Automaton a) {
+ if (unicodeAware) {
+ // FLORIAN EDIT: get converted Automaton from superclass
+ Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a));
+ // This automaton should not blow up during determinize:
+ utf8automaton = Operations.determinize(utf8automaton, Integer.MAX_VALUE);
+ return utf8automaton;
+ } else {
+ return super.convertAutomaton(a);
+ }
+ }
+
+ @Override
+ public TokenStreamToAutomaton getTokenStreamToAutomaton() {
+ final TokenStreamToAutomaton tsta = super.getTokenStreamToAutomaton();
+ tsta.setUnicodeArcs(unicodeAware);
+ return tsta;
+ }
+
+ Automaton toLevenshteinAutomata(Automaton automaton) {
+ List<Automaton> subs = new ArrayList<>();
+ FiniteStringsIterator finiteStrings = new FiniteStringsIterator(automaton);
+ for (IntsRef string; (string = finiteStrings.next()) != null;) {
+ if (string.length <= nonFuzzyPrefix || string.length < minFuzzyLength) {
+ subs.add(Automata.makeString(string.ints, string.offset, string.length));
+ } else {
+ int ints[] = new int[string.length-nonFuzzyPrefix];
+ System.arraycopy(string.ints, string.offset+nonFuzzyPrefix, ints, 0, ints.length);
+ // TODO: maybe add alphaMin to LevenshteinAutomata,
+ // and pass 1 instead of 0? We probably don't want
+ // to allow the trailing dedup bytes to be
+ // edited... but then 0 byte is "in general" allowed
+ // on input (but not in UTF8).
+ LevenshteinAutomata lev = new LevenshteinAutomata(
+ ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions);
+ subs.add(lev.toAutomaton(maxEdits, UnicodeUtil.newString(string.ints, string.offset, nonFuzzyPrefix)));
+ }
+ }
+
+ if (subs.isEmpty()) {
+            // automaton is empty, there are no accepted paths through it
+ return Automata.makeEmpty(); // matches nothing
+ } else if (subs.size() == 1) {
+ // no synonyms or anything: just a single path through the tokenstream
+ return subs.get(0);
+ } else {
+ // multiple paths: this is really scary! is it slow?
+ // maybe we should not do this and throw UOE?
+ Automaton a = Operations.union(subs);
+ // TODO: we could call toLevenshteinAutomata() before det?
+ // this only happens if you have multiple paths anyway (e.g. synonyms)
+ return Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
+ }
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java b/core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java
index a4977baa1f..f0fb05f099 100644
--- a/core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java
+++ b/core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java
@@ -28,6 +28,7 @@ import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
+import org.elasticsearch.index.mapper.core.CompletionFieldMapper2x;
/**
* {@link PerFieldMappingPostingFormatCodec This postings format} is the default
@@ -54,11 +55,14 @@ public class PerFieldMappingPostingFormatCodec extends Lucene60Codec {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
- final MappedFieldType indexName = mapperService.fullName(field);
- if (indexName == null) {
+ final MappedFieldType fieldType = mapperService.fullName(field);
+ if (fieldType == null) {
logger.warn("no index mapper found for field: [{}] returning default postings format", field);
- } else if (indexName instanceof CompletionFieldMapper.CompletionFieldType) {
+ } else if (fieldType instanceof CompletionFieldMapper.CompletionFieldType) {
return CompletionFieldMapper.CompletionFieldType.postingsFormat();
+ } else if (fieldType instanceof CompletionFieldMapper2x.CompletionFieldType) {
+ return ((CompletionFieldMapper2x.CompletionFieldType) fieldType).postingsFormat(
+ super.getPostingsFormatForField(field));
}
return super.getPostingsFormatForField(field);
}
diff --git a/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java
index c3c5a5cbcc..53162dc535 100644
--- a/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java
+++ b/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java
@@ -119,6 +119,9 @@ public class CompletionFieldMapper extends FieldMapper implements ArrayValueMapp
@Override
public Mapper.Builder<?, ?> parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
+ if (parserContext.indexVersionCreated().before(Version.V_5_0_0_alpha1)) {
+ return new CompletionFieldMapper2x.TypeParser().parse(name, node, parserContext);
+ }
CompletionFieldMapper.Builder builder = new CompletionFieldMapper.Builder(name);
NamedAnalyzer indexAnalyzer = null;
NamedAnalyzer searchAnalyzer = null;
diff --git a/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper2x.java b/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper2x.java
new file mode 100644
index 0000000000..9b0306c687
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper2x.java
@@ -0,0 +1,611 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.index.mapper.core;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.Version;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.XContentParser.NumberType;
+import org.elasticsearch.common.xcontent.XContentParser.Token;
+import org.elasticsearch.index.analysis.NamedAnalyzer;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.Mapper;
+import org.elasticsearch.index.mapper.MapperException;
+import org.elasticsearch.index.mapper.MapperParsingException;
+import org.elasticsearch.index.mapper.ParseContext;
+import org.elasticsearch.search.suggest.completion2x.AnalyzingCompletionLookupProvider;
+import org.elasticsearch.search.suggest.completion2x.Completion090PostingsFormat;
+import org.elasticsearch.search.suggest.completion2x.CompletionTokenStream;
+import org.elasticsearch.search.suggest.completion2x.context.ContextBuilder;
+import org.elasticsearch.search.suggest.completion2x.context.ContextMapping;
+import org.elasticsearch.search.suggest.completion2x.context.ContextMapping.ContextConfig;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import static org.elasticsearch.index.mapper.core.TypeParsers.parseMultiField;
+
+/**
+ * Field mapper for {@code completion} fields in indices created before 5.0. Suggestions are
+ * indexed with the 2.x AnalyzingCompletionLookupProvider-backed postings format so that
+ * pre-5.0 completion fields can still be read and queried.
+ */
+public class CompletionFieldMapper2x extends FieldMapper {
+
+ public static final String CONTENT_TYPE = "completion";
+
+ public static class Defaults {
+ public static final CompletionFieldType FIELD_TYPE = new CompletionFieldType();
+
+ static {
+ FIELD_TYPE.setOmitNorms(true);
+ FIELD_TYPE.freeze();
+ }
+
+ public static final boolean DEFAULT_PRESERVE_SEPARATORS = true;
+ public static final boolean DEFAULT_POSITION_INCREMENTS = true;
+ public static final boolean DEFAULT_HAS_PAYLOADS = false;
+ public static final int DEFAULT_MAX_INPUT_LENGTH = 50;
+ }
+
+ public static class Fields {
+ // Mapping field names
+ public static final String ANALYZER = "analyzer";
+ public static final ParseField SEARCH_ANALYZER = new ParseField("search_analyzer");
+ public static final ParseField PRESERVE_SEPARATORS = new ParseField("preserve_separators");
+ public static final ParseField PRESERVE_POSITION_INCREMENTS = new ParseField("preserve_position_increments");
+ public static final String PAYLOADS = "payloads";
+ public static final String TYPE = "type";
+ public static final ParseField MAX_INPUT_LENGTH = new ParseField("max_input_length", "max_input_len");
+ // Content field names
+ public static final String CONTENT_FIELD_NAME_INPUT = "input";
+ public static final String CONTENT_FIELD_NAME_OUTPUT = "output";
+ public static final String CONTENT_FIELD_NAME_PAYLOAD = "payload";
+ public static final String CONTENT_FIELD_NAME_WEIGHT = "weight";
+ public static final String CONTEXT = "context";
+ }
+
+ public static final Set<String> ALLOWED_CONTENT_FIELD_NAMES;
+ static {
+ ALLOWED_CONTENT_FIELD_NAMES = new HashSet<>();
+ ALLOWED_CONTENT_FIELD_NAMES.add(Fields.CONTENT_FIELD_NAME_INPUT);
+ ALLOWED_CONTENT_FIELD_NAMES.add(Fields.CONTENT_FIELD_NAME_OUTPUT);
+ ALLOWED_CONTENT_FIELD_NAMES.add(Fields.CONTENT_FIELD_NAME_PAYLOAD);
+ ALLOWED_CONTENT_FIELD_NAMES.add(Fields.CONTENT_FIELD_NAME_WEIGHT);
+ ALLOWED_CONTENT_FIELD_NAMES.add(Fields.CONTEXT);
+ }
+
+ public static class Builder extends FieldMapper.Builder<Builder, CompletionFieldMapper2x> {
+
+ private boolean preserveSeparators = Defaults.DEFAULT_PRESERVE_SEPARATORS;
+ private boolean payloads = Defaults.DEFAULT_HAS_PAYLOADS;
+ private boolean preservePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS;
+ private int maxInputLength = Defaults.DEFAULT_MAX_INPUT_LENGTH;
+ private SortedMap<String, ContextMapping> contextMapping = ContextMapping.EMPTY_MAPPING;
+
+ public Builder(String name) {
+ super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
+ builder = this;
+ }
+
+ public Builder payloads(boolean payloads) {
+ this.payloads = payloads;
+ return this;
+ }
+
+ public Builder preserveSeparators(boolean preserveSeparators) {
+ this.preserveSeparators = preserveSeparators;
+ return this;
+ }
+
+ public Builder preservePositionIncrements(boolean preservePositionIncrements) {
+ this.preservePositionIncrements = preservePositionIncrements;
+ return this;
+ }
+
+ public Builder maxInputLength(int maxInputLength) {
+ if (maxInputLength <= 0) {
+ throw new IllegalArgumentException(
+ Fields.MAX_INPUT_LENGTH.getPreferredName() + " must be > 0 but was [" + maxInputLength + "]");
+ }
+ this.maxInputLength = maxInputLength;
+ return this;
+ }
+
+ public Builder contextMapping(SortedMap<String, ContextMapping> contextMapping) {
+ this.contextMapping = contextMapping;
+ return this;
+ }
+
+ @Override
+ public CompletionFieldMapper2x build(Mapper.BuilderContext context) {
+ setupFieldType(context);
+ CompletionFieldType completionFieldType = (CompletionFieldType) fieldType;
+ completionFieldType.setProvider(
+ new AnalyzingCompletionLookupProvider(preserveSeparators, preservePositionIncrements, payloads));
+ completionFieldType.setContextMapping(contextMapping);
+ return new CompletionFieldMapper2x(name, fieldType, maxInputLength, context.indexSettings(),
+ multiFieldsBuilder.build(this, context), copyTo);
+ }
+
+ }
+
+ public static class TypeParser implements Mapper.TypeParser {
+
+ @Override
+ public Mapper.Builder<?, ?> parse(String name, Map<String, Object> node, ParserContext parserContext)
+ throws MapperParsingException {
+ CompletionFieldMapper2x.Builder builder = new Builder(name);
+ NamedAnalyzer indexAnalyzer = null;
+ NamedAnalyzer searchAnalyzer = null;
+ for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext(); ) {
+ Map.Entry<String, Object> entry = iterator.next();
+ String fieldName = entry.getKey();
+ Object fieldNode = entry.getValue();
+ if (fieldName.equals("type")) {
+ continue;
+ }
+ if (Fields.ANALYZER.equals(fieldName) || // index_analyzer is for backcompat, remove for v3.0
+ fieldName.equals("index_analyzer") && parserContext.indexVersionCreated().before(Version.V_2_0_0_beta1)) {
+
+ indexAnalyzer = getNamedAnalyzer(parserContext, fieldNode.toString());
+ iterator.remove();
+ } else if (parserContext.parseFieldMatcher().match(fieldName, Fields.SEARCH_ANALYZER)) {
+ searchAnalyzer = getNamedAnalyzer(parserContext, fieldNode.toString());
+ iterator.remove();
+ } else if (fieldName.equals(Fields.PAYLOADS)) {
+ builder.payloads(Boolean.parseBoolean(fieldNode.toString()));
+ iterator.remove();
+ } else if (parserContext.parseFieldMatcher().match(fieldName, Fields.PRESERVE_SEPARATORS)) {
+ builder.preserveSeparators(Boolean.parseBoolean(fieldNode.toString()));
+ iterator.remove();
+ } else if (parserContext.parseFieldMatcher().match(fieldName, Fields.PRESERVE_POSITION_INCREMENTS)) {
+ builder.preservePositionIncrements(Boolean.parseBoolean(fieldNode.toString()));
+ iterator.remove();
+ } else if (parserContext.parseFieldMatcher().match(fieldName, Fields.MAX_INPUT_LENGTH)) {
+ builder.maxInputLength(Integer.parseInt(fieldNode.toString()));
+ iterator.remove();
+ } else if (parseMultiField(builder, name, parserContext, fieldName, fieldNode)) {
+ iterator.remove();
+ } else if (fieldName.equals(Fields.CONTEXT)) {
+ builder.contextMapping(ContextBuilder.loadMappings(fieldNode, parserContext.indexVersionCreated()));
+ iterator.remove();
+ }
+ }
+
+ if (indexAnalyzer == null) {
+ if (searchAnalyzer != null) {
+ throw new MapperParsingException(
+ "analyzer on completion field [" + name + "] must be set when search_analyzer is set");
+ }
+ indexAnalyzer = searchAnalyzer = parserContext.analysisService().analyzer("simple");
+ } else if (searchAnalyzer == null) {
+ searchAnalyzer = indexAnalyzer;
+ }
+ builder.indexAnalyzer(indexAnalyzer);
+ builder.searchAnalyzer(searchAnalyzer);
+
+ return builder;
+ }
+
+ private NamedAnalyzer getNamedAnalyzer(ParserContext parserContext, String name) {
+ NamedAnalyzer analyzer = parserContext.analysisService().analyzer(name);
+ if (analyzer == null) {
+ throw new IllegalArgumentException("Can't find default or mapped analyzer with name [" + name + "]");
+ }
+ return analyzer;
+ }
+ }
+
+ public static final class CompletionFieldType extends MappedFieldType {
+ private PostingsFormat postingsFormat;
+ private AnalyzingCompletionLookupProvider analyzingSuggestLookupProvider;
+ private SortedMap<String, ContextMapping> contextMapping = ContextMapping.EMPTY_MAPPING;
+
+ public CompletionFieldType() {
+ }
+
+ protected CompletionFieldType(CompletionFieldType ref) {
+ super(ref);
+ this.postingsFormat = ref.postingsFormat;
+ this.analyzingSuggestLookupProvider = ref.analyzingSuggestLookupProvider;
+ this.contextMapping = ref.contextMapping;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (!(o instanceof CompletionFieldType)) return false;
+ if (!super.equals(o)) return false;
+ CompletionFieldType fieldType = (CompletionFieldType) o;
+ return analyzingSuggestLookupProvider.getPreserveSep() == fieldType.analyzingSuggestLookupProvider.getPreserveSep()
+ && analyzingSuggestLookupProvider.getPreservePositionsIncrements() ==
+ fieldType.analyzingSuggestLookupProvider.getPreservePositionsIncrements() &&
+ analyzingSuggestLookupProvider.hasPayloads() == fieldType.analyzingSuggestLookupProvider.hasPayloads() &&
+ Objects.equals(getContextMapping(), fieldType.getContextMapping());
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(super.hashCode(),
+ analyzingSuggestLookupProvider.getPreserveSep(),
+ analyzingSuggestLookupProvider.getPreservePositionsIncrements(),
+ analyzingSuggestLookupProvider.hasPayloads(),
+ getContextMapping());
+ }
+
+ @Override
+ public CompletionFieldType clone() {
+ return new CompletionFieldType(this);
+ }
+
+ @Override
+ public String typeName() {
+ return CONTENT_TYPE;
+ }
+
+ @Override
+ public void checkCompatibility(MappedFieldType fieldType, List<String> conflicts, boolean strict) {
+ super.checkCompatibility(fieldType, conflicts, strict);
+ CompletionFieldType other = (CompletionFieldType) fieldType;
+ if (analyzingSuggestLookupProvider.hasPayloads() != other.analyzingSuggestLookupProvider.hasPayloads()) {
+ conflicts.add("mapper [" + name() + "] has different [payload] values");
+ }
+ if (analyzingSuggestLookupProvider.getPreservePositionsIncrements() !=
+ other.analyzingSuggestLookupProvider.getPreservePositionsIncrements()) {
+ conflicts.add("mapper [" + name() + "] has different [preserve_position_increments] values");
+ }
+ if (analyzingSuggestLookupProvider.getPreserveSep() != other.analyzingSuggestLookupProvider.getPreserveSep()) {
+ conflicts.add("mapper [" + name() + "] has different [preserve_separators] values");
+ }
+ if (!ContextMapping.mappingsAreEqual(getContextMapping(), other.getContextMapping())) {
+ conflicts.add("mapper [" + name() + "] has different [context_mapping] values");
+ }
+ }
+
+ public void setProvider(AnalyzingCompletionLookupProvider provider) {
+ checkIfFrozen();
+ this.analyzingSuggestLookupProvider = provider;
+ }
+
+ public synchronized PostingsFormat postingsFormat(PostingsFormat in) {
+ if (in instanceof Completion090PostingsFormat) {
+ throw new IllegalStateException("Double wrapping of " + Completion090PostingsFormat.class);
+ }
+ if (postingsFormat == null) {
+ postingsFormat = new Completion090PostingsFormat(in, analyzingSuggestLookupProvider);
+ }
+ return postingsFormat;
+ }
+
+ public void setContextMapping(SortedMap<String, ContextMapping> contextMapping) {
+ checkIfFrozen();
+ this.contextMapping = contextMapping;
+ }
+
+ /**
+ * Get the context mapping associated with this completion field
+ */
+ public SortedMap<String, ContextMapping> getContextMapping() {
+ return contextMapping;
+ }
+
+ /**
+ * @return true if a context mapping has been defined
+ */
+ public boolean requiresContext() {
+ return contextMapping.isEmpty() == false;
+ }
+ }
+
+ private static final BytesRef EMPTY = new BytesRef();
+
+ private int maxInputLength;
+
+ public CompletionFieldMapper2x(String simpleName, MappedFieldType fieldType, int maxInputLength,
+ Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
+ super(simpleName, fieldType, Defaults.FIELD_TYPE, indexSettings, multiFields, copyTo);
+ this.maxInputLength = maxInputLength;
+ }
+
+ @Override
+ public CompletionFieldType fieldType() {
+ return (CompletionFieldType) super.fieldType();
+ }
+
+ /**
+ * Parses and indexes inputs
+ * Parsing:
+ * Acceptable format:
+ * "STRING" - interpreted as field value (input)
+ * "ARRAY" - each element can be one of "OBJECT" (see below)
+     * "OBJECT" - { "input": STRING|ARRAY, "output": STRING, "weight": STRING|INT, "payload": OBJECT|VALUE, "context": OBJECT }
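+     * e.g. (hypothetical values, for illustration only):
+     * { "input": ["nevermind", "nirvana"], "output": "Nirvana - Nevermind", "weight": 34, "payload": { "albumId": "1234" } }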
+ */
+ @Override
+ public Mapper parse(ParseContext context) throws IOException {
+ XContentParser parser = context.parser();
+ XContentParser.Token token = parser.currentToken();
+ if (token == XContentParser.Token.VALUE_NULL) {
+ throw new MapperParsingException("completion field [" + fieldType().name() + "] does not support null values");
+ }
+
+ String surfaceForm = null;
+ BytesRef payload = null;
+ long weight = -1;
+ List<String> inputs = new ArrayList<>(4);
+
+ SortedMap<String, ContextConfig> contextConfig = null;
+
+ if (token == XContentParser.Token.VALUE_STRING) {
+ inputs.add(parser.text());
+ multiFields.parse(this, context);
+ } else {
+ String currentFieldName = null;
+ while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+ if (token == XContentParser.Token.FIELD_NAME) {
+ currentFieldName = parser.currentName();
+ if (!ALLOWED_CONTENT_FIELD_NAMES.contains(currentFieldName)) {
+ throw new IllegalArgumentException(
+                            "Unknown field name [" + currentFieldName + "], must be one of " + ALLOWED_CONTENT_FIELD_NAMES);
+ }
+ } else if (Fields.CONTEXT.equals(currentFieldName)) {
+ SortedMap<String, ContextConfig> configs = new TreeMap<>();
+ if (token == Token.START_OBJECT) {
+ while ((token = parser.nextToken()) != Token.END_OBJECT) {
+ String name = parser.currentName();
+ ContextMapping mapping = fieldType().getContextMapping().get(name);
+ if (mapping == null) {
+ throw new ElasticsearchParseException("context [{}] is not defined", name);
+ } else {
+ token = parser.nextToken();
+ configs.put(name, mapping.parseContext(context, parser));
+ }
+ }
+ contextConfig = new TreeMap<>();
+ for (ContextMapping mapping : fieldType().getContextMapping().values()) {
+ ContextConfig config = configs.get(mapping.name());
+ contextConfig.put(mapping.name(), config == null ? mapping.defaultConfig() : config);
+ }
+ } else {
+ throw new ElasticsearchParseException("context must be an object");
+ }
+ } else if (Fields.CONTENT_FIELD_NAME_PAYLOAD.equals(currentFieldName)) {
+ if (!isStoringPayloads()) {
+ throw new MapperException("Payloads disabled in mapping");
+ }
+ if (token == XContentParser.Token.START_OBJECT) {
+ XContentBuilder payloadBuilder =
+ XContentFactory.contentBuilder(parser.contentType()).copyCurrentStructure(parser);
+ payload = payloadBuilder.bytes().toBytesRef();
+ payloadBuilder.close();
+ } else if (token.isValue()) {
+ payload = parser.utf8BytesOrNull();
+ } else {
+ throw new MapperException("payload doesn't support type " + token);
+ }
+ } else if (token == XContentParser.Token.VALUE_STRING) {
+ if (Fields.CONTENT_FIELD_NAME_OUTPUT.equals(currentFieldName)) {
+ surfaceForm = parser.text();
+ }
+ if (Fields.CONTENT_FIELD_NAME_INPUT.equals(currentFieldName)) {
+ inputs.add(parser.text());
+ }
+ if (Fields.CONTENT_FIELD_NAME_WEIGHT.equals(currentFieldName)) {
+ Number weightValue;
+ try {
+ weightValue = Long.parseLong(parser.text());
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException(
+ "Weight must be a string representing a numeric value, but was [" + parser.text() + "]");
+ }
+ weight = weightValue.longValue(); // always parse a long to make sure we don't get overflow
+ checkWeight(weight);
+ }
+ } else if (token == XContentParser.Token.VALUE_NUMBER) {
+ if (Fields.CONTENT_FIELD_NAME_WEIGHT.equals(currentFieldName)) {
+ NumberType numberType = parser.numberType();
+ if (NumberType.LONG != numberType && NumberType.INT != numberType) {
+ throw new IllegalArgumentException(
+ "Weight must be an integer, but was [" + parser.numberValue() + "]");
+ }
+ weight = parser.longValue(); // always parse a long to make sure we don't get overflow
+ checkWeight(weight);
+ }
+ } else if (token == XContentParser.Token.START_ARRAY) {
+ if (Fields.CONTENT_FIELD_NAME_INPUT.equals(currentFieldName)) {
+ while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
+ inputs.add(parser.text());
+ }
+ }
+ }
+ }
+ }
+
+ if (contextConfig == null) {
+ contextConfig = new TreeMap<>();
+ for (ContextMapping mapping : fieldType().getContextMapping().values()) {
+ contextConfig.put(mapping.name(), mapping.defaultConfig());
+ }
+ }
+
+ final ContextMapping.Context ctx = new ContextMapping.Context(contextConfig, context.doc());
+
+ payload = payload == null ? EMPTY : payload;
+        if (surfaceForm == null) { // no surface form, use the input
+ for (String input : inputs) {
+ if (input.length() == 0) {
+ continue;
+ }
+ BytesRef suggestPayload = fieldType().analyzingSuggestLookupProvider.buildPayload(new BytesRef(
+ input), weight, payload);
+ context.doc().add(getCompletionField(ctx, input, suggestPayload));
+ }
+ } else {
+ BytesRef suggestPayload = fieldType().analyzingSuggestLookupProvider.buildPayload(new BytesRef(
+ surfaceForm), weight, payload);
+ for (String input : inputs) {
+ if (input.length() == 0) {
+ continue;
+ }
+ context.doc().add(getCompletionField(ctx, input, suggestPayload));
+ }
+ }
+ return null;
+ }
+
+ private void checkWeight(long weight) {
+ if (weight < 0 || weight > Integer.MAX_VALUE) {
+ throw new IllegalArgumentException("Weight must be in the interval [0..2147483647], but was [" + weight + "]");
+ }
+ }
+
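+    /**
+     * Truncates the input to {@code maxInputLength} (keeping surrogate pairs intact), rejects inputs
+     * containing reserved characters, and wraps the input into a suggest field carrying the payload.
+     */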
+ public Field getCompletionField(ContextMapping.Context ctx, String input, BytesRef payload) {
+ final String originalInput = input;
+ if (input.length() > maxInputLength) {
+ final int len = correctSubStringLen(input, Math.min(maxInputLength, input.length()));
+ input = input.substring(0, len);
+ }
+ for (int i = 0; i < input.length(); i++) {
+ if (isReservedChar(input.charAt(i))) {
+ throw new IllegalArgumentException("Illegal input [" + originalInput + "] UTF-16 codepoint [0x"
+ + Integer.toHexString(input.charAt(i)).toUpperCase(Locale.ROOT)
+ + "] at position " + i + " is a reserved character");
+ }
+ }
+ return new SuggestField(
+ fieldType().name(), ctx, input, fieldType(), payload, fieldType().analyzingSuggestLookupProvider);
+ }
+
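+    // If the cut-off position would split a surrogate pair (i.e. the last kept char is a high
+    // surrogate), extend the length by one so the truncated input stays valid UTF-16.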
+ public static int correctSubStringLen(String input, int len) {
+ if (Character.isHighSurrogate(input.charAt(len - 1))) {
+ assert input.length() >= len + 1 && Character.isLowSurrogate(input.charAt(len));
+ return len + 1;
+ }
+ return len;
+ }
+
+ public BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException {
+ return fieldType().analyzingSuggestLookupProvider.buildPayload(surfaceForm, weight, payload);
+ }
+
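+    // Wraps the analyzed token stream with the field's context mappings and attaches the completion
+    // payload so that the 2.x completion postings format can index it.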
+ private static final class SuggestField extends Field {
+ private final BytesRef payload;
+ private final CompletionTokenStream.ToFiniteStrings toFiniteStrings;
+ private final ContextMapping.Context ctx;
+
+ public SuggestField(String name, ContextMapping.Context ctx,
+ String value, MappedFieldType type, BytesRef payload,
+ CompletionTokenStream.ToFiniteStrings toFiniteStrings) {
+ super(name, value, type);
+ this.payload = payload;
+ this.toFiniteStrings = toFiniteStrings;
+ this.ctx = ctx;
+ }
+
+ @Override
+ public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
+ TokenStream ts = ctx.wrapTokenStream(super.tokenStream(analyzer, previous));
+ return new CompletionTokenStream(ts, payload, toFiniteStrings);
+ }
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject(simpleName())
+ .field(Fields.TYPE, CONTENT_TYPE);
+
+ builder.field(Fields.ANALYZER, fieldType().indexAnalyzer().name());
+ if (fieldType().indexAnalyzer().name().equals(fieldType().searchAnalyzer().name()) == false) {
+ builder.field(Fields.SEARCH_ANALYZER.getPreferredName(), fieldType().searchAnalyzer().name());
+ }
+ builder.field(Fields.PAYLOADS, fieldType().analyzingSuggestLookupProvider.hasPayloads());
+ builder.field(Fields.PRESERVE_SEPARATORS.getPreferredName(),
+ fieldType().analyzingSuggestLookupProvider.getPreserveSep());
+ builder.field(Fields.PRESERVE_POSITION_INCREMENTS.getPreferredName(),
+ fieldType().analyzingSuggestLookupProvider.getPreservePositionsIncrements());
+ builder.field(Fields.MAX_INPUT_LENGTH.getPreferredName(), this.maxInputLength);
+ multiFields.toXContent(builder, params);
+
+ if (fieldType().requiresContext()) {
+ builder.startObject(Fields.CONTEXT);
+ for (ContextMapping mapping : fieldType().getContextMapping().values()) {
+ builder.value(mapping);
+ }
+ builder.endObject();
+ }
+
+ return builder.endObject();
+ }
+
+ @Override
+ protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
+ }
+
+ @Override
+ protected String contentType() {
+ return CONTENT_TYPE;
+ }
+
+ public boolean isStoringPayloads() {
+ return fieldType().analyzingSuggestLookupProvider.hasPayloads();
+ }
+
+ @Override
+ protected void doMerge(Mapper mergeWith, boolean updateAllTypes) {
+ super.doMerge(mergeWith, updateAllTypes);
+ CompletionFieldMapper2x fieldMergeWith = (CompletionFieldMapper2x) mergeWith;
+ this.maxInputLength = fieldMergeWith.maxInputLength;
+ }
+
+ // this should be package private but our tests don't allow it.
+ public static boolean isReservedChar(char character) {
+        /* we use 0x001F as a SEP_LABEL in the suggester but we can use the UTF-16 representation since they
+         * are equivalent. We also don't need to convert the input character to UTF-8 to check for the 0x00
+         * end label: every byte of a multi-byte UTF-8 sequence has its high bit set, so a 0x00 byte can only
+         * come from code point U+0000, which we can detect directly on the UTF-16 char. */
+ assert XAnalyzingSuggester.PAYLOAD_SEP == XAnalyzingSuggester.SEP_LABEL; // ensure they are the same!
+ switch (character) {
+ case XAnalyzingSuggester.END_BYTE:
+ case XAnalyzingSuggester.SEP_LABEL:
+ case XAnalyzingSuggester.HOLE_CHARACTER:
+ case ContextMapping.SEPARATOR:
+ return true;
+ default:
+ return false;
+ }
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java
index 979dfd1815..8739c26570 100644
--- a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java
+++ b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java
@@ -19,6 +19,7 @@
package org.elasticsearch.index.shard;
+import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
@@ -105,6 +106,7 @@ import org.elasticsearch.indices.recovery.RecoveryFailedException;
import org.elasticsearch.indices.recovery.RecoveryState;
import org.elasticsearch.search.suggest.completion.CompletionFieldStats;
import org.elasticsearch.search.suggest.completion.CompletionStats;
+import org.elasticsearch.search.suggest.completion2x.Completion090PostingsFormat;
import org.elasticsearch.threadpool.ThreadPool;
import java.io.IOException;
@@ -678,6 +680,9 @@ public class IndexShard extends AbstractIndexShardComponent {
CompletionStats completionStats = new CompletionStats();
try (final Engine.Searcher currentSearcher = acquireSearcher("completion_stats")) {
completionStats.add(CompletionFieldStats.completionStats(currentSearcher.reader(), fields));
+ Completion090PostingsFormat postingsFormat = ((Completion090PostingsFormat)
+ PostingsFormat.forName(Completion090PostingsFormat.CODEC_NAME));
+ completionStats.add(postingsFormat.completionStats(currentSearcher.reader(), fields));
}
return completionStats;
}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/Suggest.java b/core/src/main/java/org/elasticsearch/search/suggest/Suggest.java
index 35f70abf2c..f8fbdaf969 100644
--- a/core/src/main/java/org/elasticsearch/search/suggest/Suggest.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/Suggest.java
@@ -112,6 +112,9 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
case CompletionSuggestion.TYPE:
suggestion = new CompletionSuggestion();
break;
+ case org.elasticsearch.search.suggest.completion2x.CompletionSuggestion.TYPE:
+ suggestion = new org.elasticsearch.search.suggest.completion2x.CompletionSuggestion();
+ break;
case PhraseSuggestion.TYPE:
suggestion = new PhraseSuggestion();
break;
@@ -173,6 +176,16 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
List<Suggestion<? extends Entry<? extends Option>>> reduced = new ArrayList<>(groupedSuggestions.size());
for (java.util.Map.Entry<String, List<Suggestion>> unmergedResults : groupedSuggestions.entrySet()) {
List<Suggestion> value = unmergedResults.getValue();
+ Class<? extends Suggestion> suggestionClass = null;
+ for (Suggestion suggestion : value) {
+ if (suggestionClass == null) {
+ suggestionClass = suggestion.getClass();
+ } else if (suggestionClass != suggestion.getClass()) {
+ throw new IllegalArgumentException(
+                        "detected mixed suggestion results due to querying both the old and the new completion" +
+                        " suggester; query only a single completion suggester version");
+ }
+ }
Suggestion reduce = value.get(0).reduce(value);
reduce.trim();
reduced.add(reduce);
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java
index 8bf35a34b2..16a2804b37 100644
--- a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java
@@ -18,8 +18,11 @@
*/
package org.elasticsearch.search.suggest.completion;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
+import org.apache.lucene.index.Terms;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.IndexSearcher;
@@ -29,7 +32,9 @@ import org.apache.lucene.search.suggest.document.CompletionQuery;
import org.apache.lucene.search.suggest.document.TopSuggestDocs;
import org.apache.lucene.search.suggest.document.TopSuggestDocsCollector;
import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.PriorityQueue;
+import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.fielddata.AtomicFieldData;
@@ -42,10 +47,12 @@ import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.Suggester;
import org.elasticsearch.search.suggest.SuggestionBuilder;
+import org.elasticsearch.search.suggest.completion2x.Completion090PostingsFormat;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
@@ -61,56 +68,119 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
@Override
protected Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>> innerExecute(String name,
final CompletionSuggestionContext suggestionContext, final IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
- final CompletionFieldMapper.CompletionFieldType fieldType = suggestionContext.getFieldType();
- if (fieldType == null) {
- throw new IllegalArgumentException("field [" + suggestionContext.getField() + "] is not a completion field");
- }
- CompletionSuggestion completionSuggestion = new CompletionSuggestion(name, suggestionContext.getSize());
- spare.copyUTF8Bytes(suggestionContext.getText());
- CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(new Text(spare.toString()), 0, spare.length());
- completionSuggestion.addTerm(completionSuggestEntry);
- TopSuggestDocsCollector collector = new TopDocumentsCollector(suggestionContext.getSize());
- suggest(searcher, suggestionContext.toQuery(), collector);
- int numResult = 0;
- List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
- for (TopSuggestDocs.SuggestScoreDoc suggestScoreDoc : collector.get().scoreLookupDocs()) {
- TopDocumentsCollector.SuggestDoc suggestDoc = (TopDocumentsCollector.SuggestDoc) suggestScoreDoc;
- // collect contexts
- Map<String, Set<CharSequence>> contexts = Collections.emptyMap();
- if (fieldType.hasContextMappings() && suggestDoc.getContexts().isEmpty() == false) {
- contexts = fieldType.getContextMappings().getNamedContexts(suggestDoc.getContexts());
- }
- // collect payloads
- final Map<String, List<Object>> payload = new HashMap<>(0);
- List<String> payloadFields = suggestionContext.getPayloadFields();
- if (payloadFields.isEmpty() == false) {
- final int readerIndex = ReaderUtil.subIndex(suggestDoc.doc, leaves);
- final LeafReaderContext subReaderContext = leaves.get(readerIndex);
- final int subDocId = suggestDoc.doc - subReaderContext.docBase;
- for (String field : payloadFields) {
- MapperService mapperService = suggestionContext.getShardContext().getMapperService();
- MappedFieldType payloadFieldType = mapperService.fullName(field);
- if (payloadFieldType != null) {
- QueryShardContext shardContext = suggestionContext.getShardContext();
- final AtomicFieldData data = shardContext.getForField(payloadFieldType)
+ if (suggestionContext.getFieldType() != null) {
+ final CompletionFieldMapper.CompletionFieldType fieldType = suggestionContext.getFieldType();
+ CompletionSuggestion completionSuggestion = new CompletionSuggestion(name, suggestionContext.getSize());
+ spare.copyUTF8Bytes(suggestionContext.getText());
+ CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(
+ new Text(spare.toString()), 0, spare.length());
+ completionSuggestion.addTerm(completionSuggestEntry);
+ TopSuggestDocsCollector collector = new TopDocumentsCollector(suggestionContext.getSize());
+ suggest(searcher, suggestionContext.toQuery(), collector);
+ int numResult = 0;
+ List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
+ for (TopSuggestDocs.SuggestScoreDoc suggestScoreDoc : collector.get().scoreLookupDocs()) {
+ TopDocumentsCollector.SuggestDoc suggestDoc = (TopDocumentsCollector.SuggestDoc) suggestScoreDoc;
+ // collect contexts
+ Map<String, Set<CharSequence>> contexts = Collections.emptyMap();
+ if (fieldType.hasContextMappings() && suggestDoc.getContexts().isEmpty() == false) {
+ contexts = fieldType.getContextMappings().getNamedContexts(suggestDoc.getContexts());
+ }
+ // collect payloads
+ final Map<String, List<Object>> payload = new HashMap<>(0);
+ List<String> payloadFields = suggestionContext.getPayloadFields();
+ if (payloadFields.isEmpty() == false) {
+ final int readerIndex = ReaderUtil.subIndex(suggestDoc.doc, leaves);
+ final LeafReaderContext subReaderContext = leaves.get(readerIndex);
+ final int subDocId = suggestDoc.doc - subReaderContext.docBase;
+ for (String field : payloadFields) {
+ MapperService mapperService = suggestionContext.getShardContext().getMapperService();
+ MappedFieldType payloadFieldType = mapperService.fullName(field);
+ if (payloadFieldType != null) {
+ QueryShardContext shardContext = suggestionContext.getShardContext();
+ final AtomicFieldData data = shardContext.getForField(payloadFieldType)
.load(subReaderContext);
- final ScriptDocValues scriptValues = data.getScriptValues();
- scriptValues.setNextDocId(subDocId);
- payload.put(field, new ArrayList<>(scriptValues.getValues()));
- } else {
- throw new IllegalArgumentException("payload field [" + field + "] does not exist");
+ final ScriptDocValues scriptValues = data.getScriptValues();
+ scriptValues.setNextDocId(subDocId);
+ payload.put(field, new ArrayList<>(scriptValues.getValues()));
+ } else {
+ throw new IllegalArgumentException("payload field [" + field + "] does not exist");
+ }
}
}
- }
- if (numResult++ < suggestionContext.getSize()) {
- CompletionSuggestion.Entry.Option option = new CompletionSuggestion.Entry.Option(
+ if (numResult++ < suggestionContext.getSize()) {
+ CompletionSuggestion.Entry.Option option = new CompletionSuggestion.Entry.Option(
new Text(suggestDoc.key.toString()), suggestDoc.score, contexts, payload);
- completionSuggestEntry.addOption(option);
- } else {
- break;
+ completionSuggestEntry.addOption(option);
+ } else {
+ break;
+ }
}
+ return completionSuggestion;
+ } else if (suggestionContext.getFieldType2x() != null) {
+ final IndexReader indexReader = searcher.getIndexReader();
+ org.elasticsearch.search.suggest.completion2x.CompletionSuggestion completionSuggestion =
+ new org.elasticsearch.search.suggest.completion2x.CompletionSuggestion(name, suggestionContext.getSize());
+ spare.copyUTF8Bytes(suggestionContext.getText());
+
+ org.elasticsearch.search.suggest.completion2x.CompletionSuggestion.Entry completionSuggestEntry =
+ new org.elasticsearch.search.suggest.completion2x.CompletionSuggestion.Entry(new Text(spare.toString()), 0, spare.length());
+ completionSuggestion.addTerm(completionSuggestEntry);
+
+ String fieldName = suggestionContext.getField();
+ Map<String, org.elasticsearch.search.suggest.completion2x.CompletionSuggestion.Entry.Option> results =
+ new HashMap<>(indexReader.leaves().size() * suggestionContext.getSize());
+ for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
+ LeafReader atomicReader = atomicReaderContext.reader();
+ Terms terms = atomicReader.fields().terms(fieldName);
+ if (terms instanceof Completion090PostingsFormat.CompletionTerms) {
+ final Completion090PostingsFormat.CompletionTerms lookupTerms = (Completion090PostingsFormat.CompletionTerms) terms;
+ final Lookup lookup = lookupTerms.getLookup(suggestionContext.getFieldType2x(), suggestionContext);
+ if (lookup == null) {
+                        // we don't have a lookup for this segment; this can happen if a merge dropped all
+                        // docs that had a value in this field from the segment.
+ continue;
+ }
+ List<Lookup.LookupResult> lookupResults = lookup.lookup(spare.get(), false, suggestionContext.getSize());
+ for (Lookup.LookupResult res : lookupResults) {
+
+ final String key = res.key.toString();
+ final float score = res.value;
+ final org.elasticsearch.search.suggest.completion2x.CompletionSuggestion.Entry.Option value = results.get(key);
+ if (value == null) {
+ final org.elasticsearch.search.suggest.completion2x.CompletionSuggestion.Entry.Option option =
+ new org.elasticsearch.search.suggest.completion2x.CompletionSuggestion.Entry.Option(new Text(key), score,
+ res.payload == null ? null : new BytesArray(res.payload));
+ results.put(key, option);
+ } else if (value.getScore() < score) {
+ value.setScore(score);
+ value.setPayload(res.payload == null ? null : new BytesArray(res.payload));
+ }
+ }
+ }
+ }
+ final List<org.elasticsearch.search.suggest.completion2x.CompletionSuggestion.Entry.Option> options =
+ new ArrayList<>(results.values());
+ CollectionUtil.introSort(options, scoreComparator);
+
+ int optionCount = Math.min(suggestionContext.getSize(), options.size());
+ for (int i = 0; i < optionCount; i++) {
+ completionSuggestEntry.addOption(options.get(i));
+ }
+
+ return completionSuggestion;
+ }
+ return null;
+ }
+
+ private static final ScoreComparator scoreComparator = new ScoreComparator();
+ public static class ScoreComparator implements
+ Comparator<org.elasticsearch.search.suggest.completion2x.CompletionSuggestion.Entry.Option> {
+ @Override
+ public int compare(org.elasticsearch.search.suggest.completion2x.CompletionSuggestion.Entry.Option o1,
+ org.elasticsearch.search.suggest.completion2x.CompletionSuggestion.Entry.Option o2) {
+ return Float.compare(o2.getScore(), o1.getScore());
}
- return completionSuggestion;
}
private static void suggest(IndexSearcher searcher, CompletionQuery query, TopSuggestDocsCollector collector) throws IOException {
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java
index 2d9307a882..7810d03004 100644
--- a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java
@@ -34,6 +34,7 @@ import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
+import org.elasticsearch.index.mapper.core.CompletionFieldMapper2x;
import org.elasticsearch.index.query.QueryParseContext;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.suggest.SuggestUtils;
@@ -41,6 +42,9 @@ import org.elasticsearch.search.suggest.SuggestionBuilder;
import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext;
import org.elasticsearch.search.suggest.completion.context.ContextMapping;
import org.elasticsearch.search.suggest.completion.context.ContextMappings;
+import org.elasticsearch.search.suggest.completion2x.context.CategoryContextMapping;
+import org.elasticsearch.search.suggest.completion2x.context.ContextMapping.ContextQuery;
+import org.elasticsearch.search.suggest.completion2x.context.GeolocationContextMapping;
import java.io.IOException;
import java.util.ArrayList;
@@ -219,13 +223,116 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
contentBuilder.endArray();
}
contentBuilder.endObject();
- contextBytes = contentBuilder.bytes();
- return this;
+ return contexts(contentBuilder);
} catch (IOException e) {
throw new IllegalArgumentException(e);
}
}
+ private CompletionSuggestionBuilder contexts(XContentBuilder contextBuilder) {
+ contextBytes = contextBuilder.bytes();
+ return this;
+ }
+
+ public CompletionSuggestionBuilder contexts(Contexts2x contexts2x) {
+ Objects.requireNonNull(contexts2x, "contexts must not be null");
+ try {
+ XContentBuilder contentBuilder = XContentFactory.jsonBuilder();
+ contentBuilder.startObject();
+ for (ContextQuery contextQuery : contexts2x.contextQueries) {
+ contextQuery.toXContent(contentBuilder, EMPTY_PARAMS);
+ }
+ contentBuilder.endObject();
+ return contexts(contentBuilder);
+ } catch (IOException e) {
+ throw new IllegalArgumentException(e);
+ }
+ }
+
+ // for 2.x context support
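+    // Illustrative usage (hypothetical names and values):
+    //   new Contexts2x().addGeoLocation("location", 52.52, 13.40, 5).addCategory("genre", "rock")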
+ public static class Contexts2x {
+ private List<ContextQuery> contextQueries = new ArrayList<>();
+
+ @SuppressWarnings("unchecked")
+ private Contexts2x addContextQuery(ContextQuery ctx) {
+ this.contextQueries.add(ctx);
+ return this;
+ }
+
+ /**
+         * Set up a geolocation context for suggestions. See {@link GeolocationContextMapping}.
+         * @param lat Latitude of the location
+         * @param lon Longitude of the location
+ * @return this
+ */
+ @Deprecated
+ public Contexts2x addGeoLocation(String name, double lat, double lon, int ... precisions) {
+ return addContextQuery(GeolocationContextMapping.query(name, lat, lon, precisions));
+ }
+
+ /**
+         * Set up a geolocation context for suggestions. See {@link GeolocationContextMapping}.
+         * @param lat Latitude of the location
+         * @param lon Longitude of the location
+ * @param precisions precisions as string var-args
+ * @return this
+ */
+ @Deprecated
+ public Contexts2x addGeoLocationWithPrecision(String name, double lat, double lon, String ... precisions) {
+ return addContextQuery(GeolocationContextMapping.query(name, lat, lon, precisions));
+ }
+
+ /**
+         * Set up a geolocation context for suggestions. See {@link GeolocationContextMapping}.
+ * @param geohash Geohash of the location
+ * @return this
+ */
+ @Deprecated
+ public Contexts2x addGeoLocation(String name, String geohash) {
+ return addContextQuery(GeolocationContextMapping.query(name, geohash));
+ }
+
+ /**
+         * Set up a category context for suggestions. See {@link CategoryContextMapping}.
+         * @param categories category values to query for
+ * @return this
+ */
+ @Deprecated
+ public Contexts2x addCategory(String name, CharSequence...categories) {
+ return addContextQuery(CategoryContextMapping.query(name, categories));
+ }
+
+ /**
+         * Set up a category context for suggestions. See {@link CategoryContextMapping}.
+         * @param categories category values to query for
+ * @return this
+ */
+ @Deprecated
+ public Contexts2x addCategory(String name, Iterable<? extends CharSequence> categories) {
+ return addContextQuery(CategoryContextMapping.query(name, categories));
+ }
+
+ /**
+         * Set up a context field for suggestions. See {@link CategoryContextMapping}.
+         * @param fieldvalues values of the context field to query for
+ * @return this
+ */
+ @Deprecated
+ public Contexts2x addContextField(String name, CharSequence...fieldvalues) {
+ return addContextQuery(CategoryContextMapping.query(name, fieldvalues));
+ }
+
+ /**
+         * Set up a context field for suggestions. See {@link CategoryContextMapping}.
+         * @param fieldvalues values of the context field to query for
+ * @return this
+ */
+ @Deprecated
+ public Contexts2x addContextField(String name, Iterable<? extends CharSequence> fieldvalues) {
+ return addContextQuery(CategoryContextMapping.query(name, fieldvalues));
+ }
+ }
+
private static class InnerBuilder extends CompletionSuggestionBuilder {
private String field;
@@ -285,7 +392,12 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
suggestionContext.setFuzzyOptions(fuzzyOptions);
suggestionContext.setRegexOptions(regexOptions);
MappedFieldType mappedFieldType = mapperService.fullName(suggestionContext.getField());
- if (mappedFieldType != null && mappedFieldType instanceof CompletionFieldMapper.CompletionFieldType) {
+ if (mappedFieldType == null ||
+ (mappedFieldType instanceof CompletionFieldMapper.CompletionFieldType == false
+ && mappedFieldType instanceof CompletionFieldMapper2x.CompletionFieldType == false)) {
+ throw new IllegalArgumentException("Field [" + suggestionContext.getField() + "] is not a completion suggest field");
+ }
+ if (mappedFieldType instanceof CompletionFieldMapper.CompletionFieldType) {
CompletionFieldMapper.CompletionFieldType type = (CompletionFieldMapper.CompletionFieldType) mappedFieldType;
suggestionContext.setFieldType(type);
if (type.hasContextMappings() && contextBytes != null) {
@@ -310,9 +422,23 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
} else if (contextBytes != null) {
throw new IllegalArgumentException("suggester [" + type.name() + "] doesn't expect any context");
}
- } else {
- throw new IllegalArgumentException("Field [" + suggestionContext.getField() + "] is not a completion suggest field");
+ } else if (mappedFieldType instanceof CompletionFieldMapper2x.CompletionFieldType) {
+ CompletionFieldMapper2x.CompletionFieldType type = ((CompletionFieldMapper2x.CompletionFieldType) mappedFieldType);
+ suggestionContext.setFieldType2x(type);
+ if (type.requiresContext()) {
+ if (contextBytes != null) {
+ try (XContentParser contextParser = XContentFactory.xContent(contextBytes).createParser(contextBytes)) {
+ contextParser.nextToken();
+ suggestionContext.setContextQueries(ContextQuery.parseQueries(type.getContextMapping(), contextParser));
+ }
+ } else {
+                    throw new IllegalArgumentException("suggester [completion] requires context to be set up");
+ }
+ } else if (contextBytes != null) {
+ throw new IllegalArgumentException("suggester [completion] doesn't expect any context");
+ }
}
+ assert suggestionContext.getFieldType() != null || suggestionContext.getFieldType2x() != null : "no completion field type set";
return suggestionContext;
}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java
index 1941bc9fb8..268e0553ff 100644
--- a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java
@@ -21,10 +21,12 @@ package org.elasticsearch.search.suggest.completion;
import org.apache.lucene.search.suggest.document.CompletionQuery;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
+import org.elasticsearch.index.mapper.core.CompletionFieldMapper2x;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.suggest.SuggestionSearchContext;
import org.elasticsearch.search.suggest.completion.context.ContextMapping;
import org.elasticsearch.search.suggest.completion.context.ContextMappings;
+import org.elasticsearch.search.suggest.completion2x.context.ContextMapping.ContextQuery;
import java.util.Collections;
import java.util.List;
@@ -44,11 +46,17 @@ public class CompletionSuggestionContext extends SuggestionSearchContext.Suggest
private RegexOptions regexOptions;
private Map<String, List<ContextMapping.InternalQueryContext>> queryContexts = Collections.emptyMap();
private List<String> payloadFields = Collections.emptyList();
+ private CompletionFieldMapper2x.CompletionFieldType fieldType2x;
+ private List<ContextQuery> contextQueries;
CompletionFieldMapper.CompletionFieldType getFieldType() {
return this.fieldType;
}
+ CompletionFieldMapper2x.CompletionFieldType getFieldType2x() {
+ return this.fieldType2x;
+ }
+
void setFieldType(CompletionFieldMapper.CompletionFieldType fieldType) {
this.fieldType = fieldType;
}
@@ -116,4 +124,16 @@ public class CompletionSuggestionContext extends SuggestionSearchContext.Suggest
}
return query;
}
+
+ public void setFieldType2x(CompletionFieldMapper2x.CompletionFieldType type) {
+ this.fieldType2x = type;
+ }
+
+ public void setContextQueries(List<ContextQuery> contextQueries) {
+ this.contextQueries = contextQueries;
+ }
+
+ public List<ContextQuery> getContextQueries() {
+ return contextQueries;
+ }
}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion2x/AnalyzingCompletionLookupProvider.java b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/AnalyzingCompletionLookupProvider.java
new file mode 100644
index 0000000000..48f0afc73c
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/AnalyzingCompletionLookupProvider.java
@@ -0,0 +1,413 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.suggest.completion2x;
+
+import com.carrotsearch.hppc.ObjectLongHashMap;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
+import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.Accountables;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.fst.ByteSequenceOutputs;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.PairOutputs;
+import org.apache.lucene.util.fst.PairOutputs.Pair;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.elasticsearch.common.regex.Regex;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.core.CompletionFieldMapper2x;
+import org.elasticsearch.search.suggest.completion.CompletionStats;
+import org.elasticsearch.search.suggest.completion.CompletionSuggestionContext;
+import org.elasticsearch.search.suggest.completion.FuzzyOptions;
+import org.elasticsearch.search.suggest.completion2x.Completion090PostingsFormat.CompletionLookupProvider;
+import org.elasticsearch.search.suggest.completion2x.Completion090PostingsFormat.LookupFactory;
+import org.elasticsearch.search.suggest.completion2x.context.ContextMapping.ContextQuery;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+
+public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider {
+
+ // for serialization
+ public static final int SERIALIZE_PRESERVE_SEPARATORS = 1;
+ public static final int SERIALIZE_HAS_PAYLOADS = 2;
+ public static final int SERIALIZE_PRESERVE_POSITION_INCREMENTS = 4;
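+    // the above are combined as bit flags when the per-field options are written, e.g. a field with
+    // separators and payloads but without position increments is serialized as 1 | 2 = 3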
+
+ private static final int MAX_SURFACE_FORMS_PER_ANALYZED_FORM = 256;
+ private static final int MAX_GRAPH_EXPANSIONS = -1;
+
+ public static final String CODEC_NAME = "analyzing";
+ public static final int CODEC_VERSION_START = 1;
+ public static final int CODEC_VERSION_SERIALIZED_LABELS = 2;
+ public static final int CODEC_VERSION_CHECKSUMS = 3;
+ public static final int CODEC_VERSION_LATEST = CODEC_VERSION_CHECKSUMS;
+
+ private final boolean preserveSep;
+ private final boolean preservePositionIncrements;
+ private final int maxSurfaceFormsPerAnalyzedForm;
+ private final int maxGraphExpansions;
+ private final boolean hasPayloads;
+ private final XAnalyzingSuggester prototype;
+
+ public AnalyzingCompletionLookupProvider(boolean preserveSep, boolean preservePositionIncrements, boolean hasPayloads) {
+ this.preserveSep = preserveSep;
+ this.preservePositionIncrements = preservePositionIncrements;
+ this.hasPayloads = hasPayloads;
+ this.maxSurfaceFormsPerAnalyzedForm = MAX_SURFACE_FORMS_PER_ANALYZED_FORM;
+ this.maxGraphExpansions = MAX_GRAPH_EXPANSIONS;
+ int options = preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0;
+        // needs to be fixed in the suggester before it can be supported
+ //options |= exactFirst ? XAnalyzingSuggester.EXACT_FIRST : 0;
+ prototype = new XAnalyzingSuggester(null, null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions,
+ preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP,
+ XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);
+ }
+
+ @Override
+ public String getName() {
+ return "analyzing";
+ }
+
+ public boolean getPreserveSep() {
+ return preserveSep;
+ }
+
+ public boolean getPreservePositionsIncrements() {
+ return preservePositionIncrements;
+ }
+
+ public boolean hasPayloads() {
+ return hasPayloads;
+ }
+
+ @Override
+ public FieldsConsumer consumer(final IndexOutput output) throws IOException {
+ CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST);
+ return new FieldsConsumer() {
+ private Map<String, Long> fieldOffsets = new HashMap<>();
+
+ @Override
+ public void close() throws IOException {
+ try {
+ /*
+ * write the offsets per field such that we know where
+ * we need to load the FSTs from
+ */
+ long pointer = output.getFilePointer();
+ output.writeVInt(fieldOffsets.size());
+ for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
+ output.writeString(entry.getKey());
+ output.writeVLong(entry.getValue());
+ }
+ output.writeLong(pointer);
+ CodecUtil.writeFooter(output);
+ } finally {
+ IOUtils.close(output);
+ }
+ }
+
+ @Override
+ public void write(Fields fields) throws IOException {
+ for(String field : fields) {
+ Terms terms = fields.terms(field);
+ if (terms == null) {
+ continue;
+ }
+ TermsEnum termsEnum = terms.iterator();
+ PostingsEnum docsEnum = null;
+ final SuggestPayload spare = new SuggestPayload();
+ int maxAnalyzedPathsForOneInput = 0;
+ final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
+ maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
+ int docCount = 0;
+ while (true) {
+ BytesRef term = termsEnum.next();
+ if (term == null) {
+ break;
+ }
+ docsEnum = termsEnum.postings(docsEnum, PostingsEnum.PAYLOADS);
+ builder.startTerm(term);
+ int docFreq = 0;
+ while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+ for (int i = 0; i < docsEnum.freq(); i++) {
+ final int position = docsEnum.nextPosition();
+ AnalyzingCompletionLookupProvider.this.parsePayload(docsEnum.getPayload(), spare);
+ builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
+ // multi fields have the same surface form so we sum up here
+ maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
+ }
+ docFreq++;
+ docCount = Math.max(docCount, docsEnum.docID()+1);
+ }
+ builder.finishTerm(docFreq);
+ }
+ /*
+ * Here we are done processing the field and we can
+                     * build the FST and write it to disk.
+ */
+ FST<Pair<Long, BytesRef>> build = builder.build();
+ assert build != null || docCount == 0: "the FST is null but docCount is != 0 actual value: [" + docCount + "]";
+ /*
+ * it's possible that the FST is null if we have 2 segments that get merged
+ * and all docs that have a value in this field are deleted. This will cause
+ * a consumer to be created but it doesn't consume any values causing the FSTBuilder
+ * to return null.
+ */
+ if (build != null) {
+ fieldOffsets.put(field, output.getFilePointer());
+ build.save(output);
+ /* write some more meta-info */
+ output.writeVInt(maxAnalyzedPathsForOneInput);
+ output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
+ output.writeInt(maxGraphExpansions); // can be negative
+ int options = 0;
+ options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0;
+ options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
+ options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
+ output.writeVInt(options);
+ output.writeVInt(XAnalyzingSuggester.SEP_LABEL);
+ output.writeVInt(XAnalyzingSuggester.END_BYTE);
+ output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP);
+ output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER);
+ }
+ }
+ }
+ };
+ }
+
+
+ @Override
+ public LookupFactory load(IndexInput input) throws IOException {
+ long sizeInBytes = 0;
+ int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST);
+ if (version >= CODEC_VERSION_CHECKSUMS) {
+ CodecUtil.checksumEntireFile(input);
+ }
+ final long metaPointerPosition = input.length() - (version >= CODEC_VERSION_CHECKSUMS? 8 + CodecUtil.footerLength() : 8);
+ final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>();
+ input.seek(metaPointerPosition);
+ long metaPointer = input.readLong();
+ input.seek(metaPointer);
+ int numFields = input.readVInt();
+
+ Map<Long, String> meta = new TreeMap<>();
+ for (int i = 0; i < numFields; i++) {
+ String name = input.readString();
+ long offset = input.readVLong();
+ meta.put(offset, name);
+ }
+
+ for (Map.Entry<Long, String> entry : meta.entrySet()) {
+ input.seek(entry.getKey());
+ FST<Pair<Long, BytesRef>> fst = new FST<>(input, new PairOutputs<>(
+ PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
+ int maxAnalyzedPathsForOneInput = input.readVInt();
+ int maxSurfaceFormsPerAnalyzedForm = input.readVInt();
+ int maxGraphExpansions = input.readInt();
+ int options = input.readVInt();
+ boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPARATORS) != 0;
+ boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0;
+ boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;
+
+            // the first codec version did not include these four values, so fall back to the old defaults (from before the
+            // AnalyzingSuggester was updated in Lucene, so we cannot use the current suggester defaults)
+ int sepLabel, payloadSep, endByte, holeCharacter;
+ switch (version) {
+ case CODEC_VERSION_START:
+ sepLabel = 0xFF;
+ payloadSep = '\u001f';
+ endByte = 0x0;
+ holeCharacter = '\u001E';
+ break;
+ default:
+ sepLabel = input.readVInt();
+ endByte = input.readVInt();
+ payloadSep = input.readVInt();
+ holeCharacter = input.readVInt();
+ }
+
+ AnalyzingSuggestHolder holder = new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements,
+ maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput,
+ fst, sepLabel, payloadSep, endByte, holeCharacter);
+ sizeInBytes += fst.ramBytesUsed();
+ lookupMap.put(entry.getValue(), holder);
+ }
+ final long ramBytesUsed = sizeInBytes;
+ return new LookupFactory() {
+ @Override
+ public Lookup getLookup(CompletionFieldMapper2x.CompletionFieldType fieldType, CompletionSuggestionContext suggestionContext) {
+ AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.name());
+ if (analyzingSuggestHolder == null) {
+ return null;
+ }
+ int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0;
+
+ final XAnalyzingSuggester suggester;
+ final Automaton queryPrefix = fieldType.requiresContext() ?
+ ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(), suggestionContext.getContextQueries()) : null;
+
+ final FuzzyOptions fuzzyOptions = suggestionContext.getFuzzyOptions();
+ if (fuzzyOptions != null) {
+ suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags,
+ analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions,
+ fuzzyOptions.getEditDistance(), fuzzyOptions.isTranspositions(),
+ fuzzyOptions.getFuzzyPrefixLength(), fuzzyOptions.getFuzzyMinLength(), fuzzyOptions.isUnicodeAware(),
+ analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
+ analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel,
+ analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte,
+ analyzingSuggestHolder.holeCharacter);
+ } else {
+ suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags,
+ analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions,
+ analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
+ analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel,
+ analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter);
+ }
+ return suggester;
+ }
+
+ @Override
+ public CompletionStats stats(String... fields) {
+ long sizeInBytes = 0;
+ ObjectLongHashMap<String> completionFields = null;
+ if (fields != null && fields.length > 0) {
+ completionFields = new ObjectLongHashMap<>(fields.length);
+ }
+
+ for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) {
+ sizeInBytes += entry.getValue().fst.ramBytesUsed();
+ if (fields == null || fields.length == 0) {
+ continue;
+ }
+ if (Regex.simpleMatch(fields, entry.getKey())) {
+ long fstSize = entry.getValue().fst.ramBytesUsed();
+ completionFields.addTo(entry.getKey(), fstSize);
+ }
+ }
+
+ return new CompletionStats(sizeInBytes, completionFields);
+ }
+
+ @Override
+ AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) {
+ return lookupMap.get(fieldType.name());
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return ramBytesUsed;
+ }
+
+ @Override
+ public Collection<Accountable> getChildResources() {
+ return Accountables.namedAccountables("field", lookupMap);
+ }
+ };
+ }
+
+ static class AnalyzingSuggestHolder implements Accountable {
+ final boolean preserveSep;
+ final boolean preservePositionIncrements;
+ final int maxSurfaceFormsPerAnalyzedForm;
+ final int maxGraphExpansions;
+ final boolean hasPayloads;
+ final int maxAnalyzedPathsForOneInput;
+ final FST<Pair<Long, BytesRef>> fst;
+ final int sepLabel;
+ final int payloadSep;
+ final int endByte;
+ final int holeCharacter;
+
+ public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements,
+ int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
+ boolean hasPayloads, int maxAnalyzedPathsForOneInput,
+ FST<Pair<Long, BytesRef>> fst, int sepLabel, int payloadSep,
+ int endByte, int holeCharacter) {
+ this.preserveSep = preserveSep;
+ this.preservePositionIncrements = preservePositionIncrements;
+ this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
+ this.maxGraphExpansions = maxGraphExpansions;
+ this.hasPayloads = hasPayloads;
+ this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput;
+ this.fst = fst;
+ this.sepLabel = sepLabel;
+ this.payloadSep = payloadSep;
+ this.endByte = endByte;
+ this.holeCharacter = holeCharacter;
+ }
+
+ public boolean getPreserveSeparator() {
+ return preserveSep;
+ }
+
+ public boolean getPreservePositionIncrements() {
+ return preservePositionIncrements;
+ }
+
+ public boolean hasPayloads() {
+ return hasPayloads;
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ if (fst != null) {
+ return fst.ramBytesUsed();
+ } else {
+ return 0;
+ }
+ }
+
+ @Override
+ public Collection<Accountable> getChildResources() {
+ if (fst != null) {
+ return Collections.singleton(Accountables.namedAccountable("fst", fst));
+ } else {
+ return Collections.emptyList();
+ }
+ }
+ }
+
+ @Override
+ public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
+ return prototype.toFiniteStrings(stream);
+ }
+
+
+}
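As an aside for readers of this change: the consumer() above appends a per-field offset table at close() which load() later reads back to find each field's FST. The following is a minimal, self-contained sketch of just that table (not part of the patch); it uses in-memory streams in place of Lucene's IndexOutput/IndexInput and omits the codec header, the FST bytes, the trailing pointer and the checksum footer. The class name and the example field/offset values are illustrative only.

import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamDataOutput;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;

public class FieldOffsetTableSketch {
    public static void main(String[] args) throws IOException {
        // Pretend "suggest_field" had its FST written at file offset 42.
        Map<String, Long> fieldOffsets = new HashMap<>();
        fieldOffsets.put("suggest_field", 42L);

        // Write: VInt field count, then (string name, VLong offset) pairs.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        OutputStreamDataOutput out = new OutputStreamDataOutput(bytes);
        out.writeVInt(fieldOffsets.size());
        for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
            out.writeString(entry.getKey());
            out.writeVLong(entry.getValue());
        }
        out.close();

        // Read back, keyed by offset so the FSTs can be loaded in file order.
        InputStreamDataInput in = new InputStreamDataInput(new ByteArrayInputStream(bytes.toByteArray()));
        Map<Long, String> meta = new TreeMap<>();
        int numFields = in.readVInt();
        for (int i = 0; i < numFields; i++) {
            String name = in.readString();
            meta.put(in.readVLong(), name);
        }
        in.close();
        System.out.println(meta); // {42=suggest_field}
    }
}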
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion2x/Completion090PostingsFormat.java b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/Completion090PostingsFormat.java
new file mode 100644
index 0000000000..8e9e51597d
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/Completion090PostingsFormat.java
@@ -0,0 +1,353 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.suggest.completion2x;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterLeafReader.FilterTerms;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.store.IOContext.Context;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.store.OutputStreamDataOutput;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.Accountables;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.elasticsearch.common.logging.ESLogger;
+import org.elasticsearch.common.logging.Loggers;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.core.CompletionFieldMapper2x;
+import org.elasticsearch.search.suggest.completion.CompletionStats;
+import org.elasticsearch.search.suggest.completion.CompletionSuggestionContext;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * This {@link PostingsFormat} is basically a T-Sink for a default postings
+ * format that stores postings on disk fitting the Lucene APIs and, next to the
+ * actual postings, builds a suggest FST as an auxiliary data structure. For
+ * simplicity it uses the delegate postings format to handle all merge
+ * operations. The auxiliary suggest FST is only loaded when a FieldsProducer
+ * is requested for reading; for merging, the low-memory delegate postings
+ * format is used.
+ */
+public class Completion090PostingsFormat extends PostingsFormat {
+
+ public static final String CODEC_NAME = "completion090";
+ public static final int SUGGEST_CODEC_VERSION = 1;
+ public static final int SUGGEST_VERSION_CURRENT = SUGGEST_CODEC_VERSION;
+ public static final String EXTENSION = "cmp";
+
+ private final static ESLogger logger = Loggers.getLogger(Completion090PostingsFormat.class);
+ private PostingsFormat delegatePostingsFormat;
+ private final static Map<String, CompletionLookupProvider> providers;
+ private CompletionLookupProvider writeProvider;
+
+
+ static {
+ final CompletionLookupProvider provider = new AnalyzingCompletionLookupProvider(true, true, false);
+ providers = Collections.singletonMap(provider.getName(), provider);
+ }
+
+ public Completion090PostingsFormat(PostingsFormat delegatePostingsFormat, CompletionLookupProvider provider) {
+ super(CODEC_NAME);
+ this.delegatePostingsFormat = delegatePostingsFormat;
+ this.writeProvider = provider;
+ assert delegatePostingsFormat != null && writeProvider != null;
+ }
+
+ /*
+     * Used only by core Lucene at read time via Service Provider instantiation;
+     * do not use at write time in application code.
+ */
+ public Completion090PostingsFormat() {
+ super(CODEC_NAME);
+ }
+
+ @Override
+ public CompletionFieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+ if (delegatePostingsFormat == null) {
+ throw new UnsupportedOperationException("Error - " + getClass().getName()
+ + " has been constructed without a choice of PostingsFormat");
+ }
+ assert writeProvider != null;
+ return new CompletionFieldsConsumer(state);
+ }
+
+ @Override
+ public CompletionFieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+ return new CompletionFieldsProducer(state);
+ }
+
+ private class CompletionFieldsConsumer extends FieldsConsumer {
+
+ private FieldsConsumer delegatesFieldsConsumer;
+ private FieldsConsumer suggestFieldsConsumer;
+
+ public CompletionFieldsConsumer(SegmentWriteState state) throws IOException {
+ this.delegatesFieldsConsumer = delegatePostingsFormat.fieldsConsumer(state);
+ String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
+ IndexOutput output = null;
+ boolean success = false;
+ try {
+ output = state.directory.createOutput(suggestFSTFile, state.context);
+ CodecUtil.writeHeader(output, CODEC_NAME, SUGGEST_VERSION_CURRENT);
+ /*
+ * we write the delegate postings format name so we can load it
+ * without getting an instance in the ctor
+ */
+ output.writeString(delegatePostingsFormat.getName());
+ output.writeString(writeProvider.getName());
+ this.suggestFieldsConsumer = writeProvider.consumer(output);
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(output);
+ }
+ }
+ }
+
+ @Override
+ public void write(Fields fields) throws IOException {
+ delegatesFieldsConsumer.write(fields);
+ suggestFieldsConsumer.write(fields);
+ }
+
+ @Override
+ public void close() throws IOException {
+ IOUtils.close(delegatesFieldsConsumer, suggestFieldsConsumer);
+ }
+ }
+
+ private static class CompletionFieldsProducer extends FieldsProducer {
+ // TODO make this class lazyload all the things in order to take advantage of the new merge instance API
+ // today we just load everything up-front
+ private final FieldsProducer delegateProducer;
+ private final LookupFactory lookupFactory;
+ private final int version;
+
+ public CompletionFieldsProducer(SegmentReadState state) throws IOException {
+ String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
+ IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
+ version = CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
+ FieldsProducer delegateProducer = null;
+ boolean success = false;
+ try {
+ PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
+ String providerName = input.readString();
+ CompletionLookupProvider completionLookupProvider = providers.get(providerName);
+ if (completionLookupProvider == null) {
+ throw new IllegalStateException("no provider with name [" + providerName + "] registered");
+ }
+                // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent unnecessary heap usage?
+ delegateProducer = delegatePostingsFormat.fieldsProducer(state);
+ /*
+ * If we are merging we don't load the FSTs at all such that we
+ * don't consume so much memory during merge
+ */
+ if (state.context.context != Context.MERGE) {
+ // TODO: maybe we can do this in a fully lazy fashion based on some configuration
+                    // eventually we should have some kind of circuit breaker that prevents us from going OOM here
+ // with some configuration
+ this.lookupFactory = completionLookupProvider.load(input);
+ } else {
+ this.lookupFactory = null;
+ }
+ this.delegateProducer = delegateProducer;
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(delegateProducer, input);
+ } else {
+ IOUtils.close(input);
+ }
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ IOUtils.close(delegateProducer);
+ }
+
+ @Override
+ public Iterator<String> iterator() {
+ return delegateProducer.iterator();
+ }
+
+ @Override
+ public Terms terms(String field) throws IOException {
+ final Terms terms = delegateProducer.terms(field);
+ if (terms == null || lookupFactory == null) {
+ return terms;
+ }
+ return new CompletionTerms(terms, lookupFactory);
+ }
+
+ @Override
+ public int size() {
+ return delegateProducer.size();
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return (lookupFactory == null ? 0 : lookupFactory.ramBytesUsed()) + delegateProducer.ramBytesUsed();
+ }
+
+ @Override
+ public Collection<Accountable> getChildResources() {
+ List<Accountable> resources = new ArrayList<>();
+ if (lookupFactory != null) {
+ resources.add(Accountables.namedAccountable("lookup", lookupFactory));
+ }
+ resources.add(Accountables.namedAccountable("delegate", delegateProducer));
+ return Collections.unmodifiableList(resources);
+ }
+
+ @Override
+ public void checkIntegrity() throws IOException {
+ delegateProducer.checkIntegrity();
+ }
+
+ @Override
+ public FieldsProducer getMergeInstance() throws IOException {
+ return delegateProducer.getMergeInstance();
+ }
+ }
+
+ public static final class CompletionTerms extends FilterTerms {
+ private final LookupFactory lookup;
+
+ public CompletionTerms(Terms delegate, LookupFactory lookup) {
+ super(delegate);
+ this.lookup = lookup;
+ }
+
+ public Lookup getLookup(CompletionFieldMapper2x.CompletionFieldType mapper, CompletionSuggestionContext suggestionContext) {
+ return lookup.getLookup(mapper, suggestionContext);
+ }
+
+ public CompletionStats stats(String ... fields) {
+ return lookup.stats(fields);
+ }
+ }
+
+ public static abstract class CompletionLookupProvider implements PayloadProcessor, CompletionTokenStream.ToFiniteStrings {
+
+ public static final char UNIT_SEPARATOR = '\u001f';
+
+ public abstract FieldsConsumer consumer(IndexOutput output) throws IOException;
+
+ public abstract String getName();
+
+ public abstract LookupFactory load(IndexInput input) throws IOException;
+
+ @Override
+ public BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException {
+ if (weight < -1 || weight > Integer.MAX_VALUE) {
+ throw new IllegalArgumentException("weight must be >= -1 && <= Integer.MAX_VALUE");
+ }
+ for (int i = 0; i < surfaceForm.length; i++) {
+ if (surfaceForm.bytes[i] == UNIT_SEPARATOR) {
+ throw new IllegalArgumentException(
+ "surface form cannot contain unit separator character U+001F; this character is reserved");
+ }
+ }
+ ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+ OutputStreamDataOutput output = new OutputStreamDataOutput(byteArrayOutputStream);
+ output.writeVLong(weight + 1);
+ output.writeVInt(surfaceForm.length);
+ output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length);
+ output.writeVInt(payload.length);
+ output.writeBytes(payload.bytes, 0, payload.length);
+
+ output.close();
+ return new BytesRef(byteArrayOutputStream.toByteArray());
+ }
+
+ @Override
+ public void parsePayload(BytesRef payload, SuggestPayload ref) throws IOException {
+ ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(payload.bytes, payload.offset, payload.length);
+ InputStreamDataInput input = new InputStreamDataInput(byteArrayInputStream);
+ ref.weight = input.readVLong() - 1;
+ int len = input.readVInt();
+ ref.surfaceForm.grow(len);
+ ref.surfaceForm.setLength(len);
+ input.readBytes(ref.surfaceForm.bytes(), 0, ref.surfaceForm.length());
+ len = input.readVInt();
+ ref.payload.grow(len);
+ ref.payload.setLength(len);
+ input.readBytes(ref.payload.bytes(), 0, ref.payload.length());
+ input.close();
+ }
+ }
+
+ /**
+ * Returns total in-heap bytes used by all suggesters. This method has CPU cost <code>O(numIndexedFields)</code>.
+ *
+ * @param fieldNamePatterns if non-null, any completion field name matching any of these patterns will break out its in-heap bytes
+ * separately in the returned {@link CompletionStats}
+ */
+ public CompletionStats completionStats(IndexReader indexReader, String ... fieldNamePatterns) {
+ CompletionStats completionStats = new CompletionStats();
+ for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
+ LeafReader atomicReader = atomicReaderContext.reader();
+ try {
+ Fields fields = atomicReader.fields();
+ for (String fieldName : fields) {
+ Terms terms = fields.terms(fieldName);
+ if (terms instanceof CompletionTerms) {
+ CompletionTerms completionTerms = (CompletionTerms) terms;
+ completionStats.add(completionTerms.stats(fieldNamePatterns));
+ }
+ }
+ } catch (IOException ioe) {
+ logger.error("Could not get completion stats", ioe);
+ }
+ }
+
+ return completionStats;
+ }
+
+ public static abstract class LookupFactory implements Accountable {
+ public abstract Lookup getLookup(CompletionFieldMapper2x.CompletionFieldType fieldType,
+ CompletionSuggestionContext suggestionContext);
+ public abstract CompletionStats stats(String ... fields);
+ abstract AnalyzingCompletionLookupProvider.AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType);
+ }
+}
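To make the payload encoding used by buildPayload()/parsePayload() above concrete, here is a minimal round-trip sketch (not part of the patch): VLong(weight + 1), then VInt length plus bytes of the surface form, then VInt length plus bytes of the payload. The class name and example values are illustrative, and the real code additionally rejects surface forms containing the reserved unit separator U+001F.

import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.BytesRef;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

public class SuggestPayloadSketch {
    public static void main(String[] args) throws IOException {
        BytesRef surfaceForm = new BytesRef("nirvana");
        BytesRef payload = new BytesRef("{\"id\":1}");
        long weight = 10;

        // Encode: the +1 shift keeps the minimum allowed weight of -1 encodable as a VLong.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        OutputStreamDataOutput out = new OutputStreamDataOutput(bytes);
        out.writeVLong(weight + 1);
        out.writeVInt(surfaceForm.length);
        out.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length);
        out.writeVInt(payload.length);
        out.writeBytes(payload.bytes, payload.offset, payload.length);
        out.close();

        // Decode in the same order.
        InputStreamDataInput in = new InputStreamDataInput(new ByteArrayInputStream(bytes.toByteArray()));
        long decodedWeight = in.readVLong() - 1;
        byte[] surface = new byte[in.readVInt()];
        in.readBytes(surface, 0, surface.length);
        byte[] decodedPayload = new byte[in.readVInt()];
        in.readBytes(decodedPayload, 0, decodedPayload.length);
        in.close();

        System.out.println(decodedWeight + " " + new String(surface, StandardCharsets.UTF_8));
    }
}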
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion2x/CompletionSuggestion.java b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/CompletionSuggestion.java
new file mode 100644
index 0000000000..565be4fcd9
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/CompletionSuggestion.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.suggest.completion2x;
+
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.text.Text;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.common.xcontent.XContentHelper;
+import org.elasticsearch.common.xcontent.XContentType;
+import org.elasticsearch.search.suggest.Suggest;
+
+import java.io.IOException;
+import java.util.Map;
+
+/**
+ *
+ */
+public class CompletionSuggestion extends Suggest.Suggestion<CompletionSuggestion.Entry> {
+
+ public static final int TYPE = 2;
+
+ public CompletionSuggestion() {
+ }
+
+ public CompletionSuggestion(String name, int size) {
+ super(name, size);
+ }
+
+ @Override
+ public int getType() {
+ return TYPE;
+ }
+
+ @Override
+ protected Entry newEntry() {
+ return new Entry();
+ }
+
+ public static class Entry extends org.elasticsearch.search.suggest.Suggest.Suggestion.Entry<CompletionSuggestion.Entry.Option> {
+
+ public Entry(Text text, int offset, int length) {
+ super(text, offset, length);
+ }
+
+ protected Entry() {
+ super();
+ }
+
+ @Override
+ protected Option newOption() {
+ return new Option();
+ }
+
+ public static class Option extends org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option {
+ private BytesReference payload;
+
+ public Option(Text text, float score, BytesReference payload) {
+ super(text, score);
+ this.payload = payload;
+ }
+
+
+ protected Option() {
+ super();
+ }
+
+ public void setPayload(BytesReference payload) {
+ this.payload = payload;
+ }
+
+ public BytesReference getPayload() {
+ return payload;
+ }
+
+ public String getPayloadAsString() {
+ return payload.toUtf8();
+ }
+
+ public long getPayloadAsLong() {
+ return Long.parseLong(payload.toUtf8());
+ }
+
+ public double getPayloadAsDouble() {
+ return Double.parseDouble(payload.toUtf8());
+ }
+
+ public Map<String, Object> getPayloadAsMap() {
+ return XContentHelper.convertToMap(payload, false).v2();
+ }
+
+ @Override
+ public void setScore(float score) {
+ super.setScore(score);
+ }
+
+ @Override
+ protected XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
+ super.innerToXContent(builder, params);
+ if (payload != null && payload.length() > 0) {
+ XContentType contentType = XContentFactory.xContentType(payload);
+ if (contentType == null) {
+ // must be a string or number
+ builder.field("payload", payload.toUtf8());
+ } else {
+ builder.rawField("payload", payload);
+ }
+ }
+ return builder;
+ }
+
+ @Override
+ public void readFrom(StreamInput in) throws IOException {
+ super.readFrom(in);
+ payload = in.readBytesReference();
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ super.writeTo(out);
+ out.writeBytesReference(payload);
+ }
+ }
+ }
+
+}
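For context, a short sketch (not part of the patch) of how the typed payload accessors on a 2.x suggestion option behave; the suggestion text, score and JSON payload below are illustrative values only.

import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.search.suggest.completion2x.CompletionSuggestion;

public class PayloadAccessSketch {
    public static void main(String[] args) {
        // A 2.x suggestion option carrying a JSON payload.
        CompletionSuggestion.Entry.Option option = new CompletionSuggestion.Entry.Option(
                new Text("Nirvana"), 1.0f, new BytesArray("{\"id\":1}"));
        System.out.println(option.getPayloadAsString()); // {"id":1}
        System.out.println(option.getPayloadAsMap());    // {id=1}
    }
}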
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion2x/CompletionTokenStream.java b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/CompletionTokenStream.java
new file mode 100644
index 0000000000..cbf34be838
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/CompletionTokenStream.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.suggest.completion2x;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.fst.Util;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Set;
+
+/**
+ *
+ */
+public final class CompletionTokenStream extends TokenStream {
+
+ private final PayloadAttribute payloadAttr = addAttribute(PayloadAttribute.class);
+ private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class);
+    private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
+
+
+ private final TokenStream input;
+ private BytesRef payload;
+ private Iterator<IntsRef> finiteStrings;
+ private ToFiniteStrings toFiniteStrings;
+ private int posInc = -1;
+ private static final int MAX_PATHS = 256;
+ private CharTermAttribute charTermAttribute;
+
+ public CompletionTokenStream(TokenStream input, BytesRef payload, ToFiniteStrings toFiniteStrings) {
+ // Don't call the super(input) ctor - this is a true delegate and has a new attribute source since we consume
+ // the input stream entirely in toFiniteStrings(input)
+ this.input = input;
+ this.payload = payload;
+ this.toFiniteStrings = toFiniteStrings;
+ }
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ clearAttributes();
+ if (finiteStrings == null) {
+ Set<IntsRef> strings = toFiniteStrings.toFiniteStrings(input);
+
+ if (strings.size() > MAX_PATHS) {
+ throw new IllegalArgumentException("TokenStream expanded to " + strings.size() + " finite strings. Only <= " + MAX_PATHS
+ + " finite strings are supported");
+ }
+ posInc = strings.size();
+ finiteStrings = strings.iterator();
+ }
+ if (finiteStrings.hasNext()) {
+ posAttr.setPositionIncrement(posInc);
+ /*
+ * this posInc encodes the number of paths that this surface form
+ * produced. Multi Fields have the same surface form and therefore sum up
+ */
+ posInc = 0;
+ Util.toBytesRef(finiteStrings.next(), bytesAtt.builder()); // now we have UTF-8
+ if (charTermAttribute != null) {
+ charTermAttribute.setLength(0);
+ charTermAttribute.append(bytesAtt.toUTF16());
+ }
+ if (payload != null) {
+ payloadAttr.setPayload(this.payload);
+ }
+ return true;
+ }
+
+ return false;
+ }
+
+ @Override
+ public void end() throws IOException {
+ super.end();
+ if (posInc == -1) {
+ input.end();
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ input.close();
+ }
+
+    public interface ToFiniteStrings {
+        Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException;
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ if (hasAttribute(CharTermAttribute.class)) {
+            // only grab the CharTermAttribute if one is present, so we only pay for the UTF-8 to UTF-16 conversion when needed
+ charTermAttribute = getAttribute(CharTermAttribute.class);
+ }
+ finiteStrings = null;
+ posInc = -1;
+ }
+
+ public interface ByteTermAttribute extends TermToBytesRefAttribute {
+ // marker interface
+
+ /**
+ * Return the builder from which the term is derived.
+ */
+ public BytesRefBuilder builder();
+
+ public CharSequence toUTF16();
+ }
+
+ public static final class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute {
+ private final BytesRefBuilder bytes = new BytesRefBuilder();
+ private CharsRefBuilder charsRef;
+
+ @Override
+ public BytesRefBuilder builder() {
+ return bytes;
+ }
+
+ @Override
+ public BytesRef getBytesRef() {
+ return bytes.get();
+ }
+
+ @Override
+ public void clear() {
+ bytes.clear();
+ }
+
+ @Override
+ public void reflectWith(AttributeReflector reflector) {
+
+ }
+
+ @Override
+ public void copyTo(AttributeImpl target) {
+ ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
+ other.bytes.copyBytes(bytes);
+ }
+
+ @Override
+ public CharSequence toUTF16() {
+ if (charsRef == null) {
+ charsRef = new CharsRefBuilder();
+ }
+ charsRef.copyUTF8Bytes(getBytesRef());
+ return charsRef.get();
+ }
+ }
+}
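A hedged sketch (not part of the patch) of how a consumer might drain the 2.x CompletionTokenStream: the first emitted token carries a position increment equal to the number of finite strings the surface form expanded to, and subsequent tokens carry 0. Here analyzerStream, payload and expander are placeholders supplied by the caller (for example a ToFiniteStrings implementation backed by XAnalyzingSuggester#toFiniteStrings, as in AnalyzingCompletionLookupProvider), and the helper name is hypothetical.

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.search.suggest.completion2x.CompletionTokenStream;

import java.io.IOException;

public class CompletionTokenStreamSketch {
    static void dump(TokenStream analyzerStream, BytesRef payload,
                     CompletionTokenStream.ToFiniteStrings expander) throws IOException {
        try (CompletionTokenStream stream = new CompletionTokenStream(analyzerStream, payload, expander)) {
            TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
            PositionIncrementAttribute posIncAtt = stream.getAttribute(PositionIncrementAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                // first token: posInc == number of analyzed paths; following tokens: 0
                System.out.println(posIncAtt.getPositionIncrement() + " -> "
                        + bytesAtt.getBytesRef().utf8ToString());
            }
            stream.end();
        }
    }
}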
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion2x/PayloadProcessor.java b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/PayloadProcessor.java
new file mode 100644
index 0000000000..eb857ce61e
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/PayloadProcessor.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.suggest.completion2x;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+
+import java.io.IOException;
+
+interface PayloadProcessor {
+
+ BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException;
+
+ void parsePayload(BytesRef payload, SuggestPayload ref) throws IOException;
+
+ static class SuggestPayload {
+ final BytesRefBuilder payload = new BytesRefBuilder();
+ long weight = 0;
+ final BytesRefBuilder surfaceForm = new BytesRefBuilder();
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion2x/context/CategoryContextMapping.java b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/context/CategoryContextMapping.java
new file mode 100644
index 0000000000..775d8b031a
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/context/CategoryContextMapping.java
@@ -0,0 +1,374 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.suggest.completion2x.context;
+
+import org.apache.lucene.analysis.PrefixAnalyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.util.automaton.Automata;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.Operations;
+import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.XContentParser.Token;
+import org.elasticsearch.index.mapper.ParseContext;
+import org.elasticsearch.index.mapper.ParseContext.Document;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * The {@link CategoryContextMapping} is used to define a {@link ContextMapping} that
+ * references a field within a document. The value of the field in turn will be
+ * used to set up the suggestions made by the completion suggester.
+ */
+public class CategoryContextMapping extends ContextMapping {
+
+ protected static final String TYPE = "category";
+
+ private static final String FIELD_FIELDNAME = "path";
+ private static final String DEFAULT_FIELDNAME = "_type";
+
+ private static final Iterable<String> EMPTY_VALUES = Collections.emptyList();
+
+ private final String fieldName;
+ private final Iterable<String> defaultValues;
+ private final FieldConfig defaultConfig;
+
+ /**
+ * Create a new {@link CategoryContextMapping} with the default field
+ * <code>[_type]</code>
+ */
+ public CategoryContextMapping(String name) {
+ this(name, DEFAULT_FIELDNAME, EMPTY_VALUES);
+ }
+
+ /**
+     * Create a new {@link CategoryContextMapping} with the given field
+     * name and no default values
+ */
+ public CategoryContextMapping(String name, String fieldName) {
+ this(name, fieldName, EMPTY_VALUES);
+ }
+
+ /**
+     * Create a new {@link CategoryContextMapping} with the default field
+     * <code>[_type]</code> and the given default values
+ */
+ public CategoryContextMapping(String name, Iterable<String> defaultValues) {
+ this(name, DEFAULT_FIELDNAME, defaultValues);
+ }
+
+ /**
+     * Create a new {@link CategoryContextMapping} with the given field
+     * name and default values
+ */
+ public CategoryContextMapping(String name, String fieldName, Iterable<String> defaultValues) {
+ super(TYPE, name);
+ this.fieldName = fieldName;
+ this.defaultValues = defaultValues;
+ this.defaultConfig = new FieldConfig(fieldName, defaultValues, null);
+ }
+
+ /**
+ * Name of the field used by this {@link CategoryContextMapping}
+ */
+ public String getFieldName() {
+ return fieldName;
+ }
+
+ public Iterable<? extends CharSequence> getDefaultValues() {
+ return defaultValues;
+ }
+
+ @Override
+ public FieldConfig defaultConfig() {
+ return defaultConfig;
+ }
+
+ /**
+ * Load the specification of a {@link CategoryContextMapping}
+ *
+ * @param name
+     *            name of the context mapping to load
+     * @param config configuration of the mapping, holding the field path and any default values
+ * @return new {@link CategoryContextMapping}
+ */
+ protected static CategoryContextMapping load(String name, Map<String, Object> config) throws ElasticsearchParseException {
+ CategoryContextMapping.Builder mapping = new CategoryContextMapping.Builder(name);
+
+ Object fieldName = config.get(FIELD_FIELDNAME);
+ Object defaultValues = config.get(FIELD_MISSING);
+
+ if (fieldName != null) {
+ mapping.fieldName(fieldName.toString());
+ config.remove(FIELD_FIELDNAME);
+ }
+
+ if (defaultValues != null) {
+ if (defaultValues instanceof Iterable) {
+ for (Object value : (Iterable) defaultValues) {
+ mapping.addDefaultValue(value.toString());
+ }
+ } else {
+ mapping.addDefaultValue(defaultValues.toString());
+ }
+ config.remove(FIELD_MISSING);
+ }
+
+ return mapping.build();
+ }
+
+ @Override
+ protected XContentBuilder toInnerXContent(XContentBuilder builder, Params params) throws IOException {
+ if (fieldName != null) {
+ builder.field(FIELD_FIELDNAME, fieldName);
+ }
+ builder.startArray(FIELD_MISSING);
+ for (CharSequence value : defaultValues) {
+ builder.value(value);
+ }
+ builder.endArray();
+ return builder;
+ }
+
+ @Override
+ public ContextConfig parseContext(ParseContext parseContext, XContentParser parser) throws IOException, ElasticsearchParseException {
+ Token token = parser.currentToken();
+ if (token == Token.VALUE_NULL) {
+ return new FieldConfig(fieldName, defaultValues, null);
+ } else if (token == Token.VALUE_STRING) {
+ return new FieldConfig(fieldName, null, Collections.singleton(parser.text()));
+ } else if (token == Token.VALUE_NUMBER) {
+ return new FieldConfig(fieldName, null, Collections.singleton(parser.text()));
+ } else if (token == Token.VALUE_BOOLEAN) {
+ return new FieldConfig(fieldName, null, Collections.singleton(parser.text()));
+ } else if (token == Token.START_ARRAY) {
+ ArrayList<String> values = new ArrayList<>();
+ while((token = parser.nextToken()) != Token.END_ARRAY) {
+ values.add(parser.text());
+ }
+ if(values.isEmpty()) {
+                throw new ElasticsearchParseException("FieldConfig must contain at least one category");
+ }
+ return new FieldConfig(fieldName, null, values);
+ } else {
+ throw new ElasticsearchParseException("FieldConfig must be either [null], a string or a list of strings");
+ }
+ }
+
+ @Override
+ public FieldQuery parseQuery(String name, XContentParser parser) throws IOException, ElasticsearchParseException {
+ Iterable<? extends CharSequence> values;
+ Token token = parser.currentToken();
+ if (token == Token.START_ARRAY) {
+ ArrayList<String> list = new ArrayList<>();
+ while ((token = parser.nextToken()) != Token.END_ARRAY) {
+ list.add(parser.text());
+ }
+ values = list;
+ } else if (token == Token.VALUE_NULL) {
+ values = defaultValues;
+ } else {
+ values = Collections.singleton(parser.text());
+ }
+
+ return new FieldQuery(name, values);
+ }
+
+ public static FieldQuery query(String name, CharSequence... fieldvalues) {
+ return query(name, Arrays.asList(fieldvalues));
+ }
+
+ public static FieldQuery query(String name, Iterable<? extends CharSequence> fieldvalues) {
+ return new FieldQuery(name, fieldvalues);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj instanceof CategoryContextMapping) {
+ CategoryContextMapping other = (CategoryContextMapping) obj;
+ if (this.fieldName.equals(other.fieldName)) {
+ return Objects.deepEquals(this.defaultValues, other.defaultValues);
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ int hashCode = fieldName.hashCode();
+ for (CharSequence seq : defaultValues) {
+ hashCode = 31 * hashCode + seq.hashCode();
+ }
+ return hashCode;
+ }
+
+ private static class FieldConfig extends ContextConfig {
+
+ private final String fieldname;
+ private final Iterable<String> defaultValues;
+ private final Iterable<String> values;
+
+ public FieldConfig(String fieldname, Iterable<String> defaultValues, Iterable<String> values) {
+ this.fieldname = fieldname;
+ this.defaultValues = defaultValues;
+ this.values = values;
+ }
+
+ @Override
+ protected TokenStream wrapTokenStream(Document doc, TokenStream stream) {
+ if (values != null) {
+ return new PrefixAnalyzer.PrefixTokenFilter(stream, ContextMapping.SEPARATOR, values);
+ // if fieldname is default, BUT our default values are set, we take that one
+ } else if ((doc.getFields(fieldname).length == 0
+ || fieldname.equals(DEFAULT_FIELDNAME)) && defaultValues.iterator().hasNext()) {
+ return new PrefixAnalyzer.PrefixTokenFilter(stream, ContextMapping.SEPARATOR, defaultValues);
+ } else {
+ IndexableField[] fields = doc.getFields(fieldname);
+ ArrayList<CharSequence> values = new ArrayList<>(fields.length);
+ for (int i = 0; i < fields.length; i++) {
+ values.add(fields[i].stringValue());
+ }
+
+ return new PrefixAnalyzer.PrefixTokenFilter(stream, ContextMapping.SEPARATOR, values);
+ }
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder("FieldConfig(" + fieldname + " = [");
+ if (this.values != null && this.values.iterator().hasNext()) {
+ final Iterator<String> valuesIterator = this.values.iterator();
+ sb.append("(");
+ while (valuesIterator.hasNext()) {
+ sb.append(valuesIterator.next());
+ if (valuesIterator.hasNext()) {
+ sb.append(", ");
+ }
+ }
+ sb.append(")");
+ }
+ if (this.defaultValues != null && this.defaultValues.iterator().hasNext()) {
+ final Iterator<String> defaultValuesIterator = this.defaultValues.iterator();
+ sb.append(" default(");
+ while (defaultValuesIterator.hasNext()) {
+ sb.append(defaultValuesIterator.next());
+ if (defaultValuesIterator.hasNext()) {
+ sb.append(", ");
+ }
+ }
+ sb.append(")");
+ }
+ return sb.append("])").toString();
+ }
+
+ }
+
+ private static class FieldQuery extends ContextQuery {
+
+ private final Iterable<? extends CharSequence> values;
+
+ public FieldQuery(String name, Iterable<? extends CharSequence> values) {
+ super(name);
+ this.values = values;
+ }
+
+ @Override
+ public Automaton toAutomaton() {
+ List<Automaton> automatons = new ArrayList<>();
+ for (CharSequence value : values) {
+ automatons.add(Automata.makeString(value.toString()));
+ }
+ return Operations.union(automatons);
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startArray(name);
+ for (CharSequence value : values) {
+ builder.value(value);
+ }
+ builder.endArray();
+ return builder;
+ }
+ }
+
+ public static class Builder extends ContextBuilder<CategoryContextMapping> {
+
+ private String fieldname;
+ private List<String> defaultValues = new ArrayList<>();
+
+ public Builder(String name) {
+ this(name, DEFAULT_FIELDNAME);
+ }
+
+ public Builder(String name, String fieldname) {
+ super(name);
+ this.fieldname = fieldname;
+ }
+
+ /**
+ * Set the name of the field to use
+ */
+ public Builder fieldName(String fieldname) {
+ this.fieldname = fieldname;
+ return this;
+ }
+
+ /**
+ * Add value to the default values of the mapping
+ */
+ public Builder addDefaultValue(String defaultValue) {
+ this.defaultValues.add(defaultValue);
+ return this;
+ }
+
+ /**
+ * Add set of default values to the mapping
+ */
+ public Builder addDefaultValues(String... defaultValues) {
+ Collections.addAll(this.defaultValues, defaultValues);
+ return this;
+ }
+
+ /**
+ * Add set of default values to the mapping
+ */
+ public Builder addDefaultValues(Iterable<String> defaultValues) {
+ for (String defaultValue : defaultValues) {
+ this.defaultValues.add(defaultValue);
+ }
+ return this;
+ }
+
+ @Override
+ public CategoryContextMapping build() {
+ return new CategoryContextMapping(name, fieldname, defaultValues);
+ }
+ }
+}
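A short usage sketch (not part of the patch) of the builder added above, defining a category context that reads its values from a document field and falls back to a default; the context name, field name and values are illustrative.

import org.elasticsearch.search.suggest.completion2x.context.CategoryContextMapping;

public class CategoryMappingBuilderSketch {
    public static void main(String[] args) {
        // Category context "color" backed by the document field "color_field",
        // defaulting to "red" when the document provides no value.
        CategoryContextMapping colors = new CategoryContextMapping.Builder("color", "color_field")
                .addDefaultValue("red")
                .build();
        System.out.println(colors.getFieldName() + " / " + colors.getDefaultValues());
    }
}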
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion2x/context/ContextBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/context/ContextBuilder.java
new file mode 100644
index 0000000000..16ef0053bb
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/context/ContextBuilder.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.suggest.completion2x.context;
+
+import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.Version;
+import org.elasticsearch.index.mapper.DocumentMapperParser;
+
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+public abstract class ContextBuilder<E extends ContextMapping> {
+
+ protected String name;
+
+ public ContextBuilder(String name) {
+ this.name = name;
+ }
+
+ public abstract E build();
+
+ /**
+ * Create a new {@link GeolocationContextMapping}
+ */
+ public static GeolocationContextMapping.Builder location(String name) {
+ return new GeolocationContextMapping.Builder(name);
+ }
+
+ /**
+ * Create a new {@link GeolocationContextMapping} with given precision and
+ * neighborhood usage
+ *
+ * @param precision geohash length
+ * @param neighbors use neighbor cells
+ */
+ public static GeolocationContextMapping.Builder location(String name, int precision, boolean neighbors) {
+ return new GeolocationContextMapping.Builder(name, neighbors, precision);
+ }
+
+ /**
+ * Create a new {@link CategoryContextMapping}
+ */
+ public static CategoryContextMapping.Builder category(String name) {
+ return new CategoryContextMapping.Builder(name, null);
+ }
+
+ /**
+ * Create a new {@link CategoryContextMapping} with default category
+ *
+     * @param defaultCategory category to use if the document provides none
+ */
+ public static CategoryContextMapping.Builder category(String name, String defaultCategory) {
+ return new CategoryContextMapping.Builder(name, null).addDefaultValue(defaultCategory);
+ }
+
+ /**
+ * Create a new {@link CategoryContextMapping}
+ *
+ * @param fieldname
+ * name of the field to use
+ */
+ public static CategoryContextMapping.Builder reference(String name, String fieldname) {
+ return new CategoryContextMapping.Builder(name, fieldname);
+ }
+
+ /**
+ * Create a new {@link CategoryContextMapping}
+ *
+ * @param fieldname name of the field to use
+     * @param defaultValues values to use if the document does not provide
+     *        a field with the given name
+ */
+ public static CategoryContextMapping.Builder reference(String name, String fieldname, Iterable<String> defaultValues) {
+ return new CategoryContextMapping.Builder(name, fieldname).addDefaultValues(defaultValues);
+ }
+
+ public static SortedMap<String, ContextMapping> loadMappings(Object configuration, Version indexVersionCreated)
+ throws ElasticsearchParseException {
+ if (configuration instanceof Map) {
+ Map<String, Object> configurations = (Map<String, Object>)configuration;
+ SortedMap<String, ContextMapping> mappings = new TreeMap<>();
+ for (Entry<String,Object> config : configurations.entrySet()) {
+ String name = config.getKey();
+ mappings.put(name, loadMapping(name, (Map<String, Object>) config.getValue(), indexVersionCreated));
+ }
+ return mappings;
+ } else if (configuration == null) {
+ return ContextMapping.EMPTY_MAPPING;
+ } else {
+ throw new ElasticsearchParseException("no valid context configuration");
+ }
+ }
+
+ protected static ContextMapping loadMapping(String name, Map<String, Object> config, Version indexVersionCreated)
+ throws ElasticsearchParseException {
+ final Object argType = config.get(ContextMapping.FIELD_TYPE);
+
+ if (argType == null) {
+ throw new ElasticsearchParseException("missing [{}] in context mapping", ContextMapping.FIELD_TYPE);
+ }
+
+ final String type = argType.toString();
+ ContextMapping contextMapping;
+ if (GeolocationContextMapping.TYPE.equals(type)) {
+ contextMapping = GeolocationContextMapping.load(name, config);
+ } else if (CategoryContextMapping.TYPE.equals(type)) {
+ contextMapping = CategoryContextMapping.load(name, config);
+ } else {
+ throw new ElasticsearchParseException("unknown context type [{}]", type);
+ }
+ config.remove(ContextMapping.FIELD_TYPE);
+ DocumentMapperParser.checkNoRemainingFields(name, config, indexVersionCreated);
+
+ return contextMapping;
+ }
+}
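To illustrate loadMappings(), here is a minimal sketch (not part of the patch) that loads a single category context from an already-parsed mapping configuration; the context name "color" and field "color_field" are illustrative, and the maps must be mutable because the loaders strip consumed keys before checking for leftovers.

import org.elasticsearch.Version;
import org.elasticsearch.search.suggest.completion2x.context.ContextBuilder;
import org.elasticsearch.search.suggest.completion2x.context.ContextMapping;

import java.util.HashMap;
import java.util.Map;
import java.util.SortedMap;

public class ContextMappingLoadSketch {
    public static void main(String[] args) {
        // A parsed 2.x "context" section of a completion field mapping.
        Map<String, Object> color = new HashMap<>();
        color.put("type", "category");
        color.put("path", "color_field");
        Map<String, Object> configuration = new HashMap<>();
        configuration.put("color", color);

        SortedMap<String, ContextMapping> mappings =
                ContextBuilder.loadMappings(configuration, Version.CURRENT);
        System.out.println(mappings.keySet()); // [color]
    }
}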
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion2x/context/ContextMapping.java b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/context/ContextMapping.java
new file mode 100644
index 0000000000..dd23bdecb0
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/context/ContextMapping.java
@@ -0,0 +1,319 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.suggest.completion2x.context;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
+import org.apache.lucene.util.automaton.Automata;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.util.fst.FST;
+import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.common.xcontent.ToXContent;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.XContentParser.Token;
+import org.elasticsearch.common.xcontent.json.JsonXContent;
+import org.elasticsearch.index.mapper.ParseContext;
+import org.elasticsearch.index.mapper.ParseContext.Document;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.SortedMap;
+
+/**
+ * A {@link ContextMapping} is used to define a context that may be used
+ * in conjunction with a suggester. To define a suggester that depends on a
+ * specific context, a derived class of {@link ContextMapping} is
+ * used to specify the kind of additional information required in order to make
+ * suggestions.
+ */
+public abstract class ContextMapping implements ToXContent {
+
+ /** Character used to separate several contexts */
+ public static final char SEPARATOR = '\u001D';
+
+ /** Dummy Context Mapping that should be used if no context is used*/
+ public static final SortedMap<String, ContextMapping> EMPTY_MAPPING = Collections.emptySortedMap();
+
+ /** Dummy Context Config matching the Dummy Mapping by providing an empty context*/
+ public static final SortedMap<String, ContextConfig> EMPTY_CONFIG = Collections.emptySortedMap();
+
+ /** Dummy Context matching the Dummy Mapping by not wrapping a {@link TokenStream} */
+ public static final Context EMPTY_CONTEXT = new Context(EMPTY_CONFIG, null);
+
+ public static final String FIELD_VALUE = "value";
+ public static final String FIELD_MISSING = "default";
+ public static final String FIELD_TYPE = "type";
+
+    protected final String type; // type of the ContextMapping
+ protected final String name;
+
+ /**
+ * Define a new context mapping of a specific type
+ *
+     * @param type type of the new context mapping
+     * @param name name of the new context mapping
+ */
+ protected ContextMapping(String type, String name) {
+ super();
+ this.type = type;
+ this.name = name;
+ }
+
+ /**
+ * @return the type name of the context
+ */
+ protected String type() {
+ return type;
+ }
+
+ /**
+ * @return the name/id of the context
+ */
+ public String name() {
+ return name;
+ }
+
+ @Override
+ public final XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject(name);
+ builder.field(FIELD_TYPE, type);
+ toInnerXContent(builder, params);
+ builder.endObject();
+ return builder;
+ }
+
+ /**
+ * A {@link ContextMapping} combined with the information provided by a document
+ * form a {@link ContextConfig} which is used to build the underlying FST.
+ *
+ * @param parseContext context of parsing phase
+ * @param parser {@link XContentParser} used to read and setup the configuration
+ * @return A {@link ContextConfig} related to <b>this</b> mapping
+ *
+ */
+ public abstract ContextConfig parseContext(ParseContext parseContext, XContentParser parser)
+ throws IOException, ElasticsearchParseException;
+
+ public abstract ContextConfig defaultConfig();
+
+ /**
+     * Parse a query according to the context. Parsing starts at the parser's <b>current</b> position
+ *
+ * @param name name of the context
+ * @param parser {@link XContentParser} providing the data of the query
+ *
+ * @return {@link ContextQuery} according to this mapping
+ *
+ */
+ public abstract ContextQuery parseQuery(String name, XContentParser parser) throws IOException, ElasticsearchParseException;
+
+ /**
+     * Since every context mapping is assumed to have a name given by the field name of a context object, this
+ * method is used to build the value used to serialize the mapping
+ *
+ * @param builder builder to append the mapping to
+ * @param params parameters passed to the builder
+ *
+ * @return the builder used
+ *
+ */
+ protected abstract XContentBuilder toInnerXContent(XContentBuilder builder, Params params) throws IOException;
+
+ /**
+     * Test equality of two mappings
+ *
+ * @param thisMappings first mapping
+ * @param otherMappings second mapping
+ *
+ * @return true if both arguments are equal
+ */
+ public static boolean mappingsAreEqual(SortedMap<String, ? extends ContextMapping> thisMappings,
+ SortedMap<String, ? extends ContextMapping> otherMappings) {
+ return Objects.equals(thisMappings, otherMappings);
+ }
+
+ @Override
+ public String toString() {
+ try {
+ return toXContent(JsonXContent.contentBuilder(), ToXContent.EMPTY_PARAMS).string();
+ } catch (IOException e) {
+ return super.toString();
+ }
+ }
+
+ /**
+ * A collection of {@link ContextMapping}s, their {@link ContextConfig}uration and a
+     * Document form a complete {@link Context}. Since this object provides all the information used
+     * to set up a suggestion, it can be used to wrap the entire {@link TokenStream} used to build a
+ * path within the {@link FST}.
+ */
+ public static class Context {
+
+ final SortedMap<String, ContextConfig> contexts;
+ final Document doc;
+
+ public Context(SortedMap<String, ContextConfig> contexts, Document doc) {
+ super();
+ this.contexts = contexts;
+ this.doc = doc;
+ }
+
+ /**
+         * Wrap the {@link TokenStream} according to the information provided by the {@link ContextConfig}s
+ * and a related {@link Document}.
+ *
+ * @param tokenStream {@link TokenStream} to wrap
+ *
+ * @return wrapped token stream
+ */
+ public TokenStream wrapTokenStream(TokenStream tokenStream) {
+ for (ContextConfig context : contexts.values()) {
+ tokenStream = context.wrapTokenStream(doc, tokenStream);
+ }
+ return tokenStream;
+ }
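+
+        // Descriptive note (not part of the original source): each configured context wraps the
+        // stream returned by the previous one, so filters are applied in the sorted order of the
+        // context names, with the last context producing the outermost filter.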
+ }
+
+ /**
+ * A {@link ContextMapping} combined with the information provided by a document
+     * forms a {@link ContextConfig} which is used to build the underlying {@link FST}. This class holds
+     * a simple method wrapping a {@link TokenStream} using the information provided by a document.
+ */
+ public static abstract class ContextConfig {
+
+ /**
+         * Wrap a {@link TokenStream} for building suggestions to use context information
+ * provided by a document or a {@link ContextMapping}
+ *
+ * @param doc document related to the stream
+ * @param stream original stream used to build the underlying {@link FST}
+ *
+ * @return A new {@link TokenStream} providing additional context information
+ */
+ protected abstract TokenStream wrapTokenStream(Document doc, TokenStream stream);
+
+ }
+
+ /**
+ * A {@link ContextQuery} defines the context information for a specific {@link ContextMapping}
+ * defined within a suggestion request. According to the parameters set in the request and the
+ * {@link ContextMapping} such a query is used to wrap the {@link TokenStream} of the actual
+ * suggestion request into a {@link TokenStream} with the context settings
+ */
+ public static abstract class ContextQuery implements ToXContent {
+
+ protected final String name;
+
+ protected ContextQuery(String name) {
+ this.name = name;
+ }
+
+ public String name() {
+ return name;
+ }
+
+ /**
+         * Create an automaton for a given context query. This automaton will be used
+         * to find the matching paths within the FST.
+ *
+ * @param preserveSep set an additional char (<code>XAnalyzingSuggester.SEP_LABEL</code>) between each context query
+ * @param queries list of {@link ContextQuery} defining the lookup context
+ *
+ * @return Automaton matching the given Query
+ */
+ public static Automaton toAutomaton(boolean preserveSep, Iterable<ContextQuery> queries) {
+ Automaton a = Automata.makeEmptyString();
+
+ Automaton gap = Automata.makeChar(ContextMapping.SEPARATOR);
+ if (preserveSep) {
+ // if separators are preserved the fst contains a SEP_LABEL
+ // behind each gap. To have a matching automaton, we need to
+ // include the SEP_LABEL in the query as well
+ gap = Operations.concatenate(gap, Automata.makeChar(XAnalyzingSuggester.SEP_LABEL));
+ }
+
+ for (ContextQuery query : queries) {
+ a = Operations.concatenate(Arrays.asList(query.toAutomaton(), gap, a));
+ }
+
+ // TODO: should we limit this? Do any of our ContextQuery impls really create exponential regexps?
+ // GeoQuery looks safe (union of strings).
+ return Operations.determinize(a, Integer.MAX_VALUE);
+ }
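+
+        // Descriptive note (not part of the original source): each loop iteration above prepends
+        // a query automaton followed by the gap in front of what has been built so far, so for
+        // two queries yielding "a" then "b" the result matches "b" SEPARATOR "a" SEPARATOR
+        // (with SEP_LABEL following each separator when preserveSep is set).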
+
+ /**
+ * Build a LookUp Automaton for this context.
+ * @return LookUp Automaton
+ */
+ protected abstract Automaton toAutomaton();
+
+ /**
+ * Parse a set of {@link ContextQuery} according to a given mapping
+         * @param mappings List of mappings defined by the suggest field
+         * @param parser parser holding the settings of the queries. The parser's
+         *  current token is assumed to be at the start of an object whose field
+         *  names correspond to the names of the defined mappings.
+ * @return List of context queries
+ *
+ * @throws IOException if something unexpected happened on the underlying stream
+ * @throws ElasticsearchParseException if the list of queries could not be parsed
+ */
+ public static List<ContextQuery> parseQueries(Map<String, ContextMapping> mappings, XContentParser parser)
+ throws IOException, ElasticsearchParseException {
+
+ Map<String, ContextQuery> querySet = new HashMap<>();
+ Token token = parser.currentToken();
+ if(token == Token.START_OBJECT) {
+ while ((token = parser.nextToken()) != Token.END_OBJECT) {
+ String name = parser.currentName();
+ ContextMapping mapping = mappings.get(name);
+ if (mapping == null) {
+ throw new ElasticsearchParseException("no mapping defined for [{}]", name);
+ }
+ parser.nextToken();
+ querySet.put(name, mapping.parseQuery(name, parser));
+ }
+ }
+
+ List<ContextQuery> queries = new ArrayList<>(mappings.size());
+ for (ContextMapping mapping : mappings.values()) {
+ queries.add(querySet.get(mapping.name));
+ }
+ return queries;
+ }
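+
+        // Illustrative input (a sketch; the context names and values are made up): the parser is
+        // expected to be positioned on an object keyed by context names, e.g.
+        //   { "color" : "red", "location" : "u33d" }
+        // Each value is handed to the matching ContextMapping.parseQuery; mappings without a
+        // corresponding entry contribute a null placeholder at their position in the result.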
+
+ @Override
+ public String toString() {
+ try {
+ return toXContent(JsonXContent.contentBuilder(), ToXContent.EMPTY_PARAMS).string();
+ } catch (IOException e) {
+ return super.toString();
+ }
+ }
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion2x/context/GeolocationContextMapping.java b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/context/GeolocationContextMapping.java
new file mode 100644
index 0000000000..e131efce9e
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion2x/context/GeolocationContextMapping.java
@@ -0,0 +1,750 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.suggest.completion2x.context;
+
+import com.carrotsearch.hppc.IntHashSet;
+import org.apache.lucene.analysis.PrefixAnalyzer.PrefixTokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.spatial.geopoint.document.GeoPointField;
+import org.apache.lucene.util.automaton.Automata;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.util.fst.FST;
+import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.common.geo.GeoHashUtils;
+import org.elasticsearch.common.geo.GeoPoint;
+import org.elasticsearch.common.geo.GeoUtils;
+import org.elasticsearch.common.unit.DistanceUnit;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.XContentParser.Token;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.ParseContext;
+import org.elasticsearch.index.mapper.ParseContext.Document;
+import org.elasticsearch.index.mapper.geo.GeoPointFieldMapper;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+
+/**
+ * The {@link GeolocationContextMapping} allows geo information to be taken into account
+ * when building suggestions. The mapping itself works with geohashes
+ * explicitly and is configured by three parameters:
+ * <ul>
+ * <li><code>precision</code>: length of the geohash indexed as prefix of the
+ * completion field</li>
+ * <li><code>neighbors</code>: Should the neighbor cells of the deepest geohash
+ * level also be indexed as alternatives to the actual geohash</li>
+ * <li><code>location</code>: (optional) location assumed if it is not provided</li>
+ * </ul>
+ * Internally this mapping wraps the suggestions into a form
+ * <code>[geohash][suggestion]</code>. If the neighbor option is set the cells
+ * next to the cell on the deepest geohash level (<code>precision</code>) will
+ * be indexed as well. The {@link TokenStream} used to build the {@link FST} for
+ * suggestions will be wrapped into a {@link PrefixTokenFilter} managing these
+ * geohashes as prefixes.
+ */
+public class GeolocationContextMapping extends ContextMapping {
+
+ public static final String TYPE = "geo";
+
+ public static final String FIELD_PRECISION = "precision";
+ public static final String FIELD_NEIGHBORS = "neighbors";
+ public static final String FIELD_FIELDNAME = "path";
+
+ private final Collection<String> defaultLocations;
+ private final int[] precision;
+ private final boolean neighbors;
+ private final String fieldName;
+ private final GeoConfig defaultConfig;
+
+ /**
+ * Create a new {@link GeolocationContextMapping} with a given precision
+ *
+ * @param precision
+ * length of the geohashes
+ * @param neighbors
+ * should neighbors be indexed
+ * @param defaultLocations
+ * location to use, if it is not provided by the document
+ */
+ protected GeolocationContextMapping(String name, int[] precision, boolean neighbors,
+ Collection<String> defaultLocations, String fieldName) {
+ super(TYPE, name);
+ this.precision = precision;
+ this.neighbors = neighbors;
+ this.defaultLocations = defaultLocations;
+ this.fieldName = fieldName;
+ this.defaultConfig = new GeoConfig(this, defaultLocations);
+ }
+
+ /**
+ * Load a {@link GeolocationContextMapping} from a configuration. Such a configuration
+ * can set the parameters
+ * <ul>
+ * <li>precision [<code>String</code>, <code>Double</code>,
+ * <code>Float</code> or <code>Integer</code>] defines the length of the
+ * underlying geohash</li>
+ * <li>defaultLocation [<code>String</code>] defines the location to use if
+ * it is not provided by the document</li>
+ * <li>neighbors [<code>Boolean</code>] defines if the last level of the
+ * geohash should be extended by neighbor cells</li>
+ * </ul>
+ *
+ * @param config
+ * Configuration for {@link GeolocationContextMapping}
+ * @return new {@link GeolocationContextMapping} configured by the parameters of
+ * <code>config</code>
+ */
+ protected static GeolocationContextMapping load(String name, Map<String, Object> config) {
+ if (!config.containsKey(FIELD_PRECISION)) {
+ throw new ElasticsearchParseException("field [precision] is missing");
+ }
+
+ final GeolocationContextMapping.Builder builder = new GeolocationContextMapping.Builder(name);
+
+ if (config != null) {
+ final Object configPrecision = config.get(FIELD_PRECISION);
+ if (configPrecision == null) {
+ // ignore precision
+ } else if (configPrecision instanceof Integer) {
+ builder.precision((Integer) configPrecision);
+ config.remove(FIELD_PRECISION);
+ } else if (configPrecision instanceof Long) {
+ builder.precision((Long) configPrecision);
+ config.remove(FIELD_PRECISION);
+ } else if (configPrecision instanceof Double) {
+ builder.precision((Double) configPrecision);
+ config.remove(FIELD_PRECISION);
+ } else if (configPrecision instanceof Float) {
+ builder.precision((Float) configPrecision);
+ config.remove(FIELD_PRECISION);
+ } else if (configPrecision instanceof Iterable) {
+ for (Object precision : (Iterable)configPrecision) {
+ if (precision instanceof Integer) {
+ builder.precision((Integer) precision);
+ } else if (precision instanceof Long) {
+ builder.precision((Long) precision);
+ } else if (precision instanceof Double) {
+ builder.precision((Double) precision);
+ } else if (precision instanceof Float) {
+ builder.precision((Float) precision);
+ } else {
+ builder.precision(precision.toString());
+ }
+ }
+ config.remove(FIELD_PRECISION);
+ } else {
+ builder.precision(configPrecision.toString());
+ config.remove(FIELD_PRECISION);
+ }
+
+ final Object configNeighbors = config.get(FIELD_NEIGHBORS);
+ if (configNeighbors != null) {
+ builder.neighbors((Boolean) configNeighbors);
+ config.remove(FIELD_NEIGHBORS);
+ }
+
+ final Object def = config.get(FIELD_MISSING);
+ if (def != null) {
+ if (def instanceof Iterable) {
+ for (Object location : (Iterable)def) {
+ builder.addDefaultLocation(location.toString());
+ }
+ } else if (def instanceof String) {
+ builder.addDefaultLocation(def.toString());
+ } else if (def instanceof Map) {
+ Map<String, Object> latlonMap = (Map<String, Object>) def;
+ if (!latlonMap.containsKey("lat") || !(latlonMap.get("lat") instanceof Double)) {
+ throw new ElasticsearchParseException(
+ "field [{}] map must have field lat and a valid latitude", FIELD_MISSING);
+ }
+ if (!latlonMap.containsKey("lon") || !(latlonMap.get("lon") instanceof Double)) {
+ throw new ElasticsearchParseException(
+ "field [{}] map must have field lon and a valid longitude", FIELD_MISSING);
+ }
+ builder.addDefaultLocation(
+ Double.valueOf(latlonMap.get("lat").toString()), Double.valueOf(latlonMap.get("lon").toString()));
+ } else {
+ throw new ElasticsearchParseException("field [{}] must be of type string or list", FIELD_MISSING);
+ }
+ config.remove(FIELD_MISSING);
+ }
+
+ final Object fieldName = config.get(FIELD_FIELDNAME);
+ if (fieldName != null) {
+ builder.field(fieldName.toString());
+ config.remove(FIELD_FIELDNAME);
+ }
+ }
+ return builder.build();
+ }
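+
+    // Illustrative configuration map handled by load (a sketch; the field name "pin" and all
+    // values are made up):
+    //   { "precision" : [ 5, "10km" ], "neighbors" : true,
+    //     "default" : { "lat" : 52.52, "lon" : 13.40 }, "path" : "pin" }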
+
+ @Override
+ protected XContentBuilder toInnerXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.field(FIELD_PRECISION, precision);
+ builder.field(FIELD_NEIGHBORS, neighbors);
+ if (defaultLocations != null) {
+ builder.startArray(FIELD_MISSING);
+ for (String defaultLocation : defaultLocations) {
+ builder.value(defaultLocation);
+ }
+ builder.endArray();
+ }
+ if (fieldName != null) {
+ builder.field(FIELD_FIELDNAME, fieldName);
+ }
+ return builder;
+ }
+
+ protected static Collection<String> parseSinglePointOrList(XContentParser parser) throws IOException {
+ Token token = parser.currentToken();
+ if(token == Token.START_ARRAY) {
+ token = parser.nextToken();
+ // Test if value is a single point in <code>[lon, lat]</code> format
+ if(token == Token.VALUE_NUMBER) {
+ double lon = parser.doubleValue();
+ if(parser.nextToken() == Token.VALUE_NUMBER) {
+ double lat = parser.doubleValue();
+ if(parser.nextToken() == Token.END_ARRAY) {
+ return Collections.singleton(GeoHashUtils.stringEncode(lon, lat));
+ } else {
+ throw new ElasticsearchParseException("only two values expected");
+ }
+ } else {
+                    throw new ElasticsearchParseException("latitude must be a numeric value");
+ }
+ } else {
+ // otherwise it's a list of locations
+ ArrayList<String> result = new ArrayList<>();
+ while (token != Token.END_ARRAY) {
+ result.add(GeoUtils.parseGeoPoint(parser).geohash());
+ token = parser.nextToken(); //infinite loop without this line
+ }
+ return result;
+ }
+ } else {
+ // or a single location
+ return Collections.singleton(GeoUtils.parseGeoPoint(parser).geohash());
+ }
+ }
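+
+    // Descriptive note (not part of the original source): accepted inputs are a single
+    // [lon, lat] number pair, an array of points (geohash strings or objects understood by
+    // GeoUtils.parseGeoPoint), or a single point value; each is reduced to its geohash.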
+
+ @Override
+ public ContextConfig defaultConfig() {
+ return defaultConfig;
+ }
+
+ @Override
+ public ContextConfig parseContext(ParseContext parseContext, XContentParser parser)
+ throws IOException, ElasticsearchParseException {
+
+ if(fieldName != null) {
+ FieldMapper mapper = parseContext.docMapper().mappers().getMapper(fieldName);
+ if(!(mapper instanceof GeoPointFieldMapper)) {
+ throw new ElasticsearchParseException("referenced field must be mapped to geo_point");
+ }
+ }
+
+ Collection<String> locations;
+ if(parser.currentToken() == Token.VALUE_NULL) {
+ locations = null;
+ } else {
+ locations = parseSinglePointOrList(parser);
+ }
+ return new GeoConfig(this, locations);
+ }
+
+ /**
+ * Create a new geolocation query from a given GeoPoint
+ *
+ * @param point
+ * query location
+ * @return new geolocation query
+ */
+ public static GeoQuery query(String name, GeoPoint point) {
+ return query(name, point.getGeohash());
+ }
+
+ /**
+ * Create a new geolocation query from a given geocoordinate
+ *
+ * @param lat
+ * latitude of the location
+ * @param lon
+ * longitude of the location
+ * @return new geolocation query
+ */
+ public static GeoQuery query(String name, double lat, double lon, int ... precisions) {
+ return query(name, GeoHashUtils.stringEncode(lon, lat), precisions);
+ }
+
+ public static GeoQuery query(String name, double lat, double lon, String ... precisions) {
+ int precisionInts[] = new int[precisions.length];
+ for (int i = 0 ; i < precisions.length; i++) {
+ precisionInts[i] = GeoUtils.geoHashLevelsForPrecision(precisions[i]);
+ }
+ return query(name, GeoHashUtils.stringEncode(lon, lat), precisionInts);
+ }
+
+ /**
+ * Create a new geolocation query from a given geohash
+ *
+ * @param geohash
+ * geohash of the location
+ * @return new geolocation query
+ */
+ public static GeoQuery query(String name, String geohash, int ... precisions) {
+ return new GeoQuery(name, geohash, precisions);
+ }
+
+ private static final int parsePrecision(XContentParser parser) throws IOException, ElasticsearchParseException {
+ switch (parser.currentToken()) {
+ case VALUE_STRING:
+ return GeoUtils.geoHashLevelsForPrecision(parser.text());
+ case VALUE_NUMBER:
+ switch (parser.numberType()) {
+ case INT:
+ case LONG:
+ return parser.intValue();
+ default:
+ return GeoUtils.geoHashLevelsForPrecision(parser.doubleValue());
+ }
+ default:
+ throw new ElasticsearchParseException("invalid precision value");
+ }
+ }
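+
+    // Descriptive note (not part of the original source): integer precisions are taken as
+    // geohash levels directly, while string or fractional values such as "10km" are first
+    // converted to the corresponding geohash level via GeoUtils.geoHashLevelsForPrecision.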
+
+ @Override
+ public GeoQuery parseQuery(String name, XContentParser parser) throws IOException, ElasticsearchParseException {
+ if (parser.currentToken() == Token.START_OBJECT) {
+ double lat = Double.NaN;
+ double lon = Double.NaN;
+ GeoPoint point = null;
+ int[] precision = null;
+
+ while (parser.nextToken() != Token.END_OBJECT) {
+ final String fieldName = parser.currentName();
+ if("lat".equals(fieldName)) {
+ if(point == null) {
+ parser.nextToken();
+ switch (parser.currentToken()) {
+ case VALUE_NUMBER:
+ case VALUE_STRING:
+ lat = parser.doubleValue(true);
+ break;
+ default:
+ throw new ElasticsearchParseException("latitude must be a number");
+ }
+ } else {
+ throw new ElasticsearchParseException("only lat/lon or [{}] is allowed", FIELD_VALUE);
+ }
+ } else if ("lon".equals(fieldName)) {
+ if(point == null) {
+ parser.nextToken();
+ switch (parser.currentToken()) {
+ case VALUE_NUMBER:
+ case VALUE_STRING:
+ lon = parser.doubleValue(true);
+ break;
+ default:
+ throw new ElasticsearchParseException("longitude must be a number");
+ }
+ } else {
+ throw new ElasticsearchParseException("only lat/lon or [{}] is allowed", FIELD_VALUE);
+ }
+ } else if (FIELD_PRECISION.equals(fieldName)) {
+ if(parser.nextToken() == Token.START_ARRAY) {
+ IntHashSet precisions = new IntHashSet();
+ while(parser.nextToken() != Token.END_ARRAY) {
+ precisions.add(parsePrecision(parser));
+ }
+ precision = precisions.toArray();
+ } else {
+ precision = new int[] { parsePrecision(parser) };
+ }
+ } else if (FIELD_VALUE.equals(fieldName)) {
+ if(Double.isNaN(lon) && Double.isNaN(lat)) {
+ parser.nextToken();
+ point = GeoUtils.parseGeoPoint(parser);
+ } else {
+ throw new ElasticsearchParseException("only lat/lon or [{}] is allowed", FIELD_VALUE);
+ }
+ } else {
+ throw new ElasticsearchParseException("unexpected fieldname [{}]", fieldName);
+ }
+ }
+
+ if (point == null) {
+ if (Double.isNaN(lat) || Double.isNaN(lon)) {
+ throw new ElasticsearchParseException("location is missing");
+ } else {
+ point = new GeoPoint(lat, lon);
+ }
+ }
+
+ if (precision == null || precision.length == 0) {
+ precision = this.precision;
+ }
+
+ return new GeoQuery(name, point.geohash(), precision);
+ } else {
+ return new GeoQuery(name, GeoUtils.parseGeoPoint(parser).getGeohash(), precision);
+ }
+ }
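+
+    // Illustrative query forms (a sketch; all values are made up): a bare point such as
+    // "u33d", an object such as { "lat" : 52.52, "lon" : 13.40, "precision" : "1km" }, or
+    // one using the value field, e.g. { "value" : { "lat" : 52.52, "lon" : 13.40 },
+    // "precision" : [ 2, 5 ] }.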
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + ((defaultLocations == null) ? 0 : defaultLocations.hashCode());
+ result = prime * result + ((fieldName == null) ? 0 : fieldName.hashCode());
+ result = prime * result + (neighbors ? 1231 : 1237);
+ result = prime * result + Arrays.hashCode(precision);
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ GeolocationContextMapping other = (GeolocationContextMapping) obj;
+ if (defaultLocations == null) {
+ if (other.defaultLocations != null)
+ return false;
+ } else if (!defaultLocations.equals(other.defaultLocations))
+ return false;
+ if (fieldName == null) {
+ if (other.fieldName != null)
+ return false;
+ } else if (!fieldName.equals(other.fieldName))
+ return false;
+ if (neighbors != other.neighbors)
+ return false;
+ if (!Arrays.equals(precision, other.precision))
+ return false;
+ return true;
+ }
+
+
+ public static class Builder extends ContextBuilder<GeolocationContextMapping> {
+
+ private IntHashSet precisions = new IntHashSet();
+        private boolean neighbors; // take neighbor cells on the lowest level into account
+ private HashSet<String> defaultLocations = new HashSet<>();
+ private String fieldName = null;
+
+ protected Builder(String name) {
+ this(name, true, null);
+ }
+
+ protected Builder(String name, boolean neighbors, int...levels) {
+ super(name);
+ neighbors(neighbors);
+ if (levels != null) {
+ for (int level : levels) {
+ precision(level);
+ }
+ }
+ }
+
+ /**
+         * Set the precision used to make suggestions
+ *
+ * @param precision
+ * precision as distance with {@link DistanceUnit}. Default:
+ * meters
+ * @return this
+ */
+ public Builder precision(String precision) {
+ return precision(DistanceUnit.parse(precision, DistanceUnit.METERS, DistanceUnit.METERS));
+ }
+
+ /**
+         * Set the precision used to make suggestions
+ *
+ * @param precision
+ * precision value
+ * @param unit
+ * {@link DistanceUnit} to use
+ * @return this
+ */
+ public Builder precision(double precision, DistanceUnit unit) {
+ return precision(unit.toMeters(precision));
+ }
+
+ /**
+         * Set the precision used to make suggestions
+ *
+ * @param meters
+ * precision as distance in meters
+ * @return this
+ */
+ public Builder precision(double meters) {
+ int level = GeoUtils.geoHashLevelsForPrecision(meters);
+ // Ceiling precision: we might return more results
+ if (GeoUtils.geoHashCellSize(level) < meters) {
+ level = Math.max(1, level - 1);
+ }
+ return precision(level);
+ }
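+
+        // Descriptive note (not part of the original source): if the cell size at the computed
+        // geohash level is smaller than the requested distance, one coarser level is used, so
+        // the lookup may return more results than strictly requested.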
+
+ /**
+         * Set the precision used to make suggestions
+ *
+ * @param level
+ * maximum length of geohashes
+ * @return this
+ */
+ public Builder precision(int level) {
+ this.precisions.add(level);
+ return this;
+ }
+
+ /**
+ * Set neighborhood usage
+ *
+ * @param neighbors
+ * should neighbor cells also be valid
+ * @return this
+ */
+ public Builder neighbors(boolean neighbors) {
+ this.neighbors = neighbors;
+ return this;
+ }
+
+ /**
+         * Set a default location that should be used if no location is
+ * provided by the query
+ *
+ * @param geohash
+ * geohash of the default location
+ * @return this
+ */
+ public Builder addDefaultLocation(String geohash) {
+ this.defaultLocations.add(geohash);
+ return this;
+ }
+
+ /**
+         * Set default locations that should be used if no location is
+         * provided by the query
+         *
+         * @param geohashes
+         *            geohashes of the default locations
+ * @return this
+ */
+ public Builder addDefaultLocations(Collection<String> geohashes) {
+ this.defaultLocations.addAll(geohashes);
+ return this;
+ }
+
+ /**
+         * Set a default location that should be used if no location is
+ * provided by the query
+ *
+ * @param lat
+ * latitude of the default location
+ * @param lon
+ * longitude of the default location
+ * @return this
+ */
+ public Builder addDefaultLocation(double lat, double lon) {
+ this.defaultLocations.add(GeoHashUtils.stringEncode(lon, lat));
+ return this;
+ }
+
+ /**
+         * Set a default location that should be used if no location is
+ * provided by the query
+ *
+ * @param point
+ * location
+ * @return this
+ */
+ public Builder defaultLocation(GeoPoint point) {
+ this.defaultLocations.add(point.geohash());
+ return this;
+ }
+
+ /**
+ * Set the name of the field containing a geolocation to use
+ * @param fieldName name of the field
+ * @return this
+ */
+ public Builder field(String fieldName) {
+ this.fieldName = fieldName;
+ return this;
+ }
+
+ @Override
+ public GeolocationContextMapping build() {
+ if(precisions.isEmpty()) {
+ precisions.add(GeoHashUtils.PRECISION);
+ }
+ int[] precisionArray = precisions.toArray();
+ Arrays.sort(precisionArray);
+ return new GeolocationContextMapping(name, precisionArray, neighbors, defaultLocations, fieldName);
+ }
+
+ }
+
+ private static class GeoConfig extends ContextConfig {
+
+ private final GeolocationContextMapping mapping;
+ private final Collection<String> locations;
+
+ public GeoConfig(GeolocationContextMapping mapping, Collection<String> locations) {
+ this.locations = locations;
+ this.mapping = mapping;
+ }
+
+ @Override
+ protected TokenStream wrapTokenStream(Document doc, TokenStream stream) {
+ Collection<String> geohashes;
+
+ if (locations == null || locations.size() == 0) {
+ if(mapping.fieldName != null) {
+ IndexableField[] fields = doc.getFields(mapping.fieldName);
+ if(fields.length == 0) {
+ IndexableField[] lonFields = doc.getFields(mapping.fieldName + ".lon");
+ IndexableField[] latFields = doc.getFields(mapping.fieldName + ".lat");
+ if (lonFields.length > 0 && latFields.length > 0) {
+ geohashes = new ArrayList<>(fields.length);
+ GeoPoint spare = new GeoPoint();
+ for (int i = 0 ; i < lonFields.length ; i++) {
+ IndexableField lonField = lonFields[i];
+ IndexableField latField = latFields[i];
+ assert lonField.fieldType().docValuesType() == latField.fieldType().docValuesType();
+ // we write doc values fields differently: one field for all values,
+ // so we need to only care about indexed fields
+ if (lonField.fieldType().docValuesType() == DocValuesType.NONE) {
+ spare.reset(latField.numericValue().doubleValue(), lonField.numericValue().doubleValue());
+ geohashes.add(spare.geohash());
+ }
+ }
+ } else {
+ geohashes = mapping.defaultLocations;
+ }
+ } else {
+ geohashes = new ArrayList<>(fields.length);
+ GeoPoint spare = new GeoPoint();
+ for (IndexableField field : fields) {
+ if (field instanceof StringField) {
+ spare.resetFromString(field.stringValue());
+ } else if (field instanceof GeoPointField) {
+ GeoPointField geoPointField = (GeoPointField) field;
+ spare.reset(geoPointField.getLat(), geoPointField.getLon());
+ } else {
+ spare.resetFromString(field.stringValue());
+ }
+ geohashes.add(spare.geohash());
+ }
+ }
+ } else {
+ geohashes = mapping.defaultLocations;
+ }
+ } else {
+ geohashes = locations;
+ }
+
+ Collection<String> locations = new HashSet<>();
+ for (String geohash : geohashes) {
+ for (int p : mapping.precision) {
+ int precision = Math.min(p, geohash.length());
+ String truncatedGeohash = geohash.substring(0, precision);
+ if(mapping.neighbors) {
+ GeoHashUtils.addNeighbors(truncatedGeohash, precision, locations);
+ }
+ locations.add(truncatedGeohash);
+ }
+ }
+
+ return new PrefixTokenFilter(stream, ContextMapping.SEPARATOR, locations);
+ }
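+
+        // Descriptive note (not part of the original source): the geohashes come from the parsed
+        // locations, otherwise from the referenced geo field of the document, otherwise from the
+        // mapping's default locations; each is truncated to every configured precision (plus
+        // neighbor cells when enabled) and prepended to the stream via PrefixTokenFilter.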
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder("GeoConfig(location = [");
+ Iterator<? extends CharSequence> location = this.locations.iterator();
+ if (location.hasNext()) {
+ sb.append(location.next());
+ while (location.hasNext()) {
+ sb.append(", ").append(location.next());
+ }
+ }
+ return sb.append("])").toString();
+ }
+ }
+
+ private static class GeoQuery extends ContextQuery {
+ private final String location;
+ private final int[] precisions;
+
+ public GeoQuery(String name, String location, int...precisions) {
+ super(name);
+ this.location = location;
+ this.precisions = precisions;
+ }
+
+ @Override
+ public Automaton toAutomaton() {
+ Automaton automaton;
+ if(precisions == null || precisions.length == 0) {
+ automaton = Automata.makeString(location);
+ } else {
+ automaton = Automata.makeString(
+ location.substring(0, Math.max(1, Math.min(location.length(), precisions[0]))));
+ for (int i = 1; i < precisions.length; i++) {
+ final String cell = location.substring(0, Math.max(1, Math.min(location.length(), precisions[i])));
+ automaton = Operations.union(automaton, Automata.makeString(cell));
+ }
+ }
+ return automaton;
+ }
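+
+        // Illustrative example (not part of the original source): for location "u33dbfc" and
+        // precisions {2, 5} the automaton matches the prefixes "u3" and "u33db", i.e. the
+        // geohash truncated to each requested level (never shorter than one character).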
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ if(precisions == null || precisions.length == 0) {
+ builder.field(name, location);
+ } else {
+ builder.startObject(name);
+ builder.field(FIELD_VALUE, location);
+ builder.field(FIELD_PRECISION, precisions);
+ builder.endObject();
+ }
+ return builder;
+ }
+ }
+}