diff options
author | Areek Zillur <areek.zillur@elasticsearch.com> | 2015-10-31 01:59:21 -0400 |
---|---|---|
committer | Areek Zillur <areek.zillur@elasticsearch.com> | 2015-11-07 17:46:27 -0500 |
commit | dd1c687ace953c82f3671363dd077c1c99b1fc8e (patch) | |
tree | 383402288e296ca292926a1813c51e3d334c2f4f /core | |
parent | d0f5950c80044bd174629cf9da8cf8e1b97084dd (diff) |
Completion Suggester V2
The completion suggester provides auto-complete/search-as-you-type functionality.
This is a navigational feature to guide users to relevant results as they are typing, improving search precision.
It is not meant for spell correction or did-you-mean functionality like the term or phrase suggesters.
The completions are indexed as a weighted FST (finite state transducer) to provide fast Top N prefix-based
searches suitable for serving relevant results as a user types.
closes #10746
Diffstat (limited to 'core')
48 files changed, 4643 insertions, 7277 deletions
diff --git a/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java b/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java deleted file mode 100644 index a6077f8e84..0000000000 --- a/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java +++ /dev/null @@ -1,1136 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.lucene.search.suggest.analyzing; - -import com.carrotsearch.hppc.ObjectIntHashMap; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.TokenStreamToAutomaton; -import org.apache.lucene.search.suggest.InputIterator; -import org.apache.lucene.search.suggest.Lookup; -import org.apache.lucene.store.*; -import org.apache.lucene.util.*; -import org.apache.lucene.util.automaton.Automaton; -import org.apache.lucene.util.automaton.LimitedFiniteStringsIterator; -import org.apache.lucene.util.automaton.Operations; -import org.apache.lucene.util.automaton.Transition; -import org.apache.lucene.util.fst.*; -import org.apache.lucene.util.fst.FST.BytesReader; -import org.apache.lucene.util.fst.PairOutputs.Pair; -import org.apache.lucene.util.fst.Util.Result; -import org.apache.lucene.util.fst.Util.TopResults; -import org.elasticsearch.common.collect.HppcMaps; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.*; - -/** - * Suggester that first analyzes the surface form, adds the - * analyzed form to a weighted FST, and then does the same - * thing at lookup time. This means lookup is based on the - * analyzed form while suggestions are still the surface - * form(s). - * - * <p> - * This can result in powerful suggester functionality. For - * example, if you use an analyzer removing stop words, - * then the partial text "ghost chr..." could see the - * suggestion "The Ghost of Christmas Past". Note that - * position increments MUST NOT be preserved for this example - * to work, so you should call the constructor with - * <code>preservePositionIncrements</code> parameter set to - * false - * - * <p> - * If SynonymFilter is used to map wifi and wireless network to - * hotspot then the partial text "wirele..." could suggest - * "wifi router". 
Token normalization like stemmers, accent - * removal, etc., would allow suggestions to ignore such - * variations. - * - * <p> - * When two matching suggestions have the same weight, they - * are tie-broken by the analyzed form. If their analyzed - * form is the same then the order is undefined. - * - * <p> - * There are some limitations: - * <ul> - * - * <li> A lookup from a query like "net" in English won't - * be any different than "net " (ie, user added a - * trailing space) because analyzers don't reflect - * when they've seen a token separator and when they - * haven't. - * - * <li> If you're using {@code StopFilter}, and the user will - * type "fast apple", but so far all they've typed is - * "fast a", again because the analyzer doesn't convey whether - * it's seen a token separator after the "a", - * {@code StopFilter} will remove that "a" causing - * far more matches than you'd expect. - * - * <li> Lookups with the empty string return no results - * instead of all results. - * </ul> - * - * @lucene.experimental - */ -public class XAnalyzingSuggester extends Lookup { - - /** - * FST<Weight,Surface>: - * input is the analyzed form, with a null byte between terms - * weights are encoded as costs: (Integer.MAX_VALUE-weight) - * surface is the original, unanalyzed form. - */ - private FST<Pair<Long,BytesRef>> fst = null; - - /** - * Analyzer that will be used for analyzing suggestions at - * index time. - */ - private final Analyzer indexAnalyzer; - - /** - * Analyzer that will be used for analyzing suggestions at - * query time. - */ - private final Analyzer queryAnalyzer; - - /** - * True if exact match suggestions should always be returned first. - */ - private final boolean exactFirst; - - /** - * True if separator between tokens should be preserved. 
- */ - private final boolean preserveSep; - - /** Include this flag in the options parameter to {@code - * #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)} to always - * return the exact match first, regardless of score. This - * has no performance impact but could result in - * low-quality suggestions. */ - public static final int EXACT_FIRST = 1; - - /** Include this flag in the options parameter to {@code - * #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)} to preserve - * token separators when matching. */ - public static final int PRESERVE_SEP = 2; - - /** Represents the separation between tokens, if - * PRESERVE_SEP was specified */ - public static final int SEP_LABEL = '\u001F'; - - /** Marks end of the analyzed input and start of dedup - * byte. */ - public static final int END_BYTE = 0x0; - - /** Maximum number of dup surface forms (different surface - * forms for the same analyzed form). */ - private final int maxSurfaceFormsPerAnalyzedForm; - - /** Maximum graph paths to index for a single analyzed - * surface form. This only matters if your analyzer - * makes lots of alternate paths (e.g. contains - * SynonymFilter). */ - private final int maxGraphExpansions; - - /** Highest number of analyzed paths we saw for any single - * input surface form. For analyzers that never create - * graphs this will always be 1. */ - private int maxAnalyzedPathsForOneInput; - - private boolean hasPayloads; - - private final int sepLabel; - private final int payloadSep; - private final int endByte; - private final int holeCharacter; - - public static final int PAYLOAD_SEP = '\u001F'; - public static final int HOLE_CHARACTER = '\u001E'; - - private final Automaton queryPrefix; - - /** Whether position holes should appear in the automaton. 
*/ - private boolean preservePositionIncrements; - - /** Number of entries the lookup was built with */ - private long count = 0; - - /** - * Calls {@code #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int) - * AnalyzingSuggester(analyzer, analyzer, EXACT_FIRST | - * PRESERVE_SEP, 256, -1)} - */ - public XAnalyzingSuggester(Analyzer analyzer) { - this(analyzer, null, analyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); - } - - /** - * Calls {@code #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int) - * AnalyzingSuggester(indexAnalyzer, queryAnalyzer, EXACT_FIRST | - * PRESERVE_SEP, 256, -1)} - */ - public XAnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) { - this(indexAnalyzer, null, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); - } - - /** - * Creates a new suggester. - * - * @param indexAnalyzer Analyzer that will be used for - * analyzing suggestions while building the index. - * @param queryAnalyzer Analyzer that will be used for - * analyzing query text during lookup - * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP} - * @param maxSurfaceFormsPerAnalyzedForm Maximum number of - * surface forms to keep for a single analyzed form. - * When there are too many surface forms we discard the - * lowest weighted ones. - * @param maxGraphExpansions Maximum number of graph paths - * to expand from the analyzed form. Set this to -1 for - * no limit. 
- */ - public XAnalyzingSuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, - boolean preservePositionIncrements, FST<Pair<Long, BytesRef>> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput, - int sepLabel, int payloadSep, int endByte, int holeCharacter) { - // SIMON EDIT: I added fst, hasPayloads and maxAnalyzedPathsForOneInput - this.indexAnalyzer = indexAnalyzer; - this.queryAnalyzer = queryAnalyzer; - this.fst = fst; - this.hasPayloads = hasPayloads; - if ((options & ~(EXACT_FIRST | PRESERVE_SEP)) != 0) { - throw new IllegalArgumentException("options should only contain EXACT_FIRST and PRESERVE_SEP; got " + options); - } - this.exactFirst = (options & EXACT_FIRST) != 0; - this.preserveSep = (options & PRESERVE_SEP) != 0; - - // FLORIAN EDIT: I added <code>queryPrefix</code> for context dependent suggestions - this.queryPrefix = queryPrefix; - - // NOTE: this is just an implementation limitation; if - // somehow this is a problem we could fix it by using - // more than one byte to disambiguate ... but 256 seems - // like it should be way more then enough. 
- if (maxSurfaceFormsPerAnalyzedForm <= 0 || maxSurfaceFormsPerAnalyzedForm > 256) { - throw new IllegalArgumentException("maxSurfaceFormsPerAnalyzedForm must be > 0 and < 256 (got: " + maxSurfaceFormsPerAnalyzedForm + ")"); - } - this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm; - - if (maxGraphExpansions < 1 && maxGraphExpansions != -1) { - throw new IllegalArgumentException("maxGraphExpansions must -1 (no limit) or > 0 (got: " + maxGraphExpansions + ")"); - } - this.maxGraphExpansions = maxGraphExpansions; - this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput; - this.preservePositionIncrements = preservePositionIncrements; - this.sepLabel = sepLabel; - this.payloadSep = payloadSep; - this.endByte = endByte; - this.holeCharacter = holeCharacter; - } - - /** Returns byte size of the underlying FST. */ - @Override -public long ramBytesUsed() { - return fst == null ? 0 : fst.ramBytesUsed(); - } - - public int getMaxAnalyzedPathsForOneInput() { - return maxAnalyzedPathsForOneInput; - } - - // Replaces SEP with epsilon or remaps them if - // we were asked to preserve them: - private Automaton replaceSep(Automaton a) { - - Automaton result = new Automaton(); - - // Copy all states over - int numStates = a.getNumStates(); - for(int s=0;s<numStates;s++) { - result.createState(); - result.setAccept(s, a.isAccept(s)); - } - - // Go in reverse topo sort so we know we only have to - // make one pass: - Transition t = new Transition(); - int[] topoSortStates = topoSortStates(a); - for(int i=0;i<topoSortStates.length;i++) { - int state = topoSortStates[topoSortStates.length-1-i]; - int count = a.initTransition(state, t); - for(int j=0;j<count;j++) { - a.getNextTransition(t); - if (t.min == TokenStreamToAutomaton.POS_SEP) { - assert t.max == TokenStreamToAutomaton.POS_SEP; - if (preserveSep) { - // Remap to SEP_LABEL: - result.addTransition(state, t.dest, SEP_LABEL); - } else { - result.addEpsilon(state, t.dest); - } - } else if (t.min == 
TokenStreamToAutomaton.HOLE) { - assert t.max == TokenStreamToAutomaton.HOLE; - - // Just remove the hole: there will then be two - // SEP tokens next to each other, which will only - // match another hole at search time. Note that - // it will also match an empty-string token ... if - // that's somehow a problem we can always map HOLE - // to a dedicated byte (and escape it in the - // input). - result.addEpsilon(state, t.dest); - } else { - result.addTransition(state, t.dest, t.min, t.max); - } - } - } - - result.finishState(); - - return result; - } - - protected Automaton convertAutomaton(Automaton a) { - if (queryPrefix != null) { - a = Operations.concatenate(Arrays.asList(queryPrefix, a)); - // This automaton should not blow up during determinize: - a = Operations.determinize(a, Integer.MAX_VALUE); - } - return a; - } - - private int[] topoSortStates(Automaton a) { - int[] states = new int[a.getNumStates()]; - final Set<Integer> visited = new HashSet<>(); - final LinkedList<Integer> worklist = new LinkedList<>(); - worklist.add(0); - visited.add(0); - int upto = 0; - states[upto] = 0; - upto++; - Transition t = new Transition(); - while (worklist.size() > 0) { - int s = worklist.removeFirst(); - int count = a.initTransition(s, t); - for (int i=0;i<count;i++) { - a.getNextTransition(t); - if (!visited.contains(t.dest)) { - visited.add(t.dest); - worklist.add(t.dest); - states[upto++] = t.dest; - } - } - } - return states; - } - - /** Just escapes the 0xff byte (which we still for SEP). 
*/ - private static final class EscapingTokenStreamToAutomaton extends TokenStreamToAutomaton { - - final BytesRefBuilder spare = new BytesRefBuilder(); - private char sepLabel; - - public EscapingTokenStreamToAutomaton(char sepLabel) { - this.sepLabel = sepLabel; - } - - @Override - protected BytesRef changeToken(BytesRef in) { - int upto = 0; - for(int i=0;i<in.length;i++) { - byte b = in.bytes[in.offset+i]; - if (b == (byte) sepLabel) { - spare.grow(upto+2); - spare.setByteAt(upto++, (byte) sepLabel); - spare.setByteAt(upto++, b); - } else { - spare.grow(upto+1); - spare.setByteAt(upto++, b); - } - } - spare.setLength(upto); - return spare.get(); - } - } - - public TokenStreamToAutomaton getTokenStreamToAutomaton() { - final TokenStreamToAutomaton tsta; - if (preserveSep) { - tsta = new EscapingTokenStreamToAutomaton((char) sepLabel); - } else { - // When we're not preserving sep, we don't steal 0xff - // byte, so we don't need to do any escaping: - tsta = new TokenStreamToAutomaton(); - } - tsta.setPreservePositionIncrements(preservePositionIncrements); - return tsta; - } - - private static class AnalyzingComparator implements Comparator<BytesRef> { - - private final boolean hasPayloads; - - public AnalyzingComparator(boolean hasPayloads) { - this.hasPayloads = hasPayloads; - } - - private final ByteArrayDataInput readerA = new ByteArrayDataInput(); - private final ByteArrayDataInput readerB = new ByteArrayDataInput(); - private final BytesRef scratchA = new BytesRef(); - private final BytesRef scratchB = new BytesRef(); - - @Override - public int compare(BytesRef a, BytesRef b) { - - // First by analyzed form: - readerA.reset(a.bytes, a.offset, a.length); - scratchA.length = readerA.readShort(); - scratchA.bytes = a.bytes; - scratchA.offset = readerA.getPosition(); - - readerB.reset(b.bytes, b.offset, b.length); - scratchB.bytes = b.bytes; - scratchB.length = readerB.readShort(); - scratchB.offset = readerB.getPosition(); - - int cmp = 
scratchA.compareTo(scratchB); - if (cmp != 0) { - return cmp; - } - readerA.skipBytes(scratchA.length); - readerB.skipBytes(scratchB.length); - // Next by cost: - long aCost = readerA.readInt(); - long bCost = readerB.readInt(); - if (aCost < bCost) { - return -1; - } else if (aCost > bCost) { - return 1; - } - - // Finally by surface form: - if (hasPayloads) { - scratchA.length = readerA.readShort(); - scratchA.offset = readerA.getPosition(); - scratchB.length = readerB.readShort(); - scratchB.offset = readerB.getPosition(); - } else { - scratchA.offset = readerA.getPosition(); - scratchA.length = a.length - scratchA.offset; - scratchB.offset = readerB.getPosition(); - scratchB.length = b.length - scratchB.offset; - } - return scratchA.compareTo(scratchB); - } - } - - @Override - public void build(InputIterator iterator) throws IOException { - String prefix = getClass().getSimpleName(); - Path directory = OfflineSorter.getDefaultTempDir(); - Path tempInput = Files.createTempFile(directory, prefix, ".input"); - Path tempSorted = Files.createTempFile(directory, prefix, ".sorted"); - - hasPayloads = iterator.hasPayloads(); - - OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput); - OfflineSorter.ByteSequencesReader reader = null; - BytesRefBuilder scratch = new BytesRefBuilder(); - - TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton(); - - boolean success = false; - count = 0; - byte buffer[] = new byte[8]; - try { - ByteArrayDataOutput output = new ByteArrayDataOutput(buffer); - - for (BytesRef surfaceForm; (surfaceForm = iterator.next()) != null;) { - LimitedFiniteStringsIterator finiteStrings = - new LimitedFiniteStringsIterator(toAutomaton(surfaceForm, ts2a), maxGraphExpansions); - for (IntsRef string; (string = finiteStrings.next()) != null; count++) { - Util.toBytesRef(string, scratch); - - // length of the analyzed text (FST input) - if (scratch.length() > Short.MAX_VALUE-2) { - throw new 
IllegalArgumentException("cannot handle analyzed forms > " + (Short.MAX_VALUE-2) + " in length (got " + scratch.length() + ")"); - } - short analyzedLength = (short) scratch.length(); - - // compute the required length: - // analyzed sequence + weight (4) + surface + analyzedLength (short) - int requiredLength = analyzedLength + 4 + surfaceForm.length + 2; - - BytesRef payload; - - if (hasPayloads) { - if (surfaceForm.length > (Short.MAX_VALUE-2)) { - throw new IllegalArgumentException("cannot handle surface form > " + (Short.MAX_VALUE-2) + " in length (got " + surfaceForm.length + ")"); - } - payload = iterator.payload(); - // payload + surfaceLength (short) - requiredLength += payload.length + 2; - } else { - payload = null; - } - - buffer = ArrayUtil.grow(buffer, requiredLength); - - output.reset(buffer); - - output.writeShort(analyzedLength); - - output.writeBytes(scratch.bytes(), 0, scratch.length()); - - output.writeInt(encodeWeight(iterator.weight())); - - if (hasPayloads) { - for(int i=0;i<surfaceForm.length;i++) { - if (surfaceForm.bytes[i] == payloadSep) { - throw new IllegalArgumentException("surface form cannot contain unit separator character U+001F; this character is reserved"); - } - } - output.writeShort((short) surfaceForm.length); - output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length); - output.writeBytes(payload.bytes, payload.offset, payload.length); - } else { - output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length); - } - - assert output.getPosition() == requiredLength: output.getPosition() + " vs " + requiredLength; - - writer.write(buffer, 0, output.getPosition()); - } - maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, finiteStrings.size()); - } - writer.close(); - - // Sort all input/output pairs (required by FST.Builder): - new OfflineSorter(new AnalyzingComparator(hasPayloads)).sort(tempInput, tempSorted); - - // Free disk space: - Files.delete(tempInput); - - reader = new 
OfflineSorter.ByteSequencesReader(tempSorted); - - PairOutputs<Long,BytesRef> outputs = new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()); - Builder<Pair<Long,BytesRef>> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs); - - // Build FST: - BytesRefBuilder previousAnalyzed = null; - BytesRefBuilder analyzed = new BytesRefBuilder(); - BytesRef surface = new BytesRef(); - IntsRefBuilder scratchInts = new IntsRefBuilder(); - ByteArrayDataInput input = new ByteArrayDataInput(); - - // Used to remove duplicate surface forms (but we - // still index the hightest-weight one). We clear - // this when we see a new analyzed form, so it cannot - // grow unbounded (at most 256 entries): - Set<BytesRef> seenSurfaceForms = new HashSet<>(); - - int dedup = 0; - while (reader.read(scratch)) { - input.reset(scratch.bytes(), 0, scratch.length()); - short analyzedLength = input.readShort(); - analyzed.grow(analyzedLength+2); - input.readBytes(analyzed.bytes(), 0, analyzedLength); - analyzed.setLength(analyzedLength); - - long cost = input.readInt(); - - surface.bytes = scratch.bytes(); - if (hasPayloads) { - surface.length = input.readShort(); - surface.offset = input.getPosition(); - } else { - surface.offset = input.getPosition(); - surface.length = scratch.length() - surface.offset; - } - - if (previousAnalyzed == null) { - previousAnalyzed = new BytesRefBuilder(); - previousAnalyzed.copyBytes(analyzed); - seenSurfaceForms.add(BytesRef.deepCopyOf(surface)); - } else if (analyzed.get().equals(previousAnalyzed.get())) { - dedup++; - if (dedup >= maxSurfaceFormsPerAnalyzedForm) { - // More than maxSurfaceFormsPerAnalyzedForm - // dups: skip the rest: - continue; - } - if (seenSurfaceForms.contains(surface)) { - continue; - } - seenSurfaceForms.add(BytesRef.deepCopyOf(surface)); - } else { - dedup = 0; - previousAnalyzed.copyBytes(analyzed); - seenSurfaceForms.clear(); - seenSurfaceForms.add(BytesRef.deepCopyOf(surface)); - } - - // TODO: I 
think we can avoid the extra 2 bytes when - // there is no dup (dedup==0), but we'd have to fix - // the exactFirst logic ... which would be sort of - // hairy because we'd need to special case the two - // (dup/not dup)... - - // NOTE: must be byte 0 so we sort before whatever - // is next - analyzed.append((byte) 0); - analyzed.append((byte) dedup); - - Util.toIntsRef(analyzed.get(), scratchInts); - //System.out.println("ADD: " + scratchInts + " -> " + cost + ": " + surface.utf8ToString()); - if (!hasPayloads) { - builder.add(scratchInts.get(), outputs.newPair(cost, BytesRef.deepCopyOf(surface))); - } else { - int payloadOffset = input.getPosition() + surface.length; - int payloadLength = scratch.length() - payloadOffset; - BytesRef br = new BytesRef(surface.length + 1 + payloadLength); - System.arraycopy(surface.bytes, surface.offset, br.bytes, 0, surface.length); - br.bytes[surface.length] = (byte) payloadSep; - System.arraycopy(scratch.bytes(), payloadOffset, br.bytes, surface.length+1, payloadLength); - br.length = br.bytes.length; - builder.add(scratchInts.get(), outputs.newPair(cost, br)); - } - } - fst = builder.finish(); - - //PrintWriter pw = new PrintWriter("/tmp/out.dot"); - //Util.toDot(fst, pw, true, true); - //pw.close(); - - success = true; - } finally { - IOUtils.closeWhileHandlingException(reader, writer); - - if (success) { - IOUtils.deleteFilesIfExist(tempInput, tempSorted); - } else { - IOUtils.deleteFilesIgnoringExceptions(tempInput, tempSorted); - } - } - } - - @Override - public boolean store(OutputStream output) throws IOException { - DataOutput dataOut = new OutputStreamDataOutput(output); - try { - if (fst == null) { - return false; - } - - fst.save(dataOut); - dataOut.writeVInt(maxAnalyzedPathsForOneInput); - dataOut.writeByte((byte) (hasPayloads ? 
1 : 0)); - } finally { - IOUtils.close(output); - } - return true; - } - - @Override - public long getCount() { - return count; - } - - @Override - public boolean load(InputStream input) throws IOException { - DataInput dataIn = new InputStreamDataInput(input); - try { - this.fst = new FST<>(dataIn, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); - maxAnalyzedPathsForOneInput = dataIn.readVInt(); - hasPayloads = dataIn.readByte() == 1; - } finally { - IOUtils.close(input); - } - return true; - } - - private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRefBuilder spare) { - LookupResult result; - if (hasPayloads) { - int sepIndex = -1; - for(int i=0;i<output2.length;i++) { - if (output2.bytes[output2.offset+i] == payloadSep) { - sepIndex = i; - break; - } - } - assert sepIndex != -1; - final int payloadLen = output2.length - sepIndex - 1; - spare.copyUTF8Bytes(output2.bytes, output2.offset, sepIndex); - BytesRef payload = new BytesRef(payloadLen); - System.arraycopy(output2.bytes, sepIndex+1, payload.bytes, 0, payloadLen); - payload.length = payloadLen; - result = new LookupResult(spare.toString(), decodeWeight(output1), payload); - } else { - spare.copyUTF8Bytes(output2); - result = new LookupResult(spare.toString(), decodeWeight(output1)); - } - - return result; - } - - private boolean sameSurfaceForm(BytesRef key, BytesRef output2) { - if (hasPayloads) { - // output2 has at least PAYLOAD_SEP byte: - if (key.length >= output2.length) { - return false; - } - for(int i=0;i<key.length;i++) { - if (key.bytes[key.offset+i] != output2.bytes[output2.offset+i]) { - return false; - } - } - return output2.bytes[output2.offset + key.length] == payloadSep; - } else { - return key.bytesEquals(output2); - } - } - - @Override - public List<LookupResult> lookup(final CharSequence key, Set<BytesRef> contexts, boolean onlyMorePopular, int num) { - assert num > 0; - - if (onlyMorePopular) { - throw new 
IllegalArgumentException("this suggester only works with onlyMorePopular=false"); - } - if (fst == null) { - return Collections.emptyList(); - } - - //System.out.println("lookup key=" + key + " num=" + num); - for (int i = 0; i < key.length(); i++) { - if (key.charAt(i) == holeCharacter) { - throw new IllegalArgumentException("lookup key cannot contain HOLE character U+001E; this character is reserved"); - } - if (key.charAt(i) == sepLabel) { - throw new IllegalArgumentException("lookup key cannot contain unit separator character U+001F; this character is reserved"); - } - } - final BytesRef utf8Key = new BytesRef(key); - try { - - Automaton lookupAutomaton = toLookupAutomaton(key); - - final CharsRefBuilder spare = new CharsRefBuilder(); - - //System.out.println(" now intersect exactFirst=" + exactFirst); - - // Intersect automaton w/ suggest wFST and get all - // prefix starting nodes & their outputs: - //final PathIntersector intersector = getPathIntersector(lookupAutomaton, fst); - - //System.out.println(" prefixPaths: " + prefixPaths.size()); - - BytesReader bytesReader = fst.getBytesReader(); - - FST.Arc<Pair<Long,BytesRef>> scratchArc = new FST.Arc<>(); - - final List<LookupResult> results = new ArrayList<>(); - - List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths = FSTUtil.intersectPrefixPaths(convertAutomaton(lookupAutomaton), fst); - - if (exactFirst) { - - int count = 0; - for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) { - if (fst.findTargetArc(endByte, path.fstNode, scratchArc, bytesReader) != null) { - // This node has END_BYTE arc leaving, meaning it's an - // "exact" match: - count++; - } - } - - // Searcher just to find the single exact only - // match, if present: - Util.TopNSearcher<Pair<Long,BytesRef>> searcher; - searcher = new Util.TopNSearcher<>(fst, count * maxSurfaceFormsPerAnalyzedForm, count * maxSurfaceFormsPerAnalyzedForm, weightComparator); - - // NOTE: we could almost get away with only using - // the first start node. 
The only catch is if - // maxSurfaceFormsPerAnalyzedForm had kicked in and - // pruned our exact match from one of these nodes - // ...: - for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) { - if (fst.findTargetArc(endByte, path.fstNode, scratchArc, bytesReader) != null) { - // This node has END_BYTE arc leaving, meaning it's an - // "exact" match: - searcher.addStartPaths(scratchArc, fst.outputs.add(path.output, scratchArc.output), false, path.input); - } - } - - Util.TopResults<Pair<Long,BytesRef>> completions = searcher.search(); - - // NOTE: this is rather inefficient: we enumerate - // every matching "exactly the same analyzed form" - // path, and then do linear scan to see if one of - // these exactly matches the input. It should be - // possible (though hairy) to do something similar - // to getByOutput, since the surface form is encoded - // into the FST output, so we more efficiently hone - // in on the exact surface-form match. Still, I - // suspect very little time is spent in this linear - // seach: it's bounded by how many prefix start - // nodes we have and the - // maxSurfaceFormsPerAnalyzedForm: - for(Result<Pair<Long,BytesRef>> completion : completions) { - BytesRef output2 = completion.output.output2; - if (sameSurfaceForm(utf8Key, output2)) { - results.add(getLookupResult(completion.output.output1, output2, spare)); - break; - } - } - - if (results.size() == num) { - // That was quick: - return results; - } - } - - Util.TopNSearcher<Pair<Long,BytesRef>> searcher; - searcher = new Util.TopNSearcher<Pair<Long,BytesRef>>(fst, - num - results.size(), - num * maxAnalyzedPathsForOneInput, - weightComparator) { - private final Set<BytesRef> seen = new HashSet<>(); - - @Override - protected boolean acceptResult(IntsRef input, Pair<Long,BytesRef> output) { - - // Dedup: when the input analyzes to a graph we - // can get duplicate surface forms: - if (seen.contains(output.output2)) { - return false; - } - seen.add(output.output2); - - if 
(!exactFirst) { - return true; - } else { - // In exactFirst mode, don't accept any paths - // matching the surface form since that will - // create duplicate results: - if (sameSurfaceForm(utf8Key, output.output2)) { - // We found exact match, which means we should - // have already found it in the first search: - assert results.size() == 1; - return false; - } else { - return true; - } - } - } - }; - - prefixPaths = getFullPrefixPaths(prefixPaths, lookupAutomaton, fst); - - for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) { - searcher.addStartPaths(path.fstNode, path.output, true, path.input); - } - - TopResults<Pair<Long,BytesRef>> completions = searcher.search(); - - for(Result<Pair<Long,BytesRef>> completion : completions) { - - LookupResult result = getLookupResult(completion.output.output1, completion.output.output2, spare); - - // TODO: for fuzzy case would be nice to return - // how many edits were required - - //System.out.println(" result=" + result); - results.add(result); - - if (results.size() == num) { - // In the exactFirst=true case the search may - // produce one extra path - break; - } - } - - return results; - } catch (IOException bogus) { - throw new RuntimeException(bogus); - } - } - - @Override - public boolean store(DataOutput output) throws IOException { - output.writeVLong(count); - if (fst == null) { - return false; - } - - fst.save(output); - output.writeVInt(maxAnalyzedPathsForOneInput); - output.writeByte((byte) (hasPayloads ? 1 : 0)); - return true; - } - - @Override - public boolean load(DataInput input) throws IOException { - count = input.readVLong(); - this.fst = new FST<>(input, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); - maxAnalyzedPathsForOneInput = input.readVInt(); - hasPayloads = input.readByte() == 1; - return true; - } - - /** Returns all completion paths to initialize the search. 
*/ - protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths, - Automaton lookupAutomaton, - FST<Pair<Long,BytesRef>> fst) - throws IOException { - return prefixPaths; - } - - final Automaton toAutomaton(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException { - try (TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString())) { - return toAutomaton(ts, ts2a); - } - } - - final Automaton toAutomaton(TokenStream ts, final TokenStreamToAutomaton ts2a) throws IOException { - // Create corresponding automaton: labels are bytes - // from each analyzed token, with byte 0 used as - // separator between tokens: - Automaton automaton = ts2a.toAutomaton(ts); - - automaton = replaceSep(automaton); - automaton = convertAutomaton(automaton); - - // TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings - // assert SpecialOperations.isFinite(automaton); - - // Get all paths from the automaton (there can be - // more than one path, eg if the analyzer created a - // graph using SynFilter or WDF): - - return automaton; - } - - // EDIT: Adrien, needed by lookup providers - // NOTE: these XForks are unmaintainable, we need to get rid of them... - public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { - final TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton(); - Automaton automaton; - try (TokenStream ts = stream) { - automaton = toAutomaton(ts, ts2a); - } - LimitedFiniteStringsIterator finiteStrings = - new LimitedFiniteStringsIterator(automaton, maxGraphExpansions); - Set<IntsRef> set = new HashSet<>(); - for (IntsRef string = finiteStrings.next(); string != null; string = finiteStrings.next()) { - set.add(IntsRef.deepCopyOf(string)); - } - return Collections.unmodifiableSet(set); - } - - final Automaton toLookupAutomaton(final CharSequence key) throws IOException { - // TODO: is there a Reader from a CharSequence? 
- // Turn tokenstream into automaton: - Automaton automaton = null; - - try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) { - automaton = getTokenStreamToAutomaton().toAutomaton(ts); - } - - automaton = replaceSep(automaton); - - // TODO: we can optimize this somewhat by determinizing - // while we convert - - // This automaton should not blow up during determinize: - automaton = Operations.determinize(automaton, Integer.MAX_VALUE); - return automaton; - } - - - - /** - * Returns the weight associated with an input string, - * or null if it does not exist. - */ - public Object get(CharSequence key) { - throw new UnsupportedOperationException(); - } - - /** cost -> weight */ - public static int decodeWeight(long encoded) { - return (int)(Integer.MAX_VALUE - encoded); - } - - /** weight -> cost */ - public static int encodeWeight(long value) { - if (value < 0 || value > Integer.MAX_VALUE) { - throw new UnsupportedOperationException("cannot encode value: " + value); - } - return Integer.MAX_VALUE - (int)value; - } - - static final Comparator<Pair<Long,BytesRef>> weightComparator = new Comparator<Pair<Long,BytesRef>> () { - @Override - public int compare(Pair<Long,BytesRef> left, Pair<Long,BytesRef> right) { - return left.output1.compareTo(right.output1); - } - }; - - - public static class XBuilder { - private Builder<Pair<Long, BytesRef>> builder; - private int maxSurfaceFormsPerAnalyzedForm; - private IntsRefBuilder scratchInts = new IntsRefBuilder(); - private final PairOutputs<Long, BytesRef> outputs; - private boolean hasPayloads; - private BytesRefBuilder analyzed = new BytesRefBuilder(); - private final SurfaceFormAndPayload[] surfaceFormsAndPayload; - private int count; - private ObjectIntHashMap<BytesRef> seenSurfaceForms = HppcMaps.Object.Integer.ensureNoNullKeys(256, 0.75f); - private int payloadSep; - - public XBuilder(int maxSurfaceFormsPerAnalyzedForm, boolean hasPayloads, int payloadSep) { - this.payloadSep = payloadSep; - this.outputs 
= new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()); - this.builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs); - this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm; - this.hasPayloads = hasPayloads; - surfaceFormsAndPayload = new SurfaceFormAndPayload[maxSurfaceFormsPerAnalyzedForm]; - - } - public void startTerm(BytesRef analyzed) { - this.analyzed.grow(analyzed.length+2); - this.analyzed.copyBytes(analyzed); - } - - private final static class SurfaceFormAndPayload implements Comparable<SurfaceFormAndPayload> { - BytesRef payload; - long weight; - - public SurfaceFormAndPayload(BytesRef payload, long cost) { - super(); - this.payload = payload; - this.weight = cost; - } - - @Override - public int compareTo(SurfaceFormAndPayload o) { - int res = compare(weight, o.weight); - if (res == 0 ){ - return payload.compareTo(o.payload); - } - return res; - } - public static int compare(long x, long y) { - return (x < y) ? -1 : ((x == y) ? 0 : 1); - } - } - - public void addSurface(BytesRef surface, BytesRef payload, long cost) throws IOException { - int surfaceIndex = -1; - long encodedWeight = cost == -1 ? cost : encodeWeight(cost); - /* - * we need to check if we have seen this surface form, if so only use the - * the surface form with the highest weight and drop the rest no matter if - * the payload differs. 
- */ - if (count >= maxSurfaceFormsPerAnalyzedForm) { - // More than maxSurfaceFormsPerAnalyzedForm - // dups: skip the rest: - return; - } - - BytesRef surfaceCopy; - final int keySlot; - if (count > 0 && (keySlot = seenSurfaceForms.indexOf(surface)) >= 0) { - surfaceIndex = seenSurfaceForms.indexGet(keySlot); - SurfaceFormAndPayload surfaceFormAndPayload = surfaceFormsAndPayload[surfaceIndex]; - if (encodedWeight >= surfaceFormAndPayload.weight) { - return; - } - surfaceCopy = BytesRef.deepCopyOf(surface); - } else { - surfaceIndex = count++; - surfaceCopy = BytesRef.deepCopyOf(surface); - seenSurfaceForms.put(surfaceCopy, surfaceIndex); - } - - BytesRef payloadRef; - if (!hasPayloads) { - payloadRef = surfaceCopy; - } else { - int len = surface.length + 1 + payload.length; - final BytesRef br = new BytesRef(len); - System.arraycopy(surface.bytes, surface.offset, br.bytes, 0, surface.length); - br.bytes[surface.length] = (byte) payloadSep; - System.arraycopy(payload.bytes, payload.offset, br.bytes, surface.length + 1, payload.length); - br.length = len; - payloadRef = br; - } - if (surfaceFormsAndPayload[surfaceIndex] == null) { - surfaceFormsAndPayload[surfaceIndex] = new SurfaceFormAndPayload(payloadRef, encodedWeight); - } else { - surfaceFormsAndPayload[surfaceIndex].payload = payloadRef; - surfaceFormsAndPayload[surfaceIndex].weight = encodedWeight; - } - } - - public void finishTerm(long defaultWeight) throws IOException { - ArrayUtil.timSort(surfaceFormsAndPayload, 0, count); - int deduplicator = 0; - analyzed.append((byte) 0); - analyzed.setLength(analyzed.length() + 1); - analyzed.grow(analyzed.length()); - for (int i = 0; i < count; i++) { - analyzed.setByteAt(analyzed.length() - 1, (byte) deduplicator++); - Util.toIntsRef(analyzed.get(), scratchInts); - SurfaceFormAndPayload candiate = surfaceFormsAndPayload[i]; - long cost = candiate.weight == -1 ? 
encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight)) : candiate.weight; - builder.add(scratchInts.get(), outputs.newPair(cost, candiate.payload)); - } - seenSurfaceForms.clear(); - count = 0; - } - - public FST<Pair<Long, BytesRef>> build() throws IOException { - return builder.finish(); - } - - public boolean hasPayloads() { - return hasPayloads; - } - - public int maxSurfaceFormsPerAnalyzedForm() { - return maxSurfaceFormsPerAnalyzedForm; - } - - } -} diff --git a/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java b/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java deleted file mode 100644 index a4338f8a65..0000000000 --- a/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.lucene.search.suggest.analyzing; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStreamToAutomaton; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IntsRef; -import org.apache.lucene.util.UnicodeUtil; -import org.apache.lucene.util.automaton.*; -import org.apache.lucene.util.fst.FST; -import org.apache.lucene.util.fst.PairOutputs; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES; - -/** - * Implements a fuzzy {@link AnalyzingSuggester}. The similarity measurement is - * based on the Damerau-Levenshtein (optimal string alignment) algorithm, though - * you can explicitly choose classic Levenshtein by passing <code>false</code> - * for the <code>transpositions</code> parameter. - * <p> - * At most, this query will match terms up to - * {@value org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} - * edits. Higher distances are not supported. Note that the - * fuzzy distance is measured in "byte space" on the bytes - * returned by the {@link org.apache.lucene.analysis.TokenStream}'s {@link - * org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute}, usually UTF8. By default - * the analyzed bytes must be at least 3 {@link - * #DEFAULT_MIN_FUZZY_LENGTH} bytes before any edits are - * considered. Furthermore, the first 1 {@link - * #DEFAULT_NON_FUZZY_PREFIX} byte is not allowed to be - * edited. We allow up to 1 (@link - * #DEFAULT_MAX_EDITS} edit. - * If {@link #unicodeAware} parameter in the constructor is set to true, maxEdits, - * minFuzzyLength, transpositions and nonFuzzyPrefix are measured in Unicode code - * points (actual letters) instead of bytes.* - * - * <p> - * NOTE: This suggester does not boost suggestions that - * required no edits over suggestions that did require - * edits. This is a known limitation. 
- * - * <p> - * Note: complex query analyzers can have a significant impact on the lookup - * performance. It's recommended to not use analyzers that drop or inject terms - * like synonyms to keep the complexity of the prefix intersection low for good - * lookup performance. At index time, complex analyzers can safely be used. - * </p> - * - * @lucene.experimental - */ -public final class XFuzzySuggester extends XAnalyzingSuggester { - private final int maxEdits; - private final boolean transpositions; - private final int nonFuzzyPrefix; - private final int minFuzzyLength; - private final boolean unicodeAware; - - /** - * Measure maxEdits, minFuzzyLength, transpositions and nonFuzzyPrefix - * parameters in Unicode code points (actual letters) - * instead of bytes. - */ - public static final boolean DEFAULT_UNICODE_AWARE = false; - - /** - * The default minimum length of the key passed to {@link - * #lookup} before any edits are allowed. - */ - public static final int DEFAULT_MIN_FUZZY_LENGTH = 3; - - /** - * The default prefix length where edits are not allowed. - */ - public static final int DEFAULT_NON_FUZZY_PREFIX = 1; - - /** - * The default maximum number of edits for fuzzy - * suggestions. - */ - public static final int DEFAULT_MAX_EDITS = 1; - - /** - * The default transposition value passed to {@link org.apache.lucene.util.automaton.LevenshteinAutomata} - */ - public static final boolean DEFAULT_TRANSPOSITIONS = true; - - /** - * Creates a {@link FuzzySuggester} instance initialized with default values. - * - * @param analyzer the analyzer used for this suggester - */ - public XFuzzySuggester(Analyzer analyzer) { - this(analyzer, analyzer); - } - - /** - * Creates a {@link FuzzySuggester} instance with an index & a query analyzer initialized with default values. - * - * @param indexAnalyzer - * Analyzer that will be used for analyzing suggestions while building the index. 
- * @param queryAnalyzer - * Analyzer that will be used for analyzing query text during lookup - */ - public XFuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) { - this(indexAnalyzer, null, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, DEFAULT_MAX_EDITS, DEFAULT_TRANSPOSITIONS, - DEFAULT_NON_FUZZY_PREFIX, DEFAULT_MIN_FUZZY_LENGTH, DEFAULT_UNICODE_AWARE, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); - - } - - /** - * Creates a {@link FuzzySuggester} instance. - * - * @param indexAnalyzer Analyzer that will be used for - * analyzing suggestions while building the index. - * @param queryAnalyzer Analyzer that will be used for - * analyzing query text during lookup - * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP} - * @param maxSurfaceFormsPerAnalyzedForm Maximum number of - * surface forms to keep for a single analyzed form. - * When there are too many surface forms we discard the - * lowest weighted ones. - * @param maxGraphExpansions Maximum number of graph paths - * to expand from the analyzed form. Set this to -1 for - * no limit. - * @param maxEdits must be >= 0 and <= {@link org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} . - * @param transpositions <code>true</code> if transpositions should be treated as a primitive - * edit operation. If this is false, comparisons will implement the classic - * Levenshtein algorithm. 
- * @param nonFuzzyPrefix length of common (non-fuzzy) prefix (see default {@link #DEFAULT_NON_FUZZY_PREFIX} - * @param minFuzzyLength minimum length of lookup key before any edits are allowed (see default {@link #DEFAULT_MIN_FUZZY_LENGTH}) - * @param sepLabel separation label - * @param payloadSep payload separator byte - * @param endByte end byte marker byte - */ - public XFuzzySuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, - int maxEdits, boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength, boolean unicodeAware, - FST<PairOutputs.Pair<Long, BytesRef>> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput, - int sepLabel, int payloadSep, int endByte, int holeCharacter) { - super(indexAnalyzer, queryPrefix, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte, holeCharacter); - if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { - throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); - } - if (nonFuzzyPrefix < 0) { - throw new IllegalArgumentException("nonFuzzyPrefix must not be >= 0 (got " + nonFuzzyPrefix + ")"); - } - if (minFuzzyLength < 0) { - throw new IllegalArgumentException("minFuzzyLength must not be >= 0 (got " + minFuzzyLength + ")"); - } - - this.maxEdits = maxEdits; - this.transpositions = transpositions; - this.nonFuzzyPrefix = nonFuzzyPrefix; - this.minFuzzyLength = minFuzzyLength; - this.unicodeAware = unicodeAware; - } - - @Override - protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths, - Automaton lookupAutomaton, - FST<PairOutputs.Pair<Long,BytesRef>> fst) - throws IOException { - - // TODO: right now there's no penalty for fuzzy/edits, 
- // ie a completion whose prefix matched exactly what the - // user typed gets no boost over completions that - // required an edit, which get no boost over completions - // requiring two edits. I suspect a multiplicative - // factor is appropriate (eg, say a fuzzy match must be at - // least 2X better weight than the non-fuzzy match to - // "compete") ... in which case I think the wFST needs - // to be log weights or something ... - - Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton)); - /* - Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); - w.write(levA.toDot()); - w.close(); - System.out.println("Wrote LevA to out.dot"); - */ - return FSTUtil.intersectPrefixPaths(levA, fst); - } - - @Override - protected Automaton convertAutomaton(Automaton a) { - if (unicodeAware) { - // FLORIAN EDIT: get converted Automaton from superclass - Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a)); - // This automaton should not blow up during determinize: - utf8automaton = Operations.determinize(utf8automaton, Integer.MAX_VALUE); - return utf8automaton; - } else { - return super.convertAutomaton(a); - } - } - - @Override - public TokenStreamToAutomaton getTokenStreamToAutomaton() { - final TokenStreamToAutomaton tsta = super.getTokenStreamToAutomaton(); - tsta.setUnicodeArcs(unicodeAware); - return tsta; - } - - Automaton toLevenshteinAutomata(Automaton automaton) { - List<Automaton> subs = new ArrayList<>(); - FiniteStringsIterator finiteStrings = new FiniteStringsIterator(automaton); - for (IntsRef string; (string = finiteStrings.next()) != null;) { - if (string.length <= nonFuzzyPrefix || string.length < minFuzzyLength) { - subs.add(Automata.makeString(string.ints, string.offset, string.length)); - } else { - int ints[] = new int[string.length-nonFuzzyPrefix]; - System.arraycopy(string.ints, string.offset+nonFuzzyPrefix, ints, 0, ints.length); - // TODO: maybe add alphaMin to LevenshteinAutomata, - 
// and pass 1 instead of 0? We probably don't want - // to allow the trailing dedup bytes to be - // edited... but then 0 byte is "in general" allowed - // on input (but not in UTF8). - LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions); - subs.add(lev.toAutomaton(maxEdits, UnicodeUtil.newString(string.ints, string.offset, nonFuzzyPrefix))); - } - } - - if (subs.isEmpty()) { - // automaton is empty, there is no accepted paths through it - return Automata.makeEmpty(); // matches nothing - } else if (subs.size() == 1) { - // no synonyms or anything: just a single path through the tokenstream - return subs.get(0); - } else { - // multiple paths: this is really scary! is it slow? - // maybe we should not do this and throw UOE? - Automaton a = Operations.union(subs); - // TODO: we could call toLevenshteinAutomata() before det? - // this only happens if you have multiple paths anyway (e.g. synonyms) - return Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES); - } - } -} diff --git a/core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java b/core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java index b504c4c21c..2c23f94747 100644 --- a/core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java +++ b/core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java @@ -58,10 +58,7 @@ public class PerFieldMappingPostingFormatCodec extends Lucene54Codec { if (indexName == null) { logger.warn("no index mapper found for field: [{}] returning default postings format", field); } else if (indexName instanceof CompletionFieldMapper.CompletionFieldType) { - // CompletionFieldMapper needs a special postings format - final CompletionFieldMapper.CompletionFieldType fieldType = (CompletionFieldMapper.CompletionFieldType) indexName; - final PostingsFormat defaultFormat = 
super.getPostingsFormatForField(field); - return fieldType.postingsFormat(defaultFormat); + return CompletionFieldMapper.CompletionFieldType.postingsFormat(); } return super.getPostingsFormatForField(field); } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java index 3a3a854915..7f793ab616 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java @@ -18,144 +18,91 @@ */ package org.elasticsearch.index.mapper.core; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.document.Field; -import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester; -import org.apache.lucene.util.BytesRef; +import org.apache.lucene.search.suggest.document.Completion50PostingsFormat; +import org.apache.lucene.search.suggest.document.CompletionAnalyzer; +import org.apache.lucene.search.suggest.document.CompletionQuery; +import org.apache.lucene.search.suggest.document.FuzzyCompletionQuery; +import org.apache.lucene.search.suggest.document.PrefixCompletionQuery; +import org.apache.lucene.search.suggest.document.RegexCompletionQuery; +import org.apache.lucene.search.suggest.document.SuggestField; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.Version; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParser.NumberType; import 
org.elasticsearch.common.xcontent.XContentParser.Token; import org.elasticsearch.index.analysis.NamedAnalyzer; -import org.elasticsearch.index.mapper.FieldMapper; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.Mapper; -import org.elasticsearch.index.mapper.MapperException; -import org.elasticsearch.index.mapper.MapperParsingException; -import org.elasticsearch.index.mapper.MergeMappingException; -import org.elasticsearch.index.mapper.MergeResult; -import org.elasticsearch.index.mapper.ParseContext; -import org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider; -import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat; -import org.elasticsearch.search.suggest.completion.CompletionTokenStream; -import org.elasticsearch.search.suggest.context.ContextBuilder; -import org.elasticsearch.search.suggest.context.ContextMapping; -import org.elasticsearch.search.suggest.context.ContextMapping.ContextConfig; +import org.elasticsearch.index.mapper.*; +import org.elasticsearch.index.mapper.object.ArrayValueMapperParser; +import org.elasticsearch.search.suggest.completion.CompletionSuggester; +import org.elasticsearch.search.suggest.completion.context.ContextMapping; +import org.elasticsearch.search.suggest.completion.context.ContextMappings; import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; +import java.util.*; import static org.elasticsearch.index.mapper.MapperBuilders.completionField; import static org.elasticsearch.index.mapper.core.TypeParsers.parseMultiField; /** + * Mapper for completion field. 
The field values are indexed as a weighted FST for + * fast auto-completion/search-as-you-type functionality.<br> * + * Type properties:<br> + * <ul> + * <li>"analyzer": "simple", (default)</li> + * <li>"search_analyzer": "simple", (default)</li> + * <li>"preserve_separators" : true, (default)</li> + * <li>"preserve_position_increments" : true (default)</li> + * <li>"max_input_length": 50 (default)</li> + * <li>"contexts" : CONTEXTS</li> + * </ul> + * see {@link ContextMappings#load(Object, Version)} for CONTEXTS<br> + * see {@link #parse(ParseContext)} for acceptable inputs for indexing<br> + * <p> + * This field type constructs completion queries that are run + * against the weighted FST index by the {@link CompletionSuggester}. + * This field can also be extended to add search criteria to suggestions + * for query-time filtering and boosting (see {@link ContextMappings}) */ -public class CompletionFieldMapper extends FieldMapper { +public class CompletionFieldMapper extends FieldMapper implements ArrayValueMapperParser { public static final String CONTENT_TYPE = "completion"; public static class Defaults { - public static final CompletionFieldType FIELD_TYPE = new CompletionFieldType(); - + public static final MappedFieldType FIELD_TYPE = new CompletionFieldType(); static { FIELD_TYPE.setOmitNorms(true); FIELD_TYPE.freeze(); } - public static final boolean DEFAULT_PRESERVE_SEPARATORS = true; public static final boolean DEFAULT_POSITION_INCREMENTS = true; - public static final boolean DEFAULT_HAS_PAYLOADS = false; public static final int DEFAULT_MAX_INPUT_LENGTH = 50; } public static class Fields { // Mapping field names - public static final String ANALYZER = "analyzer"; + public static final ParseField ANALYZER = new ParseField("analyzer"); public static final ParseField SEARCH_ANALYZER = new ParseField("search_analyzer"); public static final ParseField PRESERVE_SEPARATORS = new ParseField("preserve_separators"); public static final ParseField 
PRESERVE_POSITION_INCREMENTS = new ParseField("preserve_position_increments"); - public static final String PAYLOADS = "payloads"; - public static final String TYPE = "type"; + public static final ParseField TYPE = new ParseField("type"); + public static final ParseField CONTEXTS = new ParseField("contexts"); public static final ParseField MAX_INPUT_LENGTH = new ParseField("max_input_length", "max_input_len"); // Content field names public static final String CONTENT_FIELD_NAME_INPUT = "input"; - public static final String CONTENT_FIELD_NAME_OUTPUT = "output"; - public static final String CONTENT_FIELD_NAME_PAYLOAD = "payload"; public static final String CONTENT_FIELD_NAME_WEIGHT = "weight"; - public static final String CONTEXT = "context"; + public static final String CONTENT_FIELD_NAME_CONTEXTS = "contexts"; } public static final Set<String> ALLOWED_CONTENT_FIELD_NAMES = Sets.newHashSet(Fields.CONTENT_FIELD_NAME_INPUT, - Fields.CONTENT_FIELD_NAME_OUTPUT, Fields.CONTENT_FIELD_NAME_PAYLOAD, Fields.CONTENT_FIELD_NAME_WEIGHT, Fields.CONTEXT); - - public static class Builder extends FieldMapper.Builder<Builder, CompletionFieldMapper> { - - private boolean preserveSeparators = Defaults.DEFAULT_PRESERVE_SEPARATORS; - private boolean payloads = Defaults.DEFAULT_HAS_PAYLOADS; - private boolean preservePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS; - private int maxInputLength = Defaults.DEFAULT_MAX_INPUT_LENGTH; - private SortedMap<String, ContextMapping> contextMapping = ContextMapping.EMPTY_MAPPING; - - public Builder(String name) { - super(name, Defaults.FIELD_TYPE); - builder = this; - } - - public Builder payloads(boolean payloads) { - this.payloads = payloads; - return this; - } - - public Builder preserveSeparators(boolean preserveSeparators) { - this.preserveSeparators = preserveSeparators; - return this; - } - - public Builder preservePositionIncrements(boolean preservePositionIncrements) { - this.preservePositionIncrements = 
preservePositionIncrements; - return this; - } - - public Builder maxInputLength(int maxInputLength) { - if (maxInputLength <= 0) { - throw new IllegalArgumentException(Fields.MAX_INPUT_LENGTH.getPreferredName() + " must be > 0 but was [" + maxInputLength + "]"); - } - this.maxInputLength = maxInputLength; - return this; - } - - public Builder contextMapping(SortedMap<String, ContextMapping> contextMapping) { - this.contextMapping = contextMapping; - return this; - } - - @Override - public CompletionFieldMapper build(Mapper.BuilderContext context) { - setupFieldType(context); - CompletionFieldType completionFieldType = (CompletionFieldType)fieldType; - completionFieldType.setProvider(new AnalyzingCompletionLookupProvider(preserveSeparators, false, preservePositionIncrements, payloads)); - completionFieldType.setContextMapping(contextMapping); - return new CompletionFieldMapper(name, fieldType, maxInputLength, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo); - } - - } + Fields.CONTENT_FIELD_NAME_WEIGHT, Fields.CONTENT_FIELD_NAME_CONTEXTS); public static class TypeParser implements Mapper.TypeParser { @@ -171,17 +118,12 @@ public class CompletionFieldMapper extends FieldMapper { if (fieldName.equals("type")) { continue; } - if (Fields.ANALYZER.equals(fieldName) || // index_analyzer is for backcompat, remove for v3.0 - fieldName.equals("index_analyzer") && parserContext.indexVersionCreated().before(Version.V_2_0_0_beta1)) { - + if (parserContext.parseFieldMatcher().match(fieldName, Fields.ANALYZER)) { indexAnalyzer = getNamedAnalyzer(parserContext, fieldNode.toString()); iterator.remove(); } else if (parserContext.parseFieldMatcher().match(fieldName, Fields.SEARCH_ANALYZER)) { searchAnalyzer = getNamedAnalyzer(parserContext, fieldNode.toString()); iterator.remove(); - } else if (fieldName.equals(Fields.PAYLOADS)) { - builder.payloads(Boolean.parseBoolean(fieldNode.toString())); - iterator.remove(); } else if 
(parserContext.parseFieldMatcher().match(fieldName, Fields.PRESERVE_SEPARATORS)) { builder.preserveSeparators(Boolean.parseBoolean(fieldNode.toString())); iterator.remove(); @@ -191,14 +133,14 @@ public class CompletionFieldMapper extends FieldMapper { } else if (parserContext.parseFieldMatcher().match(fieldName, Fields.MAX_INPUT_LENGTH)) { builder.maxInputLength(Integer.parseInt(fieldNode.toString())); iterator.remove(); - } else if (parseMultiField(builder, name, parserContext, fieldName, fieldNode)) { + } else if (parserContext.parseFieldMatcher().match(fieldName, Fields.CONTEXTS)) { + builder.contextMappings(ContextMappings.load(fieldNode, parserContext.indexVersionCreated())); iterator.remove(); - } else if (fieldName.equals(Fields.CONTEXT)) { - builder.contextMapping(ContextBuilder.loadMappings(fieldNode, parserContext.indexVersionCreated())); + } else if (parseMultiField(builder, name, parserContext, fieldName, fieldNode)) { iterator.remove(); } } - + if (indexAnalyzer == null) { if (searchAnalyzer != null) { throw new MapperParsingException("analyzer on completion field [" + name + "] must be set when search_analyzer is set"); @@ -207,9 +149,9 @@ public class CompletionFieldMapper extends FieldMapper { } else if (searchAnalyzer == null) { searchAnalyzer = indexAnalyzer; } + builder.indexAnalyzer(indexAnalyzer); builder.searchAnalyzer(searchAnalyzer); - return builder; } @@ -223,40 +165,138 @@ public class CompletionFieldMapper extends FieldMapper { } public static final class CompletionFieldType extends MappedFieldType { - private PostingsFormat postingsFormat; - private AnalyzingCompletionLookupProvider analyzingSuggestLookupProvider; - private SortedMap<String, ContextMapping> contextMapping = ContextMapping.EMPTY_MAPPING; + + private static PostingsFormat postingsFormat; + + private boolean preserveSep = Defaults.DEFAULT_PRESERVE_SEPARATORS; + private boolean preservePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS; + private ContextMappings 
contextMappings = null; public CompletionFieldType() { setFieldDataType(null); } - protected CompletionFieldType(CompletionFieldType ref) { + private CompletionFieldType(CompletionFieldType ref) { super(ref); - this.postingsFormat = ref.postingsFormat; - this.analyzingSuggestLookupProvider = ref.analyzingSuggestLookupProvider; - this.contextMapping = ref.contextMapping; + this.contextMappings = ref.contextMappings; + this.preserveSep = ref.preserveSep; + this.preservePositionIncrements = ref.preservePositionIncrements; + } + + public void setPreserveSep(boolean preserveSep) { + checkIfFrozen(); + this.preserveSep = preserveSep; + } + + public void setPreservePositionIncrements(boolean preservePositionIncrements) { + checkIfFrozen(); + this.preservePositionIncrements = preservePositionIncrements; + } + + public void setContextMappings(ContextMappings contextMappings) { + checkIfFrozen(); + this.contextMappings = contextMappings; + } + + @Override + public NamedAnalyzer indexAnalyzer() { + final NamedAnalyzer indexAnalyzer = super.indexAnalyzer(); + if (indexAnalyzer != null && !(indexAnalyzer.analyzer() instanceof CompletionAnalyzer)) { + return new NamedAnalyzer(indexAnalyzer.name(), + new CompletionAnalyzer(indexAnalyzer, preserveSep, preservePositionIncrements)); + + } + return indexAnalyzer; + } + + @Override + public NamedAnalyzer searchAnalyzer() { + final NamedAnalyzer searchAnalyzer = super.searchAnalyzer(); + if (searchAnalyzer != null && !(searchAnalyzer.analyzer() instanceof CompletionAnalyzer)) { + return new NamedAnalyzer(searchAnalyzer.name(), + new CompletionAnalyzer(searchAnalyzer, preserveSep, preservePositionIncrements)); + } + return searchAnalyzer; + } + + /** + * @return true if there are one or more context mappings defined + * for this field type + */ + public boolean hasContextMappings() { + return contextMappings != null; + } + + /** + * @return associated context mappings for this field type + */ + public ContextMappings 
getContextMappings() { + return contextMappings; + } + + public boolean preserveSep() { + return preserveSep; + } + + public boolean preservePositionIncrements() { + return preservePositionIncrements; + } + + /** + * @return postings format to use for this field-type + */ + public static synchronized PostingsFormat postingsFormat() { + if (postingsFormat == null) { + postingsFormat = new Completion50PostingsFormat(); + } + return postingsFormat; + } + + /** + * Completion prefix query + */ + public CompletionQuery prefixQuery(Object value) { + return new PrefixCompletionQuery(searchAnalyzer().analyzer(), createTerm(value)); + } + + /** + * Completion prefix regular expression query + */ + public CompletionQuery regexpQuery(Object value, int flags, int maxDeterminizedStates) { + return new RegexCompletionQuery(createTerm(value), flags, maxDeterminizedStates); + } + + /** + * Completion prefix fuzzy query + */ + public CompletionQuery fuzzyQuery(String value, Fuzziness fuzziness, int nonFuzzyPrefixLength, + int minFuzzyPrefixLength, int maxExpansions, boolean transpositions, + boolean unicodeAware) { + return new FuzzyCompletionQuery(searchAnalyzer().analyzer(), createTerm(value), null, + fuzziness.asDistance(), transpositions, nonFuzzyPrefixLength, minFuzzyPrefixLength, + unicodeAware, maxExpansions); } @Override public boolean equals(Object o) { if (this == o) return true; - if (!(o instanceof CompletionFieldType)) return false; + if (o == null || getClass() != o.getClass()) return false; if (!super.equals(o)) return false; - CompletionFieldType fieldType = (CompletionFieldType) o; - return analyzingSuggestLookupProvider.getPreserveSep() == fieldType.analyzingSuggestLookupProvider.getPreserveSep() && - analyzingSuggestLookupProvider.getPreservePositionsIncrements() == fieldType.analyzingSuggestLookupProvider.getPreservePositionsIncrements() && - analyzingSuggestLookupProvider.hasPayloads() == fieldType.analyzingSuggestLookupProvider.hasPayloads() && - 
Objects.equals(getContextMapping(), fieldType.getContextMapping()); + + CompletionFieldType that = (CompletionFieldType) o; + + if (preserveSep != that.preserveSep) return false; + if (preservePositionIncrements != that.preservePositionIncrements) return false; + return !(contextMappings != null ? !contextMappings.equals(that.contextMappings) : that.contextMappings != null); + } @Override public int hashCode() { return Objects.hash(super.hashCode(), - analyzingSuggestLookupProvider.getPreserveSep(), - analyzingSuggestLookupProvider.getPreservePositionsIncrements(), - analyzingSuggestLookupProvider.hasPayloads(), - getContextMapping()); + preserveSep, + preservePositionIncrements, + contextMappings); } @Override @@ -273,69 +313,99 @@ public class CompletionFieldMapper extends FieldMapper { public void checkCompatibility(MappedFieldType fieldType, List<String> conflicts, boolean strict) { super.checkCompatibility(fieldType, conflicts, strict); CompletionFieldType other = (CompletionFieldType)fieldType; - if (analyzingSuggestLookupProvider.hasPayloads() != other.analyzingSuggestLookupProvider.hasPayloads()) { - conflicts.add("mapper [" + names().fullName() + "] has different [payload] values"); - } - if (analyzingSuggestLookupProvider.getPreservePositionsIncrements() != other.analyzingSuggestLookupProvider.getPreservePositionsIncrements()) { + + if (preservePositionIncrements != other.preservePositionIncrements) { conflicts.add("mapper [" + names().fullName() + "] has different [preserve_position_increments] values"); } - if (analyzingSuggestLookupProvider.getPreserveSep() != other.analyzingSuggestLookupProvider.getPreserveSep()) { + if (preserveSep != other.preserveSep) { conflicts.add("mapper [" + names().fullName() + "] has different [preserve_separators] values"); } - if(!ContextMapping.mappingsAreEqual(getContextMapping(), other.getContextMapping())) { - conflicts.add("mapper [" + names().fullName() + "] has different [context_mapping] values"); + if 
(hasContextMappings() != other.hasContextMappings()) { + conflicts.add("mapper [" + names().fullName() + "] has different [context_mappings] values"); + } else if (hasContextMappings() && contextMappings.equals(other.contextMappings) == false) { + conflicts.add("mapper [" + names().fullName() + "] has different [context_mappings] values"); } } - public void setProvider(AnalyzingCompletionLookupProvider provider) { - checkIfFrozen(); - this.analyzingSuggestLookupProvider = provider; + @Override + public String value(Object value) { + if (value == null) { + return null; + } + return value.toString(); } - public synchronized PostingsFormat postingsFormat(PostingsFormat in) { - if (in instanceof Completion090PostingsFormat) { - throw new IllegalStateException("Double wrapping of " + Completion090PostingsFormat.class); - } - if (postingsFormat == null) { - postingsFormat = new Completion090PostingsFormat(in, analyzingSuggestLookupProvider); - } - return postingsFormat; + @Override + public boolean isSortable() { + return false; } - public void setContextMapping(SortedMap<String, ContextMapping> contextMapping) { - checkIfFrozen(); - this.contextMapping = contextMapping; + } + + /** + * Builder for {@link CompletionFieldMapper} + */ + public static class Builder extends FieldMapper.Builder<Builder, CompletionFieldMapper> { + + private int maxInputLength = Defaults.DEFAULT_MAX_INPUT_LENGTH; + private ContextMappings contextMappings = null; + private boolean preserveSeparators = Defaults.DEFAULT_PRESERVE_SEPARATORS; + private boolean preservePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS; + + /** + * @param name of the completion field to build + */ + public Builder(String name) { + super(name, new CompletionFieldType()); + builder = this; } - /** Get the context mapping associated with this completion field */ - public SortedMap<String, ContextMapping> getContextMapping() { - return contextMapping; + /** + * @param maxInputLength maximum expected prefix length 
+ * NOTE: prefixes longer than this will + * be truncated + */ + public Builder maxInputLength(int maxInputLength) { + if (maxInputLength <= 0) { + throw new IllegalArgumentException(Fields.MAX_INPUT_LENGTH.getPreferredName() + " must be > 0 but was [" + maxInputLength + "]"); + } + this.maxInputLength = maxInputLength; + return this; } - /** @return true if a context mapping has been defined */ - public boolean requiresContext() { - return contextMapping.isEmpty() == false; + /** + * Add context mapping to this field + * @param contextMappings see {@link ContextMappings#load(Object, Version)} + */ + public Builder contextMappings(ContextMappings contextMappings) { + this.contextMappings = contextMappings; + return this; } - @Override - public String value(Object value) { - if (value == null) { - return null; - } - return value.toString(); + public Builder preserveSeparators(boolean preserveSeparators) { + this.preserveSeparators = preserveSeparators; + return this; + } + + public Builder preservePositionIncrements(boolean preservePositionIncrements) { + this.preservePositionIncrements = preservePositionIncrements; + return this; } @Override - public boolean isSortable() { - return false; + public CompletionFieldMapper build(BuilderContext context) { + setupFieldType(context); + CompletionFieldType completionFieldType = (CompletionFieldType) this.fieldType; + completionFieldType.setContextMappings(contextMappings); + completionFieldType.setPreservePositionIncrements(preservePositionIncrements); + completionFieldType.setPreserveSep(preserveSeparators); + return new CompletionFieldMapper(name, this.fieldType, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo, maxInputLength); } } - private static final BytesRef EMPTY = new BytesRef(); - private int maxInputLength; - public CompletionFieldMapper(String simpleName, MappedFieldType fieldType, int maxInputLength, Settings indexSettings, MultiFields multiFields, CopyTo copyTo) { + public 
CompletionFieldMapper(String simpleName, MappedFieldType fieldType, Settings indexSettings, MultiFields multiFields, CopyTo copyTo, int maxInputLength) { super(simpleName, fieldType, Defaults.FIELD_TYPE, indexSettings, multiFields, copyTo); this.maxInputLength = maxInputLength; } @@ -345,216 +415,188 @@ public class CompletionFieldMapper extends FieldMapper { return (CompletionFieldType) super.fieldType(); } + /** + * Parses and indexes inputs + * + * Parsing: + * Acceptable format: + * "STRING" - interpreted as field value (input) + * "ARRAY" - each element can be one of {@link #parse(ParseContext, Token, XContentParser, Map)} + * "OBJECT" - see {@link #parse(ParseContext, Token, XContentParser, Map)} + * + * Indexing: + * if context mappings are defined, delegates to {@link ContextMappings#addField(ParseContext.Document, String, String, int, Map)} + * else adds inputs as a {@link org.apache.lucene.search.suggest.document.SuggestField} + */ @Override public Mapper parse(ParseContext context) throws IOException { + // parse XContentParser parser = context.parser(); - XContentParser.Token token = parser.currentToken(); - if (token == XContentParser.Token.VALUE_NULL) { + Token token = parser.currentToken(); + Map<String, CompletionInputMetaData> inputMap = new HashMap<>(1); + if (token == Token.VALUE_NULL) { throw new MapperParsingException("completion field [" + fieldType().names().fullName() + "] does not support null values"); + } else if (token == Token.START_ARRAY) { + while ((token = parser.nextToken()) != Token.END_ARRAY) { + parse(context, token, parser, inputMap); + } + } else { + parse(context, token, parser, inputMap); + } + + // index + for (Map.Entry<String, CompletionInputMetaData> completionInput : inputMap.entrySet()) { + String input = completionInput.getKey(); + // truncate input + if (input.length() > maxInputLength) { + int len = Math.min(maxInputLength, input.length()); + if (Character.isHighSurrogate(input.charAt(len - 1))) { + assert 
input.length() >= len + 1 && Character.isLowSurrogate(input.charAt(len)); + len += 1; + } + input = input.substring(0, len); + } + CompletionInputMetaData metaData = completionInput.getValue(); + if (fieldType().hasContextMappings()) { + fieldType().getContextMappings().addField(context.doc(), fieldType().names().indexName(), + input, metaData.weight, metaData.contexts); + } else { + context.doc().add(new SuggestField(fieldType().names().indexName(), input, metaData.weight)); + } } + multiFields.parse(this, context); + return null; + } - String surfaceForm = null; - BytesRef payload = null; - long weight = -1; - List<String> inputs = new ArrayList<>(4); - - SortedMap<String, ContextConfig> contextConfig = null; - - if (token == XContentParser.Token.VALUE_STRING) { - inputs.add(parser.text()); - multiFields.parse(this, context); - } else { - String currentFieldName = null; - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { + /** + * Acceptable inputs: + * "STRING" - interpreted as the field value (input) + * "OBJECT" - { "input": STRING|ARRAY, "weight": STRING|INT, "contexts": ARRAY|OBJECT } + */ + private void parse(ParseContext parseContext, Token token, XContentParser parser, Map<String, CompletionInputMetaData> inputMap) throws IOException { + String currentFieldName = null; + if (token == Token.VALUE_STRING) { + inputMap.put(parser.text(), new CompletionInputMetaData(Collections.<String, Set<CharSequence>>emptyMap(), 1)); + } else if (token == Token.START_OBJECT) { + Set<String> inputs = new HashSet<>(); + int weight = 1; + Map<String, Set<CharSequence>> contextsMap = new HashMap<>(); + while ((token = parser.nextToken()) != Token.END_OBJECT) { + if (token == Token.FIELD_NAME) { currentFieldName = parser.currentName(); if (!ALLOWED_CONTENT_FIELD_NAMES.contains(currentFieldName)) { - throw new IllegalArgumentException("Unknown field name[" + currentFieldName + "], must be one of " + 
ALLOWED_CONTENT_FIELD_NAMES); + throw new IllegalArgumentException("unknown field name [" + currentFieldName + "], must be one of " + ALLOWED_CONTENT_FIELD_NAMES); } - } else if (Fields.CONTEXT.equals(currentFieldName)) { - SortedMap<String, ContextConfig> configs = new TreeMap<>(); - - if (token == Token.START_OBJECT) { - while ((token = parser.nextToken()) != Token.END_OBJECT) { - String name = parser.text(); - ContextMapping mapping = fieldType().getContextMapping().get(name); - if (mapping == null) { - throw new ElasticsearchParseException("context [{}] is not defined", name); - } else { - token = parser.nextToken(); - configs.put(name, mapping.parseContext(context, parser)); + } else if (currentFieldName != null) { + if (Fields.CONTENT_FIELD_NAME_INPUT.equals(currentFieldName)) { + if (token == Token.VALUE_STRING) { + inputs.add(parser.text()); + } else if (token == Token.START_ARRAY) { + while ((token = parser.nextToken()) != Token.END_ARRAY) { + if (token == Token.VALUE_STRING) { + inputs.add(parser.text()); + } else { + throw new IllegalArgumentException("input array must have string values, but was [" + token.name() + "]"); + } } + } else { + throw new IllegalArgumentException("input must be a string or array, but was [" + token.name() + "]"); } - contextConfig = new TreeMap<>(); - for (ContextMapping mapping : fieldType().getContextMapping().values()) { - ContextConfig config = configs.get(mapping.name()); - contextConfig.put(mapping.name(), config==null ? 
mapping.defaultConfig() : config); + } else if (Fields.CONTENT_FIELD_NAME_WEIGHT.equals(currentFieldName)) { + final Number weightValue; + if (token == Token.VALUE_STRING) { + try { + weightValue = Long.parseLong(parser.text()); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("weight must be an integer, but was [" + parser.text() + "]"); + } + } else if (token == Token.VALUE_NUMBER) { + NumberType numberType = parser.numberType(); + if (NumberType.LONG != numberType && NumberType.INT != numberType) { + throw new IllegalArgumentException("weight must be an integer, but was [" + parser.numberValue() + "]"); + } + weightValue = parser.numberValue(); + } else { + throw new IllegalArgumentException("weight must be a number or string, but was [" + token.name() + "]"); } - } else { - throw new ElasticsearchParseException("context must be an object"); - } - } else if (Fields.CONTENT_FIELD_NAME_PAYLOAD.equals(currentFieldName)) { - if (!isStoringPayloads()) { - throw new MapperException("Payloads disabled in mapping"); - } - if (token == XContentParser.Token.START_OBJECT) { - XContentBuilder payloadBuilder = XContentFactory.contentBuilder(parser.contentType()).copyCurrentStructure(parser); - payload = payloadBuilder.bytes().toBytesRef(); - payloadBuilder.close(); - } else if (token.isValue()) { - payload = parser.utf8BytesOrNull(); - } else { - throw new MapperException("payload doesn't support type " + token); - } - } else if (token == XContentParser.Token.VALUE_STRING) { - if (Fields.CONTENT_FIELD_NAME_OUTPUT.equals(currentFieldName)) { - surfaceForm = parser.text(); - } - if (Fields.CONTENT_FIELD_NAME_INPUT.equals(currentFieldName)) { - inputs.add(parser.text()); - } - if (Fields.CONTENT_FIELD_NAME_WEIGHT.equals(currentFieldName)) { - Number weightValue; - try { - weightValue = Long.parseLong(parser.text()); - } catch (NumberFormatException e) { - throw new IllegalArgumentException("Weight must be a string representing a numeric value, but was 
[" + parser.text() + "]"); + if (weightValue.longValue() < 0 || weightValue.longValue() > Integer.MAX_VALUE) { // always parse a long to make sure we don't get overflow + throw new IllegalArgumentException("weight must be in the interval [0..2147483647], but was [" + weightValue.longValue() + "]"); } - weight = weightValue.longValue(); // always parse a long to make sure we don't get overflow - checkWeight(weight); - } - } else if (token == XContentParser.Token.VALUE_NUMBER) { - if (Fields.CONTENT_FIELD_NAME_WEIGHT.equals(currentFieldName)) { - NumberType numberType = parser.numberType(); - if (NumberType.LONG != numberType && NumberType.INT != numberType) { - throw new IllegalArgumentException("Weight must be an integer, but was [" + parser.numberValue() + "]"); + weight = weightValue.intValue(); + } else if (Fields.CONTENT_FIELD_NAME_CONTEXTS.equals(currentFieldName)) { + if (fieldType().hasContextMappings() == false) { + throw new IllegalArgumentException("contexts field is not supported for field: [" + fieldType().names().fullName() + "]"); } - weight = parser.longValue(); // always parse a long to make sure we don't get overflow - checkWeight(weight); - } - } else if (token == XContentParser.Token.START_ARRAY) { - if (Fields.CONTENT_FIELD_NAME_INPUT.equals(currentFieldName)) { - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - inputs.add(parser.text()); + ContextMappings contextMappings = fieldType().getContextMappings(); + XContentParser.Token currentToken = parser.currentToken(); + if (currentToken == XContentParser.Token.START_OBJECT) { + ContextMapping contextMapping = null; + String fieldName = null; + while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (currentToken == XContentParser.Token.FIELD_NAME) { + fieldName = parser.currentName(); + contextMapping = contextMappings.get(fieldName); + } else if (currentToken == XContentParser.Token.VALUE_STRING + || currentToken == 
XContentParser.Token.START_ARRAY + || currentToken == XContentParser.Token.START_OBJECT) { + assert fieldName != null; + assert !contextsMap.containsKey(fieldName); + contextsMap.put(fieldName, contextMapping.parseContext(parseContext, parser)); + } else { + throw new IllegalArgumentException("contexts must be an object or an array , but was [" + currentToken + "]"); + } + } + } else { + throw new IllegalArgumentException("contexts must be an object or an array , but was [" + currentToken + "]"); } } } } - } - - if(contextConfig == null) { - contextConfig = new TreeMap<>(); - for (ContextMapping mapping : fieldType().getContextMapping().values()) { - contextConfig.put(mapping.name(), mapping.defaultConfig()); - } - } - - final ContextMapping.Context ctx = new ContextMapping.Context(contextConfig, context.doc()); - - payload = payload == null ? EMPTY : payload; - if (surfaceForm == null) { // no surface form use the input for (String input : inputs) { - if (input.length() == 0) { - continue; + if (inputMap.containsKey(input) == false || inputMap.get(input).weight < weight) { + inputMap.put(input, new CompletionInputMetaData(contextsMap, weight)); } - BytesRef suggestPayload = fieldType().analyzingSuggestLookupProvider.buildPayload(new BytesRef( - input), weight, payload); - context.doc().add(getCompletionField(ctx, input, suggestPayload)); } } else { - BytesRef suggestPayload = fieldType().analyzingSuggestLookupProvider.buildPayload(new BytesRef( - surfaceForm), weight, payload); - for (String input : inputs) { - if (input.length() == 0) { - continue; - } - context.doc().add(getCompletionField(ctx, input, suggestPayload)); - } + throw new ElasticsearchParseException("failed to parse expected text or object got" + token.name()); } - return null; } - private void checkWeight(long weight) { - if (weight < 0 || weight > Integer.MAX_VALUE) { - throw new IllegalArgumentException("Weight must be in the interval [0..2147483647], but was [" + weight + "]"); - } - } + static 
class CompletionInputMetaData { + public final Map<String, Set<CharSequence>> contexts; + public final int weight; - public Field getCompletionField(ContextMapping.Context ctx, String input, BytesRef payload) { - final String originalInput = input; - if (input.length() > maxInputLength) { - final int len = correctSubStringLen(input, Math.min(maxInputLength, input.length())); - input = input.substring(0, len); - } - for (int i = 0; i < input.length(); i++) { - if (isReservedChar(input.charAt(i))) { - throw new IllegalArgumentException("Illegal input [" + originalInput + "] UTF-16 codepoint [0x" - + Integer.toHexString((int) input.charAt(i)).toUpperCase(Locale.ROOT) - + "] at position " + i + " is a reserved character"); - } + CompletionInputMetaData(Map<String, Set<CharSequence>> contexts, int weight) { + this.contexts = contexts; + this.weight = weight; } - return new SuggestField(fieldType().names().indexName(), ctx, input, fieldType(), payload, fieldType().analyzingSuggestLookupProvider); } - public static int correctSubStringLen(String input, int len) { - if (Character.isHighSurrogate(input.charAt(len - 1))) { - assert input.length() >= len + 1 && Character.isLowSurrogate(input.charAt(len)); - return len + 1; - } - return len; - } - - public BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException { - return fieldType().analyzingSuggestLookupProvider.buildPayload(surfaceForm, weight, payload); - } - - private static final class SuggestField extends Field { - private final BytesRef payload; - private final CompletionTokenStream.ToFiniteStrings toFiniteStrings; - private final ContextMapping.Context ctx; - - public SuggestField(String name, ContextMapping.Context ctx, String value, MappedFieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) { - super(name, value, type); - this.payload = payload; - this.toFiniteStrings = toFiniteStrings; - this.ctx = ctx; - } - - @Override - public TokenStream 
tokenStream(Analyzer analyzer, TokenStream previous) throws IOException { - TokenStream ts = ctx.wrapTokenStream(super.tokenStream(analyzer, previous)); - return new CompletionTokenStream(ts, payload, toFiniteStrings); - } - } - @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(simpleName()) - .field(Fields.TYPE, CONTENT_TYPE); - - builder.field(Fields.ANALYZER, fieldType().indexAnalyzer().name()); + .field(Fields.TYPE.getPreferredName(), CONTENT_TYPE); + builder.field(Fields.ANALYZER.getPreferredName(), fieldType().indexAnalyzer().name()); if (fieldType().indexAnalyzer().name().equals(fieldType().searchAnalyzer().name()) == false) { builder.field(Fields.SEARCH_ANALYZER.getPreferredName(), fieldType().searchAnalyzer().name()); } - builder.field(Fields.PAYLOADS, fieldType().analyzingSuggestLookupProvider.hasPayloads()); - builder.field(Fields.PRESERVE_SEPARATORS.getPreferredName(), fieldType().analyzingSuggestLookupProvider.getPreserveSep()); - builder.field(Fields.PRESERVE_POSITION_INCREMENTS.getPreferredName(), fieldType().analyzingSuggestLookupProvider.getPreservePositionsIncrements()); + builder.field(Fields.PRESERVE_SEPARATORS.getPreferredName(), fieldType().preserveSep()); + builder.field(Fields.PRESERVE_POSITION_INCREMENTS.getPreferredName(), fieldType().preservePositionIncrements()); builder.field(Fields.MAX_INPUT_LENGTH.getPreferredName(), this.maxInputLength); - multiFields.toXContent(builder, params); - if(fieldType().requiresContext()) { - builder.startObject(Fields.CONTEXT); - for (ContextMapping mapping : fieldType().getContextMapping().values()) { - builder.value(mapping); - } - builder.endObject(); + if (fieldType().hasContextMappings()) { + builder.startArray(Fields.CONTEXTS.getPreferredName()); + fieldType().getContextMappings().toXContent(builder, params); + builder.endArray(); } + multiFields.toXContent(builder, params); return builder.endObject(); } @Override protected 
void parseCreateField(ParseContext context, List<Field> fields) throws IOException { + // no-op } @Override @@ -562,10 +604,6 @@ public class CompletionFieldMapper extends FieldMapper { return CONTENT_TYPE; } - public boolean isStoringPayloads() { - return fieldType().analyzingSuggestLookupProvider.hasPayloads(); - } - @Override public void merge(Mapper mergeWith, MergeResult mergeResult) throws MergeMappingException { super.merge(mergeWith, mergeResult); @@ -574,22 +612,4 @@ public class CompletionFieldMapper extends FieldMapper { this.maxInputLength = fieldMergeWith.maxInputLength; } } - - // this should be package private but our tests don't allow it. - public static boolean isReservedChar(char character) { - /* we use 0x001F as a SEP_LABEL in the suggester but we can use the UTF-16 representation since they - * are equivalent. We also don't need to convert the input character to UTF-8 here to check for - * the 0x00 end label since all multi-byte UTF-8 chars start with 0x10 binary so if the UTF-16 CP is == 0x00 - * it's the single byte UTF-8 CP */ - assert XAnalyzingSuggester.PAYLOAD_SEP == XAnalyzingSuggester.SEP_LABEL; // ensure they are the same! 
- switch(character) { - case XAnalyzingSuggester.END_BYTE: - case XAnalyzingSuggester.SEP_LABEL: - case XAnalyzingSuggester.HOLE_CHARACTER: - case ContextMapping.SEPARATOR: - return true; - default: - return false; - } - } } diff --git a/core/src/main/java/org/elasticsearch/index/query/RegexpFlag.java b/core/src/main/java/org/elasticsearch/index/query/RegexpFlag.java index 45f58c47da..1ebf44e23f 100644 --- a/core/src/main/java/org/elasticsearch/index/query/RegexpFlag.java +++ b/core/src/main/java/org/elasticsearch/index/query/RegexpFlag.java @@ -108,7 +108,7 @@ public enum RegexpFlag { * @param flags A string representing a list of regular expression flags * @return The combined OR'ed value for all the flags */ - static int resolveValue(String flags) { + public static int resolveValue(String flags) { if (flags == null || flags.isEmpty()) { return RegExp.ALL; } diff --git a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java index 6e7893df98..aad1497c9d 100644 --- a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -19,7 +19,6 @@ package org.elasticsearch.index.shard; -import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy; @@ -106,8 +105,8 @@ import org.elasticsearch.indices.memory.IndexingMemoryController; import org.elasticsearch.indices.recovery.RecoveryFailedException; import org.elasticsearch.indices.recovery.RecoveryState; import org.elasticsearch.percolator.PercolatorService; -import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat; import org.elasticsearch.search.suggest.completion.CompletionStats; +import org.elasticsearch.search.suggest.completion.CompletionFieldStats; import org.elasticsearch.threadpool.ThreadPool; import 
java.io.IOException; @@ -618,15 +617,8 @@ public class IndexShard extends AbstractIndexShardComponent { public CompletionStats completionStats(String... fields) { CompletionStats completionStats = new CompletionStats(); - final Engine.Searcher currentSearcher = acquireSearcher("completion_stats"); - try { - PostingsFormat postingsFormat = PostingsFormat.forName(Completion090PostingsFormat.CODEC_NAME); - if (postingsFormat instanceof Completion090PostingsFormat) { - Completion090PostingsFormat completionPostingsFormat = (Completion090PostingsFormat) postingsFormat; - completionStats.add(completionPostingsFormat.completionStats(currentSearcher.reader(), fields)); - } - } finally { - currentSearcher.close(); + try (final Engine.Searcher currentSearcher = acquireSearcher("completion_stats")) { + completionStats.add(CompletionFieldStats.completionStats(currentSearcher.reader(), fields)); } return completionStats; } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java index ea45c1033e..5621e03e7d 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java @@ -20,9 +20,6 @@ package org.elasticsearch.search.suggest; import org.elasticsearch.action.support.ToXContentToBytes; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.search.suggest.context.CategoryContextMapping; -import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery; -import org.elasticsearch.search.suggest.context.GeolocationContextMapping; import java.io.IOException; import java.util.ArrayList; @@ -101,90 +98,18 @@ public class SuggestBuilder extends ToXContentToBytes { private String name; private String suggester; private String text; + private String prefix; + private String regex; private String field; private String analyzer; private Integer size; 
private Integer shardSize; - - private List<ContextQuery> contextQueries = new ArrayList<>(); public SuggestionBuilder(String name, String suggester) { this.name = name; this.suggester = suggester; } - @SuppressWarnings("unchecked") - private T addContextQuery(ContextQuery ctx) { - this.contextQueries.add(ctx); - return (T) this; - } - - /** - * Setup a Geolocation for suggestions. See {@link GeolocationContextMapping}. - * @param lat Latitude of the location - * @param lon Longitude of the Location - * @return this - */ - public T addGeoLocation(String name, double lat, double lon, int ... precisions) { - return addContextQuery(GeolocationContextMapping.query(name, lat, lon, precisions)); - } - - /** - * Setup a Geolocation for suggestions. See {@link GeolocationContextMapping}. - * @param lat Latitude of the location - * @param lon Longitude of the Location - * @param precisions precisions as string var-args - * @return this - */ - public T addGeoLocationWithPrecision(String name, double lat, double lon, String ... precisions) { - return addContextQuery(GeolocationContextMapping.query(name, lat, lon, precisions)); - } - - /** - * Setup a Geolocation for suggestions. See {@link GeolocationContextMapping}. - * @param geohash Geohash of the location - * @return this - */ - public T addGeoLocation(String name, String geohash) { - return addContextQuery(GeolocationContextMapping.query(name, geohash)); - } - - /** - * Setup a Category for suggestions. See {@link CategoryContextMapping}. - * @param categories name of the category - * @return this - */ - public T addCategory(String name, CharSequence...categories) { - return addContextQuery(CategoryContextMapping.query(name, categories)); - } - - /** - * Setup a Category for suggestions. See {@link CategoryContextMapping}. - * @param categories name of the category - * @return this - */ - public T addCategory(String name, Iterable<? 
extends CharSequence> categories) { - return addContextQuery(CategoryContextMapping.query(name, categories)); - } - - /** - * Setup a Context Field for suggestions. See {@link CategoryContextMapping}. - * @param fieldvalues name of the category - * @return this - */ - public T addContextField(String name, CharSequence...fieldvalues) { - return addContextQuery(CategoryContextMapping.query(name, fieldvalues)); - } - - /** - * Setup a Context Field for suggestions. See {@link CategoryContextMapping}. - * @param fieldvalues name of the category - * @return this - */ - public T addContextField(String name, Iterable<? extends CharSequence> fieldvalues) { - return addContextQuery(CategoryContextMapping.query(name, fieldvalues)); - } - /** * Same as in {@link SuggestBuilder#setText(String)}, but in the suggestion scope. */ @@ -194,12 +119,26 @@ public class SuggestBuilder extends ToXContentToBytes { return (T) this; } + protected void setPrefix(String prefix) { + this.prefix = prefix; + } + + protected void setRegex(String regex) { + this.regex = regex; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(name); if (text != null) { builder.field("text", text); } + if (prefix != null) { + builder.field("prefix", prefix); + } + if (regex != null) { + builder.field("regex", regex); + } builder.startObject(suggester); if (analyzer != null) { builder.field("analyzer", analyzer); @@ -214,13 +153,6 @@ public class SuggestBuilder extends ToXContentToBytes { builder.field("shard_size", shardSize); } - if (!contextQueries.isEmpty()) { - builder.startObject("context"); - for (ContextQuery query : contextQueries) { - query.toXContent(builder, params); - } - builder.endObject(); - } builder = innerToXContent(builder, params); builder.endObject(); builder.endObject(); diff --git a/core/src/main/java/org/elasticsearch/search/suggest/SuggestBuilders.java 
b/core/src/main/java/org/elasticsearch/search/suggest/SuggestBuilders.java index 16957986e2..66b917394f 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/SuggestBuilders.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/SuggestBuilders.java @@ -20,7 +20,6 @@ package org.elasticsearch.search.suggest; import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder; -import org.elasticsearch.search.suggest.completion.CompletionSuggestionFuzzyBuilder; import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder; import org.elasticsearch.search.suggest.term.TermSuggestionBuilder; @@ -61,15 +60,4 @@ public abstract class SuggestBuilders { public static CompletionSuggestionBuilder completionSuggestion(String name) { return new CompletionSuggestionBuilder(name); } - - /** - * Creates a fuzzy completion suggestion lookup query with the provided <code>name</code> - * - * @param name The suggestion name - * @return a {@link org.elasticsearch.search.suggest.completion.CompletionSuggestionFuzzyBuilder} - * instance - */ - public static CompletionSuggestionFuzzyBuilder fuzzyCompletionSuggestion(String name) { - return new CompletionSuggestionFuzzyBuilder(name); - } } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/SuggestContextParser.java b/core/src/main/java/org/elasticsearch/search/suggest/SuggestContextParser.java index ddb2235513..be6ee096e5 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/SuggestContextParser.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/SuggestContextParser.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.suggest; import org.elasticsearch.common.HasContextAndHeaders; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.fielddata.IndexFieldDataService; import org.elasticsearch.index.mapper.MapperService; import java.io.IOException; diff --git 
a/core/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java b/core/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java index 23bdaab99a..314afbefc1 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java @@ -22,6 +22,7 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.HasContextAndHeaders; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.fielddata.IndexFieldDataService; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.search.SearchParseElement; import org.elasticsearch.search.internal.SearchContext; @@ -70,6 +71,8 @@ public final class SuggestParseElement implements SearchParseElement { } else if (token == XContentParser.Token.START_OBJECT) { String suggestionName = fieldName; BytesRef suggestText = null; + BytesRef prefix = null; + BytesRef regex = null; SuggestionContext suggestionContext = null; while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { @@ -78,6 +81,10 @@ public final class SuggestParseElement implements SearchParseElement { } else if (token.isValue()) { if ("text".equals(fieldName)) { suggestText = parser.utf8Bytes(); + } else if ("prefix".equals(fieldName)) { + prefix = parser.utf8Bytes(); + } else if ("regex".equals(fieldName)) { + regex = parser.utf8Bytes(); } else { throw new IllegalArgumentException("[suggest] does not support [" + fieldName + "]"); } @@ -93,10 +100,18 @@ public final class SuggestParseElement implements SearchParseElement { } } if (suggestionContext != null) { - suggestionContext.setText(suggestText); + if (suggestText != null && prefix == null) { + suggestionContext.setPrefix(suggestText); + suggestionContext.setText(suggestText); + } else if (suggestText == null && prefix != null) { + 
suggestionContext.setPrefix(prefix); + suggestionContext.setText(prefix); + } else if (regex != null) { + suggestionContext.setRegex(regex); + suggestionContext.setText(regex); + } suggestionContexts.put(suggestionName, suggestionContext); } - } } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java b/core/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java index 2cb36f5391..1d3339e057 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java @@ -40,6 +40,8 @@ public class SuggestionSearchContext { public static class SuggestionContext { private BytesRef text; + private BytesRef prefix; + private BytesRef regex; private final Suggester suggester; private String field; private Analyzer analyzer; @@ -55,7 +57,23 @@ public class SuggestionSearchContext { public void setText(BytesRef text) { this.text = text; } - + + public BytesRef getPrefix() { + return prefix; + } + + public void setPrefix(BytesRef prefix) { + this.prefix = prefix; + } + + public BytesRef getRegex() { + return regex; + } + + public void setRegex(BytesRef regex) { + this.regex = regex; + } + public SuggestionContext(Suggester suggester) { this.suggester = suggester; } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java deleted file mode 100644 index c5b1b5931e..0000000000 --- a/core/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java +++ /dev/null @@ -1,407 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. 
Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.search.suggest.completion; - -import com.carrotsearch.hppc.ObjectLongHashMap; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.TokenStreamToAutomaton; -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.FieldsConsumer; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.Fields; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.suggest.Lookup; -import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester; -import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.Accountables; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.IntsRef; -import org.apache.lucene.util.automaton.Automaton; -import org.apache.lucene.util.automaton.LimitedFiniteStringsIterator; -import org.apache.lucene.util.fst.ByteSequenceOutputs; -import org.apache.lucene.util.fst.FST; -import org.apache.lucene.util.fst.PairOutputs; -import org.apache.lucene.util.fst.PairOutputs.Pair; -import org.apache.lucene.util.fst.PositiveIntOutputs; 
-import org.elasticsearch.common.regex.Regex; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.core.CompletionFieldMapper; -import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.CompletionLookupProvider; -import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.LookupFactory; -import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery; - -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; - -public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider { - - // for serialization - public static final int SERIALIZE_PRESERVE_SEPARATORS = 1; - public static final int SERIALIZE_HAS_PAYLOADS = 2; - public static final int SERIALIZE_PRESERVE_POSITION_INCREMENTS = 4; - - private static final int MAX_SURFACE_FORMS_PER_ANALYZED_FORM = 256; - private static final int MAX_GRAPH_EXPANSIONS = -1; - - public static final String CODEC_NAME = "analyzing"; - public static final int CODEC_VERSION_START = 1; - public static final int CODEC_VERSION_SERIALIZED_LABELS = 2; - public static final int CODEC_VERSION_CHECKSUMS = 3; - public static final int CODEC_VERSION_LATEST = CODEC_VERSION_CHECKSUMS; - - private final boolean preserveSep; - private final boolean preservePositionIncrements; - private final int maxSurfaceFormsPerAnalyzedForm; - private final int maxGraphExpansions; - private final boolean hasPayloads; - private final XAnalyzingSuggester prototype; - - public AnalyzingCompletionLookupProvider(boolean preserveSep, boolean exactFirst, boolean preservePositionIncrements, boolean hasPayloads) { - this.preserveSep = preserveSep; - this.preservePositionIncrements = preservePositionIncrements; - this.hasPayloads = hasPayloads; - this.maxSurfaceFormsPerAnalyzedForm = 
MAX_SURFACE_FORMS_PER_ANALYZED_FORM; - this.maxGraphExpansions = MAX_GRAPH_EXPANSIONS; - int options = preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0; - // needs to fixed in the suggester first before it can be supported - //options |= exactFirst ? XAnalyzingSuggester.EXACT_FIRST : 0; - prototype = new XAnalyzingSuggester(null, null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); - } - - @Override - public String getName() { - return "analyzing"; - } - - public boolean getPreserveSep() { - return preserveSep; - } - - public boolean getPreservePositionsIncrements() { - return preservePositionIncrements; - } - - public boolean hasPayloads() { - return hasPayloads; - } - - @Override - public FieldsConsumer consumer(final IndexOutput output) throws IOException { - CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST); - return new FieldsConsumer() { - private Map<String, Long> fieldOffsets = new HashMap<>(); - - @Override - public void close() throws IOException { - try { - /* - * write the offsets per field such that we know where - * we need to load the FSTs from - */ - long pointer = output.getFilePointer(); - output.writeVInt(fieldOffsets.size()); - for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) { - output.writeString(entry.getKey()); - output.writeVLong(entry.getValue()); - } - output.writeLong(pointer); - CodecUtil.writeFooter(output); - } finally { - IOUtils.close(output); - } - } - - @Override - public void write(Fields fields) throws IOException { - for(String field : fields) { - Terms terms = fields.terms(field); - if (terms == null) { - continue; - } - TermsEnum termsEnum = terms.iterator(); - PostingsEnum docsEnum = null; - final SuggestPayload spare = new SuggestPayload(); - int maxAnalyzedPathsForOneInput = 0; - final 
XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP); - int docCount = 0; - while (true) { - BytesRef term = termsEnum.next(); - if (term == null) { - break; - } - docsEnum = termsEnum.postings(docsEnum, PostingsEnum.PAYLOADS); - builder.startTerm(term); - int docFreq = 0; - while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - for (int i = 0; i < docsEnum.freq(); i++) { - final int position = docsEnum.nextPosition(); - AnalyzingCompletionLookupProvider.this.parsePayload(docsEnum.getPayload(), spare); - builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight); - // multi fields have the same surface form so we sum up here - maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1); - } - docFreq++; - docCount = Math.max(docCount, docsEnum.docID()+1); - } - builder.finishTerm(docFreq); - } - /* - * Here we are done processing the field and we can - * buid the FST and write it to disk. - */ - FST<Pair<Long, BytesRef>> build = builder.build(); - assert build != null || docCount == 0: "the FST is null but docCount is != 0 actual value: [" + docCount + "]"; - /* - * it's possible that the FST is null if we have 2 segments that get merged - * and all docs that have a value in this field are deleted. This will cause - * a consumer to be created but it doesn't consume any values causing the FSTBuilder - * to return null. - */ - if (build != null) { - fieldOffsets.put(field, output.getFilePointer()); - build.save(output); - /* write some more meta-info */ - output.writeVInt(maxAnalyzedPathsForOneInput); - output.writeVInt(maxSurfaceFormsPerAnalyzedForm); - output.writeInt(maxGraphExpansions); // can be negative - int options = 0; - options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0; - options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0; - options |= preservePositionIncrements ? 
SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0; - output.writeVInt(options); - output.writeVInt(XAnalyzingSuggester.SEP_LABEL); - output.writeVInt(XAnalyzingSuggester.END_BYTE); - output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP); - output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER); - } - } - } - }; - } - - - @Override - public LookupFactory load(IndexInput input) throws IOException { - long sizeInBytes = 0; - int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST); - if (version >= CODEC_VERSION_CHECKSUMS) { - CodecUtil.checksumEntireFile(input); - } - final long metaPointerPosition = input.length() - (version >= CODEC_VERSION_CHECKSUMS? 8 + CodecUtil.footerLength() : 8); - final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>(); - input.seek(metaPointerPosition); - long metaPointer = input.readLong(); - input.seek(metaPointer); - int numFields = input.readVInt(); - - Map<Long, String> meta = new TreeMap<>(); - for (int i = 0; i < numFields; i++) { - String name = input.readString(); - long offset = input.readVLong(); - meta.put(offset, name); - } - - for (Map.Entry<Long, String> entry : meta.entrySet()) { - input.seek(entry.getKey()); - FST<Pair<Long, BytesRef>> fst = new FST<>(input, new PairOutputs<>( - PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); - int maxAnalyzedPathsForOneInput = input.readVInt(); - int maxSurfaceFormsPerAnalyzedForm = input.readVInt(); - int maxGraphExpansions = input.readInt(); - int options = input.readVInt(); - boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPARATORS) != 0; - boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0; - boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0; - - // first version did not include these three fields, so fall back to old default (before the analyzingsuggester - // was updated in Lucene, so we cannot use the suggester defaults) - int sepLabel, payloadSep, endByte, 
holeCharacter; - switch (version) { - case CODEC_VERSION_START: - sepLabel = 0xFF; - payloadSep = '\u001f'; - endByte = 0x0; - holeCharacter = '\u001E'; - break; - default: - sepLabel = input.readVInt(); - endByte = input.readVInt(); - payloadSep = input.readVInt(); - holeCharacter = input.readVInt(); - } - - AnalyzingSuggestHolder holder = new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, - hasPayloads, maxAnalyzedPathsForOneInput, fst, sepLabel, payloadSep, endByte, holeCharacter); - sizeInBytes += fst.ramBytesUsed(); - lookupMap.put(entry.getValue(), holder); - } - final long ramBytesUsed = sizeInBytes; - return new LookupFactory() { - @Override - public Lookup getLookup(CompletionFieldMapper.CompletionFieldType fieldType, CompletionSuggestionContext suggestionContext) { - AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.names().indexName()); - if (analyzingSuggestHolder == null) { - return null; - } - int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0; - - final XAnalyzingSuggester suggester; - final Automaton queryPrefix = fieldType.requiresContext() ? 
ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(), suggestionContext.getContextQueries()) : null; - - if (suggestionContext.isFuzzy()) { - suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, - analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, - suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(), - suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), suggestionContext.isFuzzyUnicodeAware(), - analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, - analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, - analyzingSuggestHolder.holeCharacter); - } else { - suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, - analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, - analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, - analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, - analyzingSuggestHolder.holeCharacter); - } - return suggester; - } - - @Override - public CompletionStats stats(String... 
fields) { - long sizeInBytes = 0; - ObjectLongHashMap<String> completionFields = null; - if (fields != null && fields.length > 0) { - completionFields = new ObjectLongHashMap<>(fields.length); - } - - for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) { - sizeInBytes += entry.getValue().fst.ramBytesUsed(); - if (fields == null || fields.length == 0) { - continue; - } - if (Regex.simpleMatch(fields, entry.getKey())) { - long fstSize = entry.getValue().fst.ramBytesUsed(); - completionFields.addTo(entry.getKey(), fstSize); - } - } - - return new CompletionStats(sizeInBytes, completionFields); - } - - @Override - AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) { - return lookupMap.get(fieldType.names().indexName()); - } - - @Override - public long ramBytesUsed() { - return ramBytesUsed; - } - - @Override - public Collection<Accountable> getChildResources() { - return Accountables.namedAccountables("field", lookupMap); - } - }; - } - - static class AnalyzingSuggestHolder implements Accountable { - final boolean preserveSep; - final boolean preservePositionIncrements; - final int maxSurfaceFormsPerAnalyzedForm; - final int maxGraphExpansions; - final boolean hasPayloads; - final int maxAnalyzedPathsForOneInput; - final FST<Pair<Long, BytesRef>> fst; - final int sepLabel; - final int payloadSep; - final int endByte; - final int holeCharacter; - - public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, - boolean hasPayloads, int maxAnalyzedPathsForOneInput, FST<Pair<Long, BytesRef>> fst) { - this(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput, fst, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); - } - - public AnalyzingSuggestHolder(boolean preserveSep, boolean 
preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean hasPayloads, int maxAnalyzedPathsForOneInput, FST<Pair<Long, BytesRef>> fst, int sepLabel, int payloadSep, int endByte, int holeCharacter) { - this.preserveSep = preserveSep; - this.preservePositionIncrements = preservePositionIncrements; - this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm; - this.maxGraphExpansions = maxGraphExpansions; - this.hasPayloads = hasPayloads; - this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput; - this.fst = fst; - this.sepLabel = sepLabel; - this.payloadSep = payloadSep; - this.endByte = endByte; - this.holeCharacter = holeCharacter; - } - - public boolean getPreserveSeparator() { - return preserveSep; - } - - public boolean getPreservePositionIncrements() { - return preservePositionIncrements; - } - - public boolean hasPayloads() { - return hasPayloads; - } - - @Override - public long ramBytesUsed() { - if (fst != null) { - return fst.ramBytesUsed(); - } else { - return 0; - } - } - - @Override - public Collection<Accountable> getChildResources() { - if (fst != null) { - return Collections.singleton(Accountables.namedAccountable("fst", fst)); - } else { - return Collections.emptyList(); - } - } - } - - @Override - public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { - return prototype.toFiniteStrings(stream); - } - - -}
\ No newline at end of file diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/Completion090PostingsFormat.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/Completion090PostingsFormat.java deleted file mode 100644 index 447b3fd719..0000000000 --- a/core/src/main/java/org/elasticsearch/search/suggest/completion/Completion090PostingsFormat.java +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.elasticsearch.search.suggest.completion; - -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.FieldsConsumer; -import org.apache.lucene.codecs.FieldsProducer; -import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.index.Fields; -import org.apache.lucene.index.FilterLeafReader.FilterTerms; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.index.Terms; -import org.apache.lucene.search.suggest.Lookup; -import org.apache.lucene.store.IOContext.Context; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.InputStreamDataInput; -import org.apache.lucene.store.OutputStreamDataOutput; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.Accountables; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; -import org.elasticsearch.common.logging.ESLogger; -import org.elasticsearch.common.logging.Loggers; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.core.CompletionFieldMapper; -import org.elasticsearch.search.suggest.completion.CompletionTokenStream.ToFiniteStrings; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - -import static java.util.Collections.singletonMap; - -/** - * This {@link PostingsFormat} is basically a T-Sink for a default postings - * format that is used to store postings on disk fitting the lucene APIs and - * builds a suggest FST as an auxiliary data 
structure next to the actual - * postings format. It uses the delegate postings format for simplicity to - * handle all the merge operations. The auxiliary suggest FST data structure is - * only loaded if a FieldsProducer is requested for reading, for merging it uses - * the low memory delegate postings format. - */ -public class Completion090PostingsFormat extends PostingsFormat { - - public static final String CODEC_NAME = "completion090"; - public static final int SUGGEST_CODEC_VERSION = 1; - public static final int SUGGEST_VERSION_CURRENT = SUGGEST_CODEC_VERSION; - public static final String EXTENSION = "cmp"; - - private static final ESLogger logger = Loggers.getLogger(Completion090PostingsFormat.class); - private static final CompletionLookupProvider LOOKUP_PROVIDER = new AnalyzingCompletionLookupProvider(true, false, true, false); - private static final Map<String, CompletionLookupProvider> PROVIDERS = singletonMap(LOOKUP_PROVIDER.getName(), LOOKUP_PROVIDER); - private PostingsFormat delegatePostingsFormat; - private CompletionLookupProvider writeProvider; - - public Completion090PostingsFormat(PostingsFormat delegatePostingsFormat, CompletionLookupProvider provider) { - super(CODEC_NAME); - this.delegatePostingsFormat = delegatePostingsFormat; - this.writeProvider = provider; - assert delegatePostingsFormat != null && writeProvider != null; - } - - /* - * Used only by core Lucene at read-time via Service Provider instantiation - * do not use at Write-time in application code. 
- */ - public Completion090PostingsFormat() { - super(CODEC_NAME); - } - - @Override - public CompletionFieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - if (delegatePostingsFormat == null) { - throw new UnsupportedOperationException("Error - " + getClass().getName() - + " has been constructed without a choice of PostingsFormat"); - } - assert writeProvider != null; - return new CompletionFieldsConsumer(state); - } - - @Override - public CompletionFieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - return new CompletionFieldsProducer(state); - } - - private class CompletionFieldsConsumer extends FieldsConsumer { - - private FieldsConsumer delegatesFieldsConsumer; - private FieldsConsumer suggestFieldsConsumer; - - public CompletionFieldsConsumer(SegmentWriteState state) throws IOException { - this.delegatesFieldsConsumer = delegatePostingsFormat.fieldsConsumer(state); - String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION); - IndexOutput output = null; - boolean success = false; - try { - output = state.directory.createOutput(suggestFSTFile, state.context); - CodecUtil.writeHeader(output, CODEC_NAME, SUGGEST_VERSION_CURRENT); - /* - * we write the delegate postings format name so we can load it - * without getting an instance in the ctor - */ - output.writeString(delegatePostingsFormat.getName()); - output.writeString(writeProvider.getName()); - this.suggestFieldsConsumer = writeProvider.consumer(output); - success = true; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(output); - } - } - } - - @Override - public void write(Fields fields) throws IOException { - delegatesFieldsConsumer.write(fields); - suggestFieldsConsumer.write(fields); - } - - @Override - public void close() throws IOException { - IOUtils.close(delegatesFieldsConsumer, suggestFieldsConsumer); - } - } - - private static class CompletionFieldsProducer extends 
FieldsProducer { - // TODO make this class lazyload all the things in order to take advantage of the new merge instance API - // today we just load everything up-front - private final FieldsProducer delegateProducer; - private final LookupFactory lookupFactory; - private final int version; - - public CompletionFieldsProducer(SegmentReadState state) throws IOException { - String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION); - IndexInput input = state.directory.openInput(suggestFSTFile, state.context); - version = CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT); - FieldsProducer delegateProducer = null; - boolean success = false; - try { - PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString()); - String providerName = input.readString(); - CompletionLookupProvider completionLookupProvider = PROVIDERS.get(providerName); - if (completionLookupProvider == null) { - throw new IllegalStateException("no provider with name [" + providerName + "] registered"); - } - // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent unecessary heap usage? 
- delegateProducer = delegatePostingsFormat.fieldsProducer(state); - /* - * If we are merging we don't load the FSTs at all such that we - * don't consume so much memory during merge - */ - if (state.context.context != Context.MERGE) { - // TODO: maybe we can do this in a fully lazy fashion based on some configuration - // eventually we should have some kind of curciut breaker that prevents us from going OOM here - // with some configuration - this.lookupFactory = completionLookupProvider.load(input); - } else { - this.lookupFactory = null; - } - this.delegateProducer = delegateProducer; - success = true; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(delegateProducer, input); - } else { - IOUtils.close(input); - } - } - } - - @Override - public void close() throws IOException { - IOUtils.close(delegateProducer); - } - - @Override - public Iterator<String> iterator() { - return delegateProducer.iterator(); - } - - @Override - public Terms terms(String field) throws IOException { - final Terms terms = delegateProducer.terms(field); - if (terms == null || lookupFactory == null) { - return terms; - } - return new CompletionTerms(terms, lookupFactory); - } - - @Override - public int size() { - return delegateProducer.size(); - } - - @Override - public long ramBytesUsed() { - return (lookupFactory == null ? 
0 : lookupFactory.ramBytesUsed()) + delegateProducer.ramBytesUsed(); - } - - @Override - public Collection<Accountable> getChildResources() { - List<Accountable> resources = new ArrayList<>(); - if (lookupFactory != null) { - resources.add(Accountables.namedAccountable("lookup", lookupFactory)); - } - resources.add(Accountables.namedAccountable("delegate", delegateProducer)); - return Collections.unmodifiableList(resources); - } - - @Override - public void checkIntegrity() throws IOException { - delegateProducer.checkIntegrity(); - } - - @Override - public FieldsProducer getMergeInstance() throws IOException { - return delegateProducer.getMergeInstance(); - } - } - - public static final class CompletionTerms extends FilterTerms { - private final LookupFactory lookup; - - public CompletionTerms(Terms delegate, LookupFactory lookup) { - super(delegate); - this.lookup = lookup; - } - - public Lookup getLookup(CompletionFieldMapper.CompletionFieldType mapper, CompletionSuggestionContext suggestionContext) { - return lookup.getLookup(mapper, suggestionContext); - } - - public CompletionStats stats(String ... 
fields) { - return lookup.stats(fields); - } - } - - public static abstract class CompletionLookupProvider implements PayloadProcessor, ToFiniteStrings { - - public static final char UNIT_SEPARATOR = '\u001f'; - - public abstract FieldsConsumer consumer(IndexOutput output) throws IOException; - - public abstract String getName(); - - public abstract LookupFactory load(IndexInput input) throws IOException; - - @Override - public BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException { - if (weight < -1 || weight > Integer.MAX_VALUE) { - throw new IllegalArgumentException("weight must be >= -1 && <= Integer.MAX_VALUE"); - } - for (int i = 0; i < surfaceForm.length; i++) { - if (surfaceForm.bytes[i] == UNIT_SEPARATOR) { - throw new IllegalArgumentException( - "surface form cannot contain unit separator character U+001F; this character is reserved"); - } - } - ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); - OutputStreamDataOutput output = new OutputStreamDataOutput(byteArrayOutputStream); - output.writeVLong(weight + 1); - output.writeVInt(surfaceForm.length); - output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length); - output.writeVInt(payload.length); - output.writeBytes(payload.bytes, 0, payload.length); - - output.close(); - return new BytesRef(byteArrayOutputStream.toByteArray()); - } - - @Override - public void parsePayload(BytesRef payload, SuggestPayload ref) throws IOException { - ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(payload.bytes, payload.offset, payload.length); - InputStreamDataInput input = new InputStreamDataInput(byteArrayInputStream); - ref.weight = input.readVLong() - 1; - int len = input.readVInt(); - ref.surfaceForm.grow(len); - ref.surfaceForm.setLength(len); - input.readBytes(ref.surfaceForm.bytes(), 0, ref.surfaceForm.length()); - len = input.readVInt(); - ref.payload.grow(len); - ref.payload.setLength(len); - 
input.readBytes(ref.payload.bytes(), 0, ref.payload.length()); - input.close(); - } - } - - public CompletionStats completionStats(IndexReader indexReader, String ... fields) { - CompletionStats completionStats = new CompletionStats(); - for (LeafReaderContext atomicReaderContext : indexReader.leaves()) { - LeafReader atomicReader = atomicReaderContext.reader(); - try { - for (String fieldName : atomicReader.fields()) { - Terms terms = atomicReader.fields().terms(fieldName); - if (terms instanceof CompletionTerms) { - CompletionTerms completionTerms = (CompletionTerms) terms; - completionStats.add(completionTerms.stats(fields)); - } - } - } catch (IOException e) { - logger.error("Could not get completion stats: {}", e, e.getMessage()); - } - } - - return completionStats; - } - - public static abstract class LookupFactory implements Accountable { - public abstract Lookup getLookup(CompletionFieldMapper.CompletionFieldType fieldType, CompletionSuggestionContext suggestionContext); - public abstract CompletionStats stats(String ... fields); - abstract AnalyzingCompletionLookupProvider.AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType); - } -} diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionFieldStats.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionFieldStats.java new file mode 100644 index 0000000000..c5d39405e5 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionFieldStats.java @@ -0,0 +1,59 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.suggest.completion; + +import com.carrotsearch.hppc.ObjectLongHashMap; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Terms; +import org.apache.lucene.search.suggest.document.CompletionTerms; +import org.elasticsearch.common.regex.Regex; + +import java.io.IOException; + +public class CompletionFieldStats { + + public static CompletionStats completionStats(IndexReader indexReader, String ... fields) { + long sizeInBytes = 0; + ObjectLongHashMap<String> completionFields = null; + if (fields != null && fields.length > 0) { + completionFields = new ObjectLongHashMap<>(fields.length); + } + for (LeafReaderContext atomicReaderContext : indexReader.leaves()) { + LeafReader atomicReader = atomicReaderContext.reader(); + try { + for (String fieldName : atomicReader.fields()) { + Terms terms = atomicReader.fields().terms(fieldName); + if (terms instanceof CompletionTerms) { + // TODO: currently we load up the suggester for reporting it's size + long fstSize = ((CompletionTerms) terms).suggester().ramBytesUsed(); + if (fields != null && fields.length > 0 && Regex.simpleMatch(fields, fieldName)) { + completionFields.addTo(fieldName, fstSize); + } + sizeInBytes += fstSize; + } + } + } catch (IOException ignored) { + } + } + return new CompletionStats(sizeInBytes, completionFields); + } +} diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java 
b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java index 4f3222f4ff..8b0dbd758b 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java @@ -18,6 +18,7 @@ */ package org.elasticsearch.search.suggest.completion; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.HasContextAndHeaders; import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.bytes.BytesReference; @@ -25,19 +26,49 @@ import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.fielddata.IndexFieldDataService; +import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.core.CompletionFieldMapper; +import org.elasticsearch.index.query.RegexpFlag; import org.elasticsearch.search.suggest.SuggestContextParser; import org.elasticsearch.search.suggest.SuggestionSearchContext; -import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery; +import org.elasticsearch.search.suggest.completion.context.CategoryQueryContext; +import org.elasticsearch.search.suggest.completion.context.ContextMapping; +import org.elasticsearch.search.suggest.completion.context.ContextMappings; import java.io.IOException; -import java.util.List; +import java.util.*; import static org.elasticsearch.search.suggest.SuggestUtils.parseSuggestContext; /** + * Parses query options for {@link CompletionSuggester} * + * Acceptable input: + * { + * "field" : STRING + * "size" : INT + * "fuzzy" : BOOLEAN | FUZZY_OBJECT + * "contexts" : QUERY_CONTEXTS + * "regex" : REGEX_OBJECT + * } + * + * FUZZY_OBJECT : { + * 
"edit_distance" : STRING | INT + * "transpositions" : BOOLEAN + * "min_length" : INT + * "prefix_length" : INT + * "unicode_aware" : BOOLEAN + * "max_determinized_states" : INT + * } + * + * REGEX_OBJECT: { + * "flags" : REGEX_FLAGS + * "max_determinized_states" : INT + * } + * + * see {@link RegexpFlag} for REGEX_FLAGS */ public class CompletionSuggestParser implements SuggestContextParser { @@ -55,6 +86,9 @@ public class CompletionSuggestParser implements SuggestContextParser { CompletionSuggestionContext suggestion = new CompletionSuggestionContext(completionSuggester); XContentParser contextParser = null; + CompletionSuggestionBuilder.FuzzyOptionsBuilder fuzzyOptions = null; + CompletionSuggestionBuilder.RegexOptionsBuilder regexOptions = null; + Set<String> payloadFields = new HashSet<>(1); while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { @@ -62,12 +96,16 @@ public class CompletionSuggestParser implements SuggestContextParser { } else if (token.isValue()) { if (!parseSuggestContext(parser, mapperService, fieldName, suggestion, parseFieldMatcher)) { if (token == XContentParser.Token.VALUE_BOOLEAN && "fuzzy".equals(fieldName)) { - suggestion.setFuzzy(parser.booleanValue()); + if (parser.booleanValue()) { + fuzzyOptions = new CompletionSuggestionBuilder.FuzzyOptionsBuilder(); + } + } else if (token == XContentParser.Token.VALUE_STRING && "payload".equals(fieldName)) { + payloadFields.add(parser.text()); } } } else if (token == XContentParser.Token.START_OBJECT) { - if("fuzzy".equals(fieldName)) { - suggestion.setFuzzy(true); + if ("fuzzy".equals(fieldName)) { + fuzzyOptions = new CompletionSuggestionBuilder.FuzzyOptionsBuilder(); String fuzzyConfigName = null; while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { @@ -76,47 +114,97 @@ public class CompletionSuggestParser implements SuggestContextParser { if 
(parseFieldMatcher.match(fuzzyConfigName, Fuzziness.FIELD)) { suggestion.setFuzzyEditDistance(Fuzziness.parse(parser).asDistance()); } else if ("transpositions".equals(fuzzyConfigName)) { - suggestion.setFuzzyTranspositions(parser.booleanValue()); + fuzzyOptions.setTranspositions(parser.booleanValue()); } else if ("min_length".equals(fuzzyConfigName) || "minLength".equals(fuzzyConfigName)) { - suggestion.setFuzzyMinLength(parser.intValue()); + fuzzyOptions.setFuzzyMinLength(parser.intValue()); } else if ("prefix_length".equals(fuzzyConfigName) || "prefixLength".equals(fuzzyConfigName)) { - suggestion.setFuzzyPrefixLength(parser.intValue()); + fuzzyOptions.setFuzzyPrefixLength(parser.intValue()); } else if ("unicode_aware".equals(fuzzyConfigName) || "unicodeAware".equals(fuzzyConfigName)) { - suggestion.setFuzzyUnicodeAware(parser.booleanValue()); + fuzzyOptions.setUnicodeAware(parser.booleanValue()); + } else if ("max_determinized_states".equals(fuzzyConfigName)) { + fuzzyOptions.setMaxDeterminizedStates(parser.intValue()); + } else { + throw new IllegalArgumentException("[fuzzy] query does not support [" + fuzzyConfigName + "]"); } } } - } else if("context".equals(fieldName)) { + } else if ("contexts".equals(fieldName) || "context".equals(fieldName)) { // Copy the current structure. 
We will parse, once the mapping is provided XContentBuilder builder = XContentFactory.contentBuilder(parser.contentType()); builder.copyCurrentStructure(parser); BytesReference bytes = builder.bytes(); - contextParser = parser.contentType().xContent().createParser(bytes); + contextParser = XContentFactory.xContent(bytes).createParser(bytes); + } else if ("regex".equals(fieldName)) { + regexOptions = new CompletionSuggestionBuilder.RegexOptionsBuilder(); + String currentFieldName = fieldName; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else { + if ("flags".equals(currentFieldName)) { + String flags = parser.textOrNull(); + regexOptions.setFlags(flags); + } else if ("max_determinized_states".equals(currentFieldName)) { + regexOptions.setMaxDeterminizedStates(parser.intValue()); + } else { + throw new IllegalArgumentException("[regexp] query does not support [" + currentFieldName + "]"); + } + } + } + } else { + throw new IllegalArgumentException("suggester [completion] doesn't support field [" + fieldName + "]"); + } + } else if (token == XContentParser.Token.START_ARRAY) { + if ("payload".equals(fieldName)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + if (token == XContentParser.Token.VALUE_STRING) { + payloadFields.add(parser.text()); + } else { + throw new IllegalArgumentException("suggester [completion] expected string values in [payload] array"); + } + } } else { throw new IllegalArgumentException("suggester [completion] doesn't support field [" + fieldName + "]"); } } else { - throw new IllegalArgumentException("suggester[completion] doesn't support field [" + fieldName + "]"); + throw new IllegalArgumentException("suggester [completion] doesn't support field [" + fieldName + "]"); } } - - suggestion.fieldType((CompletionFieldMapper.CompletionFieldType) 
mapperService.smartNameFieldType(suggestion.getField())); - - CompletionFieldMapper.CompletionFieldType fieldType = suggestion.fieldType(); - if (fieldType != null) { - if (fieldType.requiresContext()) { - if (contextParser == null) { - throw new IllegalArgumentException("suggester [completion] requires context to be setup"); - } else { - contextParser.nextToken(); - List<ContextQuery> contextQueries = ContextQuery.parseQueries(fieldType.getContextMapping(), contextParser); - suggestion.setContextQuery(contextQueries); + MappedFieldType mappedFieldType = mapperService.smartNameFieldType(suggestion.getField()); + if (mappedFieldType == null) { + throw new ElasticsearchException("Field [" + suggestion.getField() + "] is not a completion suggest field"); + } else if (mappedFieldType instanceof CompletionFieldMapper.CompletionFieldType) { + CompletionFieldMapper.CompletionFieldType type = (CompletionFieldMapper.CompletionFieldType) mappedFieldType; + if (type.hasContextMappings() == false && contextParser != null) { + throw new IllegalArgumentException("suggester [" + type.names().fullName() + "] doesn't expect any context"); + } + Map<String, List<CategoryQueryContext>> queryContexts = Collections.emptyMap(); + if (type.hasContextMappings() && contextParser != null) { + ContextMappings contextMappings = type.getContextMappings(); + contextParser.nextToken(); + queryContexts = new HashMap<>(contextMappings.size()); + assert contextParser.currentToken() == XContentParser.Token.START_OBJECT; + XContentParser.Token currentToken; + String currentFieldName; + while ((currentToken = contextParser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (currentToken == XContentParser.Token.FIELD_NAME) { + currentFieldName = contextParser.currentName(); + final ContextMapping mapping = contextMappings.get(currentFieldName); + queryContexts.put(currentFieldName, mapping.parseQueryContext(contextParser)); + } } - } else if (contextParser != null) { - throw new 
IllegalArgumentException("suggester [completion] doesn't expect any context"); + contextParser.close(); } + suggestion.setFieldType(type); + suggestion.setFuzzyOptionsBuilder(fuzzyOptions); + suggestion.setRegexOptionsBuilder(regexOptions); + suggestion.setQueryContexts(queryContexts); + suggestion.setMapperService(mapperService); + suggestion.setFieldData(fieldDataService); + suggestion.setPayloadFields(payloadFields); + return suggestion; + } else { + throw new ElasticsearchException("Field [" + suggestion.getField() + "] is not a completion suggest field"); } - return suggestion; } - } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java index b3e5e2dc2a..bb4c386553 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java @@ -18,97 +18,242 @@ */ package org.elasticsearch.search.suggest.completion; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Terms; +import org.apache.lucene.index.ReaderUtil; +import org.apache.lucene.search.BulkScorer; +import org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Weight; import org.apache.lucene.search.suggest.Lookup; -import org.apache.lucene.util.CharsRefBuilder; -import org.apache.lucene.util.CollectionUtil; +import org.apache.lucene.search.suggest.document.CompletionQuery; +import org.apache.lucene.search.suggest.document.TopSuggestDocs; +import org.apache.lucene.search.suggest.document.TopSuggestDocsCollector; +import org.apache.lucene.util.*; +import org.apache.lucene.util.PriorityQueue; import org.elasticsearch.ElasticsearchException; -import 
org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.text.StringText; +import org.elasticsearch.index.fielddata.AtomicFieldData; +import org.elasticsearch.index.fielddata.ScriptDocValues; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.core.CompletionFieldMapper; import org.elasticsearch.search.suggest.Suggest; import org.elasticsearch.search.suggest.SuggestContextParser; import org.elasticsearch.search.suggest.Suggester; -import org.elasticsearch.search.suggest.completion.CompletionSuggestion.Entry.Option; import java.io.IOException; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; public class CompletionSuggester extends Suggester<CompletionSuggestionContext> { - private static final ScoreComparator scoreComparator = new ScoreComparator(); - + public SuggestContextParser getContextParser() { + return new CompletionSuggestParser(this); + } @Override protected Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? 
extends Suggest.Suggestion.Entry.Option>> innerExecute(String name, - CompletionSuggestionContext suggestionContext, IndexSearcher searcher, CharsRefBuilder spare) throws IOException { - if (suggestionContext.fieldType() == null) { - throw new ElasticsearchException("Field [" + suggestionContext.getField() + "] is not a completion suggest field"); + final CompletionSuggestionContext suggestionContext, final IndexSearcher searcher, CharsRefBuilder spare) throws IOException { + final CompletionFieldMapper.CompletionFieldType fieldType = suggestionContext.getFieldType(); + if (fieldType == null) { + throw new ElasticsearchException("field [" + suggestionContext.getField() + "] is not a completion field"); } - final IndexReader indexReader = searcher.getIndexReader(); CompletionSuggestion completionSuggestion = new CompletionSuggestion(name, suggestionContext.getSize()); spare.copyUTF8Bytes(suggestionContext.getText()); - CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(new StringText(spare.toString()), 0, spare.length()); completionSuggestion.addTerm(completionSuggestEntry); + TopSuggestDocsCollector collector = new TopDocumentsCollector(suggestionContext.getSize()); + suggest(searcher, suggestionContext.toQuery(), collector); + int numResult = 0; + for (TopSuggestDocs.SuggestScoreDoc suggestScoreDoc : collector.get().scoreLookupDocs()) { + TopDocumentsCollector.SuggestDoc suggestDoc = (TopDocumentsCollector.SuggestDoc) suggestScoreDoc; + // collect contexts + Map<String, Set<CharSequence>> contexts = Collections.emptyMap(); + if (fieldType.hasContextMappings() && !suggestDoc.getContexts().isEmpty()) { + contexts = fieldType.getContextMappings().getNamedContexts(suggestDoc.getContexts()); + } + // collect payloads + Map<String, List<Object>> payload = Collections.emptyMap(); + Set<String> payloadFields = suggestionContext.getPayloadFields(); + if (!payloadFields.isEmpty()) { + int readerIndex = ReaderUtil.subIndex(suggestDoc.doc, 
searcher.getIndexReader().leaves()); + LeafReaderContext subReaderContext = searcher.getIndexReader().leaves().get(readerIndex); + int subDocId = suggestDoc.doc - subReaderContext.docBase; + payload = new LinkedHashMap<>(payloadFields.size()); + for (String field : payloadFields) { + MappedFieldType payloadFieldType = suggestionContext.getMapperService().smartNameFieldType(field); + if (payloadFieldType != null) { + AtomicFieldData data = suggestionContext.getFieldData().getForField(payloadFieldType).load(subReaderContext); + ScriptDocValues scriptValues = data.getScriptValues(); + scriptValues.setNextDocId(subDocId); + payload.put(field, new ArrayList<>(scriptValues.getValues())); + } else { + throw new ElasticsearchException("payload field [" + field + "] does not exist"); + } + } + } + if (numResult++ < suggestionContext.getSize()) { + CompletionSuggestion.Entry.Option option = new CompletionSuggestion.Entry.Option( + new StringText(suggestDoc.key.toString()), suggestDoc.score, contexts, payload); + completionSuggestEntry.addOption(option); + } else { + break; + } + } + return completionSuggestion; + } - String fieldName = suggestionContext.getField(); - Map<String, CompletionSuggestion.Entry.Option> results = new HashMap<>(indexReader.leaves().size() * suggestionContext.getSize()); - for (LeafReaderContext atomicReaderContext : indexReader.leaves()) { - LeafReader atomicReader = atomicReaderContext.reader(); - Terms terms = atomicReader.fields().terms(fieldName); - if (terms instanceof Completion090PostingsFormat.CompletionTerms) { - final Completion090PostingsFormat.CompletionTerms lookupTerms = (Completion090PostingsFormat.CompletionTerms) terms; - final Lookup lookup = lookupTerms.getLookup(suggestionContext.fieldType(), suggestionContext); - if (lookup == null) { - // we don't have a lookup for this segment.. this might be possible if a merge dropped all - // docs from the segment that had a value in this segment. 
- continue; + private static void suggest(IndexSearcher searcher, CompletionQuery query, TopSuggestDocsCollector collector) throws IOException { + query = (CompletionQuery) query.rewrite(searcher.getIndexReader()); + Weight weight = query.createWeight(searcher, collector.needsScores()); + for (LeafReaderContext context : searcher.getIndexReader().leaves()) { + BulkScorer scorer = weight.bulkScorer(context); + if (scorer != null) { + try { + scorer.score(collector.getLeafCollector(context), context.reader().getLiveDocs()); + } catch (CollectionTerminatedException e) { + // collection was terminated prematurely + // continue with the following leaf + } + } + } + } + + // TODO: this should be refactored and moved to lucene + private static class TopDocumentsCollector extends TopSuggestDocsCollector { + + /** + * Holds a list of suggest meta data for a doc + */ + private static class SuggestDoc extends TopSuggestDocs.SuggestScoreDoc { + + private List<TopSuggestDocs.SuggestScoreDoc> suggestScoreDocs; + + public SuggestDoc(int doc, CharSequence key, CharSequence context, float score) { + super(doc, key, context, score); + } + + void add(CharSequence key, CharSequence context, float score) { + if (suggestScoreDocs == null) { + suggestScoreDocs = new ArrayList<>(1); + } + suggestScoreDocs.add(new TopSuggestDocs.SuggestScoreDoc(doc, key, context, score)); + } + + public List<CharSequence> getKeys() { + if (suggestScoreDocs == null) { + return Collections.singletonList(key); + } else { + List<CharSequence> keys = new ArrayList<>(suggestScoreDocs.size() + 1); + keys.add(key); + for (TopSuggestDocs.SuggestScoreDoc scoreDoc : suggestScoreDocs) { + keys.add(scoreDoc.key); + } + return keys; + } + } + + public List<CharSequence> getContexts() { + if (suggestScoreDocs == null) { + if (context != null) { + return Collections.singletonList(context); + } else { + return Collections.emptyList(); + } + } else { + List<CharSequence> contexts = new ArrayList<>(suggestScoreDocs.size() + 
1); + contexts.add(context); + for (TopSuggestDocs.SuggestScoreDoc scoreDoc : suggestScoreDocs) { + contexts.add(scoreDoc.context); + } + return contexts; } - List<Lookup.LookupResult> lookupResults = lookup.lookup(spare.get(), false, suggestionContext.getSize()); - for (Lookup.LookupResult res : lookupResults) { - - final String key = res.key.toString(); - final float score = res.value; - final Option value = results.get(key); - if (value == null) { - final Option option = new CompletionSuggestion.Entry.Option(new StringText(key), score, res.payload == null ? null - : new BytesArray(res.payload)); - results.put(key, option); - } else if (value.getScore() < score) { - value.setScore(score); - value.setPayload(res.payload == null ? null : new BytesArray(res.payload)); + } + } + + private static class SuggestDocPriorityQueue extends PriorityQueue<SuggestDoc> { + + public SuggestDocPriorityQueue(int maxSize) { + super(maxSize); + } + + @Override + protected boolean lessThan(SuggestDoc a, SuggestDoc b) { + if (a.score == b.score) { + int cmp = Lookup.CHARSEQUENCE_COMPARATOR.compare(a.key, b.key); + if (cmp == 0) { + // prefer smaller doc id, in case of a tie + return a.doc > b.doc; + } else { + return cmp > 0; } } + return a.score < b.score; + } + + public SuggestDoc[] getResults() { + int size = size(); + SuggestDoc[] res = new SuggestDoc[size]; + for (int i = size - 1; i >= 0; i--) { + res[i] = pop(); + } + return res; } } - final List<CompletionSuggestion.Entry.Option> options = new ArrayList<>(results.values()); - CollectionUtil.introSort(options, scoreComparator); - int optionCount = Math.min(suggestionContext.getSize(), options.size()); - for (int i = 0 ; i < optionCount ; i++) { - completionSuggestEntry.addOption(options.get(i)); + private final int num; + private final SuggestDocPriorityQueue pq; + private final Map<Integer, SuggestDoc> scoreDocMap; + + public TopDocumentsCollector(int num) { + super(1); // TODO hack, we don't use the underlying pq, so we 
allocate a size of 1 + this.num = num; + this.scoreDocMap = new LinkedHashMap<>(num); + this.pq = new SuggestDocPriorityQueue(num); } - return completionSuggestion; - } + @Override + public int getCountToCollect() { + // This is only needed because we initialize + // the base class with 1 instead of the actual num + return num; + } - @Override - public SuggestContextParser getContextParser() { - return new CompletionSuggestParser(this); - } - public static class ScoreComparator implements Comparator<CompletionSuggestion.Entry.Option> { @Override - public int compare(Option o1, Option o2) { - return Float.compare(o2.getScore(), o1.getScore()); + protected void doSetNextReader(LeafReaderContext context) throws IOException { + super.doSetNextReader(context); + updateResults(); + } + + private void updateResults() { + for (SuggestDoc suggestDoc : scoreDocMap.values()) { + if (pq.insertWithOverflow(suggestDoc) == suggestDoc) { + break; + } + } + scoreDocMap.clear(); + } + + @Override + public void collect(int docID, CharSequence key, CharSequence context, float score) throws IOException { + if (scoreDocMap.containsKey(docID)) { + SuggestDoc suggestDoc = scoreDocMap.get(docID); + suggestDoc.add(key, context, score); + } else if (scoreDocMap.size() <= num) { + scoreDocMap.put(docID, new SuggestDoc(docBase + docID, key, context, score)); + } else { + throw new CollectionTerminatedException(); + } + } + + @Override + public TopSuggestDocs get() throws IOException { + updateResults(); // to empty the last set of collected suggest docs + TopSuggestDocs.SuggestScoreDoc[] suggestScoreDocs = pq.getResults(); + if (suggestScoreDocs.length > 0) { + return new TopSuggestDocs(suggestScoreDocs.length, suggestScoreDocs, suggestScoreDocs[0].score); + } else { + return TopSuggestDocs.EMPTY; + } } } } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestion.java 
b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestion.java index 83515ff74f..a9d5a4bddb 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestion.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestion.java @@ -18,23 +18,37 @@ */ package org.elasticsearch.search.suggest.completion; -import org.elasticsearch.common.bytes.BytesReference; +import org.apache.lucene.search.suggest.Lookup; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.logging.ESLogger; +import org.elasticsearch.common.logging.ESLoggerFactory; import org.elasticsearch.common.text.Text; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.search.suggest.Suggest; import java.io.IOException; -import java.util.Map; +import java.util.*; /** + * Suggestion response for {@link CompletionSuggester} results + * + * Response format for each entry: + * { + * "text" : STRING + * "score" : FLOAT + * "contexts" : CONTEXTS + * } + * + * CONTEXTS : { + * "CONTEXT_NAME" : ARRAY, + * .. 
+ * } * */ public class CompletionSuggestion extends Suggest.Suggestion<CompletionSuggestion.Entry> { - public static final int TYPE = 2; + public static final int TYPE = 4; public CompletionSuggestion() { } @@ -43,6 +57,58 @@ public class CompletionSuggestion extends Suggest.Suggestion<CompletionSuggestio super(name, size); } + private class OptionPriorityQueue extends org.apache.lucene.util.PriorityQueue<Entry.Option> { + + public OptionPriorityQueue(int maxSize) { + super(maxSize); + } + + @Override + protected boolean lessThan(Entry.Option a, Entry.Option b) { + int cmp = sortComparator().compare(a, b); + if (cmp != 0) { + return cmp > 0; + } + return Lookup.CHARSEQUENCE_COMPARATOR.compare(a.getText().string(), b.getText().string()) > 0; + } + + public Entry.Option[] get() { + int size = size(); + Entry.Option[] results = new Entry.Option[size]; + for (int i = size - 1; i >= 0; i--) { + results[i] = pop(); + } + return results; + } + } + + @Override + public Suggest.Suggestion<Entry> reduce(List<Suggest.Suggestion<Entry>> toReduce) { + if (toReduce.size() == 1) { + return toReduce.get(0); + } else { + // combine suggestion entries from participating shards on the coordinating node + // the global top <code>size</code> entries are collected from the shard results + // using a priority queue + OptionPriorityQueue priorityQueue = new OptionPriorityQueue(size); + for (Suggest.Suggestion<Entry> entries : toReduce) { + assert entries.getEntries().size() == 1 : "CompletionSuggestion must have only one entry"; + for (Entry.Option option : entries.getEntries().get(0)) { + if (option == priorityQueue.insertWithOverflow(option)) { + // if the current option has overflown from pq, + // we can assume all of the successive options + // from this shard result will be overflown as well + break; + } + } + } + Entry options = this.entries.get(0); + options.getOptions().clear(); + Collections.addAll(options.getOptions(), priorityQueue.get()); + return this; + } + } + @Override 
public int getType() { return TYPE; @@ -53,7 +119,7 @@ public class CompletionSuggestion extends Suggest.Suggestion<CompletionSuggestio return new Entry(); } - public static class Entry extends org.elasticsearch.search.suggest.Suggest.Suggestion.Entry<CompletionSuggestion.Entry.Option> { + public static class Entry extends Suggest.Suggestion.Entry<CompletionSuggestion.Entry.Option> { public Entry(Text text, int offset, int length) { super(text, offset, length); @@ -68,41 +134,33 @@ public class CompletionSuggestion extends Suggest.Suggestion<CompletionSuggestio return new Option(); } - public static class Option extends org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option { - private BytesReference payload; + public static class Option extends Suggest.Suggestion.Entry.Option { + private Map<String, Set<CharSequence>> contexts; + private Map<String, List<Object>> payload; - public Option(Text text, float score, BytesReference payload) { + public Option(Text text, float score, Map<String, Set<CharSequence>> contexts, Map<String, List<Object>> payload) { super(text, score); this.payload = payload; + this.contexts = contexts; } - protected Option() { super(); } - public void setPayload(BytesReference payload) { - this.payload = payload; + @Override + protected void mergeInto(Suggest.Suggestion.Entry.Option otherOption) { + // Completion suggestions are reduced by + // org.elasticsearch.search.suggest.completion.CompletionSuggestion.reduce() + throw new UnsupportedOperationException(); } - public BytesReference getPayload() { + public Map<String, List<Object>> getPayload() { return payload; } - public String getPayloadAsString() { - return payload.toUtf8(); - } - - public long getPayloadAsLong() { - return Long.parseLong(payload.toUtf8()); - } - - public double getPayloadAsDouble() { - return Double.parseDouble(payload.toUtf8()); - } - - public Map<String, Object> getPayloadAsMap() { - return XContentHelper.convertToMap(payload, false).v2(); + public 
Map<String, Set<CharSequence>> getContexts() { + return contexts; } @Override @@ -113,8 +171,27 @@ public class CompletionSuggestion extends Suggest.Suggestion<CompletionSuggestio @Override protected XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException { super.innerToXContent(builder, params); - if (payload != null && payload.length() > 0) { - builder.rawField("payload", payload); + if (payload.size() > 0) { + builder.startObject("payload"); + for (Map.Entry<String, List<Object>> entry : payload.entrySet()) { + builder.startArray(entry.getKey()); + for (Object payload : entry.getValue()) { + builder.value(payload); + } + builder.endArray(); + } + builder.endObject(); + } + if (contexts.size() > 0) { + builder.startObject("contexts"); + for (Map.Entry<String, Set<CharSequence>> entry : contexts.entrySet()) { + builder.startArray(entry.getKey()); + for (CharSequence context : entry.getValue()) { + builder.value(context.toString()); + } + builder.endArray(); + } + builder.endObject(); } return builder; } @@ -122,14 +199,78 @@ public class CompletionSuggestion extends Suggest.Suggestion<CompletionSuggestio @Override public void readFrom(StreamInput in) throws IOException { super.readFrom(in); - payload = in.readBytesReference(); + int payloadSize = in.readInt(); + this.payload = new LinkedHashMap<>(payloadSize); + for (int i = 0; i < payloadSize; i++) { + String payloadName = in.readString(); + int nValues = in.readVInt(); + List<Object> values = new ArrayList<>(nValues); + for (int j = 0; j < nValues; j++) { + values.add(in.readGenericValue()); + } + this.payload.put(payloadName, values); + } + int contextSize = in.readInt(); + this.contexts = new LinkedHashMap<>(contextSize); + for (int i = 0; i < contextSize; i++) { + String contextName = in.readString(); + int nContexts = in.readVInt(); + Set<CharSequence> contexts = new HashSet<>(nContexts); + for (int j = 0; j < nContexts; j++) { + contexts.add(in.readString()); + } + 
this.contexts.put(contextName, contexts); + } } @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); - out.writeBytesReference(payload); + out.writeInt(payload.size()); + for (Map.Entry<String, List<Object>> entry : payload.entrySet()) { + out.writeString(entry.getKey()); + List<Object> values = entry.getValue(); + out.writeVInt(values.size()); + for (Object value : values) { + out.writeGenericValue(value); + } + } + out.writeInt(contexts.size()); + for (Map.Entry<String, Set<CharSequence>> entry : contexts.entrySet()) { + out.writeString(entry.getKey()); + out.writeVInt(entry.getValue().size()); + for (CharSequence ctx : entry.getValue()) { + out.writeString(ctx.toString()); + } + } } + + @Override + public String toString() { + StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append("text:"); + stringBuilder.append(getText()); + stringBuilder.append(" score:"); + stringBuilder.append(getScore()); + stringBuilder.append(" payload:["); + for (Map.Entry<String, List<Object>> entry : payload.entrySet()) { + stringBuilder.append(" "); + stringBuilder.append(entry.getKey()); + stringBuilder.append(":"); + stringBuilder.append(entry.getValue()); + } + stringBuilder.append("]"); + stringBuilder.append(" context:["); + for (Map.Entry<String, Set<CharSequence>> entry: contexts.entrySet()) { + stringBuilder.append(" "); + stringBuilder.append(entry.getKey()); + stringBuilder.append(":"); + stringBuilder.append(entry.getValue()); + } + stringBuilder.append("]"); + return stringBuilder.toString(); + } + } } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java index 15d04e845e..9fcb86c6c1 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java +++ 
b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java @@ -18,10 +18,21 @@ */ package org.elasticsearch.search.suggest.completion; +import org.apache.lucene.search.suggest.document.FuzzyCompletionQuery; +import org.apache.lucene.util.automaton.Operations; +import org.apache.lucene.util.automaton.RegExp; +import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.query.RegexpFlag; import org.elasticsearch.search.suggest.SuggestBuilder; +import org.elasticsearch.search.suggest.completion.context.CategoryQueryContext; +import org.elasticsearch.search.suggest.completion.context.GeoQueryContext; import java.io.IOException; +import java.util.*; + +import static org.elasticsearch.search.suggest.completion.context.CategoryContextMapping.*; /** * Defines a suggest command based on a prefix, typically to provide "auto-complete" functionality @@ -30,13 +41,283 @@ import java.io.IOException; * indexing. 
*/ public class CompletionSuggestionBuilder extends SuggestBuilder.SuggestionBuilder<CompletionSuggestionBuilder> { + private FuzzyOptionsBuilder fuzzyOptionsBuilder; + private RegexOptionsBuilder regexOptionsBuilder; + private Map<String, List<CategoryQueryContext>> queryContexts; + private String[] payloadFields; public CompletionSuggestionBuilder(String name) { super(name, "completion"); } + /** + * Options for fuzzy queries + */ + public static class FuzzyOptionsBuilder implements ToXContent { + private int editDistance = FuzzyCompletionQuery.DEFAULT_MAX_EDITS; + private boolean transpositions = FuzzyCompletionQuery.DEFAULT_TRANSPOSITIONS; + private int fuzzyMinLength = FuzzyCompletionQuery.DEFAULT_MIN_FUZZY_LENGTH; + private int fuzzyPrefixLength = FuzzyCompletionQuery.DEFAULT_NON_FUZZY_PREFIX; + private boolean unicodeAware = FuzzyCompletionQuery.DEFAULT_UNICODE_AWARE; + private int maxDeterminizedStates = Operations.DEFAULT_MAX_DETERMINIZED_STATES; + + public FuzzyOptionsBuilder() { + } + + /** + * Sets the level of fuzziness used to create suggestions using a {@link Fuzziness} instance. + * The default value is {@link Fuzziness#ONE} which allows for an "edit distance" of one. + */ + public FuzzyOptionsBuilder setFuzziness(int editDistance) { + this.editDistance = editDistance; + return this; + } + + /** + * Sets the level of fuzziness used to create suggestions using a {@link Fuzziness} instance. + * The default value is {@link Fuzziness#ONE} which allows for an "edit distance" of one. + */ + public FuzzyOptionsBuilder setFuzziness(Fuzziness fuzziness) { + this.editDistance = fuzziness.asDistance(); + return this; + } + + /** + * Sets if transpositions (swapping one character for another) counts as one character + * change or two. + * Defaults to true, meaning it uses the fuzzier option of counting transpositions as + * a single change. 
+ */ + public FuzzyOptionsBuilder setTranspositions(boolean transpositions) { + this.transpositions = transpositions; + return this; + } + + /** + * Sets the minimum length of input string before fuzzy suggestions are returned, defaulting + * to 3. + */ + public FuzzyOptionsBuilder setFuzzyMinLength(int fuzzyMinLength) { + this.fuzzyMinLength = fuzzyMinLength; + return this; + } + + /** + * Sets the minimum length of the input, which is not checked for fuzzy alternatives, defaults to 1 + */ + public FuzzyOptionsBuilder setFuzzyPrefixLength(int fuzzyPrefixLength) { + this.fuzzyPrefixLength = fuzzyPrefixLength; + return this; + } + + /** + * Sets the maximum automaton states allowed for the fuzzy expansion + */ + public FuzzyOptionsBuilder setMaxDeterminizedStates(int maxDeterminizedStates) { + this.maxDeterminizedStates = maxDeterminizedStates; + return this; + } + + /** + * Set to true if all measurements (like edit distance, transpositions and lengths) are in unicode + * code points (actual letters) instead of bytes. Default is false. 
+ */ + public FuzzyOptionsBuilder setUnicodeAware(boolean unicodeAware) { + this.unicodeAware = unicodeAware; + return this; + } + + int getEditDistance() { + return editDistance; + } + + boolean isTranspositions() { + return transpositions; + } + + int getFuzzyMinLength() { + return fuzzyMinLength; + } + + int getFuzzyPrefixLength() { + return fuzzyPrefixLength; + } + + boolean isUnicodeAware() { + return unicodeAware; + } + + int getMaxDeterminizedStates() { + return maxDeterminizedStates; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject("fuzzy"); + builder.field(Fuzziness.FIELD.getPreferredName(), editDistance); + builder.field("transpositions", transpositions); + builder.field("min_length", fuzzyMinLength); + builder.field("prefix_length", fuzzyPrefixLength); + builder.field("unicode_aware", unicodeAware); + builder.field("max_determinized_states", maxDeterminizedStates); + builder.endObject(); + return builder; + } + } + + /** + * Options for regular expression queries + */ + public static class RegexOptionsBuilder implements ToXContent { + private int flagsValue = RegExp.ALL; + private int maxDeterminizedStates = Operations.DEFAULT_MAX_DETERMINIZED_STATES; + + public RegexOptionsBuilder() { + } + + /** + * Sets the regular expression syntax flags + * see {@link RegexpFlag} + */ + public RegexOptionsBuilder setFlags(String flags) { + this.flagsValue = RegexpFlag.resolveValue(flags); + return this; + } + + /** + * Sets the maximum automaton states allowed for the regular expression expansion + */ + public RegexOptionsBuilder setMaxDeterminizedStates(int maxDeterminizedStates) { + this.maxDeterminizedStates = maxDeterminizedStates; + return this; + } + + int getFlagsValue() { + return flagsValue; + } + + int getMaxDeterminizedStates() { + return maxDeterminizedStates; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws 
IOException { + builder.startObject("regex"); + builder.field("flags_value", flagsValue); + builder.field("max_determinized_states", maxDeterminizedStates); + builder.endObject(); + return builder; + } + } + + /** + * Sets the prefix to provide completions for. + * The prefix gets analyzed by the suggest analyzer. + */ + public CompletionSuggestionBuilder prefix(String prefix) { + super.setPrefix(prefix); + return this; + } + + /** + * Same as {@link #prefix(String)} with fuzziness of <code>fuzziness</code> + */ + public CompletionSuggestionBuilder prefix(String prefix, Fuzziness fuzziness) { + super.setPrefix(prefix); + this.fuzzyOptionsBuilder = new FuzzyOptionsBuilder().setFuzziness(fuzziness); + return this; + } + + /** + * Same as {@link #prefix(String)} with full fuzzy options + * see {@link FuzzyOptionsBuilder} + */ + public CompletionSuggestionBuilder prefix(String prefix, FuzzyOptionsBuilder fuzzyOptionsBuilder) { + super.setPrefix(prefix); + this.fuzzyOptionsBuilder = fuzzyOptionsBuilder; + return this; + } + + /** + * Sets a regular expression pattern for prefixes to provide completions for. + */ + public CompletionSuggestionBuilder regex(String regex) { + super.setRegex(regex); + return this; + } + + /** + * Same as {@link #regex(String)} with full regular expression options + * see {@link RegexOptionsBuilder} + */ + public CompletionSuggestionBuilder regex(String regex, RegexOptionsBuilder regexOptionsBuilder) { + this.regex(regex); + this.regexOptionsBuilder = regexOptionsBuilder; + return this; + } + + /** + * Sets the fields to be returned as suggestion payload. + * Note: Only doc values enabled fields are supported + */ + public CompletionSuggestionBuilder payload(String... 
fields) { + this.payloadFields = fields; + return this; + } + + /** + * Sets query contexts for a category context + * @param name of the category context to execute on + * @param queryContexts a list of {@link CategoryQueryContext} + */ + public CompletionSuggestionBuilder categoryContexts(String name, CategoryQueryContext... queryContexts) { + if (this.queryContexts == null) { + this.queryContexts = new HashMap<>(2); + } + List<CategoryQueryContext> contexts = this.queryContexts.get(name); + if (contexts == null) { + contexts = new ArrayList<>(2); + this.queryContexts.put(name, contexts); + } + Collections.addAll(contexts, queryContexts); + return this; + } + + /** + * Sets query contexts for a geo context + * @param name of the geo context to execute on + * @param queryContexts a list of {@link GeoQueryContext} + */ + public CompletionSuggestionBuilder geoContexts(String name, GeoQueryContext... queryContexts) { + return categoryContexts(name, queryContexts); + } + @Override protected XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException { + if (payloadFields != null) { + builder.startArray("payload"); + for (String field : payloadFields) { + builder.value(field); + } + builder.endArray(); + } + if (fuzzyOptionsBuilder != null) { + fuzzyOptionsBuilder.toXContent(builder, params); + } + if (regexOptionsBuilder != null) { + regexOptionsBuilder.toXContent(builder, params); + } + if (queryContexts != null) { + builder.startObject("contexts"); + for (Map.Entry<String, List<CategoryQueryContext>> entry : this.queryContexts.entrySet()) { + builder.startArray(entry.getKey()); + for (CategoryQueryContext queryContext : entry.getValue()) { + queryContext.toXContent(builder, params); + } + builder.endArray(); + } + builder.endObject(); + } return builder; } } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java 
b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java index 6c7d3c0ef4..dffbc7753a 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java @@ -18,14 +18,20 @@ */ package org.elasticsearch.search.suggest.completion; -import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester; +import org.apache.lucene.search.suggest.document.CompletionQuery; +import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.index.fielddata.IndexFieldDataService; +import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.core.CompletionFieldMapper; import org.elasticsearch.search.suggest.Suggester; import org.elasticsearch.search.suggest.SuggestionSearchContext; -import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery; +import org.elasticsearch.search.suggest.completion.context.CategoryQueryContext; +import org.elasticsearch.search.suggest.completion.context.ContextMapping; +import org.elasticsearch.search.suggest.completion.context.ContextMappings; -import java.util.Collections; import java.util.List; +import java.util.Map; +import java.util.Set; /** * @@ -33,79 +39,90 @@ import java.util.List; public class CompletionSuggestionContext extends SuggestionSearchContext.SuggestionContext { private CompletionFieldMapper.CompletionFieldType fieldType; - private int fuzzyEditDistance = XFuzzySuggester.DEFAULT_MAX_EDITS; - private boolean fuzzyTranspositions = XFuzzySuggester.DEFAULT_TRANSPOSITIONS; - private int fuzzyMinLength = XFuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH; - private int fuzzyPrefixLength = XFuzzySuggester.DEFAULT_NON_FUZZY_PREFIX; - private boolean fuzzy = false; - private boolean fuzzyUnicodeAware = XFuzzySuggester.DEFAULT_UNICODE_AWARE; - private List<ContextQuery> contextQueries = Collections.emptyList(); - - 
public CompletionSuggestionContext(Suggester suggester) { + private CompletionSuggestionBuilder.FuzzyOptionsBuilder fuzzyOptionsBuilder; + private CompletionSuggestionBuilder.RegexOptionsBuilder regexOptionsBuilder; + private Map<String, List<CategoryQueryContext>> queryContexts; + private MapperService mapperService; + private IndexFieldDataService fieldData; + private Set<String> payloadFields; + + CompletionSuggestionContext(Suggester suggester) { super(suggester); } - public CompletionFieldMapper.CompletionFieldType fieldType() { + CompletionFieldMapper.CompletionFieldType getFieldType() { return this.fieldType; } - public void fieldType(CompletionFieldMapper.CompletionFieldType fieldType) { + void setFieldType(CompletionFieldMapper.CompletionFieldType fieldType) { this.fieldType = fieldType; } - public void setFuzzyEditDistance(int fuzzyEditDistance) { - this.fuzzyEditDistance = fuzzyEditDistance; + void setRegexOptionsBuilder(CompletionSuggestionBuilder.RegexOptionsBuilder regexOptionsBuilder) { + this.regexOptionsBuilder = regexOptionsBuilder; } - public int getFuzzyEditDistance() { - return fuzzyEditDistance; + void setFuzzyOptionsBuilder(CompletionSuggestionBuilder.FuzzyOptionsBuilder fuzzyOptionsBuilder) { + this.fuzzyOptionsBuilder = fuzzyOptionsBuilder; } - public void setFuzzyTranspositions(boolean fuzzyTranspositions) { - this.fuzzyTranspositions = fuzzyTranspositions; + void setQueryContexts(Map<String, List<CategoryQueryContext>> queryContexts) { + this.queryContexts = queryContexts; } - public boolean isFuzzyTranspositions() { - return fuzzyTranspositions; + void setMapperService(MapperService mapperService) { + this.mapperService = mapperService; } - public void setFuzzyMinLength(int fuzzyMinPrefixLength) { - this.fuzzyMinLength = fuzzyMinPrefixLength; + MapperService getMapperService() { + return mapperService; } - public int getFuzzyMinLength() { - return fuzzyMinLength; + void setFieldData(IndexFieldDataService fieldData) { + this.fieldData = 
fieldData; } - public void setFuzzyPrefixLength(int fuzzyNonPrefixLength) { - this.fuzzyPrefixLength = fuzzyNonPrefixLength; + IndexFieldDataService getFieldData() { + return fieldData; } - public int getFuzzyPrefixLength() { - return fuzzyPrefixLength; + void setPayloadFields(Set<String> fields) { + this.payloadFields = fields; } - public void setFuzzy(boolean fuzzy) { - this.fuzzy = fuzzy; + Set<String> getPayloadFields() { + return payloadFields; } - public boolean isFuzzy() { - return fuzzy; - } - - public void setFuzzyUnicodeAware(boolean fuzzyUnicodeAware) { - this.fuzzyUnicodeAware = fuzzyUnicodeAware; - } - - public boolean isFuzzyUnicodeAware() { - return fuzzyUnicodeAware; - } - - public void setContextQuery(List<ContextQuery> queries) { - this.contextQueries = queries; - } - - public List<ContextQuery> getContextQueries() { - return this.contextQueries; + CompletionQuery toQuery() { + CompletionFieldMapper.CompletionFieldType fieldType = getFieldType(); + final CompletionQuery query; + if (getPrefix() != null) { + if (fuzzyOptionsBuilder != null) { + query = fieldType.fuzzyQuery(getPrefix().utf8ToString(), + Fuzziness.fromEdits(fuzzyOptionsBuilder.getEditDistance()), + fuzzyOptionsBuilder.getFuzzyPrefixLength(), fuzzyOptionsBuilder.getFuzzyMinLength(), + fuzzyOptionsBuilder.getMaxDeterminizedStates(), fuzzyOptionsBuilder.isTranspositions(), + fuzzyOptionsBuilder.isUnicodeAware()); + } else { + query = fieldType.prefixQuery(getPrefix()); + } + } else if (getRegex() != null) { + if (fuzzyOptionsBuilder != null) { + throw new IllegalArgumentException("can not use 'fuzzy' options with 'regex"); + } + if (regexOptionsBuilder == null) { + regexOptionsBuilder = new CompletionSuggestionBuilder.RegexOptionsBuilder(); + } + query = fieldType.regexpQuery(getRegex(), regexOptionsBuilder.getFlagsValue(), + regexOptionsBuilder.getMaxDeterminizedStates()); + } else { + throw new IllegalArgumentException("'prefix' or 'regex' must be defined"); + } + if 
(fieldType.hasContextMappings()) { + ContextMappings contextMappings = fieldType.getContextMappings(); + return contextMappings.toContextQuery(query, queryContexts); + } + return query; } } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionFuzzyBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionFuzzyBuilder.java deleted file mode 100644 index de6bf1365d..0000000000 --- a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionFuzzyBuilder.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.elasticsearch.search.suggest.completion; - -import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester; -import org.elasticsearch.common.unit.Fuzziness; -import org.elasticsearch.common.xcontent.ToXContent; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.search.suggest.SuggestBuilder; - -import java.io.IOException; - -/** - * A form of {@link CompletionSuggestionBuilder} that supports fuzzy queries allowing - * matches on typos. - * Various settings control when and how fuzziness is counted. 
- */ -public class CompletionSuggestionFuzzyBuilder extends SuggestBuilder.SuggestionBuilder<CompletionSuggestionFuzzyBuilder> { - - public CompletionSuggestionFuzzyBuilder(String name) { - super(name, "completion"); - } - - private Fuzziness fuzziness = Fuzziness.ONE; - private boolean fuzzyTranspositions = XFuzzySuggester.DEFAULT_TRANSPOSITIONS; - private int fuzzyMinLength = XFuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH; - private int fuzzyPrefixLength = XFuzzySuggester.DEFAULT_NON_FUZZY_PREFIX; - private boolean unicodeAware = XFuzzySuggester.DEFAULT_UNICODE_AWARE; - - public Fuzziness getFuzziness() { - return fuzziness; - } - - /** - * Sets the level of fuzziness used to create suggestions using a {@link Fuzziness} instance. - * The default value is {@link Fuzziness#ONE} which allows for an "edit distance" of one. - */ - public CompletionSuggestionFuzzyBuilder setFuzziness(Fuzziness fuzziness) { - this.fuzziness = fuzziness; - return this; - } - - public boolean isFuzzyTranspositions() { - return fuzzyTranspositions; - } - - /** - * Sets if transpositions (swapping one character for another) counts as one character - * change or two. - * Defaults to true, meaning it uses the fuzzier option of counting transpositions as - * a single change. - */ - public CompletionSuggestionFuzzyBuilder setFuzzyTranspositions(boolean fuzzyTranspositions) { - this.fuzzyTranspositions = fuzzyTranspositions; - return this; - } - - public int getFuzzyMinLength() { - return fuzzyMinLength; - } - - /** - * Sets the minimum length of input string before fuzzy suggestions are returned, defaulting - * to 3. 
- */ - public CompletionSuggestionFuzzyBuilder setFuzzyMinLength(int fuzzyMinLength) { - this.fuzzyMinLength = fuzzyMinLength; - return this; - } - - public int getFuzzyPrefixLength() { - return fuzzyPrefixLength; - } - - /** - * Sets the minimum length of the input, which is not checked for fuzzy alternatives, defaults to 1 - */ - public CompletionSuggestionFuzzyBuilder setFuzzyPrefixLength(int fuzzyPrefixLength) { - this.fuzzyPrefixLength = fuzzyPrefixLength; - return this; - } - - public boolean isUnicodeAware() { - return unicodeAware; - } - - /** - * Set to true if all measurements (like edit distance, transpositions and lengths) are in unicode - * code points (actual letters) instead of bytes. Default is false. - */ - public CompletionSuggestionFuzzyBuilder setUnicodeAware(boolean unicodeAware) { - this.unicodeAware = unicodeAware; - return this; - } - - @Override - protected XContentBuilder innerToXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { - builder.startObject("fuzzy"); - - if (fuzziness != Fuzziness.ONE) { - fuzziness.toXContent(builder, params); - } - if (fuzzyTranspositions != XFuzzySuggester.DEFAULT_TRANSPOSITIONS) { - builder.field("transpositions", fuzzyTranspositions); - } - if (fuzzyMinLength != XFuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH) { - builder.field("min_length", fuzzyMinLength); - } - if (fuzzyPrefixLength != XFuzzySuggester.DEFAULT_NON_FUZZY_PREFIX) { - builder.field("prefix_length", fuzzyPrefixLength); - } - if (unicodeAware != XFuzzySuggester.DEFAULT_UNICODE_AWARE) { - builder.field("unicode_aware", unicodeAware); - } - - builder.endObject(); - return builder; - } -} diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java deleted file mode 100644 index 5edf848dda..0000000000 --- 
a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.elasticsearch.search.suggest.completion; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; -import org.apache.lucene.util.*; -import org.apache.lucene.util.fst.Util; - -import java.io.IOException; -import java.util.Iterator; -import java.util.Set; - -/** - * - */ -public final class CompletionTokenStream extends TokenStream { - - private final PayloadAttribute payloadAttr = addAttribute(PayloadAttribute.class); - private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class); - private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);; - - - private final TokenStream input; - private BytesRef payload; - private Iterator<IntsRef> finiteStrings; - private ToFiniteStrings toFiniteStrings; - private 
int posInc = -1; - private static final int MAX_PATHS = 256; - private CharTermAttribute charTermAttribute; - - public CompletionTokenStream(TokenStream input, BytesRef payload, ToFiniteStrings toFiniteStrings) throws IOException { - // Don't call the super(input) ctor - this is a true delegate and has a new attribute source since we consume - // the input stream entirely in toFiniteStrings(input) - this.input = input; - this.payload = payload; - this.toFiniteStrings = toFiniteStrings; - } - - @Override - public boolean incrementToken() throws IOException { - clearAttributes(); - if (finiteStrings == null) { - Set<IntsRef> strings = toFiniteStrings.toFiniteStrings(input); - - if (strings.size() > MAX_PATHS) { - throw new IllegalArgumentException("TokenStream expanded to " + strings.size() + " finite strings. Only <= " + MAX_PATHS - + " finite strings are supported"); - } - posInc = strings.size(); - finiteStrings = strings.iterator(); - } - if (finiteStrings.hasNext()) { - posAttr.setPositionIncrement(posInc); - /* - * this posInc encodes the number of paths that this surface form - * produced. 
Multi Fields have the same surface form and therefore sum up - */ - posInc = 0; - Util.toBytesRef(finiteStrings.next(), bytesAtt.builder()); // now we have UTF-8 - if (charTermAttribute != null) { - charTermAttribute.setLength(0); - charTermAttribute.append(bytesAtt.toUTF16()); - } - if (payload != null) { - payloadAttr.setPayload(this.payload); - } - return true; - } - - return false; - } - - @Override - public void end() throws IOException { - super.end(); - if (posInc == -1) { - input.end(); - } - } - - @Override - public void close() throws IOException { - input.close(); - } - - public static interface ToFiniteStrings { - public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException; - } - - @Override - public void reset() throws IOException { - super.reset(); - if (hasAttribute(CharTermAttribute.class)) { - // we only create this if we really need it to safe the UTF-8 to UTF-16 conversion - charTermAttribute = getAttribute(CharTermAttribute.class); - } - finiteStrings = null; - posInc = -1; - } - - public interface ByteTermAttribute extends TermToBytesRefAttribute { - // marker interface - - /** - * Return the builder from which the term is derived. 
- */ - public BytesRefBuilder builder(); - - public CharSequence toUTF16(); - } - - public static final class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute { - private final BytesRefBuilder bytes = new BytesRefBuilder(); - private CharsRefBuilder charsRef; - - @Override - public BytesRefBuilder builder() { - return bytes; - } - - @Override - public BytesRef getBytesRef() { - return bytes.get(); - } - - @Override - public void clear() { - bytes.clear(); - } - - @Override - public void copyTo(AttributeImpl target) { - ByteTermAttributeImpl other = (ByteTermAttributeImpl) target; - other.bytes.copyBytes(bytes); - } - - @Override - public CharSequence toUTF16() { - if (charsRef == null) { - charsRef = new CharsRefBuilder(); - } - charsRef.copyUTF8Bytes(getBytesRef()); - return charsRef.get(); - } - } -} diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/context/CategoryContextMapping.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/context/CategoryContextMapping.java new file mode 100644 index 0000000000..554a2d341d --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/context/CategoryContextMapping.java @@ -0,0 +1,260 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.suggest.completion.context; + +import org.apache.lucene.index.IndexableField; +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.Version; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentParser.Token; +import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.index.mapper.ParseContext.Document; + +import java.io.IOException; +import java.util.*; + +/** + * A {@link ContextMapping} that uses a simple string as a criteria + * The suggestions are boosted and/or filtered by their associated + * category (string) value. + * {@link CategoryQueryContext} defines options for constructing + * a unit of query context for this context type + */ +public class CategoryContextMapping extends ContextMapping { + + private static final String FIELD_FIELDNAME = "path"; + + static final String CONTEXT_VALUE = "context"; + static final String CONTEXT_BOOST = "boost"; + static final String CONTEXT_PREFIX = "prefix"; + + private final String fieldName; + + /** + * Create a new {@link CategoryContextMapping} with field + * <code>fieldName</code> + */ + private CategoryContextMapping(String name, String fieldName) { + super(Type.CATEGORY, name); + this.fieldName = fieldName; + } + + /** + * Name of the field to get contexts from at index-time + */ + public String getFieldName() { + return fieldName; + } + + /** + * Loads a <code>name</code>d {@link CategoryContextMapping} instance + * from a map. 
+ * see {@link ContextMappings#load(Object, Version)} + * + * Acceptable map param: <code>path</code> + */ + protected static CategoryContextMapping load(String name, Map<String, Object> config) throws ElasticsearchParseException { + CategoryContextMapping.Builder mapping = new CategoryContextMapping.Builder(name); + Object fieldName = config.get(FIELD_FIELDNAME); + if (fieldName != null) { + mapping.field(fieldName.toString()); + config.remove(FIELD_FIELDNAME); + } + return mapping.build(); + } + + @Override + protected XContentBuilder toInnerXContent(XContentBuilder builder, Params params) throws IOException { + if (fieldName != null) { + builder.field(FIELD_FIELDNAME, fieldName); + } + return builder; + } + + /** + * Parse a set of {@link CharSequence} contexts at index-time. + * Acceptable formats: + * + * <ul> + * <li>Array: <pre>[<i><string></i>, ..]</pre></li> + * <li>String: <pre>"string"</pre></li> + * </ul> + */ + @Override + public Set<CharSequence> parseContext(ParseContext parseContext, XContentParser parser) throws IOException, ElasticsearchParseException { + final Set<CharSequence> contexts = new HashSet<>(); + Token token = parser.currentToken(); + if (token == Token.VALUE_STRING) { + contexts.add(parser.text()); + } else if (token == Token.START_ARRAY) { + while ((token = parser.nextToken()) != Token.END_ARRAY) { + if (token == Token.VALUE_STRING) { + contexts.add(parser.text()); + } else { + throw new ElasticsearchParseException("context array must have string values"); + } + } + } else { + throw new ElasticsearchParseException("contexts must be a string or a list of strings"); + } + return contexts; + } + + @Override + public Set<CharSequence> parseContext(Document document) { + Set<CharSequence> values = null; + if (fieldName != null) { + IndexableField[] fields = document.getFields(fieldName); + values = new HashSet<>(fields.length); + for (IndexableField field : fields) { + values.add(field.stringValue()); + } + } + return (values == null) ? 
Collections.<CharSequence>emptySet() : values; + } + + /** + * Parse a list of {@link CategoryQueryContext} + * using <code>parser</code>. A QueryContexts accepts one of the following forms: + * + * <ul> + * <li>Object: CategoryQueryContext</li> + * <li>String: CategoryQueryContext value with prefix=false and boost=1</li> + * <li>Array: <pre>[CategoryQueryContext, ..]</pre></li> + * </ul> + * + * A CategoryQueryContext has one of the following forms: + * <ul> + * <li>Object: <pre>{"context": <i><string></i>, "boost": <i><int></i>, "prefix": <i><boolean></i>}</pre></li> + * <li>String: <pre>"string"</pre></li> + * </ul> + */ + @Override + public List<CategoryQueryContext> parseQueryContext(XContentParser parser) throws IOException, ElasticsearchParseException { + List<CategoryQueryContext> queryContexts = new ArrayList<>(); + Token token = parser.nextToken(); + if (token == Token.START_OBJECT || token == Token.VALUE_STRING) { + queryContexts.add(innerParseQueryContext(parser)); + } else if (token == Token.START_ARRAY) { + while (parser.nextToken() != Token.END_ARRAY) { + queryContexts.add(innerParseQueryContext(parser)); + } + } + return queryContexts; + } + + private CategoryQueryContext innerParseQueryContext(XContentParser parser) throws IOException, ElasticsearchParseException { + Token token = parser.currentToken(); + if (token == Token.VALUE_STRING) { + return new CategoryQueryContext(parser.text()); + } else if (token == Token.START_OBJECT) { + String currentFieldName = null; + String context = null; + boolean isPrefix = false; + int boost = 1; + while ((token = parser.nextToken()) != Token.END_OBJECT) { + if (token == Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token == Token.VALUE_STRING) { + // context, exact + if (CONTEXT_VALUE.equals(currentFieldName)) { + context = parser.text(); + } else if (CONTEXT_PREFIX.equals(currentFieldName)) { + isPrefix = Boolean.valueOf(parser.text()); + } else if 
(CONTEXT_BOOST.equals(currentFieldName)) { + Number number; + try { + number = Long.parseLong(parser.text()); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("boost must be a string representing a numeric value, but was [" + parser.text() + "]"); + } + boost = number.intValue(); + } + } else if (token == Token.VALUE_NUMBER) { + // boost + if (CONTEXT_BOOST.equals(currentFieldName)) { + Number number = parser.numberValue(); + if (parser.numberType() == XContentParser.NumberType.INT) { + boost = number.intValue(); + } else { + throw new ElasticsearchParseException("boost must be in the interval [0..2147483647], but was [" + number.longValue() + "]"); + } + } + } else if (token == Token.VALUE_BOOLEAN) { + // exact + if (CONTEXT_PREFIX.equals(currentFieldName)) { + isPrefix = parser.booleanValue(); + } + } + } + if (context == null) { + throw new ElasticsearchParseException("no context provided"); + } + return new CategoryQueryContext(context, boost, isPrefix); + } else { + throw new ElasticsearchParseException("contexts field expected string or object but was [" + token.name() + "]"); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + CategoryContextMapping mapping = (CategoryContextMapping) o; + return !(fieldName != null ? 
!fieldName.equals(mapping.fieldName) : mapping.fieldName != null); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), fieldName); + } + + /** + * Builder for {@link CategoryContextMapping} + */ + public static class Builder extends ContextBuilder<CategoryContextMapping> { + + private String fieldName; + + /** + * Create a builder for + * a named {@link CategoryContextMapping} + * @param name name of the mapping + */ + public Builder(String name) { + super(name); + } + + /** + * Set the name of the field to use + */ + public Builder field(String fieldName) { + this.fieldName = fieldName; + return this; + } + + @Override + public CategoryContextMapping build() { + return new CategoryContextMapping(name, fieldName); + } + } +} diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/context/CategoryQueryContext.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/context/CategoryQueryContext.java new file mode 100644 index 0000000000..da4ed802c4 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/context/CategoryQueryContext.java @@ -0,0 +1,77 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.suggest.completion.context; + +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; + +import static org.elasticsearch.search.suggest.completion.context.CategoryContextMapping.CONTEXT_BOOST; +import static org.elasticsearch.search.suggest.completion.context.CategoryContextMapping.CONTEXT_PREFIX; +import static org.elasticsearch.search.suggest.completion.context.CategoryContextMapping.CONTEXT_VALUE; + +/** + * Defines the query context for {@link CategoryContextMapping} + */ +public class CategoryQueryContext implements ToXContent { + + public final CharSequence context; + + public final boolean isPrefix; + + public final int boost; + + /** + * Creates a query context with a provided context and a + * boost of 1 + */ + public CategoryQueryContext(CharSequence context) { + this(context, 1); + } + + /** + * Creates a query context with a provided context and boost + */ + public CategoryQueryContext(CharSequence context, int boost) { + this(context, boost, false); + } + + /** + * Creates a query context with a provided context and boost + * Allows specifying whether the context should be treated as + * a prefix or not + */ + public CategoryQueryContext(CharSequence context, int boost, boolean isPrefix) { + this.context = context; + this.boost = boost; + this.isPrefix = isPrefix; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(CONTEXT_VALUE, context); + builder.field(CONTEXT_BOOST, boost); + builder.field(CONTEXT_PREFIX, isPrefix); + builder.endObject(); + return builder; + } +} diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/PayloadProcessor.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/context/ContextBuilder.java index 544d9052a0..9e31d8370c 100644 --- 
a/core/src/main/java/org/elasticsearch/search/suggest/completion/PayloadProcessor.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/context/ContextBuilder.java @@ -17,22 +17,36 @@ * under the License. */ -package org.elasticsearch.search.suggest.completion; +package org.elasticsearch.search.suggest.completion.context; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; +/** + * Builder for {@link ContextMapping} + */ +public abstract class ContextBuilder<E extends ContextMapping> { -import java.io.IOException; + protected String name; -interface PayloadProcessor { + /** + * @param name of the context mapper to build + */ + protected ContextBuilder(String name) { + this.name = name; + } - BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException; + public abstract E build(); - void parsePayload(BytesRef payload, SuggestPayload ref) throws IOException; + /** + * Create a new {@link GeoContextMapping} + */ + public static GeoContextMapping.Builder geo(String name) { + return new GeoContextMapping.Builder(name); + } - static class SuggestPayload { - final BytesRefBuilder payload = new BytesRefBuilder(); - long weight = 0; - final BytesRefBuilder surfaceForm = new BytesRefBuilder(); + /** + * Create a new {@link CategoryContextMapping} + */ + public static CategoryContextMapping.Builder category(String name) { + return new CategoryContextMapping.Builder(name); } + } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/context/ContextMapping.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/context/ContextMapping.java new file mode 100644 index 0000000000..bc92cde722 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/context/ContextMapping.java @@ -0,0 +1,144 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.suggest.completion.context; + +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.json.JsonXContent; +import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.index.mapper.core.CompletionFieldMapper; + +import java.io.IOException; +import java.util.*; + +/** + * A {@link ContextMapping} defines criteria that can be used to + * filter and/or boost suggestions at query time for {@link CompletionFieldMapper}. 
+ * + * Implementations have to define how contexts are parsed at query/index time + */ +public abstract class ContextMapping implements ToXContent { + + public static final String FIELD_TYPE = "type"; + public static final String FIELD_NAME = "name"; + protected final Type type; + protected final String name; + + public enum Type { + CATEGORY, GEO; + + public static Type fromString(String type) { + if (type.equalsIgnoreCase("category")) { + return CATEGORY; + } else if (type.equalsIgnoreCase("geo")) { + return GEO; + } else { + throw new IllegalArgumentException("No context type for [" + type + "]"); + } + } + } + + /** + * Define a new context mapping of a specific type + * + * @param type type of context mapping, either {@link Type#CATEGORY} or {@link Type#GEO} + * @param name name of context mapping + */ + protected ContextMapping(Type type, String name) { + super(); + this.type = type; + this.name = name; + } + + /** + * @return the type name of the context + */ + public Type type() { + return type; + } + + /** + * @return the name/id of the context + */ + public String name() { + return name; + } + + /** + * Parses a set of index-time contexts. + */ + public abstract Set<CharSequence> parseContext(ParseContext parseContext, XContentParser parser) throws IOException, ElasticsearchParseException; + + /** + * Retrieves a set of context from a <code>document</code> at index-time. 
+ */ + protected abstract Set<CharSequence> parseContext(ParseContext.Document document); + + /** + * Parses query contexts for this mapper + */ + public abstract List<CategoryQueryContext> parseQueryContext(XContentParser parser) throws IOException, ElasticsearchParseException; + + /** + * Adds query contexts to a completion query + */ + protected List<CategoryQueryContext> getQueryContexts(List<CategoryQueryContext> queryContexts) { + return queryContexts; + } + + /** + * Implementations should add specific configurations + * that need to be persisted + */ + protected abstract XContentBuilder toInnerXContent(XContentBuilder builder, Params params) throws IOException; + + @Override + public final XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.field(FIELD_NAME, name); + builder.field(FIELD_TYPE, type.name()); + toInnerXContent(builder, params); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ContextMapping that = (ContextMapping) o; + if (type != that.type) return false; + return name.equals(that.name); + } + + @Override + public int hashCode() { + return Objects.hash(type, name); + } + + @Override + public String toString() { + try { + return toXContent(JsonXContent.contentBuilder(), ToXContent.EMPTY_PARAMS).string(); + } catch (IOException e) { + return super.toString(); + } + } +} diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/context/ContextMappings.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/context/ContextMappings.java new file mode 100644 index 0000000000..76eb5a0ffd --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/context/ContextMappings.java @@ -0,0 +1,276 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.suggest.completion.context; + +import org.apache.lucene.search.suggest.document.CompletionQuery; +import org.apache.lucene.search.suggest.document.ContextQuery; +import org.apache.lucene.search.suggest.document.ContextSuggestField; +import org.apache.lucene.util.CharsRefBuilder; +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.Version; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.mapper.DocumentMapperParser; +import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.index.mapper.core.CompletionFieldMapper; + +import java.io.IOException; +import java.util.*; + +import static org.elasticsearch.search.suggest.completion.context.ContextMapping.*; + +/** + * ContextMappings indexes context-enabled suggestion fields + * and creates context queries for defined {@link ContextMapping}s + * for a {@link CompletionFieldMapper} + */ +public class ContextMappings implements ToXContent { + private final List<ContextMapping> contextMappings; + private final Map<String, ContextMapping> contextNameMap; + + public 
ContextMappings(List<ContextMapping> contextMappings) { + if (contextMappings.size() > 255) { + // we can support more, but max of 255 (1 byte) unique context types per suggest field + // seems reasonable? + throw new UnsupportedOperationException("Maximum of 10 context types are supported was: " + contextMappings.size()); + } + this.contextMappings = contextMappings; + contextNameMap = new HashMap<>(contextMappings.size()); + for (ContextMapping mapping : contextMappings) { + contextNameMap.put(mapping.name(), mapping); + } + } + + /** + * @return number of context mappings + * held by this instance + */ + public int size() { + return contextMappings.size(); + } + + /** + * Returns a context mapping by its name + */ + public ContextMapping get(String name) { + ContextMapping contextMapping = contextNameMap.get(name); + if (contextMapping == null) { + throw new IllegalArgumentException("Unknown context name[" + name + "], must be one of " + contextNameMap.size()); + } + return contextMapping; + } + + /** + * Adds a context-enabled field for all the defined mappings to <code>document</code> + * see {@link org.elasticsearch.search.suggest.completion.context.ContextMappings.TypedContextField} + */ + public void addField(ParseContext.Document document, String name, String input, int weight, Map<String, Set<CharSequence>> contexts) { + document.add(new TypedContextField(name, input, weight, contexts, document)); + } + + /** + * Field prepends context values with a suggestion + * Context values are associated with a type, denoted by + * a type id, which is prepended to the context value. 
+ * + * Every defined context mapping yields a unique type id (index of the + * corresponding context mapping in the context mappings list) + * for all its context values + * + * The type, context and suggestion values are encoded as follows: + * <p> + * TYPE_ID | CONTEXT_VALUE | CONTEXT_SEP | SUGGESTION_VALUE + * </p> + * + * Field can also use values of other indexed fields as contexts + * at index time + */ + private class TypedContextField extends ContextSuggestField { + private final Map<String, Set<CharSequence>> contexts; + private final ParseContext.Document document; + + public TypedContextField(String name, String value, int weight, Map<String, Set<CharSequence>> contexts, + ParseContext.Document document) { + super(name, value, weight); + this.contexts = contexts; + this.document = document; + } + + @Override + protected Iterable<CharSequence> contexts() { + Set<CharSequence> typedContexts = new HashSet<>(); + final CharsRefBuilder scratch = new CharsRefBuilder(); + scratch.grow(1); + for (int typeId = 0; typeId < contextMappings.size(); typeId++) { + scratch.setCharAt(0, (char) typeId); + scratch.setLength(1); + ContextMapping mapping = contextMappings.get(typeId); + Set<CharSequence> contexts = new HashSet<>(mapping.parseContext(document)); + if (this.contexts.get(mapping.name()) != null) { + contexts.addAll(this.contexts.get(mapping.name())); + } + for (CharSequence context : contexts) { + scratch.append(context); + typedContexts.add(scratch.toCharsRef()); + scratch.setLength(1); + } + } + return typedContexts; + } + } + + /** + * Wraps a {@link CompletionQuery} with context queries, + * individual context mappings adds query contexts using + * {@link ContextMapping#getQueryContexts(List)}s + * + * @param query base completion query to wrap + * @param queryContexts a map of context mapping name and collected query contexts + * @return a context-enabled query + */ + public ContextQuery toContextQuery(CompletionQuery query, Map<String, 
List<CategoryQueryContext>> queryContexts) { + ContextQuery typedContextQuery = new ContextQuery(query); + if (queryContexts.isEmpty() == false) { + CharsRefBuilder scratch = new CharsRefBuilder(); + scratch.grow(1); + for (int typeId = 0; typeId < contextMappings.size(); typeId++) { + scratch.setCharAt(0, (char) typeId); + scratch.setLength(1); + ContextMapping mapping = contextMappings.get(typeId); + List<CategoryQueryContext> queryContext = queryContexts.get(mapping.name()); + if (queryContext != null) { + for (CategoryQueryContext context : mapping.getQueryContexts(queryContext)) { + scratch.append(context.context); + typedContextQuery.addContext(scratch.toCharsRef(), context.boost, !context.isPrefix); + scratch.setLength(1); + } + } + } + } + return typedContextQuery; + } + + /** + * Maps an output context list to a map of context mapping names and their values + * + * see {@link org.elasticsearch.search.suggest.completion.context.ContextMappings.TypedContextField} + * @return a map of context names and their values + * + */ + public Map<String, Set<CharSequence>> getNamedContexts(List<CharSequence> contexts) { + Map<String, Set<CharSequence>> contextMap = new HashMap<>(contexts.size()); + for (CharSequence typedContext : contexts) { + int typeId = typedContext.charAt(0); + assert typeId < contextMappings.size() : "Returned context has invalid type"; + ContextMapping mapping = contextMappings.get(typeId); + Set<CharSequence> contextEntries = contextMap.get(mapping.name()); + if (contextEntries == null) { + contextEntries = new HashSet<>(); + contextMap.put(mapping.name(), contextEntries); + } + contextEntries.add(typedContext.subSequence(1, typedContext.length())); + } + return contextMap; + } + + /** + * Loads {@link ContextMappings} from configuration + * + * Expected configuration: + * List of maps representing {@link ContextMapping} + * [{"name": .., "type": .., ..}, {..}] + * + */ + public static ContextMappings load(Object configuration, Version 
indexVersionCreated) throws ElasticsearchParseException { + final List<ContextMapping> contextMappings; + if (configuration instanceof List) { + contextMappings = new ArrayList<>(); + List<Object> configurations = (List<Object>)configuration; + for (Object contextConfig : configurations) { + contextMappings.add(load((Map<String, Object>) contextConfig, indexVersionCreated)); + } + if (contextMappings.size() == 0) { + throw new ElasticsearchParseException("expected at least one context mapping"); + } + } else if (configuration instanceof Map) { + contextMappings = Collections.singletonList(load(((Map<String, Object>) configuration), indexVersionCreated)); + } else { + throw new ElasticsearchParseException("expected a list or an entry of context mapping"); + } + return new ContextMappings(contextMappings); + } + + private static ContextMapping load(Map<String, Object> contextConfig, Version indexVersionCreated) { + String name = extractRequiredValue(contextConfig, FIELD_NAME); + String type = extractRequiredValue(contextConfig, FIELD_TYPE); + final ContextMapping contextMapping; + switch (Type.fromString(type)) { + case CATEGORY: + contextMapping = CategoryContextMapping.load(name, contextConfig); + break; + case GEO: + contextMapping = GeoContextMapping.load(name, contextConfig); + break; + default: + throw new ElasticsearchParseException("unknown context type[" + type + "]"); + } + DocumentMapperParser.checkNoRemainingFields(name, contextConfig, indexVersionCreated); + return contextMapping; + } + + private static String extractRequiredValue(Map<String, Object> contextConfig, String paramName) { + final Object paramValue = contextConfig.get(paramName); + if (paramValue == null) { + throw new ElasticsearchParseException("missing [" + paramName + "] in context mapping"); + } + contextConfig.remove(paramName); + return paramValue.toString(); + } + + /** + * Writes a list of objects specified by the defined {@link ContextMapping}s + * + * see {@link 
ContextMapping#toXContent(XContentBuilder, Params)} + */ + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + for (ContextMapping contextMapping : contextMappings) { + builder.startObject(); + contextMapping.toXContent(builder, params); + builder.endObject(); + } + return builder; + } + + @Override + public int hashCode() { + return Objects.hash(contextMappings); + } + + @Override + public boolean equals(Object obj) { + if (obj == null || (obj instanceof ContextMappings) == false) { + return false; + } + ContextMappings other = ((ContextMappings) obj); + return contextMappings.equals(other.contextMappings); + } +} diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/context/GeoContextMapping.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/context/GeoContextMapping.java new file mode 100644 index 0000000000..9a01322145 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/context/GeoContextMapping.java @@ -0,0 +1,479 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.suggest.completion.context; + +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.GeoHashUtils; +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.common.geo.GeoPoint; +import org.elasticsearch.common.geo.GeoUtils; +import org.elasticsearch.common.unit.DistanceUnit; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentParser.Token; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.index.mapper.ParseContext.Document; +import org.elasticsearch.index.mapper.geo.GeoPointFieldMapper; + +import java.io.IOException; +import java.util.*; + +/** + * A {@link ContextMapping} that uses a geo location/area as a + * criteria. + * The suggestions can be boosted and/or filtered depending on + * whether it falls within an area, represented by a query geo hash + * with a specified precision + * + * {@link GeoQueryContext} defines the options for constructing + * a unit of query context for this context type + */ +public class GeoContextMapping extends ContextMapping { + + public static final String FIELD_PRECISION = "precision"; + public static final String FIELD_FIELDNAME = "path"; + + public static final int DEFAULT_PRECISION = 6; + + static final String CONTEXT_VALUE = "context"; + static final String CONTEXT_BOOST = "boost"; + static final String CONTEXT_PRECISION = "precision"; + static final String CONTEXT_NEIGHBOURS = "neighbours"; + + private final int precision; + private final String fieldName; + + private GeoContextMapping(String name, String fieldName, int precision) { + super(Type.GEO, name); + this.precision = precision; + this.fieldName = fieldName; + } + + public String getFieldName() { + return fieldName; + } + + public int 
getPrecision() { + return precision; + } + + protected static GeoContextMapping load(String name, Map<String, Object> config) { + final GeoContextMapping.Builder builder = new GeoContextMapping.Builder(name); + + if (config != null) { + final Object configPrecision = config.get(FIELD_PRECISION); + if (configPrecision != null) { + if (configPrecision instanceof Integer) { + builder.precision((Integer) configPrecision); + } else if (configPrecision instanceof Long) { + builder.precision((Long) configPrecision); + } else if (configPrecision instanceof Double) { + builder.precision((Double) configPrecision); + } else if (configPrecision instanceof Float) { + builder.precision((Float) configPrecision); + } else { + builder.precision(configPrecision.toString()); + } + config.remove(FIELD_PRECISION); + } + + final Object fieldName = config.get(FIELD_FIELDNAME); + if (fieldName != null) { + builder.field(fieldName.toString()); + config.remove(FIELD_FIELDNAME); + } + } + return builder.build(); + } + + @Override + protected XContentBuilder toInnerXContent(XContentBuilder builder, Params params) throws IOException { + builder.field(FIELD_PRECISION, precision); + if (fieldName != null) { + builder.field(FIELD_FIELDNAME, fieldName); + } + return builder; + } + + /** + * Parse a set of {@link CharSequence} contexts at index-time. 
+ * Acceptable formats: + * + * <ul> + * <li>Array: <pre>[<i><GEO POINT></i>, ..]</pre></li> + * <li>String/Object/Array: <pre>"GEO POINT"</pre></li> + * </ul> + * + * see {@link GeoUtils#parseGeoPoint(String, GeoPoint)} for GEO POINT + */ + @Override + public Set<CharSequence> parseContext(ParseContext parseContext, XContentParser parser) throws IOException, ElasticsearchParseException { + if (fieldName != null) { + FieldMapper mapper = parseContext.docMapper().mappers().getMapper(fieldName); + if (!(mapper instanceof GeoPointFieldMapper)) { + throw new ElasticsearchParseException("referenced field must be mapped to geo_point"); + } + } + final Set<CharSequence> contexts = new HashSet<>(); + Token token = parser.currentToken(); + if (token == Token.START_ARRAY) { + token = parser.nextToken(); + // Test if value is a single point in <code>[lon, lat]</code> format + if (token == Token.VALUE_NUMBER) { + double lon = parser.doubleValue(); + if (parser.nextToken() == Token.VALUE_NUMBER) { + double lat = parser.doubleValue(); + if (parser.nextToken() == Token.END_ARRAY) { + contexts.add(GeoHashUtils.stringEncode(lon, lat, precision)); + } else { + throw new ElasticsearchParseException("only two values [lon, lat] expected"); + } + } else { + throw new ElasticsearchParseException("latitude must be a numeric value"); + } + } else { + while (token != Token.END_ARRAY) { + GeoPoint point = GeoUtils.parseGeoPoint(parser); + contexts.add(GeoHashUtils.stringEncode(point.getLon(), point.getLat(), precision)); + token = parser.nextToken(); + } + } + } else if (token == Token.VALUE_STRING) { + final String geoHash = parser.text(); + final CharSequence truncatedGeoHash = geoHash.subSequence(0, Math.min(geoHash.length(), precision)); + contexts.add(truncatedGeoHash); + } else { + // or a single location + GeoPoint point = GeoUtils.parseGeoPoint(parser); + contexts.add(GeoHashUtils.stringEncode(point.getLon(), point.getLat(), precision)); + } + return contexts; + } + + @Override + 
public Set<CharSequence> parseContext(Document document) { + final Set<CharSequence> geohashes = new HashSet<>(); + + if (fieldName != null) { + IndexableField[] fields = document.getFields(fieldName); + GeoPoint spare = new GeoPoint(); + if (fields.length == 0) { + IndexableField[] lonFields = document.getFields(fieldName + ".lon"); + IndexableField[] latFields = document.getFields(fieldName + ".lat"); + if (lonFields.length > 0 && latFields.length > 0) { + for (int i = 0; i < lonFields.length; i++) { + IndexableField lonField = lonFields[i]; + IndexableField latField = latFields[i]; + assert lonField.fieldType().docValuesType() == latField.fieldType().docValuesType(); + // we write doc values fields differently: one field for all values, so we need to only care about indexed fields + if (lonField.fieldType().docValuesType() == DocValuesType.NONE) { + spare.reset(latField.numericValue().doubleValue(), lonField.numericValue().doubleValue()); + geohashes.add(GeoHashUtils.stringEncode(spare.getLon(), spare.getLat(), precision)); + } + } + } + } else { + for (IndexableField field : fields) { + spare.resetFromString(field.stringValue()); + geohashes.add(spare.geohash()); + } + } + } + + Set<CharSequence> locations = new HashSet<>(); + for (CharSequence geohash : geohashes) { + int precision = Math.min(this.precision, geohash.length()); + CharSequence truncatedGeohash = geohash.subSequence(0, precision); + locations.add(truncatedGeohash); + } + return locations; + } + + /** + * Parse a list of {@link GeoQueryContext} + * using <code>parser</code>. 
A QueryContexts accepts one of the following forms: + * + * <ul> + * <li>Object: GeoQueryContext</li> + * <li>String: GeoQueryContext value with boost=1 precision=PRECISION neighbours=[PRECISION]</li> + * <li>Array: <pre>[GeoQueryContext, ..]</pre></li> + * </ul> + * + * A GeoQueryContext has one of the following forms: + * <ul> + * <li>Object: + * <ul> + * <li><pre>GEO POINT</pre></li> + * <li><pre>{"lat": <i><double></i>, "lon": <i><double></i>, "precision": <i><int></i>, "neighbours": <i><[int, ..]></i>}</pre></li> + * <li><pre>{"context": <i><string></i>, "boost": <i><int></i>, "precision": <i><int></i>, "neighbours": <i><[int, ..]></i>}</pre></li> + * <li><pre>{"context": <i><GEO POINT></i>, "boost": <i><int></i>, "precision": <i><int></i>, "neighbours": <i><[int, ..]></i>}</pre></li> + * </ul> + * <li>String: <pre>GEO POINT</pre></li> + * </ul> + * see {@link GeoUtils#parseGeoPoint(String, GeoPoint)} for GEO POINT + */ + @Override + public List<CategoryQueryContext> parseQueryContext(XContentParser parser) throws IOException, ElasticsearchParseException { + List<CategoryQueryContext> queryContexts = new ArrayList<>(); + Token token = parser.nextToken(); + if (token == Token.START_OBJECT || token == Token.VALUE_STRING) { + queryContexts.add(innerParseQueryContext(parser)); + } else if (token == Token.START_ARRAY) { + while (parser.nextToken() != Token.END_ARRAY) { + queryContexts.add(innerParseQueryContext(parser)); + } + } + return queryContexts; + } + + private GeoQueryContext innerParseQueryContext(XContentParser parser) throws IOException, ElasticsearchParseException { + Token token = parser.currentToken(); + if (token == Token.VALUE_STRING) { + return new GeoQueryContext(GeoUtils.parseGeoPoint(parser), 1, precision, precision); + } else if (token == Token.START_OBJECT) { + String currentFieldName = null; + GeoPoint point = null; + double lat = Double.NaN; + double lon = Double.NaN; + int precision = this.precision; + List<Integer> neighbours = new 
ArrayList<>(); + int boost = 1; + while ((token = parser.nextToken()) != Token.END_OBJECT) { + if (token == Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (currentFieldName != null) { + if ("lat".equals(currentFieldName)) { + if (token == Token.VALUE_STRING || token == Token.VALUE_NUMBER) { + if (point == null) { + lat = parser.doubleValue(true); + } else { + throw new ElasticsearchParseException("context must have either lat/lon or geohash"); + } + } else { + throw new ElasticsearchParseException("lat must be a number"); + } + } else if ("lon".equals(currentFieldName)) { + if (token == Token.VALUE_STRING || token == Token.VALUE_NUMBER) { + if (point == null) { + lon = parser.doubleValue(true); + } else { + throw new ElasticsearchParseException("context must have either lat/lon or geohash"); + } + } else { + throw new ElasticsearchParseException("lon must be a number"); + } + } else if (CONTEXT_VALUE.equals(currentFieldName)) { + point = GeoUtils.parseGeoPoint(parser); + } else if (CONTEXT_BOOST.equals(currentFieldName)) { + final Number number; + if (token == Token.VALUE_STRING) { + try { + number = Long.parseLong(parser.text()); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("boost must be a string representing a numeric value, but was [" + parser.text() + "]"); + } + } else if (token == Token.VALUE_NUMBER) { + XContentParser.NumberType numberType = parser.numberType(); + number = parser.numberValue(); + if (numberType != XContentParser.NumberType.INT) { + throw new ElasticsearchParseException("boost must be in the interval [0..2147483647], but was [" + number.longValue() + "]"); + } + } else { + throw new ElasticsearchParseException("boost must be an int"); + } + boost = number.intValue(); + } else if (CONTEXT_NEIGHBOURS.equals(currentFieldName)) { + if (token == Token.VALUE_STRING) { + neighbours.add(GeoUtils.geoHashLevelsForPrecision(parser.text())); + } else if (token == Token.VALUE_NUMBER) { + 
XContentParser.NumberType numberType = parser.numberType(); + if (numberType == XContentParser.NumberType.INT || numberType == XContentParser.NumberType.LONG) { + neighbours.add(parser.intValue()); + } else { + neighbours.add(GeoUtils.geoHashLevelsForPrecision(parser.doubleValue())); + } + } else if (token == Token.START_ARRAY) { + while ((token = parser.nextToken()) != Token.END_ARRAY) { + if (token == Token.VALUE_STRING || token == Token.VALUE_NUMBER) { + neighbours.add(parser.intValue(true)); + } else { + throw new ElasticsearchParseException("neighbours array must have only numbers"); + } + } + } else { + throw new ElasticsearchParseException("neighbours must be a number or a list of numbers"); + } + } else if (CONTEXT_PRECISION.equals(currentFieldName)) { + if (token == Token.VALUE_STRING) { + precision = GeoUtils.geoHashLevelsForPrecision(parser.text()); + } else if (token == Token.VALUE_NUMBER) { + XContentParser.NumberType numberType = parser.numberType(); + if (numberType == XContentParser.NumberType.INT || numberType == XContentParser.NumberType.LONG) { + precision = parser.intValue(); + } else { + precision = GeoUtils.geoHashLevelsForPrecision(parser.doubleValue()); + } + } else { + throw new ElasticsearchParseException("precision must be a number"); + } + } + } + } + if (point == null) { + if (Double.isNaN(lat) == false && Double.isNaN(lon) == false) { + point = new GeoPoint(lat, lon); + } else { + throw new ElasticsearchParseException("no context provided"); + } + } + + String geoHash = GeoHashUtils.stringEncode(point.getLon(), point.getLat(), precision); + if (neighbours.size() > 0) { + final int[] neighbourValues = new int[neighbours.size()]; + for (int i = 0; i < neighbours.size(); i++) { + neighbourValues[i] = neighbours.get(i); + } + return new GeoQueryContext(geoHash, boost, precision, neighbourValues); + } else { + return new GeoQueryContext(geoHash, boost, precision, precision); + } + } else { + throw new ElasticsearchParseException("contexts 
field expected string or object but was [" + token.name() + "]"); + } + } + + @Override + public List<CategoryQueryContext> getQueryContexts(List<CategoryQueryContext> queryContexts) { + List<CategoryQueryContext> queryContextList = new ArrayList<>(); + for (CategoryQueryContext queryContext : queryContexts) { + GeoQueryContext geoQueryContext = ((GeoQueryContext) queryContext); + int precision = Math.min(this.precision, geoQueryContext.context.length()); + String truncatedGeohash = geoQueryContext.context.toString().substring(0, precision); + queryContextList.add(new CategoryQueryContext(truncatedGeohash, geoQueryContext.boost, false)); + for (int neighboursPrecision : geoQueryContext.neighbours) { + int neighbourPrecision = Math.min(neighboursPrecision, truncatedGeohash.length()); + String neighbourGeohash = truncatedGeohash.substring(0, neighbourPrecision); + Collection<String> locations = new HashSet<>(); + GeoHashUtils.addNeighbors(neighbourGeohash, neighbourPrecision, locations); + boolean isPrefix = neighbourPrecision < precision; + for (String location : locations) { + queryContextList.add(new CategoryQueryContext(location, geoQueryContext.boost, isPrefix)); + } + } + } + return queryContextList; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + GeoContextMapping that = (GeoContextMapping) o; + if (precision != that.precision) return false; + return !(fieldName != null ? 
!fieldName.equals(that.fieldName) : that.fieldName != null); + + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), precision, fieldName); + } + + public static class Builder extends ContextBuilder<GeoContextMapping> { + + private int precision = DEFAULT_PRECISION; + private String fieldName = null; + + protected Builder(String name) { + super(name); + } + + /** + * Set the precision used to make suggestions + * + * @param precision + * precision as distance with {@link DistanceUnit}. Default: + * meters + * @return this + */ + public Builder precision(String precision) { + return precision(DistanceUnit.parse(precision, DistanceUnit.METERS, DistanceUnit.METERS)); + } + + /** + * Set the precision used to make suggestions + * + * @param precision + * precision value + * @param unit + * {@link DistanceUnit} to use + * @return this + */ + public Builder precision(double precision, DistanceUnit unit) { + return precision(unit.toMeters(precision)); + } + + /** + * Set the precision used to make suggestions + * + * @param meters + * precision as distance in meters + * @return this + */ + public Builder precision(double meters) { + int level = GeoUtils.geoHashLevelsForPrecision(meters); + // Ceiling precision: we might return more results + if (GeoUtils.geoHashCellSize(level) < meters) { + level = Math.max(1, level - 1); + } + return precision(level); + } + + /** + * Set the precision used to make suggestions + * + * @param level + * maximum length of geohashes + * @return this + */ + public Builder precision(int level) { + this.precision = level; + return this; + } + + /** + * Set the name of the field containing a geolocation to use + * @param fieldName name of the field + * @return this + */ + public Builder field(String fieldName) { + this.fieldName = fieldName; + return this; + } + + @Override + public GeoContextMapping build() { + return new GeoContextMapping(name, fieldName, precision); + } + } +} diff --git 
a/core/src/main/java/org/elasticsearch/search/suggest/completion/context/GeoQueryContext.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/context/GeoQueryContext.java new file mode 100644 index 0000000000..caf1a3eeb9 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/context/GeoQueryContext.java @@ -0,0 +1,98 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.suggest.completion.context; + +import org.elasticsearch.common.geo.GeoPoint; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; + +import static org.elasticsearch.search.suggest.completion.context.GeoContextMapping.*; + +/** + * Defines the query context for {@link GeoContextMapping} + */ +public class GeoQueryContext extends CategoryQueryContext { + public final int precision; + public final int[] neighbours; + + /** + * Creates a query context for a given geo point with a boost of 1 + * and a precision of {@value GeoContextMapping#DEFAULT_PRECISION} + */ + public GeoQueryContext(GeoPoint geoPoint) { + this(geoPoint.geohash()); + } + + /** + * Creates a query context for a given geo point with a + * provided boost + */ + public GeoQueryContext(GeoPoint geoPoint, int boost) { + this(geoPoint.geohash(), boost); + } + + /** + * Creates a query context with a given geo hash with a boost of 1 + * and a precision of {@value GeoContextMapping#DEFAULT_PRECISION} + */ + public GeoQueryContext(CharSequence geoHash) { + this(geoHash, 1); + } + + /** + * Creates a query context for a given geo hash with a + * provided boost + */ + public GeoQueryContext(CharSequence geoHash, int boost) { + this(geoHash, boost, DEFAULT_PRECISION); + } + + /** + * Creates a query context for a geo point with + * a provided boost and enables generating neighbours + * at specified precisions + */ + public GeoQueryContext(GeoPoint geoPoint, int boost, int precision, int... neighbours) { + this(geoPoint.geohash(), boost, precision, neighbours); + } + + /** + * Creates a query context for a geo hash with + * a provided boost and enables generating neighbours + * at specified precisions + */ + public GeoQueryContext(CharSequence geoHash, int boost, int precision, int... 
neighbours) { + super(geoHash, boost, true); + this.precision = precision; + this.neighbours = neighbours; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(CONTEXT_VALUE, context); + builder.field(CONTEXT_BOOST, boost); + builder.field(CONTEXT_NEIGHBOURS, neighbours); + builder.endObject(); + return builder; + } +} diff --git a/core/src/main/java/org/elasticsearch/search/suggest/context/CategoryContextMapping.java b/core/src/main/java/org/elasticsearch/search/suggest/context/CategoryContextMapping.java deleted file mode 100644 index 118d95e22d..0000000000 --- a/core/src/main/java/org/elasticsearch/search/suggest/context/CategoryContextMapping.java +++ /dev/null @@ -1,360 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.search.suggest.context; - -import org.apache.lucene.analysis.PrefixAnalyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.util.automaton.Automata; -import org.apache.lucene.util.automaton.Automaton; -import org.apache.lucene.util.automaton.Operations; -import org.elasticsearch.ElasticsearchParseException; -import org.elasticsearch.common.util.iterable.Iterables; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.common.xcontent.XContentParser.Token; -import org.elasticsearch.index.mapper.ParseContext; -import org.elasticsearch.index.mapper.ParseContext.Document; - -import java.io.IOException; -import java.util.*; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; - -/** - * The {@link CategoryContextMapping} is used to define a {@link ContextMapping} that - * references a field within a document. The value of the field in turn will be - * used to setup the suggestions made by the completion suggester. - */ -public class CategoryContextMapping extends ContextMapping { - - protected static final String TYPE = "category"; - - private static final String FIELD_FIELDNAME = "path"; - private static final String DEFAULT_FIELDNAME = "_type"; - - private static final Iterable<? extends CharSequence> EMPTY_VALUES = Collections.emptyList(); - - private final String fieldName; - private final Iterable<? 
extends CharSequence> defaultValues; - private final FieldConfig defaultConfig; - - /** - * Create a new {@link CategoryContextMapping} with the default field - * <code>[_type]</code> - */ - public CategoryContextMapping(String name) { - this(name, DEFAULT_FIELDNAME, EMPTY_VALUES); - } - - /** - * Create a new {@link CategoryContextMapping} with the default field - * <code>[_type]</code> - */ - public CategoryContextMapping(String name, String fieldName) { - this(name, fieldName, EMPTY_VALUES); - } - - /** - * Create a new {@link CategoryContextMapping} with the default field - * <code>[_type]</code> - */ - public CategoryContextMapping(String name, Iterable<? extends CharSequence> defaultValues) { - this(name, DEFAULT_FIELDNAME, defaultValues); - } - - /** - * Create a new {@link CategoryContextMapping} with the default field - * <code>[_type]</code> - */ - public CategoryContextMapping(String name, String fieldName, Iterable<? extends CharSequence> defaultValues) { - super(TYPE, name); - this.fieldName = fieldName; - this.defaultValues = defaultValues; - this.defaultConfig = new FieldConfig(fieldName, defaultValues, null); - } - - /** - * Name of the field used by this {@link CategoryContextMapping} - */ - public String getFieldName() { - return fieldName; - } - - public Iterable<? extends CharSequence> getDefaultValues() { - return defaultValues; - } - - @Override - public FieldConfig defaultConfig() { - return defaultConfig; - } - - /** - * Load the specification of a {@link CategoryContextMapping} - * - * @param name - * name of the field to use. 
If <code>null</code> default field - * will be used - * @return new {@link CategoryContextMapping} - */ - protected static CategoryContextMapping load(String name, Map<String, Object> config) throws ElasticsearchParseException { - CategoryContextMapping.Builder mapping = new CategoryContextMapping.Builder(name); - - Object fieldName = config.get(FIELD_FIELDNAME); - Object defaultValues = config.get(FIELD_MISSING); - - if (fieldName != null) { - mapping.fieldName(fieldName.toString()); - config.remove(FIELD_FIELDNAME); - } - - if (defaultValues != null) { - if (defaultValues instanceof Iterable) { - for (Object value : (Iterable) defaultValues) { - mapping.addDefaultValue(value.toString()); - } - } else { - mapping.addDefaultValue(defaultValues.toString()); - } - config.remove(FIELD_MISSING); - } - - return mapping.build(); - } - - @Override - protected XContentBuilder toInnerXContent(XContentBuilder builder, Params params) throws IOException { - if (fieldName != null) { - builder.field(FIELD_FIELDNAME, fieldName); - } - builder.startArray(FIELD_MISSING); - for (CharSequence value : defaultValues) { - builder.value(value); - } - builder.endArray(); - return builder; - } - - @Override - public ContextConfig parseContext(ParseContext parseContext, XContentParser parser) throws IOException, ElasticsearchParseException { - Token token = parser.currentToken(); - if (token == Token.VALUE_NULL) { - return new FieldConfig(fieldName, defaultValues, null); - } else if (token == Token.VALUE_STRING) { - return new FieldConfig(fieldName, null, Collections.singleton(parser.text())); - } else if (token == Token.VALUE_NUMBER) { - return new FieldConfig(fieldName, null, Collections.singleton(parser.text())); - } else if (token == Token.VALUE_BOOLEAN) { - return new FieldConfig(fieldName, null, Collections.singleton(parser.text())); - } else if (token == Token.START_ARRAY) { - ArrayList<String> values = new ArrayList<>(); - while((token = parser.nextToken()) != Token.END_ARRAY) { - 
values.add(parser.text()); - } - if(values.isEmpty()) { - throw new ElasticsearchParseException("FieldConfig must contain a least one category"); - } - return new FieldConfig(fieldName, null, values); - } else { - throw new ElasticsearchParseException("FieldConfig must be either [null], a string or a list of strings"); - } - } - - @Override - public FieldQuery parseQuery(String name, XContentParser parser) throws IOException, ElasticsearchParseException { - Iterable<? extends CharSequence> values; - Token token = parser.currentToken(); - if (token == Token.START_ARRAY) { - ArrayList<String> list = new ArrayList<>(); - while ((token = parser.nextToken()) != Token.END_ARRAY) { - list.add(parser.text()); - } - values = list; - } else if (token == Token.VALUE_NULL) { - values = defaultValues; - } else { - values = Collections.singleton(parser.text()); - } - - return new FieldQuery(name, values); - } - - public static FieldQuery query(String name, CharSequence... fieldvalues) { - return query(name, Arrays.asList(fieldvalues)); - } - - public static FieldQuery query(String name, Iterable<? extends CharSequence> fieldvalues) { - return new FieldQuery(name, fieldvalues); - } - - @Override - public boolean equals(Object obj) { - if (obj instanceof CategoryContextMapping) { - CategoryContextMapping other = (CategoryContextMapping) obj; - if (this.fieldName.equals(other.fieldName)) { - return Iterables.allElementsAreEqual(this.defaultValues, other.defaultValues); - } - } - return false; - } - - @Override - public int hashCode() { - int hashCode = fieldName.hashCode(); - for (CharSequence seq : defaultValues) { - hashCode = 31 * hashCode + seq.hashCode(); - } - return hashCode; - } - - private static class FieldConfig extends ContextConfig { - - private final String fieldname; - private final Iterable<? extends CharSequence> defaultValues; - private final Iterable<? extends CharSequence> values; - - public FieldConfig(String fieldname, Iterable<? 
extends CharSequence> defaultValues, Iterable<? extends CharSequence> values) { - this.fieldname = fieldname; - this.defaultValues = defaultValues; - this.values = values; - } - - @Override - protected TokenStream wrapTokenStream(Document doc, TokenStream stream) { - if (values != null) { - return new PrefixAnalyzer.PrefixTokenFilter(stream, ContextMapping.SEPARATOR, values); - // if fieldname is default, BUT our default values are set, we take that one - } else if ((doc.getFields(fieldname).length == 0 || fieldname.equals(DEFAULT_FIELDNAME)) && defaultValues.iterator().hasNext()) { - return new PrefixAnalyzer.PrefixTokenFilter(stream, ContextMapping.SEPARATOR, defaultValues); - } else { - IndexableField[] fields = doc.getFields(fieldname); - ArrayList<CharSequence> values = new ArrayList<>(fields.length); - for (int i = 0; i < fields.length; i++) { - values.add(fields[i].stringValue()); - } - - return new PrefixAnalyzer.PrefixTokenFilter(stream, ContextMapping.SEPARATOR, values); - } - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("FieldConfig(" + fieldname + " = ["); - if (this.values != null && this.values.iterator().hasNext()) { - sb.append(delimitValues(this.values)); - } - if (this.defaultValues != null && this.defaultValues.iterator().hasNext()) { - sb.append(" default").append(delimitValues(this.defaultValues)); - } - return sb.append("])").toString(); - } - - private String delimitValues(Iterable<? extends CharSequence> values) { - return StreamSupport.stream(values.spliterator(), false).collect(Collectors.joining(", ", "(", ")")); - } - - } - - private static class FieldQuery extends ContextQuery { - - private final Iterable<? extends CharSequence> values; - - public FieldQuery(String name, Iterable<? 
extends CharSequence> values) { - super(name); - this.values = values; - } - - @Override - public Automaton toAutomaton() { - List<Automaton> automatons = new ArrayList<>(); - for (CharSequence value : values) { - automatons.add(Automata.makeString(value.toString())); - } - return Operations.union(automatons); - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startArray(name); - for (CharSequence value : values) { - builder.value(value); - } - builder.endArray(); - return builder; - } - } - - public static class Builder extends ContextBuilder<CategoryContextMapping> { - - private String fieldname; - private List<CharSequence> defaultValues = new ArrayList<>(); - - public Builder(String name) { - this(name, DEFAULT_FIELDNAME); - } - - public Builder(String name, String fieldname) { - super(name); - this.fieldname = fieldname; - } - - /** - * Set the name of the field to use - */ - public Builder fieldName(String fieldname) { - this.fieldname = fieldname; - return this; - } - - /** - * Add value to the default values of the mapping - */ - public Builder addDefaultValue(CharSequence defaultValue) { - this.defaultValues.add(defaultValue); - return this; - } - - /** - * Add set of default values to the mapping - */ - public Builder addDefaultValues(CharSequence... defaultValues) { - for (CharSequence defaultValue : defaultValues) { - this.defaultValues.add(defaultValue); - } - return this; - } - - /** - * Add set of default values to the mapping - */ - public Builder addDefaultValues(Iterable<? 
extends CharSequence> defaultValues) { - for (CharSequence defaultValue : defaultValues) { - this.defaultValues.add(defaultValue); - } - return this; - } - - @Override - public CategoryContextMapping build() { - return new CategoryContextMapping(name, fieldname, defaultValues); - } - } -} diff --git a/core/src/main/java/org/elasticsearch/search/suggest/context/ContextBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/context/ContextBuilder.java deleted file mode 100644 index 8b554d957d..0000000000 --- a/core/src/main/java/org/elasticsearch/search/suggest/context/ContextBuilder.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.search.suggest.context; - -import org.elasticsearch.ElasticsearchParseException; -import org.elasticsearch.Version; -import org.elasticsearch.index.mapper.DocumentMapperParser; - -import java.util.Map; -import java.util.Map.Entry; -import java.util.SortedMap; -import java.util.TreeMap; - -public abstract class ContextBuilder<E extends ContextMapping> { - - protected String name; - - public ContextBuilder(String name) { - this.name = name; - } - - public abstract E build(); - - /** - * Create a new {@link GeolocationContextMapping} - */ - public static GeolocationContextMapping.Builder location(String name) { - return new GeolocationContextMapping.Builder(name); - } - - /** - * Create a new {@link GeolocationContextMapping} with given precision and - * neighborhood usage - * - * @param precision geohash length - * @param neighbors use neighbor cells - */ - public static GeolocationContextMapping.Builder location(String name, int precision, boolean neighbors) { - return new GeolocationContextMapping.Builder(name, neighbors, precision); - } - - /** - * Create a new {@link CategoryContextMapping.Builder} - */ - public static CategoryContextMapping.Builder category(String name) { - return new CategoryContextMapping.Builder(name, null); - } - - /** - * Create a new {@link CategoryContextMapping.Builder} with default category - * - * @param defaultCategory category to use, if it is not provided - */ - public static CategoryContextMapping.Builder category(String name, String defaultCategory) { - return new CategoryContextMapping.Builder(name, null).addDefaultValue(defaultCategory); - } - - /** - * Create a new {@link CategoryContextMapping} - * - * @param fieldname - * name of the field to use - */ - public static CategoryContextMapping.Builder reference(String name, String fieldname) { - return new CategoryContextMapping.Builder(name, fieldname); - } - - /** - * Create a new {@link CategoryContextMapping} - * - * @param fieldname name of 
the field to use - * @param defaultValues values to use, if the document not provides - * a field with the given name - */ - public static CategoryContextMapping.Builder reference(String name, String fieldname, Iterable<? extends CharSequence> defaultValues) { - return new CategoryContextMapping.Builder(name, fieldname).addDefaultValues(defaultValues); - } - - public static SortedMap<String, ContextMapping> loadMappings(Object configuration, Version indexVersionCreated) - throws ElasticsearchParseException { - if (configuration instanceof Map) { - Map<String, Object> configurations = (Map<String, Object>)configuration; - SortedMap<String, ContextMapping> mappings = new TreeMap<>(); - for (Entry<String,Object> config : configurations.entrySet()) { - String name = config.getKey(); - mappings.put(name, loadMapping(name, (Map<String, Object>) config.getValue(), indexVersionCreated)); - } - return mappings; - } else if (configuration == null) { - return ContextMapping.EMPTY_MAPPING; - } else { - throw new ElasticsearchParseException("no valid context configuration"); - } - } - - protected static ContextMapping loadMapping(String name, Map<String, Object> config, Version indexVersionCreated) - throws ElasticsearchParseException { - final Object argType = config.get(ContextMapping.FIELD_TYPE); - - if (argType == null) { - throw new ElasticsearchParseException("missing [{}] in context mapping", ContextMapping.FIELD_TYPE); - } - - final String type = argType.toString(); - ContextMapping contextMapping; - if (GeolocationContextMapping.TYPE.equals(type)) { - contextMapping = GeolocationContextMapping.load(name, config); - } else if (CategoryContextMapping.TYPE.equals(type)) { - contextMapping = CategoryContextMapping.load(name, config); - } else { - throw new ElasticsearchParseException("unknown context type [{}]", type); - } - config.remove(ContextMapping.FIELD_TYPE); - DocumentMapperParser.checkNoRemainingFields(name, config, indexVersionCreated); - - return contextMapping; 
- } -} diff --git a/core/src/main/java/org/elasticsearch/search/suggest/context/ContextMapping.java b/core/src/main/java/org/elasticsearch/search/suggest/context/ContextMapping.java deleted file mode 100644 index bbdb614c94..0000000000 --- a/core/src/main/java/org/elasticsearch/search/suggest/context/ContextMapping.java +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.search.suggest.context; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester; -import org.apache.lucene.util.automaton.Automata; -import org.apache.lucene.util.automaton.Automaton; -import org.apache.lucene.util.automaton.Operations; -import org.apache.lucene.util.fst.FST; -import org.elasticsearch.ElasticsearchParseException; -import org.elasticsearch.common.xcontent.ToXContent; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.common.xcontent.XContentParser.Token; -import org.elasticsearch.common.xcontent.json.JsonXContent; -import org.elasticsearch.index.mapper.ParseContext; -import org.elasticsearch.index.mapper.ParseContext.Document; - -import java.io.IOException; -import java.util.*; - -/** - * A {@link ContextMapping} is used t define a context that may used - * in conjunction with a suggester. To define a suggester that depends on a - * specific context derived class of {@link ContextMapping} will be - * used to specify the kind of additional information required in order to make - * suggestions. 
- */ -public abstract class ContextMapping implements ToXContent { - - /** Character used to separate several contexts */ - public static final char SEPARATOR = '\u001D'; - - /** Dummy Context Mapping that should be used if no context is used*/ - public static final SortedMap<String, ContextMapping> EMPTY_MAPPING = new TreeMap<>(); - - /** Dummy Context Config matching the Dummy Mapping by providing an empty context*/ - public static final SortedMap<String, ContextConfig> EMPTY_CONFIG = new TreeMap<>(); - - /** Dummy Context matching the Dummy Mapping by not wrapping a {@link TokenStream} */ - public static final Context EMPTY_CONTEXT = new Context(EMPTY_CONFIG, null); - - public static final String FIELD_VALUE = "value"; - public static final String FIELD_MISSING = "default"; - public static final String FIELD_TYPE = "type"; - - protected final String type; // Type of the Contextmapping - protected final String name; - - /** - * Define a new context mapping of a specific type - * - * @param type - * name of the new context mapping - */ - protected ContextMapping(String type, String name) { - super(); - this.type = type; - this.name = name; - } - - /** - * @return the type name of the context - */ - protected String type() { - return type; - } - - /** - * @return the name/id of the context - */ - public String name() { - return name; - } - - @Override - public final XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(name); - builder.field(FIELD_TYPE, type); - toInnerXContent(builder, params); - builder.endObject(); - return builder; - } - - /** - * A {@link ContextMapping} combined with the information provided by a document - * form a {@link ContextConfig} which is used to build the underlying FST. 
- * - * @param parseContext context of parsing phase - * @param parser {@link XContentParser} used to read and setup the configuration - * @return A {@link ContextConfig} related to <b>this</b> mapping - */ - public abstract ContextConfig parseContext(ParseContext parseContext, XContentParser parser) throws IOException, ElasticsearchParseException; - - public abstract ContextConfig defaultConfig(); - - /** - * Parse a query according to the context. Parsing starts at parsers <b>current</b> position - * - * @param name name of the context - * @param parser {@link XContentParser} providing the data of the query - * - * @return {@link ContextQuery} according to this mapping - */ - public abstract ContextQuery parseQuery(String name, XContentParser parser) throws IOException, ElasticsearchParseException; - - /** - * Since every context mapping is assumed to have a name given by the field name of an context object, this - * method is used to build the value used to serialize the mapping - * - * @param builder builder to append the mapping to - * @param params parameters passed to the builder - * - * @return the builder used - */ - protected abstract XContentBuilder toInnerXContent(XContentBuilder builder, Params params) throws IOException; - - /** - * Test equality of two mapping - * - * @param thisMappings first mapping - * @param otherMappings second mapping - * - * @return true if both arguments are equal - */ - public static boolean mappingsAreEqual(SortedMap<String, ? extends ContextMapping> thisMappings, SortedMap<String, ? 
extends ContextMapping> otherMappings) { - return thisMappings.entrySet().equals(otherMappings.entrySet()); - } - - @Override - public String toString() { - try { - return toXContent(JsonXContent.contentBuilder(), ToXContent.EMPTY_PARAMS).string(); - } catch (IOException e) { - return super.toString(); - } - } - - /** - * A collection of {@link ContextMapping}s, their {@link ContextConfig}uration and a - * Document form a complete {@link Context}. Since this Object provides all information used - * to setup a suggestion, it can be used to wrap the entire {@link TokenStream} used to build a - * path within the {@link FST}. - */ - public static class Context { - - final SortedMap<String, ContextConfig> contexts; - final Document doc; - - public Context(SortedMap<String, ContextConfig> contexts, Document doc) { - super(); - this.contexts = contexts; - this.doc = doc; - } - - /** - * Wrap the {@link TokenStream} according to the provided informations of {@link ContextConfig} - * and a related {@link Document}. - * - * @param tokenStream {@link TokenStream} to wrap - * - * @return wrapped token stream - */ - public TokenStream wrapTokenStream(TokenStream tokenStream) { - for (ContextConfig context : contexts.values()) { - tokenStream = context.wrapTokenStream(doc, tokenStream); - } - return tokenStream; - } - } - - /** - * A {@link ContextMapping} combined with the information provided by a document - * form a {@link ContextConfig} which is used to build the underlying {@link FST}. This class hold - * a simple method wrapping a {@link TokenStream} by provided document informations. 
- */ - public static abstract class ContextConfig { - - /** - * Wrap a {@link TokenStream} for building suggestions to use context informations - * provided by a document or a {@link ContextMapping} - * - * @param doc document related to the stream - * @param stream original stream used to build the underlying {@link FST} - * - * @return A new {@link TokenStream} providing additional context information - */ - protected abstract TokenStream wrapTokenStream(Document doc, TokenStream stream); - - } - - /** - * A {@link ContextQuery} defines the context information for a specific {@link ContextMapping} - * defined within a suggestion request. According to the parameters set in the request and the - * {@link ContextMapping} such a query is used to wrap the {@link TokenStream} of the actual - * suggestion request into a {@link TokenStream} with the context settings - */ - public static abstract class ContextQuery implements ToXContent { - - protected final String name; - - protected ContextQuery(String name) { - this.name = name; - } - - public String name() { - return name; - } - - /** - * Create a automaton for a given context query this automaton will be used - * to find the matching paths with the fst - * - * @param preserveSep set an additional char (<code>XAnalyzingSuggester.SEP_LABEL</code>) between each context query - * @param queries list of {@link ContextQuery} defining the lookup context - * - * @return Automaton matching the given Query - */ - public static Automaton toAutomaton(boolean preserveSep, Iterable<ContextQuery> queries) { - Automaton a = Automata.makeEmptyString(); - - Automaton gap = Automata.makeChar(ContextMapping.SEPARATOR); - if (preserveSep) { - // if separators are preserved the fst contains a SEP_LABEL - // behind each gap. 
To have a matching automaton, we need to - // include the SEP_LABEL in the query as well - gap = Operations.concatenate(gap, Automata.makeChar(XAnalyzingSuggester.SEP_LABEL)); - } - - for (ContextQuery query : queries) { - a = Operations.concatenate(Arrays.asList(query.toAutomaton(), gap, a)); - } - - // TODO: should we limit this? Do any of our ContextQuery impls really create exponential regexps? GeoQuery looks safe (union - // of strings). - return Operations.determinize(a, Integer.MAX_VALUE); - } - - /** - * Build a LookUp Automaton for this context. - * @return LookUp Automaton - */ - protected abstract Automaton toAutomaton(); - - /** - * Parse a set of {@link ContextQuery} according to a given mapping - * @param mappings List of mapping defined y the suggest field - * @param parser parser holding the settings of the queries. The parsers - * current token is assumed hold an array. The number of elements - * in this array must match the number of elements in the mappings. - * @return List of context queries - * - * @throws IOException if something unexpected happened on the underlying stream - * @throws ElasticsearchParseException if the list of queries could not be parsed - */ - public static List<ContextQuery> parseQueries(Map<String, ContextMapping> mappings, XContentParser parser) - throws IOException, ElasticsearchParseException { - - Map<String, ContextQuery> querySet = new HashMap<>(); - Token token = parser.currentToken(); - if(token == Token.START_OBJECT) { - while ((token = parser.nextToken()) != Token.END_OBJECT) { - String name = parser.text(); - ContextMapping mapping = mappings.get(name); - if (mapping == null) { - throw new ElasticsearchParseException("no mapping defined for [{}]", name); - } - parser.nextToken(); - querySet.put(name, mapping.parseQuery(name, parser)); - } - } - - List<ContextQuery> queries = new ArrayList<>(mappings.size()); - for (ContextMapping mapping : mappings.values()) { - queries.add(querySet.get(mapping.name)); - } - 
return queries; - } - - @Override - public String toString() { - try { - return toXContent(JsonXContent.contentBuilder(), ToXContent.EMPTY_PARAMS).string(); - } catch (IOException e) { - return super.toString(); - } - } - } -} diff --git a/core/src/main/java/org/elasticsearch/search/suggest/context/GeolocationContextMapping.java b/core/src/main/java/org/elasticsearch/search/suggest/context/GeolocationContextMapping.java deleted file mode 100644 index a3a4a3f669..0000000000 --- a/core/src/main/java/org/elasticsearch/search/suggest/context/GeolocationContextMapping.java +++ /dev/null @@ -1,734 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.search.suggest.context; - -import com.carrotsearch.hppc.IntHashSet; -import org.apache.lucene.analysis.PrefixAnalyzer.PrefixTokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.index.DocValuesType; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.util.GeoHashUtils; -import org.apache.lucene.util.automaton.Automata; -import org.apache.lucene.util.automaton.Automaton; -import org.apache.lucene.util.automaton.Operations; -import org.apache.lucene.util.fst.FST; -import org.elasticsearch.ElasticsearchParseException; -import org.elasticsearch.common.geo.GeoPoint; -import org.elasticsearch.common.geo.GeoUtils; -import org.elasticsearch.common.unit.DistanceUnit; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.common.xcontent.XContentParser.Token; -import org.elasticsearch.index.mapper.FieldMapper; -import org.elasticsearch.index.mapper.ParseContext; -import org.elasticsearch.index.mapper.ParseContext.Document; -import org.elasticsearch.index.mapper.geo.GeoPointFieldMapper; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; - -/** - * The {@link GeolocationContextMapping} allows to take GeoInfomation into account - * during building suggestions. 
The mapping itself works with geohashes - * explicitly and is configured by three parameters: - * <ul> - * <li><code>precision</code>: length of the geohash indexed as prefix of the - * completion field</li> - * <li><code>neighbors</code>: Should the neighbor cells of the deepest geohash - * level also be indexed as alternatives to the actual geohash</li> - * <li><code>location</code>: (optional) location assumed if it is not provided</li> - * </ul> - * Internally this mapping wraps the suggestions into a form - * <code>[geohash][suggestion]</code>. If the neighbor option is set the cells - * next to the cell on the deepest geohash level ( <code>precision</code>) will - * be indexed as well. The {@link TokenStream} used to build the {@link FST} for - * suggestion will be wrapped into a {@link PrefixTokenFilter} managing these - * geohases as prefixes. - */ -public class GeolocationContextMapping extends ContextMapping { - - public static final String TYPE = "geo"; - - public static final String FIELD_PRECISION = "precision"; - public static final String FIELD_NEIGHBORS = "neighbors"; - public static final String FIELD_FIELDNAME = "path"; - - private final Collection<String> defaultLocations; - private final int[] precision; - private final boolean neighbors; - private final String fieldName; - private final GeoConfig defaultConfig; - - /** - * Create a new {@link GeolocationContextMapping} with a given precision - * - * @param precision - * length of the geohashes - * @param neighbors - * should neighbors be indexed - * @param defaultLocations - * location to use, if it is not provided by the document - */ - protected GeolocationContextMapping(String name, int[] precision, boolean neighbors, Collection<String> defaultLocations, String fieldName) { - super(TYPE, name); - this.precision = precision; - this.neighbors = neighbors; - this.defaultLocations = defaultLocations; - this.fieldName = fieldName; - this.defaultConfig = new GeoConfig(this, defaultLocations); - } 
- - /** - * load a {@link GeolocationContextMapping} by configuration. Such a configuration - * can set the parameters - * <ul> - * <li>precision [<code>String</code>, <code>Double</code>, - * <code>Float</code> or <code>Integer</code>] defines the length of the - * underlying geohash</li> - * <li>defaultLocation [<code>String</code>] defines the location to use if - * it is not provided by the document</li> - * <li>neighbors [<code>Boolean</code>] defines if the last level of the - * geohash should be extended by neighbor cells</li> - * </ul> - * - * @param config - * Configuration for {@link GeolocationContextMapping} - * @return new {@link GeolocationContextMapping} configured by the parameters of - * <code>config</code> - */ - protected static GeolocationContextMapping load(String name, Map<String, Object> config) { - if (!config.containsKey(FIELD_PRECISION)) { - throw new ElasticsearchParseException("field [precision] is missing"); - } - - final GeolocationContextMapping.Builder builder = new GeolocationContextMapping.Builder(name); - - if (config != null) { - final Object configPrecision = config.get(FIELD_PRECISION); - if (configPrecision == null) { - // ignore precision - } else if (configPrecision instanceof Integer) { - builder.precision((Integer) configPrecision); - config.remove(FIELD_PRECISION); - } else if (configPrecision instanceof Long) { - builder.precision((Long) configPrecision); - config.remove(FIELD_PRECISION); - } else if (configPrecision instanceof Double) { - builder.precision((Double) configPrecision); - config.remove(FIELD_PRECISION); - } else if (configPrecision instanceof Float) { - builder.precision((Float) configPrecision); - config.remove(FIELD_PRECISION); - } else if (configPrecision instanceof Iterable) { - for (Object precision : (Iterable)configPrecision) { - if (precision instanceof Integer) { - builder.precision((Integer) precision); - } else if (precision instanceof Long) { - builder.precision((Long) precision); - } else if 
(precision instanceof Double) { - builder.precision((Double) precision); - } else if (precision instanceof Float) { - builder.precision((Float) precision); - } else { - builder.precision(precision.toString()); - } - } - config.remove(FIELD_PRECISION); - } else { - builder.precision(configPrecision.toString()); - config.remove(FIELD_PRECISION); - } - - final Object configNeighbors = config.get(FIELD_NEIGHBORS); - if (configNeighbors != null) { - builder.neighbors((Boolean) configNeighbors); - config.remove(FIELD_NEIGHBORS); - } - - final Object def = config.get(FIELD_MISSING); - if (def != null) { - if (def instanceof Iterable) { - for (Object location : (Iterable)def) { - builder.addDefaultLocation(location.toString()); - } - } else if (def instanceof String) { - builder.addDefaultLocation(def.toString()); - } else if (def instanceof Map) { - Map<String, Object> latlonMap = (Map<String, Object>) def; - if (!latlonMap.containsKey("lat") || !(latlonMap.get("lat") instanceof Double)) { - throw new ElasticsearchParseException("field [{}] map must have field lat and a valid latitude", FIELD_MISSING); - } - if (!latlonMap.containsKey("lon") || !(latlonMap.get("lon") instanceof Double)) { - throw new ElasticsearchParseException("field [{}] map must have field lon and a valid longitude", FIELD_MISSING); - } - builder.addDefaultLocation(Double.valueOf(latlonMap.get("lat").toString()), Double.valueOf(latlonMap.get("lon").toString())); - } else { - throw new ElasticsearchParseException("field [{}] must be of type string or list", FIELD_MISSING); - } - config.remove(FIELD_MISSING); - } - - final Object fieldName = config.get(FIELD_FIELDNAME); - if (fieldName != null) { - builder.field(fieldName.toString()); - config.remove(FIELD_FIELDNAME); - } - } - return builder.build(); - } - - @Override - protected XContentBuilder toInnerXContent(XContentBuilder builder, Params params) throws IOException { - builder.field(FIELD_PRECISION, precision); - builder.field(FIELD_NEIGHBORS, 
neighbors); - if (defaultLocations != null) { - builder.startArray(FIELD_MISSING); - for (String defaultLocation : defaultLocations) { - builder.value(defaultLocation); - } - builder.endArray(); - } - if (fieldName != null) { - builder.field(FIELD_FIELDNAME, fieldName); - } - return builder; - } - - protected static Collection<String> parseSinglePointOrList(XContentParser parser) throws IOException { - Token token = parser.currentToken(); - if(token == Token.START_ARRAY) { - token = parser.nextToken(); - // Test if value is a single point in <code>[lon, lat]</code> format - if(token == Token.VALUE_NUMBER) { - double lon = parser.doubleValue(); - if(parser.nextToken() == Token.VALUE_NUMBER) { - double lat = parser.doubleValue(); - if(parser.nextToken() == Token.END_ARRAY) { - return Collections.singleton(GeoHashUtils.stringEncode(lon, lat)); - } else { - throw new ElasticsearchParseException("only two values expected"); - } - } else { - throw new ElasticsearchParseException("latitue must be a numeric value"); - } - } else { - // otherwise it's a list of locations - ArrayList<String> result = new ArrayList<>(); - while (token != Token.END_ARRAY) { - result.add(GeoUtils.parseGeoPoint(parser).geohash()); - token = parser.nextToken(); //infinite loop without this line - } - return result; - } - } else { - // or a single location - return Collections.singleton(GeoUtils.parseGeoPoint(parser).geohash()); - } - } - - @Override - public ContextConfig defaultConfig() { - return defaultConfig; - } - - @Override - public ContextConfig parseContext(ParseContext parseContext, XContentParser parser) throws IOException, ElasticsearchParseException { - - if(fieldName != null) { - FieldMapper mapper = parseContext.docMapper().mappers().getMapper(fieldName); - if(!(mapper instanceof GeoPointFieldMapper)) { - throw new ElasticsearchParseException("referenced field must be mapped to geo_point"); - } - } - - Collection<String> locations; - if(parser.currentToken() == Token.VALUE_NULL) { 
- locations = null; - } else { - locations = parseSinglePointOrList(parser); - } - return new GeoConfig(this, locations); - } - - /** - * Create a new geolocation query from a given GeoPoint - * - * @param point - * query location - * @return new geolocation query - */ - public static GeoQuery query(String name, GeoPoint point) { - return query(name, point.getGeohash()); - } - - /** - * Create a new geolocation query from a given geocoordinate - * - * @param lat - * latitude of the location - * @param lon - * longitude of the location - * @return new geolocation query - */ - public static GeoQuery query(String name, double lat, double lon, int ... precisions) { - return query(name, GeoHashUtils.stringEncode(lon, lat), precisions); - } - - public static GeoQuery query(String name, double lat, double lon, String ... precisions) { - int precisionInts[] = new int[precisions.length]; - for (int i = 0 ; i < precisions.length; i++) { - precisionInts[i] = GeoUtils.geoHashLevelsForPrecision(precisions[i]); - } - return query(name, GeoHashUtils.stringEncode(lon, lat), precisionInts); - } - - /** - * Create a new geolocation query from a given geohash - * - * @param geohash - * geohash of the location - * @return new geolocation query - */ - public static GeoQuery query(String name, String geohash, int ... 
precisions) { - return new GeoQuery(name, geohash, precisions); - } - - private static final int parsePrecision(XContentParser parser) throws IOException, ElasticsearchParseException { - switch (parser.currentToken()) { - case VALUE_STRING: - return GeoUtils.geoHashLevelsForPrecision(parser.text()); - case VALUE_NUMBER: - switch (parser.numberType()) { - case INT: - case LONG: - return parser.intValue(); - default: - return GeoUtils.geoHashLevelsForPrecision(parser.doubleValue()); - } - default: - throw new ElasticsearchParseException("invalid precision value"); - } - } - - @Override - public GeoQuery parseQuery(String name, XContentParser parser) throws IOException, ElasticsearchParseException { - if (parser.currentToken() == Token.START_OBJECT) { - double lat = Double.NaN; - double lon = Double.NaN; - GeoPoint point = null; - int[] precision = null; - - while (parser.nextToken() != Token.END_OBJECT) { - final String fieldName = parser.text(); - if("lat".equals(fieldName)) { - if(point == null) { - parser.nextToken(); - switch (parser.currentToken()) { - case VALUE_NUMBER: - case VALUE_STRING: - lat = parser.doubleValue(true); - break; - default: - throw new ElasticsearchParseException("latitude must be a number"); - } - } else { - throw new ElasticsearchParseException("only lat/lon or [{}] is allowed", FIELD_VALUE); - } - } else if ("lon".equals(fieldName)) { - if(point == null) { - parser.nextToken(); - switch (parser.currentToken()) { - case VALUE_NUMBER: - case VALUE_STRING: - lon = parser.doubleValue(true); - break; - default: - throw new ElasticsearchParseException("longitude must be a number"); - } - } else { - throw new ElasticsearchParseException("only lat/lon or [{}] is allowed", FIELD_VALUE); - } - } else if (FIELD_PRECISION.equals(fieldName)) { - if(parser.nextToken() == Token.START_ARRAY) { - IntHashSet precisions = new IntHashSet(); - while(parser.nextToken() != Token.END_ARRAY) { - precisions.add(parsePrecision(parser)); - } - precision = 
precisions.toArray(); - } else { - precision = new int[] { parsePrecision(parser) }; - } - } else if (FIELD_VALUE.equals(fieldName)) { - if(Double.isNaN(lon) && Double.isNaN(lat)) { - parser.nextToken(); - point = GeoUtils.parseGeoPoint(parser); - } else { - throw new ElasticsearchParseException("only lat/lon or [{}] is allowed", FIELD_VALUE); - } - } else { - throw new ElasticsearchParseException("unexpected fieldname [{}]", fieldName); - } - } - - if (point == null) { - if (Double.isNaN(lat) || Double.isNaN(lon)) { - throw new ElasticsearchParseException("location is missing"); - } else { - point = new GeoPoint(lat, lon); - } - } - - if (precision == null || precision.length == 0) { - precision = this.precision; - } - - return new GeoQuery(name, point.geohash(), precision); - } else { - return new GeoQuery(name, GeoUtils.parseGeoPoint(parser).getGeohash(), precision); - } - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((defaultLocations == null) ? 0 : defaultLocations.hashCode()); - result = prime * result + ((fieldName == null) ? 0 : fieldName.hashCode()); - result = prime * result + (neighbors ? 
1231 : 1237); - result = prime * result + Arrays.hashCode(precision); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - GeolocationContextMapping other = (GeolocationContextMapping) obj; - if (defaultLocations == null) { - if (other.defaultLocations != null) - return false; - } else if (!defaultLocations.equals(other.defaultLocations)) - return false; - if (fieldName == null) { - if (other.fieldName != null) - return false; - } else if (!fieldName.equals(other.fieldName)) - return false; - if (neighbors != other.neighbors) - return false; - if (!Arrays.equals(precision, other.precision)) - return false; - return true; - } - - - - - public static class Builder extends ContextBuilder<GeolocationContextMapping> { - - private IntHashSet precisions = new IntHashSet(); - private boolean neighbors; // take neighbor cell on the lowest level into account - private HashSet<String> defaultLocations = new HashSet<>(); - private String fieldName = null; - - protected Builder(String name) { - this(name, true, null); - } - - protected Builder(String name, boolean neighbors, int...levels) { - super(name); - neighbors(neighbors); - if (levels != null) { - for (int level : levels) { - precision(level); - } - } - } - - /** - * Set the precision use o make suggestions - * - * @param precision - * precision as distance with {@link DistanceUnit}. 
Default: - * meters - * @return this - */ - public Builder precision(String precision) { - return precision(DistanceUnit.parse(precision, DistanceUnit.METERS, DistanceUnit.METERS)); - } - - /** - * Set the precision use o make suggestions - * - * @param precision - * precision value - * @param unit - * {@link DistanceUnit} to use - * @return this - */ - public Builder precision(double precision, DistanceUnit unit) { - return precision(unit.toMeters(precision)); - } - - /** - * Set the precision use o make suggestions - * - * @param meters - * precision as distance in meters - * @return this - */ - public Builder precision(double meters) { - int level = GeoUtils.geoHashLevelsForPrecision(meters); - // Ceiling precision: we might return more results - if (GeoUtils.geoHashCellSize(level) < meters) { - level = Math.max(1, level - 1); - } - return precision(level); - } - - /** - * Set the precision use o make suggestions - * - * @param level - * maximum length of geohashes - * @return this - */ - public Builder precision(int level) { - this.precisions.add(level); - return this; - } - - /** - * Set neighborhood usage - * - * @param neighbors - * should neighbor cells also be valid - * @return this - */ - public Builder neighbors(boolean neighbors) { - this.neighbors = neighbors; - return this; - } - - /** - * Set a default location that should be used, if no location is - * provided by the query - * - * @param geohash - * geohash of the default location - * @return this - */ - public Builder addDefaultLocation(String geohash) { - this.defaultLocations.add(geohash); - return this; - } - - /** - * Set a default location that should be used, if no location is - * provided by the query - * - * @param geohashes - * geohash of the default location - * @return this - */ - public Builder addDefaultLocations(Collection<String> geohashes) { - this.defaultLocations.addAll(geohashes); - return this; - } - - /** - * Set a default location that should be used, if no location is - * 
provided by the query - * - * @param lat - * latitude of the default location - * @param lon - * longitude of the default location - * @return this - */ - public Builder addDefaultLocation(double lat, double lon) { - this.defaultLocations.add(GeoHashUtils.stringEncode(lon, lat)); - return this; - } - - /** - * Set a default location that should be used, if no location is - * provided by the query - * - * @param point - * location - * @return this - */ - public Builder defaultLocation(GeoPoint point) { - this.defaultLocations.add(point.geohash()); - return this; - } - - /** - * Set the name of the field containing a geolocation to use - * @param fieldName name of the field - * @return this - */ - public Builder field(String fieldName) { - this.fieldName = fieldName; - return this; - } - - @Override - public GeolocationContextMapping build() { - if(precisions.isEmpty()) { - precisions.add(GeoHashUtils.PRECISION); - } - int[] precisionArray = precisions.toArray(); - Arrays.sort(precisionArray); - return new GeolocationContextMapping(name, precisionArray, neighbors, defaultLocations, fieldName); - } - - } - - private static class GeoConfig extends ContextConfig { - - private final GeolocationContextMapping mapping; - private final Collection<String> locations; - - public GeoConfig(GeolocationContextMapping mapping, Collection<String> locations) { - this.locations = locations; - this.mapping = mapping; - } - - @Override - protected TokenStream wrapTokenStream(Document doc, TokenStream stream) { - Collection<String> geohashes; - - if (locations == null || locations.size() == 0) { - if(mapping.fieldName != null) { - IndexableField[] fields = doc.getFields(mapping.fieldName); - if(fields.length == 0) { - IndexableField[] lonFields = doc.getFields(mapping.fieldName + ".lon"); - IndexableField[] latFields = doc.getFields(mapping.fieldName + ".lat"); - if (lonFields.length > 0 && latFields.length > 0) { - geohashes = new ArrayList<>(fields.length); - GeoPoint spare = new 
GeoPoint(); - for (int i = 0 ; i < lonFields.length ; i++) { - IndexableField lonField = lonFields[i]; - IndexableField latField = latFields[i]; - assert lonField.fieldType().docValuesType() == latField.fieldType().docValuesType(); - // we write doc values fields differently: one field for all values, so we need to only care about indexed fields - if (lonField.fieldType().docValuesType() == DocValuesType.NONE) { - spare.reset(latField.numericValue().doubleValue(), lonField.numericValue().doubleValue()); - geohashes.add(spare.geohash()); - } - } - } else { - geohashes = mapping.defaultLocations; - } - } else { - geohashes = new ArrayList<>(fields.length); - GeoPoint spare = new GeoPoint(); - for (IndexableField field : fields) { - spare.resetFromString(field.stringValue()); - geohashes.add(spare.geohash()); - } - } - } else { - geohashes = mapping.defaultLocations; - } - } else { - geohashes = locations; - } - - Collection<String> locations = new HashSet<>(); - for (String geohash : geohashes) { - for (int p : mapping.precision) { - int precision = Math.min(p, geohash.length()); - String truncatedGeohash = geohash.substring(0, precision); - if(mapping.neighbors) { - GeoHashUtils.addNeighbors(truncatedGeohash, precision, locations); - } - locations.add(truncatedGeohash); - } - } - - return new PrefixTokenFilter(stream, ContextMapping.SEPARATOR, locations); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GeoConfig(location = ["); - Iterator<? 
extends CharSequence> location = this.locations.iterator(); - if (location.hasNext()) { - sb.append(location.next()); - while (location.hasNext()) { - sb.append(", ").append(location.next()); - } - } - return sb.append("])").toString(); - } - } - - private static class GeoQuery extends ContextQuery { - private final String location; - private final int[] precisions; - - public GeoQuery(String name, String location, int...precisions) { - super(name); - this.location = location; - this.precisions = precisions; - } - - @Override - public Automaton toAutomaton() { - Automaton automaton; - if(precisions == null || precisions.length == 0) { - automaton = Automata.makeString(location); - } else { - automaton = Automata.makeString(location.substring(0, Math.max(1, Math.min(location.length(), precisions[0])))); - for (int i = 1; i < precisions.length; i++) { - final String cell = location.substring(0, Math.max(1, Math.min(location.length(), precisions[i]))); - automaton = Operations.union(automaton, Automata.makeString(cell)); - } - } - return automaton; - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - if(precisions == null || precisions.length == 0) { - builder.field(name, location); - } else { - builder.startObject(name); - builder.field(FIELD_VALUE, location); - builder.field(FIELD_PRECISION, precisions); - builder.endObject(); - } - return builder; - } - } -} diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java index eeec50d165..27bb662cec 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java @@ -27,6 +27,7 @@ import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.xcontent.XContentParser; import 
org.elasticsearch.common.xcontent.XContentParser.Token; import org.elasticsearch.index.analysis.ShingleTokenFilterFactory; +import org.elasticsearch.index.fielddata.IndexFieldDataService; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.script.CompiledScript; diff --git a/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggestParser.java b/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggestParser.java index 4e1aa8230c..42dec2aac0 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggestParser.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggestParser.java @@ -21,6 +21,7 @@ package org.elasticsearch.search.suggest.term; import org.elasticsearch.common.HasContextAndHeaders; import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.fielddata.IndexFieldDataService; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.search.suggest.DirectSpellcheckerSettings; import org.elasticsearch.search.suggest.SuggestContextParser; diff --git a/core/src/main/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/core/src/main/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat index 06b50d314b..2c92f0ecd3 100644 --- a/core/src/main/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat +++ b/core/src/main/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat @@ -1 +1 @@ -org.elasticsearch.search.suggest.completion.Completion090PostingsFormat
\ No newline at end of file +org.apache.lucene.search.suggest.document.Completion50PostingsFormat diff --git a/core/src/test/java/org/elasticsearch/get/GetActionIT.java b/core/src/test/java/org/elasticsearch/get/GetActionIT.java index 1b58ea5d43..f41f4adc74 100644 --- a/core/src/test/java/org/elasticsearch/get/GetActionIT.java +++ b/core/src/test/java/org/elasticsearch/get/GetActionIT.java @@ -894,8 +894,7 @@ public class GetActionIT extends ESIntegTestCase { " \"input\": [\n" + " \"Nevermind\",\n" + " \"Nirvana\"\n" + - " ],\n" + - " \"output\": \"Nirvana - Nevermind\"\n" + + " ]\n" + " }\n" + "}"; diff --git a/core/src/test/java/org/elasticsearch/index/mapper/completion/CompletionFieldMapperTests.java b/core/src/test/java/org/elasticsearch/index/mapper/completion/CompletionFieldMapperTests.java index 10094ccd6d..b81a3d6d40 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/completion/CompletionFieldMapperTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/completion/CompletionFieldMapperTests.java @@ -18,11 +18,20 @@ */ package org.elasticsearch.index.mapper.completion; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.suggest.document.*; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRefBuilder; +import org.apache.lucene.util.automaton.Operations; +import org.apache.lucene.util.automaton.RegExp; +import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.json.JsonXContent; -import org.elasticsearch.index.mapper.DocumentMapper; -import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.mapper.*; import 
org.elasticsearch.index.mapper.core.CompletionFieldMapper; import org.elasticsearch.test.ESSingleNodeTestCase; @@ -30,8 +39,7 @@ import java.io.IOException; import java.util.Map; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.hamcrest.Matchers.instanceOf; -import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.*; public class CompletionFieldMapperTests extends ESSingleNodeTestCase { public void testDefaultConfiguration() throws IOException { @@ -46,21 +54,66 @@ public class CompletionFieldMapperTests extends ESSingleNodeTestCase { FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); assertThat(fieldMapper, instanceOf(CompletionFieldMapper.class)); - CompletionFieldMapper completionFieldMapper = (CompletionFieldMapper) fieldMapper; - assertThat(completionFieldMapper.isStoringPayloads(), is(false)); + MappedFieldType completionFieldType = fieldMapper.fieldType(); + + NamedAnalyzer indexAnalyzer = completionFieldType.indexAnalyzer(); + assertThat(indexAnalyzer.name(), equalTo("simple")); + assertThat(indexAnalyzer.analyzer(), instanceOf(CompletionAnalyzer.class)); + CompletionAnalyzer analyzer = (CompletionAnalyzer) indexAnalyzer.analyzer(); + assertThat(analyzer.preservePositionIncrements(), equalTo(true)); + assertThat(analyzer.preserveSep(), equalTo(true)); + + NamedAnalyzer searchAnalyzer = completionFieldType.searchAnalyzer(); + assertThat(searchAnalyzer.name(), equalTo("simple")); + assertThat(searchAnalyzer.analyzer(), instanceOf(CompletionAnalyzer.class)); + analyzer = (CompletionAnalyzer) searchAnalyzer.analyzer(); + assertThat(analyzer.preservePositionIncrements(), equalTo(true)); + assertThat(analyzer.preserveSep(), equalTo(true)); } - public void testThatSerializationIncludesAllElements() throws Exception { + public void testCompletionAnalyzerSettings() throws Exception { String mapping = jsonBuilder().startObject().startObject("type1") 
.startObject("properties").startObject("completion") .field("type", "completion") .field("analyzer", "simple") .field("search_analyzer", "standard") - .field("payloads", true) .field("preserve_separators", false) .field("preserve_position_increments", true) - .field("max_input_length", 14) + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + assertThat(fieldMapper, instanceOf(CompletionFieldMapper.class)); + + MappedFieldType completionFieldType = fieldMapper.fieldType(); + + NamedAnalyzer indexAnalyzer = completionFieldType.indexAnalyzer(); + assertThat(indexAnalyzer.name(), equalTo("simple")); + assertThat(indexAnalyzer.analyzer(), instanceOf(CompletionAnalyzer.class)); + CompletionAnalyzer analyzer = (CompletionAnalyzer) indexAnalyzer.analyzer(); + assertThat(analyzer.preservePositionIncrements(), equalTo(true)); + assertThat(analyzer.preserveSep(), equalTo(false)); + + NamedAnalyzer searchAnalyzer = completionFieldType.searchAnalyzer(); + assertThat(searchAnalyzer.name(), equalTo("standard")); + assertThat(searchAnalyzer.analyzer(), instanceOf(CompletionAnalyzer.class)); + analyzer = (CompletionAnalyzer) searchAnalyzer.analyzer(); + assertThat(analyzer.preservePositionIncrements(), equalTo(true)); + assertThat(analyzer.preserveSep(), equalTo(false)); + } + + public void testTypeParsing() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .field("analyzer", "simple") + .field("search_analyzer", "standard") + .field("preserve_separators", false) + .field("preserve_position_increments", true) + .field("max_input_length", 14) .endObject().endObject() .endObject().endObject().string(); @@ -71,45 +124,295 @@ public class CompletionFieldMapperTests 
extends ESSingleNodeTestCase { CompletionFieldMapper completionFieldMapper = (CompletionFieldMapper) fieldMapper; XContentBuilder builder = jsonBuilder().startObject(); - completionFieldMapper.toXContent(builder, null).endObject(); + completionFieldMapper.toXContent(builder, ToXContent.EMPTY_PARAMS).endObject(); builder.close(); - Map<String, Object> serializedMap; - try (XContentParser parser = JsonXContent.jsonXContent.createParser(builder.bytes())) { - serializedMap = parser.map(); - } + Map<String, Object> serializedMap = JsonXContent.jsonXContent.createParser(builder.bytes()).map(); Map<String, Object> configMap = (Map<String, Object>) serializedMap.get("completion"); assertThat(configMap.get("analyzer").toString(), is("simple")); assertThat(configMap.get("search_analyzer").toString(), is("standard")); - assertThat(Boolean.valueOf(configMap.get("payloads").toString()), is(true)); assertThat(Boolean.valueOf(configMap.get("preserve_separators").toString()), is(false)); assertThat(Boolean.valueOf(configMap.get("preserve_position_increments").toString()), is(true)); assertThat(Integer.valueOf(configMap.get("max_input_length").toString()), is(14)); } - public void testThatSerializationCombinesToOneAnalyzerFieldIfBothAreEqual() throws Exception { + public void testParsingMinimal() throws Exception { String mapping = jsonBuilder().startObject().startObject("type1") .startObject("properties").startObject("completion") .field("type", "completion") - .field("analyzer", "simple") - .field("search_analyzer", "simple") .endObject().endObject() .endObject().endObject().string(); DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + MappedFieldType completionFieldType = fieldMapper.fieldType(); + ParsedDocument parsedDocument = defaultMapper.parse("test", "type1", "1", XContentFactory.jsonBuilder() + .startObject() + .field("completion", 
"suggestion") + .endObject() + .bytes()); + IndexableField[] fields = parsedDocument.rootDoc().getFields(completionFieldType.names().indexName()); + assertSuggestFields(fields, 1); + } + + public void testParsingMultiValued() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .endObject().endObject() + .endObject().endObject().string(); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); - assertThat(fieldMapper, instanceOf(CompletionFieldMapper.class)); + MappedFieldType completionFieldType = fieldMapper.fieldType(); + ParsedDocument parsedDocument = defaultMapper.parse("test", "type1", "1", XContentFactory.jsonBuilder() + .startObject() + .array("completion", "suggestion1", "suggestion2") + .endObject() + .bytes()); + IndexableField[] fields = parsedDocument.rootDoc().getFields(completionFieldType.names().indexName()); + assertSuggestFields(fields, 2); + } - CompletionFieldMapper completionFieldMapper = (CompletionFieldMapper) fieldMapper; - XContentBuilder builder = jsonBuilder().startObject(); - completionFieldMapper.toXContent(builder, null).endObject(); - builder.close(); - Map<String, Object> serializedMap; - try (XContentParser parser = JsonXContent.jsonXContent.createParser(builder.bytes())) { - serializedMap = parser.map(); + public void testParsingWithWeight() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + MappedFieldType 
completionFieldType = fieldMapper.fieldType(); + ParsedDocument parsedDocument = defaultMapper.parse("test", "type1", "1", XContentFactory.jsonBuilder() + .startObject() + .startObject("completion") + .field("input", "suggestion") + .field("weight", 2) + .endObject() + .endObject() + .bytes()); + IndexableField[] fields = parsedDocument.rootDoc().getFields(completionFieldType.names().indexName()); + assertSuggestFields(fields, 1); + } + + public void testParsingMultiValueWithWeight() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + MappedFieldType completionFieldType = fieldMapper.fieldType(); + ParsedDocument parsedDocument = defaultMapper.parse("test", "type1", "1", XContentFactory.jsonBuilder() + .startObject() + .startObject("completion") + .array("input", "suggestion1", "suggestion2", "suggestion3") + .field("weight", 2) + .endObject() + .endObject() + .bytes()); + IndexableField[] fields = parsedDocument.rootDoc().getFields(completionFieldType.names().indexName()); + assertSuggestFields(fields, 3); + } + + public void testParsingFull() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + MappedFieldType completionFieldType = fieldMapper.fieldType(); + ParsedDocument parsedDocument = defaultMapper.parse("test", "type1", "1", 
XContentFactory.jsonBuilder() + .startObject() + .startArray("completion") + .startObject() + .field("input", "suggestion1") + .field("weight", 3) + .endObject() + .startObject() + .field("input", "suggestion2") + .field("weight", 4) + .endObject() + .startObject() + .field("input", "suggestion3") + .field("weight", 5) + .endObject() + .endArray() + .endObject() + .bytes()); + IndexableField[] fields = parsedDocument.rootDoc().getFields(completionFieldType.names().indexName()); + assertSuggestFields(fields, 3); + } + + public void testParsingMixed() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + MappedFieldType completionFieldType = fieldMapper.fieldType(); + ParsedDocument parsedDocument = defaultMapper.parse("test", "type1", "1", XContentFactory.jsonBuilder() + .startObject() + .startArray("completion") + .startObject() + .array("input", "suggestion1", "suggestion2") + .field("weight", 3) + .endObject() + .startObject() + .field("input", "suggestion3") + .field("weight", 4) + .endObject() + .startObject() + .field("input", "suggestion4", "suggestion5", "suggestion6") + .field("weight", 5) + .endObject() + .endArray() + .endObject() + .bytes()); + IndexableField[] fields = parsedDocument.rootDoc().getFields(completionFieldType.names().indexName()); + assertSuggestFields(fields, 6); + } + + public void testNonContextEnabledParsingWithContexts() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("field1") + .field("type", "completion") + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper 
defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + try { + defaultMapper.parse("test", "type1", "1", XContentFactory.jsonBuilder() + .startObject() + .startObject("field1") + .field("input", "suggestion1") + .startObject("contexts") + .field("ctx", "ctx2") + .endObject() + .field("weight", 3) + .endObject() + .endObject() + .bytes()); + fail("Supplying contexts to a non context-enabled field should error"); + } catch (MapperParsingException e) { + assertThat(e.getRootCause().getMessage(), containsString("field1")); } - Map<String, Object> configMap = (Map<String, Object>) serializedMap.get("completion"); - assertThat(configMap.get("analyzer").toString(), is("simple")); } + public void testFieldValueValidation() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + CharsRefBuilder charsRefBuilder = new CharsRefBuilder(); + charsRefBuilder.append("sugg"); + charsRefBuilder.setCharAt(2, '\u001F'); + try { + defaultMapper.parse("test", "type1", "1", XContentFactory.jsonBuilder() + .startObject() + .field("completion", charsRefBuilder.get().toString()) + .endObject() + .bytes()); + fail("No error indexing value with reserved character [0x1F]"); + } catch (MapperParsingException e) { + Throwable cause = e.unwrapCause().getCause(); + assertThat(cause, instanceOf(IllegalArgumentException.class)); + assertThat(cause.getMessage(), containsString("[0x1f]")); + } + + charsRefBuilder.setCharAt(2, '\u0000'); + try { + defaultMapper.parse("test", "type1", "1", XContentFactory.jsonBuilder() + .startObject() + .field("completion", charsRefBuilder.get().toString()) + .endObject() + .bytes()); + fail("No error indexing value with reserved character 
[0x0]"); + } catch (MapperParsingException e) { + Throwable cause = e.unwrapCause().getCause(); + assertThat(cause, instanceOf(IllegalArgumentException.class)); + assertThat(cause.getMessage(), containsString("[0x0]")); + } + + charsRefBuilder.setCharAt(2, '\u001E'); + try { + defaultMapper.parse("test", "type1", "1", XContentFactory.jsonBuilder() + .startObject() + .field("completion", charsRefBuilder.get().toString()) + .endObject() + .bytes()); + fail("No error indexing value with reserved character [0x1E]"); + } catch (MapperParsingException e) { + Throwable cause = e.unwrapCause().getCause(); + assertThat(cause, instanceOf(IllegalArgumentException.class)); + assertThat(cause.getMessage(), containsString("[0x1e]")); + } + } + + public void testPrefixQueryType() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + CompletionFieldMapper completionFieldMapper = (CompletionFieldMapper) fieldMapper; + Query prefixQuery = completionFieldMapper.fieldType().prefixQuery(new BytesRef("co")); + assertThat(prefixQuery, instanceOf(PrefixCompletionQuery.class)); + } + + public void testFuzzyQueryType() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + CompletionFieldMapper completionFieldMapper = (CompletionFieldMapper) fieldMapper; + Query 
prefixQuery = completionFieldMapper.fieldType().fuzzyQuery("co", + Fuzziness.fromEdits(FuzzyCompletionQuery.DEFAULT_MAX_EDITS), FuzzyCompletionQuery.DEFAULT_NON_FUZZY_PREFIX, + FuzzyCompletionQuery.DEFAULT_MIN_FUZZY_LENGTH, Operations.DEFAULT_MAX_DETERMINIZED_STATES, + FuzzyCompletionQuery.DEFAULT_TRANSPOSITIONS, FuzzyCompletionQuery.DEFAULT_UNICODE_AWARE); + assertThat(prefixQuery, instanceOf(FuzzyCompletionQuery.class)); + } + + public void testRegexQueryType() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + CompletionFieldMapper completionFieldMapper = (CompletionFieldMapper) fieldMapper; + Query prefixQuery = completionFieldMapper.fieldType() + .regexpQuery(new BytesRef("co"), RegExp.ALL, Operations.DEFAULT_MAX_DETERMINIZED_STATES); + assertThat(prefixQuery, instanceOf(RegexCompletionQuery.class)); + } + + private static void assertSuggestFields(IndexableField[] fields, int expected) { + int actualFieldCount = 0; + for (IndexableField field : fields) { + if (field instanceof SuggestField) { + actualFieldCount++; + } + } + assertThat(actualFieldCount, equalTo(expected)); + } } diff --git a/core/src/test/java/org/elasticsearch/index/mapper/core/CompletionFieldTypeTests.java b/core/src/test/java/org/elasticsearch/index/mapper/core/CompletionFieldTypeTests.java index 55dd7f8f7c..7ec1814a59 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/core/CompletionFieldTypeTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/core/CompletionFieldTypeTests.java @@ -20,20 +20,16 @@ package org.elasticsearch.index.mapper.core; import org.elasticsearch.index.mapper.FieldTypeTestCase; 
import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider; -import org.elasticsearch.search.suggest.context.ContextBuilder; -import org.elasticsearch.search.suggest.context.ContextMapping; +import org.elasticsearch.search.suggest.completion.context.ContextBuilder; +import org.elasticsearch.search.suggest.completion.context.ContextMappings; import org.junit.Before; -import java.util.SortedMap; -import java.util.TreeMap; +import java.util.Arrays; public class CompletionFieldTypeTests extends FieldTypeTestCase { @Override protected MappedFieldType createDefaultFieldType() { - CompletionFieldMapper.CompletionFieldType ft = new CompletionFieldMapper.CompletionFieldType(); - ft.setProvider(new AnalyzingCompletionLookupProvider(true, false, true, false)); - return ft; + return new CompletionFieldMapper.CompletionFieldType(); } @Before @@ -42,30 +38,22 @@ public class CompletionFieldTypeTests extends FieldTypeTestCase { @Override public void modify(MappedFieldType ft) { CompletionFieldMapper.CompletionFieldType cft = (CompletionFieldMapper.CompletionFieldType)ft; - cft.setProvider(new AnalyzingCompletionLookupProvider(false, false, true, false)); + cft.setPreserveSep(false); } }); addModifier(new Modifier("preserve_position_increments", false, true) { @Override public void modify(MappedFieldType ft) { CompletionFieldMapper.CompletionFieldType cft = (CompletionFieldMapper.CompletionFieldType)ft; - cft.setProvider(new AnalyzingCompletionLookupProvider(true, false, false, false)); + cft.setPreservePositionIncrements(false); } }); - addModifier(new Modifier("payload", false, true) { + addModifier(new Modifier("context_mappings", false, true) { @Override public void modify(MappedFieldType ft) { CompletionFieldMapper.CompletionFieldType cft = (CompletionFieldMapper.CompletionFieldType)ft; - cft.setProvider(new AnalyzingCompletionLookupProvider(true, false, true, true)); - } - }); - addModifier(new 
Modifier("context_mapping", false, true) { - @Override - public void modify(MappedFieldType ft) { - CompletionFieldMapper.CompletionFieldType cft = (CompletionFieldMapper.CompletionFieldType)ft; - SortedMap<String, ContextMapping> contextMapping = new TreeMap<>(); - contextMapping.put("foo", ContextBuilder.location("foo").build()); - cft.setContextMapping(contextMapping); + ContextMappings contextMappings = new ContextMappings(Arrays.asList(ContextBuilder.category("foo").build(), ContextBuilder.geo("geo").build())); + cft.setContextMappings(contextMappings); } }); } diff --git a/core/src/test/java/org/elasticsearch/index/mapper/multifield/MultiFieldsIntegrationIT.java b/core/src/test/java/org/elasticsearch/index/mapper/multifield/MultiFieldsIntegrationIT.java index 20ef62291b..0c26324ac6 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/multifield/MultiFieldsIntegrationIT.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/multifield/MultiFieldsIntegrationIT.java @@ -179,7 +179,7 @@ public class MultiFieldsIntegrationIT extends ESIntegTestCase { assertThat(mappingMetaData, not(nullValue())); Map<String, Object> mappingSource = mappingMetaData.sourceAsMap(); Map aField = ((Map) XContentMapValues.extractValue("properties.a", mappingSource)); - assertThat(aField.size(), equalTo(7)); + assertThat(aField.size(), equalTo(6)); assertThat(aField.get("type").toString(), equalTo("completion")); assertThat(aField.get("fields"), notNullValue()); diff --git a/core/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchIT.java b/core/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchIT.java index d7bc9a7653..64ea10f4f7 100644 --- a/core/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchIT.java +++ b/core/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchIT.java @@ -20,8 +20,10 @@ package org.elasticsearch.search.suggest; import com.carrotsearch.hppc.ObjectLongHashMap; import 
com.carrotsearch.randomizedtesting.generators.RandomStrings; - +import org.apache.lucene.analysis.TokenStreamToAutomaton; +import org.apache.lucene.search.suggest.document.ContextSuggestField; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; +import org.elasticsearch.action.ShardOperationFailedException; import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse; import org.elasticsearch.action.admin.indices.forcemerge.ForceMergeResponse; import org.elasticsearch.action.admin.indices.segments.IndexShardSegments; @@ -30,14 +32,13 @@ import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.percolate.PercolateResponse; import org.elasticsearch.action.search.SearchPhaseExecutionException; +import org.elasticsearch.action.suggest.SuggestRequest; import org.elasticsearch.action.suggest.SuggestResponse; import org.elasticsearch.client.Requests; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.index.mapper.MapperException; import org.elasticsearch.index.mapper.MapperParsingException; -import org.elasticsearch.index.mapper.core.CompletionFieldMapper; import org.elasticsearch.percolator.PercolatorService; import org.elasticsearch.search.aggregations.AggregationBuilders; import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; @@ -45,17 +46,12 @@ import org.elasticsearch.search.sort.FieldSortBuilder; import org.elasticsearch.search.suggest.completion.CompletionStats; import org.elasticsearch.search.suggest.completion.CompletionSuggestion; import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder; -import org.elasticsearch.search.suggest.completion.CompletionSuggestionFuzzyBuilder; +import 
org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder.FuzzyOptionsBuilder; +import org.elasticsearch.search.suggest.completion.context.*; import org.elasticsearch.test.ESIntegTestCase; import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Random; -import java.util.concurrent.ExecutionException; +import java.util.*; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; @@ -65,14 +61,8 @@ import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAllSuccessful; -import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.greaterThan; -import static org.hamcrest.Matchers.hasItem; -import static org.hamcrest.Matchers.hasItems; -import static org.hamcrest.Matchers.instanceOf; -import static org.hamcrest.Matchers.is; -import static org.hamcrest.Matchers.notNullValue; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; +import static org.hamcrest.Matchers.*; @SuppressCodecs("*") // requires custom completion format public class CompletionSuggestSearchIT extends ESIntegTestCase { @@ -81,44 +71,287 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { private final String FIELD = RandomStrings.randomAsciiOfLength(getRandom(), 10).toLowerCase(Locale.ROOT); private final CompletionMappingBuilder completionMappingBuilder = new CompletionMappingBuilder(); - public void testSimple() throws Exception { - createIndexAndMapping(completionMappingBuilder); 
- String[][] input = {{"Foo Fighters"}, {"Foo Fighters"}, {"Foo Fighters"}, {"Foo Fighters"}, - {"Generator", "Foo Fighters Generator"}, {"Learn to Fly", "Foo Fighters Learn to Fly"}, - {"The Prodigy"}, {"The Prodigy"}, {"The Prodigy"}, {"Firestarter", "The Prodigy Firestarter"}, - {"Turbonegro"}, {"Turbonegro"}, {"Get it on", "Turbonegro Get it on"}}; // work with frequencies - for (int i = 0; i < input.length; i++) { - client().prepareIndex(INDEX, TYPE, "" + i) + public void testPrefix() throws Exception { + final CompletionMappingBuilder mapping = new CompletionMappingBuilder(); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 1; i <= numDocs; i++) { + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) .setSource(jsonBuilder() - .startObject().startObject(FIELD) - .startArray("input").value(input[i]).endArray() - .endObject() - .endObject() - ) - .execute().actionGet(); + .startObject() + .startObject(FIELD) + .field("input", "suggestion" + i) + .field("weight", i) + .endObject() + .endObject() + )); } + indexRandom(true, indexRequestBuilders); + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg"); + assertSuggestions("foo", prefix, "suggestion10", "suggestion9", "suggestion8", "suggestion7", "suggestion6"); + } - refresh(); + public void testRegex() throws Exception { + final CompletionMappingBuilder mapping = new CompletionMappingBuilder(); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 1; i <= numDocs; i++) { + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "sugg" + i + "estion") + .field("weight", i) + .endObject() + .endObject() + )); + } + indexRandom(true, indexRequestBuilders); + 
CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).regex("sugg.*es"); + assertSuggestions("foo", prefix, "sugg10estion", "sugg9estion", "sugg8estion", "sugg7estion", "sugg6estion"); + } + + public void testFuzzy() throws Exception { + final CompletionMappingBuilder mapping = new CompletionMappingBuilder(); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 1; i <= numDocs; i++) { + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "sugxgestion" + i) + .field("weight", i) + .endObject() + .endObject() + )); + } + indexRandom(true, indexRequestBuilders); + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg", Fuzziness.ONE); + assertSuggestions("foo", prefix, "sugxgestion10", "sugxgestion9", "sugxgestion8", "sugxgestion7", "sugxgestion6"); + } + + public void testEarlyTermination() throws Exception { + final CompletionMappingBuilder mapping = new CompletionMappingBuilder(); + createIndexAndMapping(mapping); + int numDocs = atLeast(100); + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "suggestion" + (numDocs - i)) + .field("weight", numDocs - i) + .endObject() + .endObject() + )); + } + indexRandom(true, indexRequestBuilders); + int size = randomIntBetween(3, 10); + String[] outputs = new String[size]; + for (int i = 0; i < size; i++) { + outputs[i] = "suggestion" + (numDocs - i); + } + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sug").size(size); + assertSuggestions("foo", prefix, outputs); + + 
CompletionSuggestionBuilder regex = SuggestBuilders.completionSuggestion("foo").field(FIELD).regex("su[g|s]g").size(size); + assertSuggestions("foo", regex, outputs); + + CompletionSuggestionBuilder fuzzyPrefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg", Fuzziness.ONE).size(size); + assertSuggestions("foo", fuzzyPrefix, outputs); + } + + public void testSuggestWithNumericPayload() throws Exception { + final CompletionMappingBuilder mapping = new CompletionMappingBuilder(); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + XContentBuilder source= jsonBuilder() + .startObject() + .field(FIELD, "suggestion" + i) + .field("count", i) + .endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i).setSource(source)); + } + indexRandom(true, indexRequestBuilders); + + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg").size(numDocs).payload("count"); + SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(prefix).execute().actionGet(); + assertNoFailures(suggestResponse); + CompletionSuggestion completionSuggestion = suggestResponse.getSuggest().getSuggestion("foo"); + CompletionSuggestion.Entry options = completionSuggestion.getEntries().get(0); + assertThat(options.getOptions().size(), equalTo(numDocs)); + for (CompletionSuggestion.Entry.Option option : options) { + Map<String, List<Object>> payloads = option.getPayload(); + assertThat(payloads.keySet(), contains("count")); + } + } + + public void testMalformedRequestPayload() throws Exception { + final CompletionMappingBuilder mapping = new CompletionMappingBuilder(); + createIndexAndMapping(mapping); + SuggestRequest request = new SuggestRequest(INDEX); + XContentBuilder suggest = jsonBuilder().startObject() + .startObject("bad-payload") + .field("prefix", "sug") + 
.startObject("completion") + .field("field", FIELD) + .startArray("payload") + .startObject() + .field("payload", "field") + .endObject() + .endArray() + .endObject() + .endObject().endObject(); + request.suggest(suggest.bytes()); + ensureGreen(); - assertSuggestionsNotInOrder("f", "Foo Fighters", "Firestarter", "Foo Fighters Generator", "Foo Fighters Learn to Fly"); - assertSuggestionsNotInOrder("t", "The Prodigy", "Turbonegro", "Turbonegro Get it on", "The Prodigy Firestarter"); + SuggestResponse suggestResponse = client().suggest(request).get(); + assertThat(suggestResponse.getSuccessfulShards(), equalTo(0)); + for (ShardOperationFailedException exception : suggestResponse.getShardFailures()) { + assertThat(exception.reason(), containsString("expected string values in [payload] array")); + } + } + + public void testMissingPayloadField() throws Exception { + final CompletionMappingBuilder mapping = new CompletionMappingBuilder(); + createIndexAndMapping(mapping); + List<IndexRequestBuilder> indexRequestBuilders = Arrays.asList( + client().prepareIndex(INDEX, TYPE, "1").setSource(FIELD, "suggestion", "test_field", "test"), + client().prepareIndex(INDEX, TYPE, "2").setSource(FIELD, "suggestion") + ); + indexRandom(true, indexRequestBuilders); + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg").payload("test_field"); + SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(prefix).execute().actionGet(); + assertNoFailures(suggestResponse); + CompletionSuggestion completionSuggestion = suggestResponse.getSuggest().getSuggestion("foo"); + CompletionSuggestion.Entry options = completionSuggestion.getEntries().get(0); + assertThat(options.getOptions().size(), equalTo(2)); + for (CompletionSuggestion.Entry.Option option : options.getOptions()) { + assertThat(option.getPayload().keySet(), contains("test_field")); + } + } + + public void testPayload() throws Exception { + final 
CompletionMappingBuilder mapping = new CompletionMappingBuilder(); + createIndexAndMapping(mapping); + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + XContentBuilder source = jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "suggest") + .field("weight", 1) + .endObject() + .field("title", "title1") + .field("count", 1) + .endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "1").setSource(source)); + source = jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "suggestion") + .field("weight", 2) + .endObject() + .field("title", "title2") + .field("count", 2) + .endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "2").setSource(source)); + indexRandom(true, indexRequestBuilders); + + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg").payload("title", "count"); + SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(prefix).execute().actionGet(); + assertNoFailures(suggestResponse); + CompletionSuggestion completionSuggestion = suggestResponse.getSuggest().getSuggestion("foo"); + List<CompletionSuggestion.Entry.Option> options = completionSuggestion.getEntries().get(0).getOptions(); + assertThat(options.size(), equalTo(2)); + assertThat(options.get(0).getText().toString(), equalTo("suggestion")); + assertThat(options.get(0).getScore(), equalTo(2f)); + assertThat(options.get(1).getText().toString(), equalTo("suggest")); + assertThat(options.get(1).getScore(), equalTo(1f)); + + Map<String, List<Object>> firstPayload = options.get(0).getPayload(); + assertThat(firstPayload.keySet(), contains("title", "count")); + assertThat((String) firstPayload.get("title").get(0), equalTo("title2")); + assertThat((long) firstPayload.get("count").get(0), equalTo(2l)); + + Map<String, List<Object>> secondPayload = options.get(1).getPayload(); + assertThat(secondPayload.keySet(), contains("title", 
"count")); + assertThat((String) secondPayload.get("title").get(0), equalTo("title1")); + assertThat((long) secondPayload.get("count").get(0), equalTo(1l)); + } + + public void testSuggestWithPayload() throws Exception { + final CompletionMappingBuilder mapping = new CompletionMappingBuilder(); + createIndexAndMapping(mapping); + int numDocs = randomIntBetween(10, 100); + int numPayloadFields = randomIntBetween(2, 5); + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 1; i <= numDocs; i++) { + XContentBuilder source = jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "suggestion" + i) + .field("weight", i) + .endObject(); + for (int j = 0; j < numPayloadFields; j++) { + source.field("test_field" + j, j + "value" + i); + } + source.endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i).setSource(source)); + } + indexRandom(true, indexRequestBuilders); + + int suggestionSize = randomIntBetween(1, numDocs); + int numRequestedPayloadFields = randomIntBetween(2, numPayloadFields); + String[] payloadFields = new String[numRequestedPayloadFields]; + for (int i = 0; i < numRequestedPayloadFields; i++) { + payloadFields[i] = "test_field" + i; + } + + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg").size(suggestionSize).payload(payloadFields); + SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(prefix).execute().actionGet(); + assertNoFailures(suggestResponse); + CompletionSuggestion completionSuggestion = suggestResponse.getSuggest().getSuggestion("foo"); + CompletionSuggestion.Entry options = completionSuggestion.getEntries().get(0); + assertThat(options.getOptions().size(), equalTo(suggestionSize)); + int id = numDocs; + for (CompletionSuggestion.Entry.Option option : options) { + assertThat(option.getText().toString(), equalTo("suggestion" + id)); + assertThat(option.getPayload().size(), 
equalTo(numRequestedPayloadFields)); + for (int i = 0; i < numRequestedPayloadFields; i++) { + List<Object> fieldValue = option.getPayload().get("test_field" + i); + assertNotNull(fieldValue); + assertThat(fieldValue.size(), equalTo(1)); + assertThat((String)fieldValue.get(0), equalTo(i + "value" + id)); + } + id--; + } } public void testSuggestFieldWithPercolateApi() throws Exception { createIndexAndMapping(completionMappingBuilder); - String[][] input = {{"Foo Fighters"}, {"Foo Fighters"}, {"Foo Fighters"}, {"Foo Fighters"}, + String[][] inputs = {{"Foo Fighters"}, {"Foo Fighters"}, {"Foo Fighters"}, {"Foo Fighters"}, {"Generator", "Foo Fighters Generator"}, {"Learn to Fly", "Foo Fighters Learn to Fly"}, {"The Prodigy"}, {"The Prodigy"}, {"The Prodigy"}, {"Firestarter", "The Prodigy Firestarter"}, {"Turbonegro"}, {"Turbonegro"}, {"Get it on", "Turbonegro Get it on"}}; // work with frequencies - for (int i = 0; i < input.length; i++) { + for (int i = 0; i < inputs.length; i++) { + XContentBuilder source = jsonBuilder() + .startObject().startObject(FIELD) + .startArray("input"); + for (String input : inputs[i]) { + source.value(input); + } + source.endArray() + .endObject() + .endObject(); client().prepareIndex(INDEX, TYPE, "" + i) - .setSource(jsonBuilder() - .startObject().startObject(FIELD) - .startArray("input").value(input[i]).endArray() - .endObject() - .endObject() - ) - .execute().actionGet(); + .setSource(source).execute().actionGet(); } client().prepareIndex(INDEX, PercolatorService.TYPE_NAME, "4") @@ -133,18 +366,6 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { assertThat(response.getCount(), equalTo(1l)); } - public void testBasicPrefixSuggestion() throws Exception { - completionMappingBuilder.payloads(true); - createIndexAndMapping(completionMappingBuilder); - for (int i = 0; i < 2; i++) { - createData(i == 0); - assertSuggestions("f", "Firestarter - The Prodigy", "Foo Fighters", "Generator - Foo Fighters", "Learn to Fly - Foo 
Fighters"); - assertSuggestions("ge", "Generator - Foo Fighters", "Get it on - Turbonegro"); - assertSuggestions("ge", "Generator - Foo Fighters", "Get it on - Turbonegro"); - assertSuggestions("t", "The Prodigy", "Firestarter - The Prodigy", "Get it on - Turbonegro", "Turbonegro"); - } - } - public void testThatWeightsAreWorking() throws Exception { createIndexAndMapping(completionMappingBuilder); @@ -243,120 +464,14 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { createIndexAndMapping(completionMappingBuilder); client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() - .startObject().startObject(FIELD) - .field("input", "Foo Fighters") - .field("output", "Boo Fighters") - .endObject().endObject() - ).get(); - - refresh(); - - assertSuggestions("f", "Boo Fighters"); - } - - public void testThatPayloadsAreArbitraryJsonObjects() throws Exception { - completionMappingBuilder.payloads(true); - createIndexAndMapping(completionMappingBuilder); - - client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() - .startObject().startObject(FIELD) - .startArray("input").value("Foo Fighters").endArray() - .field("output", "Boo Fighters") - .startObject("payload").field("foo", "bar").startArray("test").value("spam").value("eggs").endArray().endObject() - .endObject().endObject() - ).get(); - - refresh(); - - SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - new CompletionSuggestionBuilder("testSuggestions").field(FIELD).text("foo").size(10) - ).execute().actionGet(); - - assertSuggestions(suggestResponse, "testSuggestions", "Boo Fighters"); - Suggest.Suggestion.Entry.Option option = suggestResponse.getSuggest().getSuggestion("testSuggestions").getEntries().get(0).getOptions().get(0); - assertThat(option, is(instanceOf(CompletionSuggestion.Entry.Option.class))); - CompletionSuggestion.Entry.Option prefixOption = (CompletionSuggestion.Entry.Option) option; - assertThat(prefixOption.getPayload(), is(notNullValue())); - 
- // parse JSON - Map<String, Object> jsonMap = prefixOption.getPayloadAsMap(); - assertThat(jsonMap.size(), is(2)); - assertThat(jsonMap.get("foo").toString(), is("bar")); - assertThat(jsonMap.get("test"), is(instanceOf(List.class))); - List<String> listValues = (List<String>) jsonMap.get("test"); - assertThat(listValues, hasItems("spam", "eggs")); - } - - public void testPayloadAsNumeric() throws Exception { - completionMappingBuilder.payloads(true); - createIndexAndMapping(completionMappingBuilder); - - client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() - .startObject().startObject(FIELD) - .startArray("input").value("Foo Fighters").endArray() - .field("output", "Boo Fighters") - .field("payload", 1) - .endObject().endObject() - ).get(); - - refresh(); - - SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - new CompletionSuggestionBuilder("testSuggestions").field(FIELD).text("foo").size(10) - ).execute().actionGet(); - - assertSuggestions(suggestResponse, "testSuggestions", "Boo Fighters"); - Suggest.Suggestion.Entry.Option option = suggestResponse.getSuggest().getSuggestion("testSuggestions").getEntries().get(0).getOptions().get(0); - assertThat(option, is(instanceOf(CompletionSuggestion.Entry.Option.class))); - CompletionSuggestion.Entry.Option prefixOption = (CompletionSuggestion.Entry.Option) option; - assertThat(prefixOption.getPayload(), is(notNullValue())); - - assertThat(prefixOption.getPayloadAsLong(), equalTo(1l)); - } - - public void testPayloadAsString() throws Exception { - completionMappingBuilder.payloads(true); - createIndexAndMapping(completionMappingBuilder); - - client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() - .startObject().startObject(FIELD) - .startArray("input").value("Foo Fighters").endArray() - .field("output", "Boo Fighters") - .field("payload", "test") - .endObject().endObject() + .startObject().startObject(FIELD) + .field("input", "Foo Fighters") + .endObject().endObject() 
).get(); refresh(); - SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - new CompletionSuggestionBuilder("testSuggestions").field(FIELD).text("foo").size(10) - ).execute().actionGet(); - - assertSuggestions(suggestResponse, "testSuggestions", "Boo Fighters"); - Suggest.Suggestion.Entry.Option option = suggestResponse.getSuggest().getSuggestion("testSuggestions").getEntries().get(0).getOptions().get(0); - assertThat(option, is(instanceOf(CompletionSuggestion.Entry.Option.class))); - CompletionSuggestion.Entry.Option prefixOption = (CompletionSuggestion.Entry.Option) option; - assertThat(prefixOption.getPayload(), is(notNullValue())); - - assertThat(prefixOption.getPayloadAsString(), equalTo("test")); - } - - public void testThatExceptionIsThrownWhenPayloadsAreDisabledButInIndexRequest() throws Exception { - completionMappingBuilder.payloads(false); - createIndexAndMapping(completionMappingBuilder); - - try { - client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() - .startObject().startObject(FIELD) - .startArray("input").value("Foo Fighters").endArray() - .field("output", "Boo Fighters") - .startArray("payload").value("spam").value("eggs").endArray() - .endObject().endObject() - ).get(); - fail("Expected MapperException"); - } catch (MapperException e) { - assertThat(e.getMessage(), is("failed to parse")); - } + assertSuggestions("f", "Foo Fighters"); } public void testDisabledPreserveSeparators() throws Exception { @@ -409,14 +524,13 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() .startObject().startObject(FIELD) .startArray("input").value("Foo Fighters").value("Fu Fighters").endArray() - .field("output", "The incredible Foo Fighters") .endObject().endObject() ).get(); refresh(); - assertSuggestions("foo", "The incredible Foo Fighters"); - assertSuggestions("fu", "The incredible Foo Fighters"); + assertSuggestions("foo", "Foo Fighters"); + 
assertSuggestions("fu", "Fu Fighters"); } public void testThatShortSyntaxIsWorking() throws Exception { @@ -569,12 +683,12 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { refresh(); SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - SuggestBuilders.fuzzyCompletionSuggestion("foo").field(FIELD).text("Nirv").size(10) + SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("Nirv").size(10) ).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo", "Nirvana"); suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - SuggestBuilders.fuzzyCompletionSuggestion("foo").field(FIELD).text("Nirw").size(10) + SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("Nirw", Fuzziness.ONE).size(10) ).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo", "Nirvana"); } @@ -592,13 +706,13 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { // edit distance 1 SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - SuggestBuilders.fuzzyCompletionSuggestion("foo").field(FIELD).text("Norw").size(10) + SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("Norw", Fuzziness.ONE).size(10) ).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo"); // edit distance 2 suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - SuggestBuilders.fuzzyCompletionSuggestion("foo").field(FIELD).text("Norw").size(10).setFuzziness(Fuzziness.TWO) + SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("Norw", Fuzziness.TWO).size(10) ).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo", "Nirvana"); } @@ -615,12 +729,12 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { refresh(); SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - 
SuggestBuilders.fuzzyCompletionSuggestion("foo").field(FIELD).text("Nriv").size(10).setFuzzyTranspositions(false).setFuzziness(Fuzziness.ONE) + SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("Nriv", new FuzzyOptionsBuilder().setTranspositions(false)).size(10) ).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo"); suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - SuggestBuilders.fuzzyCompletionSuggestion("foo").field(FIELD).text("Nriv").size(10).setFuzzyTranspositions(true).setFuzziness(Fuzziness.ONE) + SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("Nriv", Fuzziness.ONE).size(10) ).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo", "Nirvana"); } @@ -637,12 +751,12 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { refresh(); SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - SuggestBuilders.fuzzyCompletionSuggestion("foo").field(FIELD).text("Nriva").size(10).setFuzzyMinLength(6) + SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("Nriva", new FuzzyOptionsBuilder().setFuzzyMinLength(6)).size(10) ).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo"); suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - SuggestBuilders.fuzzyCompletionSuggestion("foo").field(FIELD).text("Nrivan").size(10).setFuzzyMinLength(6) + SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("Nrivan", new FuzzyOptionsBuilder().setFuzzyMinLength(6)).size(10) ).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo", "Nirvana"); } @@ -659,12 +773,12 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { refresh(); SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - SuggestBuilders.fuzzyCompletionSuggestion("foo").field(FIELD).text("Nirw").size(10).setFuzzyPrefixLength(4) + SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("Nirw", new 
FuzzyOptionsBuilder().setFuzzyPrefixLength(4)).size(10) ).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo"); suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - SuggestBuilders.fuzzyCompletionSuggestion("foo").field(FIELD).text("Nirvo").size(10).setFuzzyPrefixLength(4) + SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("Nirvo", new FuzzyOptionsBuilder().setFuzzyPrefixLength(4)).size(10) ).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo", "Nirvana"); } @@ -681,31 +795,32 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { refresh(); // suggestion with a character, which needs unicode awareness - CompletionSuggestionFuzzyBuilder completionSuggestionBuilder = - SuggestBuilders.fuzzyCompletionSuggestion("foo").field(FIELD).text("öööи").size(10).setUnicodeAware(true); + org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder completionSuggestionBuilder = + SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("öööи", new FuzzyOptionsBuilder().setUnicodeAware(true)).size(10); SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(completionSuggestionBuilder).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo", "ööööö"); // removing unicode awareness leads to no result - completionSuggestionBuilder.setUnicodeAware(false); + completionSuggestionBuilder = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("öööи", new FuzzyOptionsBuilder().setUnicodeAware(false)).size(10); suggestResponse = client().prepareSuggest(INDEX).addSuggestion(completionSuggestionBuilder).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo"); // increasing edit distance instead of unicode awareness works again, as this is only a single character - completionSuggestionBuilder.setFuzziness(Fuzziness.TWO); + completionSuggestionBuilder = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("öööи", new 
FuzzyOptionsBuilder().setUnicodeAware(false).setFuzziness(Fuzziness.TWO)).size(10); suggestResponse = client().prepareSuggest(INDEX).addSuggestion(completionSuggestionBuilder).execute().actionGet(); assertSuggestions(suggestResponse, false, "foo", "ööööö"); } public void testThatStatsAreWorking() throws Exception { String otherField = "testOtherField"; - - createIndex(INDEX); - + client().admin().indices().prepareCreate(INDEX) + .setSettings(Settings.settingsBuilder().put("index.number_of_replicas", 0).put("index.number_of_shards", 2)) + .execute().actionGet(); + ensureGreen(); PutMappingResponse putMappingResponse = client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject() .startObject(TYPE).startObject("properties") - .startObject(FIELD.toString()) + .startObject(FIELD) .field("type", "completion").field("analyzer", "simple") .endObject() .startObject(otherField) @@ -716,8 +831,14 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { assertThat(putMappingResponse.isAcknowledged(), is(true)); // Index two entities - client().prepareIndex(INDEX, TYPE, "1").setRefresh(true).setSource(jsonBuilder().startObject().field(FIELD, "Foo Fighters").field(otherField, "WHATEVER").endObject()).get(); - client().prepareIndex(INDEX, TYPE, "2").setRefresh(true).setSource(jsonBuilder().startObject().field(FIELD, "Bar Fighters").field(otherField, "WHATEVER2").endObject()).get(); + client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder().startObject().field(FIELD, "Foo Fighters").field(otherField, "WHATEVER").endObject()).get(); + client().prepareIndex(INDEX, TYPE, "2").setSource(jsonBuilder().startObject().field(FIELD, "Bar Fighters").field(otherField, "WHATEVER2").endObject()).get(); + + refresh(); + ensureGreen(); + // load the fst index into ram + client().prepareSuggest(INDEX).addSuggestion(SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("f")).get(); + 
client().prepareSuggest(INDEX).addSuggestion(SuggestBuilders.completionSuggestion("foo").field(otherField).prefix("f")).get(); // Get all stats IndicesStatsResponse indicesStatsResponse = client().admin().indices().prepareStats(INDEX).setIndices(INDEX).setCompletion(true).get(); @@ -814,13 +935,16 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { } - public void assertSuggestions(String suggestion, String... suggestions) { - String suggestionName = RandomStrings.randomAsciiOfLength(new Random(), 10); - SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - SuggestBuilders.completionSuggestion(suggestionName).field(FIELD).text(suggestion).size(10) + public void assertSuggestions(String suggestionName, SuggestBuilder.SuggestionBuilder suggestBuilder, String... suggestions) { + SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(suggestBuilder ).execute().actionGet(); - assertSuggestions(suggestResponse, suggestionName, suggestions); + + } + public void assertSuggestions(String suggestion, String... suggestions) { + String suggestionName = RandomStrings.randomAsciiOfLength(new Random(), 10); + CompletionSuggestionBuilder suggestionBuilder = SuggestBuilders.completionSuggestion(suggestionName).field(FIELD).text(suggestion).size(10); + assertSuggestions(suggestionName, suggestionBuilder, suggestions); } public void assertSuggestionsNotInOrder(String suggestString, String... suggestions) { @@ -832,11 +956,11 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { assertSuggestions(suggestResponse, false, suggestionName, suggestions); } - private void assertSuggestions(SuggestResponse suggestResponse, String name, String... suggestions) { + static void assertSuggestions(SuggestResponse suggestResponse, String name, String... 
suggestions) { assertSuggestions(suggestResponse, true, name, suggestions); } - private void assertSuggestions(SuggestResponse suggestResponse, boolean suggestionOrderStrict, String name, String... suggestions) { + private static void assertSuggestions(SuggestResponse suggestResponse, boolean suggestionOrderStrict, String name, String... suggestions) { assertAllSuccessful(suggestResponse); List<String> suggestionNames = new ArrayList<>(); @@ -866,7 +990,7 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { } } - private List<String> getNames(Suggest.Suggestion.Entry<Suggest.Suggestion.Entry.Option> suggestEntry) { + private static List<String> getNames(Suggest.Suggestion.Entry<Suggest.Suggestion.Entry.Option> suggestEntry) { List<String> names = new ArrayList<>(); for (Suggest.Suggestion.Entry.Option entry : suggestEntry.getOptions()) { names.add(entry.getText().string()); @@ -875,20 +999,44 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { } private void createIndexAndMappingAndSettings(Settings settings, CompletionMappingBuilder completionMappingBuilder) throws IOException { + XContentBuilder mapping = jsonBuilder().startObject() + .startObject(TYPE).startObject("properties") + .startObject(FIELD) + .field("type", "completion") + .field("analyzer", completionMappingBuilder.indexAnalyzer) + .field("search_analyzer", completionMappingBuilder.searchAnalyzer) + .field("preserve_separators", completionMappingBuilder.preserveSeparators) + .field("preserve_position_increments", completionMappingBuilder.preservePositionIncrements); + + if (completionMappingBuilder.contextMappings != null) { + mapping = mapping.startArray("contexts"); + for (Map.Entry<String, ContextMapping> contextMapping : completionMappingBuilder.contextMappings.entrySet()) { + mapping = mapping.startObject() + .field("name", contextMapping.getValue().name()) + .field("type", contextMapping.getValue().type().name()); + switch (contextMapping.getValue().type()) { + case 
CATEGORY: + mapping = mapping.field("path", ((CategoryContextMapping) contextMapping.getValue()).getFieldName()); + break; + case GEO: + mapping = mapping + .field("path", ((GeoContextMapping) contextMapping.getValue()).getFieldName()) + .field("precision", ((GeoContextMapping) contextMapping.getValue()).getPrecision()); + break; + } + + mapping = mapping.endObject(); + } + + mapping = mapping.endArray(); + } + mapping = mapping.endObject() + .endObject().endObject() + .endObject(); + assertAcked(client().admin().indices().prepareCreate(INDEX) .setSettings(Settings.settingsBuilder().put(indexSettings()).put(settings)) - .addMapping(TYPE, jsonBuilder().startObject() - .startObject(TYPE).startObject("properties") - .startObject(FIELD) - .field("type", "completion") - .field("analyzer", completionMappingBuilder.indexAnalyzer) - .field("search_analyzer", completionMappingBuilder.searchAnalyzer) - .field("payloads", completionMappingBuilder.payloads) - .field("preserve_separators", completionMappingBuilder.preserveSeparators) - .field("preserve_position_increments", completionMappingBuilder.preservePositionIncrements) - .endObject() - .endObject().endObject() - .endObject()) + .addMapping(TYPE, mapping) .get()); ensureYellow(); } @@ -897,47 +1045,6 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { createIndexAndMappingAndSettings(Settings.EMPTY, completionMappingBuilder); } - private void createData(boolean optimize) throws IOException, InterruptedException, ExecutionException { - String[][] input = {{"Foo Fighters"}, {"Generator", "Foo Fighters Generator"}, {"Learn to Fly", "Foo Fighters Learn to Fly"}, {"The Prodigy"}, {"Firestarter", "The Prodigy Firestarter"}, {"Turbonegro"}, {"Get it on", "Turbonegro Get it on"}}; - String[] surface = {"Foo Fighters", "Generator - Foo Fighters", "Learn to Fly - Foo Fighters", "The Prodigy", "Firestarter - The Prodigy", "Turbonegro", "Get it on - Turbonegro"}; - int[] weight = {10, 9, 8, 12, 11, 6, 7}; - 
IndexRequestBuilder[] builders = new IndexRequestBuilder[input.length]; - for (int i = 0; i < builders.length; i++) { - builders[i] = client().prepareIndex(INDEX, TYPE, "" + i) - .setSource(jsonBuilder() - .startObject().startObject(FIELD) - .startArray("input").value(input[i]).endArray() - .field("output", surface[i]) - .startObject("payload").field("id", i).endObject() - .field("weight", 1) // WE FORCEFULLY INDEX A BOGUS WEIGHT - .endObject() - .endObject() - ); - } - indexRandom(false, builders); - - for (int i = 0; i < builders.length; i++) { // add them again to make sure we deduplicate on the surface form - builders[i] = client().prepareIndex(INDEX, TYPE, "n" + i) - .setSource(jsonBuilder() - .startObject().startObject(FIELD) - .startArray("input").value(input[i]).endArray() - .field("output", surface[i]) - .startObject("payload").field("id", i).endObject() - .field("weight", weight[i]) - .endObject() - .endObject() - ); - } - indexRandom(false, builders); - - client().admin().indices().prepareRefresh(INDEX).execute().actionGet(); - if (optimize) { - // make sure merging works just fine - client().admin().indices().prepareFlush(INDEX).execute().actionGet(); - client().admin().indices().prepareForceMerge(INDEX).setMaxNumSegments(randomIntBetween(1, 5)).get(); - } - } - // see #3555 public void testPrunedSegments() throws IOException { createIndexAndMappingAndSettings(settingsBuilder().put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0).build(), completionMappingBuilder); @@ -975,43 +1082,6 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { } } - public void testMaxFieldLength() throws IOException { - client().admin().indices().prepareCreate(INDEX).get(); - ensureGreen(); - int iters = scaledRandomIntBetween(10, 20); - for (int i = 0; i < iters; i++) { - int maxInputLen = between(3, 50); - String str = replaceReservedChars(randomRealisticUnicodeOfCodepointLengthBetween(maxInputLen + 1, maxInputLen + scaledRandomIntBetween(2, 
50)), (char) 0x01); - assertAcked(client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject() - .startObject(TYPE).startObject("properties") - .startObject(FIELD) - .field("type", "completion") - .field("max_input_length", maxInputLen) - // upgrade mapping each time - .field("analyzer", "keyword") - .endObject() - .endObject().endObject() - .endObject())); - client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() - .startObject().startObject(FIELD) - .startArray("input").value(str).endArray() - .field("output", "foobar") - .endObject().endObject() - ).setRefresh(true).get(); - // need to flush and refresh, because we keep changing the same document - // we have to make sure that segments without any live documents are deleted - flushAndRefresh(); - int prefixLen = CompletionFieldMapper.correctSubStringLen(str, between(1, maxInputLen - 1)); - assertSuggestions(str.substring(0, prefixLen), "foobar"); - if (maxInputLen + 1 < str.length()) { - int offset = Character.isHighSurrogate(str.charAt(maxInputLen - 1)) ? 
2 : 1; - int correctSubStringLen = CompletionFieldMapper.correctSubStringLen(str, maxInputLen + offset); - String shortenedSuggestion = str.substring(0, correctSubStringLen); - assertSuggestions(shortenedSuggestion); - } - } - } - // see #3596 public void testVeryLongInput() throws IOException { assertAcked(client().admin().indices().prepareCreate(INDEX).addMapping(TYPE, jsonBuilder().startObject() @@ -1027,7 +1097,6 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() .startObject().startObject(FIELD) .startArray("input").value(longString).endArray() - .field("output", "foobar") .endObject().endObject() ).setRefresh(true).get(); @@ -1070,9 +1139,9 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { ensureYellow(); String string = "foo bar"; client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() - .startObject() - .field(FIELD, string) - .endObject() + .startObject() + .field(FIELD, string) + .endObject() ).setRefresh(true).get(); try { @@ -1103,7 +1172,7 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { ensureGreen(); client().prepareIndex(INDEX, TYPE, "1").setSource(FIELD, "strings make me happy", FIELD + "_1", "nulls make me sad") - .setRefresh(true).get(); + .setRefresh(true).get(); try { client().prepareIndex(INDEX, TYPE, "2").setSource(FIELD, null, FIELD + "_1", "nulls make me sad") @@ -1115,22 +1184,34 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { } } + public static boolean isReservedChar(char c) { + switch (c) { + case '\u001F': + case TokenStreamToAutomaton.HOLE: + case 0x0: + case ContextSuggestField.CONTEXT_SEPARATOR: + return true; + default: + return false; + } + } + private static String replaceReservedChars(String input, char replacement) { char[] charArray = input.toCharArray(); for (int i = 0; i < charArray.length; i++) { - if (CompletionFieldMapper.isReservedChar(charArray[i])) { + if 
(isReservedChar(charArray[i])) { charArray[i] = replacement; } } return new String(charArray); } - private static class CompletionMappingBuilder { - private String searchAnalyzer = "simple"; - private String indexAnalyzer = "simple"; - private Boolean payloads = getRandom().nextBoolean(); - private Boolean preserveSeparators = getRandom().nextBoolean(); - private Boolean preservePositionIncrements = getRandom().nextBoolean(); + static class CompletionMappingBuilder { + String searchAnalyzer = "simple"; + String indexAnalyzer = "simple"; + Boolean preserveSeparators = getRandom().nextBoolean(); + Boolean preservePositionIncrements = getRandom().nextBoolean(); + LinkedHashMap<String, ContextMapping> contextMappings = null; public CompletionMappingBuilder searchAnalyzer(String searchAnalyzer) { this.searchAnalyzer = searchAnalyzer; @@ -1140,10 +1221,6 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { this.indexAnalyzer = indexAnalyzer; return this; } - public CompletionMappingBuilder payloads(Boolean payloads) { - this.payloads = payloads; - return this; - } public CompletionMappingBuilder preserveSeparators(Boolean preserveSeparators) { this.preserveSeparators = preserveSeparators; return this; @@ -1152,5 +1229,10 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { this.preservePositionIncrements = preservePositionIncrements; return this; } + + public CompletionMappingBuilder context(LinkedHashMap<String, ContextMapping> contextMappings) { + this.contextMappings = contextMappings; + return this; + } } } diff --git a/core/src/test/java/org/elasticsearch/search/suggest/CompletionTokenStreamTests.java b/core/src/test/java/org/elasticsearch/search/suggest/CompletionTokenStreamTests.java deleted file mode 100644 index f2e83642f1..0000000000 --- a/core/src/test/java/org/elasticsearch/search/suggest/CompletionTokenStreamTests.java +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license 
agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.elasticsearch.search.suggest; - -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.core.SimpleAnalyzer; -import org.apache.lucene.analysis.synonym.SynonymFilter; -import org.apache.lucene.analysis.synonym.SynonymMap; -import org.apache.lucene.analysis.synonym.SynonymMap.Builder; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; -import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.CharsRef; -import org.apache.lucene.util.IntsRef; -import org.elasticsearch.search.suggest.completion.CompletionTokenStream; -import org.elasticsearch.search.suggest.completion.CompletionTokenStream.ByteTermAttribute; -import org.elasticsearch.test.ESTokenStreamTestCase; - -import 
java.io.IOException; -import java.io.StringReader; -import java.util.Set; - -import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.equalTo; - -public class CompletionTokenStreamTests extends ESTokenStreamTestCase { - final XAnalyzingSuggester suggester = new XAnalyzingSuggester(new SimpleAnalyzer()); - - public void testSuggestTokenFilter() throws Exception { - Tokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, true); - tokenStream.setReader(new StringReader("mykeyword")); - BytesRef payload = new BytesRef("Surface keyword|friggin payload|10"); - TokenStream suggestTokenStream = new ByteTermAttrToCharTermAttrFilter(new CompletionTokenStream(tokenStream, payload, new CompletionTokenStream.ToFiniteStrings() { - @Override - public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { - return suggester.toFiniteStrings(stream); - } - })); - assertTokenStreamContents(suggestTokenStream, new String[] {"mykeyword"}, null, null, new String[] {"Surface keyword|friggin payload|10"}, new int[] { 1 }, null, null); - } - - public void testSuggestTokenFilterWithSynonym() throws Exception { - Builder builder = new SynonymMap.Builder(true); - builder.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true); - - Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true); - tokenizer.setReader(new StringReader("mykeyword")); - SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true); - - BytesRef payload = new BytesRef("Surface keyword|friggin payload|10"); - TokenStream suggestTokenStream = new ByteTermAttrToCharTermAttrFilter(new CompletionTokenStream(filter, payload, new CompletionTokenStream.ToFiniteStrings() { - @Override - public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { - return suggester.toFiniteStrings(stream); - } - })); - assertTokenStreamContents(suggestTokenStream, new String[] {"mysynonym", "mykeyword"}, null, null, new String[] 
{"Surface keyword|friggin payload|10", "Surface keyword|friggin payload|10"}, new int[] { 2, 0 }, null, null); - } - - public void testValidNumberOfExpansions() throws IOException { - Builder builder = new SynonymMap.Builder(true); - for (int i = 0; i < 256; i++) { - builder.add(new CharsRef("" + (i+1)), new CharsRef("" + (1000 + (i+1))), true); - } - StringBuilder valueBuilder = new StringBuilder(); - for (int i = 0 ; i < 8 ; i++) { - valueBuilder.append(i+1); - valueBuilder.append(" "); - } - MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true); - tokenizer.setReader(new StringReader(valueBuilder.toString())); - SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true); - - TokenStream suggestTokenStream = new CompletionTokenStream(filter, new BytesRef("Surface keyword|friggin payload|10"), new CompletionTokenStream.ToFiniteStrings() { - @Override - public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { - Set<IntsRef> finiteStrings = suggester.toFiniteStrings(stream); - return finiteStrings; - } - }); - - suggestTokenStream.reset(); - ByteTermAttribute attr = suggestTokenStream.addAttribute(ByteTermAttribute.class); - PositionIncrementAttribute posAttr = suggestTokenStream.addAttribute(PositionIncrementAttribute.class); - int maxPos = 0; - int count = 0; - while(suggestTokenStream.incrementToken()) { - count++; - assertNotNull(attr.getBytesRef()); - assertTrue(attr.getBytesRef().length > 0); - maxPos += posAttr.getPositionIncrement(); - } - suggestTokenStream.close(); - assertEquals(count, 256); - assertEquals(count, maxPos); - - } - - public void testInValidNumberOfExpansions() throws IOException { - Builder builder = new SynonymMap.Builder(true); - for (int i = 0; i < 256; i++) { - builder.add(new CharsRef("" + (i+1)), new CharsRef("" + (1000 + (i+1))), true); - } - StringBuilder valueBuilder = new StringBuilder(); - for (int i = 0 ; i < 9 ; i++) { // 9 -> expands to 512 - valueBuilder.append(i+1); - 
valueBuilder.append(" "); - } - MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true); - tokenizer.setReader(new StringReader(valueBuilder.toString())); - SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true); - - TokenStream suggestTokenStream = new CompletionTokenStream(filter, new BytesRef("Surface keyword|friggin payload|10"), new CompletionTokenStream.ToFiniteStrings() { - @Override - public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { - Set<IntsRef> finiteStrings = suggester.toFiniteStrings(stream); - return finiteStrings; - } - }); - - suggestTokenStream.reset(); - try { - suggestTokenStream.incrementToken(); - fail("Expected IllegalArgumentException"); - } catch (IllegalArgumentException e) { - assertThat(e.getMessage(), containsString("Only <= 256 finite strings are supported")); - } finally { - suggestTokenStream.close(); - } - } - - public void testSuggestTokenFilterProperlyDelegateInputStream() throws Exception { - Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true); - tokenizer.setReader(new StringReader("mykeyword")); - BytesRef payload = new BytesRef("Surface keyword|friggin payload|10"); - TokenStream suggestTokenStream = new ByteTermAttrToCharTermAttrFilter(new CompletionTokenStream(tokenizer, payload, new CompletionTokenStream.ToFiniteStrings() { - @Override - public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { - return suggester.toFiniteStrings(stream); - } - })); - TermToBytesRefAttribute termAtt = suggestTokenStream.getAttribute(TermToBytesRefAttribute.class); - assertNotNull(termAtt.getBytesRef()); - suggestTokenStream.reset(); - - while (suggestTokenStream.incrementToken()) { - assertThat(termAtt.getBytesRef().utf8ToString(), equalTo("mykeyword")); - } - suggestTokenStream.end(); - suggestTokenStream.close(); - } - - - public final static class ByteTermAttrToCharTermAttrFilter extends TokenFilter { - private ByteTermAttribute 
byteAttr = addAttribute(ByteTermAttribute.class); - private PayloadAttribute payload = addAttribute(PayloadAttribute.class); - private TypeAttribute type = addAttribute(TypeAttribute.class); - private CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class); - protected ByteTermAttrToCharTermAttrFilter(TokenStream input) { - super(input); - } - - @Override - public boolean incrementToken() throws IOException { - if (input.incrementToken()) { - BytesRef bytesRef = byteAttr.getBytesRef(); - // we move them over so we can assert them more easily in the tests - type.setType(payload.getPayload().utf8ToString()); - return true; - } - return false; - } - - } -} diff --git a/core/src/test/java/org/elasticsearch/search/suggest/ContextCompletionSuggestSearchIT.java b/core/src/test/java/org/elasticsearch/search/suggest/ContextCompletionSuggestSearchIT.java new file mode 100644 index 0000000000..86177d4793 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/suggest/ContextCompletionSuggestSearchIT.java @@ -0,0 +1,621 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.search.suggest; + +import com.carrotsearch.randomizedtesting.generators.RandomStrings; + +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; +import org.apache.lucene.util.GeoHashUtils; +import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.action.suggest.SuggestResponse; +import org.elasticsearch.common.geo.GeoPoint; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.search.suggest.CompletionSuggestSearchIT.CompletionMappingBuilder; +import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder; +import org.elasticsearch.search.suggest.completion.context.*; +import org.elasticsearch.test.ESIntegTestCase; + +import java.io.IOException; +import java.util.*; +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; + +@SuppressCodecs("*") // requires custom completion format +public class ContextCompletionSuggestSearchIT extends ESIntegTestCase { + + private final String INDEX = RandomStrings.randomAsciiOfLength(getRandom(), 10).toLowerCase(Locale.ROOT); + private final String TYPE = RandomStrings.randomAsciiOfLength(getRandom(), 10).toLowerCase(Locale.ROOT); + private final String FIELD = RandomStrings.randomAsciiOfLength(getRandom(), 10).toLowerCase(Locale.ROOT); + + @Override + protected int numberOfReplicas() { + return 0; + } + + public void testContextPrefix() throws Exception { + LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<>(); + map.put("cat", ContextBuilder.category("cat").field("cat").build()); + boolean addAnotherContext = randomBoolean(); + if (addAnotherContext) { + map.put("type", ContextBuilder.category("type").field("type").build()); + } + final CompletionMappingBuilder mapping = new 
CompletionMappingBuilder().context(map); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + XContentBuilder source = jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "suggestion" + i) + .field("weight", i + 1) + .endObject() + .field("cat", "cat" + i % 2); + if (addAnotherContext) { + source.field("type", "type" + i % 3); + } + source.endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(source)); + } + indexRandom(true, indexRequestBuilders); + ensureYellow(INDEX); + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg"); + assertSuggestions("foo", prefix, "suggestion9", "suggestion8", "suggestion7", "suggestion6", "suggestion5"); + } + + public void testContextRegex() throws Exception { + LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<>(); + map.put("cat", ContextBuilder.category("cat").field("cat").build()); + boolean addAnotherContext = randomBoolean(); + if (addAnotherContext) { + map.put("type", ContextBuilder.category("type").field("type").build()); + } + final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + XContentBuilder source = jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "sugg" + i + "estion") + .field("weight", i + 1) + .endObject() + .field("cat", "cat" + i % 2); + if (addAnotherContext) { + source.field("type", "type" + i % 3); + } + source.endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(source)); + } + indexRandom(true, indexRequestBuilders); + ensureYellow(INDEX); + CompletionSuggestionBuilder prefix = 
SuggestBuilders.completionSuggestion("foo").field(FIELD).regex("sugg.*es"); + assertSuggestions("foo", prefix, "sugg9estion", "sugg8estion", "sugg7estion", "sugg6estion", "sugg5estion"); + } + + public void testContextFuzzy() throws Exception { + LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<>(); + map.put("cat", ContextBuilder.category("cat").field("cat").build()); + boolean addAnotherContext = randomBoolean(); + if (addAnotherContext) { + map.put("type", ContextBuilder.category("type").field("type").build()); + } + final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + XContentBuilder source = jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "sugxgestion" + i) + .field("weight", i + 1) + .endObject() + .field("cat", "cat" + i % 2); + if (addAnotherContext) { + source.field("type", "type" + i % 3); + } + source.endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(source)); + } + indexRandom(true, indexRequestBuilders); + ensureYellow(INDEX); + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg", Fuzziness.ONE); + assertSuggestions("foo", prefix, "sugxgestion9", "sugxgestion8", "sugxgestion7", "sugxgestion6", "sugxgestion5"); + } + + public void testSingleContextFiltering() throws Exception { + CategoryContextMapping contextMapping = ContextBuilder.category("cat").field("cat").build(); + LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<String, ContextMapping>(Collections.singletonMap("cat", contextMapping)); + final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int 
i = 0; i < numDocs; i++) { + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "suggestion" + i) + .field("weight", i + 1) + .endObject() + .field("cat", "cat" + i % 2) + .endObject() + )); + } + indexRandom(true, indexRequestBuilders); + ensureYellow(INDEX); + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg") + .categoryContexts("cat", + new CategoryQueryContext("cat0")); + + assertSuggestions("foo", prefix, "suggestion8", "suggestion6", "suggestion4", "suggestion2", "suggestion0"); + } + + public void testSingleContextBoosting() throws Exception { + CategoryContextMapping contextMapping = ContextBuilder.category("cat").field("cat").build(); + LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<String, ContextMapping>(Collections.singletonMap("cat", contextMapping)); + final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "suggestion" + i) + .field("weight", i + 1) + .endObject() + .field("cat", "cat" + i % 2) + .endObject() + )); + } + indexRandom(true, indexRequestBuilders); + ensureYellow(INDEX); + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg") + .categoryContexts("cat", + new CategoryQueryContext("cat0", 3), + new CategoryQueryContext("cat1")); + + assertSuggestions("foo", prefix, "suggestion8", "suggestion6", "suggestion4", "suggestion9", "suggestion2"); + } + + public void testSingleContextMultipleContexts() throws Exception { + CategoryContextMapping contextMapping = 
ContextBuilder.category("cat").field("cat").build(); + LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<String, ContextMapping>(Collections.singletonMap("cat", contextMapping)); + final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map); + createIndexAndMapping(mapping); + int numDocs = 10; + List<String> contexts = Arrays.asList("type1", "type2", "type3", "type4"); + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + XContentBuilder source = jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "suggestion" + i) + .field("weight", i + 1) + .endObject() + .field("cat", contexts) + .endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(source)); + } + indexRandom(true, indexRequestBuilders); + ensureYellow(INDEX); + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg"); + + assertSuggestions("foo", prefix, "suggestion9", "suggestion8", "suggestion7", "suggestion6", "suggestion5"); + } + + public void testMultiContextFiltering() throws Exception { + LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<>(); + map.put("cat", ContextBuilder.category("cat").field("cat").build()); + map.put("type", ContextBuilder.category("type").field("type").build()); + final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + XContentBuilder source = jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "suggestion" + i) + .field("weight", i + 1) + .endObject() + .field("cat", "cat" + i % 2) + .field("type", "type" + i % 4) + .endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(source)); + } + indexRandom(true, 
indexRequestBuilders); + ensureYellow(INDEX); + + // filter only on context cat + CompletionSuggestionBuilder catFilterSuggest = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg"); + catFilterSuggest.categoryContexts("cat", new CategoryQueryContext("cat0")); + assertSuggestions("foo", catFilterSuggest, "suggestion8", "suggestion6", "suggestion4", "suggestion2", "suggestion0"); + + // filter only on context type + CompletionSuggestionBuilder typeFilterSuggest = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg"); + typeFilterSuggest.categoryContexts("type", new CategoryQueryContext("type2"), new CategoryQueryContext("type1")); + assertSuggestions("foo", typeFilterSuggest, "suggestion9", "suggestion6", "suggestion5", "suggestion2", "suggestion1"); + + // filter on both contexts + CompletionSuggestionBuilder multiContextFilterSuggest = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg"); + // query context order should never matter + if (randomBoolean()) { + multiContextFilterSuggest.categoryContexts("type", new CategoryQueryContext("type2"), new CategoryQueryContext("type1")); + multiContextFilterSuggest.categoryContexts("cat", new CategoryQueryContext("cat0")); + } else { + multiContextFilterSuggest.categoryContexts("cat", new CategoryQueryContext("cat0")); + multiContextFilterSuggest.categoryContexts("type", new CategoryQueryContext("type2"), new CategoryQueryContext("type1")); + } + assertSuggestions("foo", multiContextFilterSuggest, "suggestion9", "suggestion8", "suggestion6", "suggestion5", "suggestion4"); + } + + @AwaitsFix(bugUrl = "multiple context boosting is broken, as a suggestion, contexts pair is treated as (num(context) entries)") + public void testMultiContextBoosting() throws Exception { + LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<>(); + map.put("cat", ContextBuilder.category("cat").field("cat").build()); + map.put("type", 
ContextBuilder.category("type").field("type").build()); + final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + XContentBuilder source = jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "suggestion" + i) + .field("weight", i + 1) + .endObject() + .field("cat", "cat" + i % 2) + .field("type", "type" + i % 4) + .endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(source)); + } + indexRandom(true, indexRequestBuilders); + ensureYellow(INDEX); + + // boost only on context cat + CompletionSuggestionBuilder catBoostSuggest = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg"); + catBoostSuggest.categoryContexts("cat", + new CategoryQueryContext("cat0", 3), + new CategoryQueryContext("cat1")); + assertSuggestions("foo", catBoostSuggest, "suggestion8", "suggestion6", "suggestion4", "suggestion9", "suggestion2"); + + // boost only on context type + CompletionSuggestionBuilder typeBoostSuggest = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg"); + typeBoostSuggest.categoryContexts("type", + new CategoryQueryContext("type2", 2), + new CategoryQueryContext("type1", 4)); + assertSuggestions("foo", typeBoostSuggest, "suggestion9", "suggestion5", "suggestion6", "suggestion1", "suggestion2"); + + // boost on both contexts + CompletionSuggestionBuilder multiContextBoostSuggest = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg"); + // query context order should never matter + if (randomBoolean()) { + multiContextBoostSuggest.categoryContexts("type", + new CategoryQueryContext("type2", 2), + new CategoryQueryContext("type1", 4)); + multiContextBoostSuggest.categoryContexts("cat", + new CategoryQueryContext("cat0", 3), + new CategoryQueryContext("cat1")); + } else 
{ + multiContextBoostSuggest.categoryContexts("cat", + new CategoryQueryContext("cat0", 3), + new CategoryQueryContext("cat1")); + multiContextBoostSuggest.categoryContexts("type", + new CategoryQueryContext("type2", 2), + new CategoryQueryContext("type1", 4)); + } + assertSuggestions("foo", multiContextBoostSuggest, "suggestion9", "suggestion6", "suggestion5", "suggestion2", "suggestion1"); + } + + public void testMissingContextValue() throws Exception { + LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<>(); + map.put("cat", ContextBuilder.category("cat").field("cat").build()); + map.put("type", ContextBuilder.category("type").field("type").build()); + final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + XContentBuilder source = jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "suggestion" + i) + .field("weight", i + 1) + .endObject(); + if (randomBoolean()) { + source.field("cat", "cat" + i % 2); + } + if (randomBoolean()) { + source.field("type", "type" + i % 4); + } + source.endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(source)); + } + indexRandom(true, indexRequestBuilders); + ensureYellow(INDEX); + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg"); + assertSuggestions("foo", prefix, "suggestion9", "suggestion8", "suggestion7", "suggestion6", "suggestion5"); + } + + public void testSeveralContexts() throws Exception { + LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<>(); + final int numContexts = randomIntBetween(2, 5); + for (int i = 0; i < numContexts; i++) { + map.put("type" + i, ContextBuilder.category("type" + i).field("type" + i).build()); + } + final CompletionMappingBuilder mapping = new 
CompletionMappingBuilder().context(map); + createIndexAndMapping(mapping); + int numDocs = randomIntBetween(10, 200); + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + XContentBuilder source = jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "suggestion" + i) + .field("weight", numDocs - i) + .endObject(); + for (int c = 0; c < numContexts; c++) { + source.field("type"+c, "type" + c +i % 4); + } + source.endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(source)); + } + indexRandom(true, indexRequestBuilders); + ensureYellow(INDEX); + + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg"); + assertSuggestions("foo", prefix, "suggestion0", "suggestion1", "suggestion2", "suggestion3", "suggestion4"); + } + + public void testSimpleGeoPrefix() throws Exception { + LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<>(); + map.put("geo", ContextBuilder.geo("geo").build()); + final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + XContentBuilder source = jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "suggestion" + i) + .field("weight", i + 1) + .startObject("contexts") + .field("geo", GeoHashUtils.stringEncode(1.2, 1.3)) + .endObject() + .endObject().endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(source)); + } + indexRandom(true, indexRequestBuilders); + ensureYellow(INDEX); + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg"); + assertSuggestions("foo", prefix, "suggestion9", "suggestion8", "suggestion7", "suggestion6", "suggestion5"); + } + + public void 
testGeoFiltering() throws Exception { + LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<>(); + map.put("geo", ContextBuilder.geo("geo").build()); + final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + String[] geoHashes = new String[] {"ezs42e44yx96", "u4pruydqqvj8"}; + for (int i = 0; i < numDocs; i++) { + XContentBuilder source = jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "suggestion" + i) + .field("weight", i + 1) + .startObject("contexts") + .field("geo", (i % 2 == 0) ? geoHashes[0] : geoHashes[1]) + .endObject() + .endObject().endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(source)); + } + indexRandom(true, indexRequestBuilders); + ensureYellow(INDEX); + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg"); + assertSuggestions("foo", prefix, "suggestion9", "suggestion8", "suggestion7", "suggestion6", "suggestion5"); + + CompletionSuggestionBuilder geoFilteringPrefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg") + .geoContexts("geo", new GeoQueryContext(geoHashes[0])); + + assertSuggestions("foo", geoFilteringPrefix, "suggestion8", "suggestion6", "suggestion4", "suggestion2", "suggestion0"); + } + + public void testGeoBoosting() throws Exception { + LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<>(); + map.put("geo", ContextBuilder.geo("geo").build()); + final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + String[] geoHashes = new String[] {"ezs42e44yx96", "u4pruydqqvj8"}; + for (int i = 0; i < numDocs; i++) { + XContentBuilder source = jsonBuilder() + 
.startObject() + .startObject(FIELD) + .field("input", "suggestion" + i) + .field("weight", i + 1) + .startObject("contexts") + .field("geo", (i % 2 == 0) ? geoHashes[0] : geoHashes[1]) + .endObject() + .endObject().endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(source)); + } + indexRandom(true, indexRequestBuilders); + ensureYellow(INDEX); + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg"); + assertSuggestions("foo", prefix, "suggestion9", "suggestion8", "suggestion7", "suggestion6", "suggestion5"); + + CompletionSuggestionBuilder geoBoostingPrefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg") + .geoContexts("geo", new GeoQueryContext(geoHashes[0], 2), new GeoQueryContext(geoHashes[1])); + + assertSuggestions("foo", geoBoostingPrefix, "suggestion8", "suggestion6", "suggestion4", "suggestion9", "suggestion7"); + } + + public void testGeoPointContext() throws Exception { + LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<>(); + map.put("geo", ContextBuilder.geo("geo").build()); + final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + XContentBuilder source = jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "suggestion" + i) + .field("weight", i + 1) + .startObject("contexts") + .startObject("geo") + .field("lat", 52.22) + .field("lon", 4.53) + .endObject() + .endObject() + .endObject().endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(source)); + } + indexRandom(true, indexRequestBuilders); + ensureYellow(INDEX); + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg") + .geoContexts("geo", new 
GeoQueryContext(new GeoPoint(52.2263, 4.543))); + assertSuggestions("foo", prefix, "suggestion9", "suggestion8", "suggestion7", "suggestion6", "suggestion5"); + } + + public void testGeoNeighbours() throws Exception { + String geohash = "gcpv"; + List<String> neighbours = new ArrayList<>(); + neighbours.add("gcpw"); + neighbours.add("gcpy"); + neighbours.add("u10n"); + neighbours.add("gcpt"); + neighbours.add("u10j"); + neighbours.add("gcps"); + neighbours.add("gcpu"); + neighbours.add("u10h"); + + LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<>(); + map.put("geo", ContextBuilder.geo("geo").precision(4).build()); + final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map); + createIndexAndMapping(mapping); + int numDocs = 10; + List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + XContentBuilder source = jsonBuilder() + .startObject() + .startObject(FIELD) + .field("input", "suggestion" + i) + .field("weight", i + 1) + .startObject("contexts") + .field("geo", randomFrom(neighbours)) + .endObject() + .endObject().endObject(); + indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i) + .setSource(source)); + } + indexRandom(true, indexRequestBuilders); + ensureYellow(INDEX); + CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg"); + assertSuggestions("foo", prefix, "suggestion9", "suggestion8", "suggestion7", "suggestion6", "suggestion5"); + + CompletionSuggestionBuilder geoNeighbourPrefix = SuggestBuilders.completionSuggestion("foo").field(FIELD).prefix("sugg") + .geoContexts("geo", new GeoQueryContext(geohash)); + + assertSuggestions("foo", geoNeighbourPrefix, "suggestion9", "suggestion8", "suggestion7", "suggestion6", "suggestion5"); + } + + public void assertSuggestions(String suggestionName, SuggestBuilder.SuggestionBuilder suggestBuilder, String... 
suggestions) { + SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(suggestBuilder + ).execute().actionGet(); + CompletionSuggestSearchIT.assertSuggestions(suggestResponse, suggestionName, suggestions); + } + + private void createIndexAndMapping(CompletionMappingBuilder completionMappingBuilder) throws IOException { + createIndexAndMappingAndSettings(Settings.EMPTY, completionMappingBuilder); + } + private void createIndexAndMappingAndSettings(Settings settings, CompletionMappingBuilder completionMappingBuilder) throws IOException { + XContentBuilder mapping = jsonBuilder().startObject() + .startObject(TYPE).startObject("properties") + .startObject(FIELD) + .field("type", "completion") + .field("analyzer", completionMappingBuilder.indexAnalyzer) + .field("search_analyzer", completionMappingBuilder.searchAnalyzer) + .field("preserve_separators", completionMappingBuilder.preserveSeparators) + .field("preserve_position_increments", completionMappingBuilder.preservePositionIncrements); + + if (completionMappingBuilder.contextMappings != null) { + mapping = mapping.startArray("contexts"); + for (Map.Entry<String, ContextMapping> contextMapping : completionMappingBuilder.contextMappings.entrySet()) { + mapping = mapping.startObject() + .field("name", contextMapping.getValue().name()) + .field("type", contextMapping.getValue().type().name()); + switch (contextMapping.getValue().type()) { + case CATEGORY: + final String fieldName = ((CategoryContextMapping) contextMapping.getValue()).getFieldName(); + if (fieldName != null) { + mapping = mapping.field("path", fieldName); + } + break; + case GEO: + final String name = ((GeoContextMapping) contextMapping.getValue()).getFieldName(); + mapping = mapping + .field("precision", ((GeoContextMapping) contextMapping.getValue()).getPrecision()); + if (name != null) { + mapping.field("path", name); + } + break; + } + + mapping = mapping.endObject(); + } + + mapping = mapping.endArray(); + } + mapping = 
mapping.endObject() + .endObject().endObject() + .endObject(); + + assertAcked(client().admin().indices().prepareCreate(INDEX) + .setSettings(Settings.settingsBuilder().put(indexSettings()).put(settings)) + .addMapping(TYPE, mapping) + .get()); + ensureYellow(); + } +} diff --git a/core/src/test/java/org/elasticsearch/search/suggest/ContextSuggestSearchIT.java b/core/src/test/java/org/elasticsearch/search/suggest/ContextSuggestSearchIT.java deleted file mode 100644 index 17111ae0a7..0000000000 --- a/core/src/test/java/org/elasticsearch/search/suggest/ContextSuggestSearchIT.java +++ /dev/null @@ -1,1045 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.elasticsearch.search.suggest; - -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; -import org.apache.lucene.util.GeoHashUtils; -import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; -import org.elasticsearch.action.suggest.SuggestRequest; -import org.elasticsearch.action.suggest.SuggestRequestBuilder; -import org.elasticsearch.action.suggest.SuggestResponse; -import org.elasticsearch.common.geo.GeoPoint; -import org.elasticsearch.common.unit.Fuzziness; -import org.elasticsearch.common.util.set.Sets; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.index.mapper.MapperParsingException; -import org.elasticsearch.search.suggest.Suggest.Suggestion; -import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry; -import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option; -import org.elasticsearch.search.suggest.completion.CompletionSuggestion; -import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder; -import org.elasticsearch.search.suggest.completion.CompletionSuggestionFuzzyBuilder; -import org.elasticsearch.search.suggest.context.ContextBuilder; -import org.elasticsearch.search.suggest.context.ContextMapping; -import org.elasticsearch.test.ESIntegTestCase; -import org.hamcrest.Matchers; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSuggestion; -import static org.elasticsearch.test.hamcrest.ElasticsearchGeoAssertions.assertDistance; -import static 
org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.is; - -@SuppressCodecs("*") // requires custom completion format -public class ContextSuggestSearchIT extends ESIntegTestCase { - - private static final String INDEX = "test"; - private static final String TYPE = "testType"; - private static final String FIELD = "testField"; - - private static final String[][] HEROS = { - { "Afari, Jamal", "Jamal Afari", "Jamal" }, - { "Allerdyce, St. John", "Allerdyce, John", "St. John", "St. John Allerdyce" }, - { "Beaubier, Jean-Paul", "Jean-Paul Beaubier", "Jean-Paul" }, - { "Beaubier, Jeanne-Marie", "Jeanne-Marie Beaubier", "Jeanne-Marie" }, - { "Braddock, Elizabeth \"Betsy\"", "Betsy", "Braddock, Elizabeth", "Elizabeth Braddock", "Elizabeth" }, - { "Cody Mushumanski gun Man", "the hunter", "gun man", "Cody Mushumanski" }, - { "Corbo, Adrian", "Adrian Corbo", "Adrian" }, - { "Corbo, Jared", "Jared Corbo", "Jared" }, - { "Creel, Carl \"Crusher\"", "Creel, Carl", "Crusher", "Carl Creel", "Carl" }, - { "Crichton, Lady Jacqueline Falsworth", "Lady Jacqueline Falsworth Crichton", "Lady Jacqueline Falsworth", - "Jacqueline Falsworth" }, { "Crichton, Kenneth", "Kenneth Crichton", "Kenneth" }, - { "MacKenzie, Al", "Al MacKenzie", "Al" }, - { "MacPherran, Mary \"Skeeter\"", "Mary MacPherran \"Skeeter\"", "MacPherran, Mary", "Skeeter", "Mary MacPherran" }, - { "MacTaggert, Moira", "Moira MacTaggert", "Moira" }, { "Rasputin, Illyana", "Illyana Rasputin", "Illyana" }, - { "Rasputin, Mikhail", "Mikhail Rasputin", "Mikhail" }, { "Rasputin, Piotr", "Piotr Rasputin", "Piotr" }, - { "Smythe, Alistair", "Alistair Smythe", "Alistair" }, { "Smythe, Spencer", "Spencer Smythe", "Spencer" }, - { "Whitemane, Aelfyre", "Aelfyre Whitemane", "Aelfyre" }, { "Whitemane, Kofi", "Kofi Whitemane", "Kofi" } }; - - public void testBasicGeo() throws Exception { - assertAcked(prepareCreate(INDEX).addMapping(TYPE, createMapping(TYPE, 
ContextBuilder.location("st").precision("5km").neighbors(true)))); - ensureYellow(); - - XContentBuilder source1 = jsonBuilder() - .startObject() - .startObject(FIELD) - .array("input", "Hotel Amsterdam", "Amsterdam") - .field("output", "Hotel Amsterdam in Berlin") - .startObject("context").latlon("st", 52.529172, 13.407333).endObject() - .endObject() - .endObject(); - client().prepareIndex(INDEX, TYPE, "1").setSource(source1).execute().actionGet(); - - XContentBuilder source2 = jsonBuilder() - .startObject() - .startObject(FIELD) - .array("input", "Hotel Berlin", "Berlin") - .field("output", "Hotel Berlin in Amsterdam") - .startObject("context").latlon("st", 52.363389, 4.888695).endObject() - .endObject() - .endObject(); - client().prepareIndex(INDEX, TYPE, "2").setSource(source2).execute().actionGet(); - - client().admin().indices().prepareRefresh(INDEX).get(); - - String suggestionName = randomAsciiOfLength(10); - CompletionSuggestionBuilder context = SuggestBuilders.completionSuggestion(suggestionName).field(FIELD).text("h").size(10) - .addGeoLocation("st", 52.52, 13.4); - - SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); - SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); - - assertEquals(suggestResponse.getSuggest().size(), 1); - assertEquals("Hotel Amsterdam in Berlin", suggestResponse.getSuggest().getSuggestion(suggestionName).iterator().next().getOptions().iterator().next().getText().string()); - } - - public void testMultiLevelGeo() throws Exception { - assertAcked(prepareCreate(INDEX).addMapping(TYPE, createMapping(TYPE, ContextBuilder.location("st") - .precision(1) - .precision(2) - .precision(3) - .precision(4) - .precision(5) - .precision(6) - .precision(7) - .precision(8) - .precision(9) - .precision(10) - .precision(11) - .precision(12) - .neighbors(true)))); - ensureYellow(); - - XContentBuilder source1 = jsonBuilder() - .startObject() - .startObject(FIELD) - .array("input", 
"Hotel Amsterdam", "Amsterdam") - .field("output", "Hotel Amsterdam in Berlin") - .startObject("context").latlon("st", 52.529172, 13.407333).endObject() - .endObject() - .endObject(); - client().prepareIndex(INDEX, TYPE, "1").setSource(source1).execute().actionGet(); - - client().admin().indices().prepareRefresh(INDEX).get(); - - for (int precision = 1; precision <= 12; precision++) { - String suggestionName = randomAsciiOfLength(10); - CompletionSuggestionBuilder context = new CompletionSuggestionBuilder(suggestionName).field(FIELD).text("h").size(10) - .addGeoLocation("st", 52.529172, 13.407333, precision); - - SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); - SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); - assertEquals(suggestResponse.getSuggest().size(), 1); - assertEquals("Hotel Amsterdam in Berlin", suggestResponse.getSuggest().getSuggestion(suggestionName).iterator().next() - .getOptions().iterator().next().getText().string()); - } - } - - public void testMappingIdempotency() throws Exception { - List<Integer> precisions = new ArrayList<>(); - for (int i = 0; i < randomIntBetween(4, 12); i++) { - precisions.add(i+1); - } - Collections.shuffle(precisions, getRandom()); - XContentBuilder mapping = jsonBuilder().startObject().startObject(TYPE) - .startObject("properties").startObject("completion") - .field("type", "completion") - .startObject("context") - .startObject("location") - .field("type", "geo") - .array("precision", (Object[])precisions.toArray(new Integer[precisions.size()])) - .endObject() - .endObject().endObject() - .endObject().endObject(); - - assertAcked(prepareCreate(INDEX).addMapping(TYPE, mapping.string())); - ensureYellow(); - - Collections.shuffle(precisions, getRandom()); - mapping = jsonBuilder().startObject().startObject(TYPE) - .startObject("properties").startObject("completion") - .field("type", "completion") - .startObject("context") - 
.startObject("location") - .field("type", "geo") - .array("precision", (Object[])precisions.toArray(new Integer[precisions.size()])) - .endObject() - .endObject().endObject() - .endObject().endObject(); - assertAcked(client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(mapping.string()).get()); - } - - public void testGeoField() throws Exception { - XContentBuilder mapping = jsonBuilder(); - mapping.startObject(); - mapping.startObject(TYPE); - mapping.startObject("properties"); - mapping.startObject("pin"); - mapping.field("type", "geo_point"); - mapping.endObject(); - mapping.startObject(FIELD); - mapping.field("type", "completion"); - mapping.field("analyzer", "simple"); - - mapping.startObject("context"); - mapping.value(ContextBuilder.location("st", 5, true).field("pin").build()); - mapping.endObject(); - - mapping.endObject(); - mapping.endObject(); - mapping.endObject(); - mapping.endObject(); - - assertAcked(prepareCreate(INDEX).addMapping(TYPE, mapping)); - ensureYellow(); - - XContentBuilder source1 = jsonBuilder() - .startObject() - .latlon("pin", 52.529172, 13.407333) - .startObject(FIELD) - .array("input", "Hotel Amsterdam", "Amsterdam") - .field("output", "Hotel Amsterdam in Berlin") - .startObject("context").endObject() - .endObject() - .endObject(); - client().prepareIndex(INDEX, TYPE, "1").setSource(source1).execute().actionGet(); - - XContentBuilder source2 = jsonBuilder() - .startObject() - .latlon("pin", 52.363389, 4.888695) - .startObject(FIELD) - .array("input", "Hotel Berlin", "Berlin") - .field("output", "Hotel Berlin in Amsterdam") - .startObject("context").endObject() - .endObject() - .endObject(); - client().prepareIndex(INDEX, TYPE, "2").setSource(source2).execute().actionGet(); - - refresh(); - - String suggestionName = randomAsciiOfLength(10); - CompletionSuggestionBuilder context = SuggestBuilders.completionSuggestion(suggestionName).field(FIELD).text("h").size(10) - .addGeoLocation("st", 52.52, 13.4); - 
SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); - SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); - - assertEquals(suggestResponse.getSuggest().size(), 1); - assertEquals("Hotel Amsterdam in Berlin", suggestResponse.getSuggest().getSuggestion(suggestionName).iterator().next().getOptions().iterator().next().getText().string()); - } - - public void testSimpleGeo() throws Exception { - String reinickendorf = "u337p3mp11e2"; - String pankow = "u33e0cyyjur4"; - String koepenick = "u33dm4f7fn40"; - String bernau = "u33etnjf1yjn"; - String berlin = "u33dc1v0xupz"; - String mitte = "u33dc0cpke4q"; - String steglitz = "u336m36rjh2p"; - String wilmersdorf = "u336wmw0q41s"; - String spandau = "u336uqek7gh6"; - String tempelhof = "u33d91jh3by0"; - String schoeneberg = "u336xdrkzbq7"; - String treptow = "u33d9unn7fp7"; - - double precision = 100.0; // meters - - assertAcked(prepareCreate(INDEX).addMapping(TYPE, createMapping(TYPE, ContextBuilder.location("st").precision(precision).neighbors(true)))); - ensureYellow(); - - String[] locations = { reinickendorf, pankow, koepenick, bernau, berlin, mitte, steglitz, wilmersdorf, spandau, tempelhof, - schoeneberg, treptow }; - - String[][] input = { { "pizza - reinickendorf", "pizza", "food" }, { "pizza - pankow", "pizza", "food" }, - { "pizza - koepenick", "pizza", "food" }, { "pizza - bernau", "pizza", "food" }, { "pizza - berlin", "pizza", "food" }, - { "pizza - mitte", "pizza - berlin mitte", "pizza", "food" }, - { "pizza - steglitz", "pizza - Berlin-Steglitz", "pizza", "food" }, { "pizza - wilmersdorf", "pizza", "food" }, - { "pizza - spandau", "spandau bei berlin", "pizza", "food" }, - { "pizza - tempelhof", "pizza - berlin-tempelhof", "pizza", "food" }, - { "pizza - schoeneberg", "pizza - schöneberg", "pizza - berlin schoeneberg", "pizza", "food" }, - { "pizza - treptow", "pizza", "food" } }; - - for (int i = 0; i < locations.length; i++) { - 
XContentBuilder source = jsonBuilder().startObject().startObject(FIELD).startArray("input").value(input[i]).endArray() - .startObject("context").field("st", locations[i]).endObject().field("payload", locations[i]).endObject().endObject(); - client().prepareIndex(INDEX, TYPE, "" + i).setSource(source).execute().actionGet(); - } - - refresh(); - - assertGeoSuggestionsInRange(berlin, "pizza", precision); - assertGeoSuggestionsInRange(reinickendorf, "pizza", precision); - assertGeoSuggestionsInRange(spandau, "pizza", precision); - assertGeoSuggestionsInRange(koepenick, "pizza", precision); - assertGeoSuggestionsInRange(schoeneberg, "pizza", precision); - assertGeoSuggestionsInRange(tempelhof, "pizza", precision); - assertGeoSuggestionsInRange(bernau, "pizza", precision); - assertGeoSuggestionsInRange(pankow, "pizza", precision); - assertGeoSuggestionsInRange(mitte, "pizza", precision); - assertGeoSuggestionsInRange(steglitz, "pizza", precision); - assertGeoSuggestionsInRange(mitte, "pizza", precision); - assertGeoSuggestionsInRange(wilmersdorf, "pizza", precision); - assertGeoSuggestionsInRange(treptow, "pizza", precision); - } - - public void testSimplePrefix() throws Exception { - assertAcked(prepareCreate(INDEX).addMapping(TYPE, createMapping(TYPE, ContextBuilder.category("st")))); - ensureYellow(); - - for (int i = 0; i < HEROS.length; i++) { - XContentBuilder source = jsonBuilder().startObject().startObject(FIELD).startArray("input").value(HEROS[i]).endArray() - .startObject("context").field("st", i%3).endObject() - .startObject("payload").field("group", i % 3).field("id", i).endObject() - .endObject().endObject(); - client().prepareIndex(INDEX, TYPE, "" + i).setSource(source).execute().actionGet(); - } - - refresh(); - - assertPrefixSuggestions(0, "a", "Afari, Jamal", "Adrian Corbo", "Adrian"); - assertPrefixSuggestions(0, "b", "Beaubier, Jeanne-Marie"); - assertPrefixSuggestions(0, "c", "Corbo, Adrian", "Crichton, Lady Jacqueline Falsworth"); - 
assertPrefixSuggestions(0, "mary", "Mary MacPherran \"Skeeter\"", "Mary MacPherran"); - assertPrefixSuggestions(0, "s", "Skeeter", "Smythe, Spencer", "Spencer Smythe", "Spencer"); - assertPrefixSuggestions(1, "s", "St. John", "St. John Allerdyce"); - assertPrefixSuggestions(2, "s", "Smythe, Alistair"); - assertPrefixSuggestions(1, "w", "Whitemane, Aelfyre"); - assertPrefixSuggestions(2, "w", "Whitemane, Kofi"); - } - - public void testTypeCategoryIsActuallyCalledCategory() throws Exception { - XContentBuilder mapping = jsonBuilder(); - mapping.startObject().startObject(TYPE).startObject("properties") - .startObject("suggest_field").field("type", "completion") - .startObject("context").startObject("color").field("type", "category").endObject().endObject() - .endObject() - .endObject().endObject().endObject(); - assertAcked(prepareCreate(INDEX).addMapping(TYPE, mapping)); - ensureYellow(); - XContentBuilder doc1 = jsonBuilder(); - doc1.startObject().startObject("suggest_field") - .field("input", "backpack_red") - .startObject("context").field("color", "red", "all_colors").endObject() - .endObject().endObject(); - XContentBuilder doc2 = jsonBuilder(); - doc2.startObject().startObject("suggest_field") - .field("input", "backpack_green") - .startObject("context").field("color", "green", "all_colors").endObject() - .endObject().endObject(); - - client().prepareIndex(INDEX, TYPE, "1") - .setSource(doc1).execute() - .actionGet(); - client().prepareIndex(INDEX, TYPE, "2") - .setSource(doc2).execute() - .actionGet(); - - refresh(); - getBackpackSuggestionAndCompare("all_colors", "backpack_red", "backpack_green"); - getBackpackSuggestionAndCompare("red", "backpack_red"); - getBackpackSuggestionAndCompare("green", "backpack_green"); - getBackpackSuggestionAndCompare("not_existing_color"); - - } - - private void getBackpackSuggestionAndCompare(String contextValue, String... 
expectedText) { - Set<String> expected = Sets.newHashSet(expectedText); - CompletionSuggestionBuilder context = SuggestBuilders.completionSuggestion("suggestion").field("suggest_field").text("back").size(10).addContextField("color", contextValue); - SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); - SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); - Suggest suggest = suggestResponse.getSuggest(); - assertEquals(suggest.size(), 1); - for (Suggestion<? extends Entry<? extends Option>> s : suggest) { - CompletionSuggestion suggestion = (CompletionSuggestion) s; - for (CompletionSuggestion.Entry entry : suggestion) { - List<CompletionSuggestion.Entry.Option> options = entry.getOptions(); - assertEquals(options.size(), expectedText.length); - for (CompletionSuggestion.Entry.Option option : options) { - assertTrue(expected.contains(option.getText().string())); - expected.remove(option.getText().string()); - } - } - } - } - - public void testBasic() throws Exception { - assertAcked(prepareCreate(INDEX).addMapping(TYPE, createMapping(TYPE, false, ContextBuilder.reference("st", "_type"), ContextBuilder.reference("nd", "_type")))); - ensureYellow(); - - client().prepareIndex(INDEX, TYPE, "1") - .setSource( - jsonBuilder().startObject().startObject(FIELD).startArray("input").value("my hotel").value("this hotel").endArray() - .startObject("context").endObject() - .field("payload", TYPE + "|" + TYPE).endObject().endObject()).execute() - .actionGet(); - - refresh(); - - assertDoubleFieldSuggestions(TYPE, TYPE, "m", "my hotel"); - } - - public void testSimpleField() throws Exception { - assertAcked(prepareCreate(INDEX).addMapping(TYPE, createMapping(TYPE, ContextBuilder.reference("st", "category")))); - ensureYellow(); - - for (int i = 0; i < HEROS.length; i++) { - client().prepareIndex(INDEX, TYPE, "" + i) - .setSource( - jsonBuilder().startObject().field("category", Integer.toString(i % 
3)).startObject(FIELD).startArray("input") - .value(HEROS[i]).endArray().startObject("context").endObject().field("payload", Integer.toString(i % 3)) - .endObject().endObject()).execute().actionGet(); - } - - refresh(); - - assertFieldSuggestions("0", "a", "Afari, Jamal", "Adrian Corbo", "Adrian"); - assertFieldSuggestions("0", "b", "Beaubier, Jeanne-Marie"); - assertFieldSuggestions("0", "c", "Corbo, Adrian", "Crichton, Lady Jacqueline Falsworth"); - assertFieldSuggestions("0", "mary", "Mary MacPherran \"Skeeter\"", "Mary MacPherran"); - assertFieldSuggestions("0", "s", "Skeeter", "Smythe, Spencer", "Spencer Smythe", "Spencer"); - assertFieldSuggestions("1", "s", "St. John", "St. John Allerdyce"); - assertFieldSuggestions("2", "s", "Smythe, Alistair"); - assertFieldSuggestions("1", "w", "Whitemane, Aelfyre"); - assertFieldSuggestions("2", "w", "Whitemane, Kofi"); - - } - - // see issue #10987 - public void testEmptySuggestion() throws Exception { - String mapping = jsonBuilder() - .startObject() - .startObject(TYPE) - .startObject("properties") - .startObject(FIELD) - .field("type", "completion") - .startObject("context") - .startObject("type_context") - .field("path", "_type") - .field("type", "category") - .endObject() - .endObject() - .endObject() - .endObject() - .endObject() - .endObject() - .string(); - - assertAcked(client().admin().indices().prepareCreate(INDEX).addMapping(TYPE, mapping).get()); - ensureGreen(); - - client().prepareIndex(INDEX, TYPE, "1").setSource(FIELD, "") - .setRefresh(true).get(); - - } - - public void testMultiValueField() throws Exception { - assertAcked(prepareCreate(INDEX).addMapping(TYPE, createMapping(TYPE, ContextBuilder.reference("st", "category")))); - ensureYellow(); - - for (int i = 0; i < HEROS.length; i++) { - client().prepareIndex(INDEX, TYPE, "" + i) - .setSource( - jsonBuilder().startObject().startArray("category").value(Integer.toString(i % 3)).value("other").endArray() - 
.startObject(FIELD).startArray("input").value(HEROS[i]).endArray().startObject("context").endObject() - .field("payload", Integer.toString(i % 3)).endObject().endObject()).execute().actionGet(); - } - - refresh(); - - assertFieldSuggestions("0", "a", "Afari, Jamal", "Adrian Corbo", "Adrian"); - assertFieldSuggestions("0", "b", "Beaubier, Jeanne-Marie"); - assertFieldSuggestions("0", "c", "Corbo, Adrian", "Crichton, Lady Jacqueline Falsworth"); - assertFieldSuggestions("0", "mary", "Mary MacPherran \"Skeeter\"", "Mary MacPherran"); - assertFieldSuggestions("0", "s", "Skeeter", "Smythe, Spencer", "Spencer Smythe", "Spencer"); - assertFieldSuggestions("1", "s", "St. John", "St. John Allerdyce"); - assertFieldSuggestions("2", "s", "Smythe, Alistair"); - assertFieldSuggestions("1", "w", "Whitemane, Aelfyre"); - assertFieldSuggestions("2", "w", "Whitemane, Kofi"); - } - - public void testMultiContext() throws Exception { - assertAcked(prepareCreate(INDEX).addMapping(TYPE, createMapping(TYPE, ContextBuilder.reference("st", "categoryA"), ContextBuilder.reference("nd", "categoryB")))); - ensureYellow(); - - for (int i = 0; i < HEROS.length; i++) { - client().prepareIndex(INDEX, TYPE, "" + i) - .setSource( - jsonBuilder().startObject().field("categoryA").value("" + (char) ('0' + (i % 3))).field("categoryB") - .value("" + (char) ('A' + (i % 3))).startObject(FIELD).startArray("input").value(HEROS[i]).endArray() - .startObject("context").endObject().field("payload", ((char) ('0' + (i % 3))) + "" + (char) ('A' + (i % 3))) - .endObject().endObject()).execute().actionGet(); - } - - refresh(); - - assertMultiContextSuggestions("0", "A", "a", "Afari, Jamal", "Adrian Corbo", "Adrian"); - assertMultiContextSuggestions("0", "A", "b", "Beaubier, Jeanne-Marie"); - assertMultiContextSuggestions("0", "A", "c", "Corbo, Adrian", "Crichton, Lady Jacqueline Falsworth"); - assertMultiContextSuggestions("0", "A", "mary", "Mary MacPherran \"Skeeter\"", "Mary MacPherran"); - 
assertMultiContextSuggestions("0", "A", "s", "Skeeter", "Smythe, Spencer", "Spencer Smythe", "Spencer"); - assertMultiContextSuggestions("1", "B", "s", "St. John", "St. John Allerdyce"); - assertMultiContextSuggestions("2", "C", "s", "Smythe, Alistair"); - assertMultiContextSuggestions("1", "B", "w", "Whitemane, Aelfyre"); - assertMultiContextSuggestions("2", "C", "w", "Whitemane, Kofi"); - } - - public void testMultiContextWithFuzzyLogic() throws Exception { - assertAcked(prepareCreate(INDEX).addMapping(TYPE, createMapping(TYPE, ContextBuilder.reference("st", "categoryA"), ContextBuilder.reference("nd", "categoryB")))); - ensureYellow(); - - for (int i = 0; i < HEROS.length; i++) { - String source = jsonBuilder().startObject().field("categoryA", "" + (char) ('0' + (i % 3))) - .field("categoryB", "" + (char) ('a' + (i % 3))).startObject(FIELD).array("input", HEROS[i]) - .startObject("context").endObject().startObject("payload").field("categoryA", "" + (char) ('0' + (i % 3))) - .field("categoryB", "" + (char) ('a' + (i % 3))).endObject().endObject().endObject().string(); - client().prepareIndex(INDEX, TYPE, "" + i).setSource(source).execute().actionGet(); - } - - refresh(); - - String[] prefix1 = { "0", "1", "2" }; - String[] prefix2 = { "a", "b", "c" }; - String[] prefix3 = { "0", "1" }; - String[] prefix4 = { "a", "b" }; - - assertContextWithFuzzySuggestions(prefix1, prefix2, "mary", "MacKenzie, Al", "MacPherran, Mary", "MacPherran, Mary \"Skeeter\"", - "MacTaggert, Moira", "Mary MacPherran", "Mary MacPherran \"Skeeter\""); - assertContextWithFuzzySuggestions(prefix1, prefix2, "mac", "Mikhail", "Mary MacPherran \"Skeeter\"", "MacTaggert, Moira", - "Moira MacTaggert", "Moira", "MacKenzie, Al", "Mary MacPherran", "Mikhail Rasputin", "MacPherran, Mary", - "MacPherran, Mary \"Skeeter\""); - assertContextWithFuzzySuggestions(prefix3, prefix4, "mary", "MacPherran, Mary", "MacPherran, Mary \"Skeeter\"", - "MacTaggert, Moira", "Mary MacPherran", "Mary MacPherran 
\"Skeeter\""); - assertContextWithFuzzySuggestions(prefix3, prefix4, "mac", "MacPherran, Mary", "MacPherran, Mary \"Skeeter\"", "MacTaggert, Moira", - "Mary MacPherran", "Mary MacPherran \"Skeeter\"", "Mikhail", "Mikhail Rasputin", "Moira", "Moira MacTaggert"); - } - - public void testSimpleType() throws Exception { - String[] types = { TYPE + "A", TYPE + "B", TYPE + "C" }; - - CreateIndexRequestBuilder createIndexRequestBuilder = prepareCreate(INDEX); - for (String type : types) { - createIndexRequestBuilder.addMapping(type, createMapping(type, ContextBuilder.reference("st", "_type"))); - } - assertAcked(createIndexRequestBuilder); - ensureYellow(); - - for (int i = 0; i < HEROS.length; i++) { - String type = types[i % types.length]; - client().prepareIndex(INDEX, type, "" + i) - .setSource( - jsonBuilder().startObject().startObject(FIELD).startArray("input").value(HEROS[i]).endArray() - .startObject("context").endObject().field("payload", type).endObject().endObject()).execute().actionGet(); - } - - refresh(); - - assertFieldSuggestions(types[0], "a", "Afari, Jamal", "Adrian Corbo", "Adrian"); - assertFieldSuggestions(types[0], "b", "Beaubier, Jeanne-Marie"); - assertFieldSuggestions(types[0], "c", "Corbo, Adrian", "Crichton, Lady Jacqueline Falsworth"); - assertFieldSuggestions(types[0], "mary", "Mary MacPherran \"Skeeter\"", "Mary MacPherran"); - assertFieldSuggestions(types[0], "s", "Skeeter", "Smythe, Spencer", "Spencer Smythe", "Spencer"); - assertFieldSuggestions(types[1], "s", "St. John", "St. 
John Allerdyce"); - assertFieldSuggestions(types[2], "s", "Smythe, Alistair"); - assertFieldSuggestions(types[1], "w", "Whitemane, Aelfyre"); - assertFieldSuggestions(types[2], "w", "Whitemane, Kofi"); - } - - // issue 5525, default location didnt work with lat/lon map, and did not set default location appropriately - public void testGeoContextDefaultMapping() throws Exception { - GeoPoint berlinAlexanderplatz = GeoPoint.fromGeohash("u33dc1"); - - XContentBuilder xContentBuilder = jsonBuilder().startObject() - .startObject("poi").startObject("properties").startObject("suggest") - .field("type", "completion") - .startObject("context").startObject("location") - .field("type", "geo") - .field("precision", "500m") - .startObject("default").field("lat", berlinAlexanderplatz.lat()).field("lon", berlinAlexanderplatz.lon()).endObject() - .endObject().endObject() - .endObject().endObject().endObject() - .endObject(); - - assertAcked(prepareCreate(INDEX).addMapping("poi", xContentBuilder)); - ensureYellow(); - - index(INDEX, "poi", "1", jsonBuilder().startObject().startObject("suggest").field("input", "Berlin Alexanderplatz").endObject().endObject()); - refresh(); - - CompletionSuggestionBuilder suggestionBuilder = SuggestBuilders.completionSuggestion("suggestion").field("suggest").text("b").size(10).addGeoLocation("location", berlinAlexanderplatz.lat(), berlinAlexanderplatz.lon()); - SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(suggestionBuilder).get(); - assertSuggestion(suggestResponse.getSuggest(), 0, "suggestion", "Berlin Alexanderplatz"); - } - - // issue 5525, setting the path of a category context and then indexing a document without that field returned an error - public void testThatMissingPrefixesForContextReturnException() throws Exception { - XContentBuilder xContentBuilder = jsonBuilder().startObject() - .startObject("service").startObject("properties").startObject("suggest") - .field("type", "completion") - 
.startObject("context").startObject("color") - .field("type", "category") - .field("path", "color") - .endObject().endObject() - .endObject().endObject().endObject() - .endObject(); - - assertAcked(prepareCreate(INDEX).addMapping("service", xContentBuilder)); - ensureYellow(); - - // now index a document with color field - index(INDEX, "service", "1", jsonBuilder().startObject().field("color", "red").startObject("suggest").field("input", "backback").endObject().endObject()); - - // now index a document without a color field - try { - index(INDEX, "service", "2", jsonBuilder().startObject().startObject("suggest").field("input", "backback").endObject().endObject()); - fail("index operation was not supposed to be successful"); - } catch (IllegalArgumentException e) { - assertThat(e.getMessage(), containsString("one or more prefixes needed")); - } - } - - // issue 5525, the geo point parser did not work when the lat/lon values were inside of a value object - public void testThatLocationVenueCanBeParsedAsDocumented() throws Exception { - XContentBuilder xContentBuilder = jsonBuilder().startObject() - .startObject("poi").startObject("properties").startObject("suggest") - .field("type", "completion") - .startObject("context").startObject("location") - .field("type", "geo") - .field("precision", "1m") - .endObject().endObject() - .endObject().endObject().endObject() - .endObject(); - - assertAcked(prepareCreate(INDEX).addMapping("poi", xContentBuilder)); - ensureYellow(); - - SuggestRequest suggestRequest = new SuggestRequest(INDEX); - XContentBuilder builder = jsonBuilder().startObject() - .startObject("suggest") - .field("text", "m") - .startObject("completion") - .field("field", "suggest") - .startObject("context").startObject("location").startObject("value").field("lat", 0).field("lon", 0).endObject().field("precision", "1km").endObject().endObject() - .endObject() - .endObject() - .endObject(); - suggestRequest.suggest(builder.bytes()); - - SuggestResponse 
suggestResponse = client().suggest(suggestRequest).get(); - assertNoFailures(suggestResponse); - } - - public void testThatCategoryDefaultWorks() throws Exception { - XContentBuilder xContentBuilder = jsonBuilder().startObject() - .startObject("item").startObject("properties").startObject("suggest") - .field("type", "completion") - .startObject("context").startObject("color") - .field("type", "category").field("default", "red") - .endObject().endObject() - .endObject().endObject().endObject() - .endObject(); - - assertAcked(prepareCreate(INDEX).addMapping("item", xContentBuilder)); - ensureYellow(); - - index(INDEX, "item", "1", jsonBuilder().startObject().startObject("suggest").field("input", "Hoodie red").endObject().endObject()); - index(INDEX, "item", "2", jsonBuilder().startObject().startObject("suggest").field("input", "Hoodie blue").startObject("context").field("color", "blue").endObject().endObject().endObject()); - refresh(); - - CompletionSuggestionBuilder suggestionBuilder = SuggestBuilders.completionSuggestion("suggestion").field("suggest").text("h").size(10).addContextField("color", "red"); - SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(suggestionBuilder).get(); - assertSuggestion(suggestResponse.getSuggest(), 0, "suggestion", "Hoodie red"); - } - - public void testThatDefaultCategoryAndPathWorks() throws Exception { - XContentBuilder xContentBuilder = jsonBuilder().startObject() - .startObject("item").startObject("properties").startObject("suggest") - .field("type", "completion") - .startObject("context").startObject("color") - .field("type", "category") - .field("default", "red") - .field("path", "color") - .endObject().endObject() - .endObject().endObject().endObject() - .endObject(); - - assertAcked(prepareCreate(INDEX).addMapping("item", xContentBuilder)); - ensureYellow(); - - index(INDEX, "item", "1", jsonBuilder().startObject().startObject("suggest").field("input", "Hoodie red").endObject().endObject()); - 
index(INDEX, "item", "2", jsonBuilder().startObject().startObject("suggest").field("input", "Hoodie blue").endObject().field("color", "blue").endObject()); - refresh(); - - CompletionSuggestionBuilder suggestionBuilder = SuggestBuilders.completionSuggestion("suggestion").field("suggest").text("h").size(10).addContextField("color", "red"); - SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(suggestionBuilder).get(); - assertSuggestion(suggestResponse.getSuggest(), 0, "suggestion", "Hoodie red"); - } - - public void testThatGeoPrecisionIsWorking() throws Exception { - XContentBuilder xContentBuilder = jsonBuilder().startObject() - .startObject("item").startObject("properties").startObject("suggest") - .field("type", "completion") - .startObject("context").startObject("location") - .field("type", "geo") - .field("precision", 4) // this means geo hashes with a length of four are used, like u345 - .endObject().endObject() - .endObject().endObject().endObject() - .endObject(); - - assertAcked(prepareCreate(INDEX).addMapping("item", xContentBuilder)); - ensureYellow(); - - // lets create some locations by geohashes in different cells with the precision 4 - // this means, that poelchaustr is not a neighour to alexanderplatz, but they share the same prefix until the fourth char! 
- GeoPoint alexanderplatz = GeoPoint.fromGeohash("u33dc1"); - GeoPoint poelchaustr = GeoPoint.fromGeohash("u33du5"); - GeoPoint dahlem = GeoPoint.fromGeohash("u336q"); // berlin dahlem, should be included with that precision - GeoPoint middleOfNoWhere = GeoPoint.fromGeohash("u334"); // location for west from berlin, should not be included in any suggestions - - index(INDEX, "item", "1", jsonBuilder().startObject().startObject("suggest").field("input", "Berlin Alexanderplatz").field("weight", 3).startObject("context").startObject("location").field("lat", alexanderplatz.lat()).field("lon", alexanderplatz.lon()).endObject().endObject().endObject().endObject()); - index(INDEX, "item", "2", jsonBuilder().startObject().startObject("suggest").field("input", "Berlin Poelchaustr.").field("weight", 2).startObject("context").startObject("location").field("lat", poelchaustr.lat()).field("lon", poelchaustr.lon()).endObject().endObject().endObject().endObject()); - index(INDEX, "item", "3", jsonBuilder().startObject().startObject("suggest").field("input", "Berlin Far Away").field("weight", 1).startObject("context").startObject("location").field("lat", middleOfNoWhere.lat()).field("lon", middleOfNoWhere.lon()).endObject().endObject().endObject().endObject()); - index(INDEX, "item", "4", jsonBuilder().startObject().startObject("suggest").field("input", "Berlin Dahlem").field("weight", 1).startObject("context").startObject("location").field("lat", dahlem.lat()).field("lon", dahlem.lon()).endObject().endObject().endObject().endObject()); - refresh(); - - CompletionSuggestionBuilder suggestionBuilder = SuggestBuilders.completionSuggestion("suggestion").field("suggest").text("b").size(10).addGeoLocation("location", alexanderplatz.lat(), alexanderplatz.lon()); - SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(suggestionBuilder).get(); - assertSuggestion(suggestResponse.getSuggest(), 0, "suggestion", "Berlin Alexanderplatz", "Berlin Poelchaustr.", "Berlin 
Dahlem"); - } - - public void testThatNeighborsCanBeExcluded() throws Exception { - XContentBuilder xContentBuilder = jsonBuilder().startObject() - .startObject("item").startObject("properties").startObject("suggest") - .field("type", "completion") - .startObject("context").startObject("location") - .field("type", "geo") - .field("precision", 6) - .field("neighbors", false) - .endObject().endObject() - .endObject().endObject().endObject() - .endObject(); - - assertAcked(prepareCreate(INDEX).addMapping("item", xContentBuilder)); - ensureYellow(); - - GeoPoint alexanderplatz = GeoPoint.fromGeohash("u33dc1"); - // does not look like it, but is a direct neighbor - // this test would fail, if the precision was set 4, as then both cells would be the same, u33d - GeoPoint cellNeighbourOfAlexanderplatz = GeoPoint.fromGeohash("u33dbc"); - - index(INDEX, "item", "1", jsonBuilder().startObject().startObject("suggest").field("input", "Berlin Alexanderplatz").field("weight", 3).startObject("context").startObject("location").field("lat", alexanderplatz.lat()).field("lon", alexanderplatz.lon()).endObject().endObject().endObject().endObject()); - index(INDEX, "item", "2", jsonBuilder().startObject().startObject("suggest").field("input", "Berlin Hackescher Markt").field("weight", 2).startObject("context").startObject("location").field("lat", cellNeighbourOfAlexanderplatz.lat()).field("lon", cellNeighbourOfAlexanderplatz.lon()).endObject().endObject().endObject().endObject()); - refresh(); - - CompletionSuggestionBuilder suggestionBuilder = SuggestBuilders.completionSuggestion("suggestion").field("suggest").text("b").size(10).addGeoLocation("location", alexanderplatz.lat(), alexanderplatz.lon()); - SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(suggestionBuilder).get(); - assertSuggestion(suggestResponse.getSuggest(), 0, "suggestion", "Berlin Alexanderplatz"); - } - - public void testThatGeoPathCanBeSelected() throws Exception { - XContentBuilder 
xContentBuilder = jsonBuilder().startObject() - .startObject("item").startObject("properties").startObject("suggest") - .field("type", "completion") - .startObject("context").startObject("location") - .field("type", "geo") - .field("precision", "5m") - .field("path", "loc") - .endObject().endObject() - .endObject().endObject().endObject() - .endObject(); - - assertAcked(prepareCreate(INDEX).addMapping("item", xContentBuilder)); - ensureYellow(); - - GeoPoint alexanderplatz = GeoPoint.fromGeohash("u33dc1"); - index(INDEX, "item", "1", jsonBuilder().startObject().startObject("suggest").field("input", "Berlin Alexanderplatz").endObject().startObject("loc").field("lat", alexanderplatz.lat()).field("lon", alexanderplatz.lon()).endObject().endObject()); - refresh(); - - CompletionSuggestionBuilder suggestionBuilder = SuggestBuilders.completionSuggestion("suggestion").field("suggest").text("b").size(10).addGeoLocation("location", alexanderplatz.lat(), alexanderplatz.lon()); - SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(suggestionBuilder).get(); - assertSuggestion(suggestResponse.getSuggest(), 0, "suggestion", "Berlin Alexanderplatz"); - } - - public void testThatPrecisionIsRequired() throws Exception { - XContentBuilder xContentBuilder = jsonBuilder().startObject() - .startObject("item").startObject("properties").startObject("suggest") - .field("type", "completion") - .startObject("context").startObject("location") - .field("type", "geo") - .field("path", "loc") - .endObject().endObject() - .endObject().endObject().endObject() - .endObject(); - - try { - prepareCreate(INDEX).addMapping("item", xContentBuilder).get(); - fail("Expected MapperParsingException"); - } catch (MapperParsingException e) { - assertThat(e.getMessage(), is("Failed to parse mapping [item]: field [precision] is missing")); - } - } - - public void testThatLatLonParsingFromSourceWorks() throws Exception { - XContentBuilder xContentBuilder = jsonBuilder().startObject() 
- .startObject("mappings").startObject("test").startObject("properties").startObject("suggest_geo") - .field("type", "completion") - .startObject("context").startObject("location") - .field("type", "geo") - .field("precision", "1km") - .endObject().endObject() - .endObject().endObject().endObject() - .endObject().endObject(); - - assertAcked(prepareCreate("test").setSource(xContentBuilder.bytes())); - - double latitude = 52.22; - double longitude = 4.53; - String geohash = GeoHashUtils.stringEncode(longitude, latitude); - - XContentBuilder doc1 = jsonBuilder().startObject().startObject("suggest_geo").field("input", "Hotel Marriot in Amsterdam").startObject("context").startObject("location").field("lat", latitude).field("lon", longitude).endObject().endObject().endObject().endObject(); - index("test", "test", "1", doc1); - XContentBuilder doc2 = jsonBuilder().startObject().startObject("suggest_geo").field("input", "Hotel Marriot in Berlin").startObject("context").startObject("location").field("lat", 53.31).field("lon", 13.24).endObject().endObject().endObject().endObject(); - index("test", "test", "2", doc2); - refresh(); - - XContentBuilder source = jsonBuilder().startObject().startObject("suggestion").field("text", "h").startObject("completion").field("field", "suggest_geo").startObject("context").field("location", geohash).endObject().endObject().endObject().endObject(); - SuggestRequest suggestRequest = new SuggestRequest(INDEX).suggest(source.bytes()); - SuggestResponse suggestResponse = client().suggest(suggestRequest).get(); - assertSuggestion(suggestResponse.getSuggest(), 0, "suggestion", "Hotel Marriot in Amsterdam"); - - // this is exact the same request, but using lat/lon instead of geohash - source = jsonBuilder().startObject().startObject("suggestion").field("text", "h").startObject("completion").field("field", "suggest_geo").startObject("context").startObject("location").field("lat", latitude).field("lon", 
longitude).endObject().endObject().endObject().endObject().endObject(); - suggestRequest = new SuggestRequest(INDEX).suggest(source.bytes()); - suggestResponse = client().suggest(suggestRequest).get(); - assertSuggestion(suggestResponse.getSuggest(), 0, "suggestion", "Hotel Marriot in Amsterdam"); - } - - public void assertGeoSuggestionsInRange(String location, String suggest, double precision) throws IOException { - String suggestionName = randomAsciiOfLength(10); - CompletionSuggestionBuilder context = SuggestBuilders.completionSuggestion(suggestionName).field(FIELD).text(suggest).size(10) - .addGeoLocation("st", location); - SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); - SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); - - Suggest suggest2 = suggestResponse.getSuggest(); - assertTrue(suggest2.iterator().hasNext()); - for (Suggestion<? extends Entry<? extends Option>> s : suggest2) { - CompletionSuggestion suggestion = (CompletionSuggestion) s; - assertTrue(suggestion.iterator().hasNext()); - for (CompletionSuggestion.Entry entry : suggestion) { - List<CompletionSuggestion.Entry.Option> options = entry.getOptions(); - assertTrue(options.iterator().hasNext()); - for (CompletionSuggestion.Entry.Option option : options) { - String target = option.getPayloadAsString(); - assertDistance(location, target, Matchers.lessThanOrEqualTo(precision)); - } - } - } - } - - public void assertPrefixSuggestions(long prefix, String suggest, String... 
hits) throws IOException { - String suggestionName = randomAsciiOfLength(10); - CompletionSuggestionBuilder context = SuggestBuilders.completionSuggestion(suggestionName).field(FIELD).text(suggest) - .size(hits.length + 1).addCategory("st", Long.toString(prefix)); - SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); - SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); - ArrayList<String> suggestions = new ArrayList<>(); - Suggest suggest2 = suggestResponse.getSuggest(); - assertTrue(suggest2.iterator().hasNext()); - for (Suggestion<? extends Entry<? extends Option>> s : suggest2) { - CompletionSuggestion suggestion = (CompletionSuggestion) s; - for (CompletionSuggestion.Entry entry : suggestion) { - List<CompletionSuggestion.Entry.Option> options = entry.getOptions(); - for (CompletionSuggestion.Entry.Option option : options) { - Map<String, Object> payload = option.getPayloadAsMap(); - int group = (Integer) payload.get("group"); - String text = option.getText().string(); - assertEquals(prefix, group); - suggestions.add(text); - } - } - } - assertSuggestionsMatch(suggestions, hits); - } - - public void assertContextWithFuzzySuggestions(String[] prefix1, String[] prefix2, String suggest, String... hits) throws IOException { - String suggestionName = randomAsciiOfLength(10); - CompletionSuggestionFuzzyBuilder context = SuggestBuilders.fuzzyCompletionSuggestion(suggestionName).field(FIELD).text(suggest) - .size(hits.length + 10).addContextField("st", prefix1).addContextField("nd", prefix2).setFuzziness(Fuzziness.TWO); - SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); - SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); - - ArrayList<String> suggestions = new ArrayList<>(); - - Suggest suggest2 = suggestResponse.getSuggest(); - assertTrue(suggest2.iterator().hasNext()); - for (Suggestion<? extends Entry<? 
extends Option>> s : suggest2) { - CompletionSuggestion suggestion = (CompletionSuggestion) s; - for (CompletionSuggestion.Entry entry : suggestion) { - List<CompletionSuggestion.Entry.Option> options = entry.getOptions(); - for (CompletionSuggestion.Entry.Option option : options) { - Map<String, Object> payload = option.getPayloadAsMap(); - String text = option.getText().string(); - assertThat(prefix1, Matchers.hasItemInArray(payload.get("categoryA"))); - assertThat(prefix2, Matchers.hasItemInArray(payload.get("categoryB"))); - suggestions.add(text); - } - } - } - - assertSuggestionsMatch(suggestions, hits); - } - - public void assertFieldSuggestions(String value, String suggest, String... hits) throws IOException { - String suggestionName = randomAsciiOfLength(10); - CompletionSuggestionBuilder context = SuggestBuilders.completionSuggestion(suggestionName).field(FIELD).text(suggest).size(10) - .addContextField("st", value); - SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); - SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); - - ArrayList<String> suggestions = new ArrayList<>(); - - Suggest suggest2 = suggestResponse.getSuggest(); - for (Suggestion<? extends Entry<? extends Option>> s : suggest2) { - CompletionSuggestion suggestion = (CompletionSuggestion) s; - for (CompletionSuggestion.Entry entry : suggestion) { - List<CompletionSuggestion.Entry.Option> options = entry.getOptions(); - for (CompletionSuggestion.Entry.Option option : options) { - String payload = option.getPayloadAsString(); - String text = option.getText().string(); - assertEquals(value, payload); - suggestions.add(text); - } - } - } - assertSuggestionsMatch(suggestions, hits); - } - - public void assertDoubleFieldSuggestions(String field1, String field2, String suggest, String... 
hits) throws IOException { - String suggestionName = randomAsciiOfLength(10); - CompletionSuggestionBuilder context = SuggestBuilders.completionSuggestion(suggestionName).field(FIELD).text(suggest).size(10) - .addContextField("st", field1).addContextField("nd", field2); - SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); - SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); - ArrayList<String> suggestions = new ArrayList<>(); - - Suggest suggest2 = suggestResponse.getSuggest(); - for (Suggestion<? extends Entry<? extends Option>> s : suggest2) { - CompletionSuggestion suggestion = (CompletionSuggestion) s; - for (CompletionSuggestion.Entry entry : suggestion) { - List<CompletionSuggestion.Entry.Option> options = entry.getOptions(); - for (CompletionSuggestion.Entry.Option option : options) { - String payload = option.getPayloadAsString(); - String text = option.getText().string(); - assertEquals(field1 + "|" + field2, payload); - suggestions.add(text); - } - } - } - assertSuggestionsMatch(suggestions, hits); - } - - public void assertMultiContextSuggestions(String value1, String value2, String suggest, String... hits) throws IOException { - String suggestionName = randomAsciiOfLength(10); - CompletionSuggestionBuilder context = SuggestBuilders.completionSuggestion(suggestionName).field(FIELD).text(suggest).size(10) - .addContextField("st", value1).addContextField("nd", value2); - - SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); - SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); - ArrayList<String> suggestions = new ArrayList<>(); - - Suggest suggest2 = suggestResponse.getSuggest(); - for (Suggestion<? extends Entry<? 
extends Option>> s : suggest2) { - CompletionSuggestion suggestion = (CompletionSuggestion) s; - for (CompletionSuggestion.Entry entry : suggestion) { - List<CompletionSuggestion.Entry.Option> options = entry.getOptions(); - for (CompletionSuggestion.Entry.Option option : options) { - String payload = option.getPayloadAsString(); - String text = option.getText().string(); - assertEquals(value1 + value2, payload); - suggestions.add(text); - } - } - } - assertSuggestionsMatch(suggestions, hits); - } - - private void assertSuggestionsMatch(List<String> suggestions, String... hits) { - boolean[] suggested = new boolean[hits.length]; - Arrays.sort(hits); - Arrays.fill(suggested, false); - int numSuggestions = 0; - - for (String suggestion : suggestions) { - int hitpos = Arrays.binarySearch(hits, suggestion); - - assertEquals(hits[hitpos], suggestion); - assertTrue(hitpos >= 0); - assertTrue(!suggested[hitpos]); - - suggested[hitpos] = true; - numSuggestions++; - - } - assertEquals(hits.length, numSuggestions); - } - - private XContentBuilder createMapping(String type, ContextBuilder<?>... context) throws IOException { - return createMapping(type, false, context); - } - - private XContentBuilder createMapping(String type, boolean preserveSeparators, ContextBuilder<?>... context) throws IOException { - return createMapping(type, "simple", "simple", true, preserveSeparators, true, context); - } - - private XContentBuilder createMapping(String type, String indexAnalyzer, String searchAnalyzer, boolean payloads, boolean preserveSeparators, - boolean preservePositionIncrements, ContextBuilder<?>... 
contexts) throws IOException { - XContentBuilder mapping = jsonBuilder(); - mapping.startObject(); - mapping.startObject(type); - mapping.startObject("properties"); - mapping.startObject(FIELD); - mapping.field("type", "completion"); - mapping.field("analyzer", indexAnalyzer); - mapping.field("search_analyzer", searchAnalyzer); - mapping.field("payloads", payloads); - mapping.field("preserve_separators", preserveSeparators); - mapping.field("preserve_position_increments", preservePositionIncrements); - - mapping.startObject("context"); - for (ContextBuilder<? extends ContextMapping> context : contexts) { - mapping.value(context.build()); - } - mapping.endObject(); - - mapping.endObject(); - mapping.endObject(); - mapping.endObject(); - mapping.endObject(); - return mapping; - } -} diff --git a/core/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java b/core/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java deleted file mode 100644 index 398310d3a0..0000000000 --- a/core/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java +++ /dev/null @@ -1,335 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.search.suggest.completion; - -import com.carrotsearch.hppc.ObjectLongHashMap; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.FieldsConsumer; -import org.apache.lucene.index.Fields; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.suggest.Lookup; -import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester; -import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.Accountables; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.IntsRef; -import org.apache.lucene.util.automaton.Automaton; -import org.apache.lucene.util.fst.ByteSequenceOutputs; -import org.apache.lucene.util.fst.FST; -import org.apache.lucene.util.fst.PairOutputs; -import org.apache.lucene.util.fst.PairOutputs.Pair; -import org.apache.lucene.util.fst.PositiveIntOutputs; -import org.elasticsearch.common.regex.Regex; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.core.CompletionFieldMapper; -import org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider.AnalyzingSuggestHolder; -import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.CompletionLookupProvider; -import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.LookupFactory; -import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery; - -import java.io.IOException; -import java.util.Collection; -import 
java.util.HashMap; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; - -import static org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester.HOLE_CHARACTER; - -/** - * This is an older implementation of the AnalyzingCompletionLookupProvider class - * We use this to test for backwards compatibility in our tests, namely - * CompletionPostingsFormatTests - * This ensures upgrades between versions work smoothly - */ -public class AnalyzingCompletionLookupProviderV1 extends CompletionLookupProvider { - - // for serialization - public static final int SERIALIZE_PRESERVE_SEPARATORS = 1; - public static final int SERIALIZE_HAS_PAYLOADS = 2; - public static final int SERIALIZE_PRESERVE_POSITION_INCREMENTS = 4; - - private static final int MAX_SURFACE_FORMS_PER_ANALYZED_FORM = 256; - private static final int MAX_GRAPH_EXPANSIONS = -1; - - public static final String CODEC_NAME = "analyzing"; - public static final int CODEC_VERSION = 1; - - private boolean preserveSep; - private boolean preservePositionIncrements; - private int maxSurfaceFormsPerAnalyzedForm; - private int maxGraphExpansions; - private boolean hasPayloads; - private final XAnalyzingSuggester prototype; - - // important, these are the settings from the old xanalyzingsuggester - public static final int SEP_LABEL = 0xFF; - public static final int END_BYTE = 0x0; - public static final int PAYLOAD_SEP = '\u001f'; - - public AnalyzingCompletionLookupProviderV1(boolean preserveSep, boolean exactFirst, boolean preservePositionIncrements, boolean hasPayloads) { - this.preserveSep = preserveSep; - this.preservePositionIncrements = preservePositionIncrements; - this.hasPayloads = hasPayloads; - this.maxSurfaceFormsPerAnalyzedForm = MAX_SURFACE_FORMS_PER_ANALYZED_FORM; - this.maxGraphExpansions = MAX_GRAPH_EXPANSIONS; - int options = preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0; - // needs to fixed in the suggester first before it can be supported - //options |= exactFirst ? 
XAnalyzingSuggester.EXACT_FIRST : 0; - prototype = new XAnalyzingSuggester(null, null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements, - null, false, 1, SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); - } - - @Override - public String getName() { - return "analyzing"; - } - - @Override - public FieldsConsumer consumer(final IndexOutput output) throws IOException { - // TODO write index header? - CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION); - return new FieldsConsumer() { - private Map<String, Long> fieldOffsets = new HashMap<>(); - - @Override - public void close() throws IOException { - try { /* - * write the offsets per field such that we know where - * we need to load the FSTs from - */ - long pointer = output.getFilePointer(); - output.writeVInt(fieldOffsets.size()); - for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) { - output.writeString(entry.getKey()); - output.writeVLong(entry.getValue()); - } - output.writeLong(pointer); - } finally { - IOUtils.close(output); - } - } - - @Override - public void write(Fields fields) throws IOException { - for (String field : fields) { - Terms terms = fields.terms(field); - if (terms == null) { - continue; - } - TermsEnum termsEnum = terms.iterator(); - PostingsEnum docsEnum = null; - final SuggestPayload spare = new SuggestPayload(); - int maxAnalyzedPathsForOneInput = 0; - final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP); - int docCount = 0; - while (true) { - BytesRef term = termsEnum.next(); - if (term == null) { - break; - } - docsEnum = termsEnum.postings(docsEnum, PostingsEnum.PAYLOADS); - builder.startTerm(term); - int docFreq = 0; - while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - for (int i = 0; i < docsEnum.freq(); i++) { - final int position = docsEnum.nextPosition(); - 
AnalyzingCompletionLookupProviderV1.this.parsePayload(docsEnum.getPayload(), spare); - builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight); - // multi fields have the same surface form so we sum up here - maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1); - } - docFreq++; - docCount = Math.max(docCount, docsEnum.docID() + 1); - } - builder.finishTerm(docFreq); - } - /* - * Here we are done processing the field and we can - * buid the FST and write it to disk. - */ - FST<Pair<Long, BytesRef>> build = builder.build(); - assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: [" + docCount + "]"; - /* - * it's possible that the FST is null if we have 2 segments that get merged - * and all docs that have a value in this field are deleted. This will cause - * a consumer to be created but it doesn't consume any values causing the FSTBuilder - * to return null. - */ - if (build != null) { - fieldOffsets.put(field, output.getFilePointer()); - build.save(output); - /* write some more meta-info */ - output.writeVInt(maxAnalyzedPathsForOneInput); - output.writeVInt(maxSurfaceFormsPerAnalyzedForm); - output.writeInt(maxGraphExpansions); // can be negative - int options = 0; - options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0; - options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0; - options |= preservePositionIncrements ? 
SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0; - output.writeVInt(options); - } - } - } - }; - } - - @Override - public LookupFactory load(IndexInput input) throws IOException { - CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION, CODEC_VERSION); - final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>(); - input.seek(input.length() - 8); - long metaPointer = input.readLong(); - input.seek(metaPointer); - int numFields = input.readVInt(); - - Map<Long, String> meta = new TreeMap<>(); - for (int i = 0; i < numFields; i++) { - String name = input.readString(); - long offset = input.readVLong(); - meta.put(offset, name); - } - long sizeInBytes = 0; - for (Map.Entry<Long, String> entry : meta.entrySet()) { - input.seek(entry.getKey()); - FST<Pair<Long, BytesRef>> fst = new FST<>(input, new PairOutputs<>( - PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); - int maxAnalyzedPathsForOneInput = input.readVInt(); - int maxSurfaceFormsPerAnalyzedForm = input.readVInt(); - int maxGraphExpansions = input.readInt(); - int options = input.readVInt(); - boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPARATORS) != 0; - boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0; - boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0; - sizeInBytes += fst.ramBytesUsed(); - lookupMap.put(entry.getValue(), new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, - hasPayloads, maxAnalyzedPathsForOneInput, fst)); - } - final long ramBytesUsed = sizeInBytes; - return new LookupFactory() { - @Override - public Lookup getLookup(CompletionFieldMapper.CompletionFieldType fieldType, CompletionSuggestionContext suggestionContext) { - AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.names().indexName()); - if (analyzingSuggestHolder == null) { - return null; - } - int flags = analyzingSuggestHolder.getPreserveSeparator() ? 
XAnalyzingSuggester.PRESERVE_SEP : 0; - - final Automaton queryPrefix = fieldType.requiresContext() ? ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(), suggestionContext.getContextQueries()) : null; - - XAnalyzingSuggester suggester; - if (suggestionContext.isFuzzy()) { - suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, - analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, - suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(), - suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), false, - analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, - analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); - } else { - suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, - analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, - analyzingSuggestHolder.preservePositionIncrements, - analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, - analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); - } - return suggester; - } - - @Override - public CompletionStats stats(String... 
fields) { - long sizeInBytes = 0; - ObjectLongHashMap<String> completionFields = null; - if (fields != null && fields.length > 0) { - completionFields = new ObjectLongHashMap<>(fields.length); - } - - for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) { - sizeInBytes += entry.getValue().fst.ramBytesUsed(); - if (fields == null || fields.length == 0) { - continue; - } - for (String field : fields) { - // support for getting fields by regex as in fielddata - if (Regex.simpleMatch(field, entry.getKey())) { - long fstSize = entry.getValue().fst.ramBytesUsed(); - completionFields.addTo(field, fstSize); - } - } - } - - return new CompletionStats(sizeInBytes, completionFields); - } - - @Override - AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) { - return lookupMap.get(fieldType.names().indexName()); - } - - @Override - public long ramBytesUsed() { - return ramBytesUsed; - } - - @Override - public Collection<Accountable> getChildResources() { - return Accountables.namedAccountables("field", lookupMap); - } - }; - } - - /* - // might be readded when we change the current impl, right now not needed - static class AnalyzingSuggestHolder { - final boolean preserveSep; - final boolean preservePositionIncrements; - final int maxSurfaceFormsPerAnalyzedForm; - final int maxGraphExpansions; - final boolean hasPayloads; - final int maxAnalyzedPathsForOneInput; - final FST<Pair<Long, BytesRef>> fst; - - public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, - boolean hasPayloads, int maxAnalyzedPathsForOneInput, FST<Pair<Long, BytesRef>> fst) { - this.preserveSep = preserveSep; - this.preservePositionIncrements = preservePositionIncrements; - this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm; - this.maxGraphExpansions = maxGraphExpansions; - this.hasPayloads = hasPayloads; - this.maxAnalyzedPathsForOneInput = 
maxAnalyzedPathsForOneInput; - this.fst = fst; - } - - } - */ - - @Override - public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { - return prototype.toFiniteStrings(stream); - } -}
\ No newline at end of file diff --git a/core/src/test/java/org/elasticsearch/search/suggest/completion/CategoryContextMappingTests.java b/core/src/test/java/org/elasticsearch/search/suggest/completion/CategoryContextMappingTests.java new file mode 100644 index 0000000000..390555c81e --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/suggest/completion/CategoryContextMappingTests.java @@ -0,0 +1,302 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.suggest.completion; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.suggest.document.ContextSuggestField; +import org.elasticsearch.common.xcontent.*; +import org.elasticsearch.index.mapper.*; +import org.elasticsearch.search.suggest.completion.context.CategoryContextMapping; +import org.elasticsearch.search.suggest.completion.context.CategoryQueryContext; +import org.elasticsearch.search.suggest.completion.context.ContextBuilder; +import org.elasticsearch.test.ESSingleNodeTestCase; + +import java.util.*; + +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.isIn; + +public class CategoryContextMappingTests extends ESSingleNodeTestCase { + + public void testIndexingWithNoContexts() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .startArray("contexts") + .startObject() + .field("name", "ctx") + .field("type", "category") + .endObject() + .endArray() + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + MappedFieldType completionFieldType = fieldMapper.fieldType(); + ParsedDocument parsedDocument = defaultMapper.parse("test", "type1", "1", jsonBuilder() + .startObject() + .startArray("completion") + .startObject() + .array("input", "suggestion1", "suggestion2") + .field("weight", 3) + .endObject() + .startObject() + .array("input", "suggestion3", "suggestion4") + .field("weight", 4) + .endObject() + .startObject() + .field("input", "suggestion5", "suggestion6", "suggestion7") 
+ .field("weight", 5) + .endObject() + .endArray() + .endObject() + .bytes()); + IndexableField[] fields = parsedDocument.rootDoc().getFields(completionFieldType.names().indexName()); + assertContextSuggestFields(fields, 7); + } + + public void testIndexingWithSimpleContexts() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .startArray("contexts") + .startObject() + .field("name", "ctx") + .field("type", "category") + .endObject() + .endArray() + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + MappedFieldType completionFieldType = fieldMapper.fieldType(); + ParsedDocument parsedDocument = defaultMapper.parse("test", "type1", "1", jsonBuilder() + .startObject() + .startArray("completion") + .startObject() + .field("input", "suggestion5", "suggestion6", "suggestion7") + .startObject("contexts") + .field("ctx", "ctx1") + .endObject() + .field("weight", 5) + .endObject() + .endArray() + .endObject() + .bytes()); + IndexableField[] fields = parsedDocument.rootDoc().getFields(completionFieldType.names().indexName()); + assertContextSuggestFields(fields, 3); + } + + public void testIndexingWithContextList() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .startArray("contexts") + .startObject() + .field("name", "ctx") + .field("type", "category") + .endObject() + .endArray() + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + 
MappedFieldType completionFieldType = fieldMapper.fieldType(); + ParsedDocument parsedDocument = defaultMapper.parse("test", "type1", "1", jsonBuilder() + .startObject() + .startObject("completion") + .field("input", "suggestion5", "suggestion6", "suggestion7") + .startObject("contexts") + .array("ctx", "ctx1", "ctx2", "ctx3") + .endObject() + .field("weight", 5) + .endObject() + .endObject() + .bytes()); + IndexableField[] fields = parsedDocument.rootDoc().getFields(completionFieldType.names().indexName()); + assertContextSuggestFields(fields, 3); + } + + public void testIndexingWithMultipleContexts() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .startArray("contexts") + .startObject() + .field("name", "ctx") + .field("type", "category") + .endObject() + .startObject() + .field("name", "type") + .field("type", "category") + .endObject() + .endArray() + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + MappedFieldType completionFieldType = fieldMapper.fieldType(); + XContentBuilder builder = jsonBuilder() + .startObject() + .startArray("completion") + .startObject() + .field("input", "suggestion5", "suggestion6", "suggestion7") + .field("weight", 5) + .startObject("contexts") + .array("ctx", "ctx1", "ctx2", "ctx3") + .array("type", "typr3", "ftg") + .endObject() + .endObject() + .endArray() + .endObject(); + ParsedDocument parsedDocument = defaultMapper.parse("test", "type1", "1", builder.bytes()); + IndexableField[] fields = parsedDocument.rootDoc().getFields(completionFieldType.names().indexName()); + assertContextSuggestFields(fields, 3); + } + + public void testQueryContextParsingBasic() throws Exception { + XContentBuilder builder = 
jsonBuilder().value("context1"); + XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(builder.bytes()); + CategoryContextMapping mapping = ContextBuilder.category("cat").build(); + List<CategoryQueryContext> queryContexts = mapping.parseQueryContext(parser); + Iterator<CategoryQueryContext> iterator = queryContexts.iterator(); + assertTrue(iterator.hasNext()); + CategoryQueryContext queryContext = iterator.next(); + assertThat(queryContext.context.toString(), equalTo("context1")); + assertThat(queryContext.boost, equalTo(1)); + assertThat(queryContext.isPrefix, equalTo(false)); + } + + public void testQueryContextParsingArray() throws Exception { + XContentBuilder builder = jsonBuilder().startArray() + .value("context1") + .value("context2") + .endArray(); + XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(builder.bytes()); + CategoryContextMapping mapping = ContextBuilder.category("cat").build(); + List<CategoryQueryContext> queryContexts = mapping.parseQueryContext(parser); + List<String> expectedContexts = new ArrayList<>(Arrays.asList("context1", "context2")); + Iterator<CategoryQueryContext> iterator = queryContexts.iterator(); + assertTrue(iterator.hasNext()); + CategoryQueryContext queryContext = iterator.next(); + assertThat(queryContext.context.toString(), isIn(expectedContexts)); + assertTrue(iterator.hasNext()); + queryContext = iterator.next(); + assertThat(queryContext.context.toString(), isIn(expectedContexts)); + } + + public void testQueryContextParsingObject() throws Exception { + XContentBuilder builder = jsonBuilder().startObject() + .field("context", "context1") + .field("boost", 10) + .field("prefix", true) + .endObject(); + XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(builder.bytes()); + CategoryContextMapping mapping = ContextBuilder.category("cat").build(); + List<CategoryQueryContext> queryContexts = mapping.parseQueryContext(parser); + 
Iterator<CategoryQueryContext> iterator = queryContexts.iterator(); + assertTrue(iterator.hasNext()); + CategoryQueryContext queryContext = iterator.next(); + assertThat(queryContext.context.toString(), equalTo("context1")); + assertThat(queryContext.boost, equalTo(10)); + assertThat(queryContext.isPrefix, equalTo(true)); + } + + + public void testQueryContextParsingObjectArray() throws Exception { + XContentBuilder builder = jsonBuilder().startArray() + .startObject() + .field("context", "context1") + .field("boost", 2) + .field("prefix", true) + .endObject() + .startObject() + .field("context", "context2") + .field("boost", 3) + .field("prefix", false) + .endObject() + .endArray(); + XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(builder.bytes()); + CategoryContextMapping mapping = ContextBuilder.category("cat").build(); + List<CategoryQueryContext> queryContexts = mapping.parseQueryContext(parser); + Iterator<CategoryQueryContext> iterator = queryContexts.iterator(); + assertTrue(iterator.hasNext()); + CategoryQueryContext queryContext = iterator.next(); + assertThat(queryContext.context.toString(), equalTo("context1")); + assertThat(queryContext.boost, equalTo(2)); + assertThat(queryContext.isPrefix, equalTo(true)); + assertTrue(iterator.hasNext()); + queryContext = iterator.next(); + assertThat(queryContext.context.toString(), equalTo("context2")); + assertThat(queryContext.boost, equalTo(3)); + assertThat(queryContext.isPrefix, equalTo(false)); + } + + public void testQueryContextParsingMixed() throws Exception { + XContentBuilder builder = jsonBuilder().startArray() + .startObject() + .field("context", "context1") + .field("boost", 2) + .field("prefix", true) + .endObject() + .value("context2") + .endArray(); + XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(builder.bytes()); + CategoryContextMapping mapping = ContextBuilder.category("cat").build(); + List<CategoryQueryContext> queryContexts 
= mapping.parseQueryContext(parser); + Iterator<CategoryQueryContext> iterator = queryContexts.iterator(); + assertTrue(iterator.hasNext()); + CategoryQueryContext queryContext = iterator.next(); + assertThat(queryContext.context.toString(), equalTo("context1")); + assertThat(queryContext.boost, equalTo(2)); + assertThat(queryContext.isPrefix, equalTo(true)); + assertTrue(iterator.hasNext()); + queryContext = iterator.next(); + assertThat(queryContext.context.toString(), equalTo("context2")); + assertThat(queryContext.boost, equalTo(1)); + assertThat(queryContext.isPrefix, equalTo(false)); + } + + public void testParsingContextFromDocument() throws Exception { + CategoryContextMapping mapping = ContextBuilder.category("cat").field("category").build(); + ParseContext.Document document = new ParseContext.Document(); + document.add(new StringField("category", "category1", Field.Store.NO)); + Set<CharSequence> context = mapping.parseContext(document); + assertThat(context.size(), equalTo(1)); + assertTrue(context.contains("category1")); + } + + static void assertContextSuggestFields(IndexableField[] fields, int expected) { + int actualFieldCount = 0; + for (IndexableField field : fields) { + if (field instanceof ContextSuggestField) { + actualFieldCount++; + } + } + assertThat(actualFieldCount, equalTo(expected)); + } +} diff --git a/core/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTests.java b/core/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTests.java deleted file mode 100644 index 4fbde2d905..0000000000 --- a/core/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTests.java +++ /dev/null @@ -1,536 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. 
Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.search.suggest.completion; - -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.FieldsConsumer; -import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene54.Lucene54Codec; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.Fields; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.suggest.InputIterator; -import org.apache.lucene.search.suggest.Lookup; -import org.apache.lucene.search.suggest.Lookup.LookupResult; -import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester; -import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.LineFileDocs; -import 
org.elasticsearch.Version; -import org.elasticsearch.cluster.metadata.IndexMetaData; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.analysis.NamedAnalyzer; -import org.elasticsearch.index.mapper.FieldMapper; -import org.elasticsearch.index.mapper.MappedFieldType.Names; -import org.elasticsearch.index.mapper.core.CompletionFieldMapper; -import org.elasticsearch.search.suggest.SuggestUtils; -import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.LookupFactory; -import org.elasticsearch.search.suggest.context.ContextMapping; -import org.elasticsearch.test.ESTestCase; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.Set; - -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.is; - -public class CompletionPostingsFormatTests extends ESTestCase { - Settings indexSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT.id).build(); - static final CompletionFieldMapper.CompletionFieldType FIELD_TYPE = CompletionFieldMapper.Defaults.FIELD_TYPE.clone(); - static final NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer()); - static { - FIELD_TYPE.setNames(new Names("foo")); - FIELD_TYPE.setIndexAnalyzer(analyzer); - FIELD_TYPE.setSearchAnalyzer(analyzer); - FIELD_TYPE.freeze(); - } - - public void testCompletionPostingsFormat() throws IOException { - AnalyzingCompletionLookupProviderV1 providerV1 = new AnalyzingCompletionLookupProviderV1(true, false, true, true); - AnalyzingCompletionLookupProvider currentProvider = new AnalyzingCompletionLookupProvider(true, false, true, true); - List<Completion090PostingsFormat.CompletionLookupProvider> providers = Arrays.asList(providerV1, currentProvider); - - Completion090PostingsFormat.CompletionLookupProvider randomProvider = providers.get(getRandom().nextInt(providers.size())); - 
RAMDirectory dir = new RAMDirectory(); - writeData(dir, randomProvider); - - IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT); - LookupFactory load = currentProvider.load(input); - CompletionFieldMapper.CompletionFieldType fieldType = FIELD_TYPE.clone(); - fieldType.setProvider(currentProvider); - Lookup lookup = load.getLookup(fieldType, new CompletionSuggestionContext(null)); - List<LookupResult> result = lookup.lookup("ge", false, 10); - assertThat(result.get(0).key.toString(), equalTo("Generator - Foo Fighters")); - assertThat(result.get(0).payload.utf8ToString(), equalTo("id:10")); - dir.close(); - } - - public void testProviderBackwardCompatibilityForVersion1() throws IOException { - AnalyzingCompletionLookupProviderV1 providerV1 = new AnalyzingCompletionLookupProviderV1(true, false, true, true); - AnalyzingCompletionLookupProvider currentProvider = new AnalyzingCompletionLookupProvider(true, false, true, true); - - RAMDirectory dir = new RAMDirectory(); - writeData(dir, providerV1); - - IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT); - LookupFactory load = currentProvider.load(input); - CompletionFieldMapper.CompletionFieldType fieldType = FIELD_TYPE.clone(); - fieldType.setProvider(currentProvider); - AnalyzingCompletionLookupProvider.AnalyzingSuggestHolder analyzingSuggestHolder = load.getAnalyzingSuggestHolder(fieldType); - assertThat(analyzingSuggestHolder.sepLabel, is(AnalyzingCompletionLookupProviderV1.SEP_LABEL)); - assertThat(analyzingSuggestHolder.payloadSep, is(AnalyzingCompletionLookupProviderV1.PAYLOAD_SEP)); - assertThat(analyzingSuggestHolder.endByte, is(AnalyzingCompletionLookupProviderV1.END_BYTE)); - dir.close(); - } - - public void testProviderVersion2() throws IOException { - AnalyzingCompletionLookupProvider currentProvider = new AnalyzingCompletionLookupProvider(true, false, true, true); - - RAMDirectory dir = new RAMDirectory(); - writeData(dir, currentProvider); - - IndexInput input = 
dir.openInput("foo.txt", IOContext.DEFAULT); - LookupFactory load = currentProvider.load(input); - CompletionFieldMapper.CompletionFieldType fieldType = FIELD_TYPE.clone(); - fieldType.setProvider(currentProvider); - AnalyzingCompletionLookupProvider.AnalyzingSuggestHolder analyzingSuggestHolder = load.getAnalyzingSuggestHolder(fieldType); - assertThat(analyzingSuggestHolder.sepLabel, is(XAnalyzingSuggester.SEP_LABEL)); - assertThat(analyzingSuggestHolder.payloadSep, is(XAnalyzingSuggester.PAYLOAD_SEP)); - assertThat(analyzingSuggestHolder.endByte, is(XAnalyzingSuggester.END_BYTE)); - dir.close(); - } - - public void testDuellCompletions() throws IOException, NoSuchFieldException, SecurityException, IllegalArgumentException, - IllegalAccessException { - final boolean preserveSeparators = getRandom().nextBoolean(); - final boolean preservePositionIncrements = getRandom().nextBoolean(); - final boolean usePayloads = getRandom().nextBoolean(); - final int options = preserveSeparators ? AnalyzingSuggester.PRESERVE_SEP : 0; - - XAnalyzingSuggester reference = new XAnalyzingSuggester(new StandardAnalyzer(), null, new StandardAnalyzer(), - options, 256, -1, preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); - LineFileDocs docs = new LineFileDocs(getRandom()); - int num = scaledRandomIntBetween(150, 300); - final String[] titles = new String[num]; - final long[] weights = new long[num]; - for (int i = 0; i < titles.length; i++) { - Document nextDoc = docs.nextDoc(); - IndexableField field = nextDoc.getField("title"); - titles[i] = field.stringValue(); - weights[i] = between(0, 100); - - } - docs.close(); - final InputIterator primaryIter = new InputIterator() { - int index = 0; - long currentWeight = -1; - - @Override - public BytesRef next() throws IOException { - if (index < titles.length) { - currentWeight = weights[index]; - return new 
BytesRef(titles[index++]); - } - return null; - } - - @Override - public long weight() { - return currentWeight; - } - - @Override - public BytesRef payload() { - return null; - } - - @Override - public boolean hasPayloads() { - return false; - } - - @Override - public Set<BytesRef> contexts() { - return null; - } - - @Override - public boolean hasContexts() { - return false; - } - - }; - InputIterator iter; - if (usePayloads) { - iter = new InputIterator() { - @Override - public long weight() { - return primaryIter.weight(); - } - - @Override - public BytesRef next() throws IOException { - return primaryIter.next(); - } - - @Override - public BytesRef payload() { - return new BytesRef(Long.toString(weight())); - } - - @Override - public boolean hasPayloads() { - return true; - } - - @Override - public Set<BytesRef> contexts() { - return null; - } - - @Override - public boolean hasContexts() { - return false; - } - }; - } else { - iter = primaryIter; - } - reference.build(iter); - - AnalyzingCompletionLookupProvider currentProvider = new AnalyzingCompletionLookupProvider(preserveSeparators, false, preservePositionIncrements, usePayloads); - CompletionFieldMapper.CompletionFieldType fieldType = FIELD_TYPE.clone(); - fieldType.setProvider(currentProvider); - final CompletionFieldMapper mapper = new CompletionFieldMapper("foo", fieldType, Integer.MAX_VALUE, indexSettings, FieldMapper.MultiFields.empty(), null); - Lookup buildAnalyzingLookup = buildAnalyzingLookup(mapper, titles, titles, weights); - if (buildAnalyzingLookup instanceof XAnalyzingSuggester) { - assertEquals(reference.getMaxAnalyzedPathsForOneInput(), ((XAnalyzingSuggester) buildAnalyzingLookup).getMaxAnalyzedPathsForOneInput()); - } - - for (int i = 0; i < titles.length; i++) { - int res = between(1, 10); - final StringBuilder builder = new StringBuilder(); - SuggestUtils.analyze(analyzer.tokenStream("foo", titles[i]), new SuggestUtils.TokenConsumer() { - @Override - public void nextToken() throws 
IOException { - if (builder.length() == 0) { - builder.append(this.charTermAttr.toString()); - } - } - }); - String firstTerm = builder.toString(); - String prefix = firstTerm.isEmpty() ? "" : firstTerm.substring(0, between(1, firstTerm.length())); - List<LookupResult> refLookup = reference.lookup(prefix, false, res); - List<LookupResult> lookup = buildAnalyzingLookup.lookup(prefix, false, res); - assertThat(refLookup.toString(),lookup.size(), equalTo(refLookup.size())); - for (int j = 0; j < refLookup.size(); j++) { - assertThat(lookup.get(j).key, equalTo(refLookup.get(j).key)); - assertThat("prefix: " + prefix + " " + j + " -- missmatch cost: " + lookup.get(j).key + " - " + lookup.get(j).value + " | " + refLookup.get(j).key + " - " + refLookup.get(j).value , - lookup.get(j).value, equalTo(refLookup.get(j).value)); - assertThat(lookup.get(j).payload, equalTo(refLookup.get(j).payload)); - if (usePayloads) { - assertThat(lookup.get(j).payload.utf8ToString(), equalTo(Long.toString(lookup.get(j).value))); - } - } - } - } - - public Lookup buildAnalyzingLookup(final CompletionFieldMapper mapper, String[] terms, String[] surfaces, long[] weights) - throws IOException { - RAMDirectory dir = new RAMDirectory(); - Codec codec = new Lucene54Codec() { - @Override - public PostingsFormat getPostingsFormatForField(String field) { - final PostingsFormat in = super.getPostingsFormatForField(field); - return mapper.fieldType().postingsFormat(in); - } - }; - IndexWriterConfig indexWriterConfig = new IndexWriterConfig(mapper.fieldType().indexAnalyzer()); - - indexWriterConfig.setCodec(codec); - IndexWriter writer = new IndexWriter(dir, indexWriterConfig); - for (int i = 0; i < weights.length; i++) { - Document doc = new Document(); - BytesRef payload = mapper.buildPayload(new BytesRef(surfaces[i]), weights[i], new BytesRef(Long.toString(weights[i]))); - doc.add(mapper.getCompletionField(ContextMapping.EMPTY_CONTEXT, terms[i], payload)); - if (randomBoolean()) { - writer.commit(); - 
} - writer.addDocument(doc); - } - writer.commit(); - writer.forceMerge(1, true); - writer.commit(); - DirectoryReader reader = DirectoryReader.open(writer, true); - assertThat(reader.leaves().size(), equalTo(1)); - assertThat(reader.leaves().get(0).reader().numDocs(), equalTo(weights.length)); - LeafReaderContext atomicReaderContext = reader.leaves().get(0); - Terms luceneTerms = atomicReaderContext.reader().terms(mapper.fieldType().names().fullName()); - Lookup lookup = ((Completion090PostingsFormat.CompletionTerms) luceneTerms).getLookup(mapper.fieldType(), new CompletionSuggestionContext(null)); - reader.close(); - writer.close(); - dir.close(); - return lookup; - } - - public void testNoDocs() throws IOException { - AnalyzingCompletionLookupProvider provider = new AnalyzingCompletionLookupProvider(true, false, true, true); - RAMDirectory dir = new RAMDirectory(); - IndexOutput output = dir.createOutput("foo.txt", IOContext.DEFAULT); - FieldsConsumer consumer = provider.consumer(output); - consumer.write(new Fields() { - @Override - public Iterator<String> iterator() { - return Arrays.asList("foo").iterator(); - } - - @Override - public Terms terms(String field) throws IOException { - return null; - } - - @Override - public int size() { - return 1; - } - }); - consumer.close(); - output.close(); - - IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT); - LookupFactory load = provider.load(input); - CompletionFieldMapper.CompletionFieldType fieldType = FIELD_TYPE.clone(); - fieldType.setProvider(provider); - assertNull(load.getLookup(fieldType, new CompletionSuggestionContext(null))); - dir.close(); - } - - // TODO ADD more unittests - private void writeData(Directory dir, Completion090PostingsFormat.CompletionLookupProvider provider) throws IOException { - IndexOutput output = dir.createOutput("foo.txt", IOContext.DEFAULT); - FieldsConsumer consumer = provider.consumer(output); - final List<TermPosAndPayload> terms = new ArrayList<>(); - terms.add(new 
TermPosAndPayload("foofightersgenerator", 256 - 2, provider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")))); - terms.add(new TermPosAndPayload("generator", 256 - 1, provider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")))); - Fields fields = new Fields() { - @Override - public Iterator<String> iterator() { - return Arrays.asList("foo").iterator(); - } - - @Override - public Terms terms(String field) throws IOException { - if (field.equals("foo")) { - return new Terms() { - @Override - public TermsEnum iterator() throws IOException { - final Iterator<TermPosAndPayload> iterator = terms.iterator(); - return new TermsEnum() { - private TermPosAndPayload current = null; - @Override - public SeekStatus seekCeil(BytesRef text) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public void seekExact(long ord) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public BytesRef term() throws IOException { - return current == null ? null : current.term; - } - - @Override - public long ord() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public int docFreq() throws IOException { - return current == null ? 
0 : 1; - } - - @Override - public long totalTermFreq() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException { - final TermPosAndPayload data = current; - return new PostingsEnum() { - boolean done = false; - @Override - public int nextPosition() throws IOException { - return data.pos; - } - - @Override - public int startOffset() throws IOException { - return 0; - } - - @Override - public int endOffset() throws IOException { - return 0; - } - - @Override - public BytesRef getPayload() throws IOException { - return data.payload; - } - - @Override - public int freq() throws IOException { - return 1; - } - - @Override - public int docID() { - if (done) { - return NO_MORE_DOCS; - } - return 0; - } - - @Override - public int nextDoc() throws IOException { - if (done) { - return NO_MORE_DOCS; - } - done = true; - return 0; - } - - @Override - public int advance(int target) throws IOException { - if (done) { - return NO_MORE_DOCS; - } - done = true; - return 0; - } - - @Override - public long cost() { - return 0; - } - }; - } - - @Override - public BytesRef next() throws IOException { - if (iterator.hasNext()) { - current = iterator.next(); - return current.term; - } - current = null; - return null; - } - }; - } - - @Override - public long size() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public long getSumTotalTermFreq() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public long getSumDocFreq() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public int getDocCount() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public boolean hasFreqs() { - throw new UnsupportedOperationException(); - } - - @Override - public boolean hasOffsets() { - throw new UnsupportedOperationException(); - } - - @Override - public 
boolean hasPositions() { - throw new UnsupportedOperationException(); - } - - @Override - public boolean hasPayloads() { - throw new UnsupportedOperationException(); - } - }; - } - return null; - } - - @Override - public int size() { - return 0; - } - }; - consumer.write(fields); - consumer.close(); - output.close(); - - } - - private static class TermPosAndPayload { - final BytesRef term; - final int pos; - final BytesRef payload; - - - private TermPosAndPayload(String term, int pos, BytesRef payload) { - this.term = new BytesRef(term); - this.pos = pos; - this.payload = payload; - } - } -} diff --git a/core/src/test/java/org/elasticsearch/search/suggest/completion/GeoContextMappingTests.java b/core/src/test/java/org/elasticsearch/search/suggest/completion/GeoContextMappingTests.java new file mode 100644 index 0000000000..87dbcb3fb3 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/suggest/completion/GeoContextMappingTests.java @@ -0,0 +1,308 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.suggest.completion; + +import org.apache.lucene.index.IndexableField; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.mapper.DocumentMapper; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.search.suggest.completion.context.*; +import org.elasticsearch.test.ESSingleNodeTestCase; + +import java.util.*; + +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.elasticsearch.search.suggest.completion.CategoryContextMappingTests.assertContextSuggestFields; +import static org.hamcrest.Matchers.equalTo; + +public class GeoContextMappingTests extends ESSingleNodeTestCase { + + public void testIndexingWithNoContexts() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .startArray("contexts") + .startObject() + .field("name", "ctx") + .field("type", "geo") + .endObject() + .endArray() + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + MappedFieldType completionFieldType = fieldMapper.fieldType(); + ParsedDocument parsedDocument = defaultMapper.parse("test", "type1", "1", jsonBuilder() + .startObject() + .startArray("completion") + .startObject() + .array("input", "suggestion1", "suggestion2") + .field("weight", 3) + .endObject() + .startObject() + .array("input", "suggestion3", "suggestion4") + .field("weight", 4) + 
.endObject() + .startObject() + .field("input", "suggestion5", "suggestion6", "suggestion7") + .field("weight", 5) + .endObject() + .endArray() + .endObject() + .bytes()); + IndexableField[] fields = parsedDocument.rootDoc().getFields(completionFieldType.names().indexName()); + assertContextSuggestFields(fields, 7); + } + + public void testIndexingWithSimpleContexts() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .startArray("contexts") + .startObject() + .field("name", "ctx") + .field("type", "geo") + .endObject() + .endArray() + .endObject() + .endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + MappedFieldType completionFieldType = fieldMapper.fieldType(); + ParsedDocument parsedDocument = defaultMapper.parse("test", "type1", "1", jsonBuilder() + .startObject() + .startArray("completion") + .startObject() + .field("input", "suggestion5", "suggestion6", "suggestion7") + .startObject("contexts") + .startObject("ctx") + .field("lat", 43.6624803) + .field("lon", -79.3863353) + .endObject() + .endObject() + .field("weight", 5) + .endObject() + .endArray() + .endObject() + .bytes()); + IndexableField[] fields = parsedDocument.rootDoc().getFields(completionFieldType.names().indexName()); + assertContextSuggestFields(fields, 3); + } + + public void testIndexingWithContextList() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .startArray("contexts") + .startObject() + .field("name", "ctx") + .field("type", "geo") + .endObject() + .endArray() + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper 
= createIndex("test").mapperService().documentMapperParser().parse(mapping); + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + MappedFieldType completionFieldType = fieldMapper.fieldType(); + ParsedDocument parsedDocument = defaultMapper.parse("test", "type1", "1", jsonBuilder() + .startObject() + .startObject("completion") + .field("input", "suggestion5", "suggestion6", "suggestion7") + .startObject("contexts") + .startArray("ctx") + .startObject() + .field("lat", 43.6624803) + .field("lon", -79.3863353) + .endObject() + .startObject() + .field("lat", 43.6624718) + .field("lon", -79.3873227) + .endObject() + .endArray() + .endObject() + .field("weight", 5) + .endObject() + .bytes()); + IndexableField[] fields = parsedDocument.rootDoc().getFields(completionFieldType.names().indexName()); + assertContextSuggestFields(fields, 3); + } + + public void testIndexingWithMultipleContexts() throws Exception { + String mapping = jsonBuilder().startObject().startObject("type1") + .startObject("properties").startObject("completion") + .field("type", "completion") + .startArray("contexts") + .startObject() + .field("name", "loc1") + .field("type", "geo") + .endObject() + .startObject() + .field("name", "loc2") + .field("type", "geo") + .endObject() + .endArray() + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping); + FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion"); + MappedFieldType completionFieldType = fieldMapper.fieldType(); + XContentBuilder builder = jsonBuilder() + .startObject() + .startArray("completion") + .startObject() + .field("input", "suggestion5", "suggestion6", "suggestion7") + .field("weight", 5) + .startObject("contexts") + .array("loc1", "ezs42e44yx96") + .array("loc2", "wh0n9447fwrc") + .endObject() + .endObject() + .endArray() + .endObject(); + ParsedDocument parsedDocument = 
defaultMapper.parse("test", "type1", "1", builder.bytes()); + IndexableField[] fields = parsedDocument.rootDoc().getFields(completionFieldType.names().indexName()); + assertContextSuggestFields(fields, 3); + } + + public void testParsingQueryContextBasic() throws Exception { + XContentBuilder builder = jsonBuilder().value("ezs42e44yx96"); + XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(builder.bytes()); + GeoContextMapping mapping = ContextBuilder.geo("geo").build(); + List<CategoryQueryContext> queryContexts = mapping.parseQueryContext(parser); + Iterator<CategoryQueryContext> iterator = queryContexts.iterator(); + assertTrue(iterator.hasNext()); + GeoQueryContext queryContext = ((GeoQueryContext) iterator.next()); + assertThat(queryContext.context.toString(), equalTo("ezs42e44yx96")); + assertThat(queryContext.boost, equalTo(1)); + assertThat(queryContext.neighbours.length, equalTo(1)); + } + + public void testParsingQueryContextGeoPoint() throws Exception { + XContentBuilder builder = jsonBuilder().startObject() + .field("lat", 23.654242) + .field("lon", 90.047153) + .endObject(); + XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(builder.bytes()); + GeoContextMapping mapping = ContextBuilder.geo("geo").build(); + List<CategoryQueryContext> queryContexts = mapping.parseQueryContext(parser); + Iterator<CategoryQueryContext> iterator = queryContexts.iterator(); + assertTrue(iterator.hasNext()); + GeoQueryContext queryContext = ((GeoQueryContext) iterator.next()); + assertThat(queryContext.context.toString(), equalTo("wh0n94")); + assertThat(queryContext.boost, equalTo(1)); + assertThat(queryContext.neighbours.length, equalTo(1)); + } + + public void testParsingQueryContextObject() throws Exception { + XContentBuilder builder = jsonBuilder().startObject() + .startObject("context") + .field("lat", 23.654242) + .field("lon", 90.047153) + .endObject() + .field("boost", 10) + .array("neighbours", 1, 2, 
3) + .endObject(); + XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(builder.bytes()); + GeoContextMapping mapping = ContextBuilder.geo("geo").build(); + List<CategoryQueryContext> queryContexts = mapping.parseQueryContext(parser); + Iterator<CategoryQueryContext> iterator = queryContexts.iterator(); + assertTrue(iterator.hasNext()); + GeoQueryContext queryContext = ((GeoQueryContext) iterator.next()); + assertThat(queryContext.context.toString(), equalTo("wh0n94")); + assertThat(queryContext.boost, equalTo(10)); + assertThat(queryContext.neighbours.length, equalTo(3)); + } + + public void testParsingQueryContextObjectArray() throws Exception { + XContentBuilder builder = jsonBuilder().startArray() + .startObject() + .startObject("context") + .field("lat", 23.654242) + .field("lon", 90.047153) + .endObject() + .field("boost", 10) + .array("neighbours", 1, 2, 3) + .endObject() + .startObject() + .startObject("context") + .field("lat", 22.337374) + .field("lon", 92.112583) + .endObject() + .field("boost", 2) + .array("neighbours", 3) + .endObject() + .endArray(); + XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(builder.bytes()); + GeoContextMapping mapping = ContextBuilder.geo("geo").build(); + List<CategoryQueryContext> queryContexts = mapping.parseQueryContext(parser); + Iterator<CategoryQueryContext> iterator = queryContexts.iterator(); + assertTrue(iterator.hasNext()); + GeoQueryContext queryContext = ((GeoQueryContext) iterator.next()); + assertThat(queryContext.context.toString(), equalTo("wh0n94")); + assertThat(queryContext.boost, equalTo(10)); + assertThat(queryContext.neighbours.length, equalTo(3)); + assertTrue(iterator.hasNext()); + queryContext = ((GeoQueryContext) iterator.next()); + assertThat(queryContext.context.toString(), equalTo("w5cx04")); + assertThat(queryContext.boost, equalTo(2)); + assertThat(queryContext.neighbours.length, equalTo(1)); + } + + public void 
testParsingQueryContextMixed() throws Exception { + XContentBuilder builder = jsonBuilder().startArray() + .startObject() + .startObject("context") + .field("lat", 23.654242) + .field("lon", 90.047153) + .endObject() + .field("boost", 10) + .array("neighbours", 1, 2, 3) + .endObject() + .startObject() + .field("lat", 22.337374) + .field("lon", 92.112583) + .endObject() + .endArray(); + XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(builder.bytes()); + GeoContextMapping mapping = ContextBuilder.geo("geo").build(); + List<CategoryQueryContext> queryContexts = mapping.parseQueryContext(parser); + Iterator<CategoryQueryContext> iterator = queryContexts.iterator(); + assertTrue(iterator.hasNext()); + GeoQueryContext queryContext = ((GeoQueryContext) iterator.next()); + assertThat(queryContext.context.toString(), equalTo("wh0n94")); + assertThat(queryContext.boost, equalTo(10)); + assertThat(queryContext.neighbours.length, equalTo(3)); + assertTrue(iterator.hasNext()); + queryContext = ((GeoQueryContext) iterator.next()); + assertThat(queryContext.context.toString(), equalTo("w5cx04")); + assertThat(queryContext.boost, equalTo(1)); + assertThat(queryContext.neighbours.length, equalTo(1)); + } +} diff --git a/core/src/test/java/org/elasticsearch/search/suggest/context/GeoLocationContextMappingTests.java b/core/src/test/java/org/elasticsearch/search/suggest/context/GeoLocationContextMappingTests.java deleted file mode 100644 index 0e4f566b0f..0000000000 --- a/core/src/test/java/org/elasticsearch/search/suggest/context/GeoLocationContextMappingTests.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. 
Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.elasticsearch.search.suggest.context; - -import org.apache.lucene.util.GeoHashUtils; -import org.elasticsearch.ElasticsearchParseException; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentHelper; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.search.suggest.context.ContextMapping.ContextConfig; -import org.elasticsearch.test.ESTestCase; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; - -import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; - -/** - * - */ -public class GeoLocationContextMappingTests extends ESTestCase { - public void testThatParsingGeoPointsWorksWithCoercion() throws Exception { - XContentBuilder builder = jsonBuilder().startObject().field("lat", "52").field("lon", "4").endObject(); - XContentParser parser = XContentHelper.createParser(builder.bytes()); - parser.nextToken(); - - HashMap<String, Object> config = new HashMap<>(); - config.put("precision", 12); - GeolocationContextMapping mapping = GeolocationContextMapping.load("foo", config); - mapping.parseQuery("foo", parser); - } - - public void testUseWithDefaultGeoHash() throws Exception { - XContentBuilder builder = jsonBuilder().startObject().field("lat", 52d).field("lon", 4d).endObject(); - XContentParser parser = 
XContentHelper.createParser(builder.bytes()); - parser.nextToken(); - - String geohash = GeoHashUtils.stringEncode(randomIntBetween(-180, +180), randomIntBetween(-90, +90)); - HashMap<String, Object> config = new HashMap<>(); - config.put("precision", 12); - config.put("default", geohash); - GeolocationContextMapping mapping = GeolocationContextMapping.load("foo", config); - mapping.parseQuery("foo", parser); - } - - public void testUseWithDefaultLatLon() throws Exception { - XContentBuilder builder = jsonBuilder().startObject().field("lat", 52d).field("lon", 4d).endObject(); - XContentParser parser = XContentHelper.createParser(builder.bytes()); - parser.nextToken(); - - HashMap<String, Object> config = new HashMap<>(); - config.put("precision", 12); - HashMap<String, Double> pointAsMap = new HashMap<>(); - pointAsMap.put("lat", 51d); - pointAsMap.put("lon", 0d); - config.put("default", pointAsMap); - GeolocationContextMapping mapping = GeolocationContextMapping.load("foo", config); - mapping.parseQuery("foo", parser); - } - - public void testUseWithDefaultBadLatLon() throws Exception { - XContentBuilder builder = jsonBuilder().startObject().field("lat", 52d).field("lon", 4d).endObject(); - XContentParser parser = XContentHelper.createParser(builder.bytes()); - parser.nextToken(); - - HashMap<String, Object> config = new HashMap<>(); - config.put("precision", 12); - HashMap<String, Double> pointAsMap = new HashMap<>(); - pointAsMap.put("latitude", 51d); // invalid field names - pointAsMap.put("longitude", 0d); // invalid field names - config.put("default", pointAsMap); - ElasticsearchParseException expected = null; - try { - GeolocationContextMapping mapping = GeolocationContextMapping.load("foo", config); - mapping.parseQuery("foo", parser); - - } catch (ElasticsearchParseException e) { - expected = e; - } - assertNotNull(expected); - } - - public void testUseWithMultiplePrecisions() throws Exception { - XContentBuilder builder = 
jsonBuilder().startObject().field("lat", 52d).field("lon", 4d).endObject(); - XContentParser parser = XContentHelper.createParser(builder.bytes()); - parser.nextToken(); - - HashMap<String, Object> config = new HashMap<>(); - int numElements = randomIntBetween(1, 12); - ArrayList<Integer> precisions = new ArrayList<>(); - for (int i = 0; i < numElements; i++) { - precisions.add(randomIntBetween(1, 12)); - } - config.put("precision", precisions); - GeolocationContextMapping mapping = GeolocationContextMapping.load("foo", config); - mapping.parseQuery("foo", parser); - } - - public void testHashcode() throws Exception { - HashMap<String, Object> config = new HashMap<>(); - if (randomBoolean()) { - config.put("precision", Arrays.asList(1, 2, 3, 4)); - } else { - config.put("precision", randomIntBetween(1, 12)); - } - if (randomBoolean()) { - HashMap<String, Double> pointAsMap = new HashMap<>(); - pointAsMap.put("lat", 51d); - pointAsMap.put("lon", 0d); - config.put("default", pointAsMap); - } - HashMap<String, Object> config2 = new HashMap<>(config); - GeolocationContextMapping mapping = GeolocationContextMapping.load("foo", config); - GeolocationContextMapping mapping2 = GeolocationContextMapping.load("foo", config2); - - assertEquals(mapping, mapping2); - assertEquals(mapping.hashCode(), mapping2.hashCode()); - } - - public void testUseWithBadGeoContext() throws Exception { - double lon = 4d; - String badLat = "W"; - XContentBuilder builder = jsonBuilder().startObject().startArray("location").value(4d).value(badLat).endArray().endObject(); - XContentParser parser = XContentHelper.createParser(builder.bytes()); - parser.nextToken(); // start of object - parser.nextToken(); // "location" field name - parser.nextToken(); // array - - HashMap<String, Object> config = new HashMap<>(); - config.put("precision", randomIntBetween(1, 12)); - GeolocationContextMapping mapping = GeolocationContextMapping.load("foo", config); - ElasticsearchParseException expected = null; - try 
{ - ContextConfig geoconfig = mapping.parseContext(null, parser); - } catch (ElasticsearchParseException e) { - expected = e; - } - assertNotNull(expected); - } - - public void testUseWithLonLatGeoContext() throws Exception { - double lon = 4d; - double lat = 52d; - XContentBuilder builder = jsonBuilder().startObject().startArray("location").value(lon).value(lat).endArray().endObject(); - XContentParser parser = XContentHelper.createParser(builder.bytes()); - parser.nextToken(); // start of object - parser.nextToken(); // "location" field name - parser.nextToken(); // array - - HashMap<String, Object> config = new HashMap<>(); - config.put("precision", randomIntBetween(1, 12)); - GeolocationContextMapping mapping = GeolocationContextMapping.load("foo", config); - mapping.parseContext(null, parser); - } - - public void testUseWithMultiGeoHashGeoContext() throws Exception { - String geohash1 = GeoHashUtils.stringEncode(randomIntBetween(-180, +180), randomIntBetween(-90, +90)); - String geohash2 = GeoHashUtils.stringEncode(randomIntBetween(-180, +180), randomIntBetween(-90, +90)); - XContentBuilder builder = jsonBuilder().startObject().startArray("location").value(geohash1).value(geohash2).endArray().endObject(); - XContentParser parser = XContentHelper.createParser(builder.bytes()); - parser.nextToken(); // start of object - parser.nextToken(); // "location" field name - parser.nextToken(); // array - - HashMap<String, Object> config = new HashMap<>(); - config.put("precision", randomIntBetween(1, 12)); - GeolocationContextMapping mapping = GeolocationContextMapping.load("foo", config); - ContextConfig parsedContext = mapping.parseContext(null, parser); - } - -} |