summaryrefslogtreecommitdiff
path: root/core/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java
diff options
context:
space:
mode:
Diffstat (limited to 'core/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java')
-rw-r--r--core/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java335
1 files changed, 0 insertions, 335 deletions
diff --git a/core/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java b/core/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java
deleted file mode 100644
index 398310d3a0..0000000000
--- a/core/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java
+++ /dev/null
@@ -1,335 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.search.suggest.completion;
-
-import com.carrotsearch.hppc.ObjectLongHashMap;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.PostingsEnum;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.suggest.Lookup;
-import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
-import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.Accountable;
-import org.apache.lucene.util.Accountables;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.fst.ByteSequenceOutputs;
-import org.apache.lucene.util.fst.FST;
-import org.apache.lucene.util.fst.PairOutputs;
-import org.apache.lucene.util.fst.PairOutputs.Pair;
-import org.apache.lucene.util.fst.PositiveIntOutputs;
-import org.elasticsearch.common.regex.Regex;
-import org.elasticsearch.index.mapper.MappedFieldType;
-import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
-import org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider.AnalyzingSuggestHolder;
-import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.CompletionLookupProvider;
-import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.LookupFactory;
-import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-
-import static org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester.HOLE_CHARACTER;
-
-/**
- * This is an older implementation of the AnalyzingCompletionLookupProvider class
- * We use this to test for backwards compatibility in our tests, namely
- * CompletionPostingsFormatTests
- * This ensures upgrades between versions work smoothly
- */
public class AnalyzingCompletionLookupProviderV1 extends CompletionLookupProvider {

    // Bit flags packed into the per-field "options" VInt written by consumer()
    // and decoded again in load(). Part of the on-disk V1 format — do not change.
    // for serialization
    public static final int SERIALIZE_PRESERVE_SEPARATORS = 1;
    public static final int SERIALIZE_HAS_PAYLOADS = 2;
    public static final int SERIALIZE_PRESERVE_POSITION_INCREMENTS = 4;

    private static final int MAX_SURFACE_FORMS_PER_ANALYZED_FORM = 256;
    // Negative means "unlimited" to XAnalyzingSuggester; written verbatim via
    // writeInt (not VInt) below precisely because it can be negative.
    private static final int MAX_GRAPH_EXPANSIONS = -1;

    // Codec header values checked by CodecUtil on both the write and read path.
    public static final String CODEC_NAME = "analyzing";
    public static final int CODEC_VERSION = 1;

    private boolean preserveSep;
    private boolean preservePositionIncrements;
    private int maxSurfaceFormsPerAnalyzedForm;
    private int maxGraphExpansions;
    private boolean hasPayloads;
    // Suggester instance used only for toFiniteStrings(); configured with the
    // legacy V1 byte values below so analysis matches what V1 indices contain.
    private final XAnalyzingSuggester prototype;

    // important, these are the settings from the old xanalyzingsuggester
    public static final int SEP_LABEL = 0xFF;
    public static final int END_BYTE = 0x0;
    public static final int PAYLOAD_SEP = '\u001f';

    /**
     * Creates the legacy (V1) provider.
     *
     * @param preserveSep whether token separators are preserved in the FST
     * @param exactFirst currently ignored — see the commented-out line below
     * @param preservePositionIncrements whether position gaps (e.g. stopwords) are preserved
     * @param hasPayloads whether surface forms carry payloads
     */
    public AnalyzingCompletionLookupProviderV1(boolean preserveSep, boolean exactFirst, boolean preservePositionIncrements, boolean hasPayloads) {
        this.preserveSep = preserveSep;
        this.preservePositionIncrements = preservePositionIncrements;
        this.hasPayloads = hasPayloads;
        this.maxSurfaceFormsPerAnalyzedForm = MAX_SURFACE_FORMS_PER_ANALYZED_FORM;
        this.maxGraphExpansions = MAX_GRAPH_EXPANSIONS;
        int options = preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0;
        // needs to fixed in the suggester first before it can be supported
        //options |= exactFirst ? XAnalyzingSuggester.EXACT_FIRST : 0;
        prototype = new XAnalyzingSuggester(null, null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements,
                null, false, 1, SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);
    }

    @Override
    public String getName() {
        return "analyzing";
    }

    /**
     * Returns a {@link FieldsConsumer} that serializes each field's suggestions
     * as an FST to {@code output} in the V1 layout: codec header, then per-field
     * FST + metadata, then a trailing directory of (field name, offset) pairs
     * followed by a long pointing at that directory (read back by load()).
     */
    @Override
    public FieldsConsumer consumer(final IndexOutput output) throws IOException {
        // TODO write index header?
        CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION);
        return new FieldsConsumer() {
            private Map<String, Long> fieldOffsets = new HashMap<>();

            @Override
            public void close() throws IOException {
                try { /*
                       * write the offsets per field such that we know where
                       * we need to load the FSTs from
                       */
                    long pointer = output.getFilePointer();
                    output.writeVInt(fieldOffsets.size());
                    for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
                        output.writeString(entry.getKey());
                        output.writeVLong(entry.getValue());
                    }
                    // Fixed-width long at the very end so load() can seek to
                    // length() - 8 and find the directory.
                    output.writeLong(pointer);
                } finally {
                    IOUtils.close(output);
                }
            }

            @Override
            public void write(Fields fields) throws IOException {
                for (String field : fields) {
                    Terms terms = fields.terms(field);
                    if (terms == null) {
                        continue;
                    }
                    TermsEnum termsEnum = terms.iterator();
                    PostingsEnum docsEnum = null;
                    final SuggestPayload spare = new SuggestPayload();
                    int maxAnalyzedPathsForOneInput = 0;
                    // NOTE(review): this passes XAnalyzingSuggester.PAYLOAD_SEP
                    // rather than the class-local legacy PAYLOAD_SEP used on the
                    // lookup side — presumably intentional for the V1 format,
                    // but worth confirming against the original implementation.
                    final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
                    int docCount = 0;
                    while (true) {
                        BytesRef term = termsEnum.next();
                        if (term == null) {
                            break;
                        }
                        docsEnum = termsEnum.postings(docsEnum, PostingsEnum.PAYLOADS);
                        builder.startTerm(term);
                        int docFreq = 0;
                        while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                            for (int i = 0; i < docsEnum.freq(); i++) {
                                final int position = docsEnum.nextPosition();
                                AnalyzingCompletionLookupProviderV1.this.parsePayload(docsEnum.getPayload(), spare);
                                builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
                                // multi fields have the same surface form so we sum up here
                                maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
                            }
                            docFreq++;
                            docCount = Math.max(docCount, docsEnum.docID() + 1);
                        }
                        builder.finishTerm(docFreq);
                    }
                    /*
                     * Here we are done processing the field and we can
                     * build the FST and write it to disk.
                     */
                    FST<Pair<Long, BytesRef>> build = builder.build();
                    assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: [" + docCount + "]";
                    /*
                     * it's possible that the FST is null if we have 2 segments that get merged
                     * and all docs that have a value in this field are deleted. This will cause
                     * a consumer to be created but it doesn't consume any values causing the FSTBuilder
                     * to return null.
                     */
                    if (build != null) {
                        fieldOffsets.put(field, output.getFilePointer());
                        build.save(output);
                        /* write some more meta-info */
                        output.writeVInt(maxAnalyzedPathsForOneInput);
                        output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                        output.writeInt(maxGraphExpansions); // can be negative
                        int options = 0;
                        options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0;
                        options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
                        options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
                        output.writeVInt(options);
                    }
                }
            }
        };
    }

    /**
     * Reads the V1 layout written by {@link #consumer(IndexOutput)}: seeks to
     * the trailing directory pointer, loads every field's FST plus its metadata
     * into memory, and returns a {@link LookupFactory} backed by that map.
     */
    @Override
    public LookupFactory load(IndexInput input) throws IOException {
        CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION, CODEC_VERSION);
        final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>();
        // The last 8 bytes hold the absolute offset of the field directory.
        input.seek(input.length() - 8);
        long metaPointer = input.readLong();
        input.seek(metaPointer);
        int numFields = input.readVInt();

        // TreeMap keyed by offset so the FSTs below are read in file order.
        Map<Long, String> meta = new TreeMap<>();
        for (int i = 0; i < numFields; i++) {
            String name = input.readString();
            long offset = input.readVLong();
            meta.put(offset, name);
        }
        long sizeInBytes = 0;
        for (Map.Entry<Long, String> entry : meta.entrySet()) {
            input.seek(entry.getKey());
            FST<Pair<Long, BytesRef>> fst = new FST<>(input, new PairOutputs<>(
                    PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
            // Metadata read back in the exact order consumer() wrote it.
            int maxAnalyzedPathsForOneInput = input.readVInt();
            int maxSurfaceFormsPerAnalyzedForm = input.readVInt();
            int maxGraphExpansions = input.readInt();
            int options = input.readVInt();
            boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPARATORS) != 0;
            boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0;
            boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;
            sizeInBytes += fst.ramBytesUsed();
            lookupMap.put(entry.getValue(), new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions,
                    hasPayloads, maxAnalyzedPathsForOneInput, fst));
        }
        final long ramBytesUsed = sizeInBytes;
        return new LookupFactory() {
            @Override
            public Lookup getLookup(CompletionFieldMapper.CompletionFieldType fieldType, CompletionSuggestionContext suggestionContext) {
                AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.names().indexName());
                if (analyzingSuggestHolder == null) {
                    return null;
                }
                int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0;

                final Automaton queryPrefix = fieldType.requiresContext() ? ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(), suggestionContext.getContextQueries()) : null;

                // Both branches pass the legacy V1 byte values (SEP_LABEL,
                // PAYLOAD_SEP, END_BYTE) so lookups decode V1-encoded FSTs.
                XAnalyzingSuggester suggester;
                if (suggestionContext.isFuzzy()) {
                    suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags,
                            analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions,
                            suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(),
                            suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), false,
                            analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
                            analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER);
                } else {
                    suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags,
                            analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions,
                            analyzingSuggestHolder.preservePositionIncrements,
                            analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
                            analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER);
                }
                return suggester;
            }

            /**
             * Sums the RAM used by all loaded FSTs; when {@code fields} is
             * non-empty, also accumulates per-field sizes for fields matching
             * the given (possibly wildcard) names.
             */
            @Override
            public CompletionStats stats(String... fields) {
                long sizeInBytes = 0;
                ObjectLongHashMap<String> completionFields = null;
                if (fields != null && fields.length > 0) {
                    completionFields = new ObjectLongHashMap<>(fields.length);
                }

                for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) {
                    sizeInBytes += entry.getValue().fst.ramBytesUsed();
                    if (fields == null || fields.length == 0) {
                        continue;
                    }
                    for (String field : fields) {
                        // support for getting fields by regex as in fielddata
                        if (Regex.simpleMatch(field, entry.getKey())) {
                            long fstSize = entry.getValue().fst.ramBytesUsed();
                            completionFields.addTo(field, fstSize);
                        }
                    }
                }

                return new CompletionStats(sizeInBytes, completionFields);
            }

            @Override
            AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) {
                return lookupMap.get(fieldType.names().indexName());
            }

            @Override
            public long ramBytesUsed() {
                return ramBytesUsed;
            }

            @Override
            public Collection<Accountable> getChildResources() {
                return Accountables.namedAccountables("field", lookupMap);
            }
        };
    }

    /*
    // might be readded when we change the current impl, right now not needed
    static class AnalyzingSuggestHolder {
        final boolean preserveSep;
        final boolean preservePositionIncrements;
        final int maxSurfaceFormsPerAnalyzedForm;
        final int maxGraphExpansions;
        final boolean hasPayloads;
        final int maxAnalyzedPathsForOneInput;
        final FST<Pair<Long, BytesRef>> fst;

        public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
                                      boolean hasPayloads, int maxAnalyzedPathsForOneInput, FST<Pair<Long, BytesRef>> fst) {
            this.preserveSep = preserveSep;
            this.preservePositionIncrements = preservePositionIncrements;
            this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
            this.maxGraphExpansions = maxGraphExpansions;
            this.hasPayloads = hasPayloads;
            this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput;
            this.fst = fst;
        }

    }
    */

    /** Delegates to the legacy-configured prototype so token streams are expanded with V1 settings. */
    @Override
    public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
        return prototype.toFiniteStrings(stream);
    }
}