diff options
author | Christoph Büscher <christoph@elastic.co> | 2016-08-10 19:22:22 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-08-10 19:22:22 +0200 |
commit | 563bf0154cfce4a944d6e81632d8565670facdfc (patch) | |
tree | 57d8880bc8f1f23915c1f05d0f197cda3f472043 /core/src/main/java/org | |
parent | acc50d58176b31d0513253abe6be5e9aca7a233f (diff) | |
parent | d11521318d37d54d13c8499a2b47bc5414310d4b (diff) |
Merge pull request #19920 from cbuescher/remove-SuggestUtil
Remove SuggestUtil helper class
Diffstat (limited to 'core/src/main/java/org')
15 files changed, 192 insertions, 254 deletions
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/DirectSpellcheckerSettings.java b/core/src/main/java/org/elasticsearch/search/suggest/DirectSpellcheckerSettings.java index 81c73df53f..1250dfdac3 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/DirectSpellcheckerSettings.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/DirectSpellcheckerSettings.java @@ -21,8 +21,13 @@ package org.elasticsearch.search.suggest; import org.apache.lucene.search.spell.DirectSpellChecker; import org.apache.lucene.search.spell.StringDistance; import org.apache.lucene.search.spell.SuggestMode; +import org.apache.lucene.search.spell.SuggestWord; +import org.apache.lucene.search.spell.SuggestWordFrequencyComparator; +import org.apache.lucene.search.spell.SuggestWordQueue; import org.apache.lucene.util.automaton.LevenshteinAutomata; +import java.util.Comparator; + public class DirectSpellcheckerSettings { // NB: If this changes, make sure to change the default in TermBuilderSuggester @@ -49,6 +54,9 @@ public class DirectSpellcheckerSettings { private int minWordLength = DEFAULT_MIN_WORD_LENGTH; private float minDocFreq = DEFAULT_MIN_DOC_FREQ; + private static final Comparator<SuggestWord> LUCENE_FREQUENCY = new SuggestWordFrequencyComparator(); + private static final Comparator<SuggestWord> SCORE_COMPARATOR = SuggestWordQueue.DEFAULT_COMPARATOR; + public SuggestMode suggestMode() { return suggestMode; } @@ -129,6 +137,33 @@ public class DirectSpellcheckerSettings { this.minDocFreq = minDocFreq; } + public DirectSpellChecker createDirectSpellChecker() { + + DirectSpellChecker directSpellChecker = new DirectSpellChecker(); + directSpellChecker.setAccuracy(accuracy()); + Comparator<SuggestWord> comparator; + switch (sort()) { + case SCORE: + comparator = SCORE_COMPARATOR; + break; + case FREQUENCY: + comparator = LUCENE_FREQUENCY; + break; + default: + throw new IllegalArgumentException("Illegal suggest sort: " + sort()); + } + directSpellChecker.setComparator(comparator); + directSpellChecker.setDistance(stringDistance()); + directSpellChecker.setMaxEdits(maxEdits()); + directSpellChecker.setMaxInspections(maxInspections()); + directSpellChecker.setMaxQueryFrequency(maxTermFreq()); + directSpellChecker.setMinPrefix(prefixLength()); + directSpellChecker.setMinQueryLength(minWordLength()); + directSpellChecker.setThresholdFrequency(minDocFreq()); + directSpellChecker.setLowerCaseTerms(false); + return directSpellChecker; + } + @Override public String toString() { return "[" + diff --git a/core/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java b/core/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java deleted file mode 100644 index f3a034cda6..0000000000 --- a/core/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.elasticsearch.search.suggest; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.search.spell.DirectSpellChecker; -import org.apache.lucene.search.spell.SuggestWord; -import org.apache.lucene.search.spell.SuggestWordFrequencyComparator; -import org.apache.lucene.search.spell.SuggestWordQueue; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; -import org.apache.lucene.util.CharsRef; -import org.apache.lucene.util.CharsRefBuilder; -import org.apache.lucene.util.IOUtils; -import org.elasticsearch.common.ParseField; -import org.elasticsearch.common.io.FastCharArrayReader; - -import java.io.IOException; -import java.util.Comparator; - -public final class SuggestUtils { - private static final Comparator<SuggestWord> LUCENE_FREQUENCY = new SuggestWordFrequencyComparator(); - private static final Comparator<SuggestWord> SCORE_COMPARATOR = SuggestWordQueue.DEFAULT_COMPARATOR; - - private SuggestUtils() { - // utils!! - } - - public static DirectSpellChecker getDirectSpellChecker(DirectSpellcheckerSettings suggestion) { - DirectSpellChecker directSpellChecker = new DirectSpellChecker(); - directSpellChecker.setAccuracy(suggestion.accuracy()); - Comparator<SuggestWord> comparator; - switch (suggestion.sort()) { - case SCORE: - comparator = SCORE_COMPARATOR; - break; - case FREQUENCY: - comparator = LUCENE_FREQUENCY; - break; - default: - throw new IllegalArgumentException("Illegal suggest sort: " + suggestion.sort()); - } - directSpellChecker.setComparator(comparator); - directSpellChecker.setDistance(suggestion.stringDistance()); - directSpellChecker.setMaxEdits(suggestion.maxEdits()); - directSpellChecker.setMaxInspections(suggestion.maxInspections()); - directSpellChecker.setMaxQueryFrequency(suggestion.maxTermFreq()); - directSpellChecker.setMinPrefix(suggestion.prefixLength()); - directSpellChecker.setMinQueryLength(suggestion.minWordLength()); - directSpellChecker.setThresholdFrequency(suggestion.minDocFreq()); - directSpellChecker.setLowerCaseTerms(false); - return directSpellChecker; - } - - public static BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef... toJoin) { - result.clear(); - for (int i = 0; i < toJoin.length - 1; i++) { - result.append(toJoin[i]); - result.append(separator); - } - result.append(toJoin[toJoin.length-1]); - return result.get(); - } - - public abstract static class TokenConsumer { - protected CharTermAttribute charTermAttr; - protected PositionIncrementAttribute posIncAttr; - protected OffsetAttribute offsetAttr; - - public void reset(TokenStream stream) { - charTermAttr = stream.addAttribute(CharTermAttribute.class); - posIncAttr = stream.addAttribute(PositionIncrementAttribute.class); - offsetAttr = stream.addAttribute(OffsetAttribute.class); - } - - protected BytesRef fillBytesRef(BytesRefBuilder spare) { - spare.copyChars(charTermAttr); - return spare.get(); - } - - public abstract void nextToken() throws IOException; - - public void end() {} - } - - public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRefBuilder spare) throws IOException { - spare.copyUTF8Bytes(toAnalyze); - return analyze(analyzer, spare.get(), field, consumer); - } - - public static int analyze(Analyzer analyzer, CharsRef toAnalyze, String field, TokenConsumer consumer) throws IOException { - try (TokenStream ts = analyzer.tokenStream( - field, new FastCharArrayReader(toAnalyze.chars, toAnalyze.offset, toAnalyze.length))) { - return analyze(ts, consumer); - } - } - - /** NOTE: this method closes the TokenStream, even on exception, which is awkward - * because really the caller who called {@link Analyzer#tokenStream} should close it, - * but when trying that there are recursion issues when we try to use the same - * TokenStream twice in the same recursion... */ - public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException { - int numTokens = 0; - boolean success = false; - try { - stream.reset(); - consumer.reset(stream); - while (stream.incrementToken()) { - consumer.nextToken(); - numTokens++; - } - consumer.end(); - success = true; - } finally { - if (success) { - stream.close(); - } else { - IOUtils.closeWhileHandlingException(stream); - } - } - return numTokens; - } - - public static class Fields { - public static final ParseField STRING_DISTANCE = new ParseField("string_distance"); - public static final ParseField SUGGEST_MODE = new ParseField("suggest_mode"); - public static final ParseField MAX_EDITS = new ParseField("max_edits"); - public static final ParseField MAX_INSPECTIONS = new ParseField("max_inspections"); - // TODO some of these constants are the same as MLT constants and - // could be moved to a shared class for consistency - public static final ParseField MAX_TERM_FREQ = new ParseField("max_term_freq"); - public static final ParseField PREFIX_LENGTH = new ParseField("prefix_length", "prefix_len"); - public static final ParseField MIN_WORD_LENGTH = new ParseField("min_word_length", "min_word_len"); - public static final ParseField MIN_DOC_FREQ = new ParseField("min_doc_freq"); - public static final ParseField SHARD_SIZE = new ParseField("shard_size"); - public static final ParseField ANALYZER = new ParseField("analyzer"); - public static final ParseField FIELD = new ParseField("field"); - public static final ParseField SIZE = new ParseField("size"); - public static final ParseField SORT = new ParseField("sort"); - public static final ParseField ACCURACY = new ParseField("accuracy"); - } -} diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java index 4b5b9cd4a8..3b216d9186 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java @@ -37,7 +37,6 @@ import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.query.QueryParseContext; import org.elasticsearch.index.query.QueryShardContext; -import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.SuggestionBuilder; import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext; import org.elasticsearch.search.suggest.completion.context.ContextMapping; @@ -48,7 +47,6 @@ import org.elasticsearch.search.suggest.completion2x.context.GeolocationContextM import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -90,10 +88,10 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug TLP_PARSER.declareField((parser, completionSuggestionContext, context) -> completionSuggestionContext.regexOptions = RegexOptions.parse(parser, context), RegexOptions.REGEX_OPTIONS, ObjectParser.ValueType.OBJECT); - TLP_PARSER.declareString(CompletionSuggestionBuilder.InnerBuilder::field, SuggestUtils.Fields.FIELD); - TLP_PARSER.declareString(CompletionSuggestionBuilder.InnerBuilder::analyzer, SuggestUtils.Fields.ANALYZER); - TLP_PARSER.declareInt(CompletionSuggestionBuilder.InnerBuilder::size, SuggestUtils.Fields.SIZE); - TLP_PARSER.declareInt(CompletionSuggestionBuilder.InnerBuilder::shardSize, SuggestUtils.Fields.SHARD_SIZE); + TLP_PARSER.declareString(CompletionSuggestionBuilder.InnerBuilder::field, FIELDNAME_FIELD); + TLP_PARSER.declareString(CompletionSuggestionBuilder.InnerBuilder::analyzer, ANALYZER_FIELD); + TLP_PARSER.declareInt(CompletionSuggestionBuilder.InnerBuilder::size, SIZE_FIELD); + TLP_PARSER.declareInt(CompletionSuggestionBuilder.InnerBuilder::shardSize, SHARDSIZE_FIELD); TLP_PARSER.declareField((p, v, c) -> { // Copy the current structure. We will parse, once the mapping is provided XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); @@ -353,7 +351,7 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug // now we should have field name, check and copy fields over to the suggestion builder we return if (field == null) { throw new ElasticsearchParseException( - "the required field option [" + SuggestUtils.Fields.FIELD.getPreferredName() + "] is missing"); + "the required field option [" + FIELDNAME_FIELD.getPreferredName() + "] is missing"); } return new CompletionSuggestionBuilder(field, builder); } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/Correction.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/Correction.java index ffd21469f7..23db2b0fcb 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/Correction.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/Correction.java @@ -20,7 +20,6 @@ package org.elasticsearch.search.suggest.phrase; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; -import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate; import java.util.Arrays; @@ -73,7 +72,7 @@ public final class Correction implements Comparable<Correction> { len += toJoin[i].length; } result.grow(len); - return SuggestUtils.join(separator, result, toJoin); + return WordScorer.join(separator, result, toJoin); } /** Lower scores sorts first; if scores are equal, diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java index 67fed51b62..f6faaaeea5 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java @@ -19,6 +19,10 @@ package org.elasticsearch.search.suggest.phrase; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Term; @@ -29,8 +33,10 @@ import org.apache.lucene.search.spell.SuggestMode; import org.apache.lucene.search.spell.SuggestWord; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRefBuilder; -import org.elasticsearch.search.suggest.SuggestUtils; +import org.apache.lucene.util.IOUtils; +import org.elasticsearch.common.io.FastCharArrayReader; import java.io.IOException; import java.util.ArrayList; @@ -44,7 +50,7 @@ import static java.lang.Math.log10; import static java.lang.Math.max; import static java.lang.Math.round; -final class DirectCandidateGenerator extends CandidateGenerator { +public final class DirectCandidateGenerator extends CandidateGenerator { private final DirectSpellChecker spellchecker; private final String field; @@ -140,7 +146,7 @@ final class DirectCandidateGenerator extends CandidateGenerator { return term; } final BytesRefBuilder result = byteSpare; - SuggestUtils.analyze(preFilter, term, field, new SuggestUtils.TokenConsumer() { + analyze(preFilter, term, field, new TokenConsumer() { @Override public void nextToken() throws IOException { @@ -156,7 +162,7 @@ final class DirectCandidateGenerator extends CandidateGenerator { candidates.add(candidate); } else { final BytesRefBuilder result = byteSpare; - SuggestUtils.analyze(postFilter, candidate.term, field, new SuggestUtils.TokenConsumer() { + analyze(postFilter, candidate.term, field, new TokenConsumer() { @Override public void nextToken() throws IOException { this.fillBytesRef(result); @@ -189,6 +195,27 @@ final class DirectCandidateGenerator extends CandidateGenerator { } + public abstract static class TokenConsumer { + protected CharTermAttribute charTermAttr; + protected PositionIncrementAttribute posIncAttr; + protected OffsetAttribute offsetAttr; + + public void reset(TokenStream stream) { + charTermAttr = stream.addAttribute(CharTermAttribute.class); + posIncAttr = stream.addAttribute(PositionIncrementAttribute.class); + offsetAttr = stream.addAttribute(OffsetAttribute.class); + } + + protected BytesRef fillBytesRef(BytesRefBuilder spare) { + spare.copyChars(charTermAttr); + return spare.get(); + } + + public abstract void nextToken() throws IOException; + + public void end() {} + } + public static class CandidateSet { public Candidate[] candidates; public final Candidate originalTerm; @@ -283,4 +310,40 @@ final class DirectCandidateGenerator extends CandidateGenerator { return new Candidate(term, frequency, channelScore, score(frequency, channelScore, dictSize), userInput); } + public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRefBuilder spare) + throws IOException { + spare.copyUTF8Bytes(toAnalyze); + CharsRef charsRef = spare.get(); + try (TokenStream ts = analyzer.tokenStream( + field, new FastCharArrayReader(charsRef.chars, charsRef.offset, charsRef.length))) { + return analyze(ts, consumer); + } + } + + /** NOTE: this method closes the TokenStream, even on exception, which is awkward + * because really the caller who called {@link Analyzer#tokenStream} should close it, + * but when trying that there are recursion issues when we try to use the same + * TokenStream twice in the same recursion... */ + public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException { + int numTokens = 0; + boolean success = false; + try { + stream.reset(); + consumer.reset(stream); + while (stream.incrementToken()) { + consumer.nextToken(); + numTokens++; + } + consumer.end(); + success = true; + } finally { + if (success) { + stream.close(); + } else { + IOUtils.closeWhileHandlingException(stream); + } + } + return numTokens; + } + } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java index 67b3104334..9e3beb2ccf 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java @@ -51,21 +51,21 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator private static final String TYPE = "direct_generator"; - static final ParseField DIRECT_GENERATOR_FIELD = new ParseField(TYPE); - static final ParseField FIELDNAME_FIELD = new ParseField("field"); - static final ParseField PREFILTER_FIELD = new ParseField("pre_filter"); - static final ParseField POSTFILTER_FIELD = new ParseField("post_filter"); - static final ParseField SUGGESTMODE_FIELD = new ParseField("suggest_mode"); - static final ParseField MIN_DOC_FREQ_FIELD = new ParseField("min_doc_freq"); - static final ParseField ACCURACY_FIELD = new ParseField("accuracy"); - static final ParseField SIZE_FIELD = new ParseField("size"); - static final ParseField SORT_FIELD = new ParseField("sort"); - static final ParseField STRING_DISTANCE_FIELD = new ParseField("string_distance"); - static final ParseField MAX_EDITS_FIELD = new ParseField("max_edits"); - static final ParseField MAX_INSPECTIONS_FIELD = new ParseField("max_inspections"); - static final ParseField MAX_TERM_FREQ_FIELD = new ParseField("max_term_freq"); - static final ParseField PREFIX_LENGTH_FIELD = new ParseField("prefix_length"); - static final ParseField MIN_WORD_LENGTH_FIELD = new ParseField("min_word_length"); + public static final ParseField DIRECT_GENERATOR_FIELD = new ParseField(TYPE); + public static final ParseField FIELDNAME_FIELD = new ParseField("field"); + public static final ParseField PREFILTER_FIELD = new ParseField("pre_filter"); + public static final ParseField POSTFILTER_FIELD = new ParseField("post_filter"); + public static final ParseField SUGGESTMODE_FIELD = new ParseField("suggest_mode"); + public static final ParseField MIN_DOC_FREQ_FIELD = new ParseField("min_doc_freq"); + public static final ParseField ACCURACY_FIELD = new ParseField("accuracy"); + public static final ParseField SIZE_FIELD = new ParseField("size"); + public static final ParseField SORT_FIELD = new ParseField("sort"); + public static final ParseField STRING_DISTANCE_FIELD = new ParseField("string_distance"); + public static final ParseField MAX_EDITS_FIELD = new ParseField("max_edits"); + public static final ParseField MAX_INSPECTIONS_FIELD = new ParseField("max_inspections"); + public static final ParseField MAX_TERM_FREQ_FIELD = new ParseField("max_term_freq"); + public static final ParseField PREFIX_LENGTH_FIELD = new ParseField("prefix_length"); + public static final ParseField MIN_WORD_LENGTH_FIELD = new ParseField("min_word_length"); private final String field; private String preFilter; @@ -449,7 +449,8 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator return new LuceneLevenshteinDistance(); } else if ("levenstein".equals(distanceVal)) { return new LevensteinDistance(); - //TODO Jaro and Winkler are 2 people - so apply same naming logic as damerau_levenshtein + // TODO Jaro and Winkler are 2 people - so apply same naming logic + // as damerau_levenshtein } else if ("jarowinkler".equals(distanceVal)) { return new JaroWinklerDistance(); } else if ("ngram".equals(distanceVal)) { diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java index 6b6301b49a..562da44846 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java @@ -21,7 +21,6 @@ package org.elasticsearch.search.suggest.phrase; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Terms; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate; import java.io.IOException; @@ -41,15 +40,15 @@ final class LaplaceScorer extends WordScorer { @Override protected double scoreBigram(Candidate word, Candidate w_1) throws IOException { - SuggestUtils.join(separator, spare, w_1.term, word.term); + join(separator, spare, w_1.term, word.term); return (alpha + frequency(spare.get())) / (alpha + w_1.frequency + vocabluarySize); } @Override protected double scoreTrigram(Candidate word, Candidate w_1, Candidate w_2) throws IOException { - SuggestUtils.join(separator, spare, w_2.term, w_1.term, word.term); + join(separator, spare, w_2.term, w_1.term, word.term); long trigramCount = frequency(spare.get()); - SuggestUtils.join(separator, spare, w_1.term, word.term); + join(separator, spare, w_1.term, word.term); return (alpha + trigramCount) / (alpha + frequency(spare.get()) + vocabluarySize); } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/LinearInterpolatingScorer.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/LinearInterpolatingScorer.java index 2a5895a832..c6d67fe8cf 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/LinearInterpolatingScorer.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/LinearInterpolatingScorer.java @@ -21,7 +21,6 @@ package org.elasticsearch.search.suggest.phrase; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Terms; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate; import java.io.IOException; @@ -56,7 +55,7 @@ public final class LinearInterpolatingScorer extends WordScorer { @Override protected double scoreBigram(Candidate word, Candidate w_1) throws IOException { - SuggestUtils.join(separator, spare, w_1.term, word.term); + join(separator, spare, w_1.term, word.term); final long count = frequency(spare.get()); if (count < 1) { return unigramLambda * scoreUnigram(word); @@ -66,12 +65,12 @@ public final class LinearInterpolatingScorer extends WordScorer { @Override protected double scoreTrigram(Candidate w, Candidate w_1, Candidate w_2) throws IOException { - SuggestUtils.join(separator, spare, w.term, w_1.term, w_2.term); + join(separator, spare, w.term, w_1.term, w_2.term); final long count = frequency(spare.get()); if (count < 1) { return scoreBigram(w, w_1); } - SuggestUtils.join(separator, spare, w.term, w_1.term); + join(separator, spare, w.term, w_1.term); return trigramLambda * (count / (1.d + frequency(spare.get()))) + scoreBigram(w, w_1); } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java index ec9ca6e1da..e6e1767386 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java @@ -28,7 +28,6 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.CharsRefBuilder; import org.elasticsearch.common.io.FastCharArrayReader; -import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate; import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet; @@ -51,19 +50,19 @@ public final class NoisyChannelSpellChecker { public NoisyChannelSpellChecker(double nonErrorLikelihood) { this(nonErrorLikelihood, true, DEFAULT_TOKEN_LIMIT); } - + public NoisyChannelSpellChecker(double nonErrorLikelihood, boolean requireUnigram, int tokenLimit) { this.realWordLikelihood = nonErrorLikelihood; this.requireUnigram = requireUnigram; this.tokenLimit = tokenLimit; - + } public Result getCorrections(TokenStream stream, final CandidateGenerator generator, float maxErrors, int numCorrections, WordScorer wordScorer, float confidence, int gramSize) throws IOException { - + final List<CandidateSet> candidateSetsList = new ArrayList<>(); - SuggestUtils.analyze(stream, new SuggestUtils.TokenConsumer() { + DirectCandidateGenerator.analyze(stream, new DirectCandidateGenerator.TokenConsumer() { CandidateSet currentSet = null; private TypeAttribute typeAttribute; private final BytesRefBuilder termsRef = new BytesRefBuilder(); @@ -74,7 +73,7 @@ public final class NoisyChannelSpellChecker { super.reset(stream); typeAttribute = stream.addAttribute(TypeAttribute.class); } - + @Override public void nextToken() throws IOException { anyTokens = true; @@ -96,7 +95,7 @@ public final class NoisyChannelSpellChecker { currentSet = new CandidateSet(Candidate.EMPTY, generator.createCandidate(BytesRef.deepCopyOf(term), true)); } } - + @Override public void end() { if (currentSet != null) { @@ -107,11 +106,11 @@ public final class NoisyChannelSpellChecker { } } }); - + if (candidateSetsList.isEmpty() || candidateSetsList.size() >= tokenLimit) { return Result.EMPTY; } - + for (CandidateSet candidateSet : candidateSetsList) { generator.drawCandidates(candidateSet); } @@ -127,13 +126,13 @@ public final class NoisyChannelSpellChecker { cutoffScore = inputPhraseScore * confidence; } Correction[] bestCandidates = scorer.findBestCandiates(candidateSets, maxErrors, cutoffScore); - + return new Result(bestCandidates, cutoffScore); } public Result getCorrections(Analyzer analyzer, BytesRef query, CandidateGenerator generator, float maxErrors, int numCorrections, IndexReader reader, String analysisField, WordScorer scorer, float confidence, int gramSize) throws IOException { - + return getCorrections(tokenStream(analyzer, query, new CharsRefBuilder(), analysisField), generator, maxErrors, numCorrections, scorer, confidence, gramSize); } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java index 25f589794f..a9f5accb91 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java @@ -45,7 +45,6 @@ import org.elasticsearch.script.ScriptService; import org.elasticsearch.search.suggest.Suggest.Suggestion; import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry; import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option; -import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.Suggester; import org.elasticsearch.search.suggest.SuggestionBuilder; import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext; @@ -84,7 +83,7 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> { final List<CandidateGenerator> gens = new ArrayList<>(generators.size()); for (int i = 0; i < numGenerators; i++) { PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i); - DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(generator); + DirectSpellChecker directSpellChecker = generator.createDirectSpellChecker(); Terms terms = MultiFields.getTerms(indexReader, generator.field()); if (terms != null) { gens.add(new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(), diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java index 1e0e6680aa..94ad7b8fad 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java @@ -43,7 +43,6 @@ import org.elasticsearch.script.CompiledScript; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptContext; import org.elasticsearch.script.ScriptService; -import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.SuggestionBuilder; import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext; import org.elasticsearch.search.suggest.phrase.PhraseSuggestionContext.DirectCandidateGenerator; @@ -596,7 +595,7 @@ public class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSuggestionB // now we should have field name, check and copy fields over to the suggestion builder we return if (fieldname == null) { throw new ElasticsearchParseException( - "the required field option [" + SuggestUtils.Fields.FIELD.getPreferredName() + "] is missing"); + "the required field option [" + FIELDNAME_FIELD.getPreferredName() + "] is missing"); } return new PhraseSuggestionBuilder(fieldname, tmpSuggestion); } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/StupidBackoffScorer.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/StupidBackoffScorer.java index ed0573bf00..8eb08ef068 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/StupidBackoffScorer.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/StupidBackoffScorer.java @@ -21,7 +21,6 @@ package org.elasticsearch.search.suggest.phrase; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Terms; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate; import java.io.IOException; @@ -41,7 +40,7 @@ class StupidBackoffScorer extends WordScorer { @Override protected double scoreBigram(Candidate word, Candidate w_1) throws IOException { - SuggestUtils.join(separator, spare, w_1.term, word.term); + join(separator, spare, w_1.term, word.term); final long count = frequency(spare.get()); if (count < 1) { return discount * scoreUnigram(word); @@ -53,12 +52,12 @@ class StupidBackoffScorer extends WordScorer { protected double scoreTrigram(Candidate w, Candidate w_1, Candidate w_2) throws IOException { // First see if there are bigrams. If there aren't then skip looking up the trigram. This saves lookups // when the bigrams and trigrams are rare and we need both anyway. - SuggestUtils.join(separator, spare, w_1.term, w.term); + join(separator, spare, w_1.term, w.term); long bigramCount = frequency(spare.get()); if (bigramCount < 1) { return discount * scoreUnigram(w); } - SuggestUtils.join(separator, spare, w_2.term, w_1.term, w.term); + join(separator, spare, w_2.term, w_1.term, w.term); long trigramCount = frequency(spare.get()); if (trigramCount < 1) { return discount * (bigramCount / (w_1.frequency + 0.00000000001d)); diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java index 69e62c1a17..32d4feb4b2 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java @@ -100,6 +100,16 @@ public abstract class WordScorer { return scoreBigram(word, w_1); } + public static BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef... toJoin) { + result.clear(); + for (int i = 0; i < toJoin.length - 1; i++) { + result.append(toJoin[i]); + result.append(separator); + } + result.append(toJoin[toJoin.length-1]); + return result.get(); + } + public interface WordScorerFactory { WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator) throws IOException; diff --git a/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggester.java b/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggester.java index a06baccb99..0d58e0f5ca 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggester.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggester.java @@ -30,10 +30,10 @@ import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.text.Text; import org.elasticsearch.index.query.QueryParseContext; -import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.Suggester; import org.elasticsearch.search.suggest.SuggestionBuilder; import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext; +import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator; import java.io.IOException; import java.util.ArrayList; @@ -48,7 +48,7 @@ public final class TermSuggester extends Suggester<TermSuggestionContext> { @Override public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException { - DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings()); + DirectSpellChecker directSpellChecker = suggestion.getDirectSpellCheckerSettings().createDirectSpellChecker(); final IndexReader indexReader = searcher.getIndexReader(); TermSuggestion response = new TermSuggestion( name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort() @@ -70,10 +70,11 @@ public final class TermSuggester extends Suggester<TermSuggestionContext> { return response; } - private List<Token> queryTerms(SuggestionContext suggestion, CharsRefBuilder spare) throws IOException { + private static List<Token> queryTerms(SuggestionContext suggestion, CharsRefBuilder spare) throws IOException { final List<Token> result = new ArrayList<>(); final String field = suggestion.getField(); - SuggestUtils.analyze(suggestion.getAnalyzer(), suggestion.getText(), field, new SuggestUtils.TokenConsumer() { + DirectCandidateGenerator.analyze(suggestion.getAnalyzer(), suggestion.getText(), field, + new DirectCandidateGenerator.TokenConsumer() { @Override public void nextToken() { Term term = new Term(field, BytesRef.deepCopyOf(fillBytesRef(new BytesRefBuilder()))); diff --git a/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggestionBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggestionBuilder.java index d7ec7e6cae..31e6c3718e 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggestionBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggestionBuilder.java @@ -37,7 +37,6 @@ import org.elasticsearch.index.query.QueryParseContext; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.search.suggest.DirectSpellcheckerSettings; import org.elasticsearch.search.suggest.SortBy; -import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.SuggestionBuilder; import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext; @@ -52,16 +51,16 @@ import static org.elasticsearch.search.suggest.DirectSpellcheckerSettings.DEFAUL import static org.elasticsearch.search.suggest.DirectSpellcheckerSettings.DEFAULT_MIN_DOC_FREQ; import static org.elasticsearch.search.suggest.DirectSpellcheckerSettings.DEFAULT_MIN_WORD_LENGTH; import static org.elasticsearch.search.suggest.DirectSpellcheckerSettings.DEFAULT_PREFIX_LENGTH; -import static org.elasticsearch.search.suggest.SuggestUtils.Fields.ACCURACY; -import static org.elasticsearch.search.suggest.SuggestUtils.Fields.MAX_EDITS; -import static org.elasticsearch.search.suggest.SuggestUtils.Fields.MAX_INSPECTIONS; -import static org.elasticsearch.search.suggest.SuggestUtils.Fields.MAX_TERM_FREQ; -import static org.elasticsearch.search.suggest.SuggestUtils.Fields.MIN_DOC_FREQ; -import static org.elasticsearch.search.suggest.SuggestUtils.Fields.MIN_WORD_LENGTH; -import static org.elasticsearch.search.suggest.SuggestUtils.Fields.PREFIX_LENGTH; -import static org.elasticsearch.search.suggest.SuggestUtils.Fields.SORT; -import static org.elasticsearch.search.suggest.SuggestUtils.Fields.STRING_DISTANCE; -import static org.elasticsearch.search.suggest.SuggestUtils.Fields.SUGGEST_MODE; +import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.ACCURACY_FIELD; +import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.MAX_EDITS_FIELD; +import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.MAX_INSPECTIONS_FIELD; +import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.MAX_TERM_FREQ_FIELD; +import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.MIN_DOC_FREQ_FIELD; +import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.MIN_WORD_LENGTH_FIELD; +import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.PREFIX_LENGTH_FIELD; +import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.SORT_FIELD; +import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.STRING_DISTANCE_FIELD; +import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.SUGGESTMODE_FIELD; /** * Defines the actual suggest command. Each command uses the global options @@ -376,16 +375,16 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild @Override public XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException { - builder.field(SUGGEST_MODE.getPreferredName(), suggestMode); - builder.field(ACCURACY.getPreferredName(), accuracy); - builder.field(SORT.getPreferredName(), sort); - builder.field(STRING_DISTANCE.getPreferredName(), stringDistance); - builder.field(MAX_EDITS.getPreferredName(), maxEdits); - builder.field(MAX_INSPECTIONS.getPreferredName(), maxInspections); - builder.field(MAX_TERM_FREQ.getPreferredName(), maxTermFreq); - builder.field(PREFIX_LENGTH.getPreferredName(), prefixLength); - builder.field(MIN_WORD_LENGTH.getPreferredName(), minWordLength); - builder.field(MIN_DOC_FREQ.getPreferredName(), minDocFreq); + builder.field(SUGGESTMODE_FIELD.getPreferredName(), suggestMode); + builder.field(ACCURACY_FIELD.getPreferredName(), accuracy); + builder.field(SORT_FIELD.getPreferredName(), sort); + builder.field(STRING_DISTANCE_FIELD.getPreferredName(), stringDistance); + builder.field(MAX_EDITS_FIELD.getPreferredName(), maxEdits); + builder.field(MAX_INSPECTIONS_FIELD.getPreferredName(), maxInspections); + builder.field(MAX_TERM_FREQ_FIELD.getPreferredName(), maxTermFreq); + builder.field(PREFIX_LENGTH_FIELD.getPreferredName(), prefixLength); + builder.field(MIN_WORD_LENGTH_FIELD.getPreferredName(), minWordLength); + builder.field(MIN_DOC_FREQ_FIELD.getPreferredName(), minDocFreq); return builder; } @@ -408,25 +407,25 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild tmpSuggestion.size(parser.intValue()); } else if (parseFieldMatcher.match(currentFieldName, SuggestionBuilder.SHARDSIZE_FIELD)) { tmpSuggestion.shardSize(parser.intValue()); - } else if (parseFieldMatcher.match(currentFieldName, SUGGEST_MODE)) { + } else if (parseFieldMatcher.match(currentFieldName, SUGGESTMODE_FIELD)) { tmpSuggestion.suggestMode(SuggestMode.resolve(parser.text())); - } else if (parseFieldMatcher.match(currentFieldName, ACCURACY)) { + } else if (parseFieldMatcher.match(currentFieldName, ACCURACY_FIELD)) { tmpSuggestion.accuracy(parser.floatValue()); - } else if (parseFieldMatcher.match(currentFieldName, SORT)) { + } else if (parseFieldMatcher.match(currentFieldName, SORT_FIELD)) { tmpSuggestion.sort(SortBy.resolve(parser.text())); - } else if (parseFieldMatcher.match(currentFieldName, STRING_DISTANCE)) { + } else if (parseFieldMatcher.match(currentFieldName, STRING_DISTANCE_FIELD)) { tmpSuggestion.stringDistance(StringDistanceImpl.resolve(parser.text())); - } else if (parseFieldMatcher.match(currentFieldName, MAX_EDITS)) { + } else if (parseFieldMatcher.match(currentFieldName, MAX_EDITS_FIELD)) { tmpSuggestion.maxEdits(parser.intValue()); - } else if (parseFieldMatcher.match(currentFieldName, MAX_INSPECTIONS)) { + } else if (parseFieldMatcher.match(currentFieldName, MAX_INSPECTIONS_FIELD)) { tmpSuggestion.maxInspections(parser.intValue()); - } else if (parseFieldMatcher.match(currentFieldName, MAX_TERM_FREQ)) { + } else if (parseFieldMatcher.match(currentFieldName, MAX_TERM_FREQ_FIELD)) { tmpSuggestion.maxTermFreq(parser.floatValue()); - } else if (parseFieldMatcher.match(currentFieldName, PREFIX_LENGTH)) { + } else if (parseFieldMatcher.match(currentFieldName, PREFIX_LENGTH_FIELD)) { tmpSuggestion.prefixLength(parser.intValue()); - } else if (parseFieldMatcher.match(currentFieldName, MIN_WORD_LENGTH)) { + } else if (parseFieldMatcher.match(currentFieldName, MIN_WORD_LENGTH_FIELD)) { tmpSuggestion.minWordLength(parser.intValue()); - } else if (parseFieldMatcher.match(currentFieldName, MIN_DOC_FREQ)) { + } else if (parseFieldMatcher.match(currentFieldName, MIN_DOC_FREQ_FIELD)) { tmpSuggestion.minDocFreq(parser.floatValue()); } else { throw new ParsingException(parser.getTokenLocation(), @@ -440,7 +439,7 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild // now we should have field name, check and copy fields over to the suggestion builder we return if (fieldname == null) { throw new ElasticsearchParseException( - "the required field option [" + SuggestUtils.Fields.FIELD.getPreferredName() + "] is missing"); + "the required field option [" + FIELDNAME_FIELD.getPreferredName() + "] is missing"); } return new TermSuggestionBuilder(fieldname, tmpSuggestion); } |