summary | refs | log | tree | commit | diff
path: root/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java')
-rw-r--r-- plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java | 14
1 files changed, 13 insertions, 1 deletions
diff --git a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java
index 87e08c757b..9e41621525 100644
--- a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java
+++ b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java
@@ -36,9 +36,13 @@ import java.io.Reader;
public class KuromojiTokenizerFactory extends AbstractTokenizerFactory {
private static final String USER_DICT_OPTION = "user_dictionary";
+ private static final String NBEST_COST = "nbest_cost";
+ private static final String NBEST_EXAMPLES = "nbest_examples";
private final UserDictionary userDictionary;
private final Mode mode;
+ private final String nBestExamples;
+ private final int nBestCost;
private boolean discartPunctuation;
@@ -47,6 +51,8 @@ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory {
mode = getMode(settings);
userDictionary = getUserDictionary(env, settings);
discartPunctuation = settings.getAsBoolean("discard_punctuation", true);
+ nBestCost = settings.getAsInt(NBEST_COST, -1);
+ nBestExamples = settings.get(NBEST_EXAMPLES);
}
public static UserDictionary getUserDictionary(Environment env, Settings settings) {
@@ -83,7 +89,13 @@ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory {
@Override
public Tokenizer create() {
- return new JapaneseTokenizer(userDictionary, discartPunctuation, mode);
+ JapaneseTokenizer t = new JapaneseTokenizer(userDictionary, discartPunctuation, mode); // same construction as before, kept in a local so it can be configured before returning
+ int nBestCost = this.nBestCost; // local copy of the final field (read from "nbest_cost", default -1) so it can be raised below
+ if (nBestExamples != null) { // only when an "nbest_examples" value was read from the settings
+ nBestCost = Math.max(nBestCost, t.calcNBestCost(nBestExamples)); // keep the larger of the configured cost and the cost the tokenizer computes from the examples
+ }
+ t.setNBestCost(nBestCost); // always applied, including when the value is still the -1 default
+ return t;
}
}