diff options
author | Martijn van Groningen <martijn.v.groningen@gmail.com> | 2017-06-14 01:26:36 +0200 |
---|---|---|
committer | Martijn van Groningen <martijn.v.groningen@gmail.com> | 2017-06-15 18:28:31 +0200 |
commit | 428e70758ac6895ac995f4315412f4d3729aea9b (patch) | |
tree | bb6404aac053c5ece590214a33e02304c2bab694 /core | |
parent | 2a78b0a19fb6584944d92ad34a91f2814b3dcbe4 (diff) |
Moved more token filters to the analysis-common module.
The following token filters were moved: `edge_ngram`, `ngram`, `uppercase`, `lowercase`, `length`, `flatten_graph` and `unique`.
Relates to #23658
Diffstat (limited to 'core')
17 files changed, 18 insertions, 865 deletions
diff --git a/core/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java b/core/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java deleted file mode 100644 index cc853932ef..0000000000 --- a/core/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.lucene.analysis.miscellaneous; - -import org.apache.lucene.analysis.CharArraySet; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; - -import java.io.IOException; - -/** - * A token filter that generates unique tokens. Can remove unique tokens only on the same - * position increments as well. 
- */ -public class UniqueTokenFilter extends TokenFilter { - - private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); - private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class); - - private final CharArraySet previous = new CharArraySet(8, false); - private final boolean onlyOnSamePosition; - - public UniqueTokenFilter(TokenStream in) { - this(in, false); - } - - public UniqueTokenFilter(TokenStream in, boolean onlyOnSamePosition) { - super(in); - this.onlyOnSamePosition = onlyOnSamePosition; - } - - @Override - public final boolean incrementToken() throws IOException { - while (input.incrementToken()) { - final char term[] = termAttribute.buffer(); - final int length = termAttribute.length(); - - boolean duplicate; - if (onlyOnSamePosition) { - final int posIncrement = posIncAttribute.getPositionIncrement(); - if (posIncrement > 0) { - previous.clear(); - } - - duplicate = (posIncrement == 0 && previous.contains(term, 0, length)); - } else { - duplicate = previous.contains(term, 0, length); - } - - // clone the term, and add to the set of seen terms. - char saved[] = new char[length]; - System.arraycopy(term, 0, saved, 0, length); - previous.add(saved); - - if (!duplicate) { - return true; - } - } - return false; - } - - @Override - public final void reset() throws IOException { - super.reset(); - previous.clear(); - } -} - - diff --git a/core/src/main/java/org/elasticsearch/index/analysis/EdgeNGramTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/EdgeNGramTokenFilterFactory.java deleted file mode 100644 index 1d3b8e296e..0000000000 --- a/core/src/main/java/org/elasticsearch/index/analysis/EdgeNGramTokenFilterFactory.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. 
Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; -import org.apache.lucene.analysis.ngram.NGramTokenFilter; -import org.apache.lucene.analysis.reverse.ReverseStringFilter; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; - - -public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory { - - private final int minGram; - - private final int maxGram; - - public static final int SIDE_FRONT = 1; - public static final int SIDE_BACK = 2; - private final int side; - - public EdgeNGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { - super(indexSettings, name, settings); - this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE); - this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE); - this.side = parseSide(settings.get("side", "front")); - } - - static int parseSide(String side) { - switch(side) { - case "front": return SIDE_FRONT; - case "back": return SIDE_BACK; - default: throw new IllegalArgumentException("invalid side: " + side); - } - } - - @Override - public TokenStream create(TokenStream tokenStream) { - TokenStream result = tokenStream; - - 
// side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect - if (side == SIDE_BACK) { - result = new ReverseStringFilter(result); - } - - result = new EdgeNGramTokenFilter(result, minGram, maxGram); - - // side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect - if (side == SIDE_BACK) { - result = new ReverseStringFilter(result); - } - - return result; - } - - @Override - public boolean breaksFastVectorHighlighter() { - return true; - } -}
\ No newline at end of file diff --git a/core/src/main/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactory.java deleted file mode 100644 index 6c9487a2cb..0000000000 --- a/core/src/main/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactory.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.core.FlattenGraphFilter; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; - -public class FlattenGraphTokenFilterFactory extends AbstractTokenFilterFactory { - - public FlattenGraphTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { - super(indexSettings, name, settings); - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return new FlattenGraphFilter(tokenStream); - } -} diff --git a/core/src/main/java/org/elasticsearch/index/analysis/LengthTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/LengthTokenFilterFactory.java deleted file mode 100644 index 8a03802a7d..0000000000 --- a/core/src/main/java/org/elasticsearch/index/analysis/LengthTokenFilterFactory.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.LengthFilter; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; - -public class LengthTokenFilterFactory extends AbstractTokenFilterFactory { - - private final int min; - private final int max; - - // ancient unsupported option - private static final String ENABLE_POS_INC_KEY = "enable_position_increments"; - - public LengthTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { - super(indexSettings, name, settings); - min = settings.getAsInt("min", 0); - max = settings.getAsInt("max", Integer.MAX_VALUE); - if (settings.get(ENABLE_POS_INC_KEY) != null) { - throw new IllegalArgumentException(ENABLE_POS_INC_KEY + " is not supported anymore. Please fix your analysis chain"); - } - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return new LengthFilter(tokenStream, min, max); - } -} diff --git a/core/src/main/java/org/elasticsearch/index/analysis/LowerCaseTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/LowerCaseTokenFilterFactory.java deleted file mode 100644 index 1d9ca2272b..0000000000 --- a/core/src/main/java/org/elasticsearch/index/analysis/LowerCaseTokenFilterFactory.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.LowerCaseFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.el.GreekLowerCaseFilter; -import org.apache.lucene.analysis.ga.IrishLowerCaseFilter; -import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; - -/** - * Factory for {@link LowerCaseFilter} and some language-specific variants - * supported by the {@code language} parameter: - * <ul> - * <li>greek: {@link GreekLowerCaseFilter} - * <li>irish: {@link IrishLowerCaseFilter} - * <li>turkish: {@link TurkishLowerCaseFilter} - * </ul> - */ -public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent { - - private final String lang; - - public LowerCaseTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { - super(indexSettings, name, settings); - this.lang = settings.get("language", null); - } - - @Override - public TokenStream create(TokenStream tokenStream) { - if (lang == null) { - return new LowerCaseFilter(tokenStream); - } else if (lang.equalsIgnoreCase("greek")) { - return new GreekLowerCaseFilter(tokenStream); - } else if (lang.equalsIgnoreCase("irish")) { - return new IrishLowerCaseFilter(tokenStream); - } else if (lang.equalsIgnoreCase("turkish")) { - return new TurkishLowerCaseFilter(tokenStream); - } else { - 
throw new IllegalArgumentException("language [" + lang + "] not support for lower case"); - } - } - - @Override - public Object getMultiTermComponent() { - return this; - } -} - - diff --git a/core/src/main/java/org/elasticsearch/index/analysis/NGramTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/NGramTokenFilterFactory.java deleted file mode 100644 index 7926f585bc..0000000000 --- a/core/src/main/java/org/elasticsearch/index/analysis/NGramTokenFilterFactory.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.ngram.NGramTokenFilter; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; - - -public class NGramTokenFilterFactory extends AbstractTokenFilterFactory { - - private final int minGram; - - private final int maxGram; - - - public NGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { - super(indexSettings, name, settings); - this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE); - this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE); - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return new NGramTokenFilter(tokenStream, minGram, maxGram); - } -}
\ No newline at end of file diff --git a/core/src/main/java/org/elasticsearch/index/analysis/UniqueTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/UniqueTokenFilterFactory.java deleted file mode 100644 index 8606a60292..0000000000 --- a/core/src/main/java/org/elasticsearch/index/analysis/UniqueTokenFilterFactory.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; - -public class UniqueTokenFilterFactory extends AbstractTokenFilterFactory { - - private final boolean onlyOnSamePosition; - - public UniqueTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { - super(indexSettings, name, settings); - this.onlyOnSamePosition = settings.getAsBooleanLenientForPreEs6Indices( - indexSettings.getIndexVersionCreated(), "only_on_same_position", false, deprecationLogger); - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return new UniqueTokenFilter(tokenStream, onlyOnSamePosition); - } -} diff --git a/core/src/main/java/org/elasticsearch/index/analysis/UpperCaseTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/UpperCaseTokenFilterFactory.java deleted file mode 100644 index 551345fc2e..0000000000 --- a/core/src/main/java/org/elasticsearch/index/analysis/UpperCaseTokenFilterFactory.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.core.UpperCaseFilter; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; - -public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent { - - public UpperCaseTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { - super(indexSettings, name, settings); - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return new UpperCaseFilter(tokenStream); - } - - @Override - public Object getMultiTermComponent() { - return this; - } -} - - diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java index 3f26b722f4..9220c06371 100644 --- a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java +++ b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java @@ -54,14 +54,12 @@ import org.elasticsearch.index.analysis.DecimalDigitFilterFactory; import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory; import org.elasticsearch.index.analysis.DutchAnalyzerProvider; import org.elasticsearch.index.analysis.DutchStemTokenFilterFactory; -import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory; import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory; import org.elasticsearch.index.analysis.ElisionTokenFilterFactory; import org.elasticsearch.index.analysis.EnglishAnalyzerProvider; import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider; import org.elasticsearch.index.analysis.FingerprintTokenFilterFactory; import 
org.elasticsearch.index.analysis.FinnishAnalyzerProvider; -import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory; import org.elasticsearch.index.analysis.FrenchAnalyzerProvider; import org.elasticsearch.index.analysis.FrenchStemTokenFilterFactory; import org.elasticsearch.index.analysis.GalicianAnalyzerProvider; @@ -83,14 +81,11 @@ import org.elasticsearch.index.analysis.KeepWordFilterFactory; import org.elasticsearch.index.analysis.KeywordAnalyzerProvider; import org.elasticsearch.index.analysis.KeywordTokenizerFactory; import org.elasticsearch.index.analysis.LatvianAnalyzerProvider; -import org.elasticsearch.index.analysis.LengthTokenFilterFactory; import org.elasticsearch.index.analysis.LetterTokenizerFactory; import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory; import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider; -import org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory; import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory; import org.elasticsearch.index.analysis.MinHashTokenFilterFactory; -import org.elasticsearch.index.analysis.NGramTokenFilterFactory; import org.elasticsearch.index.analysis.NGramTokenizerFactory; import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider; import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory; @@ -133,8 +128,6 @@ import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.index.analysis.TruncateTokenFilterFactory; import org.elasticsearch.index.analysis.TurkishAnalyzerProvider; import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory; -import org.elasticsearch.index.analysis.UniqueTokenFilterFactory; -import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory; import org.elasticsearch.index.analysis.WhitespaceAnalyzerProvider; import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory; import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory; @@ 
-209,25 +202,16 @@ public final class AnalysisModule { NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = new NamedRegistry<>("token_filter"); tokenFilters.register("stop", StopTokenFilterFactory::new); tokenFilters.register("reverse", ReverseTokenFilterFactory::new); - tokenFilters.register("length", LengthTokenFilterFactory::new); - tokenFilters.register("lowercase", LowerCaseTokenFilterFactory::new); - tokenFilters.register("uppercase", UpperCaseTokenFilterFactory::new); tokenFilters.register("kstem", KStemTokenFilterFactory::new); tokenFilters.register("standard", StandardTokenFilterFactory::new); - tokenFilters.register("nGram", NGramTokenFilterFactory::new); - tokenFilters.register("ngram", NGramTokenFilterFactory::new); - tokenFilters.register("edgeNGram", EdgeNGramTokenFilterFactory::new); - tokenFilters.register("edge_ngram", EdgeNGramTokenFilterFactory::new); tokenFilters.register("shingle", ShingleTokenFilterFactory::new); tokenFilters.register("min_hash", MinHashTokenFilterFactory::new); - tokenFilters.register("unique", UniqueTokenFilterFactory::new); tokenFilters.register("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new)); tokenFilters.register("limit", LimitTokenCountFilterFactory::new); tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new)); tokenFilters.register("stemmer", StemmerTokenFilterFactory::new); tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new); tokenFilters.register("elision", ElisionTokenFilterFactory::new); - tokenFilters.register("flatten_graph", FlattenGraphTokenFilterFactory::new); tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new)); tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new)); tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new)); diff --git 
a/core/src/test/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilterTests.java b/core/src/test/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilterTests.java deleted file mode 100644 index 324e422531..0000000000 --- a/core/src/test/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilterTests.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.lucene.analysis.miscellaneous; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.elasticsearch.test.ESTestCase; - -import java.io.IOException; - -import static org.hamcrest.Matchers.equalTo; - -public class UniqueTokenFilterTests extends ESTestCase { - public void testSimple() throws IOException { - Analyzer analyzer = new Analyzer() { - @Override - protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false); - return new TokenStreamComponents(t, new UniqueTokenFilter(t)); - } - }; - - TokenStream test = analyzer.tokenStream("test", "this test with test"); - test.reset(); - CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class); - assertThat(test.incrementToken(), equalTo(true)); - assertThat(termAttribute.toString(), equalTo("this")); - - assertThat(test.incrementToken(), equalTo(true)); - assertThat(termAttribute.toString(), equalTo("test")); - - assertThat(test.incrementToken(), equalTo(true)); - assertThat(termAttribute.toString(), equalTo("with")); - - assertThat(test.incrementToken(), equalTo(false)); - } -} diff --git a/core/src/test/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactoryTests.java b/core/src/test/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactoryTests.java deleted file mode 100644 index 259da010da..0000000000 --- a/core/src/test/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactoryTests.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. 
Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.analysis; - -import java.io.IOException; - -import org.apache.lucene.analysis.CannedTokenStream; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.TokenStream; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.Index; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.test.ESTokenStreamTestCase; -import org.elasticsearch.test.IndexSettingsModule; - -public class FlattenGraphTokenFilterFactoryTests extends ESTokenStreamTestCase { - - public void testBasic() throws IOException { - - Index index = new Index("test", "_na_"); - String name = "ngr"; - Settings indexSettings = newAnalysisSettingsBuilder().build(); - IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings); - Settings settings = newAnalysisSettingsBuilder().build(); - - // "wow that's funny" and "what the fudge" are separate side paths, in parallel with "wtf", on input: - TokenStream in = new CannedTokenStream(0, 12, new Token[] { - token("wtf", 1, 5, 0, 3), - token("what", 0, 1, 0, 3), - token("wow", 0, 3, 0, 3), - token("the", 1, 1, 0, 3), - token("fudge", 1, 3, 0, 3), - token("that's", 1, 1, 0, 3), - token("funny", 1, 1, 0, 3), - token("happened", 1, 1, 4, 12) - }); - - TokenStream tokens = new FlattenGraphTokenFilterFactory(indexProperties, null, name, 
settings).create(in); - - // ... but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened: - assertTokenStreamContents(tokens, - new String[] {"wtf", "what", "wow", "the", "that's", "fudge", "funny", "happened"}, - new int[] {0, 0, 0, 0, 0, 0, 0, 4}, - new int[] {3, 3, 3, 3, 3, 3, 3, 12}, - new int[] {1, 0, 0, 1, 0, 1, 0, 1}, - new int[] {3, 1, 1, 1, 1, 1, 1, 1}, - 12); - } - - private static Token token(String term, int posInc, int posLength, int startOffset, int endOffset) { - final Token t = new Token(term, startOffset, endOffset); - t.setPositionIncrement(posInc); - t.setPositionLength(posLength); - return t; - } -} diff --git a/core/src/test/java/org/elasticsearch/index/analysis/NGramTokenizerFactoryTests.java b/core/src/test/java/org/elasticsearch/index/analysis/NGramTokenizerFactoryTests.java deleted file mode 100644 index 5e1cf2e817..0000000000 --- a/core/src/test/java/org/elasticsearch/index/analysis/NGramTokenizerFactoryTests.java +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; -import org.apache.lucene.analysis.reverse.ReverseStringFilter; -import org.elasticsearch.Version; -import org.elasticsearch.cluster.metadata.IndexMetaData; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.settings.Settings.Builder; -import org.elasticsearch.index.Index; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.test.ESTokenStreamTestCase; -import org.elasticsearch.test.IndexSettingsModule; - -import java.io.IOException; -import java.io.StringReader; -import java.lang.reflect.Field; -import java.lang.reflect.Modifier; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Random; - -import static com.carrotsearch.randomizedtesting.RandomizedTest.scaledRandomIntBetween; -import static org.hamcrest.Matchers.instanceOf; - -public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase { - public void testParseTokenChars() { - final Index index = new Index("test", "_na_"); - final String name = "ngr"; - final Settings indexSettings = newAnalysisSettingsBuilder().build(); - IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings); - for (String tokenChars : Arrays.asList("letters", "number", "DIRECTIONALITY_UNDEFINED")) { - final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", tokenChars).build(); - try { - new NGramTokenizerFactory(indexProperties, null, name, settings).create(); - fail(); - } catch (IllegalArgumentException expected) { - // OK - } - } - for (String tokenChars : Arrays.asList("letter", " digit ", "punctuation", "DIGIT", "CoNtRoL", "dash_punctuation")) { - final Settings settings = 
newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", tokenChars).build(); - indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings); - - new NGramTokenizerFactory(indexProperties, null, name, settings).create(); - // no exception - } - } - - public void testNoTokenChars() throws IOException { - final Index index = new Index("test", "_na_"); - final String name = "ngr"; - final Settings indexSettings = newAnalysisSettingsBuilder().build(); - final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 4).putArray("token_chars", new String[0]).build(); - Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create(); - tokenizer.setReader(new StringReader("1.34")); - assertTokenStreamContents(tokenizer, new String[] {"1.", "1.3", "1.34", ".3", ".34", "34"}); - } - - public void testPreTokenization() throws IOException { - // Make sure that pretokenization works well and that it can be used even with token chars which are supplementary characters - final Index index = new Index("test", "_na_"); - final String name = "ngr"; - final Settings indexSettings = newAnalysisSettingsBuilder().build(); - Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build(); - Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create(); - tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f ")); - assertTokenStreamContents(tokenizer, - new String[] {"Åb", "Åbc", "bc", "dé", "déf", "éf", "g\uD801\uDC00", "g\uD801\uDC00f", "\uD801\uDC00f"}); - settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit,punctuation,whitespace,symbol").build(); - tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, 
indexSettings), null, name, settings).create(); - tokenizer.setReader(new StringReader(" a!$ 9")); - assertTokenStreamContents(tokenizer, - new String[] {" a", " a!", "a!", "a!$", "!$", "!$ ", "$ ", "$ 9", " 9"}); - } - - public void testPreTokenizationEdge() throws IOException { - // Make sure that pretokenization works well and that it can be used even with token chars which are supplementary characters - final Index index = new Index("test", "_na_"); - final String name = "ngr"; - final Settings indexSettings = newAnalysisSettingsBuilder().build(); - Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build(); - Tokenizer tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create(); - tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f ")); - assertTokenStreamContents(tokenizer, - new String[] {"Åb", "Åbc", "dé", "déf", "g\uD801\uDC00", "g\uD801\uDC00f"}); - settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit,punctuation,whitespace,symbol").build(); - tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create(); - tokenizer.setReader(new StringReader(" a!$ 9")); - assertTokenStreamContents(tokenizer, - new String[] {" a", " a!"}); - } - - public void testBackwardsCompatibilityEdgeNgramTokenFilter() throws Exception { - int iters = scaledRandomIntBetween(20, 100); - for (int i = 0; i < iters; i++) { - final Index index = new Index("test", "_na_"); - final String name = "ngr"; - Version v = randomVersion(random()); - Builder builder = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3); - boolean reverse = random().nextBoolean(); - if (reverse) { - builder.put("side", "back"); - } - Settings settings = builder.build(); - Settings indexSettings = 
newAnalysisSettingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build(); - Tokenizer tokenizer = new MockTokenizer(); - tokenizer.setReader(new StringReader("foo bar")); - TokenStream edgeNGramTokenFilter = new EdgeNGramTokenFilterFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create(tokenizer); - if (reverse) { - assertThat(edgeNGramTokenFilter, instanceOf(ReverseStringFilter.class)); - } else { - assertThat(edgeNGramTokenFilter, instanceOf(EdgeNGramTokenFilter.class)); - } - } - } - - - private Version randomVersion(Random random) throws IllegalArgumentException, IllegalAccessException { - Field[] declaredFields = Version.class.getFields(); - List<Field> versionFields = new ArrayList<>(); - for (Field field : declaredFields) { - if ((field.getModifiers() & Modifier.STATIC) != 0 && field.getName().startsWith("V_") && field.getType() == Version.class) { - versionFields.add(field); - } - } - return (Version) versionFields.get(random.nextInt(versionFields.size())).get(Version.class); - } - -} diff --git a/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index 9cbd9fc5d7..2bc98b39dc 100644 --- a/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -19,7 +19,6 @@ package org.elasticsearch.search.fetch.subphase.highlight; import com.carrotsearch.randomizedtesting.generators.RandomPicks; - import org.apache.lucene.search.join.ScoreMode; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchRequestBuilder; @@ -214,54 +213,6 @@ public class HighlighterSearchIT extends ESIntegTestCase { assertHighlight(search, 0, "name", 0, startsWith("<em>abc</em> <em>abc</em> <em>abc</em> 
<em>abc</em>")); } - public void testNgramHighlighting() throws IOException { - assertAcked(prepareCreate("test") - .addMapping("test", - "name", "type=text,analyzer=name_index_analyzer,search_analyzer=name_search_analyzer," - + "term_vector=with_positions_offsets", - "name2", "type=text,analyzer=name2_index_analyzer,search_analyzer=name_search_analyzer," - + "term_vector=with_positions_offsets") - .setSettings(Settings.builder() - .put(indexSettings()) - .put("analysis.filter.my_ngram.max_gram", 20) - .put("analysis.filter.my_ngram.min_gram", 1) - .put("analysis.filter.my_ngram.type", "ngram") - .put("analysis.tokenizer.my_ngramt.max_gram", 20) - .put("analysis.tokenizer.my_ngramt.min_gram", 1) - .put("analysis.tokenizer.my_ngramt.token_chars", "letter,digit") - .put("analysis.tokenizer.my_ngramt.type", "ngram") - .put("analysis.analyzer.name_index_analyzer.tokenizer", "my_ngramt") - .put("analysis.analyzer.name2_index_analyzer.tokenizer", "whitespace") - .put("analysis.analyzer.name2_index_analyzer.filter", "my_ngram") - .put("analysis.analyzer.name_search_analyzer.tokenizer", "whitespace"))); - client().prepareIndex("test", "test", "1") - .setSource("name", "logicacmg ehemals avinci - the know how company", - "name2", "logicacmg ehemals avinci - the know how company").get(); - refresh(); - ensureGreen(); - SearchResponse search = client().prepareSearch().setQuery(matchQuery("name", "logica m")) - .highlighter(new HighlightBuilder().field("name")).get(); - assertHighlight(search, 0, "name", 0, - equalTo("<em>logica</em>c<em>m</em>g ehe<em>m</em>als avinci - the know how co<em>m</em>pany")); - - search = client().prepareSearch().setQuery(matchQuery("name", "logica ma")).highlighter(new HighlightBuilder().field("name")).get(); - assertHighlight(search, 0, "name", 0, equalTo("<em>logica</em>cmg ehe<em>ma</em>ls avinci - the know how company")); - - search = client().prepareSearch().setQuery(matchQuery("name", "logica")).highlighter(new 
HighlightBuilder().field("name")).get(); - assertHighlight(search, 0, "name", 0, equalTo("<em>logica</em>cmg ehemals avinci - the know how company")); - - search = client().prepareSearch().setQuery(matchQuery("name2", "logica m")).highlighter(new HighlightBuilder().field("name2")) - .get(); - assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> <em>ehemals</em> avinci - the know how <em>company</em>")); - - search = client().prepareSearch().setQuery(matchQuery("name2", "logica ma")).highlighter(new HighlightBuilder().field("name2")) - .get(); - assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> <em>ehemals</em> avinci - the know how company")); - - search = client().prepareSearch().setQuery(matchQuery("name2", "logica")).highlighter(new HighlightBuilder().field("name2")).get(); - assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> ehemals avinci - the know how company")); - } - public void testEnsureNoNegativeOffsets() throws Exception { assertAcked(prepareCreate("test") .addMapping("type1", diff --git a/core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java b/core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java index 05a7227636..bd8cfbcaa5 100644 --- a/core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java +++ b/core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java @@ -19,16 +19,6 @@ package org.elasticsearch.search.query; -import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery; -import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoSearchHits; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits; -import static 
org.hamcrest.Matchers.containsInAnyOrder; -import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.equalTo; - import org.apache.lucene.util.LuceneTestCase; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; @@ -56,6 +46,16 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery; +import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoSearchHits; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; + public class QueryStringIT extends ESIntegTestCase { @Override protected Collection<Class<? 
extends Plugin>> nodePlugins() { @@ -91,10 +91,6 @@ public class QueryStringIT extends ESIntegTestCase { resp = client().prepareSearch("test").setQuery(queryStringQuery("Bar")).get(); assertHitCount(resp, 3L); assertHits(resp.getHits(), "1", "2", "3"); - - resp = client().prepareSearch("test").setQuery(queryStringQuery("foa")).get(); - assertHitCount(resp, 1L); - assertHits(resp.getHits(), "3"); } public void testWithDate() throws Exception { @@ -161,8 +157,6 @@ public class QueryStringIT extends ESIntegTestCase { assertHits(resp.getHits(), "1"); resp = client().prepareSearch("test").setQuery(queryStringQuery("Baz")).get(); assertHits(resp.getHits(), "1"); - resp = client().prepareSearch("test").setQuery(queryStringQuery("sbaz")).get(); - assertHits(resp.getHits(), "1"); resp = client().prepareSearch("test").setQuery(queryStringQuery("19")).get(); assertHits(resp.getHits(), "1"); // nested doesn't match because it's hidden @@ -223,11 +217,11 @@ public class QueryStringIT extends ESIntegTestCase { indexRandom(true, false, reqs); SearchResponse resp = client().prepareSearch("test2").setQuery( - queryStringQuery("foo eggplent").defaultOperator(Operator.AND)).get(); + queryStringQuery("foo eggplant").defaultOperator(Operator.AND)).get(); assertHitCount(resp, 0L); resp = client().prepareSearch("test2").setQuery( - queryStringQuery("foo eggplent").defaultOperator(Operator.AND).useAllFields(true)).get(); + queryStringQuery("foo eggplant").defaultOperator(Operator.AND).useAllFields(true)).get(); assertHits(resp.getHits(), "1"); assertHitCount(resp, 1L); diff --git a/core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java b/core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java index f22ec392b9..a32a806037 100644 --- a/core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java +++ b/core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java @@ -398,10 +398,6 @@ public class SimpleQueryStringIT extends 
ESIntegTestCase { resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("Bar")).get(); assertHitCount(resp, 3L); assertHits(resp.getHits(), "1", "2", "3"); - - resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("foa")).get(); - assertHitCount(resp, 1L); - assertHits(resp.getHits(), "3"); } public void testWithDate() throws Exception { @@ -480,8 +476,6 @@ public class SimpleQueryStringIT extends ESIntegTestCase { assertHits(resp.getHits(), "1"); resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("Baz")).get(); assertHits(resp.getHits(), "1"); - resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("sbaz")).get(); - assertHits(resp.getHits(), "1"); resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("19")).get(); assertHits(resp.getHits(), "1"); // nested doesn't match because it's hidden @@ -547,11 +541,11 @@ public class SimpleQueryStringIT extends ESIntegTestCase { indexRandom(true, false, reqs); SearchResponse resp = client().prepareSearch("test").setQuery( - simpleQueryStringQuery("foo eggplent").defaultOperator(Operator.AND)).get(); + simpleQueryStringQuery("foo eggplant").defaultOperator(Operator.AND)).get(); assertHitCount(resp, 0L); resp = client().prepareSearch("test").setQuery( - simpleQueryStringQuery("foo eggplent").defaultOperator(Operator.AND).useAllFields(true)).get(); + simpleQueryStringQuery("foo eggplant").defaultOperator(Operator.AND).useAllFields(true)).get(); assertHits(resp.getHits(), "1"); assertHitCount(resp, 1L); diff --git a/core/src/test/resources/org/elasticsearch/search/query/all-query-index-with-all.json b/core/src/test/resources/org/elasticsearch/search/query/all-query-index-with-all.json index 1a96fd7133..d9cbb485d1 100644 --- a/core/src/test/resources/org/elasticsearch/search/query/all-query-index-with-all.json +++ b/core/src/test/resources/org/elasticsearch/search/query/all-query-index-with-all.json @@ -6,22 +6,7 @@ "version": { "created": 
"5000099" }, - "analysis": { - "analyzer": { - "my_ngrams": { - "type": "custom", - "tokenizer": "standard", - "filter": ["my_ngrams"] - } - }, - "filter": { - "my_ngrams": { - "type": "ngram", - "min_gram": 2, - "max_gram": 2 - } - } - } + "query.default_field": "f1" } }, "mappings": { @@ -31,7 +16,7 @@ }, "properties": { "f1": {"type": "text"}, - "f2": {"type": "text", "analyzer": "my_ngrams"} + "f2": {"type": "text"} } } } diff --git a/core/src/test/resources/org/elasticsearch/search/query/all-query-index.json b/core/src/test/resources/org/elasticsearch/search/query/all-query-index.json index 86dde5aaf8..89c4121712 100644 --- a/core/src/test/resources/org/elasticsearch/search/query/all-query-index.json +++ b/core/src/test/resources/org/elasticsearch/search/query/all-query-index.json @@ -2,23 +2,7 @@ "settings": { "index": { "number_of_shards": 1, - "number_of_replicas": 0, - "analysis": { - "analyzer": { - "my_ngrams": { - "type": "custom", - "tokenizer": "standard", - "filter": ["my_ngrams"] - } - }, - "filter": { - "my_ngrams": { - "type": "ngram", - "min_gram": 2, - "max_gram": 2 - } - } - } + "number_of_replicas": 0 } }, "mappings": { @@ -26,7 +10,7 @@ "properties": { "f1": {"type": "text"}, "f2": {"type": "keyword"}, - "f3": {"type": "text", "analyzer": "my_ngrams"}, + "f3": {"type": "text"}, "f4": { "type": "text", "index_options": "docs" |