Moved more token filters to the analysis-common module.

The following token filters were moved: `edge_ngram`, `ngram`, `uppercase`, `lowercase`, `length`, `flatten_graph` and `unique`. Relates to #23658
author: Martijn van Groningen <martijn.v.groningen@gmail.com> 2017-06-14 01:26:36 +0200
committer: Martijn van Groningen <martijn.v.groningen@gmail.com> 2017-06-15 18:28:31 +0200
commit: 428e70758ac6895ac995f4315412f4d3729aea9b (patch)
tree: bb6404aac053c5ece590214a33e02304c2bab694 /core/src/test/java/org/elasticsearch/index
parent: 2a78b0a19fb6584944d92ad34a91f2814b3dcbe4 (diff)
2 files changed, 0 insertions, 225 deletions
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactoryTests.java b/core/src/test/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactoryTests.java
deleted file mode 100644
index 259da010da..0000000000
--- a/core/src/test/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactoryTests.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.analysis;
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.CannedTokenStream;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.index.Index;
-import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.test.ESTokenStreamTestCase;
-import org.elasticsearch.test.IndexSettingsModule;
-
-public class FlattenGraphTokenFilterFactoryTests extends ESTokenStreamTestCase {
-
-    public void testBasic() throws IOException {
-
-        Index index = new Index("test", "_na_");
-        String name = "ngr";
-        Settings indexSettings = newAnalysisSettingsBuilder().build();
-        IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
-        Settings settings = newAnalysisSettingsBuilder().build();
-
-        // "wow that's funny" and "what the fudge" are separate side paths, in parallel with "wtf", on input:
-        TokenStream in = new CannedTokenStream(0, 12, new Token[] {
-                    token("wtf", 1, 5, 0, 3),
-                    token("what", 0, 1, 0, 3),
-                    token("wow", 0, 3, 0, 3),
-                    token("the", 1, 1, 0, 3),
-                    token("fudge", 1, 3, 0, 3),
-                    token("that's", 1, 1, 0, 3),
-                    token("funny", 1, 1, 0, 3),
-                    token("happened", 1, 1, 4, 12)
-                });
-
-        TokenStream tokens = new FlattenGraphTokenFilterFactory(indexProperties, null, name, settings).create(in);
-
-        // ... but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened:
-        assertTokenStreamContents(tokens,
-                new String[] {"wtf", "what", "wow", "the", "that's", "fudge", "funny", "happened"},
-                new int[] {0, 0, 0, 0, 0, 0, 0, 4},
-                new int[] {3, 3, 3, 3, 3, 3, 3, 12},
-                new int[] {1, 0, 0, 1, 0, 1, 0, 1},
-                new int[] {3, 1, 1, 1, 1, 1, 1, 1},
-                12);
-    }
-
-    private static Token token(String term, int posInc, int posLength, int startOffset, int endOffset) {
-        final Token t = new Token(term, startOffset, endOffset);
-        t.setPositionIncrement(posInc);
-        t.setPositionLength(posLength);
-        return t;
-    }
-}
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/NGramTokenizerFactoryTests.java b/core/src/test/java/org/elasticsearch/index/analysis/NGramTokenizerFactoryTests.java
deleted file mode 100644
index 5e1cf2e817..0000000000
--- a/core/src/test/java/org/elasticsearch/index/analysis/NGramTokenizerFactoryTests.java
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.analysis;
-
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
-import org.apache.lucene.analysis.reverse.ReverseStringFilter;
-import org.elasticsearch.Version;
-import org.elasticsearch.cluster.metadata.IndexMetaData;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.common.settings.Settings.Builder;
-import org.elasticsearch.index.Index;
-import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.test.ESTokenStreamTestCase;
-import org.elasticsearch.test.IndexSettingsModule;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.lang.reflect.Field;
-import java.lang.reflect.Modifier;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Random;
-
-import static com.carrotsearch.randomizedtesting.RandomizedTest.scaledRandomIntBetween;
-import static org.hamcrest.Matchers.instanceOf;
-
-public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
-    public void testParseTokenChars() {
-        final Index index = new Index("test", "_na_");
-        final String name = "ngr";
-        final Settings indexSettings = newAnalysisSettingsBuilder().build();
-        IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
-        for (String tokenChars : Arrays.asList("letters", "number", "DIRECTIONALITY_UNDEFINED")) {
-            final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", tokenChars).build();
-            try {
-                new NGramTokenizerFactory(indexProperties, null, name, settings).create();
-                fail();
-            } catch (IllegalArgumentException expected) {
-                // OK
-            }
-        }
-        for (String tokenChars : Arrays.asList("letter", " digit ", "punctuation", "DIGIT", "CoNtRoL", "dash_punctuation")) {
-            final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", tokenChars).build();
-            indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
-
-            new NGramTokenizerFactory(indexProperties, null, name, settings).create();
-            // no exception
-        }
-    }
-
-    public void testNoTokenChars() throws IOException {
-        final Index index = new Index("test", "_na_");
-        final String name = "ngr";
-        final Settings indexSettings = newAnalysisSettingsBuilder().build();
-        final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 4).putArray("token_chars", new String[0]).build();
-        Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
-        tokenizer.setReader(new StringReader("1.34"));
-        assertTokenStreamContents(tokenizer, new String[] {"1.", "1.3", "1.34", ".3", ".34", "34"});
-    }
-
-    public void testPreTokenization() throws IOException {
-        // Make sure that pretokenization works well and that it can be used even with token chars which are supplementary characters
-        final Index index = new Index("test", "_na_");
-        final String name = "ngr";
-        final Settings indexSettings = newAnalysisSettingsBuilder().build();
-        Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build();
-        Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
-        tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f "));
-        assertTokenStreamContents(tokenizer,
-                new String[] {"Åb", "Åbc", "bc", "dé", "déf", "éf", "g\uD801\uDC00", "g\uD801\uDC00f", "\uD801\uDC00f"});
-        settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
-        tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
-        tokenizer.setReader(new StringReader(" a!$ 9"));
-        assertTokenStreamContents(tokenizer,
-            new String[] {" a", " a!", "a!", "a!$", "!$", "!$ ", "$ ", "$ 9", " 9"});
-    }
-
-    public void testPreTokenizationEdge() throws IOException {
-        // Make sure that pretokenization works well and that it can be used even with token chars which are supplementary characters
-        final Index index = new Index("test", "_na_");
-        final String name = "ngr";
-        final Settings indexSettings = newAnalysisSettingsBuilder().build();
-        Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build();
-        Tokenizer tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
-        tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f "));
-        assertTokenStreamContents(tokenizer,
-                new String[] {"Åb", "Åbc", "dé", "déf", "g\uD801\uDC00", "g\uD801\uDC00f"});
-        settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
-        tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
-        tokenizer.setReader(new StringReader(" a!$ 9"));
-        assertTokenStreamContents(tokenizer,
-                new String[] {" a", " a!"});
-    }
-
-    public void testBackwardsCompatibilityEdgeNgramTokenFilter() throws Exception {
-        int iters = scaledRandomIntBetween(20, 100);
-        for (int i = 0; i < iters; i++) {
-            final Index index = new Index("test", "_na_");
-            final String name = "ngr";
-            Version v = randomVersion(random());
-            Builder builder = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3);
-            boolean reverse = random().nextBoolean();
-            if (reverse) {
-                builder.put("side", "back");
-            }
-            Settings settings = builder.build();
-            Settings indexSettings = newAnalysisSettingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build();
-            Tokenizer tokenizer = new MockTokenizer();
-            tokenizer.setReader(new StringReader("foo bar"));
-            TokenStream edgeNGramTokenFilter = new EdgeNGramTokenFilterFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create(tokenizer);
-            if (reverse) {
-                assertThat(edgeNGramTokenFilter, instanceOf(ReverseStringFilter.class));
-            } else {
-                assertThat(edgeNGramTokenFilter, instanceOf(EdgeNGramTokenFilter.class));
-            }
-        }
-    }
-
-
-    private Version randomVersion(Random random) throws IllegalArgumentException, IllegalAccessException {
-        Field[] declaredFields = Version.class.getFields();
-        List<Field> versionFields = new ArrayList<>();
-        for (Field field : declaredFields) {
-            if ((field.getModifiers() & Modifier.STATIC) != 0 && field.getName().startsWith("V_") && field.getType() == Version.class) {
-                versionFields.add(field);
-            }
-        }
-        return (Version) versionFields.get(random.nextInt(versionFields.size())).get(Version.class);
-    }
-
-}
author	Martijn van Groningen <martijn.v.groningen@gmail.com>	2017-06-14 01:26:36 +0200
committer	Martijn van Groningen <martijn.v.groningen@gmail.com>	2017-06-15 18:28:31 +0200
commit	428e70758ac6895ac995f4315412f4d3729aea9b (patch)
tree	bb6404aac053c5ece590214a33e02304c2bab694 /core/src/test/java/org/elasticsearch/index
parent	2a78b0a19fb6584944d92ad34a91f2814b3dcbe4 (diff)