Moved more token filters to the analysis-common module.

The following token filters were moved: `edge_ngram`, `ngram`, `uppercase`, `lowercase`, `length`, `flatten_graph` and `unique`.

Relates to #23658
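
Registration now goes through the `AnalysisPlugin#getTokenFilters` extension point instead of `AnalysisModule`; see the `CommonAnalysisPlugin` hunk below. A minimal sketch of that pattern, assuming the factory classes are visible from the plugin's package (as they are inside analysis-common); the plugin class name here is hypothetical:

import java.util.Map;
import java.util.TreeMap;

import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;

// Hypothetical plugin illustrating the registration pattern this commit
// applies in CommonAnalysisPlugin: each moved filter is exposed under its
// public name(s); the factory constructors match AnalysisProvider#get.
public class MyAnalysisPlugin extends Plugin implements AnalysisPlugin {
    @Override
    public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
        Map<String, AnalysisProvider<TokenFilterFactory>> filters = new TreeMap<>();
        filters.put("ngram", NGramTokenFilterFactory::new);
        filters.put("nGram", NGramTokenFilterFactory::new);          // legacy camelCase alias
        filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
        filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new);  // legacy camelCase alias
        return filters;
    }
}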
Martijn van Groningen 8 years ago
parent
commit
428e70758a
24 changed files with 470 additions and 196 deletions
  1. + 0 - 1
      buildSrc/src/main/resources/checkstyle_suppressions.xml
  2. + 0 - 16
      core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
  3. + 0 - 49
      core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java
  4. + 12 - 18
      core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java
  5. + 2 - 8
      core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java
  6. + 2 - 17
      core/src/test/resources/org/elasticsearch/search/query/all-query-index-with-all.json
  7. + 2 - 18
      core/src/test/resources/org/elasticsearch/search/query/all-query-index.json
  8. + 11 - 3
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
  9. + 9 - 8
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EdgeNGramTokenFilterFactory.java
  10. + 3 - 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactory.java
  11. + 4 - 3
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LengthTokenFilterFactory.java
  12. + 4 - 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LowerCaseTokenFilterFactory.java
  13. + 4 - 3
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/NGramTokenFilterFactory.java
  14. + 4 - 4
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UniqueTokenFilter.java
  15. + 3 - 3
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UniqueTokenFilterFactory.java
  16. + 3 - 1
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UpperCaseTokenFilterFactory.java
  17. + 16 - 7
      modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java
  18. + 3 - 3
      modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactoryTests.java
  19. + 26 - 12
      modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java
  20. + 1 - 1
      modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/UniqueTokenFilterTests.java
  21. + 182 - 0
      modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml
  22. + 41 - 0
      modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/20_ngram_search.yml
  23. + 129 - 0
      modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/30_ngram_highligthing.yml
  24. + 9 - 17
      test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java

+ 0 - 1
buildSrc/src/main/resources/checkstyle_suppressions.xml

@@ -571,7 +571,6 @@
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]IndexingSlowLogTests.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]MergePolicySettingsTests.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]SearchSlowLogTests.java" checks="LineLength" />
-  <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]NGramTokenizerFactoryTests.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PatternCaptureTokenFilterTests.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PreBuiltAnalyzerTests.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]commongrams[/\\]CommonGramsTokenFilterFactoryTests.java" checks="LineLength" />

+ 0 - 16
core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java

@@ -54,14 +54,12 @@ import org.elasticsearch.index.analysis.DecimalDigitFilterFactory;
 import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
 import org.elasticsearch.index.analysis.DutchAnalyzerProvider;
 import org.elasticsearch.index.analysis.DutchStemTokenFilterFactory;
-import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory;
 import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
 import org.elasticsearch.index.analysis.ElisionTokenFilterFactory;
 import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
 import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
 import org.elasticsearch.index.analysis.FingerprintTokenFilterFactory;
 import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
-import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory;
 import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
 import org.elasticsearch.index.analysis.FrenchStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
@@ -83,14 +81,11 @@ import org.elasticsearch.index.analysis.KeepWordFilterFactory;
 import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
 import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
 import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
-import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
 import org.elasticsearch.index.analysis.LetterTokenizerFactory;
 import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
 import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider;
-import org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory;
 import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
 import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
-import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
 import org.elasticsearch.index.analysis.NGramTokenizerFactory;
 import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider;
 import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
@@ -133,8 +128,6 @@ import org.elasticsearch.index.analysis.TokenizerFactory;
 import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
 import org.elasticsearch.index.analysis.TurkishAnalyzerProvider;
 import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
-import org.elasticsearch.index.analysis.UniqueTokenFilterFactory;
-import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
 import org.elasticsearch.index.analysis.WhitespaceAnalyzerProvider;
 import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
 import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
@@ -209,25 +202,16 @@ public final class AnalysisModule {
         NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = new NamedRegistry<>("token_filter");
         tokenFilters.register("stop", StopTokenFilterFactory::new);
         tokenFilters.register("reverse", ReverseTokenFilterFactory::new);
-        tokenFilters.register("length", LengthTokenFilterFactory::new);
-        tokenFilters.register("lowercase", LowerCaseTokenFilterFactory::new);
-        tokenFilters.register("uppercase", UpperCaseTokenFilterFactory::new);
         tokenFilters.register("kstem", KStemTokenFilterFactory::new);
         tokenFilters.register("standard", StandardTokenFilterFactory::new);
-        tokenFilters.register("nGram", NGramTokenFilterFactory::new);
-        tokenFilters.register("ngram", NGramTokenFilterFactory::new);
-        tokenFilters.register("edgeNGram", EdgeNGramTokenFilterFactory::new);
-        tokenFilters.register("edge_ngram", EdgeNGramTokenFilterFactory::new);
         tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
         tokenFilters.register("min_hash", MinHashTokenFilterFactory::new);
-        tokenFilters.register("unique", UniqueTokenFilterFactory::new);
         tokenFilters.register("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new));
         tokenFilters.register("limit", LimitTokenCountFilterFactory::new);
         tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
         tokenFilters.register("stemmer", StemmerTokenFilterFactory::new);
         tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
         tokenFilters.register("elision", ElisionTokenFilterFactory::new);
-        tokenFilters.register("flatten_graph", FlattenGraphTokenFilterFactory::new);
         tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new));
         tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new));
         tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));

+ 0 - 49
core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java

@@ -19,7 +19,6 @@
 package org.elasticsearch.search.fetch.subphase.highlight;
 
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
-
 import org.apache.lucene.search.join.ScoreMode;
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchRequestBuilder;
@@ -214,54 +213,6 @@ public class HighlighterSearchIT extends ESIntegTestCase {
         assertHighlight(search, 0, "name", 0, startsWith("<em>abc</em> <em>abc</em> <em>abc</em> <em>abc</em>"));
     }
 
-    public void testNgramHighlighting() throws IOException {
-        assertAcked(prepareCreate("test")
-                .addMapping("test",
-                        "name", "type=text,analyzer=name_index_analyzer,search_analyzer=name_search_analyzer,"
-                            + "term_vector=with_positions_offsets",
-                        "name2", "type=text,analyzer=name2_index_analyzer,search_analyzer=name_search_analyzer,"
-                            + "term_vector=with_positions_offsets")
-                .setSettings(Settings.builder()
-                        .put(indexSettings())
-                        .put("analysis.filter.my_ngram.max_gram", 20)
-                        .put("analysis.filter.my_ngram.min_gram", 1)
-                        .put("analysis.filter.my_ngram.type", "ngram")
-                        .put("analysis.tokenizer.my_ngramt.max_gram", 20)
-                        .put("analysis.tokenizer.my_ngramt.min_gram", 1)
-                        .put("analysis.tokenizer.my_ngramt.token_chars", "letter,digit")
-                        .put("analysis.tokenizer.my_ngramt.type", "ngram")
-                        .put("analysis.analyzer.name_index_analyzer.tokenizer", "my_ngramt")
-                        .put("analysis.analyzer.name2_index_analyzer.tokenizer", "whitespace")
-                        .put("analysis.analyzer.name2_index_analyzer.filter", "my_ngram")
-                        .put("analysis.analyzer.name_search_analyzer.tokenizer", "whitespace")));
-        client().prepareIndex("test", "test", "1")
-            .setSource("name", "logicacmg ehemals avinci - the know how company",
-                       "name2", "logicacmg ehemals avinci - the know how company").get();
-        refresh();
-        ensureGreen();
-        SearchResponse search = client().prepareSearch().setQuery(matchQuery("name", "logica m"))
-                .highlighter(new HighlightBuilder().field("name")).get();
-        assertHighlight(search, 0, "name", 0,
-                equalTo("<em>logica</em>c<em>m</em>g ehe<em>m</em>als avinci - the know how co<em>m</em>pany"));
-
-        search = client().prepareSearch().setQuery(matchQuery("name", "logica ma")).highlighter(new HighlightBuilder().field("name")).get();
-        assertHighlight(search, 0, "name", 0, equalTo("<em>logica</em>cmg ehe<em>ma</em>ls avinci - the know how company"));
-
-        search = client().prepareSearch().setQuery(matchQuery("name", "logica")).highlighter(new HighlightBuilder().field("name")).get();
-        assertHighlight(search, 0, "name", 0, equalTo("<em>logica</em>cmg ehemals avinci - the know how company"));
-
-        search = client().prepareSearch().setQuery(matchQuery("name2", "logica m")).highlighter(new HighlightBuilder().field("name2"))
-                .get();
-        assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> <em>ehemals</em> avinci - the know how <em>company</em>"));
-
-        search = client().prepareSearch().setQuery(matchQuery("name2", "logica ma")).highlighter(new HighlightBuilder().field("name2"))
-                .get();
-        assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> <em>ehemals</em> avinci - the know how company"));
-
-        search = client().prepareSearch().setQuery(matchQuery("name2", "logica")).highlighter(new HighlightBuilder().field("name2")).get();
-        assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> ehemals avinci - the know how company"));
-    }
-
     public void testEnsureNoNegativeOffsets() throws Exception {
         assertAcked(prepareCreate("test")
                 .addMapping("type1",

+ 12 - 18
core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java

@@ -19,16 +19,6 @@
 
 package org.elasticsearch.search.query;
 
-import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
-import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoSearchHits;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
-import static org.hamcrest.Matchers.containsInAnyOrder;
-import static org.hamcrest.Matchers.containsString;
-import static org.hamcrest.Matchers.equalTo;
-
 import org.apache.lucene.util.LuceneTestCase;
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
@@ -56,6 +46,16 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
+import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
+import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoSearchHits;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+
 public class QueryStringIT extends ESIntegTestCase {
     @Override
     protected Collection<Class<? extends Plugin>> nodePlugins() {
@@ -91,10 +91,6 @@ public class QueryStringIT extends ESIntegTestCase {
         resp = client().prepareSearch("test").setQuery(queryStringQuery("Bar")).get();
         assertHitCount(resp, 3L);
         assertHits(resp.getHits(), "1", "2", "3");
-
-        resp = client().prepareSearch("test").setQuery(queryStringQuery("foa")).get();
-        assertHitCount(resp, 1L);
-        assertHits(resp.getHits(), "3");
     }
 
     public void testWithDate() throws Exception {
@@ -161,8 +157,6 @@ public class QueryStringIT extends ESIntegTestCase {
         assertHits(resp.getHits(), "1");
         resp = client().prepareSearch("test").setQuery(queryStringQuery("Baz")).get();
         assertHits(resp.getHits(), "1");
-        resp = client().prepareSearch("test").setQuery(queryStringQuery("sbaz")).get();
-        assertHits(resp.getHits(), "1");
         resp = client().prepareSearch("test").setQuery(queryStringQuery("19")).get();
         assertHits(resp.getHits(), "1");
         // nested doesn't match because it's hidden
@@ -223,11 +217,11 @@ public class QueryStringIT extends ESIntegTestCase {
         indexRandom(true, false, reqs);
 
         SearchResponse resp = client().prepareSearch("test2").setQuery(
-                queryStringQuery("foo eggplent").defaultOperator(Operator.AND)).get();
+                queryStringQuery("foo eggplant").defaultOperator(Operator.AND)).get();
         assertHitCount(resp, 0L);
 
         resp = client().prepareSearch("test2").setQuery(
-                queryStringQuery("foo eggplent").defaultOperator(Operator.AND).useAllFields(true)).get();
+                queryStringQuery("foo eggplant").defaultOperator(Operator.AND).useAllFields(true)).get();
         assertHits(resp.getHits(), "1");
         assertHitCount(resp, 1L);
 

+ 2 - 8
core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java

@@ -398,10 +398,6 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
         resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("Bar")).get();
         assertHitCount(resp, 3L);
         assertHits(resp.getHits(), "1", "2", "3");
-
-        resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("foa")).get();
-        assertHitCount(resp, 1L);
-        assertHits(resp.getHits(), "3");
     }
 
     public void testWithDate() throws Exception {
@@ -480,8 +476,6 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
         assertHits(resp.getHits(), "1");
         resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("Baz")).get();
         assertHits(resp.getHits(), "1");
-        resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("sbaz")).get();
-        assertHits(resp.getHits(), "1");
         resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("19")).get();
         assertHits(resp.getHits(), "1");
         // nested doesn't match because it's hidden
@@ -547,11 +541,11 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
         indexRandom(true, false, reqs);
 
         SearchResponse resp = client().prepareSearch("test").setQuery(
-                simpleQueryStringQuery("foo eggplent").defaultOperator(Operator.AND)).get();
+                simpleQueryStringQuery("foo eggplant").defaultOperator(Operator.AND)).get();
         assertHitCount(resp, 0L);
 
         resp = client().prepareSearch("test").setQuery(
-                simpleQueryStringQuery("foo eggplent").defaultOperator(Operator.AND).useAllFields(true)).get();
+                simpleQueryStringQuery("foo eggplant").defaultOperator(Operator.AND).useAllFields(true)).get();
         assertHits(resp.getHits(), "1");
         assertHitCount(resp, 1L);
 

+ 2 - 17
core/src/test/resources/org/elasticsearch/search/query/all-query-index-with-all.json

@@ -6,22 +6,7 @@
       "version": {
         "created": "5000099"
       },
-      "analysis": {
-        "analyzer": {
-          "my_ngrams": {
-            "type": "custom",
-            "tokenizer": "standard",
-            "filter": ["my_ngrams"]
-          }
-        },
-        "filter": {
-          "my_ngrams": {
-            "type": "ngram",
-            "min_gram": 2,
-            "max_gram": 2
-          }
-        }
-      }
+      "query.default_field": "f1"
     }
   },
   "mappings": {
@@ -31,7 +16,7 @@
       },
       "properties": {
         "f1": {"type": "text"},
-        "f2": {"type": "text", "analyzer": "my_ngrams"}
+        "f2": {"type": "text"}
       }
     }
   }

+ 2 - 18
core/src/test/resources/org/elasticsearch/search/query/all-query-index.json

@@ -2,23 +2,7 @@
   "settings": {
     "index": {
       "number_of_shards": 1,
-      "number_of_replicas": 0,
-      "analysis": {
-        "analyzer": {
-          "my_ngrams": {
-            "type": "custom",
-            "tokenizer": "standard",
-            "filter": ["my_ngrams"]
-          }
-        },
-        "filter": {
-          "my_ngrams": {
-            "type": "ngram",
-            "min_gram": 2,
-            "max_gram": 2
-          }
-        }
-      }
+      "number_of_replicas": 0
     }
   },
   "mappings": {
@@ -26,7 +10,7 @@
       "properties": {
         "f1": {"type": "text"},
         "f2": {"type": "keyword"},
-        "f3": {"type": "text", "analyzer": "my_ngrams"},
+        "f3": {"type": "text"},
         "f4": {
           "type": "text",
           "index_options": "docs"

+ 11 - 3
modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

@@ -52,7 +52,6 @@ import org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilter;
 import org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilter;
 import org.apache.lucene.analysis.miscellaneous.TrimFilter;
 import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter;
-import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;
 import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
 import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
@@ -98,6 +97,15 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
         filters.put("trim", TrimTokenFilterFactory::new);
         filters.put("word_delimiter", WordDelimiterTokenFilterFactory::new);
         filters.put("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new);
+        filters.put("unique", UniqueTokenFilterFactory::new);
+        filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
+        filters.put("length", LengthTokenFilterFactory::new);
+        filters.put("lowercase", LowerCaseTokenFilterFactory::new);
+        filters.put("uppercase", UpperCaseTokenFilterFactory::new);
+        filters.put("nGram", NGramTokenFilterFactory::new);
+        filters.put("ngram", NGramTokenFilterFactory::new);
+        filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new);
+        filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
         return filters;
     }
 
@@ -172,7 +180,7 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
         filters.add(PreConfiguredTokenFilter.singleton("nGram", false, NGramTokenFilter::new));
         filters.add(PreConfiguredTokenFilter.singleton("persian_normalization", true, PersianNormalizationFilter::new));
         filters.add(PreConfiguredTokenFilter.singleton("porter_stem", false, PorterStemFilter::new));
-        filters.add(PreConfiguredTokenFilter.singleton("reverse", false, input -> new ReverseStringFilter(input)));
+        filters.add(PreConfiguredTokenFilter.singleton("reverse", false, ReverseStringFilter::new));
         filters.add(PreConfiguredTokenFilter.singleton("russian_stem", false, input -> new SnowballFilter(input, "Russian")));
         filters.add(PreConfiguredTokenFilter.singleton("scandinavian_folding", true, ScandinavianFoldingFilter::new));
         filters.add(PreConfiguredTokenFilter.singleton("scandinavian_normalization", true, ScandinavianNormalizationFilter::new));
@@ -185,7 +193,7 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
         filters.add(PreConfiguredTokenFilter.singleton("trim", false, TrimFilter::new));
         filters.add(PreConfiguredTokenFilter.singleton("truncate", false, input -> new TruncateTokenFilter(input, 10)));
         filters.add(PreConfiguredTokenFilter.singleton("type_as_payload", false, TypeAsPayloadTokenFilter::new));
-        filters.add(PreConfiguredTokenFilter.singleton("unique", false, input -> new UniqueTokenFilter(input)));
+        filters.add(PreConfiguredTokenFilter.singleton("unique", false, UniqueTokenFilter::new));
         filters.add(PreConfiguredTokenFilter.singleton("uppercase", true, UpperCaseFilter::new));
         filters.add(PreConfiguredTokenFilter.singleton("word_delimiter", false, input ->
                 new WordDelimiterFilter(input,

+ 9 - 8
core/src/main/java/org/elasticsearch/index/analysis/EdgeNGramTokenFilterFactory.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EdgeNGramTokenFilterFactory.java

@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.reverse.ReverseStringFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 
 
 public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
@@ -38,13 +39,13 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
     public static final int SIDE_BACK = 2;
     private final int side;
 
-    public EdgeNGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    EdgeNGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
         this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
         this.side = parseSide(settings.get("side", "front"));
     }
-    
+
     static int parseSide(String side) {
         switch(side) {
             case "front": return SIDE_FRONT;
@@ -56,19 +57,19 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
     @Override
     public TokenStream create(TokenStream tokenStream) {
         TokenStream result = tokenStream;
-        
+
         // side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect
         if (side == SIDE_BACK) {
             result = new ReverseStringFilter(result);
         }
-        
+
         result = new EdgeNGramTokenFilter(result, minGram, maxGram);
-        
+
         // side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect
         if (side == SIDE_BACK) {
             result = new ReverseStringFilter(result);
         }
-        
+
         return result;
     }
 
@@ -76,4 +77,4 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
     public boolean breaksFastVectorHighlighter() {
         return true;
     }
-}
+}
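
The side=BACK branches above rely on the reverse/edge-ngram/reverse trick: reversing the stream, taking front edge n-grams, then reversing again yields the n-grams anchored at the end of each token. A minimal standalone sketch (hypothetical demo class; the Lucene types are the ones the factory already imports):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hypothetical demo of the side=BACK emulation used in the factory above.
public class BackEdgeNGramDemo {
    public static void main(String[] args) throws IOException {
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("foobar"));
        TokenStream ts = new ReverseStringFilter(tokenizer);  // foobar -> raboof
        ts = new EdgeNGramTokenFilter(ts, 2, 3);               // ra, rab
        ts = new ReverseStringFilter(ts);                      // ar, bar (back edge n-grams)
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term);                          // prints: ar, bar
        }
        ts.end();
        ts.close();
    }
}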

+ 3 - 2
core/src/main/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactory.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactory.java

@@ -17,17 +17,18 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.FlattenGraphFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 
 public class FlattenGraphTokenFilterFactory extends AbstractTokenFilterFactory {
 
-    public FlattenGraphTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    FlattenGraphTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
     }
 

+ 4 - 3
core/src/main/java/org/elasticsearch/index/analysis/LengthTokenFilterFactory.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LengthTokenFilterFactory.java

@@ -17,23 +17,24 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.LengthFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 
 public class LengthTokenFilterFactory extends AbstractTokenFilterFactory {
 
     private final int min;
     private final int max;
-    
+
     // ancient unsupported option
     private static final String ENABLE_POS_INC_KEY = "enable_position_increments";
 
-    public LengthTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    LengthTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         min = settings.getAsInt("min", 0);
         max = settings.getAsInt("max", Integer.MAX_VALUE);

+ 4 - 2
core/src/main/java/org/elasticsearch/index/analysis/LowerCaseTokenFilterFactory.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LowerCaseTokenFilterFactory.java

@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -27,6 +27,8 @@ import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.elasticsearch.index.analysis.MultiTermAwareComponent;
 
 /**
  * Factory for {@link LowerCaseFilter} and some language-specific variants
@@ -41,7 +43,7 @@ public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory impl
 
     private final String lang;
 
-    public LowerCaseTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    LowerCaseTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         this.lang = settings.get("language", null);
     }

+ 4 - 3
core/src/main/java/org/elasticsearch/index/analysis/NGramTokenFilterFactory.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/NGramTokenFilterFactory.java

@@ -17,13 +17,14 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ngram.NGramTokenFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 
 
 public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
@@ -33,7 +34,7 @@ public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
     private final int maxGram;
 
 
-    public NGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    NGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
         this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
@@ -43,4 +44,4 @@ public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
     public TokenStream create(TokenStream tokenStream) {
         return new NGramTokenFilter(tokenStream, minGram, maxGram);
     }
-}
+}

+ 4 - 4
core/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UniqueTokenFilter.java

@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.apache.lucene.analysis.miscellaneous;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenFilter;
@@ -31,7 +31,7 @@ import java.io.IOException;
  * A token filter that removes duplicate tokens. Can optionally remove duplicates
  * only among tokens at the same position.
  */
-public class UniqueTokenFilter extends TokenFilter {
+class UniqueTokenFilter extends TokenFilter {
 
     private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
     private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
@@ -39,11 +39,11 @@ public class UniqueTokenFilter extends TokenFilter {
     private final CharArraySet previous = new CharArraySet(8, false);
     private final boolean onlyOnSamePosition;
 
-    public UniqueTokenFilter(TokenStream in) {
+    UniqueTokenFilter(TokenStream in) {
         this(in, false);
     }
 
-    public UniqueTokenFilter(TokenStream in, boolean onlyOnSamePosition) {
+    UniqueTokenFilter(TokenStream in, boolean onlyOnSamePosition) {
         super(in);
         this.onlyOnSamePosition = onlyOnSamePosition;
     }

+ 3 - 3
core/src/main/java/org/elasticsearch/index/analysis/UniqueTokenFilterFactory.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UniqueTokenFilterFactory.java

@@ -17,19 +17,19 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 
 public class UniqueTokenFilterFactory extends AbstractTokenFilterFactory {
 
     private final boolean onlyOnSamePosition;
 
-    public UniqueTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    UniqueTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         this.onlyOnSamePosition = settings.getAsBooleanLenientForPreEs6Indices(
             indexSettings.getIndexVersionCreated(), "only_on_same_position", false, deprecationLogger);

+ 3 - 1
core/src/main/java/org/elasticsearch/index/analysis/UpperCaseTokenFilterFactory.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UpperCaseTokenFilterFactory.java

@@ -17,13 +17,15 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.UpperCaseFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.elasticsearch.index.analysis.MultiTermAwareComponent;
 
 public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
 

+ 16 - 7
modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java

@@ -51,13 +51,22 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
     @Override
     protected Map<String, Class<?>> getTokenFilters() {
         Map<String, Class<?>> filters = new TreeMap<>(super.getTokenFilters());
-        filters.put("asciifolding",          ASCIIFoldingTokenFilterFactory.class);
-        filters.put("keywordmarker",         KeywordMarkerTokenFilterFactory.class);
-        filters.put("porterstem",            PorterStemTokenFilterFactory.class);
-        filters.put("snowballporter",        SnowballTokenFilterFactory.class);
-        filters.put("trim",                  TrimTokenFilterFactory.class);
-        filters.put("worddelimiter",         WordDelimiterTokenFilterFactory.class);
-        filters.put("worddelimitergraph",    WordDelimiterGraphTokenFilterFactory.class);
+        filters.put("asciifolding", ASCIIFoldingTokenFilterFactory.class);
+        filters.put("keywordmarker", KeywordMarkerTokenFilterFactory.class);
+        filters.put("porterstem", PorterStemTokenFilterFactory.class);
+        filters.put("snowballporter", SnowballTokenFilterFactory.class);
+        filters.put("trim", TrimTokenFilterFactory.class);
+        filters.put("worddelimiter", WordDelimiterTokenFilterFactory.class);
+        filters.put("worddelimitergraph", WordDelimiterGraphTokenFilterFactory.class);
+        filters.put("flattengraph", FlattenGraphTokenFilterFactory.class);
+        filters.put("length", LengthTokenFilterFactory.class);
+        filters.put("greeklowercase", LowerCaseTokenFilterFactory.class);
+        filters.put("irishlowercase", LowerCaseTokenFilterFactory.class);
+        filters.put("lowercase", LowerCaseTokenFilterFactory.class);
+        filters.put("turkishlowercase", LowerCaseTokenFilterFactory.class);
+        filters.put("uppercase", UpperCaseTokenFilterFactory.class);
+        filters.put("ngram", NGramTokenFilterFactory.class);
+        filters.put("edgengram", EdgeNGramTokenFilterFactory.class);
         return filters;
     }
 

+ 3 - 3
core/src/test/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactoryTests.java → modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactoryTests.java

@@ -17,9 +17,7 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
-
-import java.io.IOException;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.Token;
@@ -30,6 +28,8 @@ import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.test.ESTokenStreamTestCase;
 import org.elasticsearch.test.IndexSettingsModule;
 
+import java.io.IOException;
+
 public class FlattenGraphTokenFilterFactoryTests extends ESTokenStreamTestCase {
 
     public void testBasic() throws IOException {

+ 26 - 12
core/src/test/java/org/elasticsearch/index/analysis/NGramTokenizerFactoryTests.java → modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java

@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
@@ -30,6 +30,8 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.settings.Settings.Builder;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
+import org.elasticsearch.index.analysis.NGramTokenizerFactory;
 import org.elasticsearch.test.ESTokenStreamTestCase;
 import org.elasticsearch.test.IndexSettingsModule;
 
@@ -52,7 +54,8 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
         final Settings indexSettings = newAnalysisSettingsBuilder().build();
         IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
         for (String tokenChars : Arrays.asList("letters", "number", "DIRECTIONALITY_UNDEFINED")) {
-            final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", tokenChars).build();
+            final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
+                .put("token_chars", tokenChars).build();
             try {
                 new NGramTokenizerFactory(indexProperties, null, name, settings).create();
                 fail();
@@ -61,7 +64,8 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
             }
         }
         for (String tokenChars : Arrays.asList("letter", " digit ", "punctuation", "DIGIT", "CoNtRoL", "dash_punctuation")) {
-            final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", tokenChars).build();
+            final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
+                .put("token_chars", tokenChars).build();
             indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
 
             new NGramTokenizerFactory(indexProperties, null, name, settings).create();
@@ -73,8 +77,10 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
         final Index index = new Index("test", "_na_");
         final String name = "ngr";
         final Settings indexSettings = newAnalysisSettingsBuilder().build();
-        final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 4).putArray("token_chars", new String[0]).build();
-        Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
+        final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 4)
+            .putArray("token_chars", new String[0]).build();
+        Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
+            .create();
         tokenizer.setReader(new StringReader("1.34"));
         assertTokenStreamContents(tokenizer, new String[] {"1.", "1.3", "1.34", ".3", ".34", "34"});
     }
@@ -84,12 +90,15 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
         final Index index = new Index("test", "_na_");
         final String name = "ngr";
         final Settings indexSettings = newAnalysisSettingsBuilder().build();
-        Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build();
-        Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
+        Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
+            .put("token_chars", "letter,digit").build();
+        Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
+            .create();
         tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f "));
         assertTokenStreamContents(tokenizer,
                 new String[] {"Åb", "Åbc", "bc", "dé", "déf", "éf", "g\uD801\uDC00", "g\uD801\uDC00f", "\uD801\uDC00f"});
-        settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
+        settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
+            .put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
         tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
         tokenizer.setReader(new StringReader(" a!$ 9"));
         assertTokenStreamContents(tokenizer,
@@ -102,12 +111,15 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
         final String name = "ngr";
         final Settings indexSettings = newAnalysisSettingsBuilder().build();
         Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build();
-        Tokenizer tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
+        Tokenizer tokenizer =
+            new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
         tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f "));
         assertTokenStreamContents(tokenizer,
                 new String[] {"Åb", "Åbc", "dé", "déf", "g\uD801\uDC00", "g\uD801\uDC00f"});
-        settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
-        tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
+        settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
+            .put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
+        tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
+            .create();
         tokenizer.setReader(new StringReader(" a!$ 9"));
         assertTokenStreamContents(tokenizer,
                 new String[] {" a", " a!"});
@@ -128,7 +140,9 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
             Settings indexSettings = newAnalysisSettingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build();
             Tokenizer tokenizer = new MockTokenizer();
             tokenizer.setReader(new StringReader("foo bar"));
-            TokenStream edgeNGramTokenFilter = new EdgeNGramTokenFilterFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create(tokenizer);
+            TokenStream edgeNGramTokenFilter =
+                new EdgeNGramTokenFilterFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
+                    .create(tokenizer);
             if (reverse) {
                 assertThat(edgeNGramTokenFilter, instanceOf(ReverseStringFilter.class));
             } else {

+ 1 - 1
core/src/test/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilterTests.java → modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/UniqueTokenFilterTests.java

@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.apache.lucene.analysis.miscellaneous;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockTokenizer;

+ 182 - 0
modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml

@@ -210,3 +210,185 @@
     - match:  { detail.tokenfilters.0.tokens.5.start_offset: 16 }
     - match:  { detail.tokenfilters.0.tokens.5.end_offset: 19 }
     - match:  { detail.tokenfilters.0.tokens.5.position: 5 }
+
+---
+"unique":
+    - do:
+        indices.analyze:
+          body:
+            text:      Foo Foo Bar!
+            tokenizer: whitespace
+            filter:    [unique]
+    - length: { tokens: 2 }
+    - match:  { tokens.0.token: Foo }
+    - match:  { tokens.1.token: Bar! }
+
+---
+"synonym_graph and flatten_graph":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                filter:
+                  my_synonym_graph:
+                    type: synonym_graph
+                    synonyms: ["automatic teller machine,atm,cash point"]
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text: this automatic teller machine is down
+            tokenizer: whitespace
+            filter: [my_synonym_graph]
+    - length: { tokens: 9 }
+    - match:  { tokens.0.token: this }
+    - match:  { tokens.0.position: 0 }
+    - is_false: tokens.0.positionLength
+    - match:  { tokens.1.token: atm }
+    - match:  { tokens.1.position: 1 }
+    - match:  { tokens.1.positionLength: 4 }
+    - match:  { tokens.2.token: cash }
+    - match:  { tokens.2.position: 1 }
+    - is_false: tokens.2.positionLength
+    - match:  { tokens.3.token: automatic }
+    - match:  { tokens.3.position: 1 }
+    - match:  { tokens.3.positionLength: 2 }
+    - match:  { tokens.4.token: point }
+    - match:  { tokens.4.position: 2 }
+    - match:  { tokens.4.positionLength: 3 }
+    - match:  { tokens.5.token: teller }
+    - match:  { tokens.5.position: 3 }
+    - is_false: tokens.5.positionLength
+    - match:  { tokens.6.token: machine }
+    - match:  { tokens.6.position: 4 }
+    - is_false: tokens.6.positionLength
+    - match:  { tokens.7.token: is }
+    - match:  { tokens.7.position: 5 }
+    - is_false: tokens.7.positionLength
+    - match:  { tokens.8.token: down }
+    - match:  { tokens.8.position: 6 }
+    - is_false: tokens.8.positionLength
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text: this automatic teller machine is down
+            tokenizer: whitespace
+            filter: [my_synonym_graph,flatten_graph]
+    - length: { tokens: 9 }
+    - match:  { tokens.0.token: this }
+    - match:  { tokens.0.position: 0 }
+    - is_false: tokens.0.positionLength
+    - match:  { tokens.1.token: atm }
+    - match:  { tokens.1.position: 1 }
+    - match:  { tokens.1.positionLength: 3 }
+    - match:  { tokens.2.token: cash }
+    - match:  { tokens.2.position: 1 }
+    - is_false: tokens.2.positionLength
+    - match:  { tokens.3.token: automatic }
+    - match:  { tokens.3.position: 1 }
+    - is_false: tokens.3.positionLength
+    - match:  { tokens.4.token: point }
+    - match:  { tokens.4.position: 2 }
+    - match:  { tokens.4.positionLength: 2 }
+    - match:  { tokens.5.token: teller }
+    - match:  { tokens.5.position: 2 }
+    - is_false: tokens.5.positionLength
+    - match:  { tokens.6.token: machine }
+    - match:  { tokens.6.position: 3 }
+    - is_false: tokens.6.positionLength
+    - match:  { tokens.7.token: is }
+    - match:  { tokens.7.position: 4 }
+    - is_false: tokens.7.positionLength
+    - match:  { tokens.8.token: down }
+    - match:  { tokens.8.position: 5 }
+    - is_false: tokens.8.positionLength
+
+---
+"length":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                filter:
+                  my_length:
+                    type: length
+                    min: 6
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:      foo bar foobar
+            tokenizer: whitespace
+            filter:    [my_length]
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: foobar }
+
+---
+"uppercase":
+    - do:
+        indices.analyze:
+          body:
+            text:      foobar
+            tokenizer: keyword
+            filter:    [uppercase]
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: FOOBAR }
+
+---
+"ngram":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                filter:
+                  my_ngram:
+                    type: ngram
+                    min_gram: 3
+                    max_gram: 3
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:      foobar
+            tokenizer: keyword
+            filter:    [my_ngram]
+    - length: { tokens: 4 }
+    - match:  { tokens.0.token: foo }
+    - match:  { tokens.1.token: oob }
+    - match:  { tokens.2.token: oba }
+    - match:  { tokens.3.token: bar }
+
+---
+"edge_ngram":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                filter:
+                  my_edge_ngram:
+                    type: edge_ngram
+                    min_gram: 3
+                    max_gram: 6
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:      foobar
+            tokenizer: keyword
+            filter:    [my_edge_ngram]
+    - length: { tokens: 4 }
+    - match:  { tokens.0.token: foo }
+    - match:  { tokens.1.token: foob }
+    - match:  { tokens.2.token: fooba }
+    - match:  { tokens.3.token: foobar }

+ 41 - 0
modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/20_ngram_search.yml

@@ -0,0 +1,41 @@
+"ngram search":
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 0
+            analysis:
+              analyzer:
+                my_analyzer:
+                  tokenizer: standard
+                  filter: [my_ngram]
+              filter:
+                my_ngram:
+                  type: ngram
+                  min_gram: 2
+                  max_gram: 2
+          mappings:
+            doc:
+              properties:
+                text:
+                  type: text
+                  analyzer: my_analyzer
+
+  - do:
+      index:
+        index: test
+        type:  doc
+        id:    1
+        body:  { "text": "foo bar baz" }
+        refresh: true
+
+  - do:
+      search:
+        body:
+          query:
+            match:
+              text:
+                query: foa
+  - match: {hits.total: 1}

+ 129 - 0
modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/30_ngram_highligthing.yml

@@ -0,0 +1,129 @@
+"ngram highlighting":
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 0
+            analysis:
+              tokenizer:
+                my_ngramt:
+                  type: ngram
+                  min_gram: 1
+                  max_gram: 20
+                  token_chars: letter,digit
+              filter:
+                my_ngram:
+                  type: ngram
+                  min_gram: 1
+                  max_gram: 20
+              analyzer:
+                name2_index_analyzer:
+                  tokenizer: whitespace
+                  filter: [my_ngram]
+                name_index_analyzer:
+                  tokenizer: my_ngramt
+                name_search_analyzer:
+                  tokenizer: whitespace
+          mappings:
+            doc:
+              properties:
+                name:
+                  type: text
+                  term_vector: with_positions_offsets
+                  analyzer: name_index_analyzer
+                  search_analyzer: name_search_analyzer
+                name2:
+                  type: text
+                  term_vector: with_positions_offsets
+                  analyzer: name2_index_analyzer
+                  search_analyzer: name_search_analyzer
+
+  - do:
+      index:
+        index: test
+        type:  doc
+        id:    1
+        refresh: true
+        body:
+          name: logicacmg ehemals avinci - the know how company
+          name2: logicacmg ehemals avinci - the know how company
+
+  - do:
+      search:
+        body:
+          query:
+            match:
+              name:
+                query: logica m
+          highlight:
+            fields:
+              - name: {}
+  - match: {hits.total: 1}
+  - match: {hits.hits.0.highlight.name.0: "<em>logica</em>c<em>m</em>g ehe<em>m</em>als avinci - the know how co<em>m</em>pany"}
+
+  - do:
+      search:
+        body:
+          query:
+            match:
+              name:
+                query: logica ma
+          highlight:
+            fields:
+              - name: {}
+  - match: {hits.total: 1}
+  - match: {hits.hits.0.highlight.name.0: "<em>logica</em>cmg ehe<em>ma</em>ls avinci - the know how company"}
+
+  - do:
+      search:
+        body:
+          query:
+            match:
+              name:
+                query: logica
+          highlight:
+            fields:
+              - name: {}
+  - match: {hits.total: 1}
+  - match: {hits.hits.0.highlight.name.0: "<em>logica</em>cmg ehemals avinci - the know how company"}
+
+  - do:
+      search:
+        body:
+          query:
+            match:
+              name2:
+                query: logica m
+          highlight:
+            fields:
+              - name2: {}
+  - match: {hits.total: 1}
+  - match: {hits.hits.0.highlight.name2.0: "<em>logicacmg</em> <em>ehemals</em> avinci - the know how <em>company</em>"}
+
+  - do:
+      search:
+        body:
+          query:
+            match:
+              name2:
+                query: logica ma
+          highlight:
+            fields:
+              - name2: {}
+  - match: {hits.total: 1}
+  - match: {hits.hits.0.highlight.name2.0: "<em>logicacmg</em> <em>ehemals</em> avinci - the know how company"}
+
+  - do:
+      search:
+        body:
+          query:
+            match:
+              name2:
+                query: logica
+          highlight:
+            fields:
+              - name2: {}
+  - match: {hits.total: 1}
+  - match: {hits.hits.0.highlight.name2.0: "<em>logicacmg</em> ehemals avinci - the know how company"}

+ 9 - 17
test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java

@@ -22,7 +22,6 @@ package org.elasticsearch.indices.analysis;
 import org.apache.lucene.analysis.util.CharFilterFactory;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.elasticsearch.Version;
 import org.elasticsearch.common.collect.MapBuilder;
 import org.elasticsearch.index.analysis.ApostropheFilterFactory;
 import org.elasticsearch.index.analysis.ArabicNormalizationFilterFactory;
@@ -36,10 +35,8 @@ import org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory;
 import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.DecimalDigitFilterFactory;
 import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
-import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory;
 import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
 import org.elasticsearch.index.analysis.ElisionTokenFilterFactory;
-import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory;
 import org.elasticsearch.index.analysis.GermanNormalizationFilterFactory;
 import org.elasticsearch.index.analysis.GermanStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.HindiNormalizationFilterFactory;
@@ -49,14 +46,11 @@ import org.elasticsearch.index.analysis.KStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
 import org.elasticsearch.index.analysis.KeepWordFilterFactory;
 import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
-import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
 import org.elasticsearch.index.analysis.LetterTokenizerFactory;
 import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
-import org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory;
 import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
 import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
 import org.elasticsearch.index.analysis.MultiTermAwareComponent;
-import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
 import org.elasticsearch.index.analysis.NGramTokenizerFactory;
 import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
 import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
@@ -82,7 +76,6 @@ import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
 import org.elasticsearch.index.analysis.ThaiTokenizerFactory;
 import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
 import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
-import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
 import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
 import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
 import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
@@ -90,7 +83,6 @@ import org.elasticsearch.plugins.AnalysisPlugin;
 import org.elasticsearch.test.ESTestCase;
 
 import java.util.Collection;
-import java.util.EnumMap;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Locale;
@@ -165,7 +157,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
         .put("decimaldigit",              DecimalDigitFilterFactory.class)
         .put("delimitedpayload",          DelimitedPayloadTokenFilterFactory.class)
         .put("dictionarycompoundword",    DictionaryCompoundWordTokenFilterFactory.class)
-        .put("edgengram",                 EdgeNGramTokenFilterFactory.class)
+        .put("edgengram",                 MovedToAnalysisCommon.class)
         .put("elision",                   ElisionTokenFilterFactory.class)
         .put("englishminimalstem",        StemmerTokenFilterFactory.class)
         .put("englishpossessive",         StemmerTokenFilterFactory.class)
@@ -178,7 +170,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
         .put("germanlightstem",           StemmerTokenFilterFactory.class)
         .put("germanminimalstem",         StemmerTokenFilterFactory.class)
         .put("germannormalization",       GermanNormalizationFilterFactory.class)
-        .put("greeklowercase",            LowerCaseTokenFilterFactory.class)
+        .put("greeklowercase",            MovedToAnalysisCommon.class)
         .put("greekstem",                 StemmerTokenFilterFactory.class)
         .put("hindinormalization",        HindiNormalizationFilterFactory.class)
         .put("hindistem",                 StemmerTokenFilterFactory.class)
@@ -186,17 +178,17 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
         .put("hunspellstem",              HunspellTokenFilterFactory.class)
         .put("hyphenationcompoundword",   HyphenationCompoundWordTokenFilterFactory.class)
         .put("indicnormalization",        IndicNormalizationFilterFactory.class)
-        .put("irishlowercase",            LowerCaseTokenFilterFactory.class)
+        .put("irishlowercase",            MovedToAnalysisCommon.class)
         .put("indonesianstem",            StemmerTokenFilterFactory.class)
         .put("italianlightstem",          StemmerTokenFilterFactory.class)
         .put("keepword",                  KeepWordFilterFactory.class)
         .put("keywordmarker",             MovedToAnalysisCommon.class)
         .put("kstem",                     KStemTokenFilterFactory.class)
         .put("latvianstem",               StemmerTokenFilterFactory.class)
-        .put("length",                    LengthTokenFilterFactory.class)
+        .put("length",                    MovedToAnalysisCommon.class)
         .put("limittokencount",           LimitTokenCountFilterFactory.class)
-        .put("lowercase",                 LowerCaseTokenFilterFactory.class)
-        .put("ngram",                     NGramTokenFilterFactory.class)
+        .put("lowercase",                 MovedToAnalysisCommon.class)
+        .put("ngram",                     MovedToAnalysisCommon.class)
         .put("norwegianlightstem",        StemmerTokenFilterFactory.class)
         .put("norwegianminimalstem",      StemmerTokenFilterFactory.class)
         .put("patterncapturegroup",       PatternCaptureGroupTokenFilterFactory.class)
@@ -225,12 +217,12 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
         .put("synonymgraph",              SynonymGraphTokenFilterFactory.class)
         .put("trim",                      MovedToAnalysisCommon.class)
         .put("truncate",                  TruncateTokenFilterFactory.class)
-        .put("turkishlowercase",          LowerCaseTokenFilterFactory.class)
+        .put("turkishlowercase",          MovedToAnalysisCommon.class)
         .put("type",                      KeepTypesFilterFactory.class)
-        .put("uppercase",                 UpperCaseTokenFilterFactory.class)
+        .put("uppercase",                 MovedToAnalysisCommon.class)
         .put("worddelimiter",             MovedToAnalysisCommon.class)
         .put("worddelimitergraph",        MovedToAnalysisCommon.class)
-        .put("flattengraph",              FlattenGraphTokenFilterFactory.class)
+        .put("flattengraph",              MovedToAnalysisCommon.class)
 
         // TODO: these tokenfilters are not yet exposed: useful?