Browse Source

Move a number of language analyzers to analysis-common module (#31143)

The following analyzers were moved from server module to analysis-common module:
`snowball`, `arabic`, `armenian`, `basque`, `bengali`, `brazilian`, `bulgarian`,
`catalan`, `chinese`, `cjk`, `czech`, `danish`, `dutch`, `english`, `finnish`,
`french`, `galician` and `german`.

Relates to #23658
Martijn van Groningen 7 years ago
parent
commit
07a57cc131
38 changed files with 952 additions and 348 deletions
  1. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArabicAnalyzerProvider.java
  2. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArmenianAnalyzerProvider.java
  3. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BasqueAnalyzerProvider.java
  4. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BengaliAnalyzerProvider.java
  5. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BrazilianAnalyzerProvider.java
  6. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BulgarianAnalyzerProvider.java
  7. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CatalanAnalyzerProvider.java
  8. 5 4
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ChineseAnalyzerProvider.java
  9. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CjkAnalyzerProvider.java
  10. 136 4
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
  11. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CzechAnalyzerProvider.java
  12. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DanishAnalyzerProvider.java
  13. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DutchAnalyzerProvider.java
  14. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EnglishAnalyzerProvider.java
  15. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FinnishAnalyzerProvider.java
  16. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FrenchAnalyzerProvider.java
  17. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GalicianAnalyzerProvider.java
  18. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GermanAnalyzerProvider.java
  19. 3 3
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballAnalyzer.java
  20. 4 2
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballAnalyzerProvider.java
  21. 5 5
      modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/SnowballAnalyzerTests.java
  22. 523 0
      modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/20_analyzers.yml
  23. 58 0
      modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/40_query_string.yml
  24. 11 4
      modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/TokenCountFieldMapperIntegrationIT.java
  25. 3 4
      modules/reindex/src/test/resources/rest-api-spec/test/update_by_query/30_new_fields.yml
  26. 0 8
      rest-api-spec/src/main/resources/rest-api-spec/test/count/20_query_string.yml
  27. 0 10
      rest-api-spec/src/main/resources/rest-api-spec/test/explain/30_query_string.yml
  28. 0 8
      rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string.yml
  29. 0 8
      rest-api-spec/src/main/resources/rest-api-spec/test/search/60_query_string.yml
  30. 0 36
      server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
  31. 0 179
      server/src/main/java/org/elasticsearch/indices/analysis/PreBuiltAnalyzers.java
  32. 11 8
      server/src/test/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerTests.java
  33. 21 19
      server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java
  34. 2 2
      server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java
  35. 2 2
      server/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java
  36. 48 5
      server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java
  37. 55 4
      server/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java
  38. 1 1
      server/src/test/java/org/elasticsearch/validate/SimpleValidateQueryIT.java

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArabicAnalyzerProvider.java

@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.ar.ArabicAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider<ArabicAnalyzer> {
 
     private final ArabicAnalyzer arabicAnalyzer;
 
-    public ArabicAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    ArabicAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         arabicAnalyzer = new ArabicAnalyzer(
             Analysis.parseStopWords(env, settings, ArabicAnalyzer.getDefaultStopSet()),

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/ArmenianAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArmenianAnalyzerProvider.java

@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class ArmenianAnalyzerProvider extends AbstractIndexAnalyzerProvider<ArmenianAnalyzer> {
 
     private final ArmenianAnalyzer analyzer;
 
-    public ArmenianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    ArmenianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new ArmenianAnalyzer(
             Analysis.parseStopWords(env, settings, ArmenianAnalyzer.getDefaultStopSet()),

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/BasqueAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BasqueAnalyzerProvider.java

@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.eu.BasqueAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class BasqueAnalyzerProvider extends AbstractIndexAnalyzerProvider<BasqueAnalyzer> {
 
     private final BasqueAnalyzer analyzer;
 
-    public BasqueAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    BasqueAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new BasqueAnalyzer(
             Analysis.parseStopWords(env, settings, BasqueAnalyzer.getDefaultStopSet()),

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/BengaliAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BengaliAnalyzerProvider.java

@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.bn.BengaliAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class BengaliAnalyzerProvider extends AbstractIndexAnalyzerProvider<BengaliAnalyzer> {
 
     private final BengaliAnalyzer analyzer;
 
-    public BengaliAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    BengaliAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new BengaliAnalyzer(
             Analysis.parseStopWords(env, settings, BengaliAnalyzer.getDefaultStopSet()),

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/BrazilianAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BrazilianAnalyzerProvider.java

@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.br.BrazilianAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider<BrazilianAnalyzer> {
 
     private final BrazilianAnalyzer analyzer;
 
-    public BrazilianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    BrazilianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new BrazilianAnalyzer(
             Analysis.parseStopWords(env, settings, BrazilianAnalyzer.getDefaultStopSet()),

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BulgarianAnalyzerProvider.java

@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider<BulgarianAnalyzer> {
 
     private final BulgarianAnalyzer analyzer;
 
-    public BulgarianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    BulgarianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new BulgarianAnalyzer(
             Analysis.parseStopWords(env, settings, BulgarianAnalyzer.getDefaultStopSet()),

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/CatalanAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CatalanAnalyzerProvider.java

@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.ca.CatalanAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class CatalanAnalyzerProvider extends AbstractIndexAnalyzerProvider<CatalanAnalyzer> {
 
     private final CatalanAnalyzer analyzer;
 
-    public CatalanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    CatalanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new CatalanAnalyzer(
             Analysis.parseStopWords(env, settings, CatalanAnalyzer.getDefaultStopSet()),

+ 5 - 4
server/src/main/java/org/elasticsearch/index/analysis/ChineseAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ChineseAnalyzerProvider.java

@@ -17,12 +17,13 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
 
 /**
  * Only for old indexes
@@ -31,16 +32,16 @@ public class ChineseAnalyzerProvider extends AbstractIndexAnalyzerProvider<Stand
 
     private final StandardAnalyzer analyzer;
 
-    public ChineseAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    ChineseAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         // old index: best effort
         analyzer = new StandardAnalyzer();
         analyzer.setVersion(version);
-        
+
     }
 
     @Override
     public StandardAnalyzer get() {
         return this.analyzer;
     }
-}
+}

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/CjkAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CjkAnalyzerProvider.java

@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.cjk.CJKAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class CjkAnalyzerProvider extends AbstractIndexAnalyzerProvider<CJKAnalyzer> {
 
     private final CJKAnalyzer analyzer;
 
-    public CjkAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    CjkAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         CharArraySet stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet());
 

+ 136 - 4
modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

@@ -24,11 +24,17 @@ import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ar.ArabicAnalyzer;
 import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
 import org.apache.lucene.analysis.ar.ArabicStemFilter;
+import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
+import org.apache.lucene.analysis.bn.BengaliAnalyzer;
 import org.apache.lucene.analysis.bn.BengaliNormalizationFilter;
+import org.apache.lucene.analysis.br.BrazilianAnalyzer;
 import org.apache.lucene.analysis.br.BrazilianStemFilter;
+import org.apache.lucene.analysis.ca.CatalanAnalyzer;
 import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
+import org.apache.lucene.analysis.cjk.CJKAnalyzer;
 import org.apache.lucene.analysis.cjk.CJKBigramFilter;
 import org.apache.lucene.analysis.cjk.CJKWidthFilter;
 import org.apache.lucene.analysis.ckb.SoraniNormalizationFilter;
@@ -40,14 +46,22 @@ import org.apache.lucene.analysis.core.LowerCaseTokenizer;
 import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.analysis.core.UpperCaseFilter;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.cz.CzechAnalyzer;
 import org.apache.lucene.analysis.cz.CzechStemFilter;
+import org.apache.lucene.analysis.da.DanishAnalyzer;
+import org.apache.lucene.analysis.de.GermanAnalyzer;
 import org.apache.lucene.analysis.de.GermanNormalizationFilter;
 import org.apache.lucene.analysis.de.GermanStemFilter;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.en.KStemFilter;
 import org.apache.lucene.analysis.en.PorterStemFilter;
+import org.apache.lucene.analysis.eu.BasqueAnalyzer;
 import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
+import org.apache.lucene.analysis.fi.FinnishAnalyzer;
 import org.apache.lucene.analysis.fr.FrenchAnalyzer;
+import org.apache.lucene.analysis.gl.GalicianAnalyzer;
 import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
+import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
 import org.apache.lucene.analysis.in.IndicNormalizationFilter;
 import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
 import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
@@ -64,6 +78,7 @@ import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
 import org.apache.lucene.analysis.ngram.NGramTokenFilter;
 import org.apache.lucene.analysis.ngram.NGramTokenizer;
+import org.apache.lucene.analysis.nl.DutchAnalyzer;
 import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
 import org.apache.lucene.analysis.pattern.PatternTokenizer;
 import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
@@ -73,6 +88,7 @@ import org.apache.lucene.analysis.shingle.ShingleFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.ClassicFilter;
 import org.apache.lucene.analysis.standard.ClassicTokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
 import org.apache.lucene.analysis.th.ThaiTokenizer;
 import org.apache.lucene.analysis.tr.ApostropheFilter;
@@ -113,6 +129,24 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
         analyzers.put("fingerprint", FingerprintAnalyzerProvider::new);
         analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
         analyzers.put("pattern", PatternAnalyzerProvider::new);
+        analyzers.put("snowball", SnowballAnalyzerProvider::new);
+        analyzers.put("arabic", ArabicAnalyzerProvider::new);
+        analyzers.put("armenian", ArmenianAnalyzerProvider::new);
+        analyzers.put("basque", BasqueAnalyzerProvider::new);
+        analyzers.put("bengali", BengaliAnalyzerProvider::new);
+        analyzers.put("brazilian", BrazilianAnalyzerProvider::new);
+        analyzers.put("bulgarian", BulgarianAnalyzerProvider::new);
+        analyzers.put("catalan", CatalanAnalyzerProvider::new);
+        analyzers.put("chinese", ChineseAnalyzerProvider::new);
+        analyzers.put("cjk", CjkAnalyzerProvider::new);
+        analyzers.put("czech", CzechAnalyzerProvider::new);
+        analyzers.put("danish", DanishAnalyzerProvider::new);
+        analyzers.put("dutch", DutchAnalyzerProvider::new);
+        analyzers.put("english", EnglishAnalyzerProvider::new);
+        analyzers.put("finnish", FinnishAnalyzerProvider::new);
+        analyzers.put("french", FrenchAnalyzerProvider::new);
+        analyzers.put("galician", GalicianAnalyzerProvider::new);
+        analyzers.put("german", GermanAnalyzerProvider::new);
         return analyzers;
     }
 
@@ -213,10 +247,108 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
     @Override
     public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
         List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
-        analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.LUCENE,
-            version -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)));
-        analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH, version ->
-            new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, CharArraySet.EMPTY_SET)));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET);
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH, version -> {
+            Analyzer a = new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true,
+                CharArraySet.EMPTY_SET);
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("snowball", CachingStrategy.LUCENE, version -> {
+            Analyzer a =  new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("arabic", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new ArabicAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("armenian", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new ArmenianAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("basque", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new BasqueAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("bengali", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new BengaliAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("brazilian", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new BrazilianAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("bulgarian", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new BulgarianAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("catalan", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new CatalanAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("chinese", CachingStrategy.LUCENE, version -> {
+            // only for old indices, best effort
+            Analyzer a = new StandardAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("cjk", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new CJKAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("czech", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new CzechAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("danish", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new DanishAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("dutch", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new DutchAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("english", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new EnglishAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("finnish", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new FinnishAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("french", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new FrenchAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("galician", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new GalicianAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("german", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new GermanAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
         return analyzers;
     }
 

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CzechAnalyzerProvider.java

@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.cz.CzechAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider<CzechAnalyzer> {
 
     private final CzechAnalyzer analyzer;
 
-    public CzechAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    CzechAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new CzechAnalyzer(
             Analysis.parseStopWords(env, settings, CzechAnalyzer.getDefaultStopSet()),

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/DanishAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DanishAnalyzerProvider.java

@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.da.DanishAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class DanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<DanishAnalyzer> {
 
     private final DanishAnalyzer analyzer;
 
-    public DanishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    DanishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new DanishAnalyzer(
             Analysis.parseStopWords(env, settings, DanishAnalyzer.getDefaultStopSet()),

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/DutchAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DutchAnalyzerProvider.java

@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.nl.DutchAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider<DutchAnalyzer> {
 
     private final DutchAnalyzer analyzer;
 
-    public DutchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    DutchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new DutchAnalyzer(
             Analysis.parseStopWords(env, settings, DutchAnalyzer.getDefaultStopSet()),

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EnglishAnalyzerProvider.java

@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider<EnglishAnalyzer> {
 
     private final EnglishAnalyzer analyzer;
 
-    public EnglishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    EnglishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new EnglishAnalyzer(
             Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet()),

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FinnishAnalyzerProvider.java

@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.fi.FinnishAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider<FinnishAnalyzer> {
 
     private final FinnishAnalyzer analyzer;
 
-    public FinnishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    FinnishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new FinnishAnalyzer(
             Analysis.parseStopWords(env, settings, FinnishAnalyzer.getDefaultStopSet()),

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/FrenchAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FrenchAnalyzerProvider.java

@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.fr.FrenchAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider<FrenchAnalyzer> {
 
     private final FrenchAnalyzer analyzer;
 
-    public FrenchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    FrenchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new FrenchAnalyzer(
             Analysis.parseStopWords(env, settings, FrenchAnalyzer.getDefaultStopSet()),

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GalicianAnalyzerProvider.java

@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.gl.GalicianAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider<GalicianAnalyzer> {
 
     private final GalicianAnalyzer analyzer;
 
-    public GalicianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    GalicianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new GalicianAnalyzer(
             Analysis.parseStopWords(env, settings, GalicianAnalyzer.getDefaultStopSet()),

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/GermanAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GermanAnalyzerProvider.java

@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.de.GermanAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider<GermanAnalyzer> {
 
     private final GermanAnalyzer analyzer;
 
-    public GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new GermanAnalyzer(
             Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet()),

+ 3 - 3
server/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzer.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballAnalyzer.java

@@ -1,4 +1,4 @@
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 /*
  * Licensed to Elasticsearch under one or more contributor
@@ -48,12 +48,12 @@ public final class SnowballAnalyzer extends Analyzer {
   private CharArraySet stopSet;
 
   /** Builds the named analyzer with no stop words. */
-  public SnowballAnalyzer(String name) {
+  SnowballAnalyzer(String name) {
     this.name = name;
   }
 
   /** Builds the named analyzer with the given stop words. */
-  public SnowballAnalyzer(String name, CharArraySet stopWords) {
+  SnowballAnalyzer(String name, CharArraySet stopWords) {
     this(name);
     stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopWords));
   }

+ 4 - 2
server/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzerProvider.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballAnalyzerProvider.java

@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.core.StopAnalyzer;
@@ -26,6 +26,8 @@ import org.apache.lucene.analysis.nl.DutchAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 import java.util.HashMap;
 import java.util.Map;
@@ -60,7 +62,7 @@ public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<Snow
 
     private final SnowballAnalyzer analyzer;
 
-    public SnowballAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    SnowballAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
 
         String language = settings.get("language", settings.get("name", "English"));

+ 5 - 5
server/src/test/java/org/elasticsearch/index/analysis/SnowballAnalyzerTests.java → modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/SnowballAnalyzerTests.java

@@ -1,4 +1,4 @@
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 /*
  * Licensed to Elasticsearch under one or more contributor
@@ -30,14 +30,14 @@ public class SnowballAnalyzerTests extends ESTokenStreamTestCase {
     assertAnalyzesTo(a, "he abhorred accents",
         new String[]{"he", "abhor", "accent"});
   }
-  
+
   public void testStopwords() throws Exception {
     Analyzer a = new SnowballAnalyzer("English",
         StandardAnalyzer.STOP_WORDS_SET);
     assertAnalyzesTo(a, "the quick brown fox jumped",
         new String[]{"quick", "brown", "fox", "jump"});
   }
-  
+
   /**
    * Test turkish lowercasing
    */
@@ -48,7 +48,7 @@ public class SnowballAnalyzerTests extends ESTokenStreamTestCase {
     assertAnalyzesTo(a, "AĞACI", new String[] { "ağaç" });
   }
 
-  
+
   public void testReusableTokenStream() throws Exception {
     Analyzer a = new SnowballAnalyzer("English");
     assertAnalyzesTo(a, "he abhorred accents",
@@ -56,4 +56,4 @@ public class SnowballAnalyzerTests extends ESTokenStreamTestCase {
     assertAnalyzesTo(a, "she abhorred him",
         new String[]{"she", "abhor", "him"});
   }
-}
+}

+ 523 - 0
modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/20_analyzers.yml

@@ -38,6 +38,25 @@
     - length: { tokens: 1 }
     - match:  { tokens.0.token: বার }
 
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: bengali
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     বাড়ী
+            analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: বার }
+
 ---
 "fingerprint":
     - do:
@@ -69,3 +88,507 @@
     - length: { tokens: 2 }
     - match:  { tokens.0.token: foo }
     - match:  { tokens.1.token: bar }
+
+---
+"snowball":
+    - do:
+        indices.analyze:
+          body:
+            text:     the brown foxes
+            analyzer: snowball
+    - length: { tokens: 2 }
+    - match:  { tokens.0.token: brown }
+    - match:  { tokens.1.token: fox }
+
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_snowball:
+                    type: snowball
+                    language: "Dutch"
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     de bruine vossen
+            analyzer: my_snowball
+    - length: { tokens: 2 }
+    - match:  { tokens.0.token: bruin }
+    - match:  { tokens.1.token: voss }
+
+---
+"arabic":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: arabic
+
+    - do:
+        indices.analyze:
+          body:
+            text:     كبيرة
+            analyzer: arabic
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: كبير }
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     كبيرة
+            analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: كبير }
+
+---
+"armenian":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: armenian
+
+    - do:
+        indices.analyze:
+          body:
+            text:     արծիվ
+            analyzer: armenian
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: արծ }
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     արծիվ
+            analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: արծ }
+
+---
+"basque":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: basque
+
+    - do:
+        indices.analyze:
+          body:
+            text:     zaldiak
+            analyzer: basque
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: zaldi }
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     zaldiak
+            analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: zaldi }
+
+---
+"brazilian":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: brazilian
+
+    - do:
+        indices.analyze:
+          body:
+            text:     boataria
+            analyzer: brazilian
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: boat }
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     boataria
+            analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: boat }
+
+---
+"bulgarian":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: bulgarian
+
+    - do:
+        indices.analyze:
+          body:
+            text:     градове
+            analyzer: bulgarian
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: град }
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     градове
+            analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: град }
+
+---
+"catalan":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: catalan
+
+    - do:
+        indices.analyze:
+          body:
+            text:     llengües
+            analyzer: catalan
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: llengu }
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     llengües
+            analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: llengu }
+
+---
+"chinese":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: chinese
+
+    - do:
+        indices.analyze:
+          body:
+            text:     only for old indices
+            analyzer: chinese
+    - length: { tokens: 3 }
+    - match:  { tokens.0.token: only }
+    - match:  { tokens.1.token: old }
+    - match:  { tokens.2.token: indices }
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     only for old indices
+            analyzer: my_analyzer
+    - length: { tokens: 3 }
+    - match:  { tokens.0.token: only }
+    - match:  { tokens.1.token: old }
+    - match:  { tokens.2.token: indices }
+
+---
+"cjk":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: cjk
+
+    - do:
+        indices.analyze:
+          body:
+            text:     多くの
+            analyzer: cjk
+    - length: { tokens: 2 }
+    - match:  { tokens.0.token: 多く }
+    - match:  { tokens.1.token: くの }
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     多くの
+            analyzer: my_analyzer
+    - length: { tokens: 2 }
+    - match:  { tokens.0.token: 多く }
+    - match:  { tokens.1.token: くの }
+
+---
+"czech":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: czech
+
+    - do:
+        indices.analyze:
+          body:
+            text:     Pokud mluvime o volnem
+            analyzer: czech
+    - length: { tokens: 2 }
+    - match:  { tokens.0.token: mluvim }
+    - match:  { tokens.1.token: voln }
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     Pokud mluvime o volnem
+            analyzer: my_analyzer
+    - length: { tokens: 2 }
+    - match:  { tokens.0.token: mluvim }
+    - match:  { tokens.1.token: voln }
+
+---
+"danish":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: danish
+
+    - do:
+        indices.analyze:
+          body:
+            text:     undersøgelse
+            analyzer: danish
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: undersøg }
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     undersøgelse
+            analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: undersøg }
+
+---
+"dutch":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: dutch
+
+    - do:
+        indices.analyze:
+          body:
+            text:     lidstaten
+            analyzer: dutch
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: lidstat }
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     lidstaten
+            analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: lidstat }
+
+---
+"english":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: english
+
+    - do:
+        indices.analyze:
+          body:
+            text:     books
+            analyzer: english
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: book }
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     books
+            analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: book }
+
+---
+"finnish":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: finnish
+
+    - do:
+        indices.analyze:
+          body:
+            text:     edeltäjiinsä
+            analyzer: finnish
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: edeltäj }
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     edeltäjiinsä
+            analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: edeltäj }
+
+---
+"french":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: french
+
+    - do:
+        indices.analyze:
+          body:
+            text:     sécuritaires
+            analyzer: french
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: securitair }
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     sécuritaires
+            analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: securitair }
+
+---
+"galician":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: galician
+
+    - do:
+        indices.analyze:
+          body:
+            text:     corresponderá
+            analyzer: galician
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: correspond }
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     corresponderá
+            analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: correspond }
+
+---
+"german":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    type: german
+
+    - do:
+        indices.analyze:
+          body:
+            text:     Tischen
+            analyzer: german
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: tisch }
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:     Tischen
+            analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: tisch }

+ 58 - 0
modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/40_query_string.yml

@@ -0,0 +1,58 @@
+---
+"Test query string with snowball":
+  - do:
+      indices.create:
+          index:  test
+          body:
+            mappings:
+              test:
+                properties:
+                  field:
+                    type: text
+                  number:
+                    type: integer
+
+  - do:
+      index:
+          index:  test
+          type:   test
+          id:     1
+          body:   { field: foo bar}
+
+  - do:
+      indices.refresh:
+        index: [test]
+
+  - do:
+      indices.validate_query:
+        index: test
+        q: field:bars
+        analyzer: snowball
+
+  - is_true: valid
+
+  - do:
+      search:
+        index: test
+        q: field:bars
+        analyzer: snowball
+
+  - match: {hits.total: 1}
+
+  - do:
+      explain:
+        index:  test
+        type:   test
+        id:     1
+        q: field:bars
+        analyzer: snowball
+
+  - is_true: matched
+
+  - do:
+      count:
+        index: test
+        q: field:bars
+        analyzer: snowball
+
+  - match: {count : 1}

+ 11 - 4
modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/TokenCountFieldMapperIntegrationIT.java

@@ -27,6 +27,7 @@ import org.elasticsearch.action.bulk.BulkResponse;
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.search.aggregations.AggregationBuilders;
@@ -109,7 +110,13 @@ public class TokenCountFieldMapperIntegrationIT extends ESIntegTestCase {
     }
 
     private void init() throws IOException {
-        prepareCreate("test").addMapping("test", jsonBuilder().startObject()
+        Settings.Builder settings = Settings.builder();
+        settings.put(indexSettings());
+        settings.put("index.analysis.analyzer.mock_english.tokenizer", "standard");
+        settings.put("index.analysis.analyzer.mock_english.filter", "stop");
+        prepareCreate("test")
+            .setSettings(settings)
+            .addMapping("test", jsonBuilder().startObject()
                 .startObject("test")
                     .startObject("properties")
                         .startObject("foo")
@@ -133,7 +140,7 @@ public class TokenCountFieldMapperIntegrationIT extends ESIntegTestCase {
                                 .endObject()
                                 .startObject("token_count_without_position_increments")
                                     .field("type", "token_count")
-                                    .field("analyzer", "english")
+                                    .field("analyzer", "mock_english")
                                     .field("enable_position_increments", false)
                                     .field("store", true)
                                 .endObject()
@@ -214,13 +221,13 @@ public class TokenCountFieldMapperIntegrationIT extends ESIntegTestCase {
         assertThat(hit.field("foo.token_count"), not(nullValue()));
         assertThat(hit.field("foo.token_count").getValues().size(), equalTo(standardTermCounts.length));
         for (int i = 0; i < standardTermCounts.length; i++) {
-            assertThat((Integer) hit.field("foo.token_count").getValues().get(i), equalTo(standardTermCounts[i]));
+            assertThat(hit.field("foo.token_count").getValues().get(i), equalTo(standardTermCounts[i]));
         }
 
         assertThat(hit.field("foo.token_count_without_position_increments"), not(nullValue()));
         assertThat(hit.field("foo.token_count_without_position_increments").getValues().size(), equalTo(englishTermCounts.length));
         for (int i = 0; i < englishTermCounts.length; i++) {
-            assertThat((Integer) hit.field("foo.token_count_without_position_increments").getValues().get(i),
+            assertThat(hit.field("foo.token_count_without_position_increments").getValues().get(i),
                     equalTo(englishTermCounts[i]));
         }
 

+ 3 - 4
modules/reindex/src/test/resources/rest-api-spec/test/update_by_query/30_new_fields.yml

@@ -8,14 +8,14 @@
             place:
               properties:
                 name:
-                  type: text
+                  type: keyword
   - do:
       index:
         index:   test
         type:    place
         id:      1
         refresh: true
-        body:    { "name": "bob's house" }
+        body:    { "name": "bob! house" }
 
   - do:
       indices.put_mapping:
@@ -24,11 +24,10 @@
         body:
           properties:
             name:
-              type: text
+              type: keyword
               fields:
                 english:
                   type:     text
-                  analyzer: english
 
   - do:
       search:

+ 0 - 8
rest-api-spec/src/main/resources/rest-api-spec/test/count/20_query_string.yml

@@ -44,14 +44,6 @@
 
   - match: {count : 0}
 
-  - do:
-      count:
-        index: test
-        q: field:bars
-        analyzer: snowball
-
-  - match: {count : 1}
-
   - do:
       count:
         index: test

+ 0 - 10
rest-api-spec/src/main/resources/rest-api-spec/test/explain/30_query_string.yml

@@ -50,16 +50,6 @@
 
   - is_false: matched
 
-  - do:
-      explain:
-        index:  test
-        type:   test
-        id:     1
-        q: field:bars
-        analyzer: snowball
-
-  - is_true: matched
-
   - do:
       explain:
         index:  test

+ 0 - 8
rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string.yml

@@ -35,14 +35,6 @@
 
   - is_true: valid
 
-  - do:
-      indices.validate_query:
-        index: test
-        q: field:bars
-        analyzer: snowball
-
-  - is_true: valid
-
   - do:
       indices.validate_query:
         index: test

+ 0 - 8
rest-api-spec/src/main/resources/rest-api-spec/test/search/60_query_string.yml

@@ -44,14 +44,6 @@
 
   - match: {hits.total: 0}
 
-  - do:
-      search:
-        index: test
-        q: field:bars
-        analyzer: snowball
-
-  - match: {hits.total: 1}
-
   - do:
       search:
         index: test

+ 0 - 36
server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java

@@ -29,24 +29,7 @@ import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.analysis.AnalysisRegistry;
 import org.elasticsearch.index.analysis.AnalyzerProvider;
-import org.elasticsearch.index.analysis.ArabicAnalyzerProvider;
-import org.elasticsearch.index.analysis.ArmenianAnalyzerProvider;
-import org.elasticsearch.index.analysis.BasqueAnalyzerProvider;
-import org.elasticsearch.index.analysis.BengaliAnalyzerProvider;
-import org.elasticsearch.index.analysis.BrazilianAnalyzerProvider;
-import org.elasticsearch.index.analysis.BulgarianAnalyzerProvider;
-import org.elasticsearch.index.analysis.CatalanAnalyzerProvider;
 import org.elasticsearch.index.analysis.CharFilterFactory;
-import org.elasticsearch.index.analysis.ChineseAnalyzerProvider;
-import org.elasticsearch.index.analysis.CjkAnalyzerProvider;
-import org.elasticsearch.index.analysis.CzechAnalyzerProvider;
-import org.elasticsearch.index.analysis.DanishAnalyzerProvider;
-import org.elasticsearch.index.analysis.DutchAnalyzerProvider;
-import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
-import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
-import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
-import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
-import org.elasticsearch.index.analysis.GermanAnalyzerProvider;
 import org.elasticsearch.index.analysis.GreekAnalyzerProvider;
 import org.elasticsearch.index.analysis.HindiAnalyzerProvider;
 import org.elasticsearch.index.analysis.HungarianAnalyzerProvider;
@@ -68,7 +51,6 @@ import org.elasticsearch.index.analysis.RomanianAnalyzerProvider;
 import org.elasticsearch.index.analysis.RussianAnalyzerProvider;
 import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
 import org.elasticsearch.index.analysis.SimpleAnalyzerProvider;
-import org.elasticsearch.index.analysis.SnowballAnalyzerProvider;
 import org.elasticsearch.index.analysis.SoraniAnalyzerProvider;
 import org.elasticsearch.index.analysis.SpanishAnalyzerProvider;
 import org.elasticsearch.index.analysis.StandardAnalyzerProvider;
@@ -245,24 +227,6 @@ public final class AnalysisModule {
         analyzers.register("stop", StopAnalyzerProvider::new);
         analyzers.register("whitespace", WhitespaceAnalyzerProvider::new);
         analyzers.register("keyword", KeywordAnalyzerProvider::new);
-        analyzers.register("snowball", SnowballAnalyzerProvider::new);
-        analyzers.register("arabic", ArabicAnalyzerProvider::new);
-        analyzers.register("armenian", ArmenianAnalyzerProvider::new);
-        analyzers.register("basque", BasqueAnalyzerProvider::new);
-        analyzers.register("bengali", BengaliAnalyzerProvider::new);
-        analyzers.register("brazilian", BrazilianAnalyzerProvider::new);
-        analyzers.register("bulgarian", BulgarianAnalyzerProvider::new);
-        analyzers.register("catalan", CatalanAnalyzerProvider::new);
-        analyzers.register("chinese", ChineseAnalyzerProvider::new);
-        analyzers.register("cjk", CjkAnalyzerProvider::new);
-        analyzers.register("czech", CzechAnalyzerProvider::new);
-        analyzers.register("danish", DanishAnalyzerProvider::new);
-        analyzers.register("dutch", DutchAnalyzerProvider::new);
-        analyzers.register("english", EnglishAnalyzerProvider::new);
-        analyzers.register("finnish", FinnishAnalyzerProvider::new);
-        analyzers.register("french", FrenchAnalyzerProvider::new);
-        analyzers.register("galician", GalicianAnalyzerProvider::new);
-        analyzers.register("german", GermanAnalyzerProvider::new);
         analyzers.register("greek", GreekAnalyzerProvider::new);
         analyzers.register("hindi", HindiAnalyzerProvider::new);
         analyzers.register("hungarian", HungarianAnalyzerProvider::new);

+ 0 - 179
server/src/main/java/org/elasticsearch/indices/analysis/PreBuiltAnalyzers.java

@@ -20,37 +20,21 @@ package org.elasticsearch.indices.analysis;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.ar.ArabicAnalyzer;
-import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
-import org.apache.lucene.analysis.bn.BengaliAnalyzer;
-import org.apache.lucene.analysis.br.BrazilianAnalyzer;
-import org.apache.lucene.analysis.ca.CatalanAnalyzer;
-import org.apache.lucene.analysis.cjk.CJKAnalyzer;
 import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
 import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.analysis.core.SimpleAnalyzer;
 import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.cz.CzechAnalyzer;
-import org.apache.lucene.analysis.da.DanishAnalyzer;
-import org.apache.lucene.analysis.de.GermanAnalyzer;
 import org.apache.lucene.analysis.el.GreekAnalyzer;
-import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.es.SpanishAnalyzer;
-import org.apache.lucene.analysis.eu.BasqueAnalyzer;
 import org.apache.lucene.analysis.fa.PersianAnalyzer;
-import org.apache.lucene.analysis.fi.FinnishAnalyzer;
-import org.apache.lucene.analysis.fr.FrenchAnalyzer;
 import org.apache.lucene.analysis.ga.IrishAnalyzer;
-import org.apache.lucene.analysis.gl.GalicianAnalyzer;
 import org.apache.lucene.analysis.hi.HindiAnalyzer;
 import org.apache.lucene.analysis.hu.HungarianAnalyzer;
-import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
 import org.apache.lucene.analysis.id.IndonesianAnalyzer;
 import org.apache.lucene.analysis.it.ItalianAnalyzer;
 import org.apache.lucene.analysis.lt.LithuanianAnalyzer;
 import org.apache.lucene.analysis.lv.LatvianAnalyzer;
-import org.apache.lucene.analysis.nl.DutchAnalyzer;
 import org.apache.lucene.analysis.no.NorwegianAnalyzer;
 import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
 import org.apache.lucene.analysis.ro.RomanianAnalyzer;
@@ -61,7 +45,6 @@ import org.apache.lucene.analysis.sv.SwedishAnalyzer;
 import org.apache.lucene.analysis.th.ThaiAnalyzer;
 import org.apache.lucene.analysis.tr.TurkishAnalyzer;
 import org.elasticsearch.Version;
-import org.elasticsearch.index.analysis.SnowballAnalyzer;
 import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
 
 import java.util.Locale;
@@ -129,168 +112,6 @@ public enum PreBuiltAnalyzers {
         }
     },
 
-    SNOWBALL {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer analyzer = new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET);
-            analyzer.setVersion(version.luceneVersion);
-            return analyzer;
-        }
-    },
-
-    ARABIC {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new ArabicAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
-    ARMENIAN {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new ArmenianAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
-    BASQUE {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new BasqueAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
-    BENGALI {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new BengaliAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
-    BRAZILIAN {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new BrazilianAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
-    BULGARIAN {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new BulgarianAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
-    CATALAN {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new CatalanAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
-    CHINESE(CachingStrategy.ONE) {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new StandardAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
-    CJK {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new CJKAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
-    CZECH {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new CzechAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
-    DUTCH {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new DutchAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
-    DANISH {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new DanishAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
-    ENGLISH {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new EnglishAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
-    FINNISH {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new FinnishAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
-    FRENCH {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new FrenchAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
-    GALICIAN {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new GalicianAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
-    GERMAN {
-        @Override
-        protected Analyzer create(Version version) {
-            Analyzer a = new GermanAnalyzer();
-            a.setVersion(version.luceneVersion);
-            return a;
-        }
-    },
-
     GREEK {
         @Override
         protected Analyzer create(Version version) {

+ 11 - 8
server/src/test/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerTests.java

@@ -61,14 +61,17 @@ public class PreBuiltAnalyzerTests extends ESSingleNodeTestCase {
     }
 
     public void testThatInstancesAreCachedAndReused() {
-        assertSame(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.CURRENT),
-                PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.CURRENT));
-        // same lucene version should be cached
-        assertSame(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_5_2_1),
-                PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_5_2_2));
-
-        assertNotSame(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_5_0_0),
-                PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_5_0_1));
+        assertSame(PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.CURRENT),
+                PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.CURRENT));
+        // same es version should be cached
+        assertSame(PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_5_2_1),
+                PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_5_2_1));
+        assertNotSame(PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_5_0_0),
+                PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_5_0_1));
+
+        // Same Lucene version should be cached:
+        assertSame(PreBuiltAnalyzers.STOP.getAnalyzer(Version.V_5_2_1),
+            PreBuiltAnalyzers.STOP.getAnalyzer(Version.V_5_2_2));
     }
 
     public void testThatAnalyzersAreUsedInMapping() throws IOException {

+ 21 - 19
server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java

@@ -55,7 +55,6 @@ import org.elasticsearch.index.mapper.MapperService.MergeReason;
 import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
 import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
 import org.elasticsearch.index.query.QueryShardContext;
-import org.elasticsearch.index.search.MatchQuery;
 import org.elasticsearch.index.shard.IndexShard;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.test.ESSingleNodeTestCase;
@@ -87,6 +86,9 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
             .putList("index.analysis.filter.mySynonyms.synonyms", Collections.singletonList("car, auto"))
             .put("index.analysis.analyzer.synonym.tokenizer", "standard")
             .put("index.analysis.analyzer.synonym.filter", "mySynonyms")
+            // Stop filter remains in server as it is part of lucene-core
+            .put("index.analysis.analyzer.my_stop_analyzer.tokenizer", "standard")
+            .put("index.analysis.analyzer.my_stop_analyzer.filter", "stop")
             .build();
         indexService = createIndex("test", settings);
         parser = indexService.mapperService().documentMapperParser();
@@ -621,7 +623,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
             String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                 .startObject("properties").startObject("field")
                 .field("type", "text")
-                .field("analyzer", "english")
+                .field("analyzer", "standard")
                 .startObject("index_prefixes").endObject()
                 .field("index_options", "offsets")
                 .endObject().endObject().endObject().endObject());
@@ -637,7 +639,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
             String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                 .startObject("properties").startObject("field")
                 .field("type", "text")
-                .field("analyzer", "english")
+                .field("analyzer", "standard")
                 .startObject("index_prefixes").endObject()
                 .field("index_options", "freqs")
                 .endObject().endObject().endObject().endObject());
@@ -654,7 +656,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
             String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                 .startObject("properties").startObject("field")
                 .field("type", "text")
-                .field("analyzer", "english")
+                .field("analyzer", "standard")
                 .startObject("index_prefixes").endObject()
                 .field("index_options", "positions")
                 .endObject().endObject().endObject().endObject());
@@ -675,7 +677,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
             String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                 .startObject("properties").startObject("field")
                 .field("type", "text")
-                .field("analyzer", "english")
+                .field("analyzer", "standard")
                 .startObject("index_prefixes").endObject()
                 .field("term_vector", "with_positions_offsets")
                 .endObject().endObject().endObject().endObject());
@@ -696,7 +698,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
             String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                 .startObject("properties").startObject("field")
                 .field("type", "text")
-                .field("analyzer", "english")
+                .field("analyzer", "standard")
                 .startObject("index_prefixes").endObject()
                 .field("term_vector", "with_positions")
                 .endObject().endObject().endObject().endObject());
@@ -725,7 +727,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
             .startObject("properties")
             .startObject("field")
             .field("type", "text")
-            .field("analyzer", "english")
+            .field("analyzer", "my_stop_analyzer")
             .field("index_phrases", true)
             .endObject()
             .startObject("synfield")
@@ -742,20 +744,20 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
         queryShardContext.getMapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);
 
         Query q = new MatchPhraseQueryBuilder("field", "two words").toQuery(queryShardContext);
-        assertThat(q, is(new PhraseQuery("field._index_phrase", "two word")));
+        assertThat(q, is(new PhraseQuery("field._index_phrase", "two words")));
 
         Query q2 = new MatchPhraseQueryBuilder("field", "three words here").toQuery(queryShardContext);
-        assertThat(q2, is(new PhraseQuery("field._index_phrase", "three word", "word here")));
+        assertThat(q2, is(new PhraseQuery("field._index_phrase", "three words", "words here")));
 
         Query q3 = new MatchPhraseQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext);
-        assertThat(q3, is(new PhraseQuery(1, "field", "two", "word")));
+        assertThat(q3, is(new PhraseQuery(1, "field", "two", "words")));
 
         Query q4 = new MatchPhraseQueryBuilder("field", "singleton").toQuery(queryShardContext);
         assertThat(q4, is(new TermQuery(new Term("field", "singleton"))));
 
         Query q5 = new MatchPhraseQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext);
         assertThat(q5,
-            is(new PhraseQuery.Builder().add(new Term("field", "sparkl")).add(new Term("field", "stopword"), 2).build()));
+            is(new PhraseQuery.Builder().add(new Term("field", "sparkle")).add(new Term("field", "stopword"), 2).build()));
 
         Query q6 = new MatchPhraseQueryBuilder("synfield", "motor car").toQuery(queryShardContext);
         assertThat(q6, is(new MultiPhraseQuery.Builder()
@@ -778,7 +780,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
             CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
             ts.reset();
             assertTrue(ts.incrementToken());
-            assertEquals("some english", termAtt.toString());
+            assertEquals("Some English", termAtt.toString());
         }
 
         {
@@ -821,7 +823,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
             String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                 .startObject("properties").startObject("field")
                 .field("type", "text")
-                .field("analyzer", "english")
+                .field("analyzer", "standard")
                 .startObject("index_prefixes")
                 .field("min_chars", 1)
                 .field("max_chars", 10)
@@ -855,7 +857,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
             String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                 .startObject("properties").startObject("field")
                 .field("type", "text")
-                .field("analyzer", "english")
+                .field("analyzer", "standard")
                 .startObject("index_prefixes").endObject()
                 .endObject().endObject()
                 .endObject().endObject());
@@ -880,7 +882,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
             String illegalMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                 .startObject("properties").startObject("field")
                 .field("type", "text")
-                .field("analyzer", "english")
+                .field("analyzer", "standard")
                 .startObject("index_prefixes")
                 .field("min_chars", 1)
                 .field("max_chars", 10)
@@ -903,7 +905,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
             String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                 .startObject("properties").startObject("field")
                 .field("type", "text")
-                .field("analyzer", "english")
+                .field("analyzer", "standard")
                 .startObject("index_prefixes")
                 .field("min_chars", 11)
                 .field("max_chars", 10)
@@ -920,7 +922,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
             String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                 .startObject("properties").startObject("field")
                 .field("type", "text")
-                .field("analyzer", "english")
+                .field("analyzer", "standard")
                 .startObject("index_prefixes")
                 .field("min_chars", 0)
                 .field("max_chars", 10)
@@ -937,7 +939,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
             String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                 .startObject("properties").startObject("field")
                 .field("type", "text")
-                .field("analyzer", "english")
+                .field("analyzer", "standard")
                 .startObject("index_prefixes")
                 .field("min_chars", 1)
                 .field("max_chars", 25)
@@ -954,7 +956,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
             String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                 .startObject("properties").startObject("field")
                 .field("type", "text")
-                .field("analyzer", "english")
+                .field("analyzer", "standard")
                 .field("index_prefixes", (String) null)
                 .endObject().endObject()
                 .endObject().endObject());

+ 2 - 2
server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java

@@ -1300,7 +1300,7 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
         assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
         Query query = new QueryStringQueryBuilder("the quick fox")
             .field(STRING_FIELD_NAME)
-            .analyzer("english")
+            .analyzer("stop")
             .toQuery(createShardContext());
         BooleanQuery expected = new BooleanQuery.Builder()
             .add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), Occur.SHOULD)
@@ -1313,7 +1313,7 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
         assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
         Query query = new QueryStringQueryBuilder("the* quick fox")
             .field(STRING_FIELD_NAME)
-            .analyzer("english")
+            .analyzer("stop")
             .toQuery(createShardContext());
         BooleanQuery expected = new BooleanQuery.Builder()
             .add(new PrefixQuery(new Term(STRING_FIELD_NAME, "the")), Occur.SHOULD)

+ 2 - 2
server/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java

@@ -629,7 +629,7 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase<SimpleQ
         assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
         Query query = new SimpleQueryStringBuilder("the quick fox")
             .field(STRING_FIELD_NAME)
-            .analyzer("english")
+            .analyzer("stop")
             .toQuery(createShardContext());
         BooleanQuery expected = new BooleanQuery.Builder()
             .add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), BooleanClause.Occur.SHOULD)
@@ -642,7 +642,7 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase<SimpleQ
         assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
         Query query = new SimpleQueryStringBuilder("the* quick fox")
             .field(STRING_FIELD_NAME)
-            .analyzer("english")
+            .analyzer("stop")
             .toQuery(createShardContext());
         BooleanQuery expected = new BooleanQuery.Builder()
             .add(new PrefixQuery(new Term(STRING_FIELD_NAME, "the")), BooleanClause.Occur.SHOULD)

+ 48 - 5
server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java

@@ -22,6 +22,9 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.search.join.ScoreMode;
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchRequestBuilder;
@@ -36,6 +39,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
 import org.elasticsearch.index.analysis.AnalyzerProvider;
+import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
 import org.elasticsearch.index.query.AbstractQueryBuilder;
 import org.elasticsearch.index.query.IdsQueryBuilder;
 import org.elasticsearch.index.query.MatchQueryBuilder;
@@ -66,9 +70,11 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 
+import static java.util.Collections.singletonList;
 import static java.util.Collections.singletonMap;
 import static org.elasticsearch.client.Requests.searchRequest;
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
@@ -113,7 +119,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
 
     @Override
     protected Collection<Class<? extends Plugin>> nodePlugins() {
-        return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class, MockWhitespacePlugin.class);
+        return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class, MockAnalysisPlugin.class);
     }
 
     public void testHighlightingWithStoredKeyword() throws IOException {
@@ -765,14 +771,19 @@ public class HighlighterSearchIT extends ESIntegTestCase {
     }
 
     private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception {
+        Settings.Builder settings = Settings.builder();
+        settings.put(indexSettings());
+        settings.put("index.analysis.analyzer.mock_english.tokenizer", "standard");
+        settings.put("index.analysis.analyzer.mock_english.filter", "mock_snowball");
         assertAcked(prepareCreate("test")
+            .setSettings(settings)
             .addMapping("type1", XContentFactory.jsonBuilder().startObject().startObject("type1")
                 .startObject("properties")
                     .startObject("foo")
                         .field("type", "text")
                         .field("term_vector", "with_positions_offsets")
                         .field("store", true)
-                        .field("analyzer", "english")
+                        .field("analyzer", "mock_english")
                         .startObject("fields")
                             .startObject("plain")
                                 .field("type", "text")
@@ -785,7 +796,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
                         .field("type", "text")
                         .field("term_vector", "with_positions_offsets")
                         .field("store", true)
-                        .field("analyzer", "english")
+                        .field("analyzer", "mock_english")
                         .startObject("fields")
                             .startObject("plain")
                                 .field("type", "text")
@@ -2819,7 +2830,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
         assertAcked(prepareCreate("test").setSettings(builder.build())
             .addMapping("type1", "field1",
                 "type=text,term_vector=with_positions_offsets,search_analyzer=synonym," +
-                    "analyzer=english,index_options=offsets"));
+                    "analyzer=standard,index_options=offsets"));
         ensureGreen();
 
         client().prepareIndex("test", "type1", "0").setSource(
@@ -2983,7 +2994,39 @@ public class HighlighterSearchIT extends ESIntegTestCase {
         }
     }
 
-    public static class MockWhitespacePlugin extends Plugin implements AnalysisPlugin {
+    public static class MockAnalysisPlugin extends Plugin implements AnalysisPlugin {
+
+        public final class MockSnowBall extends TokenFilter {
+            private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+            /** Sole constructor. */
+            MockSnowBall(TokenStream in) {
+                super(in);
+            }
+
+            @Override
+            public boolean incrementToken() throws IOException {
+                if (input.incrementToken()) {
+                    final char[] buffer = termAtt.buffer();
+                    final int length = termAtt.length();
+                    if (buffer[length - 1] == 's') {
+                        termAtt.setLength(length - 1);
+                    }
+                    if (length > 3) {
+                        if (buffer[length - 1] == 'g' && buffer[length - 2] == 'n' && buffer[length - 3] == 'i') {
+                            termAtt.setLength(length- 3);
+                        }
+                    }
+                    return true;
+                } else
+                    return false;
+            }
+        }
+
+        @Override
+        public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
+            return singletonList(PreConfiguredTokenFilter.singleton("mock_snowball", false, MockSnowBall::new));
+        }
 
         @Override
         public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {

+ 55 - 4
server/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java

@@ -19,6 +19,12 @@
 
 package org.elasticsearch.search.query;
 
+import org.apache.lucene.analysis.CharacterUtils;
+import org.apache.lucene.analysis.MockLowerCaseFilter;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
 import org.elasticsearch.action.index.IndexRequestBuilder;
@@ -28,12 +34,19 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentType;
+import org.elasticsearch.index.analysis.CharFilterFactory;
+import org.elasticsearch.index.analysis.MultiTermAwareComponent;
+import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
+import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
+import org.elasticsearch.index.analysis.TokenizerFactory;
 import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.query.BoolQueryBuilder;
 import org.elasticsearch.index.query.Operator;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.index.query.SimpleQueryStringBuilder;
 import org.elasticsearch.index.query.SimpleQueryStringFlag;
+import org.elasticsearch.indices.analysis.AnalysisModule;
+import org.elasticsearch.plugins.AnalysisPlugin;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.search.SearchHits;
@@ -42,14 +55,19 @@ import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.InternalSettingsPlugin;
 
 import java.io.IOException;
+import java.io.Reader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ExecutionException;
+import java.util.function.Function;
 
+import static java.util.Collections.singletonList;
+import static java.util.Collections.singletonMap;
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
 import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
 import static org.elasticsearch.index.query.QueryBuilders.simpleQueryStringQuery;
@@ -72,11 +90,15 @@ import static org.hamcrest.Matchers.equalTo;
 public class SimpleQueryStringIT extends ESIntegTestCase {
     @Override
     protected Collection<Class<? extends Plugin>> nodePlugins() {
-        return Arrays.asList(InternalSettingsPlugin.class); // uses index.version.created
+        return Arrays.asList(MockAnalysisPlugin.class, InternalSettingsPlugin.class); // uses index.version.created
     }
 
     public void testSimpleQueryString() throws ExecutionException, InterruptedException {
-        createIndex("test");
+        Settings.Builder settings = Settings.builder();
+        settings.put(indexSettings());
+        settings.put("index.analysis.analyzer.mock_snowball.tokenizer", "standard");
+        settings.put("index.analysis.analyzer.mock_snowball.filter", "mock_snowball");
+        createIndex("test", settings.build());
         indexRandom(true, false,
                 client().prepareIndex("test", "type1", "1").setSource("body", "foo"),
                 client().prepareIndex("test", "type1", "2").setSource("body", "bar"),
@@ -108,7 +130,7 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
         assertSearchHits(searchResponse, "4", "5");
 
         searchResponse = client().prepareSearch().setQuery(
-                simpleQueryStringQuery("eggplants").analyzer("snowball")).get();
+                simpleQueryStringQuery("eggplants").analyzer("mock_snowball")).get();
         assertHitCount(searchResponse, 1L);
         assertFirstHit(searchResponse, hasId("4"));
 
@@ -312,7 +334,7 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
                 .startObject("properties")
                 .startObject("location")
                 .field("type", "text")
-                .field("analyzer", "german")
+                .field("analyzer", "standard")
                 .endObject()
                 .endObject()
                 .endObject()
@@ -583,4 +605,33 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
         }
         assertThat(hitIds, containsInAnyOrder(ids));
     }
+
+    /**
+     * Test-only analysis plugin replacing the {@code snowball} token filter that moved
+     * out of the server module: registers a pre-configured {@code mock_snowball} filter
+     * that crudely strips a single trailing 's' from each token.
+     */
+    public static class MockAnalysisPlugin extends Plugin implements AnalysisPlugin {
+
+        /** Naive stemmer: drops one trailing 's' (e.g. "eggplants" -> "eggplant"). */
+        public final class MockSnowBall extends TokenFilter {
+            private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+            /** Sole constructor. */
+            MockSnowBall(TokenStream in) {
+                super(in);
+            }
+
+            @Override
+            public boolean incrementToken() throws IOException {
+                if (input.incrementToken() == false) {
+                    return false;
+                }
+                int length = termAtt.length();
+                // Guard against zero-length terms; buffer[-1] would throw AIOOBE.
+                if (length > 0 && termAtt.buffer()[length - 1] == 's') {
+                    termAtt.setLength(length - 1);
+                }
+                return true;
+            }
+        }
+
+        @Override
+        public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
+            return singletonList(PreConfiguredTokenFilter.singleton("mock_snowball", false, MockSnowBall::new));
+        }
+    }
 }

+ 1 - 1
server/src/test/java/org/elasticsearch/validate/SimpleValidateQueryIT.java

@@ -87,7 +87,7 @@ public class SimpleValidateQueryIT extends ESIntegTestCase {
                 .setSource(XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("properties")
                         .startObject("foo").field("type", "text").endObject()
                         .startObject("bar").field("type", "integer").endObject()
-                        .startObject("baz").field("type", "text").field("analyzer", "snowball").endObject()
+                        .startObject("baz").field("type", "text").field("analyzer", "standard").endObject()
                         .startObject("pin").startObject("properties").startObject("location").field("type", "geo_point").endObject().endObject().endObject()
                         .endObject().endObject().endObject())
                 .execute().actionGet();