
Move more token filters to analysis-common module

The following token filters were moved: common grams, limit token count, pattern capture, and pattern replace.

Relates to #23658
Martijn van Groningen · 8 years ago
commit 6db708ef75
19 changed files with 180 additions and 66 deletions
  1. + 0 - 3    buildSrc/src/main/resources/checkstyle_suppressions.xml
  2. + 0 - 8    core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
  3. + 0 - 0    modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/AbstractCompoundWordTokenFilterFactory.java
  4. + 4 - 1    modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
  5. + 10 - 5   modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactory.java
  6. + 7 - 6    modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactory.java
  7. + 3 - 2    modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternCaptureGroupTokenFilterFactory.java
  8. + 1 - 1    modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceCharFilterFactory.java
  9. + 2 - 1    modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterFactory.java
  10. + 5 - 0   modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java
  11. + 30 - 21 modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactoryTests.java
  12. + 11 - 5  modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactoryTests.java
  13. + 7 - 4   modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternCaptureTokenFilterTests.java
  14. + 0 - 0   modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/common_words.txt
  15. + 0 - 0   modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/commongrams.json
  16. + 0 - 0   modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/commongrams_query_mode.json
  17. + 0 - 0   modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/pattern_capture.json
  18. + 95 - 0  modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml
  19. + 5 - 9   test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java

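Before this change the four filters were wired up directly in AnalysisModule; afterwards they are registered through the AnalysisPlugin#getTokenFilters hook in CommonAnalysisPlugin, like any other module- or plugin-provided filter. A minimal sketch of that registration path follows, using a hypothetical MyAnalysisPlugin and MyLimitTokenFilterFactory (the real factories in this commit are package-private to the analysis-common module), shown only to make the shape of the API concrete:

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
    import org.elasticsearch.common.settings.Settings;
    import org.elasticsearch.env.Environment;
    import org.elasticsearch.index.IndexSettings;
    import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
    import org.elasticsearch.index.analysis.TokenFilterFactory;
    import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
    import org.elasticsearch.plugins.AnalysisPlugin;
    import org.elasticsearch.plugins.Plugin;

    public class MyAnalysisPlugin extends Plugin implements AnalysisPlugin {

        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            Map<String, AnalysisProvider<TokenFilterFactory>> filters = new HashMap<>();
            // "my_limit" becomes usable as a filter type in index analysis settings once the
            // plugin is loaded. Factories that cannot work without settings are additionally
            // wrapped in the requriesAnalysisSettings(...) helper seen in the diff below.
            filters.put("my_limit", MyLimitTokenFilterFactory::new);
            return filters;
        }

        // Hypothetical factory, included only so the constructor-reference signature above is
        // concrete; it mirrors the relocated LimitTokenCountFilterFactory.
        static class MyLimitTokenFilterFactory extends AbstractTokenFilterFactory {
            private final int maxTokenCount;

            MyLimitTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
                super(indexSettings, name, settings);
                this.maxTokenCount = settings.getAsInt("max_token_count", 1);
            }

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new LimitTokenCountFilter(tokenStream, maxTokenCount);
            }
        }
    }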
+ 0 - 3
buildSrc/src/main/resources/checkstyle_suppressions.xml

@@ -266,7 +266,6 @@
   <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]MergePolicyConfig.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]SearchSlowLog.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]AnalysisRegistry.java" checks="LineLength" />
-  <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]CommonGramsTokenFilterFactory.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]CustomAnalyzerProvider.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]ShingleTokenFilterFactory.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]StemmerOverrideTokenFilterFactory.java" checks="LineLength" />
@@ -564,9 +563,7 @@
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]IndexingSlowLogTests.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]MergePolicySettingsTests.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]SearchSlowLogTests.java" checks="LineLength" />
-  <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PatternCaptureTokenFilterTests.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PreBuiltAnalyzerTests.java" checks="LineLength" />
-  <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]commongrams[/\\]CommonGramsTokenFilterFactoryTests.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]engine[/\\]InternalEngineMergeIT.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]engine[/\\]InternalEngineTests.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]fielddata[/\\]AbstractFieldDataTestCase.java" checks="LineLength" />

+ 0 - 8
core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java

@@ -46,7 +46,6 @@ import org.elasticsearch.index.analysis.ChineseAnalyzerProvider;
 import org.elasticsearch.index.analysis.CjkAnalyzerProvider;
 import org.elasticsearch.index.analysis.ClassicFilterFactory;
 import org.elasticsearch.index.analysis.ClassicTokenizerFactory;
-import org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory;
 import org.elasticsearch.index.analysis.CzechAnalyzerProvider;
 import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.DanishAnalyzerProvider;
@@ -80,7 +79,6 @@ import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
 import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
 import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
 import org.elasticsearch.index.analysis.LetterTokenizerFactory;
-import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
 import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider;
 import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
 import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
@@ -88,8 +86,6 @@ import org.elasticsearch.index.analysis.NGramTokenizerFactory;
 import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider;
 import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
 import org.elasticsearch.index.analysis.PatternAnalyzerProvider;
-import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
-import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
 import org.elasticsearch.index.analysis.PatternTokenizerFactory;
 import org.elasticsearch.index.analysis.PersianAnalyzerProvider;
 import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
@@ -196,13 +192,9 @@ public final class AnalysisModule {
         tokenFilters.register("standard", StandardTokenFilterFactory::new);
         tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
         tokenFilters.register("min_hash", MinHashTokenFilterFactory::new);
-        tokenFilters.register("limit", LimitTokenCountFilterFactory::new);
-        tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
         tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
         tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new));
         tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new));
-        tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
-        tokenFilters.register("pattern_replace", requriesAnalysisSettings(PatternReplaceTokenFilterFactory::new));
         tokenFilters.register("arabic_stem", ArabicStemTokenFilterFactory::new);
         tokenFilters.register("brazilian_stem", BrazilianStemTokenFilterFactory::new);
         tokenFilters.register("czech_stem", CzechStemTokenFilterFactory::new);

+ 0 - 0
core/src/main/java/org/elasticsearch/analysis/common/AbstractCompoundWordTokenFilterFactory.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/AbstractCompoundWordTokenFilterFactory.java


+ 4 - 1
modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

@@ -68,7 +68,6 @@ import org.apache.lucene.analysis.util.ElisionFilter;
 import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
 import org.elasticsearch.index.analysis.HtmlStripCharFilterFactory;
-import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
 import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
@@ -115,6 +114,10 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
         filters.put("reverse", ReverseTokenFilterFactory::new);
         filters.put("elision", ElisionTokenFilterFactory::new);
         filters.put("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new));
+        filters.put("limit", LimitTokenCountFilterFactory::new);
+        filters.put("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
+        filters.put("pattern_replace", requriesAnalysisSettings(PatternReplaceTokenFilterFactory::new));
+        filters.put("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
         return filters;
     }
 

+ 10 - 5
core/src/main/java/org/elasticsearch/index/analysis/CommonGramsTokenFilterFactory.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactory.java

@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
@@ -26,6 +26,8 @@ import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {
 
@@ -35,14 +37,17 @@ public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {
 
     private final boolean queryMode;
 
-    public CommonGramsTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    CommonGramsTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
-        this.ignoreCase = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), "ignore_case", false, deprecationLogger);
-        this.queryMode = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), "query_mode", false, deprecationLogger);
+        this.ignoreCase = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(),
+                "ignore_case", false, deprecationLogger);
+        this.queryMode = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(),
+                "query_mode", false, deprecationLogger);
         this.words = Analysis.parseCommonWords(env, settings, null, ignoreCase);
 
         if (this.words == null) {
-            throw new IllegalArgumentException("missing or empty [common_words] or [common_words_path] configuration for common_grams token filter");
+            throw new IllegalArgumentException(
+                    "missing or empty [common_words] or [common_words_path] configuration for common_grams token filter");
         }
     }
 

+ 7 - 6
core/src/main/java/org/elasticsearch/index/analysis/LimitTokenCountFilterFactory.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactory.java

@@ -17,23 +17,24 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 
 public class LimitTokenCountFilterFactory extends AbstractTokenFilterFactory {
 
-    public static final int DEFAULT_MAX_TOKEN_COUNT = 1;
-    public static final boolean DEFAULT_CONSUME_ALL_TOKENS = false;
+    static final int DEFAULT_MAX_TOKEN_COUNT = 1;
+    static final boolean DEFAULT_CONSUME_ALL_TOKENS = false;
 
-    final int maxTokenCount;
-    final boolean consumeAllTokens;
+    private final int maxTokenCount;
+    private final boolean consumeAllTokens;
 
-    public LimitTokenCountFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    LimitTokenCountFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         this.maxTokenCount = settings.getAsInt("max_token_count", DEFAULT_MAX_TOKEN_COUNT);
         this.consumeAllTokens = settings.getAsBooleanLenientForPreEs6Indices(

+ 3 - 2
core/src/main/java/org/elasticsearch/index/analysis/PatternCaptureGroupTokenFilterFactory.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternCaptureGroupTokenFilterFactory.java

@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 
 import org.apache.lucene.analysis.TokenFilter;
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.pattern.PatternCaptureGroupTokenFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 
 import java.util.regex.Pattern;
 
@@ -34,7 +35,7 @@ public class PatternCaptureGroupTokenFilterFactory extends AbstractTokenFilterFa
     private static final String PATTERNS_KEY = "patterns";
     private static final String PRESERVE_ORIG_KEY = "preserve_original";
 
-    public PatternCaptureGroupTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    PatternCaptureGroupTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         String[] regexes = settings.getAsArray(PATTERNS_KEY, null, false);
         if (regexes == null) {

+ 1 - 1
modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceCharFilterFactory.java

@@ -35,7 +35,7 @@ public class PatternReplaceCharFilterFactory extends AbstractCharFilterFactory i
     private final Pattern pattern;
     private final String replacement;
 
-    public PatternReplaceCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    PatternReplaceCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name);
 
         String sPattern = settings.get("pattern");

+ 2 - 1
core/src/main/java/org/elasticsearch/index/analysis/PatternReplaceTokenFilterFactory.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterFactory.java

@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
@@ -25,6 +25,7 @@ import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 
 import java.util.regex.Pattern;
 

+ 5 - 0
modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java

@@ -101,6 +101,11 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
         filters.put("reversestring", ReverseTokenFilterFactory.class);
         filters.put("elision", ElisionTokenFilterFactory.class);
         filters.put("truncate", TruncateTokenFilterFactory.class);
+        filters.put("limittokencount", LimitTokenCountFilterFactory.class);
+        filters.put("commongrams", CommonGramsTokenFilterFactory.class);
+        filters.put("commongramsquery", CommonGramsTokenFilterFactory.class);
+        filters.put("patternreplace", PatternReplaceTokenFilterFactory.class);
+        filters.put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class);
         return filters;
     }
 

+ 30 - 21
core/src/test/java/org/elasticsearch/index/analysis/commongrams/CommonGramsTokenFilterFactoryTests.java → modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactoryTests.java

@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis.commongrams;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Tokenizer;
@@ -60,7 +60,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
                      .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                      .build();
 
-            ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+            ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
             {
                 TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_default");
                 String source = "the quick brown is a fox Or noT";
@@ -77,7 +77,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
                      .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                      .putArray("index.analysis.filter.common_grams_default.common_words", "chromosome", "protein")
                      .build();
-            ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+            ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
             {
                 TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_default");
                 String source = "the quick brown is a fox Or noT";
@@ -96,10 +96,11 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
                     .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                     .putArray("index.analysis.filter.common_grams_1.common_words", "the", "Or", "Not", "a", "is", "an", "they", "are")
                     .build();
-            ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+            ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
             TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_1");
             String source = "the quick brown is a fox or noT";
-            String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "a_fox", "fox", "fox_or", "or", "or_noT", "noT" };
+            String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a",
+                    "a_fox", "fox", "fox_or", "or", "or_noT", "noT" };
             Tokenizer tokenizer = new WhitespaceTokenizer();
             tokenizer.setReader(new StringReader(source));
             assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@@ -110,10 +111,11 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
                     .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                     .putArray("index.analysis.filter.common_grams_2.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are")
                     .build();
-            ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+            ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
             TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_2");
             String source = "the quick brown is a fox or why noT";
-            String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "a_fox", "fox", "or", "why", "why_noT", "noT" };
+            String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "" +
+                    "a_fox", "fox", "or", "why", "why_noT", "noT" };
             Tokenizer tokenizer = new WhitespaceTokenizer();
             tokenizer.setReader(new StringReader(source));
             assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@@ -123,10 +125,11 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
                     .putArray("index.analysis.filter.common_grams_3.common_words", "the", "or", "not", "a", "is", "an", "they", "are")
                     .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                     .build();
-            ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+            ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
             TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_3");
             String source = "the quick brown is a fox Or noT";
-            String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "a_fox", "fox", "Or", "noT" };
+            String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a",
+                    "a_fox", "fox", "Or", "noT" };
             Tokenizer tokenizer = new WhitespaceTokenizer();
             tokenizer.setReader(new StringReader(source));
             assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@@ -134,25 +137,27 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
     }
 
     public void testCommonGramsAnalysis() throws IOException {
-        String json = "/org/elasticsearch/index/analysis/commongrams/commongrams.json";
+        String json = "/org/elasticsearch/analysis/common/commongrams.json";
         Settings settings = Settings.builder()
                      .loadFromStream(json, getClass().getResourceAsStream(json))
                      .put(Environment.PATH_HOME_SETTING.getKey(), createHome())
                      .build();
         {
-            IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings)
+            IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings)
                 .indexAnalyzers;
             Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer").analyzer();
             String source = "the quick brown is a fox or not";
-            String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox", "fox", "fox_or", "or", "not" };
+            String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox",
+                    "fox", "fox_or", "or", "not" };
             assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
         }
         {
-            IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings)
+            IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings)
                 .indexAnalyzers;
             Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer_file").analyzer();
             String source = "the quick brown is a fox or not";
-            String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox", "fox", "fox_or", "or", "not" };
+            String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox",
+                    "fox", "fox_or", "or", "not" };
             assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
         }
     }
@@ -165,7 +170,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
                     .put("index.analysis.filter.common_grams_1.ignore_case", true)
                     .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                     .build();
-            ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+            ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
             TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_1");
             String source = "the quick brown is a fox or noT";
             String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox_or", "or_noT" };
@@ -180,7 +185,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
                     .put("index.analysis.filter.common_grams_2.ignore_case", false)
                     .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                     .build();
-            ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+            ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
             TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_2");
             String source = "the quick brown is a fox or why noT";
             String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox", "or", "why_noT" };
@@ -194,7 +199,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
                     .putArray("index.analysis.filter.common_grams_3.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are")
                     .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                     .build();
-            ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+            ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
             TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_3");
             String source = "the quick brown is a fox or why noT";
             String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox", "or", "why_noT" };
@@ -208,7 +213,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
                     .putArray("index.analysis.filter.common_grams_4.common_words", "the", "or", "not", "a", "is", "an", "they", "are")
                     .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                     .build();
-            ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+            ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
             TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_4");
             String source = "the quick brown is a fox Or noT";
             String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox", "Or", "noT" };
@@ -219,13 +224,13 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
     }
 
     public void testQueryModeCommonGramsAnalysis() throws IOException {
-        String json = "/org/elasticsearch/index/analysis/commongrams/commongrams_query_mode.json";
+        String json = "/org/elasticsearch/analysis/common/commongrams_query_mode.json";
         Settings settings = Settings.builder()
                 .loadFromStream(json, getClass().getResourceAsStream(json))
             .put(Environment.PATH_HOME_SETTING.getKey(), createHome())
                 .build();
         {
-            IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings)
+            IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings)
                 .indexAnalyzers;
             Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer").analyzer();
             String source = "the quick brown is a fox or not";
@@ -233,7 +238,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
             assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
         }
         {
-            IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings)
+            IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings)
                 .indexAnalyzers;
             Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer_file").analyzer();
             String source = "the quick brown is a fox or not";
@@ -251,4 +256,8 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
         return home;
     }
 
+    private static ESTestCase.TestAnalysis createTestAnalysisFromSettings(Settings settings) throws IOException {
+        return AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
+    }
+
 }

+ 11 - 5
core/src/test/java/org/elasticsearch/index/analysis/LimitTokenCountFilterFactoryTests.java → modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactoryTests.java

@@ -17,12 +17,14 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.analysis.AnalysisTestsHelper;
+import org.elasticsearch.index.analysis.TokenFilterFactory;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.test.ESTokenStreamTestCase;
 
@@ -35,7 +37,7 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase {
                 .put("index.analysis.filter.limit_default.type", "limit")
                 .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                 .build();
-        ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+        ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
         {
             TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_default");
             String source = "the quick brown fox";
@@ -62,7 +64,7 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase {
                     .put("index.analysis.filter.limit_1.consume_all_tokens", true)
                     .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                     .build();
-            ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+            ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
             TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_1");
             String source = "the quick brown fox";
             String[] expected = new String[] { "the", "quick", "brown" };
@@ -77,7 +79,7 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase {
                     .put("index.analysis.filter.limit_1.consume_all_tokens", false)
                     .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                     .build();
-            ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+            ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
             TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_1");
             String source = "the quick brown fox";
             String[] expected = new String[] { "the", "quick", "brown" };
@@ -93,7 +95,7 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase {
                     .put("index.analysis.filter.limit_1.consume_all_tokens", true)
                     .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                     .build();
-            ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+            ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
             TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_1");
             String source = "the quick brown fox";
             String[] expected = new String[] { "the", "quick", "brown", "fox" };
@@ -103,4 +105,8 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase {
         }
     }
 
+    private static ESTestCase.TestAnalysis createTestAnalysisFromSettings(Settings settings) throws IOException {
+        return AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
+    }
+
 }

+ 7 - 4
core/src/test/java/org/elasticsearch/index/analysis/PatternCaptureTokenFilterTests.java → modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternCaptureTokenFilterTests.java

@@ -17,13 +17,15 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.elasticsearch.Version;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.IndexAnalyzers;
+import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.test.ESTokenStreamTestCase;
 import org.elasticsearch.test.IndexSettingsModule;
 
@@ -32,7 +34,7 @@ import static org.hamcrest.Matchers.containsString;
 
 public class PatternCaptureTokenFilterTests extends ESTokenStreamTestCase {
     public void testPatternCaptureTokenFilter() throws Exception {
-        String json = "/org/elasticsearch/index/analysis/pattern_capture.json";
+        String json = "/org/elasticsearch/analysis/common/pattern_capture.json";
         Settings settings = Settings.builder()
                 .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
                 .loadFromStream(json, getClass().getResourceAsStream(json))
@@ -40,7 +42,7 @@ public class PatternCaptureTokenFilterTests extends ESTokenStreamTestCase {
                 .build();
 
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
-        IndexAnalyzers indexAnalyzers = createTestAnalysis(idxSettings, settings).indexAnalyzers;
+        IndexAnalyzers indexAnalyzers = createTestAnalysis(idxSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers;
         NamedAnalyzer analyzer1 = indexAnalyzers.get("single");
 
         assertTokenStreamContents(analyzer1.tokenStream("test", "foobarbaz"), new String[]{"foobarbaz","foobar","foo"});
@@ -56,7 +58,8 @@ public class PatternCaptureTokenFilterTests extends ESTokenStreamTestCase {
 
     public void testNoPatterns() {
         try {
-            new PatternCaptureGroupTokenFilterFactory(IndexSettingsModule.newIndexSettings("test", Settings.EMPTY), null, "pattern_capture", Settings.builder().put("pattern", "foobar").build());
+            new PatternCaptureGroupTokenFilterFactory(IndexSettingsModule.newIndexSettings("test", Settings.EMPTY), null,
+                    "pattern_capture", Settings.builder().put("pattern", "foobar").build());
             fail ("Expected IllegalArgumentException");
         } catch (IllegalArgumentException e) {
             assertThat(e.getMessage(), containsString("required setting 'patterns' is missing"));

+ 0 - 0
core/src/test/resources/org/elasticsearch/index/analysis/commongrams/common_words.txt → modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/common_words.txt


+ 0 - 0
core/src/test/resources/org/elasticsearch/index/analysis/commongrams/commongrams.json → modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/commongrams.json


+ 0 - 0
core/src/test/resources/org/elasticsearch/index/analysis/commongrams/commongrams_query_mode.json → modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/commongrams_query_mode.json


+ 0 - 0
core/src/test/resources/org/elasticsearch/index/analysis/pattern_capture.json → modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/pattern_capture.json


+ 95 - 0
modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml

@@ -568,3 +568,98 @@
             filter:    [my_truncate]
     - length: { tokens: 1 }
     - match:  { tokens.0.token: foo }
+
+---
+"pattern_capture":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                filter:
+                  my_pattern_capture:
+                    type: pattern_capture
+                    preserve_original: false
+                    patterns: ["([^@]+)"]
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:      foo@bar.baz
+            tokenizer: keyword
+            filter:    [my_pattern_capture]
+    - length: { tokens: 2 }
+    - match:  { tokens.0.token: foo }
+    - match:  { tokens.1.token: bar.baz }
+
+---
+"pattern_replace":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                filter:
+                  my_pattern_replace:
+                    type: pattern_replace
+                    pattern: a
+                    replacement: b
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:      a
+            tokenizer: keyword
+            filter:    [my_pattern_replace]
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: b }
+
+---
+"limit_count":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                filter:
+                  my_limit:
+                    type: limit
+                    max_token_count: 2
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:      a b c
+            tokenizer: whitespace
+            filter:    [my_limit]
+    - length: { tokens: 2 }
+    - match:  { tokens.0.token: a }
+    - match:  { tokens.1.token: b }
+
+---
+"common_grams":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                filter:
+                  my_limit:
+                    type: common_grams
+                    common_words: [a]
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text:      a b c
+            tokenizer: whitespace
+            filter:    [my_limit]
+    - length: { tokens: 4 }
+    - match:  { tokens.0.token: a }
+    - match:  { tokens.1.token: a_b }
+    - match:  { tokens.2.token: b }
+    - match:  { tokens.3.token: c }
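
The YAML cases above exercise the relocated filters through the REST layer; the same wiring can be expressed with the Settings builder used by the Java tests in this commit. A minimal sketch with hypothetical filter names ("my_limit", "my_common_grams"), assuming the analysis-common module is installed so the "limit" and "common_grams" types resolve through CommonAnalysisPlugin:

    import org.elasticsearch.common.settings.Settings;

    public class MovedFiltersSettingsExample {
        public static void main(String[] args) {
            // Hypothetical index analysis settings chaining the two moved filters behind a
            // whitespace tokenizer; "limit" and "common_grams" are the type names that
            // CommonAnalysisPlugin registers in this commit.
            Settings settings = Settings.builder()
                    .put("index.analysis.filter.my_limit.type", "limit")
                    .put("index.analysis.filter.my_limit.max_token_count", 2)
                    .put("index.analysis.filter.my_common_grams.type", "common_grams")
                    .putArray("index.analysis.filter.my_common_grams.common_words", "a", "the")
                    .put("index.analysis.analyzer.my_analyzer.tokenizer", "whitespace")
                    .putArray("index.analysis.analyzer.my_analyzer.filter", "my_common_grams", "my_limit")
                    .build();
            System.out.println(settings);
        }
    }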

+ 5 - 9
test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java

@@ -31,7 +31,6 @@ import org.elasticsearch.index.analysis.CJKBigramFilterFactory;
 import org.elasticsearch.index.analysis.CJKWidthFilterFactory;
 import org.elasticsearch.index.analysis.ClassicFilterFactory;
 import org.elasticsearch.index.analysis.ClassicTokenizerFactory;
-import org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory;
 import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.DecimalDigitFilterFactory;
 import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
@@ -45,14 +44,11 @@ import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
 import org.elasticsearch.index.analysis.KeepWordFilterFactory;
 import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
 import org.elasticsearch.index.analysis.LetterTokenizerFactory;
-import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
 import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
 import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
 import org.elasticsearch.index.analysis.MultiTermAwareComponent;
 import org.elasticsearch.index.analysis.NGramTokenizerFactory;
 import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
-import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
-import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
 import org.elasticsearch.index.analysis.PatternTokenizerFactory;
 import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
 import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
@@ -143,8 +139,8 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
         .put("cjkbigram",                 CJKBigramFilterFactory.class)
         .put("cjkwidth",                  CJKWidthFilterFactory.class)
         .put("classic",                   ClassicFilterFactory.class)
-        .put("commongrams",               CommonGramsTokenFilterFactory.class)
-        .put("commongramsquery",          CommonGramsTokenFilterFactory.class)
+        .put("commongrams",               MovedToAnalysisCommon.class)
+        .put("commongramsquery",          MovedToAnalysisCommon.class)
         .put("czechstem",                 CzechStemTokenFilterFactory.class)
         .put("decimaldigit",              DecimalDigitFilterFactory.class)
         .put("delimitedpayload",          DelimitedPayloadTokenFilterFactory.class)
@@ -178,13 +174,13 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
         .put("kstem",                     MovedToAnalysisCommon.class)
         .put("latvianstem",               MovedToAnalysisCommon.class)
         .put("length",                    MovedToAnalysisCommon.class)
-        .put("limittokencount",           LimitTokenCountFilterFactory.class)
+        .put("limittokencount",           MovedToAnalysisCommon.class)
         .put("lowercase",                 MovedToAnalysisCommon.class)
         .put("ngram",                     MovedToAnalysisCommon.class)
         .put("norwegianlightstem",        MovedToAnalysisCommon.class)
         .put("norwegianminimalstem",      MovedToAnalysisCommon.class)
-        .put("patterncapturegroup",       PatternCaptureGroupTokenFilterFactory.class)
-        .put("patternreplace",            PatternReplaceTokenFilterFactory.class)
+        .put("patterncapturegroup",       MovedToAnalysisCommon.class)
+        .put("patternreplace",            MovedToAnalysisCommon.class)
         .put("persiannormalization",      PersianNormalizationFilterFactory.class)
         .put("porterstem",                MovedToAnalysisCommon.class)
         .put("portuguesestem",            MovedToAnalysisCommon.class)