Browse Source

Move char filters into analysis-common (#24261)

Another step down the road to dropping the
lucene-analyzers-common dependency from core.

Note that this removes some tests that no longer compile from
core. I played around with adding them to the analysis-common
module where they would compile but we already test these in
the tests generated from the example usage in the documentation.

I'm not super happy with the way that `requriesAnalysisSettings`
works with regards to plugins. I think it'd be fairly bug-prone
for plugin authors to use. But I'm making it visible as is for
now and I'll rethink later.

A part of #23658
Nik Everett 8 years ago
parent
commit
7c3efb829b
19 changed files with 241 additions and 379 deletions
  1. 1 1
      core/src/main/java/org/elasticsearch/index/analysis/CharFilterFactory.java
  2. 2 19
      core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
  3. 29 4
      core/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java
  4. 0 2
      core/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java
  5. 0 86
      core/src/test/java/org/elasticsearch/index/analysis/CharFilterTests.java
  6. 48 4
      core/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java
  7. 8 21
      core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java
  8. 0 183
      core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java
  9. 0 28
      core/src/test/resources/org/elasticsearch/index/analysis/test1.json
  10. 0 21
      core/src/test/resources/org/elasticsearch/index/analysis/test1.yml
  11. 14 0
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
  12. 4 1
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MappingCharFilterFactory.java
  13. 3 1
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceCharFilterFactory.java
  14. 0 0
      modules/analysis-common/src/main/java/org/elasticsearch/index/analysis/HtmlStripCharFilterFactory.java
  15. 10 1
      modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java
  16. 18 0
      modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/20_analyzers.yaml
  17. 54 0
      modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yaml
  18. 47 1
      modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/50_char_filters.yaml
  19. 3 6
      test/framework/src/main/java/org/elasticsearch/AnalysisFactoryTestCase.java

+ 1 - 1
core/src/main/java/org/elasticsearch/index/analysis/CharFilterFactory.java

@@ -25,5 +25,5 @@ public interface CharFilterFactory {
 
     String name();
 
-    Reader create(Reader tokenStream);
+    Reader create(Reader reader);
 }

+ 2 - 19
core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java

@@ -69,7 +69,6 @@ import org.elasticsearch.index.analysis.GermanStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.GreekAnalyzerProvider;
 import org.elasticsearch.index.analysis.HindiAnalyzerProvider;
 import org.elasticsearch.index.analysis.HindiNormalizationFilterFactory;
-import org.elasticsearch.index.analysis.HtmlStripCharFilterFactory;
 import org.elasticsearch.index.analysis.HungarianAnalyzerProvider;
 import org.elasticsearch.index.analysis.HunspellTokenFilterFactory;
 import org.elasticsearch.index.analysis.IndicNormalizationFilterFactory;
@@ -89,7 +88,6 @@ import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
 import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider;
 import org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory;
 import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
-import org.elasticsearch.index.analysis.MappingCharFilterFactory;
 import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
 import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
 import org.elasticsearch.index.analysis.NGramTokenizerFactory;
@@ -97,7 +95,6 @@ import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider;
 import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
 import org.elasticsearch.index.analysis.PatternAnalyzerProvider;
 import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
-import org.elasticsearch.index.analysis.PatternReplaceCharFilterFactory;
 import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
 import org.elasticsearch.index.analysis.PatternTokenizerFactory;
 import org.elasticsearch.index.analysis.PersianAnalyzerProvider;
@@ -146,6 +143,8 @@ import org.elasticsearch.plugins.AnalysisPlugin;
 import java.io.IOException;
 import java.util.List;
 
+import static org.elasticsearch.plugins.AnalysisPlugin.requriesAnalysisSettings;
+
 /**
  * Sets up {@link AnalysisRegistry}.
  */
@@ -184,9 +183,6 @@ public final class AnalysisModule {
 
     private NamedRegistry<AnalysisProvider<CharFilterFactory>> setupCharFilters(List<AnalysisPlugin> plugins) {
         NamedRegistry<AnalysisProvider<CharFilterFactory>> charFilters = new NamedRegistry<>("char_filter");
-        charFilters.register("html_strip", HtmlStripCharFilterFactory::new);
-        charFilters.register("pattern_replace", requriesAnalysisSettings(PatternReplaceCharFilterFactory::new));
-        charFilters.register("mapping", requriesAnalysisSettings(MappingCharFilterFactory::new));
         charFilters.extractAndRegister(plugins, AnalysisPlugin::getCharFilters);
         return charFilters;
     }
@@ -340,19 +336,6 @@ public final class AnalysisModule {
         return normalizers;
     }
 
-    private static <T> AnalysisModule.AnalysisProvider<T> requriesAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
-        return new AnalysisModule.AnalysisProvider<T>() {
-            @Override
-            public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
-                return provider.get(indexSettings, environment, name, settings);
-            }
-
-            @Override
-            public boolean requiresAnalysisSettings() {
-                return true;
-            }
-        };
-    }
 
     /**
      * The basic factory interface for analysis components.

+ 29 - 4
core/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java

@@ -23,12 +23,16 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharFilter;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.Tokenizer;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.analysis.AnalyzerProvider;
 import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
 import org.elasticsearch.index.analysis.TokenizerFactory;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
 
+import java.io.IOException;
 import java.util.Map;
 
 import static java.util.Collections.emptyMap;
@@ -52,28 +56,32 @@ import static java.util.Collections.emptyMap;
  */
 public interface AnalysisPlugin {
     /**
-     * Override to add additional {@link CharFilter}s.
+     * Override to add additional {@link CharFilter}s. See {@link #requriesAnalysisSettings(AnalysisProvider)}
+     * for how to get the configuration from the index.
      */
     default Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
         return emptyMap();
     }
 
     /**
-     * Override to add additional {@link TokenFilter}s.
+     * Override to add additional {@link TokenFilter}s. See {@link #requriesAnalysisSettings(AnalysisProvider)}
+     * for how to get the configuration from the index.
      */
     default Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
         return emptyMap();
     }
 
     /**
-     * Override to add additional {@link Tokenizer}s.
+     * Override to add additional {@link Tokenizer}s. See {@link #requriesAnalysisSettings(AnalysisProvider)}
+     * for how to get the configuration from the index.
      */
     default Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
         return emptyMap();
     }
 
     /**
-     * Override to add additional {@link Analyzer}s.
+     * Override to add additional {@link Analyzer}s. See {@link #requriesAnalysisSettings(AnalysisProvider)}
+     * for how to get the configuration from the index.
      */
     default Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
         return emptyMap();
@@ -85,4 +93,21 @@ public interface AnalysisPlugin {
     default Map<String, org.apache.lucene.analysis.hunspell.Dictionary> getHunspellDictionaries() {
         return emptyMap();
     }
+
+    /**
+     * Mark an {@link AnalysisProvider} as requiring the index's settings.
+     */
+    static <T> AnalysisProvider<T> requriesAnalysisSettings(AnalysisProvider<T> provider) {
+        return new AnalysisProvider<T>() {
+            @Override
+            public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
+                return provider.get(indexSettings, environment, name, settings);
+            }
+
+            @Override
+            public boolean requiresAnalysisSettings() {
+                return true;
+            }
+        };
+    }
 }

+ 0 - 2
core/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java

@@ -40,8 +40,6 @@ import org.elasticsearch.test.IndexSettingsModule;
 import org.elasticsearch.test.VersionUtils;
 
 import java.io.IOException;
-import java.util.Collections;
-import java.util.HashMap;
 import java.util.Map;
 
 import static java.util.Collections.emptyMap;

+ 0 - 86
core/src/test/java/org/elasticsearch/index/analysis/CharFilterTests.java

@@ -1,86 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.elasticsearch.index.analysis;
-
-import org.elasticsearch.Version;
-import org.elasticsearch.cluster.metadata.IndexMetaData;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.test.ESTokenStreamTestCase;
-import org.elasticsearch.test.IndexSettingsModule;
-
-import static org.elasticsearch.test.ESTestCase.createTestAnalysis;
-
-public class CharFilterTests extends ESTokenStreamTestCase {
-    public void testMappingCharFilter() throws Exception {
-        Settings settings = Settings.builder()
-                .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
-                .put("index.analysis.char_filter.my_mapping.type", "mapping")
-                .putArray("index.analysis.char_filter.my_mapping.mappings", "ph=>f", "qu=>q")
-                .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard")
-                .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "my_mapping")
-                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
-                .build();
-        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
-        IndexAnalyzers indexAnalyzers = createTestAnalysis(idxSettings, settings).indexAnalyzers;
-        NamedAnalyzer analyzer1 = indexAnalyzers.get("custom_with_char_filter");
-
-        assertTokenStreamContents(analyzer1.tokenStream("test", "jeff quit phish"), new String[]{"jeff", "qit", "fish"});
-
-        // Repeat one more time to make sure that char filter is reinitialized correctly
-        assertTokenStreamContents(analyzer1.tokenStream("test", "jeff quit phish"), new String[]{"jeff", "qit", "fish"});
-    }
-
-    public void testHtmlStripCharFilter() throws Exception {
-        Settings settings = Settings.builder()
-                .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
-                .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard")
-                .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "html_strip")
-                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
-                .build();
-        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
-
-        IndexAnalyzers indexAnalyzers = createTestAnalysis(idxSettings, settings).indexAnalyzers;
-        NamedAnalyzer analyzer1 = indexAnalyzers.get("custom_with_char_filter");
-
-        assertTokenStreamContents(analyzer1.tokenStream("test", "<b>hello</b>!"), new String[]{"hello"});
-
-        // Repeat one more time to make sure that char filter is reinitialized correctly
-        assertTokenStreamContents(analyzer1.tokenStream("test", "<b>hello</b>!"), new String[]{"hello"});
-    }
-
-    public void testPatternReplaceCharFilter() throws Exception {
-        Settings settings = Settings.builder()
-            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
-            .put("index.analysis.char_filter.my_mapping.type", "pattern_replace")
-            .put("index.analysis.char_filter.my_mapping.pattern", "ab*")
-            .put("index.analysis.char_filter.my_mapping.replacement", "oo")
-            .put("index.analysis.char_filter.my_mapping.flags", "CASE_INSENSITIVE")
-            .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard")
-            .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "my_mapping")
-            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
-            .build();
-        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
-        IndexAnalyzers indexAnalyzers = createTestAnalysis(idxSettings, settings).indexAnalyzers;
-        NamedAnalyzer analyzer1 = indexAnalyzers.get("custom_with_char_filter");
-
-        assertTokenStreamContents(analyzer1.tokenStream("test", "faBBbBB aBbbbBf"), new String[]{"foo", "oof"});
-    }
-}

+ 48 - 4
core/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java

@@ -22,13 +22,18 @@ package org.elasticsearch.index.analysis;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
+import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
+import org.elasticsearch.plugins.AnalysisPlugin;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.test.ESTokenStreamTestCase;
 
 import java.io.IOException;
+import java.io.Reader;
+import java.util.Map;
 
-public class CustomNormalizerTests extends ESTokenStreamTestCase {
+import static java.util.Collections.singletonMap;
 
+public class CustomNormalizerTests extends ESTokenStreamTestCase {
     public void testBasics() throws IOException {
         Settings settings = Settings.builder()
                 .putArray("index.analysis.normalizer.my_normalizer.filter", "lowercase", "asciifolding")
@@ -66,12 +71,11 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
 
     public void testCharFilters() throws IOException {
         Settings settings = Settings.builder()
-                .put("index.analysis.char_filter.my_mapping.type", "mapping")
-                .putArray("index.analysis.char_filter.my_mapping.mappings", "a => z")
+                .put("index.analysis.char_filter.my_mapping.type", "mock_char_filter")
                 .putArray("index.analysis.normalizer.my_normalizer.char_filter", "my_mapping")
                 .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                 .build();
-        ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+        ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new MockCharFilterPlugin());
         assertNull(analysis.indexAnalyzers.get("my_normalizer"));
         NamedAnalyzer normalizer = analysis.indexAnalyzers.getNormalizer("my_normalizer");
         assertNotNull(normalizer);
@@ -99,4 +103,44 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
                 () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings));
         assertEquals("Custom normalizer [my_normalizer] may not use char filter [html_strip]", e.getMessage());
     }
+
+    private class MockCharFilterPlugin implements AnalysisPlugin {
+        @Override
+        public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
+            return singletonMap("mock_char_filter", (indexSettings, env, name, settings) -> {
+                class Factory implements CharFilterFactory, MultiTermAwareComponent {
+                    @Override
+                    public String name() {
+                        return name;
+                    }
+                    @Override
+                    public Reader create(Reader reader) {
+                        return new Reader() {
+
+                         @Override
+                         public int read(char[] cbuf, int off, int len) throws IOException {
+                             int result = reader.read(cbuf, off, len);
+                             for (int i = off; i < result; i++) {
+                                 if (cbuf[i] == 'a') {
+                                     cbuf[i] = 'z';
+                                 }
+                             }
+                             return result;
+                         }
+
+                         @Override
+                         public void close() throws IOException {
+                             reader.close();
+                         }
+                        };
+                    }
+                    @Override
+                    public Object getMultiTermComponent() {
+                        return this;
+                    }
+                }
+                return new Factory();
+            });
+        }
+    }
 }

+ 8 - 21
core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java

@@ -32,7 +32,6 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.SimpleFSDirectory;
 import org.elasticsearch.Version;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
-import org.elasticsearch.common.inject.ModuleTestCase;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.env.Environment;
@@ -40,17 +39,17 @@ import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.analysis.Analysis;
 import org.elasticsearch.index.analysis.AnalysisRegistry;
 import org.elasticsearch.index.analysis.AnalysisTestsHelper;
+import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.CustomAnalyzer;
 import org.elasticsearch.index.analysis.IndexAnalyzers;
-import org.elasticsearch.index.analysis.MappingCharFilterFactory;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
-import org.elasticsearch.index.analysis.PatternReplaceCharFilterFactory;
 import org.elasticsearch.index.analysis.StandardTokenizerFactory;
 import org.elasticsearch.index.analysis.StopTokenFilterFactory;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
 import org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
 import org.elasticsearch.plugins.AnalysisPlugin;
+import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.test.IndexSettingsModule;
 import org.elasticsearch.test.VersionUtils;
 import org.hamcrest.MatcherAssert;
@@ -72,7 +71,7 @@ import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.instanceOf;
 import static org.hamcrest.Matchers.is;
 
-public class AnalysisModuleTests extends ModuleTestCase {
+public class AnalysisModuleTests extends ESTestCase {
 
     public IndexAnalyzers getIndexAnalyzers(Settings settings) throws IOException {
         return getIndexAnalyzers(getNewRegistry(settings), settings);
@@ -90,6 +89,11 @@ public class AnalysisModuleTests extends ModuleTestCase {
                 public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
                     return singletonMap("myfilter", MyFilterTokenFilterFactory::new);
                 }
+
+                @Override
+                public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
+                    return AnalysisPlugin.super.getCharFilters();
+                }
             })).getAnalysisRegistry();
         } catch (IOException e) {
             throw new RuntimeException(e);
@@ -184,29 +188,12 @@ public class AnalysisModuleTests extends ModuleTestCase {
         StopTokenFilterFactory stop1 = (StopTokenFilterFactory) custom1.tokenFilters()[0];
         assertThat(stop1.stopWords().size(), equalTo(1));
 
-        analyzer = indexAnalyzers.get("custom2").analyzer();
-        assertThat(analyzer, instanceOf(CustomAnalyzer.class));
-
         // verify position increment gap
         analyzer = indexAnalyzers.get("custom6").analyzer();
         assertThat(analyzer, instanceOf(CustomAnalyzer.class));
         CustomAnalyzer custom6 = (CustomAnalyzer) analyzer;
         assertThat(custom6.getPositionIncrementGap("any_string"), equalTo(256));
 
-        // verify characters  mapping
-        analyzer = indexAnalyzers.get("custom5").analyzer();
-        assertThat(analyzer, instanceOf(CustomAnalyzer.class));
-        CustomAnalyzer custom5 = (CustomAnalyzer) analyzer;
-        assertThat(custom5.charFilters()[0], instanceOf(MappingCharFilterFactory.class));
-
-        // check custom pattern replace filter
-        analyzer = indexAnalyzers.get("custom3").analyzer();
-        assertThat(analyzer, instanceOf(CustomAnalyzer.class));
-        CustomAnalyzer custom3 = (CustomAnalyzer) analyzer;
-        PatternReplaceCharFilterFactory patternReplaceCharFilterFactory = (PatternReplaceCharFilterFactory) custom3.charFilters()[0];
-        assertThat(patternReplaceCharFilterFactory.getPattern().pattern(), equalTo("sample(.*)"));
-        assertThat(patternReplaceCharFilterFactory.getReplacement(), equalTo("replacedSample $1"));
-
         // check custom class name (my)
         analyzer = indexAnalyzers.get("custom4").analyzer();
         assertThat(analyzer, instanceOf(CustomAnalyzer.class));

+ 0 - 183
core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java

@@ -111,36 +111,6 @@ public class AnalyzeActionIT extends ESIntegTestCase {
         assertThat(analyzeResponse.getTokens().get(0).getPositionLength(), equalTo(1));
     }
 
-    public void testAnalyzeWithCharFilters() throws Exception {
-        assertAcked(prepareCreate("test").addAlias(new Alias("alias"))
-                .setSettings(Settings.builder().put(indexSettings())
-                        .put("index.analysis.char_filter.custom_mapping.type", "mapping")
-                        .putArray("index.analysis.char_filter.custom_mapping.mappings", "ph=>f", "qu=>q")
-                        .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard")
-                        .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "custom_mapping")));
-        ensureGreen();
-
-        AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("<h2><b>THIS</b> IS A</h2> <a href=\"#\">TEST</a>").setTokenizer("standard").addCharFilter("html_strip").get();
-        assertThat(analyzeResponse.getTokens().size(), equalTo(4));
-
-        analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A <b>TEST</b>").setTokenizer("keyword").addTokenFilter("lowercase").addCharFilter("html_strip").get();
-        assertThat(analyzeResponse.getTokens().size(), equalTo(1));
-        assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test"));
-
-        analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "jeff quit phish").setTokenizer("keyword").addTokenFilter("lowercase").addCharFilter("custom_mapping").get();
-        assertThat(analyzeResponse.getTokens().size(), equalTo(1));
-        assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("jeff qit fish"));
-
-        analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "<a href=\"#\">jeff quit fish</a>").setTokenizer("standard").addCharFilter("html_strip").addCharFilter("custom_mapping").get();
-        assertThat(analyzeResponse.getTokens().size(), equalTo(3));
-        AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0);
-        assertThat(token.getTerm(), equalTo("jeff"));
-        token = analyzeResponse.getTokens().get(1);
-        assertThat(token.getTerm(), equalTo("qit"));
-        token = analyzeResponse.getTokens().get(2);
-        assertThat(token.getTerm(), equalTo("fish"));
-    }
-
     public void testAnalyzeWithNonDefaultPostionLength() throws Exception {
         assertAcked(prepareCreate("test").addAlias(new Alias("alias"))
             .setSettings(Settings.builder().put(indexSettings())
@@ -263,46 +233,6 @@ public class AnalyzeActionIT extends ESIntegTestCase {
         assertThat(token.getPositionLength(), equalTo(1));
     }
 
-    public void testDetailAnalyze() throws Exception {
-        assertAcked(prepareCreate("test").addAlias(new Alias("alias"))
-            .setSettings(
-                    Settings.builder()
-                    .put("index.analysis.char_filter.my_mapping.type", "mapping")
-                    .putArray("index.analysis.char_filter.my_mapping.mappings", "PH=>F")
-                    .put("index.analysis.analyzer.test_analyzer.type", "custom")
-                    .put("index.analysis.analyzer.test_analyzer.position_increment_gap", "100")
-                    .put("index.analysis.analyzer.test_analyzer.tokenizer", "standard")
-                    .putArray("index.analysis.analyzer.test_analyzer.char_filter", "my_mapping")
-                    .putArray("index.analysis.analyzer.test_analyzer.filter", "snowball")));
-        ensureGreen();
-
-        for (int i = 0; i < 10; i++) {
-            AnalyzeResponse analyzeResponse = admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText("THIS IS A PHISH")
-                .setExplain(true).addCharFilter("my_mapping").setTokenizer("keyword").addTokenFilter("lowercase").get();
-
-            assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
-            //charfilters
-            assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
-            assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping"));
-            assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
-            assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("THIS IS A FISH"));
-            //tokenizer
-            assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword"));
-            assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1));
-            assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("THIS IS A FISH"));
-            assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0));
-            assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(15));
-            //tokenfilters
-            assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
-            assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase"));
-            assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(1));
-            assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("this is a fish"));
-            assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getPosition(), equalTo(0));
-            assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getStartOffset(), equalTo(0));
-            assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getEndOffset(), equalTo(15));
-        }
-    }
-
     public void testDetailAnalyzeWithNoIndex() throws Exception {
         //analyzer only
         AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST")
@@ -414,90 +344,6 @@ public class AnalyzeActionIT extends ESIntegTestCase {
         assertThat(token.getPositionLength(), equalTo(1));
     }
 
-    public void testDetailAnalyzeWithMultiValuesWithCustomAnalyzer() throws Exception {
-        assertAcked(prepareCreate("test").addAlias(new Alias("alias"))
-            .setSettings(
-                    Settings.builder()
-                    .put("index.analysis.char_filter.my_mapping.type", "mapping")
-                    .putArray("index.analysis.char_filter.my_mapping.mappings", "PH=>F")
-                    .put("index.analysis.analyzer.test_analyzer.type", "custom")
-                    .put("index.analysis.analyzer.test_analyzer.position_increment_gap", "100")
-                    .put("index.analysis.analyzer.test_analyzer.tokenizer", "standard")
-                    .putArray("index.analysis.analyzer.test_analyzer.char_filter", "my_mapping")
-                    .putArray("index.analysis.analyzer.test_analyzer.filter", "snowball", "lowercase")));
-        ensureGreen();
-
-        client().admin().indices().preparePutMapping("test")
-            .setType("document").setSource("simple", "type=text,analyzer=simple,position_increment_gap=100").get();
-
-        //only analyzer =
-        String[] texts = new String[]{"this is a PHISH", "the troubled text"};
-        AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText(texts)
-            .setExplain(true).setAnalyzer("test_analyzer").setText(texts).execute().get();
-
-        // charfilter
-        assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
-        assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping"));
-        assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(2));
-        assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("this is a FISH"));
-        assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[1], equalTo("the troubled text"));
-
-        // tokenizer
-        assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("standard"));
-        assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(7));
-        AnalyzeResponse.AnalyzeToken token = analyzeResponse.detail().tokenizer().getTokens()[3];
-
-        assertThat(token.getTerm(), equalTo("FISH"));
-        assertThat(token.getPosition(), equalTo(3));
-        assertThat(token.getStartOffset(), equalTo(10));
-        assertThat(token.getEndOffset(), equalTo(15));
-        assertThat(token.getPositionLength(), equalTo(1));
-
-        token = analyzeResponse.detail().tokenizer().getTokens()[5];
-        assertThat(token.getTerm(), equalTo("troubled"));
-        assertThat(token.getPosition(), equalTo(105));
-        assertThat(token.getStartOffset(), equalTo(20));
-        assertThat(token.getEndOffset(), equalTo(28));
-        assertThat(token.getPositionLength(), equalTo(1));
-
-        // tokenfilter(snowball)
-        assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(2));
-        assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
-        assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(7));
-        token = analyzeResponse.detail().tokenfilters()[0].getTokens()[3];
-
-        assertThat(token.getTerm(), equalTo("FISH"));
-        assertThat(token.getPosition(), equalTo(3));
-        assertThat(token.getStartOffset(), equalTo(10));
-        assertThat(token.getEndOffset(), equalTo(15));
-        assertThat(token.getPositionLength(), equalTo(1));
-
-        token = analyzeResponse.detail().tokenfilters()[0].getTokens()[5];
-        assertThat(token.getTerm(), equalTo("troubl"));
-        assertThat(token.getPosition(), equalTo(105));
-        assertThat(token.getStartOffset(), equalTo(20));
-        assertThat(token.getEndOffset(), equalTo(28));
-        assertThat(token.getPositionLength(), equalTo(1));
-
-        // tokenfilter(lowercase)
-        assertThat(analyzeResponse.detail().tokenfilters()[1].getName(), equalTo("lowercase"));
-        assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens().length, equalTo(7));
-        token = analyzeResponse.detail().tokenfilters()[1].getTokens()[3];
-
-        assertThat(token.getTerm(), equalTo("fish"));
-        assertThat(token.getPosition(), equalTo(3));
-        assertThat(token.getStartOffset(), equalTo(10));
-        assertThat(token.getEndOffset(), equalTo(15));
-        assertThat(token.getPositionLength(), equalTo(1));
-
-        token = analyzeResponse.detail().tokenfilters()[0].getTokens()[5];
-        assertThat(token.getTerm(), equalTo("troubl"));
-        assertThat(token.getPosition(), equalTo(105));
-        assertThat(token.getStartOffset(), equalTo(20));
-        assertThat(token.getEndOffset(), equalTo(28));
-        assertThat(token.getPositionLength(), equalTo(1));
-    }
-
     public void testNonExistTokenizer() {
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
             () -> client().admin().indices()
@@ -575,35 +421,6 @@ public class AnalyzeActionIT extends ESIntegTestCase {
         assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens()[0].getPositionLength(), equalTo(1));
     }
 
-
-    public void testCustomCharFilterInRequest() throws Exception {
-        Map<String, Object> charFilterSettings = new HashMap<>();
-        charFilterSettings.put("type", "mapping");
-        charFilterSettings.put("mappings", new String[]{"ph => f", "qu => q"});
-        AnalyzeResponse analyzeResponse = client().admin().indices()
-            .prepareAnalyze()
-            .setText("jeff quit phish")
-            .setTokenizer("keyword")
-            .addCharFilter(charFilterSettings)
-            .setExplain(true)
-            .get();
-
-        assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
-        //charfilters
-        assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
-        assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("_anonymous_charfilter_[0]"));
-        assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
-        assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("jeff qit fish"));
-        //tokenizer
-        assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword"));
-        assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1));
-        assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("jeff qit fish"));
-        assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0));
-        assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(15));
-        assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getPositionLength(), equalTo(1));
-    }
-
-
     public void testCustomTokenizerInRequest() throws Exception {
         Map<String, Object> tokenizerSettings = new HashMap<>();
         tokenizerSettings.put("type", "nGram");

+ 0 - 28
core/src/test/resources/org/elasticsearch/index/analysis/test1.json

@@ -6,22 +6,6 @@
                     "type":"standard"
                 }
             },
-            "char_filter":{
-                "my_html":{
-                    "type":"html_strip",
-                    "escaped_tags":["xxx", "yyy"],
-                    "read_ahead":1024
-                },
-                "my_pattern":{
-                    "type":"pattern_replace",
-                    "pattern":"sample(.*)",
-                    "replacement":"replacedSample $1"
-                },
-                "my_mapping":{
-                    "type":"mapping",
-                    "mappings":["ph=>f", "qu=>q"]
-                }
-            },
             "filter":{
                 "stop":{
                     "type":"stop",
@@ -48,22 +32,10 @@
                     "tokenizer":"standard",
                     "filter":["stop", "stop2"]
                 },
-                "custom2":{
-                    "tokenizer":"standard",
-                    "char_filter":["html_strip", "my_html"]
-                },
-                "custom3":{
-                    "tokenizer":"standard",
-                    "char_filter":["my_pattern"]
-                },
                 "custom4":{
                     "tokenizer":"standard",
                     "filter":["my"]
                 },
-                "custom5":{
-                    "tokenizer":"standard",
-                    "char_filter":["my_mapping"]
-                },
                 "custom6":{
                     "tokenizer":"standard",
                     "position_increment_gap": 256

+ 0 - 21
core/src/test/resources/org/elasticsearch/index/analysis/test1.yml

@@ -3,18 +3,6 @@ index :
     tokenizer :
       standard :
         type : standard
-    char_filter :
-      my_html :
-        type : html_strip
-        escaped_tags : [xxx, yyy]
-        read_ahead : 1024
-      my_pattern :
-        type: pattern_replace
-        pattern: sample(.*)
-        replacement: replacedSample $1
-      my_mapping :
-        type : mapping
-        mappings : [ph=>f, qu=>q]
     filter :
       stop :
         type : stop
@@ -34,18 +22,9 @@ index :
       custom1 :
         tokenizer : standard
         filter : [stop, stop2]
-      custom2 :
-        tokenizer : standard
-        char_filter : [html_strip, my_html]
-      custom3 :
-        tokenizer : standard
-        char_filter : [my_pattern]
       custom4 :
         tokenizer : standard
         filter : [my]
-      custom5 :
-        tokenizer : standard
-        char_filter : [my_mapping]
       custom6 :
         tokenizer : standard
         position_increment_gap: 256

+ 14 - 0
modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

@@ -19,6 +19,8 @@
 
 package org.elasticsearch.analysis.common;
 
+import org.elasticsearch.index.analysis.CharFilterFactory;
+import org.elasticsearch.index.analysis.HtmlStripCharFilterFactory;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
 import org.elasticsearch.plugins.AnalysisPlugin;
@@ -26,6 +28,9 @@ import org.elasticsearch.plugins.Plugin;
 
 import java.util.HashMap;
 import java.util.Map;
+import java.util.TreeMap;
+
+import static org.elasticsearch.plugins.AnalysisPlugin.requriesAnalysisSettings;
 
 public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
     @Override
@@ -36,4 +41,13 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
         filters.put("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new);
         return filters;
     }
+
+    @Override
+    public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
+        Map<String, AnalysisProvider<CharFilterFactory>> filters = new TreeMap<>();
+        filters.put("html_strip", HtmlStripCharFilterFactory::new);
+        filters.put("pattern_replace", requriesAnalysisSettings(PatternReplaceCharFilterFactory::new));
+        filters.put("mapping", requriesAnalysisSettings(MappingCharFilterFactory::new));
+        return filters;
+    }
 }

+ 4 - 1
core/src/main/java/org/elasticsearch/index/analysis/MappingCharFilterFactory.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MappingCharFilterFactory.java

@@ -17,13 +17,16 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.charfilter.MappingCharFilter;
 import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractCharFilterFactory;
+import org.elasticsearch.index.analysis.Analysis;
+import org.elasticsearch.index.analysis.MultiTermAwareComponent;
 
 import java.io.Reader;
 import java.util.List;

+ 3 - 1
core/src/main/java/org/elasticsearch/index/analysis/PatternReplaceCharFilterFactory.java → modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceCharFilterFactory.java

@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import java.io.Reader;
 import java.util.regex.Pattern;
@@ -27,6 +27,8 @@ import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractCharFilterFactory;
+import org.elasticsearch.index.analysis.MultiTermAwareComponent;
 
 public class PatternReplaceCharFilterFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {
 

+ 0 - 0
core/src/main/java/org/elasticsearch/index/analysis/HtmlStripCharFilterFactory.java → modules/analysis-common/src/main/java/org/elasticsearch/index/analysis/HtmlStripCharFilterFactory.java


+ 10 - 1
modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java

@@ -20,10 +20,12 @@
 package org.elasticsearch.analysis.common;
 
 import org.elasticsearch.AnalysisFactoryTestCase;
+import org.elasticsearch.index.analysis.HtmlStripCharFilterFactory;
 
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.TreeMap;
 
 import static java.util.Collections.emptyList;
 import static java.util.stream.Collectors.toList;
@@ -46,7 +48,14 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
 
     @Override
     protected Map<String, Class<?>> getCharFilters() {
-        Map<String, Class<?>> filters = new HashMap<>(super.getCharFilters());
+        Map<String, Class<?>> filters = new TreeMap<>(super.getCharFilters());
+        filters.put("htmlstrip",      HtmlStripCharFilterFactory.class);
+        filters.put("mapping",        MappingCharFilterFactory.class);
+        filters.put("patternreplace", PatternReplaceCharFilterFactory.class);
+
+        // TODO: these charfilters are not yet exposed: useful?
+        // handling of zwnj for persian
+        filters.put("persian",        Void.class);
         return filters;
     }
 

+ 18 - 0
modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/20_analyzers.yaml

@@ -9,3 +9,21 @@
     - length: { tokens: 2 }
     - match:  { tokens.0.token: Foo }
     - match:  { tokens.1.token: Bar! }
+
+    - do:
+        indices.analyze:
+          body:
+            text:     Foo Bar!
+            explain:  true
+            analyzer: whitespace
+    - match:  { detail.custom_analyzer: false }
+    - match:  { detail.analyzer.name: whitespace }
+    - length: { detail.analyzer.tokens: 2 }
+    - match:  { detail.analyzer.tokens.0.token: Foo }
+    - match:  { detail.analyzer.tokens.0.start_offset: 0 }
+    - match:  { detail.analyzer.tokens.0.end_offset: 3 }
+    - match:  { detail.analyzer.tokens.0.position: 0 }
+    - match:  { detail.analyzer.tokens.1.token: Bar! }
+    - match:  { detail.analyzer.tokens.1.start_offset: 4 }
+    - match:  { detail.analyzer.tokens.1.end_offset: 8 }
+    - match:  { detail.analyzer.tokens.1.position: 1 }

+ 54 - 0
modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yaml

@@ -80,3 +80,57 @@
     - match:  { tokens.1.token: qu1ck }
     - match:  { tokens.2.token: brown }
     - match:  { tokens.3.token: fox }
+
+    - do:
+        indices.analyze:
+          body:
+            text:      the qu1ck brown fox
+            explain:   true
+            tokenizer: standard
+            filter:    [word_delimiter_graph]
+    - match:  { detail.custom_analyzer: true }
+    - match:  { detail.tokenizer.name: standard }
+    - length: { detail.tokenizer.tokens: 4 }
+    - match:  { detail.tokenizer.tokens.0.token: the }
+    - match:  { detail.tokenizer.tokens.0.start_offset: 0 }
+    - match:  { detail.tokenizer.tokens.0.end_offset: 3 }
+    - match:  { detail.tokenizer.tokens.0.position: 0 }
+    - match:  { detail.tokenizer.tokens.1.token: qu1ck }
+    - match:  { detail.tokenizer.tokens.1.start_offset: 4 }
+    - match:  { detail.tokenizer.tokens.1.end_offset: 9 }
+    - match:  { detail.tokenizer.tokens.1.position: 1 }
+    - match:  { detail.tokenizer.tokens.2.token: brown }
+    - match:  { detail.tokenizer.tokens.2.start_offset: 10 }
+    - match:  { detail.tokenizer.tokens.2.end_offset: 15 }
+    - match:  { detail.tokenizer.tokens.2.position: 2 }
+    - match:  { detail.tokenizer.tokens.3.token: fox }
+    - match:  { detail.tokenizer.tokens.3.start_offset: 16 }
+    - match:  { detail.tokenizer.tokens.3.end_offset: 19 }
+    - match:  { detail.tokenizer.tokens.3.position: 3 }
+    - length: { detail.tokenfilters: 1 }
+    - match:  { detail.tokenfilters.0.name: word_delimiter_graph }
+    - length: { detail.tokenfilters.0.tokens: 6 }
+    - match:  { detail.tokenfilters.0.tokens.0.token: the }
+    - match:  { detail.tokenfilters.0.tokens.0.start_offset: 0 }
+    - match:  { detail.tokenfilters.0.tokens.0.end_offset: 3 }
+    - match:  { detail.tokenfilters.0.tokens.0.position: 0 }
+    - match:  { detail.tokenfilters.0.tokens.1.token: qu }
+    - match:  { detail.tokenfilters.0.tokens.1.start_offset: 4 }
+    - match:  { detail.tokenfilters.0.tokens.1.end_offset: 6 }
+    - match:  { detail.tokenfilters.0.tokens.1.position: 1 }
+    - match:  { detail.tokenfilters.0.tokens.2.token: "1" }
+    - match:  { detail.tokenfilters.0.tokens.2.start_offset: 6 }
+    - match:  { detail.tokenfilters.0.tokens.2.end_offset: 7 }
+    - match:  { detail.tokenfilters.0.tokens.2.position: 2 }
+    - match:  { detail.tokenfilters.0.tokens.3.token: ck }
+    - match:  { detail.tokenfilters.0.tokens.3.start_offset: 7 }
+    - match:  { detail.tokenfilters.0.tokens.3.end_offset: 9 }
+    - match:  { detail.tokenfilters.0.tokens.3.position: 3 }
+    - match:  { detail.tokenfilters.0.tokens.4.token: brown }
+    - match:  { detail.tokenfilters.0.tokens.4.start_offset: 10 }
+    - match:  { detail.tokenfilters.0.tokens.4.end_offset: 15 }
+    - match:  { detail.tokenfilters.0.tokens.4.position: 4 }
+    - match:  { detail.tokenfilters.0.tokens.5.token: fox }
+    - match:  { detail.tokenfilters.0.tokens.5.start_offset: 16 }
+    - match:  { detail.tokenfilters.0.tokens.5.end_offset: 19 }
+    - match:  { detail.tokenfilters.0.tokens.5.position: 5 }

+ 47 - 1
modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/50_char_filters.yaml

@@ -1,5 +1,33 @@
-## Smoke tests for analyzers included in the analysis-common module
+## Smoke tests for char filters included in the analysis-common module
 
+"html_strip":
+    - do:
+        indices.analyze:
+          body:
+            text: <html>test<yyy>foo</yyy></html>
+            tokenizer: keyword
+            char_filter:
+              - type: html_strip
+                escaped_tags: ["xxx", "yyy"]
+                read_ahead: 1024
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: "\ntest<yyy>foo</yyy>\n" }
+
+---
+"pattern_replace":
+    - do:
+        indices.analyze:
+          body:
+            text: sample6 sample1
+            tokenizer: keyword
+            char_filter:
+              - type: pattern_replace
+                pattern: sample(.*)
+                replacement: replacedSample $1
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: "replacedSample 6 sample1" }
+
+---
 "mapping":
     - do:
         indices.analyze:
@@ -11,3 +39,21 @@
                 mappings: ["ph => f", "qu => q"]
     - length: { tokens: 1 }
     - match:  { tokens.0.token: "jeff qit fish" }
+
+    - do:
+        indices.analyze:
+          body:
+            text: jeff quit phish
+            explain: true
+            tokenizer: keyword
+            char_filter:
+              - type: mapping
+                mappings: ["ph => f", "qu => q"]
+    - match:  { detail.custom_analyzer: true }
+    - length: { detail.charfilters.0.filtered_text: 1 }
+    - match:  { detail.charfilters.0.filtered_text.0: "jeff qit fish" }
+    - length: { detail.tokenizer.tokens: 1 }
+    - match:  { detail.tokenizer.tokens.0.token: "jeff qit fish" }
+    - match:  { detail.tokenizer.tokens.0.start_offset: 0 }
+    - match:  { detail.tokenizer.tokens.0.end_offset: 15 }
+    - match:  { detail.tokenizer.tokens.0.position: 0 }

+ 3 - 6
test/framework/src/main/java/org/elasticsearch/AnalysisFactoryTestCase.java

@@ -45,7 +45,6 @@ import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory;
 import org.elasticsearch.index.analysis.GermanNormalizationFilterFactory;
 import org.elasticsearch.index.analysis.GermanStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.HindiNormalizationFilterFactory;
-import org.elasticsearch.index.analysis.HtmlStripCharFilterFactory;
 import org.elasticsearch.index.analysis.HunspellTokenFilterFactory;
 import org.elasticsearch.index.analysis.IndicNormalizationFilterFactory;
 import org.elasticsearch.index.analysis.KStemTokenFilterFactory;
@@ -58,14 +57,12 @@ import org.elasticsearch.index.analysis.LetterTokenizerFactory;
 import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
 import org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory;
 import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
-import org.elasticsearch.index.analysis.MappingCharFilterFactory;
 import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
 import org.elasticsearch.index.analysis.MultiTermAwareComponent;
 import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
 import org.elasticsearch.index.analysis.NGramTokenizerFactory;
 import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
 import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
-import org.elasticsearch.index.analysis.PatternReplaceCharFilterFactory;
 import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
 import org.elasticsearch.index.analysis.PatternTokenizerFactory;
 import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
@@ -325,9 +322,9 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
 
     static final Map<String,Class<?>> KNOWN_CHARFILTERS = new MapBuilder<String,Class<?>>()
         // exposed in ES
-        .put("htmlstrip",      HtmlStripCharFilterFactory.class)
-        .put("mapping",        MappingCharFilterFactory.class)
-        .put("patternreplace", PatternReplaceCharFilterFactory.class)
+        .put("htmlstrip",      MovedToAnalysisCommon.class)
+        .put("mapping",        MovedToAnalysisCommon.class)
+        .put("patternreplace", MovedToAnalysisCommon.class)
 
         // TODO: these charfilters are not yet exposed: useful?
         // handling of zwnj for persian