|
@@ -10,17 +10,22 @@ package org.elasticsearch.indices.analysis;
|
|
|
|
|
|
import org.apache.lucene.analysis.Analyzer;
|
|
|
import org.apache.lucene.analysis.CharFilter;
|
|
|
+import org.apache.lucene.analysis.FilteringTokenFilter;
|
|
|
import org.apache.lucene.analysis.TokenFilter;
|
|
|
import org.apache.lucene.analysis.TokenStream;
|
|
|
import org.apache.lucene.analysis.Tokenizer;
|
|
|
+import org.apache.lucene.analysis.charfilter.MappingCharFilter;
|
|
|
+import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
|
|
|
import org.apache.lucene.analysis.hunspell.Dictionary;
|
|
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|
|
+import org.apache.lucene.analysis.util.CharTokenizer;
|
|
|
import org.apache.lucene.store.Directory;
|
|
|
import org.apache.lucene.tests.analysis.MockTokenizer;
|
|
|
import org.elasticsearch.Version;
|
|
|
import org.elasticsearch.cluster.metadata.IndexMetadata;
|
|
|
import org.elasticsearch.common.io.Streams;
|
|
|
import org.elasticsearch.common.settings.Settings;
|
|
|
+import org.elasticsearch.core.SuppressForbidden;
|
|
|
import org.elasticsearch.env.Environment;
|
|
|
import org.elasticsearch.env.TestEnvironment;
|
|
|
import org.elasticsearch.index.IndexSettings;
|
|
@@ -38,7 +43,13 @@ import org.elasticsearch.index.analysis.StopTokenFilterFactory;
|
|
|
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
|
|
import org.elasticsearch.index.analysis.TokenizerFactory;
|
|
|
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
|
|
|
+import org.elasticsearch.plugin.analysis.api.AnalysisMode;
|
|
|
+import org.elasticsearch.plugin.api.NamedComponent;
|
|
|
import org.elasticsearch.plugins.AnalysisPlugin;
|
|
|
+import org.elasticsearch.plugins.scanners.NameToPluginInfo;
|
|
|
+import org.elasticsearch.plugins.scanners.NamedComponentReader;
|
|
|
+import org.elasticsearch.plugins.scanners.PluginInfo;
|
|
|
+import org.elasticsearch.plugins.scanners.StablePluginsRegistry;
|
|
|
import org.elasticsearch.test.ESTestCase;
|
|
|
import org.elasticsearch.test.IndexSettingsModule;
|
|
|
import org.elasticsearch.test.VersionUtils;
|
|
@@ -59,6 +70,7 @@ import java.util.List;
|
|
|
import java.util.Map;
|
|
|
import java.util.Set;
|
|
|
|
|
|
+import static java.util.Collections.emptyList;
|
|
|
import static java.util.Collections.singletonList;
|
|
|
import static java.util.Collections.singletonMap;
|
|
|
import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents;
|
|
@@ -92,7 +104,7 @@ public class AnalysisModuleTests extends ESTestCase {
|
|
|
public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
|
|
|
return AnalysisPlugin.super.getCharFilters();
|
|
|
}
|
|
|
- })).getAnalysisRegistry();
|
|
|
+ }), new StablePluginsRegistry()).getAnalysisRegistry();
|
|
|
} catch (IOException e) {
|
|
|
throw new RuntimeException(e);
|
|
|
}
|
|
@@ -260,7 +272,8 @@ public class AnalysisModuleTests extends ESTestCase {
|
|
|
)
|
|
|
);
|
|
|
}
|
|
|
- })
|
|
|
+ }),
|
|
|
+ new StablePluginsRegistry()
|
|
|
).getAnalysisRegistry();
|
|
|
|
|
|
Version version = VersionUtils.randomVersion(random());
|
|
@@ -325,7 +338,8 @@ public class AnalysisModuleTests extends ESTestCase {
|
|
|
)
|
|
|
);
|
|
|
}
|
|
|
- })
|
|
|
+ }),
|
|
|
+ new StablePluginsRegistry()
|
|
|
).getAnalysisRegistry();
|
|
|
|
|
|
Version version = VersionUtils.randomVersion(random());
|
|
@@ -411,7 +425,8 @@ public class AnalysisModuleTests extends ESTestCase {
|
|
|
)
|
|
|
);
|
|
|
}
|
|
|
- })
|
|
|
+ }),
|
|
|
+ new StablePluginsRegistry()
|
|
|
).getAnalysisRegistry();
|
|
|
|
|
|
Version version = VersionUtils.randomVersion(random());
|
|
@@ -457,10 +472,191 @@ public class AnalysisModuleTests extends ESTestCase {
|
|
|
public Map<String, Dictionary> getHunspellDictionaries() {
|
|
|
return singletonMap("foo", dictionary);
|
|
|
}
|
|
|
- }));
|
|
|
+ }), new StablePluginsRegistry());
|
|
|
assertSame(dictionary, module.getHunspellService().getDictionary("foo"));
|
|
|
}
|
|
|
|
|
|
+ @NamedComponent(name = "stableCharFilterFactory")
|
|
|
+ public static class TestCharFilterFactory implements org.elasticsearch.plugin.analysis.api.CharFilterFactory {
|
|
|
+ @SuppressForbidden(reason = "need a public constructor")
|
|
|
+ public TestCharFilterFactory() {}
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public Reader create(Reader reader) {
|
|
|
+ return new ReplaceHash(reader);
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public Reader normalize(Reader reader) {
|
|
|
+ return new ReplaceHash(reader);
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ static class ReplaceHash extends MappingCharFilter {
|
|
|
+
|
|
|
+ ReplaceHash(Reader in) {
|
|
|
+ super(charMap(), in);
|
|
|
+ }
|
|
|
+
|
|
|
+ private static NormalizeCharMap charMap() {
|
|
|
+ NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
|
|
|
+ builder.add("#", "3");
|
|
|
+ return builder.build();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @NamedComponent(name = "stableTokenFilterFactory")
|
|
|
+ public static class TestTokenFilterFactory implements org.elasticsearch.plugin.analysis.api.TokenFilterFactory {
|
|
|
+
|
|
|
+ @SuppressForbidden(reason = "need a public constructor")
|
|
|
+ public TestTokenFilterFactory() {}
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public TokenStream create(TokenStream tokenStream) {
|
|
|
+
|
|
|
+ return new Skip1TokenFilter(tokenStream);
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public TokenStream normalize(TokenStream tokenStream) {
|
|
|
+ return new AppendTokenFilter(tokenStream, "1");
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public AnalysisMode getAnalysisMode() {
|
|
|
+ return org.elasticsearch.plugin.analysis.api.TokenFilterFactory.super.getAnalysisMode();
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ static class Skip1TokenFilter extends FilteringTokenFilter {
|
|
|
+
|
|
|
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
|
|
+
|
|
|
+ Skip1TokenFilter(TokenStream in) {
|
|
|
+ super(in);
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ protected boolean accept() throws IOException {
|
|
|
+ return termAtt.buffer()[0] != '1';
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @NamedComponent(name = "stableTokenizerFactory")
|
|
|
+ public static class TestTokenizerFactory implements org.elasticsearch.plugin.analysis.api.TokenizerFactory {
|
|
|
+ @SuppressForbidden(reason = "need a public constructor")
|
|
|
+ public TestTokenizerFactory() {}
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public Tokenizer create() {
|
|
|
+ return new UnderscoreTokenizer();
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ static class UnderscoreTokenizer extends CharTokenizer {
|
|
|
+
|
|
|
+ @Override
|
|
|
+ protected boolean isTokenChar(int c) {
|
|
|
+ return c != '_';
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @NamedComponent(name = "stableAnalyzerFactory")
|
|
|
+ public static class TestAnalyzerFactory implements org.elasticsearch.plugin.analysis.api.AnalyzerFactory {
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public Analyzer create() {
|
|
|
+ return new CustomAnalyzer();
|
|
|
+ }
|
|
|
+
|
|
|
+ static class CustomAnalyzer extends Analyzer {
|
|
|
+
|
|
|
+ @Override
|
|
|
+ protected TokenStreamComponents createComponents(String fieldName) {
|
|
|
+ var tokenizer = new UnderscoreTokenizer();
|
|
|
+ var tokenFilter = new Skip1TokenFilter(tokenizer);
|
|
|
+ return new TokenStreamComponents(r -> tokenizer.setReader(new ReplaceHash(r)), tokenFilter);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testStablePlugins() throws IOException {
|
|
|
+ ClassLoader classLoader = getClass().getClassLoader();
|
|
|
+ AnalysisRegistry registry = new AnalysisModule(
|
|
|
+ TestEnvironment.newEnvironment(emptyNodeSettings),
|
|
|
+ emptyList(),
|
|
|
+ new StablePluginsRegistry(
|
|
|
+ new NamedComponentReader(),
|
|
|
+ Map.of(
|
|
|
+ org.elasticsearch.plugin.analysis.api.CharFilterFactory.class.getCanonicalName(),
|
|
|
+ new NameToPluginInfo(
|
|
|
+ Map.of(
|
|
|
+ "stableCharFilterFactory",
|
|
|
+ new PluginInfo("stableCharFilterFactory", TestCharFilterFactory.class.getName(), classLoader)
|
|
|
+ )
|
|
|
+ ),
|
|
|
+ org.elasticsearch.plugin.analysis.api.TokenFilterFactory.class.getCanonicalName(),
|
|
|
+ new NameToPluginInfo(
|
|
|
+ Map.of(
|
|
|
+ "stableTokenFilterFactory",
|
|
|
+ new PluginInfo("stableTokenFilterFactory", TestTokenFilterFactory.class.getName(), classLoader)
|
|
|
+ )
|
|
|
+ ),
|
|
|
+ org.elasticsearch.plugin.analysis.api.TokenizerFactory.class.getCanonicalName(),
|
|
|
+ new NameToPluginInfo(
|
|
|
+ Map.of(
|
|
|
+ "stableTokenizerFactory",
|
|
|
+ new PluginInfo("stableTokenizerFactory", TestTokenizerFactory.class.getName(), classLoader)
|
|
|
+ )
|
|
|
+ ),
|
|
|
+ org.elasticsearch.plugin.analysis.api.AnalyzerFactory.class.getCanonicalName(),
|
|
|
+ new NameToPluginInfo(
|
|
|
+ Map.of(
|
|
|
+ "stableAnalyzerFactory",
|
|
|
+ new PluginInfo("stableAnalyzerFactory", TestAnalyzerFactory.class.getName(), classLoader)
|
|
|
+ )
|
|
|
+ )
|
|
|
+ )
|
|
|
+ )
|
|
|
+ ).getAnalysisRegistry();
|
|
|
+
|
|
|
+ Version version = VersionUtils.randomVersion(random());
|
|
|
+ IndexAnalyzers analyzers = getIndexAnalyzers(
|
|
|
+ registry,
|
|
|
+ Settings.builder()
|
|
|
+ .put("index.analysis.analyzer.char_filter_test.tokenizer", "standard")
|
|
|
+ .put("index.analysis.analyzer.char_filter_test.char_filter", "stableCharFilterFactory")
|
|
|
+
|
|
|
+ .put("index.analysis.analyzer.token_filter_test.tokenizer", "standard")
|
|
|
+ .put("index.analysis.analyzer.token_filter_test.filter", "stableTokenFilterFactory")
|
|
|
+
|
|
|
+ .put("index.analysis.analyzer.tokenizer_test.tokenizer", "stableTokenizerFactory")
|
|
|
+
|
|
|
+ .put("index.analysis.analyzer.analyzer_provider_test.type", "stableAnalyzerFactory")
|
|
|
+
|
|
|
+ .put(IndexMetadata.SETTING_VERSION_CREATED, version)
|
|
|
+ .build()
|
|
|
+ );
|
|
|
+ assertTokenStreamContents(analyzers.get("char_filter_test").tokenStream("", "t#st"), new String[] { "t3st" });
|
|
|
+ assertTokenStreamContents(
|
|
|
+ analyzers.get("token_filter_test").tokenStream("", "1test 2test 1test 3test "),
|
|
|
+ new String[] { "2test", "3test" }
|
|
|
+ );
|
|
|
+ assertTokenStreamContents(analyzers.get("tokenizer_test").tokenStream("", "x_y_z"), new String[] { "x", "y", "z" });
|
|
|
+ assertTokenStreamContents(analyzers.get("analyzer_provider_test").tokenStream("", "1x_y_#z"), new String[] { "y", "3z" });
|
|
|
+
|
|
|
+ assertThat(analyzers.get("char_filter_test").normalize("", "t#st").utf8ToString(), equalTo("t3st"));
|
|
|
+ assertThat(
|
|
|
+ analyzers.get("token_filter_test").normalize("", "1test 2test 1test 3test ").utf8ToString(),
|
|
|
+ equalTo("1test 2test 1test 3test 1")
|
|
|
+ );
|
|
|
+
|
|
|
+ // TODO does it make sense to test normalize on tokenizer and analyzer?
|
|
|
+ }
|
|
|
+
|
|
|
// Simple char filter that appends text to the term
|
|
|
public static class AppendCharFilter extends CharFilter {
|
|
|
|