@@ -63,6 +63,7 @@ import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
 import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
 import org.elasticsearch.index.analysis.PatternTokenizerFactory;
 import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
+import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
 import org.elasticsearch.index.analysis.ReverseTokenFilterFactory;
@@ -100,7 +101,9 @@ import java.util.TreeSet;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import static java.util.Collections.emptyMap;
 import static java.util.Collections.singletonList;
+import static org.hamcrest.Matchers.empty;
 import static org.hamcrest.Matchers.typeCompatibleWith;
 
 /**
@@ -275,20 +278,6 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
         .put("persian", Void.class)
         .immutableMap();
 
-    static final Map<PreBuiltCharFilters, Class<?>> PREBUILT_CHARFILTERS;
-    static {
-        PREBUILT_CHARFILTERS = new EnumMap<>(PreBuiltCharFilters.class);
-        for (PreBuiltCharFilters tokenizer : PreBuiltCharFilters.values()) {
-            Class<?> luceneFactoryClazz;
-            switch (tokenizer) {
-            default:
-                luceneFactoryClazz = org.apache.lucene.analysis.util.CharFilterFactory.lookupClass(
-                        toCamelCase(tokenizer.getCharFilterFactory(Version.CURRENT).name()));
-            }
-            PREBUILT_CHARFILTERS.put(tokenizer, luceneFactoryClazz);
-        }
-    }
-
     /**
      * The plugin being tested. Core uses an "empty" plugin so we don't have to throw null checks all over the place.
      */
@@ -352,9 +341,17 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
             }
             tokenizers.put(tokenizer.name().toLowerCase(Locale.ROOT), luceneFactoryClazz);
         }
+        // TODO drop aliases once they are moved to module
+        tokenizers.put("nGram", tokenizers.get("ngram"));
+        tokenizers.put("edgeNGram", tokenizers.get("edge_ngram"));
+        tokenizers.put("PathHierarchy", tokenizers.get("path_hierarchy"));
         return tokenizers;
     }
 
+    public Map<String, Class<?>> getPreConfiguredCharFilters() {
+        return emptyMap();
+    }
+
     public void testTokenizers() {
         Set<String> missing = new TreeSet<String>(org.apache.lucene.analysis.util.TokenizerFactory.availableTokenizers());
         missing.removeAll(getTokenizers().keySet());
@@ -430,10 +427,12 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
         Collection<Object> actual = new HashSet<>();
 
         Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters =
-                AnalysisModule.setupPreConfiguredTokenFilters(singletonList(plugin));
+                new HashMap<>(AnalysisModule.setupPreConfiguredTokenFilters(singletonList(plugin)));
         for (Map.Entry<String, Class<?>> entry : getPreConfiguredTokenFilters().entrySet()) {
             String name = entry.getKey();
             Class<?> luceneFactory = entry.getValue();
+            PreConfiguredTokenFilter filter = preConfiguredTokenFilters.remove(name);
+            assertNotNull("test claims pre built token filter [" + name + "] should be available but it wasn't", filter);
             if (luceneFactory == Void.class) {
                 continue;
             }
@@ -441,8 +440,6 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
                 luceneFactory = TokenFilterFactory.lookupClass(toCamelCase(name));
             }
             assertThat(luceneFactory, typeCompatibleWith(TokenFilterFactory.class));
-            PreConfiguredTokenFilter filter = preConfiguredTokenFilters.get(name);
-            assertNotNull("test claims pre built token filter [" + name + "] should be available but it wasn't", filter);
             if (filter.shouldUseFilterForMultitermQueries()) {
                 actual.add("token filter [" + name + "]");
             }
@@ -450,10 +447,15 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
                 expected.add("token filter [" + name + "]");
             }
         }
-        Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = AnalysisModule.setupPreConfiguredTokenizers(singletonList(plugin));
+        assertThat("pre configured token filter not registered with test", preConfiguredTokenFilters.keySet(), empty());
+
+        Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = new HashMap<>(
+                AnalysisModule.setupPreConfiguredTokenizers(singletonList(plugin)));
         for (Map.Entry<String, Class<?>> entry : getPreConfiguredTokenizers().entrySet()) {
             String name = entry.getKey();
             Class<?> luceneFactory = entry.getValue();
+            PreConfiguredTokenizer tokenizer = preConfiguredTokenizers.remove(name);
+            assertNotNull("test claims pre built tokenizer [" + name + "] should be available but it wasn't", tokenizer);
             if (luceneFactory == Void.class) {
                 continue;
             }
@@ -461,7 +463,6 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
                 luceneFactory = TokenizerFactory.lookupClass(toCamelCase(name));
             }
             assertThat(luceneFactory, typeCompatibleWith(TokenizerFactory.class));
-            PreConfiguredTokenizer tokenizer = preConfiguredTokenizers.get(name);
             if (tokenizer.hasMultiTermComponent()) {
                 actual.add(tokenizer);
             }
@@ -469,20 +470,30 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
                 expected.add(tokenizer);
             }
         }
-        for (Map.Entry<PreBuiltCharFilters, Class<?>> entry : PREBUILT_CHARFILTERS.entrySet()) {
-            PreBuiltCharFilters charFilter = entry.getKey();
+        assertThat("pre configured tokenizer not registered with test", preConfiguredTokenizers.keySet(), empty());
+
+        Map<String, PreConfiguredCharFilter> preConfiguredCharFilters = new HashMap<>(
+                AnalysisModule.setupPreConfiguredCharFilters(singletonList(plugin)));
+        for (Map.Entry<String, Class<?>> entry : getPreConfiguredCharFilters().entrySet()) {
+            String name = entry.getKey();
             Class<?> luceneFactory = entry.getValue();
+            PreConfiguredCharFilter filter = preConfiguredCharFilters.remove(name);
+            assertNotNull("test claims pre built char filter [" + name + "] should be available but it wasn't", filter);
             if (luceneFactory == Void.class) {
                 continue;
             }
-            assertTrue(CharFilterFactory.class.isAssignableFrom(luceneFactory));
-            if (charFilter.getCharFilterFactory(Version.CURRENT) instanceof MultiTermAwareComponent) {
-                actual.add(charFilter);
+            if (luceneFactory == null) {
+                luceneFactory = CharFilterFactory.lookupClass(toCamelCase(name));
+            }
+            assertThat(luceneFactory, typeCompatibleWith(CharFilterFactory.class));
+            if (filter.shouldUseFilterForMultitermQueries()) {
+                actual.add("char filter [" + name + "]");
             }
             if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory)) {
-                expected.add(charFilter);
+                expected.add("char filter [" + name + "]");
             }
         }
+        assertThat("pre configured char filter not registered with test", preConfiguredCharFilters.keySet(), empty());
 
         Set<Object> classesMissingMultiTermSupport = new HashSet<>(expected);
         classesMissingMultiTermSupport.removeAll(actual);
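
Note (illustration only, not part of the patch): a minimal sketch of how a plugin's test might use the new getPreConfiguredCharFilters() hook. The subclass, the plugin, and the "my_mapping" filter name are hypothetical, and it assumes the AnalysisFactoryTestCase constructor that takes the plugin under test. Following the test above, each entry maps a pre-configured char filter name to the Lucene factory class that backs it; Void.class marks a filter with no Lucene counterpart, and null makes the test look the factory up from the camel-cased name.

    import java.util.Map;
    import java.util.TreeMap;

    import org.apache.lucene.analysis.charfilter.MappingCharFilterFactory;

    public class MyPluginAnalysisFactoryTests extends AnalysisFactoryTestCase {
        public MyPluginAnalysisFactoryTests() {
            super(new MyAnalysisPlugin()); // hypothetical plugin under test
        }

        @Override
        public Map<String, Class<?>> getPreConfiguredCharFilters() {
            Map<String, Class<?>> filters = new TreeMap<>(super.getPreConfiguredCharFilters());
            // "my_mapping" is a hypothetical pre-configured char filter backed by Lucene's mapping char filter.
            filters.put("my_mapping", MappingCharFilterFactory.class);
            return filters;
        }
    }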