|
@@ -53,6 +53,7 @@ import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
|
|
import org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory;
|
|
import org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory;
|
|
import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
|
|
import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
|
|
import org.elasticsearch.index.analysis.MappingCharFilterFactory;
|
|
import org.elasticsearch.index.analysis.MappingCharFilterFactory;
|
|
|
|
+import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
|
|
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
|
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
|
import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
|
|
import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
|
|
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
|
|
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
|
|
@@ -93,7 +94,7 @@ import java.util.Map;
|
|
import java.util.Set;
|
|
import java.util.Set;
|
|
import java.util.TreeSet;
|
|
import java.util.TreeSet;
|
|
|
|
|
|
-/**
|
|
|
|
|
|
+/**
|
|
* Alerts us if new analyzers are added to lucene, so we don't miss them.
|
|
* Alerts us if new analyzers are added to lucene, so we don't miss them.
|
|
* <p>
|
|
* <p>
|
|
* If we don't want to expose one for a specific reason, just map it to Void.
|
|
* If we don't want to expose one for a specific reason, just map it to Void.
|
|
@@ -115,11 +116,11 @@ public class AnalysisFactoryTestCase extends ESTestCase {
|
|
.put("thai", ThaiTokenizerFactory.class)
|
|
.put("thai", ThaiTokenizerFactory.class)
|
|
.put("uax29urlemail", UAX29URLEmailTokenizerFactory.class)
|
|
.put("uax29urlemail", UAX29URLEmailTokenizerFactory.class)
|
|
.put("whitespace", WhitespaceTokenizerFactory.class)
|
|
.put("whitespace", WhitespaceTokenizerFactory.class)
|
|
-
|
|
|
|
|
|
+
|
|
// this one "seems to mess up offsets". probably shouldn't be a tokenizer...
|
|
// this one "seems to mess up offsets". probably shouldn't be a tokenizer...
|
|
.put("wikipedia", Void.class)
|
|
.put("wikipedia", Void.class)
|
|
.immutableMap();
|
|
.immutableMap();
|
|
-
|
|
|
|
|
|
+
|
|
static final Map<String,Class<?>> KNOWN_TOKENFILTERS = new MapBuilder<String,Class<?>>()
|
|
static final Map<String,Class<?>> KNOWN_TOKENFILTERS = new MapBuilder<String,Class<?>>()
|
|
// exposed in ES
|
|
// exposed in ES
|
|
.put("apostrophe", ApostropheFilterFactory.class)
|
|
.put("apostrophe", ApostropheFilterFactory.class)
|
|
@@ -184,6 +185,7 @@ public class AnalysisFactoryTestCase extends ESTestCase {
|
|
.put("scandinaviannormalization", ScandinavianNormalizationFilterFactory.class)
|
|
.put("scandinaviannormalization", ScandinavianNormalizationFilterFactory.class)
|
|
.put("serbiannormalization", SerbianNormalizationFilterFactory.class)
|
|
.put("serbiannormalization", SerbianNormalizationFilterFactory.class)
|
|
.put("shingle", ShingleTokenFilterFactory.class)
|
|
.put("shingle", ShingleTokenFilterFactory.class)
|
|
|
|
+ .put("minhash", MinHashTokenFilterFactory.class)
|
|
.put("snowballporter", SnowballTokenFilterFactory.class)
|
|
.put("snowballporter", SnowballTokenFilterFactory.class)
|
|
.put("soraninormalization", SoraniNormalizationFilterFactory.class)
|
|
.put("soraninormalization", SoraniNormalizationFilterFactory.class)
|
|
.put("soranistem", StemmerTokenFilterFactory.class)
|
|
.put("soranistem", StemmerTokenFilterFactory.class)
|
|
@@ -199,7 +201,7 @@ public class AnalysisFactoryTestCase extends ESTestCase {
|
|
.put("type", KeepTypesFilterFactory.class)
|
|
.put("type", KeepTypesFilterFactory.class)
|
|
.put("uppercase", UpperCaseTokenFilterFactory.class)
|
|
.put("uppercase", UpperCaseTokenFilterFactory.class)
|
|
.put("worddelimiter", WordDelimiterTokenFilterFactory.class)
|
|
.put("worddelimiter", WordDelimiterTokenFilterFactory.class)
|
|
-
|
|
|
|
|
|
+
|
|
// TODO: these tokenfilters are not yet exposed: useful?
|
|
// TODO: these tokenfilters are not yet exposed: useful?
|
|
|
|
|
|
// suggest stop
|
|
// suggest stop
|
|
@@ -228,16 +230,15 @@ public class AnalysisFactoryTestCase extends ESTestCase {
|
|
.put("fingerprint", Void.class)
|
|
.put("fingerprint", Void.class)
|
|
// for tee-sinks
|
|
// for tee-sinks
|
|
.put("daterecognizer", Void.class)
|
|
.put("daterecognizer", Void.class)
|
|
- .put("minhash", Void.class)
|
|
|
|
|
|
|
|
.immutableMap();
|
|
.immutableMap();
|
|
-
|
|
|
|
|
|
+
|
|
static final Map<String,Class<?>> KNOWN_CHARFILTERS = new MapBuilder<String,Class<?>>()
|
|
static final Map<String,Class<?>> KNOWN_CHARFILTERS = new MapBuilder<String,Class<?>>()
|
|
// exposed in ES
|
|
// exposed in ES
|
|
.put("htmlstrip", HtmlStripCharFilterFactory.class)
|
|
.put("htmlstrip", HtmlStripCharFilterFactory.class)
|
|
.put("mapping", MappingCharFilterFactory.class)
|
|
.put("mapping", MappingCharFilterFactory.class)
|
|
.put("patternreplace", PatternReplaceCharFilterFactory.class)
|
|
.put("patternreplace", PatternReplaceCharFilterFactory.class)
|
|
-
|
|
|
|
|
|
+
|
|
// TODO: these charfilters are not yet exposed: useful?
|
|
// TODO: these charfilters are not yet exposed: useful?
|
|
// handling of zwnj for persian
|
|
// handling of zwnj for persian
|
|
.put("persian", Void.class)
|
|
.put("persian", Void.class)
|