|
@@ -33,6 +33,7 @@ import org.apache.commons.codec.language.bm.PhoneticEngine;
|
|
|
import org.apache.commons.codec.language.bm.RuleType;
|
|
|
import org.apache.lucene.analysis.TokenStream;
|
|
|
import org.apache.lucene.analysis.phonetic.BeiderMorseFilter;
|
|
|
+import org.apache.lucene.analysis.phonetic.DaitchMokotoffSoundexFilter;
|
|
|
import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
|
|
|
import org.apache.lucene.analysis.phonetic.PhoneticFilter;
|
|
|
import org.elasticsearch.common.settings.Settings;
|
|
@@ -53,6 +54,7 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
|
|
|
private List<String> languageset;
|
|
|
private NameType nametype;
|
|
|
private RuleType ruletype;
|
|
|
+ private boolean isDaitchMokotoff;
|
|
|
|
|
|
public PhoneticTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
|
|
super(indexSettings, name, settings);
|
|
@@ -60,6 +62,7 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
|
|
|
this.nametype = null;
|
|
|
this.ruletype = null;
|
|
|
this.maxcodelength = 0;
|
|
|
+ this.isDaitchMokotoff = false;
|
|
|
this.replace = settings.getAsBoolean("replace", true);
|
|
|
// weird, encoder is null at last step in SimplePhoneticAnalysisTests, so we set it to metaphone as default
|
|
|
String encodername = settings.get("encoder", "metaphone");
|
|
@@ -106,7 +109,8 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
|
|
|
} else if ("nysiis".equalsIgnoreCase(encodername)) {
|
|
|
this.encoder = new Nysiis();
|
|
|
} else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) {
|
|
|
- this.encoder = new DaitchMokotoffSoundex();
|
|
|
+ this.encoder = null;
|
|
|
+ this.isDaitchMokotoff = true;
|
|
|
} else {
|
|
|
throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
|
|
|
}
|
|
@@ -115,6 +119,9 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
|
|
|
@Override
|
|
|
public TokenStream create(TokenStream tokenStream) {
|
|
|
if (encoder == null) {
|
|
|
+ if (isDaitchMokotoff) {
|
|
|
+ return new DaitchMokotoffSoundexFilter(tokenStream, !replace);
|
|
|
+ }
|
|
|
if (ruletype != null && nametype != null) {
|
|
|
LanguageSet langset = null;
|
|
|
if (languageset != null && languageset.size() > 0) {
|