Browse Source

Add support for `daitch_mokotoff`

[Daitch–Mokotoff Soundex](https://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex) support was added in Lucene 5,
so the `phonetic` token filter can now offer it as the `daitch_mokotoff` encoder.
David Pilato 10 years ago
parent
commit
52bf365013

+ 1 - 1
docs/plugins/analysis-phonetic.asciidoc

@@ -42,7 +42,7 @@ The `phonetic` token filter takes the following settings:
     Which phonetic encoder to use.  Accepts `metaphone` (default),
     `doublemetaphone`, `soundex`, `refinedsoundex`, `caverphone1`,
     `caverphone2`, `cologne`, `nysiis`, `koelnerphonetik`, `haasephonetik`,
-    `beidermorse`.
+    `beidermorse`, `daitch_mokotoff`.
 
 `replace`::
 

+ 2 - 1
plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java

@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.phonetic.BeiderMorseFilter;
 import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
 import org.apache.lucene.analysis.phonetic.PhoneticFilter;
 import org.elasticsearch.common.inject.Inject;
-import org.elasticsearch.common.inject.assistedinject.Assisted;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
@@ -105,6 +104,8 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
             this.encoder = new HaasePhonetik();
         } else if ("nysiis".equalsIgnoreCase(encodername)) {
             this.encoder = new Nysiis();
+        } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) {
+            this.encoder = new DaitchMokotoffSoundex();
         } else {
             throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
         }

+ 3 - 0
plugins/analysis-phonetic/src/test/resources/org/elasticsearch/index/analysis/phonetic-1.yml

@@ -28,3 +28,6 @@ index:
       nysiisfilter:
         type: phonetic
         encoder: nysiis
+      daitch_mokotoff:
+        type: phonetic
+        encoder: daitch_mokotoff

+ 32 - 0
plugins/analysis-phonetic/src/test/resources/rest-api-spec/test/analysis_phonetic/50_daitch_mokotoff.yaml

@@ -0,0 +1,32 @@
+# Integration tests for Phonetic analysis components
+#
+
+
+"Daitch Mokotoff":
+    - do:
+        indices.create:
+            index:  phonetic_sample
+            body:
+                settings:
+                    index:
+                        analysis:
+                            analyzer:
+                                my_analyzer:
+                                    tokenizer:  standard
+                                    filter: ["standard", "lowercase", "daitch_mokotoff"]
+                            filter:
+                                daitch_mokotoff:
+                                    type: phonetic
+                                    encoder: daitch_mokotoff
+    - do:
+        cluster.health:
+            wait_for_status: yellow
+    - do:
+        indices.analyze:
+            index: phonetic_sample
+            analyzer: my_analyzer
+            text: Moskowitz
+
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: "645740" }
+