Browse Source

Added lenient flag for synonym token filter (#31484)

* Added lenient flag for synonym-tokenfilter.

Relates to #30968

* added docs for synonym-graph-tokenfilter

-- Also made lenient final
-- changed from !lenient to lenient == false

* Changes after review (1)

-- Renamed to ElasticsearchSynonymParser
-- Added explanation for ElasticsearchSynonymParser::add method
-- Changed ElasticsearchSynonymParser::logger instance to static

* Added lenient option for WordnetSynonymParser

-- also added more documentation

* Added additional documentation

* Improved documentation
Sohaib Iftikhar 7 years ago
parent
commit
88c270d844

+ 43 - 1
docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc

@@ -50,7 +50,49 @@ PUT /test_index
 The above configures a `search_synonyms` filter, with a path of
 `analysis/synonym.txt` (relative to the `config` location). The
 `search_synonyms` analyzer is then configured with the filter.
-Additional settings are: `expand` (defaults to `true`).
+
+Additional settings are:
+
+* `expand` (defaults to `true`).
+* `lenient` (defaults to `false`). If `true`, ignores exceptions while parsing the synonym configuration. It is important
+to note that only those synonym rules which cannot be parsed are ignored. For instance, consider the following request:
+
+[source,js]
+--------------------------------------------------
+PUT /test_index
+{
+    "settings": {
+        "index" : {
+            "analysis" : {
+                "analyzer" : {
+                    "synonym" : {
+                        "tokenizer" : "standard",
+                        "filter" : ["my_stop", "synonym_graph"]
+                    }
+                },
+                "filter" : {
+                    "my_stop": {
+                        "type" : "stop",
+                        "stopwords": ["bar"]
+                    },
+                    "synonym_graph" : {
+                        "type" : "synonym_graph",
+                        "lenient": true,
+                        "synonyms" : ["foo, bar => baz"]
+                    }
+                }
+            }
+        }
+    }
+}
+--------------------------------------------------
+// CONSOLE
+
+With the above request the word `bar` gets skipped but a mapping `foo => baz` is still added. However, if the mapping
+being added was `foo, baz => bar` nothing would get added to the synonym list. This is because the target word for the
+mapping is itself eliminated because it was a stop word. Similarly, if the mapping was `bar, foo, baz` and `expand` was
+set to `false`, no mapping would get added because when `expand=false` the target mapping is the first word. However, if
+`expand=true` then the mappings added would be equivalent to `foo, baz => foo, baz`, i.e., all mappings other than the
+stop word.
 
 [float]
 ==== `tokenizer` and `ignore_case` are deprecated

+ 45 - 2
docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc

@@ -33,12 +33,55 @@ PUT /test_index
 
 The above configures a `synonym` filter, with a path of
 `analysis/synonym.txt` (relative to the `config` location). The
-`synonym` analyzer is then configured with the filter. Additional
-settings is: `expand` (defaults to `true`).
+`synonym` analyzer is then configured with the filter.
 
 This filter tokenizes synonyms with whatever tokenizer and token filters
 appear before it in the chain.
 
+Additional settings are:
+
+* `expand` (defaults to `true`).
+* `lenient` (defaults to `false`). If `true`, ignores exceptions while parsing the synonym configuration. It is important
+to note that only those synonym rules which cannot be parsed are ignored. For instance, consider the following request:
+
+[source,js]
+--------------------------------------------------
+PUT /test_index
+{
+    "settings": {
+        "index" : {
+            "analysis" : {
+                "analyzer" : {
+                    "synonym" : {
+                        "tokenizer" : "standard",
+                        "filter" : ["my_stop", "synonym"]
+                    }
+                },
+                "filter" : {
+                    "my_stop": {
+                        "type" : "stop",
+                        "stopwords": ["bar"]
+                    },
+                    "synonym" : {
+                        "type" : "synonym",
+                        "lenient": true,
+                        "synonyms" : ["foo, bar => baz"]
+                    }
+                }
+            }
+        }
+    }
+}
+--------------------------------------------------
+// CONSOLE
+
+With the above request the word `bar` gets skipped but a mapping `foo => baz` is still added. However, if the mapping
+being added was `foo, baz => bar` nothing would get added to the synonym list. This is because the target word for the
+mapping is itself eliminated because it was a stop word. Similarly, if the mapping was `bar, foo, baz` and `expand` was
+set to `false`, no mapping would get added because when `expand=false` the target mapping is the first word. However, if
+`expand=true` then the mappings added would be equivalent to `foo, baz => foo, baz`, i.e., all mappings other than the
+stop word.
+
+
 [float]
 ==== `tokenizer` and `ignore_case` are deprecated
 

+ 68 - 0
server/src/main/java/org/elasticsearch/index/analysis/ESSolrSynonymParser.java

@@ -0,0 +1,68 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.logging.log4j.Logger;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.synonym.SolrSynonymParser;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.CharsRefBuilder;
+import org.elasticsearch.common.logging.Loggers;
+
+import java.io.IOException;
+
+public class ESSolrSynonymParser extends SolrSynonymParser {
+
+    private final boolean lenient;
+    private static final Logger logger =
+        Loggers.getLogger(ESSolrSynonymParser.class, "ESSolrSynonymParser");
+
+    public ESSolrSynonymParser(boolean dedup, boolean expand, boolean lenient, Analyzer analyzer) {
+        super(dedup, expand, analyzer);
+        this.lenient = lenient;
+    }
+
+    @Override
+    public void add(CharsRef input, CharsRef output, boolean includeOrig) {
+        // This condition follows up on the overridden analyze method. In case lenient was set to true and there was an
+        // exception during super.analyze we return a zero-length CharsRef for that word which caused an exception. When
+        // the synonym mappings for the words are added using the add method we skip the ones that were left empty by
+        // analyze i.e., in the case when lenient is set we only add those combinations which are non-zero-length. The
+        // else would happen only in the case when the input or output is empty and lenient is set, in which case we
+        // quietly ignore it. For more details on the control-flow see SolrSynonymParser::addInternal.
+        if (lenient == false || (input.length > 0 && output.length > 0)) {
+            super.add(input, output, includeOrig);
+        }
+    }
+
+    @Override
+    public CharsRef analyze(String text, CharsRefBuilder reuse) throws IOException {
+        try {
+            return super.analyze(text, reuse);
+        } catch (IllegalArgumentException ex) {
+            if (lenient) {
+                logger.info("Synonym rule for [" + text + "] was ignored");
+                return new CharsRef("");
+            } else {
+                throw ex;
+            }
+        }
+    }
+}

+ 68 - 0
server/src/main/java/org/elasticsearch/index/analysis/ESWordnetSynonymParser.java

@@ -0,0 +1,68 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.logging.log4j.Logger;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.synonym.WordnetSynonymParser;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.CharsRefBuilder;
+import org.elasticsearch.common.logging.Loggers;
+
+import java.io.IOException;
+
+public class ESWordnetSynonymParser extends WordnetSynonymParser {
+
+    private final boolean lenient;
+    private static final Logger logger =
+        Loggers.getLogger(ESSolrSynonymParser.class, "ESWordnetSynonymParser");
+
+    public ESWordnetSynonymParser(boolean dedup, boolean expand, boolean lenient, Analyzer analyzer) {
+        super(dedup, expand, analyzer);
+        this.lenient = lenient;
+    }
+
+    @Override
+    public void add(CharsRef input, CharsRef output, boolean includeOrig) {
+        // This condition follows up on the overridden analyze method. In case lenient was set to true and there was an
+        // exception during super.analyze we return a zero-length CharsRef for that word which caused an exception. When
+        // the synonym mappings for the words are added using the add method we skip the ones that were left empty by
+        // analyze i.e., in the case when lenient is set we only add those combinations which are non-zero-length. The
+        // else would happen only in the case when the input or output is empty and lenient is set, in which case we
+        // quietly ignore it. For more details on the control-flow see SolrSynonymParser::addInternal.
+        if (lenient == false || (input.length > 0 && output.length > 0)) {
+            super.add(input, output, includeOrig);
+        }
+    }
+
+    @Override
+    public CharsRef analyze(String text, CharsRefBuilder reuse) throws IOException {
+        try {
+            return super.analyze(text, reuse);
+        } catch (IllegalArgumentException ex) {
+            if (lenient) {
+                logger.info("Synonym rule for [" + text + "] was ignored");
+                return new CharsRef("");
+            } else {
+                throw ex;
+            }
+        }
+    }
+}

+ 4 - 6
server/src/main/java/org/elasticsearch/index/analysis/SynonymGraphTokenFilterFactory.java

@@ -21,10 +21,8 @@ package org.elasticsearch.index.analysis;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.synonym.SolrSynonymParser;
 import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
 import org.apache.lucene.analysis.synonym.SynonymMap;
-import org.apache.lucene.analysis.synonym.WordnetSynonymParser;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
@@ -58,11 +56,11 @@ public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory {
             try {
                 SynonymMap.Builder parser;
                 if ("wordnet".equalsIgnoreCase(format)) {
-                    parser = new WordnetSynonymParser(true, expand, analyzerForParseSynonym);
-                    ((WordnetSynonymParser) parser).parse(rulesReader);
+                    parser = new ESWordnetSynonymParser(true, expand, lenient, analyzerForParseSynonym);
+                    ((ESWordnetSynonymParser) parser).parse(rulesReader);
                 } else {
-                    parser = new SolrSynonymParser(true, expand, analyzerForParseSynonym);
-                    ((SolrSynonymParser) parser).parse(rulesReader);
+                    parser = new ESSolrSynonymParser(true, expand, lenient, analyzerForParseSynonym);
+                    ((ESSolrSynonymParser) parser).parse(rulesReader);
                 }
                 synonymMap = parser.build();
             } catch (Exception e) {

+ 6 - 6
server/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java

@@ -21,10 +21,8 @@ package org.elasticsearch.index.analysis;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.synonym.SolrSynonymParser;
 import org.apache.lucene.analysis.synonym.SynonymFilter;
 import org.apache.lucene.analysis.synonym.SynonymMap;
-import org.apache.lucene.analysis.synonym.WordnetSynonymParser;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
@@ -38,6 +36,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
 
     protected final String format;
     protected final boolean expand;
+    protected final boolean lenient;
     protected final Settings settings;
 
     public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
@@ -52,6 +51,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
         }
 
         this.expand = settings.getAsBoolean("expand", true);
+        this.lenient = settings.getAsBoolean("lenient", false);
         this.format = settings.get("format", "");
     }
 
@@ -93,11 +93,11 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
             try {
                 SynonymMap.Builder parser;
                 if ("wordnet".equalsIgnoreCase(format)) {
-                    parser = new WordnetSynonymParser(true, expand, analyzerForParseSynonym);
-                    ((WordnetSynonymParser) parser).parse(rulesReader);
+                    parser = new ESWordnetSynonymParser(true, expand, lenient, analyzerForParseSynonym);
+                    ((ESWordnetSynonymParser) parser).parse(rulesReader);
                 } else {
-                    parser = new SolrSynonymParser(true, expand, analyzerForParseSynonym);
-                    ((SolrSynonymParser) parser).parse(rulesReader);
+                    parser = new ESSolrSynonymParser(true, expand, lenient, analyzerForParseSynonym);
+                    ((ESSolrSynonymParser) parser).parse(rulesReader);
                 }
                 synonymMap = parser.build();
             } catch (Exception e) {

+ 78 - 0
server/src/test/java/org/elasticsearch/index/analysis/ESSolrSynonymParserTests.java

@@ -0,0 +1,78 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.synonym.SynonymFilter;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.elasticsearch.test.ESTokenStreamTestCase;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.text.ParseException;
+
+import static org.hamcrest.Matchers.containsString;
+
+public class ESSolrSynonymParserTests extends ESTokenStreamTestCase {
+
+    public void testLenientParser() throws IOException, ParseException {
+        ESSolrSynonymParser parser = new ESSolrSynonymParser(true, false, true, new StandardAnalyzer());
+        String rules =
+            "&,and\n" +
+            "come,advance,approach\n";
+        StringReader rulesReader = new StringReader(rules);
+        parser.parse(rulesReader);
+        SynonymMap synonymMap = parser.build();
+        Tokenizer tokenizer = new StandardTokenizer();
+        tokenizer.setReader(new StringReader("approach quietly then advance & destroy"));
+        TokenStream ts = new SynonymFilter(tokenizer, synonymMap, false);
+        assertTokenStreamContents(ts, new String[]{"come", "quietly", "then", "come", "destroy"});
+    }
+
+    public void testLenientParserWithSomeIncorrectLines() throws IOException, ParseException {
+        CharArraySet stopSet = new CharArraySet(1, true);
+        stopSet.add("bar");
+        ESSolrSynonymParser parser =
+            new ESSolrSynonymParser(true, false, true, new StandardAnalyzer(stopSet));
+        String rules = "foo,bar,baz";
+        StringReader rulesReader = new StringReader(rules);
+        parser.parse(rulesReader);
+        SynonymMap synonymMap = parser.build();
+        Tokenizer tokenizer = new StandardTokenizer();
+        tokenizer.setReader(new StringReader("first word is foo, then bar and lastly baz"));
+        TokenStream ts = new SynonymFilter(new StopFilter(tokenizer, stopSet), synonymMap, false);
+        assertTokenStreamContents(ts, new String[]{"first", "word", "is", "foo", "then", "and", "lastly", "foo"});
+    }
+
+    public void testNonLenientParser() {
+        ESSolrSynonymParser parser = new ESSolrSynonymParser(true, false, false, new StandardAnalyzer());
+        String rules =
+            "&,and=>and\n" +
+            "come,advance,approach\n";
+        StringReader rulesReader = new StringReader(rules);
+        ParseException ex = expectThrows(ParseException.class, () -> parser.parse(rulesReader));
+        assertThat(ex.getMessage(), containsString("Invalid synonym rule at line 1"));
+    }
+}

+ 88 - 0
server/src/test/java/org/elasticsearch/index/analysis/ESWordnetSynonymParserTests.java

@@ -0,0 +1,88 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.synonym.SynonymFilter;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.elasticsearch.test.ESTokenStreamTestCase;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.text.ParseException;
+
+import static org.hamcrest.Matchers.containsString;
+
+public class ESWordnetSynonymParserTests extends ESTokenStreamTestCase {
+
+    public void testLenientParser() throws IOException, ParseException {
+        ESWordnetSynonymParser parser = new ESWordnetSynonymParser(true, false, true, new StandardAnalyzer());
+        String rules =
+            "s(100000001,1,'&',a,1,0).\n" +
+            "s(100000001,2,'and',a,1,0).\n" +
+            "s(100000002,1,'come',v,1,0).\n" +
+            "s(100000002,2,'advance',v,1,0).\n" +
+            "s(100000002,3,'approach',v,1,0).";
+        StringReader rulesReader = new StringReader(rules);
+        parser.parse(rulesReader);
+        SynonymMap synonymMap = parser.build();
+        Tokenizer tokenizer = new StandardTokenizer();
+        tokenizer.setReader(new StringReader("approach quietly then advance & destroy"));
+        TokenStream ts = new SynonymFilter(tokenizer, synonymMap, false);
+        assertTokenStreamContents(ts, new String[]{"come", "quietly", "then", "come", "destroy"});
+    }
+
+    public void testLenientParserWithSomeIncorrectLines() throws IOException, ParseException {
+        CharArraySet stopSet = new CharArraySet(1, true);
+        stopSet.add("bar");
+        ESWordnetSynonymParser parser =
+            new ESWordnetSynonymParser(true, false, true, new StandardAnalyzer(stopSet));
+        String rules =
+            "s(100000001,1,'foo',v,1,0).\n" +
+            "s(100000001,2,'bar',v,1,0).\n" +
+            "s(100000001,3,'baz',v,1,0).";
+        StringReader rulesReader = new StringReader(rules);
+        parser.parse(rulesReader);
+        SynonymMap synonymMap = parser.build();
+        Tokenizer tokenizer = new StandardTokenizer();
+        tokenizer.setReader(new StringReader("first word is foo, then bar and lastly baz"));
+        TokenStream ts = new SynonymFilter(new StopFilter(tokenizer, stopSet), synonymMap, false);
+        assertTokenStreamContents(ts, new String[]{"first", "word", "is", "foo", "then", "and", "lastly", "foo"});
+    }
+
+    public void testNonLenientParser() {
+        ESWordnetSynonymParser parser = new ESWordnetSynonymParser(true, false, false, new StandardAnalyzer());
+        String rules =
+            "s(100000001,1,'&',a,1,0).\n" +
+            "s(100000001,2,'and',a,1,0).\n" +
+            "s(100000002,1,'come',v,1,0).\n" +
+            "s(100000002,2,'advance',v,1,0).\n" +
+            "s(100000002,3,'approach',v,1,0).";
+        StringReader rulesReader = new StringReader(rules);
+        ParseException ex = expectThrows(ParseException.class, () -> parser.parse(rulesReader));
+        assertThat(ex.getMessage(), containsString("Invalid synonym rule at line 1"));
+    }
+
+}