
[Docs] Add example to reimplement stempel analyzer (#42676)

Adding an example of how to re-implement the Polish stempel analyzer
in case a user wants to modify or extend it. In order for the analyzer to be
able to use Polish stopwords, this also registers a polish_stop filter in the
stempel plugin.

Closes #13150
Christoph Büscher · 6 years ago
commit f51db164fa

+ 102 - 2
docs/plugins/analysis-stempel.asciidoc

@@ -12,7 +12,107 @@ include::install_remove.asciidoc[]
 
 [[analysis-stempel-tokenizer]]
 [float]
-==== `stempel` tokenizer and token filter
+==== `stempel` tokenizer and token filters
 
-The plugin provides the `polish` analyzer and `polish_stem` token filter,
+The plugin provides the `polish` analyzer and the `polish_stem` and `polish_stop` token filters,
 which are not configurable.
+
+==== Reimplementing and extending the analyzers
+
+The `polish` analyzer could be reimplemented as a `custom` analyzer that can
+then be extended and configured differently as follows:
+
+[source,js]
+----------------------------------------------------
+PUT /stempel_example
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "rebuilt_stempel": {
+          "tokenizer":  "standard",
+          "filter": [
+            "lowercase",
+            "polish_stop",
+            "polish_stem"
+          ]
+        }
+      }
+    }
+  }
+}
+----------------------------------------------------
+// CONSOLE
+// TEST[s/\n$/\nstartyaml\n  - compare_analyzers: {index: stempel_example, first: polish, second: rebuilt_stempel}\nendyaml\n/]
+
+[[analysis-polish-stop]]
+==== `polish_stop` token filter
+
+The `polish_stop` token filter filters out Polish stopwords (`_polish_`), and
+any other custom stopwords specified by the user. This filter only supports
+the predefined `_polish_` stopwords list. If you want to use a different
+predefined list, then use the
+{ref}/analysis-stop-tokenfilter.html[`stop` token filter] instead.
+
+[source,js]
+--------------------------------------------------
+PUT /polish_stop_example
+{
+  "settings": {
+    "index": {
+      "analysis": {
+        "analyzer": {
+          "analyzer_with_stop": {
+            "tokenizer": "standard",
+            "filter": [
+              "lowercase",
+              "polish_stop"
+            ]
+          }
+        },
+        "filter": {
+          "polish_stop": {
+            "type": "polish_stop",
+            "stopwords": [
+              "_polish_",
+              "jeść"
+            ]
+          }
+        }
+      }
+    }
+  }
+}
+
+GET polish_stop_example/_analyze
+{
+  "analyzer": "analyzer_with_stop",
+  "text": "Gdzie kucharek sześć, tam nie ma co jeść."
+}
+--------------------------------------------------
+// CONSOLE
+
+The above request returns:
+
+[source,js]
+--------------------------------------------------
+{
+  "tokens" : [
+    {
+      "token" : "kucharek",
+      "start_offset" : 6,
+      "end_offset" : 14,
+      "type" : "<ALPHANUM>",
+      "position" : 1
+    },
+    {
+      "token" : "sześć",
+      "start_offset" : 15,
+      "end_offset" : 20,
+      "type" : "<ALPHANUM>",
+      "position" : 2
+    }
+  ]
+}
+--------------------------------------------------
+// TESTRESPONSE
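
For illustration (this example is not part of the commit diff above), the `rebuilt_stempel`
analyzer can be extended by swapping in a configurable `polish_stop` filter with additional
stop words, mirroring the `polish_stop` example above. The index name, the
`polish_stop_custom` filter name and the extra stop word `dodatkowy` are made up for this
sketch:

[source,js]
--------------------------------------------------
PUT /stempel_example_extended
{
  "settings": {
    "analysis": {
      "filter": {
        "polish_stop_custom": {
          "type": "polish_stop",
          "stopwords": [
            "_polish_",
            "dodatkowy"
          ]
        }
      },
      "analyzer": {
        "rebuilt_stempel_custom": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "polish_stop_custom",
            "polish_stem"
          ]
        }
      }
    }
  }
}
--------------------------------------------------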

+ 73 - 0
plugins/analysis-stempel/src/main/java/org/elasticsearch/index/analysis/pl/PolishStopTokenFilterFactory.java

@@ -0,0 +1,73 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis.pl;
+
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.pl.PolishAnalyzer;
+import org.apache.lucene.search.suggest.analyzing.SuggestStopFilter;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.elasticsearch.index.analysis.Analysis;
+
+import java.util.Map;
+import java.util.Set;
+
+import static java.util.Collections.singletonMap;
+
+public class PolishStopTokenFilterFactory extends AbstractTokenFilterFactory {
+    private static final Map<String, Set<?>> NAMED_STOP_WORDS = singletonMap("_polish_", PolishAnalyzer.getDefaultStopSet());
+
+    private final CharArraySet stopWords;
+
+    private final boolean ignoreCase;
+
+    private final boolean removeTrailing;
+
+    public PolishStopTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+        super(indexSettings, name, settings);
+        this.ignoreCase = settings.getAsBoolean("ignore_case", false);
+        this.removeTrailing = settings.getAsBoolean("remove_trailing", true);
+        this.stopWords = Analysis.parseWords(env, settings, "stopwords",
+                PolishAnalyzer.getDefaultStopSet(), NAMED_STOP_WORDS, ignoreCase);
+    }
+
+    @Override
+    public TokenStream create(TokenStream tokenStream) {
+        if (removeTrailing) {
+            return new StopFilter(tokenStream, stopWords);
+        } else {
+            return new SuggestStopFilter(tokenStream, stopWords);
+        }
+    }
+
+    public Set<?> stopWords() {
+        return stopWords;
+    }
+
+    public boolean ignoreCase() {
+        return ignoreCase;
+    }
+
+}
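
Beyond the `stopwords` option documented above, `PolishStopTokenFilterFactory` also reads
`ignore_case` (default `false`) and `remove_trailing` (default `true`); when
`remove_trailing` is disabled it wraps the stream in Lucene's `SuggestStopFilter`, which
does not remove a trailing stop word, a behaviour intended for suggester-style analyzers.
A minimal sketch of such a filter definition, with an illustrative index and filter name
that are not part of the commit or its docs:

[source,js]
--------------------------------------------------
PUT /polish_stop_options_example
{
  "settings": {
    "analysis": {
      "filter": {
        "polish_stop_for_suggest": {
          "type": "polish_stop",
          "stopwords": [ "_polish_" ],
          "ignore_case": true,
          "remove_trailing": false
        }
      }
    }
  }
}
--------------------------------------------------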

+ 3 - 1
plugins/analysis-stempel/src/main/java/org/elasticsearch/plugin/analysis/stempel/AnalysisStempelPlugin.java

@@ -24,6 +24,7 @@ import org.elasticsearch.index.analysis.AnalyzerProvider;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
 import org.elasticsearch.index.analysis.pl.PolishAnalyzerProvider;
 import org.elasticsearch.index.analysis.pl.PolishStemTokenFilterFactory;
+import org.elasticsearch.index.analysis.pl.PolishStopTokenFilterFactory;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
 import org.elasticsearch.plugins.AnalysisPlugin;
 import org.elasticsearch.plugins.Plugin;
@@ -35,7 +36,8 @@ import static java.util.Collections.singletonMap;
 public class AnalysisStempelPlugin extends Plugin implements AnalysisPlugin {
     @Override
     public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
-        return singletonMap("polish_stem", PolishStemTokenFilterFactory::new);
+        return Map.of("polish_stem", PolishStemTokenFilterFactory::new,
+                      "polish_stop", PolishStopTokenFilterFactory::new);
     }
 
     @Override