123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112 |
- [[analysis-stempel]]
- === Stempel Polish Analysis Plugin
- The Stempel Analysis plugin integrates Lucene's Stempel analysis
- module for Polish into Elasticsearch.
- :plugin_name: analysis-stempel
- include::install_remove.asciidoc[]
- [[analysis-stempel-tokenizer]]
- [discrete]
- ==== `stempel` tokenizer and token filters
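- The plugin provides the `polish` analyzer and the `polish_stem` and `polish_stop` token filters.
- The `polish` analyzer and the `polish_stem` token filter are not configurable; the `polish_stop`
- token filter accepts a custom `stopwords` list.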
- ==== Reimplementing and extending the analyzers
- The `polish` analyzer can be reimplemented as a `custom` analyzer, which can
- then be extended and configured differently, as follows:
- [source,console]
- ----------------------------------------------------
- PUT /stempel_example
- {
- "settings": {
- "analysis": {
- "analyzer": {
- "rebuilt_stempel": {
- "tokenizer": "standard",
- "filter": [
- "lowercase",
- "polish_stop",
- "polish_stem"
- ]
- }
- }
- }
- }
- }
- ----------------------------------------------------
- // TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: stempel_example, first: polish, second: rebuilt_stempel}\nendyaml\n/]
- [[analysis-polish-stop]]
- ==== `polish_stop` token filter
- The `polish_stop` token filter removes Polish stopwords (the predefined
- `_polish_` list) and any custom stopwords specified by the user. `_polish_`
- is the only predefined stopwords list that this filter supports. If you want
- to use a different predefined list, use the
- {ref}/analysis-stop-tokenfilter.html[`stop` token filter] instead.
- [source,console]
- --------------------------------------------------
- PUT /polish_stop_example
- {
- "settings": {
- "index": {
- "analysis": {
- "analyzer": {
- "analyzer_with_stop": {
- "tokenizer": "standard",
- "filter": [
- "lowercase",
- "polish_stop"
- ]
- }
- },
- "filter": {
- "polish_stop": {
- "type": "polish_stop",
- "stopwords": [
- "_polish_",
- "jeść"
- ]
- }
- }
- }
- }
- }
- }
- GET polish_stop_example/_analyze
- {
- "analyzer": "analyzer_with_stop",
- "text": "Gdzie kucharek sześć, tam nie ma co jeść."
- }
- --------------------------------------------------
- The above request returns:
- [source,console-result]
- --------------------------------------------------
- {
- "tokens" : [
- {
- "token" : "kucharek",
- "start_offset" : 6,
- "end_offset" : 14,
- "type" : "<ALPHANUM>",
- "position" : 1
- },
- {
- "token" : "sześć",
- "start_offset" : 15,
- "end_offset" : 20,
- "type" : "<ALPHANUM>",
- "position" : 2
- }
- ]
- }
- --------------------------------------------------
|