5 years ago · 3cf6569e0e
--- a/docs/reference/analysis/tokenfilters/elision-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/elision-tokenfilter.asciidoc
@@ -1,15 +1,96 @@
 
				 [[analysis-elision-tokenfilter]]
			
 
				-=== Elision Token Filter
			
 
				+=== Elision token filter
			
 
				+++++
			
 
				+<titleabbrev>Elision</titleabbrev>
			
 
				+++++
			
 
				 
			
 
				-A token filter which removes elisions. For example, "l'avion" (the
			
 
				-plane) will tokenized as "avion" (plane).
			
 
				+Removes specified https://en.wikipedia.org/wiki/Elision[elisions] from
			
 
				+the beginning of tokens. For example, you can use this filter to change
			
 
				+`l'avion` to `avion`.
			
 
				 
			
 
				-Requires either an `articles` parameter which is a set of stop word articles, or
			
 
				-`articles_path` which points to a text file containing the stop set. Also optionally
			
 
				-accepts `articles_case`, which indicates whether the filter treats those articles as
			
 
				-case sensitive.
			
 
				+When not customized, the filter removes the following French elisions by default:
			
 
				 
			
 
				-For example:
			
 
				+`l'`, `m'`, `t'`, `qu'`, `n'`, `s'`, `j'`, `d'`, `c'`, `jusqu'`, `quoiqu'`,
			
 
				+`lorsqu'`, `puisqu'`
			
 
				+
			
 
				+Customized versions of this filter are included in several of {es}'s built-in
			
 
				+<<analysis-lang-analyzer,language analyzers>>:
			
 
				+
			
 
				+* <<catalan-analyzer, Catalan analyzer>>
			
 
				+* <<french-analyzer, French analyzer>>
			
 
				+* <<irish-analyzer, Irish analyzer>>
			
 
				+* <<italian-analyzer, Italian analyzer>>
			
 
				+
			
 
				+This filter uses Lucene's
			
 
				+https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/util/ElisionFilter.html[ElisionFilter].
			
 
				+
			
 
				+[[analysis-elision-tokenfilter-analyze-ex]]
			
 
				+==== Example
			
 
				+
			
 
				+The following <<indices-analyze,analyze API>> request uses the `elision`
			
 
				+filter to remove `j'` from `j’examine près du wharf`:
			
 
				+
			
 
				+[source,console]
			
 
				+--------------------------------------------------
			
 
				+GET _analyze
			
 
				+{
			
 
				+  "tokenizer" : "standard",
			
 
				+  "filter" : ["elision"],
			
 
				+  "text" : "j’examine près du wharf"
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+
			
 
				+The filter produces the following tokens:
			
 
				+
			
 
				+[source,text]
			
 
				+--------------------------------------------------
			
 
				+[ examine, près, du, wharf ]
			
 
				+--------------------------------------------------
			
 
				+
			
 
				+/////////////////////
			
 
				+[source,console-result]
			
 
				+--------------------------------------------------
			
 
				+{
			
 
				+  "tokens" : [
			
 
				+    {
			
 
				+      "token" : "examine",
			
 
				+      "start_offset" : 0,
			
 
				+      "end_offset" : 9,
			
 
				+      "type" : "<ALPHANUM>",
			
 
				+      "position" : 0
			
 
				+    },
			
 
				+    {
			
 
				+      "token" : "près",
			
 
				+      "start_offset" : 10,
			
 
				+      "end_offset" : 14,
			
 
				+      "type" : "<ALPHANUM>",
			
 
				+      "position" : 1
			
 
				+    },
			
 
				+    {
			
 
				+      "token" : "du",
			
 
				+      "start_offset" : 15,
			
 
				+      "end_offset" : 17,
			
 
				+      "type" : "<ALPHANUM>",
			
 
				+      "position" : 2
			
 
				+    },
			
 
				+    {
			
 
				+      "token" : "wharf",
			
 
				+      "start_offset" : 18,
			
 
				+      "end_offset" : 23,
			
 
				+      "type" : "<ALPHANUM>",
			
 
				+      "position" : 3
			
 
				+    }
			
 
				+  ]
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+/////////////////////
			
 
				+
			
 
				+[[analysis-elision-tokenfilter-analyzer-ex]]
			
 
				+==== Add to an analyzer
			
 
				+
			
 
				+The following <<indices-create-index,create index API>> request uses the
			
 
				+`elision` filter to configure a new 
			
 
				+<<analysis-custom-analyzer,custom analyzer>>.
			
 
				 
			
 
				 [source,console]
			
 
				 --------------------------------------------------
			
@@ -18,16 +99,85 @@ PUT /elision_example
 
				     "settings" : {
			
 
				         "analysis" : {
			
 
				             "analyzer" : {
			
 
				-                "default" : {
			
 
				-                    "tokenizer" : "standard",
			
 
				+                "whitespace_elision" : {
			
 
				+                    "tokenizer" : "whitespace",
			
 
				                     "filter" : ["elision"]
			
 
				                 }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+
			
 
				+[[analysis-elision-tokenfilter-configure-parms]]
			
 
				+==== Configurable parameters
			
 
				+
			
 
				+[[analysis-elision-tokenfilter-articles]]
			
 
				+`articles`::
			
 
				++
			
 
				+--
			
 
				+(Required+++*+++, array of string)
			
 
				+List of elisions to remove.
			
 
				+
			
 
				+To be removed, the elision must be at the beginning of a token and be
			
 
				+immediately followed by an apostrophe. Both the elision and apostrophe are
			
 
				+removed.
			
 
				+
			
 
				+For custom `elision` filters, either this parameter or `articles_path` must be
			
 
				+specified.
			
 
				+--
			
 
				+
			
 
				+`articles_path`::
			
 
				++
			
 
				+--
			
 
				+(Required+++*+++, string)
			
 
				+Path to a file that contains a list of elisions to remove.
			
 
				+
			
 
				+This path must be absolute or relative to the `config` location, and the file
			
 
				+must be UTF-8 encoded. Each elision in the file must be separated by a line
			
 
				+break.
			
 
				+
			
 
				+To be removed, the elision must be at the beginning of a token and be
			
 
				+immediately followed by an apostrophe. Both the elision and apostrophe are
			
 
				+removed.
			
 
				+
			
 
				+For custom `elision` filters, either this parameter or `articles` must be
			
 
				+specified.
			
 
				+--
			
 
				+
			
 
				+`articles_case`::
			
 
				+(Optional, boolean)
			
 
				+If `true`, the filter treats any provided elisions as case sensitive.
			
 
				+Defaults to `false`.
			
 
				+
			
 
				+[[analysis-elision-tokenfilter-customize]]
			
 
				+==== Customize
			
 
				+
			
 
				+To customize the `elision` filter, duplicate it to create the basis
			
 
				+for a new custom token filter. You can modify the filter using its configurable
			
 
				+parameters.
			
 
				+
			
 
				+For example, the following request creates a custom case-sensitive `elision`
			
 
				+filter that removes the `l'`, `m'`, `t'`, `qu'`, `n'`, `s'`,
			
 
				+and `j'` elisions:
			
 
				+
			
 
				+[source,console]
			
 
				+--------------------------------------------------
			
 
				+PUT /elision_case_sensitive_example
			
 
				+{
			
 
				+    "settings" : {
			
 
				+        "analysis" : {
			
 
				+            "analyzer" : {
			
 
				+                "default" : {
			
 
				+                    "tokenizer" : "whitespace",
			
 
				+                    "filter" : ["elision_case_sensitive"]
			
 
				+                }
			
 
				             },
			
 
				             "filter" : {
			
 
				-                "elision" : {
			
 
				+                "elision_case_sensitive" : {
			
 
				                     "type" : "elision",
			
 
				-                    "articles_case": true,
			
 
				-                    "articles" : ["l", "m", "t", "qu", "n", "s", "j"]
			
 
				+                    "articles" : ["l", "m", "t", "qu", "n", "s", "j"],
			
 
				+                    "articles_case": true
			
 
				                 }
			
 
				             }
			
 
				         }