|
@@ -1,10 +1,83 @@
|
|
|
[[analysis-asciifolding-tokenfilter]]
|
|
|
-=== ASCII Folding Token Filter
|
|
|
+=== ASCII folding token filter
|
|
|
+++++
|
|
|
+<titleabbrev>ASCII folding</titleabbrev>
|
|
|
+++++
|
|
|
|
|
|
-A token filter of type `asciifolding` that converts alphabetic, numeric,
|
|
|
-and symbolic Unicode characters which are not in the first 127 ASCII
|
|
|
-characters (the "Basic Latin" Unicode block) into their ASCII
|
|
|
-equivalents, if one exists. Example:
|
|
|
+Converts alphabetic, numeric, and symbolic characters that are not in the Basic
|
|
|
+Latin Unicode block (first 127 ASCII characters) to their ASCII equivalent, if
|
|
|
+one exists. For example, the filter changes `à` to `a`.
|
|
|
+
|
|
|
+This filter uses Lucene's
|
|
|
+https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.html[ASCIIFoldingFilter].
|
|
|
+
|
|
|
+[[analysis-asciifolding-tokenfilter-analyze-ex]]
|
|
|
+==== Example
|
|
|
+
|
|
|
+The following <<indices-analyze,analyze API>> request uses the `asciifolding`
|
|
|
+filter to drop the diacritical marks in `açaí à la carte`:
|
|
|
+
|
|
|
+[source,console]
|
|
|
+--------------------------------------------------
|
|
|
+GET /_analyze
|
|
|
+{
|
|
|
+ "tokenizer" : "standard",
|
|
|
+ "filter" : ["asciifolding"],
|
|
|
+ "text" : "açaí à la carte"
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+The filter produces the following tokens:
|
|
|
+
|
|
|
+[source,text]
|
|
|
+--------------------------------------------------
|
|
|
+[ acai, a, la, carte ]
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+/////////////////////
|
|
|
+[source,console-result]
|
|
|
+--------------------------------------------------
|
|
|
+{
|
|
|
+ "tokens" : [
|
|
|
+ {
|
|
|
+ "token" : "acai",
|
|
|
+ "start_offset" : 0,
|
|
|
+ "end_offset" : 4,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 0
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "a",
|
|
|
+ "start_offset" : 5,
|
|
|
+ "end_offset" : 6,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 1
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "la",
|
|
|
+ "start_offset" : 7,
|
|
|
+ "end_offset" : 9,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 2
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "carte",
|
|
|
+ "start_offset" : 10,
|
|
|
+ "end_offset" : 15,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 3
|
|
|
+ }
|
|
|
+ ]
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+/////////////////////
|
|
|
+
|
|
|
+[[analysis-asciifolding-tokenfilter-analyzer-ex]]
|
|
|
+==== Add to an analyzer
|
|
|
+
|
|
|
+The following <<indices-create-index,create index API>> request uses the
|
|
|
+`asciifolding` filter to configure a new
|
|
|
+<<analysis-custom-analyzer,custom analyzer>>.
|
|
|
|
|
|
[source,console]
|
|
|
--------------------------------------------------
|
|
@@ -13,7 +86,7 @@ PUT /asciifold_example
|
|
|
"settings" : {
|
|
|
"analysis" : {
|
|
|
"analyzer" : {
|
|
|
- "default" : {
|
|
|
+ "standard_asciifolding" : {
|
|
|
"tokenizer" : "standard",
|
|
|
"filter" : ["asciifolding"]
|
|
|
}
|
|
@@ -23,9 +96,23 @@ PUT /asciifold_example
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-Accepts `preserve_original` setting which defaults to false but if true
|
|
|
-will keep the original token as well as emit the folded token. For
|
|
|
-example:
|
|
|
+[[analysis-asciifolding-tokenfilter-configure-parms]]
|
|
|
+==== Configurable parameters
|
|
|
+
|
|
|
+`preserve_original`::
|
|
|
+(Optional, boolean)
|
|
|
+If `true`, emit both original tokens and folded tokens.
|
|
|
+Defaults to `false`.
|
|
|
+
|
|
|
+[[analysis-asciifolding-tokenfilter-customize]]
|
|
|
+==== Customize
|
|
|
+
|
|
|
+To customize the `asciifolding` filter, duplicate it to create the basis
|
|
|
+for a new custom token filter. You can modify the filter using its configurable
|
|
|
+parameters.
|
|
|
+
|
|
|
+For example, the following request creates a custom `asciifolding` filter with
|
|
|
+`preserve_original` set to true:
|
|
|
|
|
|
[source,console]
|
|
|
--------------------------------------------------
|
|
@@ -34,7 +121,7 @@ PUT /asciifold_example
|
|
|
"settings" : {
|
|
|
"analysis" : {
|
|
|
"analyzer" : {
|
|
|
- "default" : {
|
|
|
+ "standard_asciifolding" : {
|
|
|
"tokenizer" : "standard",
|
|
|
"filter" : ["my_ascii_folding"]
|
|
|
}
|