|
@@ -1,25 +1,135 @@
|
|
|
[[analysis-lowercase-tokenfilter]]
|
|
|
-=== Lowercase Token Filter
|
|
|
+=== Lowercase token filter
|
|
|
+++++
|
|
|
+<titleabbrev>Lowercase</titleabbrev>
|
|
|
+++++
|
|
|
|
|
|
-A token filter of type `lowercase` that normalizes token text to lower
|
|
|
-case.
|
|
|
+Changes token text to lowercase. For example, you can use the `lowercase` filter
|
|
|
+to change `THE Lazy DoG` to `the lazy dog`.
|
|
|
|
|
|
-Lowercase token filter supports Greek, Irish, and Turkish lowercase token
|
|
|
-filters through the `language` parameter. Below is a usage example in a
|
|
|
-custom analyzer
|
|
|
+In addition to a default filter, the `lowercase` token filter provides access to
|
|
|
+Lucene's language-specific lowercase filters for Greek, Irish, and Turkish.
|
|
|
+
|
|
|
+[[analysis-lowercase-tokenfilter-analyze-ex]]
|
|
|
+==== Example
|
|
|
+
|
|
|
+The following <<indices-analyze,analyze API>> request uses the default
|
|
|
+`lowercase` filter to change the `THE Quick FoX JUMPs` to lowercase:
|
|
|
+
|
|
|
+[source,console]
|
|
|
+--------------------------------------------------
|
|
|
+GET _analyze
|
|
|
+{
|
|
|
+ "tokenizer" : "standard",
|
|
|
+ "filter" : ["lowercase"],
|
|
|
+ "text" : "THE Quick FoX JUMPs"
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+The filter produces the following tokens:
|
|
|
+
|
|
|
+[source,text]
|
|
|
+--------------------------------------------------
|
|
|
+[ the, quick, fox, jumps ]
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+/////////////////////
|
|
|
+[source,console-result]
|
|
|
+--------------------------------------------------
|
|
|
+{
|
|
|
+ "tokens" : [
|
|
|
+ {
|
|
|
+ "token" : "the",
|
|
|
+ "start_offset" : 0,
|
|
|
+ "end_offset" : 3,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 0
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "quick",
|
|
|
+ "start_offset" : 4,
|
|
|
+ "end_offset" : 9,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 1
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "fox",
|
|
|
+ "start_offset" : 10,
|
|
|
+ "end_offset" : 13,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 2
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "jumps",
|
|
|
+ "start_offset" : 14,
|
|
|
+ "end_offset" : 19,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 3
|
|
|
+ }
|
|
|
+ ]
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+/////////////////////
|
|
|
+
|
|
|
+[[analysis-lowercase-tokenfilter-analyzer-ex]]
|
|
|
+==== Add to an analyzer
|
|
|
+
|
|
|
+The following <<indices-create-index,create index API>> request uses the
|
|
|
+`lowercase` filter to configure a new
|
|
|
+<<analysis-custom-analyzer,custom analyzer>>.
|
|
|
|
|
|
[source,console]
|
|
|
--------------------------------------------------
|
|
|
-PUT /lowercase_example
|
|
|
+PUT lowercase_example
|
|
|
+{
|
|
|
+ "settings" : {
|
|
|
+ "analysis" : {
|
|
|
+ "analyzer" : {
|
|
|
+ "whitespace_lowercase" : {
|
|
|
+ "tokenizer" : "whitespace",
|
|
|
+ "filter" : ["lowercase"]
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+[[analysis-lowercase-tokenfilter-configure-parms]]
|
|
|
+==== Configurable parameters
|
|
|
+
|
|
|
+`language`::
|
|
|
++
|
|
|
+--
|
|
|
+(Optional, string)
|
|
|
+Language-specific lowercase token filter to use. Valid values include:
|
|
|
+
|
|
|
+`greek`::: Uses Lucene's https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/el/GreekLowerCaseFilter.html[GreekLowerCaseFilter]
|
|
|
+
|
|
|
+`irish`::: Uses Lucene's http://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/ga/IrishLowerCaseFilter.html[IrishLowerCaseFilter]
|
|
|
+
|
|
|
+`turkish`::: Uses Lucene's https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.html[TurkishLowerCaseFilter]
|
|
|
+
|
|
|
+If not specified, defaults to Lucene's https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/core/LowerCaseFilter.html[LowerCaseFilter].
|
|
|
+--
|
|
|
+
|
|
|
+[[analysis-lowercase-tokenfilter-customize]]
|
|
|
+==== Customize
|
|
|
+
|
|
|
+To customize the `lowercase` filter, duplicate it to create the basis
|
|
|
+for a new custom token filter. You can modify the filter using its configurable
|
|
|
+parameters.
|
|
|
+
|
|
|
+For example, the following request creates a custom `lowercase` filter for the
|
|
|
+Greek language:
|
|
|
+
|
|
|
+[source,console]
|
|
|
+--------------------------------------------------
|
|
|
+PUT custom_lowercase_example
|
|
|
{
|
|
|
"settings": {
|
|
|
"analysis": {
|
|
|
"analyzer": {
|
|
|
- "standard_lowercase_example": {
|
|
|
- "type": "custom",
|
|
|
- "tokenizer": "standard",
|
|
|
- "filter": ["lowercase"]
|
|
|
- },
|
|
|
"greek_lowercase_example": {
|
|
|
"type": "custom",
|
|
|
"tokenizer": "standard",
|