|
@@ -4,5 +4,102 @@
|
|
|
<titleabbrev>Uppercase</titleabbrev>
|
|
|
++++
|
|
|
|
|
|
-A token filter of type `uppercase` that normalizes token text to upper
|
|
|
-case.
|
|
|
+Changes token text to uppercase. For example, you can use the `uppercase` filter
|
|
|
+to change `the Lazy DoG` to `THE LAZY DOG`.
|
|
|
+
|
|
|
+This filter uses Lucene's
|
|
|
+https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/core/UpperCaseFilter.html[UpperCaseFilter].
|
|
|
+
|
|
|
+[WARNING]
|
|
|
+====
|
|
|
+Depending on the language, an uppercase character can map to multiple
|
|
|
+lowercase characters. Using the `uppercase` filter could result in the loss of
|
|
|
+lowercase character information.
|
|
|
+
|
|
|
+To avoid this loss but still have a consistent lettercase, use the <<analysis-lowercase-tokenfilter,`lowercase`>> filter instead.
|
|
|
+====
|
|
|
+
|
|
|
+[[analysis-uppercase-tokenfilter-analyze-ex]]
|
|
|
+==== Example
|
|
|
+
|
|
|
+The following <<indices-analyze,analyze API>> request uses the default
|
|
|
+`uppercase` filter to change the `the Quick FoX JUMPs` to uppercase:
|
|
|
+
|
|
|
+[source,console]
|
|
|
+--------------------------------------------------
|
|
|
+GET _analyze
|
|
|
+{
|
|
|
+ "tokenizer" : "standard",
|
|
|
+ "filter" : ["uppercase"],
|
|
|
+ "text" : "the Quick FoX JUMPs"
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+The filter produces the following tokens:
|
|
|
+
|
|
|
+[source,text]
|
|
|
+--------------------------------------------------
|
|
|
+[ THE, QUICK, FOX, JUMPS ]
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+/////////////////////
|
|
|
+[source,console-result]
|
|
|
+--------------------------------------------------
|
|
|
+{
|
|
|
+ "tokens" : [
|
|
|
+ {
|
|
|
+ "token" : "THE",
|
|
|
+ "start_offset" : 0,
|
|
|
+ "end_offset" : 3,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 0
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "QUICK",
|
|
|
+ "start_offset" : 4,
|
|
|
+ "end_offset" : 9,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 1
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "FOX",
|
|
|
+ "start_offset" : 10,
|
|
|
+ "end_offset" : 13,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 2
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "JUMPS",
|
|
|
+ "start_offset" : 14,
|
|
|
+ "end_offset" : 19,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 3
|
|
|
+ }
|
|
|
+ ]
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+/////////////////////
|
|
|
+
|
|
|
+[[analysis-uppercase-tokenfilter-analyzer-ex]]
|
|
|
+==== Add to an analyzer
|
|
|
+
|
|
|
+The following <<indices-create-index,create index API>> request uses the
|
|
|
+`uppercase` filter to configure a new
|
|
|
+<<analysis-custom-analyzer,custom analyzer>>.
|
|
|
+
|
|
|
+[source,console]
|
|
|
+--------------------------------------------------
|
|
|
+PUT uppercase_example
|
|
|
+{
|
|
|
+ "settings" : {
|
|
|
+ "analysis" : {
|
|
|
+ "analyzer" : {
|
|
|
+ "whitespace_uppercase" : {
|
|
|
+ "tokenizer" : "whitespace",
|
|
|
+ "filter" : ["uppercase"]
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+--------------------------------------------------
|