5 years ago · e964a97005
--- a/docs/reference/analysis/tokenfilters/length-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/length-tokenfilter.asciidoc
@@ -1,16 +1,170 @@
 
				 [[analysis-length-tokenfilter]]
			
 
				-=== Length Token Filter
			
 
				+=== Length token filter
			
 
				+++++
			
 
				+<titleabbrev>Length</titleabbrev>
			
 
				+++++
			
 
				 
			
 
				-A token filter of type `length` that removes words that are too long or
			
 
				-too short for the stream.
			
 
				+Removes tokens shorter or longer than specified character lengths.
			
 
				+For example, you can use the `length` filter to exclude tokens shorter than 2
			
 
				+characters and tokens longer than 5 characters.
			
 
				 
			
 
				-The following are settings that can be set for a `length` token filter
			
 
				-type:
			
 
				+This filter uses Lucene's
			
 
				+https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/miscellaneous/LengthFilter.html[LengthFilter].
			
 
				 
			
 
				-[cols="<,<",options="header",]
			
 
				-|===========================================================
			
 
				-|Setting |Description
			
 
				-|`min` |The minimum number. Defaults to `0`.
			
 
				-|`max` |The maximum number. Defaults to `Integer.MAX_VALUE`, which is `2^31-1` or 2147483647.
			
 
				-|===========================================================
			
 
				+[TIP]
			
 
				+====
			
 
				+The `length` filter removes entire tokens. If you'd prefer to shorten tokens to
			
 
				+a specific length, use the <<analysis-truncate-tokenfilter,`truncate`>> filter.
			
 
				+====
			
 
				 
			
 
				+[[analysis-length-tokenfilter-analyze-ex]]
			
 
				+==== Example
			
 
				+
			
 
				+The following <<indices-analyze,analyze API>> request uses the `length`
			
 
				+filter to remove tokens longer than 4 characters:
			
 
				+
			
 
				+[source,console]
			
 
				+--------------------------------------------------
			
 
				+GET _analyze
			
 
				+{
			
 
				+  "tokenizer": "whitespace",
			
 
				+  "filter": [
			
 
				+    {
			
 
				+      "type": "length",
			
 
				+      "min": 0,
			
 
				+      "max": 4
			
 
				+    }
			
 
				+  ],
			
 
				+  "text": "the quick brown fox jumps over the lazy dog"
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+
			
 
				+The filter produces the following tokens:
			
 
				+
			
 
				+[source,text]
			
 
				+--------------------------------------------------
			
 
				+[ the, fox, over, the, lazy, dog ]
			
 
				+--------------------------------------------------
			
 
				+
			
 
				+/////////////////////
			
 
				+[source,console-result]
			
 
				+--------------------------------------------------
			
 
				+{
			
 
				+  "tokens": [
			
 
				+    {
			
 
				+      "token": "the",
			
 
				+      "start_offset": 0,
			
 
				+      "end_offset": 3,
			
 
				+      "type": "word",
			
 
				+      "position": 0
			
 
				+    },
			
 
				+    {
			
 
				+      "token": "fox",
			
 
				+      "start_offset": 16,
			
 
				+      "end_offset": 19,
			
 
				+      "type": "word",
			
 
				+      "position": 3
			
 
				+    },
			
 
				+    {
			
 
				+      "token": "over",
			
 
				+      "start_offset": 26,
			
 
				+      "end_offset": 30,
			
 
				+      "type": "word",
			
 
				+      "position": 5
			
 
				+    },
			
 
				+    {
			
 
				+      "token": "the",
			
 
				+      "start_offset": 31,
			
 
				+      "end_offset": 34,
			
 
				+      "type": "word",
			
 
				+      "position": 6
			
 
				+    },
			
 
				+    {
			
 
				+      "token": "lazy",
			
 
				+      "start_offset": 35,
			
 
				+      "end_offset": 39,
			
 
				+      "type": "word",
			
 
				+      "position": 7
			
 
				+    },
			
 
				+    {
			
 
				+      "token": "dog",
			
 
				+      "start_offset": 40,
			
 
				+      "end_offset": 43,
			
 
				+      "type": "word",
			
 
				+      "position": 8
			
 
				+    }
			
 
				+  ]
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+/////////////////////
			
 
				+
			
 
				+[[analysis-length-tokenfilter-analyzer-ex]]
			
 
				+==== Add to an analyzer
			
 
				+
			
 
				+The following <<indices-create-index,create index API>> request uses the
			
 
				+`length` filter to configure a new 
			
 
				+<<analysis-custom-analyzer,custom analyzer>>.
			
 
				+
			
 
				+[source,console]
			
 
				+--------------------------------------------------
			
 
				+PUT length_example
			
 
				+{
			
 
				+  "settings": {
			
 
				+    "analysis": {
			
 
				+      "analyzer": {
			
 
				+        "standard_length": {
			
 
				+          "tokenizer": "standard",
			
 
				+          "filter": [ "length" ]
			
 
				+        }
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+
			
 
				+[[analysis-length-tokenfilter-configure-parms]]
			
 
				+==== Configurable parameters
			
 
				+
			
 
				+`min`::
			
 
				+(Optional, integer)
			
 
				+Minimum character length of a token. Shorter tokens are excluded from the
			
 
				+output. Defaults to `0`.
			
 
				+
			
 
				+`max`::
			
 
				+(Optional, integer)
			
 
				+Maximum character length of a token. Longer tokens are excluded from the output.
			
 
				+Defaults to `Integer.MAX_VALUE`, which is `2^31-1` or `2147483647`.
			
 
				+
			
 
				+[[analysis-length-tokenfilter-customize]]
			
 
				+==== Customize
			
 
				+
			
 
				+To customize the `length` filter, duplicate it to create the basis
			
 
				+for a new custom token filter. You can modify the filter using its configurable
			
 
				+parameters.
			
 
				+
			
 
				+For example, the following request creates a custom `length` filter that removes
			
 
				+tokens shorter than 2 characters and tokens longer than 10 characters:
			
 
				+
			
 
				+[source,console]
			
 
				+--------------------------------------------------
			
 
				+PUT length_custom_example
			
 
				+{
			
 
				+  "settings": {
			
 
				+    "analysis": {
			
 
				+      "analyzer": {
			
 
				+        "whitespace_length_2_to_10_char": {
			
 
				+          "tokenizer": "whitespace",
			
 
				+          "filter": [ "length_2_to_10_char" ]
			
 
				+        }
			
 
				+      },
			
 
				+      "filter": {
			
 
				+        "length_2_to_10_char": {
			
 
				+          "type": "length",
			
 
				+          "min": 2,
			
 
				+          "max": 10
			
 
				+        }
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+--------------------------------------------------