[DOCS] Reformat token count limit filter docs (#49835)

James Rodewig 5 years ago
parent commit 9907b0aab8

+ 118 - 16
docs/reference/analysis/tokenfilters/limit-token-count-tokenfilter.asciidoc

@@ -1,32 +1,134 @@
 [[analysis-limit-token-count-tokenfilter]]
-=== Limit Token Count Token Filter
+=== Limit token count token filter
+++++
+<titleabbrev>Limit token count</titleabbrev>
+++++
 
-Limits the number of tokens that are indexed per document and field.
+Limits the number of output tokens. The `limit` filter is commonly used to limit
+the size of document field values based on token count.
 
-[cols="<,<",options="header",]
-|=======================================================================
-|Setting |Description
-|`max_token_count` |The maximum number of tokens that should be indexed
-per document and field. The default is `1`
+By default, the `limit` filter keeps only the first token in a stream. For
+example, the filter can change the token stream `[ one, two, three ]` to 
+`[ one ]`.
 
-|`consume_all_tokens` |If set to `true` the filter exhaust the stream
-even if `max_token_count` tokens have been consumed already. The default
-is `false`.
-|=======================================================================
+This filter uses Lucene's
+https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.html[LimitTokenCountFilter].
 
-Here is an example:
+[TIP]
+====
+If you want to limit the size of field values based on
+_character length_, use the <<ignore-above,`ignore_above`>> mapping parameter.
+====
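+
+As a sketch of that alternative, the following mapping (the index name
+`my-index-000001` and `message` field are illustrative) skips indexing any
+value longer than 256 characters:
+
+[source,console]
+--------------------------------------------------
+PUT my-index-000001
+{
+  "mappings": {
+    "properties": {
+      "message": {
+        "type": "keyword",
+        "ignore_above": 256
+      }
+    }
+  }
+}
+--------------------------------------------------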
+
+[[analysis-limit-token-count-tokenfilter-configure-parms]]
+==== Configurable parameters
+
+`max_token_count`::
+(Optional, integer)
+Maximum number of tokens to keep. Once this limit is reached, any remaining
+tokens are excluded from the output. Defaults to `1`.
+
+`consume_all_tokens`::
+(Optional, boolean)
+If `true`, the `limit` filter exhausts the token stream, even if the
+`max_token_count` has already been reached. Defaults to `false`.
+
+[[analysis-limit-token-count-tokenfilter-analyze-ex]]
+==== Example
+
+The following <<indices-analyze,analyze API>> request uses the `limit`
+filter to keep only the first two tokens in `quick fox jumps over lazy dog`:
+
+[source,console]
+--------------------------------------------------
+GET _analyze
+{
+  "tokenizer": "standard",
+    "filter": [
+    {
+      "type": "limit",
+      "max_token_count": 2
+    }
+  ],
+  "text": "quick fox jumps over lazy dog"
+}
+--------------------------------------------------
+
+The filter produces the following tokens:
+
+[source,text]
+--------------------------------------------------
+[ quick, fox ]
+--------------------------------------------------
+
+/////////////////////
+[source,console-result]
+--------------------------------------------------
+{
+  "tokens": [
+    {
+      "token": "quick",
+      "start_offset": 0,
+      "end_offset": 5,
+      "type": "<ALPHANUM>",
+      "position": 0
+    },
+    {
+      "token": "fox",
+      "start_offset": 6,
+      "end_offset": 9,
+      "type": "<ALPHANUM>",
+      "position": 1
+    }
+  ]
+}
+--------------------------------------------------
+/////////////////////
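+
+To also exhaust the token stream, set `consume_all_tokens`. As a sketch, adding
+`"consume_all_tokens": true` to the filter definition above returns the same
+two tokens, but forces the filter to read the remaining tokens before
+finishing:
+
+[source,console]
+--------------------------------------------------
+GET _analyze
+{
+  "tokenizer": "standard",
+  "filter": [
+    {
+      "type": "limit",
+      "max_token_count": 2,
+      "consume_all_tokens": true
+    }
+  ],
+  "text": "quick fox jumps over lazy dog"
+}
+--------------------------------------------------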
+
+[[analysis-limit-token-count-tokenfilter-analyzer-ex]]
+==== Add to an analyzer
+
+The following <<indices-create-index,create index API>> request uses the
+`limit` filter to configure a new 
+<<analysis-custom-analyzer,custom analyzer>>.
 
 [source,console]
 --------------------------------------------------
-PUT /limit_example
+PUT limit_example
 {
   "settings": {
     "analysis": {
       "analyzer": {
-        "limit_example": {
-          "type": "custom",
+        "standard_one_token_limit": {
           "tokenizer": "standard",
-          "filter": ["lowercase", "five_token_limit"]
+          "filter": [ "limit" ]
+        }
+      }
+    }
+  }
+}
+--------------------------------------------------
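+
+To try out the new analyzer, you can run it against sample text with the
+<<indices-analyze,analyze API>>. With the `limit` filter's default
+`max_token_count` of `1`, only the first token is returned:
+
+[source,console]
+--------------------------------------------------
+GET limit_example/_analyze
+{
+  "analyzer": "standard_one_token_limit",
+  "text": "quick fox jumps over lazy dog"
+}
+--------------------------------------------------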
+
+[[analysis-limit-token-count-tokenfilter-customize]]
+==== Customize
+
+To customize the `limit` filter, duplicate it to create the basis
+for a new custom token filter. You can modify the filter using its configurable
+parameters.
+
+For example, the following request creates a custom `limit` filter that keeps
+only the first five tokens of a stream:
+
+[source,console]
+--------------------------------------------------
+PUT custom_limit_example
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "whitespace_five_token_limit": {
+          "tokenizer": "whitespace",
+          "filter": [ "five_token_limit" ]
         }
       },
       "filter": {