|
@@ -1,32 +1,134 @@
|
|
|
[[analysis-limit-token-count-tokenfilter]]
|
|
|
-=== Limit Token Count Token Filter
|
|
|
+=== Limit token count token filter
|
|
|
+++++
|
|
|
+<titleabbrev>Limit token count</titleabbrev>
|
|
|
+++++
|
|
|
|
|
|
-Limits the number of tokens that are indexed per document and field.
|
|
|
+Limits the number of output tokens. The `limit` filter is commonly used to limit
|
|
|
+the size of document field values based on token count.
|
|
|
|
|
|
-[cols="<,<",options="header",]
|
|
|
-|=======================================================================
|
|
|
-|Setting |Description
|
|
|
-|`max_token_count` |The maximum number of tokens that should be indexed
|
|
|
-per document and field. The default is `1`
|
|
|
+By default, the `limit` filter keeps only the first token in a stream. For
|
|
|
+example, the filter can change the token stream `[ one, two, three ]` to
|
|
|
+`[ one ]`.
|
|
|
|
|
|
-|`consume_all_tokens` |If set to `true` the filter exhaust the stream
|
|
|
-even if `max_token_count` tokens have been consumed already. The default
|
|
|
-is `false`.
|
|
|
-|=======================================================================
|
|
|
+This filter uses Lucene's
|
|
|
+https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.html[LimitTokenCountFilter].
|
|
|
|
|
|
-Here is an example:
|
|
|
+[TIP]
|
|
|
+====
|
|
|
+If you want to limit the size of field values based on
|
|
|
+_character length_, use the <<ignore-above,`ignore_above`>> mapping parameter.
|
|
|
+====
|
|
|
+
|
|
|
+[[analysis-limit-token-count-tokenfilter-configure-parms]]
|
|
|
+==== Configurable parameters
|
|
|
+
|
|
|
+`max_token_count`::
|
|
|
+(Optional, integer)
|
|
|
+Maximum number of tokens to keep. Once this limit is reached, any remaining
|
|
|
+tokens are excluded from the output. Defaults to `1`.
|
|
|
+
|
|
|
+`consume_all_tokens`::
|
|
|
+(Optional, boolean)
|
|
|
+If `true`, the `limit` filter exhausts the token stream, even if the
|
|
|
+`max_token_count` has already been reached. Defaults to `false`.
|
|
|
+
|
|
|
+[[analysis-limit-token-count-tokenfilter-analyze-ex]]
|
|
|
+==== Example
|
|
|
+
|
|
|
+The following <<indices-analyze,analyze API>> request uses the `limit`
|
|
|
+filter to keep only the first two tokens in `quick fox jumps over lazy dog`:
|
|
|
+
|
|
|
+[source,console]
|
|
|
+--------------------------------------------------
|
|
|
+GET _analyze
|
|
|
+{
|
|
|
+ "tokenizer": "standard",
|
|
|
+ "filter": [
|
|
|
+ {
|
|
|
+ "type": "limit",
|
|
|
+ "max_token_count": 2
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "text": "quick fox jumps over lazy dog"
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+The filter produces the following tokens:
|
|
|
+
|
|
|
+[source,text]
|
|
|
+--------------------------------------------------
|
|
|
+[ quick, fox ]
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+/////////////////////
|
|
|
+[source,console-result]
|
|
|
+--------------------------------------------------
|
|
|
+{
|
|
|
+ "tokens": [
|
|
|
+ {
|
|
|
+ "token": "quick",
|
|
|
+ "start_offset": 0,
|
|
|
+ "end_offset": 5,
|
|
|
+ "type": "<ALPHANUM>",
|
|
|
+ "position": 0
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token": "fox",
|
|
|
+ "start_offset": 6,
|
|
|
+ "end_offset": 9,
|
|
|
+ "type": "<ALPHANUM>",
|
|
|
+ "position": 1
|
|
|
+ }
|
|
|
+ ]
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+/////////////////////
|
|
|
+
|
|
|
+[[analysis-limit-token-count-tokenfilter-analyzer-ex]]
|
|
|
+==== Add to an analyzer
|
|
|
+
|
|
|
+The following <<indices-create-index,create index API>> request uses the
|
|
|
+`limit` filter to configure a new
|
|
|
+<<analysis-custom-analyzer,custom analyzer>>.
|
|
|
|
|
|
[source,console]
|
|
|
--------------------------------------------------
|
|
|
-PUT /limit_example
|
|
|
+PUT limit_example
|
|
|
{
|
|
|
"settings": {
|
|
|
"analysis": {
|
|
|
"analyzer": {
|
|
|
- "limit_example": {
|
|
|
- "type": "custom",
|
|
|
+ "standard_one_token_limit": {
|
|
|
"tokenizer": "standard",
|
|
|
- "filter": ["lowercase", "five_token_limit"]
|
|
|
+ "filter": [ "limit" ]
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+[[analysis-limit-token-count-tokenfilter-customize]]
|
|
|
+==== Customize
|
|
|
+
|
|
|
+To customize the `limit` filter, duplicate it to create the basis
|
|
|
+for a new custom token filter. You can modify the filter using its configurable
|
|
|
+parameters.
|
|
|
+
|
|
|
+For example, the following request creates a custom `limit` filter that keeps
|
|
|
+only the first five tokens of a stream:
|
|
|
+
|
|
|
+[source,console]
|
|
|
+--------------------------------------------------
|
|
|
+PUT custom_limit_example
|
|
|
+{
|
|
|
+ "settings": {
|
|
|
+ "analysis": {
|
|
|
+ "analyzer": {
|
|
|
+ "whitespace_five_token_limit": {
|
|
|
+ "tokenizer": "whitespace",
|
|
|
+ "filter": [ "five_token_limit" ]
|
|
|
}
|
|
|
},
|
|
|
"filter": {
|