6 years ago · 676433e64f
--- a/docs/reference/indices/analyze.asciidoc
+++ b/docs/reference/indices/analyze.asciidoc
@@ -1,15 +1,145 @@
 
				 [[indices-analyze]]
			
 
				-=== Analyze
			
 
				+=== Analyze API
			
 
				+++++
			
 
				+<titleabbrev>Analyze</titleabbrev>
			
 
				+++++
			
 
				 
			
 
				-Performs the analysis process on a text and return the tokens breakdown
			
 
				-of the text.
			
 
				+Performs <<analysis,analysis>> on a text string
			
 
				+and returns the resulting tokens.
			
 
				 
			
 
				-Can be used without specifying an index against one of the many built in
			
 
				-analyzers:
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+GET /_analyze
			
 
				+{
			
 
				+  "analyzer" : "standard",
			
 
				+  "text" : "Quick Brown Foxes!"
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// CONSOLE
			
 
				+
			
 
				+
			
 
				+[[analyze-api-request]]
			
 
				+==== {api-request-title}
			
 
				+
			
 
				+`GET /_analyze`
			
 
				+
			
 
				+`POST /_analyze`
			
 
				+
			
 
				+`GET /<index>/_analyze`
			
 
				+
			
 
				+`POST /<index>/_analyze`
			
 
				+
			
 
				+
			
 
				+[[analyze-api-path-params]]
			
 
				+==== {api-path-parms-title}
			
 
				+
			
 
				+`<index>`::
			
 
				++
			
 
				+--
			
 
				+(Optional, string)
			
 
				+Index used to derive the analyzer.
			
 
				+
			
 
				+If specified,
			
 
				+the `analyzer` or `field` parameter overrides this value.
			
 
				+
			
 
				+If no analyzer or field is specified,
			
 
				+the analyze API uses the default analyzer for the index.
			
 
				+
			
 
				+If no index is specified 
			
 
				+or the index does not have a default analyzer,
			
 
				+the analyze API uses the <<analysis-standard-analyzer,standard analyzer>>.
			
 
				+--
			
 
				+
			
 
				+
			
 
				+[[analyze-api-query-params]]
			
 
				+==== {api-query-parms-title}
			
 
				+
			
 
				+`analyzer`::
			
 
				++
			
 
				+--
			
 
				+(Optional, string or <<analysis-custom-analyzer,custom analyzer object>>)
			
 
				+Analyzer used to analyze the provided `text`.
			
 
				+
			
 
				+See <<analysis-analyzers>> for a list of built-in analyzers.
			
 
				+You can also provide a <<analysis-custom-analyzer,custom analyzer>>.
			
 
				+
			
 
				+If this parameter is not specified,
			
 
				+the analyze API uses the analyzer defined in the field's mapping.
			
 
				+
			
 
				+If no field is specified,
			
 
				+the analyze API uses the default analyzer for the index.
			
 
				+
			
 
				+If no index is specified,
			
 
				+or the index does not have a default analyzer,
			
 
				+the analyze API uses the <<analysis-standard-analyzer,standard analyzer>>.
			
 
				+--
			
 
				+
			
 
				+`attributes`::
			
 
				+(Optional, array of strings)
			
 
				+Array of token attributes used to filter the output of the `explain` parameter.
			
 
				+
			
 
				+`char_filter`::
			
 
				+(Optional, array of strings)
			
 
				+Array of character filters used to preprocess characters before the tokenizer.
			
 
				+See <<analysis-charfilters>> for a list of character filters.
			
 
				+
			
 
				+`explain`::
			
 
				+(Optional, boolean)
			
 
				+If `true`, the response includes token attributes and additional details.
			
 
				+Defaults to `false`.
			
 
				+experimental:[The format of the additional detail information is labelled as experimental in Lucene and it may change in the future.]
			
 
				+
			
 
				+`field`::
			
 
				++
			
 
				+--
			
 
				+(Optional, string)
			
 
				+Field used to derive the analyzer.
			
 
				+To use this parameter,
			
 
				+you must specify an index.
			
 
				+
			
 
				+If specified,
			
 
				+the `analyzer` parameter overrides this value.
			
 
				+
			
 
				+If no field is specified,
			
 
				+the analyze API uses the default analyzer for the index.
			
 
				+
			
 
				+If no index is specified
			
 
				+or the index does not have a default analyzer,
			
 
				+the analyze API uses the <<analysis-standard-analyzer,standard analyzer>>.
			
 
				+--
			
 
				+
			
 
				+`filter`::
			
 
				+(Optional, array of strings)
			
 
				+Array of token filters to apply after the tokenizer.
			
 
				+See <<analysis-tokenfilters>> for a list of token filters.
			
 
				+
			
 
				+`normalizer`::
			
 
				+(Optional, string)
			
 
				+Normalizer to use to convert text into a single token.
			
 
				+See <<analysis-normalizers>> for a list of normalizers.
			
 
				+
			
 
				+`text`::
			
 
				+(Required, string or array of strings)
			
 
				+Text to analyze.
			
 
				+If an array of strings is provided, it is analyzed as a multi-value field.
			
 
				+
			
 
				+`tokenizer`::
			
 
				+(Optional, string)
			
 
				+Tokenizer to use to convert text into tokens.
			
 
				+See <<analysis-tokenizers>> for a list of tokenizers.
			
 
				+
			
 
				+[[analyze-api-example]]
			
 
				+==== {api-examples-title}
			
 
				+
			
 
				+[[analyze-api-no-index-ex]]
			
 
				+===== No index specified
			
 
				+
			
 
				+You can apply any of the built-in analyzers to the text string without
			
 
				+specifying an index.
			
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-GET _analyze
			
 
				+GET /_analyze
			
 
				 {
			
 
				   "analyzer" : "standard",
			
 
				   "text" : "this is a test"
			
@@ -17,11 +147,14 @@ GET _analyze
 
				 --------------------------------------------------
			
 
				 // CONSOLE
			
 
				 
			
 
				-If text parameter is provided as array of strings, it is analyzed as a multi-valued field.
			
 
				+[[analyze-api-text-array-ex]]
			
 
				+===== Array of text strings
			
 
				+
			
 
				+If the `text` parameter is provided as an array of strings, it is analyzed as a multi-value field.
			
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-GET _analyze
			
 
				+GET /_analyze
			
 
				 {
			
 
				   "analyzer" : "standard",
			
 
				   "text" : ["this is a test", "the second text"]
			
@@ -29,13 +162,16 @@ GET _analyze
 
				 --------------------------------------------------
			
 
				 // CONSOLE
			
 
				 
			
 
				-Or by building a custom transient analyzer out of tokenizers,
			
 
				-token filters and char filters. Token filters can use the shorter 'filter'
			
 
				-parameter name:
			
 
				+[[analyze-api-custom-analyzer-ex]]
			
 
				+===== Custom analyzer
			
 
				+
			
 
				+You can use the analyze API to test a custom transient analyzer built from
			
 
				+tokenizers, token filters, and char filters. Token filters use the `filter`
			
 
				+parameter:
			
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-GET _analyze
			
 
				+GET /_analyze
			
 
				 {
			
 
				   "tokenizer" : "keyword",
			
 
				   "filter" : ["lowercase"],
			
@@ -46,7 +182,7 @@ GET _analyze
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-GET _analyze
			
 
				+GET /_analyze
			
 
				 {
			
 
				   "tokenizer" : "keyword",
			
 
				   "filter" : ["lowercase"],
			
@@ -62,7 +198,7 @@ Custom tokenizers, token filters, and character filters can be specified in the
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-GET _analyze
			
 
				+GET /_analyze
			
 
				 {
			
 
				   "tokenizer" : "whitespace",
			
 
				   "filter" : ["lowercase", {"type": "stop", "stopwords": ["a", "is", "this"]}],
			
@@ -71,11 +207,14 @@ GET _analyze
 
				 --------------------------------------------------
			
 
				 // CONSOLE
			
 
				 
			
 
				-It can also run against a specific index:
			
 
				+[[analyze-api-specific-index-ex]]
			
 
				+===== Specific index
			
 
				+
			
 
				+You can also run the analyze API against a specific index:
			
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-GET analyze_sample/_analyze
			
 
				+GET /analyze_sample/_analyze
			
 
				 {
			
 
				   "text" : "this is a test"
			
 
				 }
			
@@ -89,7 +228,7 @@ can also be provided to use a different analyzer:
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-GET analyze_sample/_analyze
			
 
				+GET /analyze_sample/_analyze
			
 
				 {
			
 
				   "analyzer" : "whitespace",
			
 
				   "text" : "this is a test"
			
@@ -98,11 +237,14 @@ GET analyze_sample/_analyze
 
				 // CONSOLE
			
 
				 // TEST[setup:analyze_sample]
			
 
				 
			
 
				-Also, the analyzer can be derived based on a field mapping, for example:
			
 
				+[[analyze-api-field-ex]]
			
 
				+===== Derive analyzer from a field mapping
			
 
				+
			
 
				+The analyzer can be derived based on a field mapping, for example:
			
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-GET analyze_sample/_analyze
			
 
				+GET /analyze_sample/_analyze
			
 
				 {
			
 
				   "field" : "obj1.field1",
			
 
				   "text" : "this is a test"
			
@@ -114,11 +256,14 @@ GET analyze_sample/_analyze
 
				 Will cause the analysis to happen based on the analyzer configured in the
			
 
				 mapping for `obj1.field1` (and if not, the default index analyzer).
			
 
				 
			
 
				+[[analyze-api-normalizer-ex]]
			
 
				+===== Normalizer
			
 
				+
			
 
				 A `normalizer` can be provided for keyword field with normalizer associated with the `analyze_sample` index.
			
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-GET analyze_sample/_analyze
			
 
				+GET /analyze_sample/_analyze
			
 
				 {
			
 
				   "normalizer" : "my_normalizer",
			
 
				   "text" : "BaR"
			
@@ -131,7 +276,7 @@ Or by building a custom transient normalizer out of token filters and char filte
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-GET _analyze
			
 
				+GET /_analyze
			
 
				 {
			
 
				   "filter" : ["lowercase"],
			
 
				   "text" : "BaR"
			
@@ -140,7 +285,7 @@ GET _analyze
 
				 // CONSOLE
			
 
				 
			
 
				 [[explain-analyze-api]]
			
 
				-==== Explain Analyze
			
 
				+===== Explain analyze
			
 
				 
			
 
				 If you want to get more advanced details, set `explain` to `true` (defaults to `false`). It will output all token attributes for each token.
			
 
				 You can filter token attributes you want to output by setting `attributes` option.
			
@@ -149,7 +294,7 @@ NOTE: The format of the additional detail information is labelled as experimenta
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-GET _analyze
			
 
				+GET /_analyze
			
 
				 {
			
 
				   "tokenizer" : "standard",
			
 
				   "filter" : ["snowball"],
			
@@ -210,8 +355,7 @@ The request returns the following result:
 
				 <1> Output only "keyword" attribute, since specify "attributes" in the request.
			
 
				 
			
 
				 [[tokens-limit-settings]]
			
 
				-[float]
			
 
				-=== Settings to prevent tokens explosion
			
 
				+===== Setting a token limit
			
 
				 Generating excessive amount of tokens may cause a node to run out of memory.
			
 
				 The following setting allows to limit the number of tokens that can be produced:
			
 
				 
			
@@ -225,7 +369,7 @@ The following setting allows to limit the number of tokens that can be produced:
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-PUT analyze_sample
			
 
				+PUT /analyze_sample
			
 
				 {
			
 
				   "settings" : {
			
 
				     "index.analyze.max_token_count" : 20000
			
@@ -237,7 +381,7 @@ PUT analyze_sample
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-GET analyze_sample/_analyze
			
 
				+GET /analyze_sample/_analyze
			
 
				 {
			
 
				   "text" : "this is a test"
			
 
				 }