
[DOCS] Rewrite analysis intro (#51184)

* [DOCS] Rewrite analysis intro. Move index/search analysis content.

* Rewrites 'Text analysis' page intro as high-level definition.
  Adds guidance on when users should configure text analysis
* Rewrites and splits index/search analysis content:
  * Conceptual content -> 'Index and search analysis' under 'Concepts'
  * Task-based content -> 'Specify an analyzer' under 'Configure...'
* Adds detailed examples for when to use the same index/search analyzer
  and when not.
* Adds new example snippets for specifying search analyzers

* clarifications

* Add toc. Decrement headings.

* Reword 'When to configure' section

* Remove sentence from tip
James Rodewig 5 years ago
commit 3c28a10b85

+ 27 - 129
docs/reference/analysis.asciidoc

@@ -4,141 +4,40 @@
 [partintro]
 --
 
-_Text analysis_ is the process of converting text, like the body of any email,
-into _tokens_ or _terms_ which are added to the inverted index for searching.
-Analysis is performed by an <<analysis-analyzers,_analyzer_>> which can be
-either a built-in analyzer or a <<analysis-custom-analyzer,`custom`>> analyzer
-defined per index.
+_Text analysis_ is the process of converting unstructured text, like
+the body of an email or a product description, into a structured format that's
+optimized for search.
 
 [float]
-== Index time analysis
+[[when-to-configure-analysis]]
+=== When to configure text analysis
 
-For instance, at index time the built-in <<english-analyzer,`english`>> _analyzer_ 
-will first convert the sentence:
+{es} performs text analysis when indexing or searching <<text,`text`>> fields.
 
-[source,text]
-------
-"The QUICK brown foxes jumped over the lazy dog!"
-------
+If your index doesn't contain `text` fields, no further setup is needed; you can
+skip the pages in this section.
 
-into distinct tokens. It will then lowercase each token, remove frequent
-stopwords ("the") and reduce the terms to their word stems (foxes -> fox,
-jumped -> jump, lazy -> lazi). In the end, the following terms will be added
-to the inverted index:
+However, if you use `text` fields or your text searches aren't returning results
+as expected, configuring text analysis can often help. You should also look into
+analysis configuration if you're using {es} to:
 
-[source,text]
-------
-[ quick, brown, fox, jump, over, lazi, dog ]
-------
+* Build a search engine
+* Mine unstructured data
+* Fine-tune search for a specific language
+* Perform lexicographic or linguistic research
 
 [float]
-[[specify-index-time-analyzer]]
-=== Specifying an index time analyzer
-
-{es} determines which index-time analyzer to use by
-checking the following parameters in order:
-
-. The <<analyzer,`analyzer`>> mapping parameter of the field
-. The `default` analyzer parameter in the index settings
-
-If none of these parameters are specified, the
-<<analysis-standard-analyzer,`standard` analyzer>> is used.
-
-[discrete]
-[[specify-index-time-field-analyzer]]
-==== Specify the index-time analyzer for a field
-
-Each <<text,`text`>> field in a mapping can specify its own
-<<analyzer,`analyzer`>>:
-
-[source,console]
--------------------------
-PUT my_index
-{
-  "mappings": {
-    "properties": {
-      "title": {
-        "type":     "text",
-        "analyzer": "standard"
-      }
-    }
-  }
-}
--------------------------
-
-[discrete]
-[[specify-index-time-default-analyzer]]
-==== Specify a default index-time analyzer
-
-When <<indices-create-index,creating an index>>, you can set a default
-index-time analyzer using the `default` analyzer setting:
-
-[source,console]
-----
-PUT my_index
-{
-  "settings": {
-    "analysis": {
-      "analyzer": {
-        "default": {
-          "type": "whitespace"
-        }
-      }
-    }
-  }
-}
-----
-
-A default index-time analyzer is useful when mapping multiple `text` fields that
-use the same analyzer. It's also used as a general fallback analyzer for both
-index-time and search-time analysis.
-
-[float]
-== Search time analysis
-
-This same analysis process is applied to the query string at search time in
-<<full-text-queries,full text queries>> like the
-<<query-dsl-match-query,`match` query>>
-to convert the text in the query string into terms of the same form as those
-that are stored in the inverted index.
-
-For instance, a user might search for:
-
-[source,text]
-------
-"a quick fox"
-------
-
-which would be analysed by the same `english` analyzer into the following terms:
-
-[source,text]
-------
-[ quick, fox ]
-------
-
-Even though the exact words used in the query string don't appear in the
-original text (`quick` vs `QUICK`, `fox` vs `foxes`), because we have applied
-the same analyzer to both the text and the query string, the terms from the
-query string exactly match the terms from the text in the inverted index,
-which means that this query would match our example document.
-
-[float]
-=== Specifying a search time analyzer
-
-Usually the same analyzer should be used both at
-index time and at search time, and <<full-text-queries,full text queries>>
-like the  <<query-dsl-match-query,`match` query>> will use the mapping to look
-up the analyzer to use for each field.
-
-The analyzer to use to search a particular field is determined by
-looking for:
-
-* An `analyzer` specified in the query itself.
-* The <<search-analyzer,`search_analyzer`>> mapping parameter.
-* The <<analyzer,`analyzer`>> mapping parameter.
-* An analyzer in the index settings called `default_search`.
-* An analyzer in the index settings called `default`.
-* The `standard` analyzer.
+[[analysis-toc]]
+=== In this section
+
+* <<analysis-overview>>
+* <<analysis-concepts>>
+* <<configure-text-analysis>>
+* <<analysis-analyzers>>
+* <<analysis-tokenizers>>
+* <<analysis-tokenfilters>>
+* <<analysis-charfilters>>
+* <<analysis-normalizers>>
 
 --
 
@@ -156,5 +55,4 @@ include::analysis/tokenfilters.asciidoc[]
 
 include::analysis/charfilters.asciidoc[]
 
-include::analysis/normalizers.asciidoc[]
-
+include::analysis/normalizers.asciidoc[]

+ 3 - 1
docs/reference/analysis/concepts.asciidoc

@@ -7,5 +7,7 @@
 This section explains the fundamental concepts of text analysis in {es}.
 
 * <<analyzer-anatomy>>
+* <<analysis-index-search-time>>
 
-include::anatomy.asciidoc[]
+include::anatomy.asciidoc[]
+include::index-search-time.asciidoc[]

+ 4 - 1
docs/reference/analysis/configure-text-analysis.asciidoc

@@ -20,10 +20,13 @@ the process.
 * <<test-analyzer>>
 * <<configuring-analyzers>>
 * <<analysis-custom-analyzer>>
+* <<specify-analyzer>>
 
 
 include::testing.asciidoc[]
 
 include::analyzers/configuring.asciidoc[]
 
-include::analyzers/custom-analyzer.asciidoc[]
+include::analyzers/custom-analyzer.asciidoc[]
+
+include::specify-analyzer.asciidoc[]

+ 175 - 0
docs/reference/analysis/index-search-time.asciidoc

@@ -0,0 +1,175 @@
+[[analysis-index-search-time]]
+=== Index and search analysis
+
+Text analysis occurs at two times:
+
+Index time::
+When a document is indexed, any <<text,`text`>> field values are analyzed.
+
+Search time::
+When running a <<full-text-queries,full-text search>> on a `text` field,
+the query string (the text the user is searching for) is analyzed.
++
+Search time is also called _query time_.
+
+The analyzer, or set of analysis rules, used at each time is called the _index
+analyzer_ or _search analyzer_ respectively.
+
+[[analysis-same-index-search-analyzer]]
+==== How the index and search analyzers work together
+
+In most cases, the same analyzer should be used at index and search time. This
+ensures the values and query strings for a field are changed into the same form
+of tokens. In turn, this ensures the tokens match as expected during a search.
+
+.**Example**
+[%collapsible]
+====
+
+A document is indexed with the following value in a `text` field:
+
+[source,text]
+------
+The QUICK brown foxes jumped over the dog!
+------
+
+The index analyzer for the field converts the value into tokens and normalizes
+them. In this case, each of the tokens represents a word:
+
+[source,text]
+------
+[ quick, brown, fox, jump, over, dog ]
+------
+
+These tokens are then indexed.
+
+Later, a user searches the same `text` field for:
+
+[source,text]
+------
+"Quick fox"
+------
+
+The user expects this search to match the sentence indexed earlier,
+`The QUICK brown foxes jumped over the dog!`.
+
+However, the query string does not contain the exact words used in the
+document's original text:
+
+* `quick` vs `QUICK`
+* `fox` vs `foxes`
+
+To account for this, the query string is analyzed using the same analyzer. This
+analyzer produces the following tokens:
+
+[source,text]
+------
+[ quick, fox ]
+------
+
+To execute the search, {es} compares these query string tokens to the tokens
+indexed in the `text` field.
+
+[options="header"]
+|===
+|Token     | Query string | `text` field
+|`quick`   | X            | X
+|`brown`   |              | X
+|`fox`     | X            | X
+|`jump`    |              | X
+|`over`    |              | X
+|`dog`     |              | X
+|===
+
+Because the field value and query string were analyzed in the same way, they
+created similar tokens. The tokens `quick` and `fox` are exact matches. This
+means the search matches the document containing `"The QUICK brown foxes jumped
+over the dog!"`, just as the user expects.
+====
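+
+If you want to see the tokens an analyzer produces for yourself, you can run
+the text through the `_analyze` API (see <<test-analyzer>>). The following
+request is a minimal sketch that assumes the built-in `english` analyzer was
+used to index the field in the example above; if so, the response contains the
+same six tokens shown there.
+
+[source,console]
+----
+GET /_analyze
+{
+  "analyzer": "english",
+  "text": "The QUICK brown foxes jumped over the dog!"
+}
+----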
+
+[[different-analyzers]]
+==== When to use a different search analyzer
+
+While less common, it sometimes makes sense to use different analyzers at index
+and search time. To enable this, {es} allows you to
+<<specify-search-analyzer,specify a separate search analyzer>>.
+
+Generally, a separate search analyzer should only be specified when using the
+same form of tokens for field values and query strings would create unexpected
+or irrelevant search matches.
+
+[[different-analyzer-ex]]
+.**Example**
+[%collapsible]
+====
+{es} is used to create a search engine that matches only words that start with
+a provided prefix. For instance, a search for `tr` should return `tram` or
+`trope`—but never `taxi` or `bat`.
+
+A document is added to the search engine's index; this document contains one
+such word in a `text` field:
+
+[source,text]
+------
+"Apple"
+------
+
+The index analyzer for the field converts the value into tokens and normalizes
+them. In this case, each of the tokens represents a potential prefix for
+the word:
+
+[source,text]
+------
+[ a, ap, app, appl, apple ]
+------
+
+These tokens are then indexed.
+
+Later, a user searches the same `text` field for:
+
+[source,text]
+------
+"appli"
+------
+
+The user expects this search to match only words that start with `appli`,
+such as `appliance` or `application`. The search should not match `apple`.
+
+However, if the index analyzer is used to analyze this query string, it would
+produce the following tokens:
+
+[source,text]
+------
+[ a, ap, app, appl, appli ]
+------
+
+When {es} compares these query string tokens to the ones indexed for `apple`,
+it finds several matches.
+
+[options="header"]
+|===
+|Token      | `appli` (query string) | `apple` (`text` field)
+|`a`        | X            | X
+|`ap`       | X            | X
+|`app`      | X            | X
+|`appl`     | X            | X
+|`appli`    | X            |
+|===
+
+This means the search would erroneously match `apple`. Not only that, it would
+match any word starting with `a`.
+
+To fix this, you can specify a different search analyzer for query strings used
+on the `text` field.
+
+In this case, you could specify a search analyzer that produces a single token
+rather than a set of prefixes:
+
+[source,text]
+------
+[ appli ]
+------
+
+This query string token would only match tokens for words that start with
+`appli`, which better aligns with the user's search expectations.
+====
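+
+In practice, a prefix search engine like this is often built from an
+`edge_ngram` token filter at index time and a simpler analyzer at search time.
+The following create index request is a minimal sketch of that approach; the
+`prefix_filter` and `prefix_analyzer` names are placeholders, not built-in
+analyzers.
+
+[source,console]
+----
+PUT my_index
+{
+  "settings": {
+    "analysis": {
+      "filter": {
+        "prefix_filter": {
+          "type": "edge_ngram",
+          "min_gram": 1,
+          "max_gram": 10
+        }
+      },
+      "analyzer": {
+        "prefix_analyzer": {
+          "type": "custom",
+          "tokenizer": "standard",
+          "filter": [ "lowercase", "prefix_filter" ]
+        }
+      }
+    }
+  },
+  "mappings": {
+    "properties": {
+      "title": {
+        "type": "text",
+        "analyzer": "prefix_analyzer",
+        "search_analyzer": "simple"
+      }
+    }
+  }
+}
+----
+
+With this mapping, values of the `title` field are broken into prefix tokens at
+index time, while query strings are analyzed with the `simple` analyzer and
+kept as whole, lowercased words.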

+ 202 - 0
docs/reference/analysis/specify-analyzer.asciidoc

@@ -0,0 +1,202 @@
+[[specify-analyzer]]
+=== Specify an analyzer
+
+{es} offers a variety of ways to specify built-in or custom analyzers:
+
+* By `text` field, index, or query
+* For <<analysis-index-search-time,index or search time>>
+
+[TIP]
+.Keep it simple
+====
+The flexibility to specify analyzers at different levels and for different times
+is great... _but only when it's needed_.
+
+In most cases, a simple approach works best: Specify an analyzer for each
+`text` field, as outlined in <<specify-index-field-analyzer>>.
+
+This approach works well with {es}'s default behavior, letting you use the same
+analyzer for indexing and search. It also lets you quickly see which analyzer
+applies to which field using the <<indices-get-mapping,get mapping API>>.
+
+If you don't typically create mappings for your indices, you can use
+<<indices-templates,index templates>> to achieve a similar effect.
+====
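+
+If you rely on <<indices-templates,index templates>> instead of explicit
+mappings, the following request is a minimal sketch of how a template can apply
+an analyzer to matching indices; the `text_analysis_template` name and
+`my_index*` pattern are placeholders:
+
+[source,console]
+----
+PUT _template/text_analysis_template
+{
+  "index_patterns": [ "my_index*" ],
+  "mappings": {
+    "properties": {
+      "title": {
+        "type": "text",
+        "analyzer": "whitespace"
+      }
+    }
+  }
+}
+----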
+
+[[specify-index-time-analyzer]]
+==== How {es} determines the index analyzer
+
+{es} determines which index analyzer to use by checking the following parameters
+in order:
+
+. The <<analyzer,`analyzer`>> mapping parameter for the field.
+  See <<specify-index-field-analyzer>>.
+. The `analysis.analyzer.default` index setting.
+  See <<specify-index-time-default-analyzer>>.
+
+If none of these parameters are specified, the
+<<analysis-standard-analyzer,`standard` analyzer>> is used.
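+
+To see how the resolved index analyzer tokenizes a field's values, you can pass
+the `field` parameter to the `_analyze` API (see <<test-analyzer>>). This
+sketch assumes an existing `my_index` index with a `title` field, as mapped in
+the next section:
+
+[source,console]
+----
+GET my_index/_analyze
+{
+  "field": "title",
+  "text": "The QUICK brown foxes jumped over the dog!"
+}
+----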
+
+[[specify-index-field-analyzer]]
+==== Specify the analyzer for a field
+
+When mapping an index, you can use the <<analyzer,`analyzer`>> mapping parameter
+to specify an analyzer for each `text` field.
+
+The following <<indices-create-index,create index API>> request sets the
+`whitespace` analyzer as the analyzer for the `title` field.
+
+[source,console]
+----
+PUT my_index
+{
+  "mappings": {
+    "properties": {
+      "title": {
+        "type": "text",
+        "analyzer": "whitespace"
+      }
+    }
+  }
+}
+----
+
+[[specify-index-time-default-analyzer]]
+==== Specify the default analyzer for an index
+
+In addition to a field-level analyzer, you can set a fallback analyzer
+using the `analysis.analyzer.default` setting.
+
+The following <<indices-create-index,create index API>> request sets the
+`simple` analyzer as the fallback analyzer for `my_index`.
+
+[source,console]
+----
+PUT my_index
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "default": {
+          "type": "simple"
+        }
+      }
+    }
+  }
+}
+----
+
+[[specify-search-analyzer]]
+==== How {es} determines the search analyzer
+
+// tag::search-analyzer-warning[]
+[WARNING]
+====
+In most cases, specifying a different search analyzer is unnecessary. Doing so
+could negatively impact relevancy and result in unexpected search results.
+
+If you choose to specify a separate search analyzer, we recommend you thoroughly
+<<test-analyzer,test your analysis configuration>> before deploying in
+production.
+====
+// end::search-analyzer-warning[]
+
+At search time, {es} determines which analyzer to use by checking the following
+parameters in order:
+
+. The <<analyzer,`analyzer`>> parameter in the search query.
+  See <<specify-search-query-analyzer>>.
+. The <<search-analyzer,`search_analyzer`>> mapping parameter for the field.
+  See <<specify-search-field-analyzer>>.
+. The `analysis.analyzer.default_search` index setting.
+  See <<specify-search-default-analyzer>>.
+. The <<analyzer,`analyzer`>> mapping parameter for the field.
+  See <<specify-index-field-analyzer>>.
+
+If none of these parameters are specified, the
+<<analysis-standard-analyzer,`standard` analyzer>> is used.
+
+[[specify-search-query-analyzer]]
+==== Specify the search analyzer for a query
+
+When writing a <<full-text-queries,full-text query>>, you can use the `analyzer`
+parameter to specify a search analyzer. If provided, this overrides any other
+search analyzers.
+
+The following <<search-search,search API>> request sets the `stop` analyzer as
+the search analyzer for a <<query-dsl-match-query,`match`>> query.
+
+[source,console]
+----
+GET my_index/_search
+{
+  "query": {
+    "match": {
+      "message": {
+        "query": "Quick foxes",
+        "analyzer": "stop"
+      }
+    }
+  }
+}
+----
+// TEST[s/^/PUT my_index\n/]
+
+[[specify-search-field-analyzer]]
+==== Specify the search analyzer for a field
+
+When mapping an index, you can use the <<search-analyzer,`search_analyzer`>> mapping
+parameter to specify a search analyzer for each `text` field.
+
+If a search analyzer is provided, the index analyzer must also be specified
+using the `analyzer` parameter.
+
+The following <<indices-create-index,create index API>> request sets the
+`simple` analyzer as the search analyzer for the `title` field.
+
+[source,console]
+----
+PUT my_index
+{
+  "mappings": {
+    "properties": {
+      "title": {
+        "type": "text",
+        "analyzer": "whitespace",
+        "search_analyzer": "simple"
+      }
+    }
+  }
+}
+----
+
+[[specify-search-default-analyzer]]
+==== Specify the default search analyzer for an index
+
+When <<indices-create-index,creating an index>>, you can set a default search
+analyzer using the `analysis.analyzer.default_search` setting.
+
+If a search analyzer is provided, a default index analyzer must also be
+specified using the `analysis.analyzer.default` setting.
+
+The following <<indices-create-index,create index API>> request sets the
+`whitespace` analyzer as the default search analyzer for the `my_index` index.
+
+[source,console]
+----
+PUT my_index
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "default": {
+          "type": "simple"
+        },
+        "default_search": {
+          "type": "whitespace"
+        }
+      }
+    }
+  }
+}
+----