5 年之前 · 3c28a10b85
--- a/docs/reference/analysis.asciidoc
+++ b/docs/reference/analysis.asciidoc
@@ -4,141 +4,40 @@
 
															 [partintro]
														
 
															 --
														
 
															-_Text analysis_ is the process of converting text, like the body of any email,
														
 
															-into _tokens_ or _terms_ which are added to the inverted index for searching.
														
 
															-Analysis is performed by an <<analysis-analyzers,_analyzer_>> which can be
														
 
															-either a built-in analyzer or a <<analysis-custom-analyzer,`custom`>> analyzer
														
 
															-defined per index.
														
 
															+_Text analysis_ is the process of converting unstructured text, like
														
 
															+the body of an email or a product description, into a structured format that's
														
 
															+optimized for search.
														
 
															 [float]
														
 
															-== Index time analysis
														
 
															+[[when-to-configure-analysis]]
														
 
															+=== When to configure text analysis
														
 
															-For instance, at index time the built-in <<english-analyzer,`english`>> _analyzer_ 
														
 
															-will first convert the sentence:
														
 
															+{es} performs text analysis when indexing or searching <<text,`text`>> fields.
														
 
															-[source,text]
														
 
															-------
														
 
															-"The QUICK brown foxes jumped over the lazy dog!"
														
 
															-------
														
 
															+If your index doesn't contain `text` fields, no further setup is needed; you can
														
 
															+skip the pages in this section.
														
 
															-into distinct tokens. It will then lowercase each token, remove frequent
														
 
															-stopwords ("the") and reduce the terms to their word stems (foxes -> fox,
														
 
															-jumped -> jump, lazy -> lazi). In the end, the following terms will be added
														
 
															-to the inverted index:
														
 
															+However, if you use `text` fields or your text searches aren't returning results
														
 
															+as expected, configuring text analysis can often help. You should also look into
														
 
															+analysis configuration if you're using {es} to:
														
 
															-[source,text]
														
 
															-------
														
 
															-[ quick, brown, fox, jump, over, lazi, dog ]
														
 
															-------
														
 
															+* Build a search engine
														
 
															+* Mine unstructured data
														
 
															+* Fine-tune search for a specific language
														
 
															+* Perform lexicographic or linguistic research
														
 
															 [float]
														
 
															-[[specify-index-time-analyzer]]
														
 
															-=== Specifying an index time analyzer
														
 
															-
														
 
															-{es} determines which index-time analyzer to use by
														
 
															-checking the following parameters in order:
														
 
															-
														
 
															-. The <<analyzer,`analyzer`>> mapping parameter of the field
														
 
															-. The `default` analyzer parameter in the index settings
														
 
															-
														
 
															-If none of these parameters are specified, the
														
 
															-<<analysis-standard-analyzer,`standard` analyzer>> is used.
														
 
															-
														
 
															-[discrete]
														
 
															-[[specify-index-time-field-analyzer]]
														
 
															-==== Specify the index-time analyzer for a field
														
 
															-
														
 
															-Each <<text,`text`>> field in a mapping can specify its own
														
 
															-<<analyzer,`analyzer`>>:
														
 
															-
														
 
															-[source,console]
														
 
															--------------------------
														
 
															-PUT my_index
														
 
															-{
														
 
															-  "mappings": {
														
 
															-    "properties": {
														
 
															-      "title": {
														
 
															-        "type":     "text",
														
 
															-        "analyzer": "standard"
														
 
															-      }
														
 
															-    }
														
 
															-  }
														
 
															-}
														
 
															--------------------------
														
 
															-
														
 
															-[discrete]
														
 
															-[[specify-index-time-default-analyzer]]
														
 
															-==== Specify a default index-time analyzer
														
 
															-
														
 
															-When <<indices-create-index,creating an index>>, you can set a default
														
 
															-index-time analyzer using the `default` analyzer setting:
														
 
															-
														
 
															-[source,console]
														
 
															-----
														
 
															-PUT my_index
														
 
															-{
														
 
															-  "settings": {
														
 
															-    "analysis": {
														
 
															-      "analyzer": {
														
 
															-        "default": {
														
 
															-          "type": "whitespace"
														
 
															-        }
														
 
															-      }
														
 
															-    }
														
 
															-  }
														
 
															-}
														
 
															-----
														
 
															-
														
 
															-A default index-time analyzer is useful when mapping multiple `text` fields that
														
 
															-use the same analyzer. It's also used as a general fallback analyzer for both
														
 
															-index-time and search-time analysis.
														
 
															-
														
 
															-[float]
														
 
															-== Search time analysis
														
 
															-
														
 
															-This same analysis process is applied to the query string at search time in
														
 
															-<<full-text-queries,full text queries>> like the
														
 
															-<<query-dsl-match-query,`match` query>>
														
 
															-to convert the text in the query string into terms of the same form as those
														
 
															-that are stored in the inverted index.
														
 
															-
														
 
															-For instance, a user might search for:
														
 
															-
														
 
															-[source,text]
														
 
															-------
														
 
															-"a quick fox"
														
 
															-------
														
 
															-
														
 
															-which would be analysed by the same `english` analyzer into the following terms:
														
 
															-
														
 
															-[source,text]
														
 
															-------
														
 
															-[ quick, fox ]
														
 
															-------
														
 
															-
														
 
															-Even though the exact words used in the query string don't appear in the
														
 
															-original text (`quick` vs `QUICK`, `fox` vs `foxes`), because we have applied
														
 
															-the same analyzer to both the text and the query string, the terms from the
														
 
															-query string exactly match the terms from the text in the inverted index,
														
 
															-which means that this query would match our example document.
														
 
															-
														
 
															-[float]
														
 
															-=== Specifying a search time analyzer
														
 
															-
														
 
															-Usually the same analyzer should be used both at
														
 
															-index time and at search time, and <<full-text-queries,full text queries>>
														
 
															-like the  <<query-dsl-match-query,`match` query>> will use the mapping to look
														
 
															-up the analyzer to use for each field.
														
 
															-
														
 
															-The analyzer to use to search a particular field is determined by
														
 
															-looking for:
														
 
															-
														
 
															-* An `analyzer` specified in the query itself.
														
 
															-* The <<search-analyzer,`search_analyzer`>> mapping parameter.
														
 
															-* The <<analyzer,`analyzer`>> mapping parameter.
														
 
															-* An analyzer in the index settings called `default_search`.
														
 
															-* An analyzer in the index settings called `default`.
														
 
															-* The `standard` analyzer.
														
 
															+[[analysis-toc]]
														
 
															+=== In this section
														
 
															+
														
 
															+* <<analysis-overview>>
														
 
															+* <<analysis-concepts>>
														
 
															+* <<configure-text-analysis>>
														
 
															+* <<analysis-analyzers>>
														
 
															+* <<analysis-tokenizers>>
														
 
															+* <<analysis-tokenfilters>>
														
 
															+* <<analysis-charfilters>>
														
 
															+* <<analysis-normalizers>>
														
 
															 --
														
@@ -156,5 +55,4 @@ include::analysis/tokenfilters.asciidoc[]
 
															 include::analysis/charfilters.asciidoc[]
														
 
															-include::analysis/normalizers.asciidoc[]
														
 
															-
														
 
															+include::analysis/normalizers.asciidoc[]
														
--- a/docs/reference/analysis/concepts.asciidoc
+++ b/docs/reference/analysis/concepts.asciidoc
@@ -7,5 +7,7 @@
 
															 This section explains the fundamental concepts of text analysis in {es}.
														
 
															 * <<analyzer-anatomy>>
														
 
															+* <<analysis-index-search-time>>
														
 
															-include::anatomy.asciidoc[]
														
 
															+include::anatomy.asciidoc[]
														
 
															+include::index-search-time.asciidoc[]
														
--- a/docs/reference/analysis/configure-text-analysis.asciidoc
+++ b/docs/reference/analysis/configure-text-analysis.asciidoc
@@ -20,10 +20,13 @@ the process.
 
															 * <<test-analyzer>>
														
 
															 * <<configuring-analyzers>>
														
 
															 * <<analysis-custom-analyzer>>
														
 
															+* <<specify-analyzer>>
														
 
															 include::testing.asciidoc[]
														
 
															 include::analyzers/configuring.asciidoc[]
														
 
															-include::analyzers/custom-analyzer.asciidoc[]
														
 
															+include::analyzers/custom-analyzer.asciidoc[]
														
 
															+
														
 
															+include::specify-analyzer.asciidoc[]
														
--- a/docs/reference/analysis/index-search-time.asciidoc
+++ b/docs/reference/analysis/index-search-time.asciidoc
@@ -0,0 +1,175 @@
 
															+[[analysis-index-search-time]]
														
 
															+=== Index and search analysis
														
 
															+
														
 
															+Text analysis occurs at two times:
														
 
															+
														
 
															+Index time::
														
 
															+When a document is indexed, any <<text,`text`>> field values are analyzed.
														
 
															+
														
 
															+Search time::
														
 
															+When running a <<full-text-queries,full-text search>> on a `text` field,
														
 
															+the query string (the text the user is searching for) is analyzed.
														
 
															++
														
 
															+Search time is also called _query time_.
														
 
															+
														
 
															+The analyzer, or set of analysis rules, used at each time is called the _index
														
 
															+analyzer_ or _search analyzer_ respectively.
														
 
															+
														
 
															+[[analysis-same-index-search-analyzer]]
														
 
															+==== How the index and search analyzer work together
														
 
															+
														
 
															+In most cases, the same analyzer should be used at index and search time. This
														
 
															+ensures the values and query strings for a field are changed into the same form
														
 
															+of tokens. In turn, this ensures the tokens match as expected during a search.
														
 
															+
														
 
															+.**Example**
														
 
															+[%collapsible]
														
 
															+====
														
 
															+
														
 
															+A document is indexed with the following value in a `text` field:
														
 
															+
														
 
															+[source,text]
														
 
															+------
														
 
															+The QUICK brown foxes jumped over the dog!
														
 
															+------
														
 
															+
														
 
															+The index analyzer for the field converts the value into tokens and normalizes
														
 
															+them. In this case, each of the tokens represents a word:
														
 
															+
														
 
															+[source,text]
														
 
															+------
														
 
															+[ quick, brown, fox, jump, over, dog ]
														
 
															+------
														
 
															+
														
 
															+These tokens are then indexed.
														
 
															+
														
 
															+Later, a user searches the same `text` field for:
														
 
															+
														
 
															+[source,text]
														
 
															+------
														
 
															+"Quick fox"
														
 
															+------
														
 
															+
														
 
															+The user expects this search to match the sentence indexed earlier,
														
 
															+`The QUICK brown foxes jumped over the dog!`.
														
 
															+
														
 
															+However, the query string does not contain the exact words used in the
														
 
															+document's original text:
														
 
															+
														
 
															+* `quick` vs `QUICK`
														
 
															+* `fox` vs `foxes`
														
 
															+
														
 
															+To account for this, the query string is analyzed using the same analyzer. This
														
 
															+analyzer produces the following tokens:
														
 
															+
														
 
															+[source,text]
														
 
															+------
														
 
															+[ quick, fox ]
														
 
															+------
														
 
															+
														
 
															+To execute the search, {es} compares these query string tokens to the tokens
														
 
															+indexed in the `text` field.
														
 
															+
														
 
															+[options="header"]
														
 
															+|===
														
 
															+|Token     | Query string | `text` field
														
 
															+|`quick`   | X            | X
														
 
															+|`brown`   |              | X
														
 
															+|`fox`     | X            | X
														
 
															+|`jump`    |              | X
														
 
															+|`over`    |              | X
														
 
															+|`dog`     |              | X
														
 
															+|===
														
 
															+
														
 
															+Because the field value and query string were analyzed in the same way, they
														
 
															+created similar tokens. The tokens `quick` and `fox` are exact matches. This
														
 
															+means the search matches the document containing `"The QUICK brown foxes jumped
														
 
															+over the dog!"`, just as the user expects.
														
 
															+====
														
 
															+
														
 
															+[[different-analyzers]]
														
 
															+==== When to use a different search analyzer
														
 
															+
														
 
															+While less common, it sometimes makes sense to use different analyzers at index
														
 
															+and search time. To enable this, {es} allows you to
														
 
															+<<specify-search-analyzer,specify a separate search analyzer>>.
														
 
															+
														
 
															+Generally, a separate search analyzer should only be specified when using the
														
 
															+same form of tokens for field values and query strings would create unexpected
														
 
															+or irrelevant search matches.
														
 
															+
														
 
															+[[different-analyzer-ex]]
														
 
															+.*Example*
														
 
															+[%collapsible]
														
 
															+====
														
 
															+{es} is used to create a search engine that matches only words that start with
														
 
															+a provided prefix. For instance, a search for `tr` should return `tram` or
														
 
															+`trope`—but never `taxi` or `bat`.
														
 
															+
														
 
															+A document is added to the search engine's index; this document contains one
														
 
															+such word in a `text` field:
														
 
															+
														
 
															+[source,text]
														
 
															+------
														
 
															+"Apple"
														
 
															+------
														
 
															+
														
 
															+The index analyzer for the field converts the value into tokens and normalizes
														
 
															+them. In this case, each of the tokens represents a potential prefix for
														
 
															+the word:
														
 
															+
														
 
															+[source,text]
														
 
															+------
														
 
															+[ a, ap, app, appl, apple ]
														
 
															+------
														
 
															+
														
 
															+These tokens are then indexed.
														
 
															+
														
 
															+Later, a user searches the same `text` field for:
														
 
															+
														
 
															+[source,text]
														
 
															+------
														
 
															+"appli"
														
 
															+------
														
 
															+
														
 
															+The user expects this search to match only words that start with `appli`,
														
 
															+such as `appliance` or `application`. The search should not match `apple`.
														
 
															+
														
 
															+However, if the index analyzer is used to analyze this query string, it would
														
 
															+produce the following tokens:
														
 
															+
														
 
															+[source,text]
														
 
															+------
														
 
															+[ a, ap, app, appl, appli ]
														
 
															+------
														
 
															+
														
 
															+When {es} compares these query string tokens to the ones indexed for `apple`,
														
 
															+it finds several matches.
														
 
															+
														
 
															+[options="header"]
														
 
															+|===
														
 
															+|Token      | `appli`      | `apple`
														
 
															+|`a`        | X            | X
														
 
															+|`ap`       | X            | X
														
 
															+|`app`      | X            | X
														
 
															+|`appl`     | X            | X
														
 
															+|`appli`    | X            |
														
 
															+|===
														
 
															+
														
 
															+This means the search would erroneously match `apple`. Not only that, it would
														
 
															+match any word starting with `a`.
														
 
															+
														
 
															+To fix this, you can specify a different search analyzer for query strings used
														
 
															+on the `text` field.
														
 
															+
														
 
															+In this case, you could specify a search analyzer that produces a single token
														
 
															+rather than a set of prefixes:
														
 
															+
														
 
															+[source,text]
														
 
															+------
														
 
															+[ appli ]
														
 
															+------
														
 
															+
														
 
															+This query string token would only match tokens for words that start with
														
 
															+`appli`, which better aligns with the user's search expectations.
														
 
															+====
														
--- a/docs/reference/analysis/specify-analyzer.asciidoc
+++ b/docs/reference/analysis/specify-analyzer.asciidoc
@@ -0,0 +1,202 @@
 
															+[[specify-analyzer]]
														
 
															+=== Specify an analyzer
														
 
															+
														
 
															+{es} offers a variety of ways to specify built-in or custom analyzers:
														
 
															+
														
 
															+* By `text` field, index, or query
														
 
															+* For <<analysis-index-search-time,index or search time>>
														
 
															+
														
 
															+[TIP]
														
 
															+.Keep it simple
														
 
															+====
														
 
															+The flexibility to specify analyzers at different levels and for different times
														
 
															+is great... _but only when it's needed_.
														
 
															+
														
 
															+In most cases, a simple approach works best: Specify an analyzer for each
														
 
															+`text` field, as outlined in <<specify-index-field-analyzer>>.
														
 
															+
														
 
															+This approach works well with {es}'s default behavior, letting you use the same
														
 
															+analyzer for indexing and search. It also lets you quickly see which analyzer
														
 
															+applies to which field using the <<indices-get-mapping,get mapping API>>.
														
 
															+
														
 
															+If you don't typically create mappings for your indices, you can use
														
 
															+<<indices-templates,index templates>> to achieve a similar effect.
														
 
															+====
														
 
															+
														
 
															+[[specify-index-time-analyzer]]
														
 
															+==== How {es} determines the index analyzer
														
 
															+
														
 
															+{es} determines which index analyzer to use by checking the following parameters
														
 
															+in order:
														
 
															+
														
 
															+. The <<analyzer,`analyzer`>> mapping parameter for the field.
														
 
															+  See <<specify-index-field-analyzer>>.
														
 
															+. The `analysis.analyzer.default` index setting.
														
 
															+  See <<specify-index-time-default-analyzer>>.
														
 
															+
														
 
															+If none of these parameters are specified, the
														
 
															+<<analysis-standard-analyzer,`standard` analyzer>> is used.
														
 
															+
														
 
															+[[specify-index-field-analyzer]]
														
 
															+==== Specify the analyzer for a field
														
 
															+
														
 
															+When mapping an index, you can use the <<analyzer,`analyzer`>> mapping parameter
														
 
															+to specify an analyzer for each `text` field.
														
 
															+
														
 
															+The following <<indices-create-index,create index API>> request sets the
														
 
															+`whitespace` analyzer as the analyzer for the `title` field.
														
 
															+
														
 
															+[source,console]
														
 
															+----
														
 
															+PUT my_index
														
 
															+{
														
 
															+  "mappings": {
														
 
															+    "properties": {
														
 
															+      "title": {
														
 
															+        "type": "text",
														
 
															+        "analyzer": "whitespace"
														
 
															+      }
														
 
															+    }
														
 
															+  }
														
 
															+}
														
 
															+----
														
 
															+
														
 
															+[[specify-index-time-default-analyzer]]
														
 
															+==== Specify the default analyzer for an index
														
 
															+
														
 
															+In addition to a field-level analyzer, you can set a fallback analyzer for
														
 
															+an index using the `analysis.analyzer.default` setting.
														
 
															+
														
 
															+The following <<indices-create-index,create index API>> request sets the
														
 
															+`simple` analyzer as the fallback analyzer for `my_index`.
														
 
															+
														
 
															+[source,console]
														
 
															+----
														
 
															+PUT my_index
														
 
															+{
														
 
															+  "settings": {
														
 
															+    "analysis": {
														
 
															+      "analyzer": {
														
 
															+        "default": {
														
 
															+          "type": "simple"
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+  }
														
 
															+}
														
 
															+----
														
 
															+
														
 
															+[[specify-search-analyzer]]
														
 
															+==== How {es} determines the search analyzer
														
 
															+
														
 
															+// tag::search-analyzer-warning[]
														
 
															+[WARNING]
														
 
															+====
														
 
															+In most cases, specifying a different search analyzer is unnecessary. Doing so
														
 
															+could negatively impact relevancy and result in unexpected search results.
														
 
															+
														
 
															+If you choose to specify a separate search analyzer, we recommend you thoroughly
														
 
															+<<test-analyzer,test your analysis configuration>> before deploying in
														
 
															+production.
														
 
															+====
														
 
															+// end::search-analyzer-warning[]
														
 
															+
														
 
															+At search time, {es} determines which analyzer to use by checking the following
														
 
															+parameters in order:
														
 
															+
														
 
															+. The <<analyzer,`analyzer`>> parameter in the search query.
														
 
															+  See <<specify-search-query-analyzer>>.
														
 
															+. The <<search-analyzer,`search_analyzer`>> mapping parameter for the field.
														
 
															+  See <<specify-search-field-analyzer>>.
														
 
															+. The `analysis.analyzer.default_search` index setting.
														
 
															+  See <<specify-search-default-analyzer>>.
														
 
															+. The <<analyzer,`analyzer`>> mapping parameter for the field.
														
 
															+  See <<specify-index-field-analyzer>>.
														
 
															+
														
 
															+If none of these parameters are specified, the
														
 
															+<<analysis-standard-analyzer,`standard` analyzer>> is used.
														
 
															+
														
 
															+[[specify-search-query-analyzer]]
														
 
															+==== Specify the search analyzer for a query
														
 
															+
														
 
															+When writing a <<full-text-queries,full-text query>>, you can use the `analyzer`
														
 
															+parameter to specify a search analyzer. If provided, this overrides any other
														
 
															+search analyzers.
														
 
															+
														
 
															+The following <<search-search,search API>> request sets the `stop` analyzer as
														
 
															+the search analyzer for a <<query-dsl-match-query,`match`>> query.
														
 
															+
														
 
															+[source,console]
														
 
															+----
														
 
															+GET my_index/_search
														
 
															+{
														
 
															+  "query": {
														
 
															+    "match": {
														
 
															+      "message": {
														
 
															+        "query": "Quick foxes",
														
 
															+        "analyzer": "stop"
														
 
															+      }
														
 
															+    }
														
 
															+  }
														
 
															+}
														
 
															+----
														
 
															+// TEST[s/^/PUT my_index\n/]
														
 
															+
														
 
															+[[specify-search-field-analyzer]]
														
 
															+==== Specify the search analyzer for a field
														
 
															+
														
 
															+When mapping an index, you can use the <<search-analyzer,`search_analyzer`>> mapping
														
 
															+parameter to specify a search analyzer for each `text` field.
														
 
															+
														
 
															+If a search analyzer is provided, the index analyzer must also be specified
														
 
															+using the `analyzer` parameter.
														
 
															+
														
 
															+The following <<indices-create-index,create index API>> request sets the
														
 
															+`simple` analyzer as the search analyzer for the `title` field.
														
 
															+
														
 
															+[source,console]
														
 
															+----
														
 
															+PUT my_index
														
 
															+{
														
 
															+  "mappings": {
														
 
															+    "properties": {
														
 
															+      "title": {
														
 
															+        "type": "text",
														
 
															+        "analyzer": "whitespace",
														
 
															+        "search_analyzer": "simple"
														
 
															+      }
														
 
															+    }
														
 
															+  }
														
 
															+}
														
 
															+----
														
 
															+
														
 
															+[[specify-search-default-analyzer]]
														
 
															+==== Specify the default search analyzer for an index
														
 
															+
														
 
															+When <<indices-create-index,creating an index>>, you can set a default search
														
 
															+analyzer using the `analysis.analyzer.default_search` setting.
														
 
															+
														
 
															+If a search analyzer is provided, a default index analyzer must also be
														
 
															+specified using the `analysis.analyzer.default` setting.
														
 
															+
														
 
															+The following <<indices-create-index,create index API>> request sets the
														
 
															+`whitespace` analyzer as the default search analyzer for the `my_index` index.
														
 
															+
														
 
															+[source,console]
														
 
															+----
														
 
															+PUT my_index
														
 
															+{
														
 
															+  "settings": {
														
 
															+    "analysis": {
														
 
															+      "analyzer": {
														
 
															+        "default": {
														
 
															+          "type": "simple"
														
 
															+        },
														
 
															+        "default_search": {
														
 
															+          "type": "whitespace"
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+  }
														
 
															+}
														
 
															+----