9 年之前 · 3076377fdb
--- a/docs/reference/analysis.asciidoc
+++ b/docs/reference/analysis.asciidoc
@@ -73,5 +73,3 @@ include::analysis/tokenfilters.asciidoc[]
 
															 include::analysis/charfilters.asciidoc[]
														
 
															-include::analysis/icu-plugin.asciidoc[]
														
 
															-
														
--- a/docs/reference/analysis/icu-plugin.asciidoc
+++ b/docs/reference/analysis/icu-plugin.asciidoc
@@ -1,246 +0,0 @@
 
															-[[analysis-icu-plugin]]
														
 
															-== ICU Analysis Plugin
														
 
															-
														
 
															-The http://icu-project.org/[ICU] analysis plugin allows for unicode
														
 
															-normalization, collation and folding. The plugin is called
														
 
															-https://github.com/elasticsearch/elasticsearch-analysis-icu[elasticsearch-analysis-icu].
														
 
															-
														
 
															-The plugin includes the following analysis components:
														
 
															-
														
 
															-[float]
														
 
															-[[icu-normalization]]
														
 
															-=== ICU Normalization
														
 
															-
														
 
															-Normalizes characters as explained
														
 
															-http://userguide.icu-project.org/transforms/normalization[here]. It
														
 
															-registers itself by default under `icu_normalizer` or `icuNormalizer`
														
 
															-using the default settings. Allows for the name parameter to be provided
														
 
															-which can include the following values: `nfc`, `nfkc`, and `nfkc_cf`.
														
 
															-Here is a sample settings:
														
 
															-
														
 
															-[source,js]
														
 
															---------------------------------------------------
														
 
															-{
														
 
															-    "index" : {
														
 
															-        "analysis" : {
														
 
															-            "analyzer" : {
														
 
															-                "normalization" : {
														
 
															-                    "tokenizer" : "keyword",
														
 
															-                    "filter" : ["icu_normalizer"]
														
 
															-                }
														
 
															-            }
														
 
															-        }
														
 
															-    }
														
 
															-}
														
 
															---------------------------------------------------
														
 
															-
														
 
															-[float]
														
 
															-[[icu-folding]]
														
 
															-=== ICU Folding
														
 
															-
														
 
															-Folding of unicode characters based on `UTR#30`. It registers itself
														
 
															-under `icu_folding` and `icuFolding` names.
														
 
															-The filter also does lowercasing, which means the lowercase filter can
														
 
															-normally be left out. Sample setting:
														
 
															-
														
 
															-[source,js]
														
 
															---------------------------------------------------
														
 
															-{
														
 
															-    "index" : {
														
 
															-        "analysis" : {
														
 
															-            "analyzer" : {
														
 
															-                "folding" : {
														
 
															-                    "tokenizer" : "keyword",
														
 
															-                    "filter" : ["icu_folding"]
														
 
															-                }
														
 
															-            }
														
 
															-        }
														
 
															-    }
														
 
															-}
														
 
															---------------------------------------------------
														
 
															-
														
 
															-[float]
														
 
															-[[icu-filtering]]
														
 
															-==== Filtering
														
 
															-
														
 
															-The folding can be filtered by a set of unicode characters with the
														
 
															-parameter `unicodeSetFilter`. This is useful for a non-internationalized
														
 
															-search engine where retaining a set of national characters which are
														
 
															-primary letters in a specific language is wanted. See syntax for the
														
 
															-UnicodeSet
														
 
															-http://icu-project.org/apiref/icu4j/com/ibm/icu/text/UnicodeSet.html[here].
														
 
															-
														
 
															-The Following example exempts Swedish characters from the folding. Note
														
 
															-that the filtered characters are NOT lowercased which is why we add that
														
 
															-filter below.
														
 
															-
														
 
															-[source,js]
														
 
															---------------------------------------------------
														
 
															-{
														
 
															-    "index" : {
														
 
															-        "analysis" : {
														
 
															-            "analyzer" : {
														
 
															-                "folding" : {
														
 
															-                    "tokenizer" : "standard",
														
 
															-                    "filter" : ["my_icu_folding", "lowercase"]
														
 
															-                }
														
 
															-            }
														
 
															-            "filter" : {
														
 
															-                "my_icu_folding" : {
														
 
															-                    "type" : "icu_folding"
														
 
															-                    "unicodeSetFilter" : "[^åäöÅÄÖ]"
														
 
															-                }
														
 
															-            }
														
 
															-        }
														
 
															-    }
														
 
															-}
														
 
															---------------------------------------------------
														
 
															-
														
 
															-[float]
														
 
															-[[icu-collation]]
														
 
															-=== ICU Collation
														
 
															-
														
 
															-Uses collation token filter. Allows to either specify the rules for
														
 
															-collation (defined
														
 
															-http://www.icu-project.org/userguide/Collate_Customization.html[here])
														
 
															-using the `rules` parameter (can point to a location or expressed in the
														
 
															-settings, location can be relative to config location), or using the
														
 
															-`language` parameter (further specialized by country and variant). By
														
 
															-default registers under `icu_collation` or `icuCollation` and uses the
														
 
															-default locale.
														
 
															-
														
 
															-Here is a sample settings:
														
 
															-
														
 
															-[source,js]
														
 
															---------------------------------------------------
														
 
															-{
														
 
															-    "index" : {
														
 
															-        "analysis" : {
														
 
															-            "analyzer" : {
														
 
															-                "collation" : {
														
 
															-                    "tokenizer" : "keyword",
														
 
															-                    "filter" : ["icu_collation"]
														
 
															-                }
														
 
															-            }
														
 
															-        }
														
 
															-    }
														
 
															-}
														
 
															---------------------------------------------------
														
 
															-
														
 
															-And here is a sample of custom collation:
														
 
															-
														
 
															-[source,js]
														
 
															---------------------------------------------------
														
 
															-{
														
 
															-    "index" : {
														
 
															-        "analysis" : {
														
 
															-            "analyzer" : {
														
 
															-                "collation" : {
														
 
															-                    "tokenizer" : "keyword",
														
 
															-                    "filter" : ["myCollator"]
														
 
															-                }
														
 
															-            },
														
 
															-            "filter" : {
														
 
															-                "myCollator" : {
														
 
															-                    "type" : "icu_collation",
														
 
															-                    "language" : "en"
														
 
															-                }
														
 
															-            }
														
 
															-        }
														
 
															-    }
														
 
															-}
														
 
															---------------------------------------------------
														
 
															-
														
 
															-[float]
														
 
															-==== Options
														
 
															-
														
 
															-[horizontal]
														
 
															-`strength`::
														
 
															-    The strength property determines the minimum level of difference considered significant during comparison.
														
 
															-     The default strength for the Collator is `tertiary`, unless specified otherwise by the locale used to create the Collator.
														
 
															-     Possible values: `primary`, `secondary`, `tertiary`, `quaternary` or `identical`.
														
 
															- +
														
 
															- See http://icu-project.org/apiref/icu4j/com/ibm/icu/text/Collator.html[ICU Collation] documentation for a more detailed
														
 
															- explanation for the specific values.
														
 
															-
														
 
															-`decomposition`::
														
 
															-    Possible values: `no` or `canonical`. Defaults to `no`. Setting this decomposition property with
														
 
															-    `canonical` allows the Collator to handle un-normalized text properly, producing the same results as if the text were
														
 
															-    normalized. If `no` is set, it is the user's responsibility to insure that all text is already in the appropriate form
														
 
															-    before a comparison or before getting a CollationKey. Adjusting decomposition mode allows the user to select between
														
 
															-    faster and more complete collation behavior. Since a great many of the world's languages do not require text
														
 
															-    normalization, most locales set `no` as the default decomposition mode.
														
 
															-
														
 
															-[float]
														
 
															-==== Expert options:
														
 
															-
														
 
															-[horizontal]
														
 
															-`alternate`::
														
 
															-     Possible values: `shifted` or `non-ignorable`. Sets the alternate handling for strength `quaternary`
														
 
															-     to be either shifted or non-ignorable. What boils down to ignoring punctuation and whitespace.
														
 
															-
														
 
															-`caseLevel`::
														
 
															-    Possible values: `true` or `false`. Default is `false`. Whether case level sorting is required. When
														
 
															-     strength is set to `primary` this will ignore accent differences.
														
 
															-
														
 
															-`caseFirst`::
														
 
															-    Possible values: `lower` or `upper`. Useful to control which case is sorted first when case is not ignored
														
 
															-    for strength `tertiary`.
														
 
															-
														
 
															-`numeric`::
														
 
															-    Possible values: `true` or `false`. Whether digits are sorted according to numeric representation. For
														
 
															-    example the value `egg-9` is sorted before the value `egg-21`. Defaults to `false`.
														
 
															-
														
 
															-`variableTop`::
														
 
															-    Single character or contraction. Controls what is variable for `alternate`.
														
 
															-
														
 
															-`hiraganaQuaternaryMode`::
														
 
															-    Possible values: `true` or `false`. Defaults to `false`. Distinguishing between Katakana and
														
 
															-    Hiragana characters in `quaternary` strength .
														
 
															-
														
 
															-[float]
														
 
															-=== ICU Tokenizer
														
 
															-
														
 
															-Breaks text into words according to UAX #29: Unicode Text Segmentation ((http://www.unicode.org/reports/tr29/)).
														
 
															-
														
 
															-[source,js]
														
 
															---------------------------------------------------
														
 
															-{
														
 
															-    "index" : {
														
 
															-        "analysis" : {
														
 
															-            "analyzer" : {
														
 
															-                "collation" : {
														
 
															-                    "tokenizer" : "icu_tokenizer",
														
 
															-                }
														
 
															-            }
														
 
															-        }
														
 
															-    }
														
 
															-}
														
 
															---------------------------------------------------
														
 
															-
														
 
															-
														
 
															-[float]
														
 
															-=== ICU Normalization CharFilter
														
 
															-
														
 
															-Normalizes characters as explained http://userguide.icu-project.org/transforms/normalization[here].
														
 
															-It registers itself by default under `icu_normalizer` or `icuNormalizer` using the default settings.
														
 
															-Allows for the name parameter to be provided which can include the following values: `nfc`, `nfkc`, and `nfkc_cf`.
														
 
															-Allows for the mode parameter to be provided which can include the following values: `compose` and `decompose`.
														
 
															-Use `decompose` with `nfc` or `nfkc`, to get `nfd` or `nfkd`, respectively.
														
 
															-Here is a sample settings:
														
 
															-
														
 
															-[source,js]
														
 
															---------------------------------------------------
														
 
															-{
														
 
															-    "index" : {
														
 
															-        "analysis" : {
														
 
															-            "analyzer" : {
														
 
															-                "collation" : {
														
 
															-                    "tokenizer" : "keyword",
														
 
															-                    "char_filter" : ["icu_normalizer"]
														
 
															-                }
														
 
															-            }
														
 
															-        }
														
 
															-    }
														
 
															-}
														
 
															---------------------------------------------------