
CONSOLEify analysis docs

Converts the analysis docs that were marked as json into `CONSOLE`
format. A few of them were in yaml but marked as json for historical
reasons. I added more complete examples for a few of the less
obvious-sounding ones.

Relates to #18160
Nik Everett, 8 years ago
commit ad69503dce
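
For context, the conversion pattern is the same across all of the files below: a bare settings fragment marked `[source,js]` becomes a complete, runnable index-creation request and gains a `// CONSOLE` marker so the docs build can execute it. A minimal sketch of the target shape (the index name `my_example_index` and the analyzer chain are illustrative only, not taken from the diffs below):

[source,js]
--------------------------------------------------
PUT /my_example_index
{
    "settings" : {
        "analysis" : {
            "analyzer" : {
                "default" : {
                    "tokenizer" : "standard",
                    "filter" : ["lowercase"]
                }
            }
        }
    }
}
--------------------------------------------------
// CONSOLE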

+ 4 - 12
docs/build.gradle

@@ -53,18 +53,6 @@ buildRestTests.expectedUnconvertedCandidates = [
   'reference/aggregations/pipeline/serial-diff-aggregation.asciidoc',
   'reference/aggregations/pipeline/stats-bucket-aggregation.asciidoc',
   'reference/aggregations/pipeline/sum-bucket-aggregation.asciidoc',
-  'reference/analysis/tokenfilters/asciifolding-tokenfilter.asciidoc',
-  'reference/analysis/tokenfilters/cjk-bigram-tokenfilter.asciidoc',
-  'reference/analysis/tokenfilters/common-grams-tokenfilter.asciidoc',
-  'reference/analysis/tokenfilters/compound-word-tokenfilter.asciidoc',
-  'reference/analysis/tokenfilters/elision-tokenfilter.asciidoc',
-  'reference/analysis/tokenfilters/hunspell-tokenfilter.asciidoc',
-  'reference/analysis/tokenfilters/keep-types-tokenfilter.asciidoc',
-  'reference/analysis/tokenfilters/keep-words-tokenfilter.asciidoc',
-  'reference/analysis/tokenfilters/keyword-marker-tokenfilter.asciidoc',
-  'reference/analysis/tokenfilters/keyword-repeat-tokenfilter.asciidoc',
-  'reference/analysis/tokenfilters/limit-token-count-tokenfilter.asciidoc',
-  'reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc',
   'reference/cat/snapshots.asciidoc',
   'reference/cat/templates.asciidoc',
   'reference/cat/thread_pool.asciidoc',
@@ -124,10 +112,14 @@ integTestCluster {
   configFile 'scripts/my_map_script.painless'
   configFile 'scripts/my_combine_script.painless'
   configFile 'scripts/my_reduce_script.painless'
+  configFile 'analysis/example_word_list.txt'
+  configFile 'analysis/hyphenation_patterns.xml'
   configFile 'analysis/synonym.txt'
   configFile 'analysis/stemmer_override.txt'
   configFile 'userdict_ja.txt'
   configFile 'KeywordTokenizer.rbbi'
+  extraConfigFile 'hunspell/en_US/en_US.aff', '../core/src/test/resources/indices/analyze/conf_dir/hunspell/en_US/en_US.aff'
+  extraConfigFile 'hunspell/en_US/en_US.dic', '../core/src/test/resources/indices/analyze/conf_dir/hunspell/en_US/en_US.dic'
   // Whitelist reindexing from the local node so we can test it.
   setting 'reindex.remote.whitelist', '127.0.0.1:*'
 }

+ 26 - 18
docs/reference/analysis/tokenfilters/asciifolding-tokenfilter.asciidoc

@@ -8,17 +8,21 @@ equivalents, if one exists.  Example:
 
 [source,js]
 --------------------------------------------------
-"index" : {
-    "analysis" : {
-        "analyzer" : {
-            "default" : {
-                "tokenizer" : "standard",
-                "filter" : ["standard", "asciifolding"]
+PUT /asciifold_example
+{
+    "settings" : {
+        "analysis" : {
+            "analyzer" : {
+                "default" : {
+                    "tokenizer" : "standard",
+                    "filter" : ["standard", "asciifolding"]
+                }
             }
         }
     }
 }
 --------------------------------------------------
+// CONSOLE
 
 Accepts `preserve_original` setting which defaults to false but if true
 will keep the original token as well as emit the folded token.  For
@@ -26,20 +30,24 @@ example:
 
 [source,js]
 --------------------------------------------------
-"index" : {
-    "analysis" : {
-        "analyzer" : {
-            "default" : {
-                "tokenizer" : "standard",
-                "filter" : ["standard", "my_ascii_folding"]
-            }
-        },
-        "filter" : {
-            "my_ascii_folding" : {
-                "type" : "asciifolding",
-                "preserve_original" : true
+PUT /asciifold_example
+{
+    "settings" : {
+        "analysis" : {
+            "analyzer" : {
+                "default" : {
+                    "tokenizer" : "standard",
+                    "filter" : ["standard", "my_ascii_folding"]
+                }
+            },
+            "filter" : {
+                "my_ascii_folding" : {
+                    "type" : "asciifolding",
+                    "preserve_original" : true
+                }
             }
         }
     }
 }
 --------------------------------------------------
+// CONSOLE

+ 3 - 1
docs/reference/analysis/tokenfilters/cjk-bigram-tokenfilter.asciidoc

@@ -16,8 +16,9 @@ Bigrams are generated for characters in `han`, `hiragana`, `katakana` and
 
 [source,js]
 --------------------------------------------------
+PUT /cjk_bigram_example
 {
-    "index" : {
+    "settings" : {
         "analysis" : {
             "analyzer" : {
                 "han_bigrams" : {
@@ -40,3 +41,4 @@ Bigrams are generated for characters in `han`, `hiragana`, `katakana` and
     }
 }
 --------------------------------------------------
+// CONSOLE

+ 29 - 17
docs/reference/analysis/tokenfilters/common-grams-tokenfilter.asciidoc

@@ -41,21 +41,33 @@ Here is an example:
 
 [source,js]
 --------------------------------------------------
-index :
-    analysis :
-        analyzer :
-            index_grams :
-                tokenizer : whitespace
-                filter : [common_grams]
-            search_grams :
-                tokenizer : whitespace
-                filter : [common_grams_query]
-        filter :
-            common_grams :
-                type : common_grams
-                common_words: [a, an, the]                
-            common_grams_query :
-                type : common_grams
-                query_mode: true
-                common_words: [a, an, the]                
+PUT /common_grams_example
+{
+    "settings": {
+        "analysis": {
+            "my_analyzer": {
+                "index_grams": {
+                    "tokenizer": "whitespace",
+                    "filter": ["common_grams"]
+                },
+                "search_grams": {
+                    "tokenizer": "whitespace",
+                    "filter": ["common_grams_query"]
+                }
+            },
+            "filter": {
+                "common_grams": {
+                    "type": "common_grams",
+                    "common_words": ["a", "an", "the"]
+                },
+                "common_grams_query": {
+                    "type": "common_grams",
+                    "query_mode": true,
+                    "common_words": ["a", "an", "the"]
+                }
+            }
+        }
+    }
+}
 --------------------------------------------------
+// CONSOLE

+ 28 - 17
docs/reference/analysis/tokenfilters/compound-word-tokenfilter.asciidoc

@@ -1,5 +1,5 @@
 [[analysis-compound-word-tokenfilter]]
-=== Compound Word Token Filter
+=== Compound Word Token Filters
 
 The `hyphenation_decompounder` and `dictionary_decompounder` token filters can
 decompose compound words found in many Germanic languages into word parts.
@@ -84,20 +84,31 @@ Here is an example:
 
 [source,js]
 --------------------------------------------------
-index :
-    analysis :
-        analyzer :
-            myAnalyzer2 :
-                type : custom
-                tokenizer : standard
-                filter : [myTokenFilter1, myTokenFilter2]
-        filter :
-            myTokenFilter1 :
-                type : dictionary_decompounder
-                word_list: [one, two, three]
-            myTokenFilter2 :
-                type : hyphenation_decompounder
-                word_list_path: path/to/words.txt
-                hyphenation_patterns_path: path/to/fop.xml
-                max_subword_size : 22
+PUT /compound_word_example
+{
+    "index": {
+        "analysis": {
+            "analyzer": {
+                "my_analyzer": {
+                    "type": "custom",
+                    "tokenizer": "standard",
+                    "filter": ["dictionary_decompounder", "hyphenation_decompounder"]
+                }
+            },
+            "filter": {
+                "dictionary_decompounder": {
+                    "type": "dictionary_decompounder",
+                    "word_list": ["one", "two", "three"]
+                },
+                "hyphenation_decompounder": {
+                    "type" : "hyphenation_decompounder",
+                    "word_list_path": "analysis/example_word_list.txt",
+                    "hyphenation_patterns_path": "analysis/hyphenation_patterns.xml",
+                    "max_subword_size": 22
+                }
+            }
+        }
+    }
+}
 --------------------------------------------------
+// CONSOLE

+ 16 - 12
docs/reference/analysis/tokenfilters/elision-tokenfilter.asciidoc

@@ -9,20 +9,24 @@ example:
 
 [source,js]
 --------------------------------------------------
-"index" : {
-    "analysis" : {
-        "analyzer" : {
-            "default" : {
-                "tokenizer" : "standard",
-                "filter" : ["standard", "elision"]
-            }
-        },
-        "filter" : {
-            "elision" : {
-                "type" : "elision",
-                "articles" : ["l", "m", "t", "qu", "n", "s", "j"]
+PUT /elision_example
+{
+    "settings" : {
+        "analysis" : {
+            "analyzer" : {
+                "default" : {
+                    "tokenizer" : "standard",
+                    "filter" : ["standard", "elision"]
+                }
+            },
+            "filter" : {
+                "elision" : {
+                    "type" : "elision",
+                    "articles" : ["l", "m", "t", "qu", "n", "s", "j"]
+                }
             }
         }
     }
 }
 --------------------------------------------------
+// CONSOLE

+ 17 - 13
docs/reference/analysis/tokenfilters/hunspell-tokenfilter.asciidoc

@@ -10,7 +10,7 @@ one or more `*.dic` files (all of which will automatically be picked up).
 For example, assuming the default hunspell location is used, the
 following directory layout will define the `en_US` dictionary:
 
-[source,js]
+[source,txt]
 --------------------------------------------------
 - conf
     |-- hunspell
@@ -42,24 +42,28 @@ settings:
 
 [source,js]
 --------------------------------------------------
+PUT /hunspell_example
 {
-    "analysis" : {
-        "analyzer" : {
-            "en" : {
-                "tokenizer" : "standard",
-                "filter" : [ "lowercase", "en_US" ]
-            }
-        },
-        "filter" : {
-            "en_US" : {
-                "type" : "hunspell",
-                "locale" : "en_US",
-                "dedup" : true
+    "settings": {
+        "analysis" : {
+            "analyzer" : {
+                "en" : {
+                    "tokenizer" : "standard",
+                    "filter" : [ "lowercase", "en_US" ]
+                }
+            },
+            "filter" : {
+                "en_US" : {
+                    "type" : "hunspell",
+                    "locale" : "en_US",
+                    "dedup" : true
+                }
             }
         }
     }
 }
 --------------------------------------------------
+// CONSOLE
 
 The hunspell token filter accepts four options:
 

+ 41 - 4
docs/reference/analysis/tokenfilters/keep-types-tokenfilter.asciidoc

@@ -1,7 +1,7 @@
 [[analysis-keep-types-tokenfilter]]
 === Keep Types Token Filter
 
-A token filter of type `keep_types` that only keeps tokens with a token type 
+A token filter of type `keep_types` that only keeps tokens with a token type
 contained in a predefined set.
 
 
@@ -14,24 +14,61 @@ types:: a list of types to keep
 [float]
 === Settings example
 
+You can set it up like:
+
 [source,js]
 --------------------------------------------------
+PUT /keep_types_example
 {
-    "index" : {
+    "settings" : {
         "analysis" : {
             "analyzer" : {
                 "my_analyzer" : {
                     "tokenizer" : "standard",
                     "filter" : ["standard", "lowercase", "extract_numbers"]
-                },
+                }
             },
             "filter" : {
                 "extract_numbers" : {
                     "type" : "keep_types",
                     "types" : [ "<NUM>" ]
-                },
+                }
             }
         }
     }
 }
 --------------------------------------------------
+// CONSOLE
+
+And test it like:
+
+[source,js]
+--------------------------------------------------
+POST /keep_types_example/_analyze
+{
+  "analyzer" : "my_analyzer",
+  "text" : "this is just 1 a test"
+}
+--------------------------------------------------
+// CONSOLE
+// TEST[continued]
+
+And it'd respond:
+
+[source,js]
+--------------------------------------------------
+{
+  "tokens": [
+    {
+      "token": "1",
+      "start_offset": 13,
+      "end_offset": 14,
+      "type": "<NUM>",
+      "position": 3
+    }
+  ]
+}
+--------------------------------------------------
+// TESTRESPONSE
+
+Note how only the `<NUM>` token is in the output.

+ 8 - 6
docs/reference/analysis/tokenfilters/keep-words-tokenfilter.asciidoc

@@ -20,17 +20,18 @@ keep_words_case:: a boolean indicating whether to lower case the words (defaults
 
 [source,js]
 --------------------------------------------------
+PUT /keep_words_example
 {
-    "index" : {
+    "settings" : {
         "analysis" : {
             "analyzer" : {
-                "my_analyzer" : {
+                "example_1" : {
                     "tokenizer" : "standard",
                     "filter" : ["standard", "lowercase", "words_till_three"]
                 },
-                "my_analyzer1" : {
+                "example_2" : {
                     "tokenizer" : "standard",
-                    "filter" : ["standard", "lowercase", "words_on_file"]
+                    "filter" : ["standard", "lowercase", "words_in_file"]
                 }
             },
             "filter" : {
@@ -38,12 +39,13 @@ keep_words_case:: a boolean indicating whether to lower case the words (defaults
                     "type" : "keep",
                     "keep_words" : [ "one", "two", "three"]
                 },
-                "words_on_file" : {
+                "words_in_file" : {
                     "type" : "keep",
-                    "keep_words_path" : "/path/to/word/file"
+                    "keep_words_path" : "analysis/example_word_list.txt"
                 }
             }
         }
     }
 }
 --------------------------------------------------
+// CONSOLE

+ 117 - 12
docs/reference/analysis/tokenfilters/keyword-marker-tokenfilter.asciidoc

@@ -19,19 +19,124 @@ in the text.
 `false`.
 |=======================================================================
 
-Here is an example:
+You can configure it like:
 
 [source,js]
 --------------------------------------------------
-index :
-    analysis :
-        analyzer :
-            myAnalyzer :
-                type : custom
-                tokenizer : standard
-                filter : [lowercase, protwords, porter_stem]    
-        filter :
-            protwords :
-                type : keyword_marker
-                keywords_path : analysis/protwords.txt
+PUT /keyword_marker_example
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "protect_cats": {
+          "type": "custom",
+          "tokenizer": "standard",
+          "filter": ["lowercase", "protect_cats", "porter_stem"]
+        },
+        "normal": {
+          "type": "custom",
+          "tokenizer": "standard",
+          "filter": ["lowercase", "porter_stem"]
+        }
+      },
+      "filter": {
+        "protect_cats": {
+          "type": "keyword_marker",
+          "keywords": ["cats"]
+        }
+      }
+    }
+  }
+}
 --------------------------------------------------
+// CONSOLE
+
+And test it with:
+
+[source,js]
+--------------------------------------------------
+POST /keyword_marker_example/_analyze
+{
+  "analyzer" : "protect_cats",
+  "text" : "I like cats"
+}
+--------------------------------------------------
+// CONSOLE
+// TEST[continued]
+
+And it'd respond:
+
+[source,js]
+--------------------------------------------------
+{
+  "tokens": [
+    {
+      "token": "i",
+      "start_offset": 0,
+      "end_offset": 1,
+      "type": "<ALPHANUM>",
+      "position": 0
+    },
+    {
+      "token": "like",
+      "start_offset": 2,
+      "end_offset": 6,
+      "type": "<ALPHANUM>",
+      "position": 1
+    },
+    {
+      "token": "cats",
+      "start_offset": 7,
+      "end_offset": 11,
+      "type": "<ALPHANUM>",
+      "position": 2
+    }
+  ]
+}
+--------------------------------------------------
+// TESTRESPONSE
+
+As compared to the `normal` analyzer which has `cats` stemmed to `cat`:
+
+[source,js]
+--------------------------------------------------
+POST /keyword_marker_example/_analyze
+{
+  "analyzer" : "normal",
+  "text" : "I like cats"
+}
+--------------------------------------------------
+// CONSOLE
+// TEST[continued]
+
+Response:
+
+[source,js]
+--------------------------------------------------
+{
+  "tokens": [
+    {
+      "token": "i",
+      "start_offset": 0,
+      "end_offset": 1,
+      "type": "<ALPHANUM>",
+      "position": 0
+    },
+    {
+      "token": "like",
+      "start_offset": 2,
+      "end_offset": 6,
+      "type": "<ALPHANUM>",
+      "position": 1
+    },
+    {
+      "token": "cat",
+      "start_offset": 7,
+      "end_offset": 11,
+      "type": "<ALPHANUM>",
+      "position": 2
+    }
+  ]
+}
+--------------------------------------------------
+// TESTRESPONSE

+ 78 - 11
docs/reference/analysis/tokenfilters/keyword-repeat-tokenfilter.asciidoc

@@ -9,18 +9,85 @@ subsequent stemmer will be indexed twice. Therefore, consider adding a
 `unique` filter with `only_on_same_position` set to `true` to drop
 unnecessary duplicates.
 
-Here is an example:
+Here is an example of using the `keyword_repeat` token filter to
+preserve both the stemmed and unstemmed versions of tokens:
 
 [source,js]
 --------------------------------------------------
-index :
-    analysis :
-        analyzer :
-            myAnalyzer :
-                type : custom
-                tokenizer : standard
-                filter : [lowercase, keyword_repeat, porter_stem, unique_stem]    
-            unique_stem:
-                type: unique
-                only_on_same_position : true
+PUT /keyword_repeat_example
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "stemmed_and_unstemmed": {
+          "type": "custom",
+          "tokenizer": "standard",
+          "filter": ["lowercase", "keyword_repeat", "porter_stem", "unique_stem"]
+        }
+      },
+      "filter": {
+        "unique_stem": {
+          "type": "unique",
+          "only_on_same_position": true
+        }
+      }
+    }
+  }
+}
 --------------------------------------------------
+// CONSOLE
+
+And you can test it with:
+
+[source,js]
+--------------------------------------------------
+POST /keyword_repeat_example/_analyze
+{
+  "analyzer" : "stemmed_and_unstemmed",
+  "text" : "I like cats"
+}
+--------------------------------------------------
+// CONSOLE
+// TEST[continued]
+
+And it'd respond:
+
+[source,js]
+--------------------------------------------------
+{
+  "tokens": [
+    {
+      "token": "i",
+      "start_offset": 0,
+      "end_offset": 1,
+      "type": "<ALPHANUM>",
+      "position": 0
+    },
+    {
+      "token": "like",
+      "start_offset": 2,
+      "end_offset": 6,
+      "type": "<ALPHANUM>",
+      "position": 1
+    },
+    {
+      "token": "cats",
+      "start_offset": 7,
+      "end_offset": 11,
+      "type": "<ALPHANUM>",
+      "position": 2
+    },
+    {
+      "token": "cat",
+      "start_offset": 7,
+      "end_offset": 11,
+      "type": "<ALPHANUM>",
+      "position": 2
+    }
+  ]
+}
+--------------------------------------------------
+// TESTRESPONSE
+
+This preserves both the `cat` and `cats` tokens. Compare this to the example
+for the <<analysis-keyword-marker-tokenfilter>>.

+ 21 - 11
docs/reference/analysis/tokenfilters/limit-token-count-tokenfilter.asciidoc

@@ -18,15 +18,25 @@ Here is an example:
 
 [source,js]
 --------------------------------------------------
-index :
-    analysis :
-        analyzer :
-            myAnalyzer :
-                type : custom
-                tokenizer : standard
-                filter : [lowercase, five_token_limit]
-        filter :
-            five_token_limit :
-                type : limit
-                max_token_count : 5
+PUT /limit_example
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "limit_example": {
+          "type": "custom",
+          "tokenizer": "standard",
+          "filter": ["lowercase", "five_token_limit"]
+        }
+      },
+      "filter": {
+        "five_token_limit": {
+          "type": "limit",
+          "max_token_count": 5
+        }
+      }
+    }
+  }
+}
 --------------------------------------------------
+// CONSOLE

+ 26 - 24
docs/reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc

@@ -10,28 +10,30 @@ custom analyzer
 
 [source,js]
 --------------------------------------------------
-index :
-    analysis :
-        analyzer : 
-            myAnalyzer2 :
-                type : custom
-                tokenizer : myTokenizer1
-                filter : [myTokenFilter1, myGreekLowerCaseFilter]
-                char_filter : [my_html]
-        tokenizer :
-            myTokenizer1 :
-                type : standard
-                max_token_length : 900
-        filter :
-            myTokenFilter1 :
-                type : stop
-                stopwords : [stop1, stop2, stop3, stop4]
-            myGreekLowerCaseFilter :
-                type : lowercase
-                language : greek
-        char_filter :
-              my_html :
-                type : html_strip
-                escaped_tags : [xxx, yyy]
-                read_ahead : 1024
+PUT /lowercase_example
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "standard_lowercase_example": {
+          "type": "custom",
+          "tokenizer": "standard",
+          "filter": ["lowercase"]
+        },
+        "greek_lowercase_example": {
+          "type": "custom",
+          "tokenizer": "standard",
+          "filter": ["greek_lowercase"]
+        }
+      },
+      "filter": {
+        "greek_lowercase": {
+          "type": "lowercase",
+          "language": "greek"
+        }
+      }
+    }
+  }
+}
 --------------------------------------------------
+// CONSOLE

+ 4 - 0
docs/src/test/cluster/config/analysis/example_word_list.txt

@@ -0,0 +1,4 @@
+test
+list
+of
+words

+ 21 - 0
docs/src/test/cluster/config/analysis/hyphenation_patterns.xml

@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE hyphenation-info SYSTEM "hyphenation.dtd">
+
+<!-- Example hyphenation patterns file. -->
+
+<hyphenation-info>
+
+<hyphen-char value="-"/>
+<hyphen-min before="2" after="2"/>
+
+<classes>
+aA
+</classes>
+
+<exceptions>
+</exceptions>
+
+<patterns>
+.a2
+</patterns>
+</hyphenation-info>