
Analysis: Add additional Analyzers, Tokenizers, and TokenFilters from Lucene

Add `irish` analyzer
Add `sorani` analyzer (Kurdish)

Add `classic` tokenizer: specific to English text; tries to recognize hostnames, company names, acronyms, etc.
Add `thai` tokenizer: segments Thai text into words.

Add `classic` tokenfilter: cleans up acronyms and possessives produced by the `classic` tokenizer
Add `apostrophe` tokenfilter: removes the text after an apostrophe, including the apostrophe itself
Add `german_normalization` tokenfilter: umlaut and sharp-s (ß) normalization
Add `hindi_normalization` tokenfilter: accounts for Hindi spelling differences
Add `indic_normalization` tokenfilter: accounts for different Unicode representations in Indian languages
Add `sorani_normalization` tokenfilter: normalizes Kurdish (Sorani) text
Add `scandinavian_normalization` tokenfilter: normalizes Norwegian, Danish, and Swedish text
Add `scandinavian_folding` tokenfilter: a much more aggressive form of `scandinavian_normalization`
Add additional languages to the `stemmer` tokenfilter: `galician`, `minimal_galician`, `irish`, `sorani`, `light_norwegian`, `light_nynorsk`, `minimal_nynorsk`

Add access to the default Thai stopword set `_thai_`

Fix some bugs and broken links in documentation.
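
As a quick sketch of the new pieces working together (hypothetical analyzer name, using the index-settings syntax from the docs below), the new tokenizer and token filters can be combined in a custom analyzer:

[source,js]
----------------------------------------------------
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_classic": { <1>
          "tokenizer": "classic",
          "filter": [
            "classic",
            "apostrophe",
            "lowercase"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
<1> Hypothetical analyzer name for illustration; the `classic` tokenizer, and the
    `classic` and `apostrophe` tokenfilters, are the new components.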

Closes #5935
Robert Muir, 11 years ago
parent commit b9a09c2b06

28 changed files with 965 additions and 69 deletions
  1. docs/reference/analysis/analyzers/lang-analyzer.asciidoc (+222 -16)
  2. docs/reference/analysis/tokenfilters.asciidoc (+4 -0)
  3. docs/reference/analysis/tokenfilters/apostrophe-tokenfilter.asciidoc (+7 -0)
  4. docs/reference/analysis/tokenfilters/classic-tokenfilter.asciidoc (+11 -0)
  5. docs/reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc (+1 -1)
  6. docs/reference/analysis/tokenfilters/normalization-tokenfilter.asciidoc (+30 -9)
  7. docs/reference/analysis/tokenfilters/stemmer-tokenfilter.asciidoc (+25 -7)
  8. docs/reference/analysis/tokenizers.asciidoc (+4 -0)
  9. docs/reference/analysis/tokenizers/classic-tokenizer.asciidoc (+21 -0)
  10. docs/reference/analysis/tokenizers/thai-tokenizer.asciidoc (+9 -0)
  11. src/main/java/org/elasticsearch/index/analysis/Analysis.java (+6 -0)
  12. src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java (+13 -0)
  13. src/main/java/org/elasticsearch/index/analysis/ApostropheFilterFactory.java (+44 -0)
  14. src/main/java/org/elasticsearch/index/analysis/ClassicFilterFactory.java (+44 -0)
  15. src/main/java/org/elasticsearch/index/analysis/ClassicTokenizerFactory.java (+52 -0)
  16. src/main/java/org/elasticsearch/index/analysis/GermanNormalizationFilterFactory.java (+44 -0)
  17. src/main/java/org/elasticsearch/index/analysis/HindiNormalizationFilterFactory.java (+44 -0)
  18. src/main/java/org/elasticsearch/index/analysis/IndicNormalizationFilterFactory.java (+44 -0)
  19. src/main/java/org/elasticsearch/index/analysis/IrishAnalyzerProvider.java (+50 -0)
  20. src/main/java/org/elasticsearch/index/analysis/LowerCaseTokenFilterFactory.java (+10 -1)
  21. src/main/java/org/elasticsearch/index/analysis/ScandinavianFoldingFilterFactory.java (+44 -0)
  22. src/main/java/org/elasticsearch/index/analysis/ScandinavianNormalizationFilterFactory.java (+44 -0)
  23. src/main/java/org/elasticsearch/index/analysis/SoraniAnalyzerProvider.java (+50 -0)
  24. src/main/java/org/elasticsearch/index/analysis/SoraniNormalizationFilterFactory.java (+44 -0)
  25. src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java (+28 -1)
  26. src/main/java/org/elasticsearch/index/analysis/ThaiTokenizerFactory.java (+46 -0)
  27. src/main/java/org/elasticsearch/indices/analysis/PreBuiltAnalyzers.java (+8 -0)
  28. src/test/java/org/elasticsearch/index/analysis/AnalysisFactoryTests.java (+16 -34)

+ 222 - 16
docs/reference/analysis/analyzers/lang-analyzer.asciidoc

@@ -23,12 +23,14 @@ following types are supported:
 <<hindi-analyzer,`hindi`>>,
 <<hungarian-analyzer,`hungarian`>>,
 <<indonesian-analyzer,`indonesian`>>,
+<<irish-analyzer,`irish`>>,
 <<italian-analyzer,`italian`>>,
 <<norwegian-analyzer,`norwegian`>>,
 <<persian-analyzer,`persian`>>,
 <<portuguese-analyzer,`portuguese`>>,
 <<romanian-analyzer,`romanian`>>,
 <<russian-analyzer,`russian`>>,
+<<sorani-analyzer,`sorani`>>,
 <<spanish-analyzer,`spanish`>>,
 <<swedish-analyzer,`swedish`>>,
 <<turkish-analyzer,`turkish`>>,
@@ -42,8 +44,8 @@ more details.
 The following analyzers support setting custom `stem_exclusion` list:
 `arabic`, `armenian`, `basque`, `catalan`, `bulgarian`, `catalan`,
 `czech`, `finnish`, `dutch`, `english`, `finnish`, `french`, `galician`,
-`german`, `hindi`, `hungarian`, `indonesian`, `italian`, `norwegian`,
-`portuguese`, `romanian`, `russian`, `spanish`, `swedish`, `turkish`.
+`german`, `irish`, `hindi`, `hungarian`, `indonesian`, `italian`, `norwegian`,
+`portuguese`, `romanian`, `russian`, `sorani`, `spanish`, `swedish`, `turkish`.
 
 [[arabic-analyzer]]
 ==== `arabic` analyzer
@@ -720,7 +722,7 @@ The `german` analyzer could be reimplemented as a `custom` analyzer as follows:
             "lowercase",
             "german_stop",
             "german_keywords",
-            "ascii_folding", <3>
+            "german_normalization",
             "german_stemmer"
           ]
         }
@@ -733,9 +735,6 @@ The `german` analyzer could be reimplemented as a `custom` analyzer as follows:
     or `stopwords_path` parameters.
 <2> Words can be excluded from stemming with the `stem_exclusion`
     parameter.
-<3> The `german` analyzer actually uses the GermanNormalizationFilter,
-    which isn't exposed in Elasticsearch.  The `ascii_folding` filter
-    does a similar job but is more extensive.
 
 [[greek-analyzer]]
 ==== `greek` analyzer
@@ -752,6 +751,10 @@ The `greek` analyzer could be reimplemented as a `custom` analyzer as follows:
           "type":       "stop",
           "stopwords":  "_greek_" <1>
         },
+        "greek_lowercase": {
+          "type":       "lowercase",
+          "language":   "greek"
+        },
         "greek_keywords": {
           "type":       "keyword_marker",
           "keywords":   [] <2>
@@ -765,7 +768,7 @@ The `greek` analyzer could be reimplemented as a `custom` analyzer as follows:
         "greek": {
           "tokenizer":  "standard",
           "filter": [
-            "lowercase",
+            "greek_lowercase",
             "greek_stop",
             "greek_keywords",
             "greek_stemmer"
@@ -784,9 +787,48 @@ The `greek` analyzer could be reimplemented as a `custom` analyzer as follows:
 [[hindi-analyzer]]
 ==== `hindi` analyzer
 
-The `hindi` analyzer cannot currently be implemented as a `custom` analyzer
-as it depends on the IndicNormalizationFilter and HindiNormalizationFilter
-which are not yet exposed by Elasticsearch. Instead, see the <<analysis-icu-plugin>>.
+The `hindi` analyzer could be reimplemented as a `custom` analyzer as follows:
+
+[source,js]
+----------------------------------------------------
+{
+  "settings": {
+    "analysis": {
+      "filter": {
+        "hindi_stop": {
+          "type":       "stop",
+          "stopwords":  "_hindi_" <1>
+        },
+        "hindi_keywords": {
+          "type":       "keyword_marker",
+          "keywords":   [] <2>
+        },
+        "hindi_stemmer": {
+          "type":       "stemmer",
+          "language":   "hindi"
+        }
+      },
+      "analyzer": {
+        "hindi": {
+          "tokenizer":  "standard",
+          "filter": [
+            "lowercase",
+            "indic_normalization",
+            "hindi_normalization",
+            "hindi_stop",
+            "hindi_keywords",
+            "hindi_stemmer"
+          ]
+        }
+      }
+    }
+  }
+}
+----------------------------------------------------
+<1> The default stopwords can be overridden with the `stopwords`
+    or `stopwords_path` parameters.
+<2> Words can be excluded from stemming with the `stem_exclusion`
+    parameter.
 
 [[hungarian-analyzer]]
 ==== `hungarian` analyzer
@@ -877,6 +919,59 @@ The `indonesian` analyzer could be reimplemented as a `custom` analyzer as follo
 <2> Words can be excluded from stemming with the `stem_exclusion`
     parameter.
 
+[[irish-analyzer]]
+==== `irish` analyzer
+
+The `irish` analyzer could be reimplemented as a `custom` analyzer as follows:
+
+[source,js]
+----------------------------------------------------
+{
+  "settings": {
+    "analysis": {
+      "filter": {
+        "irish_elision": {
+          "type":       "elision",
+          "articles": [ "h", "n", "t" ]
+        },
+        "irish_stop": {
+          "type":       "stop",
+          "stopwords":  "_irish_" <1>
+        },
+        "irish_lowercase": {
+          "type":       "lowercase",
+          "language":   "irish"
+        },
+        "irish_keywords": {
+          "type":       "keyword_marker",
+          "keywords":   [] <2>
+        },
+        "irish_stemmer": {
+          "type":       "stemmer",
+          "language":   "irish"
+        }
+      },
+      "analyzer": {
+        "irish": {
+          "tokenizer":  "standard",
+          "filter": [
+            "irish_stop",
+            "irish_elision",
+            "irish_lowercase",
+            "irish_keywords",
+            "irish_stemmer"
+          ]
+        }
+      }
+    }
+  }
+}
+----------------------------------------------------
+<1> The default stopwords can be overridden with the `stopwords`
+    or `stopwords_path` parameters.
+<2> Words can be excluded from stemming with the `stem_exclusion`
+    parameter.
+
 [[italian-analyzer]]
 ==== `italian` analyzer
 
@@ -1150,6 +1245,51 @@ The `russian` analyzer could be reimplemented as a `custom` analyzer as follows:
 <2> Words can be excluded from stemming with the `stem_exclusion`
     parameter.
 
+[[sorani-analyzer]]
+==== `sorani` analyzer
+
+The `sorani` analyzer could be reimplemented as a `custom` analyzer as follows:
+
+[source,js]
+----------------------------------------------------
+{
+  "settings": {
+    "analysis": {
+      "filter": {
+        "sorani_stop": {
+          "type":       "stop",
+          "stopwords":  "_sorani_" <1>
+        },
+        "sorani_keywords": {
+          "type":       "keyword_marker",
+          "keywords":   [] <2>
+        },
+        "sorani_stemmer": {
+          "type":       "stemmer",
+          "language":   "sorani"
+        }
+      },
+      "analyzer": {
+        "sorani": {
+          "tokenizer":  "standard",
+          "filter": [
+            "sorani_normalization",
+            "lowercase",
+            "sorani_stop",
+            "sorani_keywords",
+            "sorani_stemmer"
+          ]
+        }
+      }
+    }
+  }
+}
+----------------------------------------------------
+<1> The default stopwords can be overridden with the `stopwords`
+    or `stopwords_path` parameters.
+<2> Words can be excluded from stemming with the `stem_exclusion`
+    parameter.
+
 [[spanish-analyzer]]
 ==== `spanish` analyzer
 
@@ -1241,14 +1381,80 @@ The `swedish` analyzer could be reimplemented as a `custom` analyzer as follows:
 [[turkish-analyzer]]
 ==== `turkish` analyzer
 
-The `turkish` analyzer cannot currently be implemented as a `custom` analyzer
-because it depends on the TurkishLowerCaseFilter and the ApostropheFilter
-which are not exposed in Elasticsearch. Instead, see the <<analysis-icu-plugin>>.
+The `turkish` analyzer could be reimplemented as a `custom` analyzer as follows:
+
+[source,js]
+----------------------------------------------------
+{
+  "settings": {
+    "analysis": {
+      "filter": {
+        "turkish_stop": {
+          "type":       "stop",
+          "stopwords":  "_turkish_" <1>
+        },
+        "turkish_lowercase": {
+          "type":       "lowercase",
+          "language":   "turkish"
+        },
+        "turkish_keywords": {
+          "type":       "keyword_marker",
+          "keywords":   [] <2>
+        },
+        "turkish_stemmer": {
+          "type":       "stemmer",
+          "language":   "turkish"
+        }
+      },
+      "analyzer": {
+        "turkish": {
+          "tokenizer":  "standard",
+          "filter": [
+            "apostrophe",
+            "turkish_lowercase",
+            "turkish_stop",
+            "turkish_keywords",
+            "turkish_stemmer"
+          ]
+        }
+      }
+    }
+  }
+}
+----------------------------------------------------
+<1> The default stopwords can be overridden with the `stopwords`
+    or `stopwords_path` parameters.
+<2> Words can be excluded from stemming with the `stem_exclusion`
+    parameter.
 
 [[thai-analyzer]]
 ==== `thai` analyzer
 
-The `thai` analyzer cannot currently be implemented as a `custom` analyzer
-because it depends on the ThaiTokenizer which is not exposed in Elasticsearch.
-Instead, see the <<analysis-icu-plugin>>.
+The `thai` analyzer could be reimplemented as a `custom` analyzer as follows:
 
+[source,js]
+----------------------------------------------------
+{
+  "settings": {
+    "analysis": {
+      "filter": {
+        "thai_stop": {
+          "type":       "stop",
+          "stopwords":  "_thai_" <1>
+        }
+      },
+      "analyzer": {
+        "thai": {
+          "tokenizer":  "thai",
+          "filter": [
+            "lowercase",
+            "thai_stop"
+          ]
+        }
+      }
+    }
+  }
+}
+----------------------------------------------------
+<1> The default stopwords can be overridden with the `stopwords`
+    or `stopwords_path` parameters.

+ 4 - 0
docs/reference/analysis/tokenfilters.asciidoc

@@ -78,3 +78,7 @@ include::tokenfilters/cjk-bigram-tokenfilter.asciidoc[]
 include::tokenfilters/delimited-payload-tokenfilter.asciidoc[]
 
 include::tokenfilters/keep-words-tokenfilter.asciidoc[]
+
+include::tokenfilters/classic-tokenfilter.asciidoc[]
+
+include::tokenfilters/apostrophe-tokenfilter.asciidoc[]

+ 7 - 0
docs/reference/analysis/tokenfilters/apostrophe-tokenfilter.asciidoc

@@ -0,0 +1,7 @@
+[[analysis-apostrophe-tokenfilter]]
+=== Apostrophe Token Filter
+
+coming[1.3.0]
+
+The `apostrophe` token filter strips all characters after an apostrophe,
+including the apostrophe itself.

+ 11 - 0
docs/reference/analysis/tokenfilters/classic-tokenfilter.asciidoc

@@ -0,0 +1,11 @@
+[[analysis-classic-tokenfilter]]
+=== Classic Token Filter
+
+coming[1.3.0]
+
+The `classic` token filter does optional post-processing of
+terms that are generated by the <<analysis-classic-tokenizer,`classic` tokenizer>>.
+
+This filter removes the English possessive from the end of words, and
+it removes dots from acronyms.
+

+ 1 - 1
docs/reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc

@@ -4,7 +4,7 @@
 A token filter of type `lowercase` that normalizes token text to lower
 case.
 
-Lowercase token filter supports Greek and Turkish lowercase token
+Lowercase token filter supports Greek, Irish coming[1.3.0], and Turkish lowercase token
 filters through the `language` parameter. Below is a usage example in a
 custom analyzer
 

+ 30 - 9
docs/reference/analysis/tokenfilters/normalization-tokenfilter.asciidoc

@@ -4,12 +4,33 @@
 There are several token filters available which try to normalize special
 characters of a certain language.
 
-You can currently choose between `arabic_normalization` and
-`persian_normalization` normalization in your token filter
-configuration. For more information check the
-http://lucene.apache.org/core/4_3_1/analyzers-common/org/apache/lucene/analysis/ar/ArabicNormalizer.html[ArabicNormalizer]
-or the
-http://lucene.apache.org/core/4_3_1/analyzers-common/org/apache/lucene/analysis/fa/PersianNormalizer.html[PersianNormalizer]
-documentation.
-
-*Note:* These filters are available since `0.90.2`
+[horizontal]
+Arabic::
+
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/ar/ArabicNormalizer.html[`arabic_normalization`]
+
+German::
+
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/de/GermanNormalizationFilter.html[`german_normalization`] coming[1.3.0]
+
+Hindi::
+
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/hi/HindiNormalizer.html[`hindi_normalization`] coming[1.3.0]
+
+Indic::
+
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/in/IndicNormalizer.html[`indic_normalization`] coming[1.3.0]
+
+Kurdish (Sorani)::
+
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/ckb/SoraniNormalizer.html[`sorani_normalization`] coming[1.3.0]
+
+Persian::
+
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/fa/PersianNormalizer.html[`persian_normalization`]
+
+Scandinavian::
+
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/miscellaneous/ScandinavianNormalizationFilter.html[`scandinavian_normalization`] coming[1.3.0]
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/miscellaneous/ScandinavianFoldingFilter.html[`scandinavian_folding`] coming[1.3.0]
+
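
A sketch of wiring one of these into a custom analyzer (hypothetical analyzer name). Per the Lucene javadocs, `scandinavian_folding` folds å/ä/æ to `a` and ö/ø to `o`, while `scandinavian_normalization` only maps between the interchangeable Scandinavian spellings:

[source,js]
----------------------------------------------------
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_scandinavian": { <1>
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "scandinavian_folding"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
<1> Hypothetical analyzer name for illustration.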

+ 25 - 7
docs/reference/analysis/tokenfilters/stemmer-tokenfilter.asciidoc

@@ -32,7 +32,7 @@ available values (the preferred filters are marked in *bold*):
 [horizontal]
 Arabic::
 
-http://lucene.apache.org/core/4_3_0/analyzers-common/index.html?org%2Fapache%2Flucene%2Fanalysis%2Far%2FArabicStemmer.html[*`arabic`*]
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/ar/ArabicStemmer.html[*`arabic`*]
 
 Armenian::
 
@@ -44,7 +44,7 @@ http://snowball.tartarus.org/algorithms/basque/stemmer.html[*`basque`*]
 
 Brazilian Portuguese::
 
-http://lucene.apache.org/core/4_3_0/analyzers-common/index.html?org%2Fapache%2Flucene%2Fanalysis%2Fbr%2FBrazilianStemmer.html[*`brazilian`*]
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/br/BrazilianStemmer.html[*`brazilian`*]
 
 Bulgarian::
 
@@ -72,7 +72,7 @@ English::
 http://snowball.tartarus.org/algorithms/porter/stemmer.html[*`english`*] coming[1.3.0,Returns the <<analysis-porterstem-tokenfilter,`porter_stem`>> instead of the <<analysis-snowball-tokenfilter,`english` Snowball token filter>>],
 http://ciir.cs.umass.edu/pubfiles/ir-35.pdf[`light_english`] coming[1.3.0,Returns the <<analysis-kstem-tokenfilter,`kstem` token filter>>],
 http://www.medialab.tfe.umu.se/courses/mdm0506a/material/fulltext_ID%3D10049387%26PLACEBO%3DIE.pdf[`minimal_english`],
-http://lucene.apache.org/core/4_3_0/analyzers-common/index.html?org%2Fapache%2Flucene%2Fanalysis%2Fen%2FEnglishPossessiveFilter.html[`possessive_english`],
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/en/EnglishPossessiveFilter.html[`possessive_english`],
 http://snowball.tartarus.org/algorithms/english/stemmer.html[`porter2`] coming[1.3.0,Returns the <<analysis-snowball-tokenfilter,`english` Snowball token filter>> instead of the <<analysis-snowball-tokenfilter,`porter` Snowball token filter>>],
 http://snowball.tartarus.org/algorithms/lovins/stemmer.html[`lovins`]
 
@@ -87,6 +87,11 @@ http://snowball.tartarus.org/algorithms/french/stemmer.html[`french`],
 http://dl.acm.org/citation.cfm?id=1141523[*`light_french`*],
 http://dl.acm.org/citation.cfm?id=318984[`minimal_french`]
 
+Galician::
+
+http://bvg.udc.es/recursos_lingua/stemming.jsp[*`galician`*] coming[1.3.0],
+http://bvg.udc.es/recursos_lingua/stemming.jsp[`minimal_galician`] (Plural step only) coming[1.3.0]
+
 German::
 
 http://snowball.tartarus.org/algorithms/german/stemmer.html[`german`],
@@ -111,19 +116,33 @@ Indonesian::
 
 http://www.illc.uva.nl/Publications/ResearchReports/MoL-2003-02.text.pdf[*`indonesian`*]
 
+Irish::
+
+http://snowball.tartarus.org/otherapps/oregan/intro.html[*`irish`*]
+
 Italian::
 
 http://snowball.tartarus.org/algorithms/italian/stemmer.html[`italian`],
 http://www.ercim.eu/publication/ws-proceedings/CLEF2/savoy.pdf[*`light_italian`*]
 
+Kurdish (Sorani)::
+
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/ckb/SoraniStemmer.html[*`sorani`*] coming[1.3.0]
+
 Latvian::
 
-http://lucene.apache.org/core/4_3_0/analyzers-common/index.html?org%2Fapache%2Flucene%2Fanalysis%2Flv%2FLatvianStemmer.html[*`latvian`*]
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/lv/LatvianStemmer.html[*`latvian`*]
 
-Norwegian::
+Norwegian (Bokmål)::
 
 http://snowball.tartarus.org/algorithms/norwegian/stemmer.html[*`norwegian`*],
-http://lucene.apache.org/core/4_3_0/analyzers-common/index.html?org%2Fapache%2Flucene%2Fanalysis%2Fno%2FNorwegianMinimalStemFilter.html[`minimal_norwegian`]
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/no/NorwegianLightStemmer.html[*`light_norwegian`*] coming[1.3.0]
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/no/NorwegianMinimalStemmer.html[`minimal_norwegian`]
+
+Norwegian (Nynorsk)::
+
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/no/NorwegianLightStemmer.html[*`light_nynorsk`*] coming[1.3.0]
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/no/NorwegianMinimalStemmer.html[`minimal_nynorsk`] coming[1.3.0]
 
 Portuguese::
 
@@ -132,7 +151,6 @@ http://dl.acm.org/citation.cfm?id=1141523&dl=ACM&coll=DL&CFID=179095584&CFTOKEN=
 http://www.inf.ufrgs.br/\~buriol/papers/Orengo_CLEF07.pdf[`minimal_portuguese`],
 http://www.inf.ufrgs.br/\~viviane/rslp/index.htm[`portuguese_rslp`] coming[1.3.0]
 
-
 Romanian::
 
 http://snowball.tartarus.org/algorithms/romanian/stemmer.html[*`romanian`*]
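
A sketch of one of the new stemmer values in a `stemmer` filter definition (hypothetical filter name; the other new values are configured the same way):

[source,js]
----------------------------------------------------
{
  "settings": {
    "analysis": {
      "filter": {
        "nynorsk_stemmer": { <1>
          "type":     "stemmer",
          "language": "light_nynorsk"
        }
      }
    }
  }
}
----------------------------------------------------
<1> Hypothetical filter name; `galician`, `minimal_galician`, `irish`, `sorani`,
    `light_norwegian`, and `minimal_nynorsk` work identically.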

+ 4 - 0
docs/reference/analysis/tokenizers.asciidoc

@@ -28,3 +28,7 @@ include::tokenizers/uaxurlemail-tokenizer.asciidoc[]
 
 include::tokenizers/pathhierarchy-tokenizer.asciidoc[]
 
+include::tokenizers/classic-tokenizer.asciidoc[]
+
+include::tokenizers/thai-tokenizer.asciidoc[]
+

+ 21 - 0
docs/reference/analysis/tokenizers/classic-tokenizer.asciidoc

@@ -0,0 +1,21 @@
+[[analysis-classic-tokenizer]]
+=== Classic Tokenizer
+
+coming[1.3.0]
+
+A tokenizer of type `classic`, providing a grammar-based tokenizer that is
+a good tokenizer for English language documents. This tokenizer has
+heuristics for special treatment of acronyms, company names, email addresses,
+and internet host names. However, these rules don't always work, and
+the tokenizer doesn't work well for most languages other than English.
+
+The following are settings that can be set for a `classic` tokenizer
+type:
+
+[cols="<,<",options="header",]
+|=======================================================================
+|Setting |Description
+|`max_token_length` |The maximum token length. If a token is seen that
+exceeds this length then it is discarded. Defaults to `255`.
+|=======================================================================
+
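
A configuration sketch (hypothetical tokenizer name and illustrative length value):

[source,js]
----------------------------------------------------
{
  "settings": {
    "analysis": {
      "tokenizer": {
        "my_classic": { <1>
          "type": "classic",
          "max_token_length": 120
        }
      }
    }
  }
}
----------------------------------------------------
<1> Hypothetical name; with this setting, tokens longer than 120 characters are discarded.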

+ 9 - 0
docs/reference/analysis/tokenizers/thai-tokenizer.asciidoc

@@ -0,0 +1,9 @@
+[[analysis-thai-tokenizer]]
+=== Thai Tokenizer
+
+coming[1.3.0]
+
+A tokenizer of type `thai` that segments Thai text into words. This tokenizer
+uses the built-in Thai segmentation algorithm included with Java to divide
+up Thai text. Text in other languages is, in general, treated the same as by
+the `standard` tokenizer.

+ 6 - 0
src/main/java/org/elasticsearch/index/analysis/Analysis.java

@@ -28,6 +28,7 @@ import org.apache.lucene.analysis.ar.ArabicAnalyzer;
 import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
 import org.apache.lucene.analysis.br.BrazilianAnalyzer;
 import org.apache.lucene.analysis.ca.CatalanAnalyzer;
+import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
 import org.apache.lucene.analysis.cz.CzechAnalyzer;
 import org.apache.lucene.analysis.da.DanishAnalyzer;
 import org.apache.lucene.analysis.de.GermanAnalyzer;
@@ -38,6 +39,7 @@ import org.apache.lucene.analysis.eu.BasqueAnalyzer;
 import org.apache.lucene.analysis.fa.PersianAnalyzer;
 import org.apache.lucene.analysis.fi.FinnishAnalyzer;
 import org.apache.lucene.analysis.fr.FrenchAnalyzer;
+import org.apache.lucene.analysis.ga.IrishAnalyzer;
 import org.apache.lucene.analysis.gl.GalicianAnalyzer;
 import org.apache.lucene.analysis.hi.HindiAnalyzer;
 import org.apache.lucene.analysis.hu.HungarianAnalyzer;
@@ -50,6 +52,7 @@ import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
 import org.apache.lucene.analysis.ro.RomanianAnalyzer;
 import org.apache.lucene.analysis.ru.RussianAnalyzer;
 import org.apache.lucene.analysis.sv.SwedishAnalyzer;
+import org.apache.lucene.analysis.th.ThaiAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tr.TurkishAnalyzer;
 import org.apache.lucene.analysis.util.CharArraySet;
@@ -134,14 +137,17 @@ public class Analysis {
             .put("_hindi_", HindiAnalyzer.getDefaultStopSet())
             .put("_hungarian_", HungarianAnalyzer.getDefaultStopSet())
             .put("_indonesian_", IndonesianAnalyzer.getDefaultStopSet())
+            .put("_irish_", IrishAnalyzer.getDefaultStopSet())
             .put("_italian_", ItalianAnalyzer.getDefaultStopSet())
             .put("_norwegian_", NorwegianAnalyzer.getDefaultStopSet())
             .put("_persian_", PersianAnalyzer.getDefaultStopSet())
             .put("_portuguese_", PortugueseAnalyzer.getDefaultStopSet())
             .put("_romanian_", RomanianAnalyzer.getDefaultStopSet())
             .put("_russian_", RussianAnalyzer.getDefaultStopSet())
+            .put("_sorani_", SoraniAnalyzer.getDefaultStopSet())
             .put("_spanish_", SpanishAnalyzer.getDefaultStopSet())
             .put("_swedish_", SwedishAnalyzer.getDefaultStopSet())
+            .put("_thai_", ThaiAnalyzer.getDefaultStopSet())
             .put("_turkish_", TurkishAnalyzer.getDefaultStopSet())
             .immutableMap();
 

+ 13 - 0
src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java

@@ -503,11 +503,20 @@ public class AnalysisModule extends AbstractModule {
             tokenFiltersBindings.processTokenFilter("stemmer_override", StemmerOverrideTokenFilterFactory.class);
 
             tokenFiltersBindings.processTokenFilter("arabic_normalization", ArabicNormalizationFilterFactory.class);
+            tokenFiltersBindings.processTokenFilter("german_normalization", GermanNormalizationFilterFactory.class);
+            tokenFiltersBindings.processTokenFilter("hindi_normalization", HindiNormalizationFilterFactory.class);
+            tokenFiltersBindings.processTokenFilter("indic_normalization", IndicNormalizationFilterFactory.class);
+            tokenFiltersBindings.processTokenFilter("sorani_normalization", SoraniNormalizationFilterFactory.class);
             tokenFiltersBindings.processTokenFilter("persian_normalization", PersianNormalizationFilterFactory.class);
+            tokenFiltersBindings.processTokenFilter("scandinavian_normalization", ScandinavianNormalizationFilterFactory.class);
+            tokenFiltersBindings.processTokenFilter("scandinavian_folding", ScandinavianFoldingFilterFactory.class);
 
             tokenFiltersBindings.processTokenFilter("hunspell", HunspellTokenFilterFactory.class);
             tokenFiltersBindings.processTokenFilter("cjk_bigram", CJKBigramFilterFactory.class);
             tokenFiltersBindings.processTokenFilter("cjk_width", CJKWidthFilterFactory.class);
+            
+            tokenFiltersBindings.processTokenFilter("apostrophe", ApostropheFilterFactory.class);
+            tokenFiltersBindings.processTokenFilter("classic", ClassicFilterFactory.class);
 
 
         }
@@ -515,6 +524,8 @@ public class AnalysisModule extends AbstractModule {
         @Override
         public void processTokenizers(TokenizersBindings tokenizersBindings) {
             tokenizersBindings.processTokenizer("pattern", PatternTokenizerFactory.class);
+            tokenizersBindings.processTokenizer("classic", ClassicTokenizerFactory.class);
+            tokenizersBindings.processTokenizer("thai", ThaiTokenizerFactory.class);
         }
 
         @Override
@@ -542,6 +553,7 @@ public class AnalysisModule extends AbstractModule {
             analyzersBindings.processAnalyzer("hindi", HindiAnalyzerProvider.class);
             analyzersBindings.processAnalyzer("hungarian", HungarianAnalyzerProvider.class);
             analyzersBindings.processAnalyzer("indonesian", IndonesianAnalyzerProvider.class);
+            analyzersBindings.processAnalyzer("irish", IrishAnalyzerProvider.class);
             analyzersBindings.processAnalyzer("italian", ItalianAnalyzerProvider.class);
             analyzersBindings.processAnalyzer("latvian", LatvianAnalyzerProvider.class);
             analyzersBindings.processAnalyzer("norwegian", NorwegianAnalyzerProvider.class);
@@ -549,6 +561,7 @@ public class AnalysisModule extends AbstractModule {
             analyzersBindings.processAnalyzer("portuguese", PortugueseAnalyzerProvider.class);
             analyzersBindings.processAnalyzer("romanian", RomanianAnalyzerProvider.class);
             analyzersBindings.processAnalyzer("russian", RussianAnalyzerProvider.class);
+            analyzersBindings.processAnalyzer("sorani", SoraniAnalyzerProvider.class);
             analyzersBindings.processAnalyzer("spanish", SpanishAnalyzerProvider.class);
             analyzersBindings.processAnalyzer("swedish", SwedishAnalyzerProvider.class);
             analyzersBindings.processAnalyzer("turkish", TurkishAnalyzerProvider.class);

+ 44 - 0
src/main/java/org/elasticsearch/index/analysis/ApostropheFilterFactory.java

@@ -0,0 +1,44 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tr.ApostropheFilter;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/**
+ * Factory for {@link ApostropheFilter}
+ */
+public class ApostropheFilterFactory extends AbstractTokenFilterFactory {
+
+    @Inject
+    public ApostropheFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+    }
+
+    @Override
+    public TokenStream create(TokenStream tokenStream) {
+        return new ApostropheFilter(tokenStream);
+    }
+
+}

+ 44 - 0
src/main/java/org/elasticsearch/index/analysis/ClassicFilterFactory.java

@@ -0,0 +1,44 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.standard.ClassicFilter;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/**
+ * Factory for {@link ClassicFilter}
+ */
+public class ClassicFilterFactory extends AbstractTokenFilterFactory {
+
+    @Inject
+    public ClassicFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+    }
+
+    @Override
+    public TokenStream create(TokenStream tokenStream) {
+        return new ClassicFilter(tokenStream);
+    }
+
+}

+ 52 - 0
src/main/java/org/elasticsearch/index/analysis/ClassicTokenizerFactory.java

@@ -0,0 +1,52 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.ClassicTokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+import java.io.Reader;
+
+/**
+ * Factory for {@link ClassicTokenizer}
+ */
+public class ClassicTokenizerFactory extends AbstractTokenizerFactory {
+
+    private final int maxTokenLength;
+
+    @Inject
+    public ClassicTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+        maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+    }
+
+    @Override
+    public Tokenizer create(Reader reader) {
+        ClassicTokenizer tokenizer = new ClassicTokenizer(version, reader);
+        tokenizer.setMaxTokenLength(maxTokenLength);
+        return tokenizer;
+    }
+}

+ 44 - 0
src/main/java/org/elasticsearch/index/analysis/GermanNormalizationFilterFactory.java

@@ -0,0 +1,44 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.de.GermanNormalizationFilter;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/**
+ * Factory for {@link GermanNormalizationFilter}
+ */
+public class GermanNormalizationFilterFactory extends AbstractTokenFilterFactory {
+
+    @Inject
+    public GermanNormalizationFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+    }
+
+    @Override
+    public TokenStream create(TokenStream tokenStream) {
+        return new GermanNormalizationFilter(tokenStream);
+    }
+
+}

+ 44 - 0
src/main/java/org/elasticsearch/index/analysis/HindiNormalizationFilterFactory.java

@@ -0,0 +1,44 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/**
+ * Factory for {@link HindiNormalizationFilter}
+ */
+public class HindiNormalizationFilterFactory extends AbstractTokenFilterFactory {
+
+    @Inject
+    public HindiNormalizationFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+    }
+
+    @Override
+    public TokenStream create(TokenStream tokenStream) {
+        return new HindiNormalizationFilter(tokenStream);
+    }
+
+}

+ 44 - 0
src/main/java/org/elasticsearch/index/analysis/IndicNormalizationFilterFactory.java

@@ -0,0 +1,44 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.in.IndicNormalizationFilter;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/**
+ * Factory for {@link IndicNormalizationFilter}
+ */
+public class IndicNormalizationFilterFactory extends AbstractTokenFilterFactory {
+
+    @Inject
+    public IndicNormalizationFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+    }
+
+    @Override
+    public TokenStream create(TokenStream tokenStream) {
+        return new IndicNormalizationFilter(tokenStream);
+    }
+
+}

+ 50 - 0
src/main/java/org/elasticsearch/index/analysis/IrishAnalyzerProvider.java

@@ -0,0 +1,50 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.ga.IrishAnalyzer;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/**
+ * Provider for {@link IrishAnalyzer}
+ */
+public class IrishAnalyzerProvider extends AbstractIndexAnalyzerProvider<IrishAnalyzer> {
+
+    private final IrishAnalyzer analyzer;
+
+    @Inject
+    public IrishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+        analyzer = new IrishAnalyzer(version,
+                Analysis.parseStopWords(env, settings, IrishAnalyzer.getDefaultStopSet(), version),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+    }
+
+    @Override
+    public IrishAnalyzer get() {
+        return this.analyzer;
+    }
+}

+ 10 - 1
src/main/java/org/elasticsearch/index/analysis/LowerCaseTokenFilterFactory.java

@@ -22,6 +22,7 @@ package org.elasticsearch.index.analysis;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
+import org.apache.lucene.analysis.ga.IrishLowerCaseFilter;
 import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
 import org.elasticsearch.ElasticsearchIllegalArgumentException;
 import org.elasticsearch.common.inject.Inject;
@@ -31,7 +32,13 @@ import org.elasticsearch.index.Index;
 import org.elasticsearch.index.settings.IndexSettings;
 
 /**
- *
+ * Factory for {@link LowerCaseFilter} and some language-specific variants
+ * supported by the {@code language} parameter:
+ * <ul>
+ *   <li>greek: {@link GreekLowerCaseFilter}
+ *   <li>irish: {@link IrishLowerCaseFilter}
+ *   <li>turkish: {@link TurkishLowerCaseFilter}
+ * </ul>
  */
 public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory {
 
@@ -49,6 +56,8 @@ public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory {
             return new LowerCaseFilter(version, tokenStream);
         } else if (lang.equalsIgnoreCase("greek")) {
             return new GreekLowerCaseFilter(version, tokenStream);
+        } else if (lang.equalsIgnoreCase("irish")) {
+            return new IrishLowerCaseFilter(tokenStream);
         } else if (lang.equalsIgnoreCase("turkish")) {
             return new TurkishLowerCaseFilter(tokenStream);
         } else {
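
The new branch makes the Irish variant selectable through the existing `language` parameter, as in the `irish_lowercase` filter from the `irish` analyzer example above:

[source,js]
----------------------------------------------------
{
  "settings": {
    "analysis": {
      "filter": {
        "irish_lowercase": {
          "type":     "lowercase",
          "language": "irish"
        }
      }
    }
  }
}
----------------------------------------------------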

+ 44 - 0
src/main/java/org/elasticsearch/index/analysis/ScandinavianFoldingFilterFactory.java

@@ -0,0 +1,44 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilter;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/**
+ * Factory for {@link ScandinavianFoldingFilter}
+ */
+public class ScandinavianFoldingFilterFactory extends AbstractTokenFilterFactory {
+
+    @Inject
+    public ScandinavianFoldingFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+    }
+
+    @Override
+    public TokenStream create(TokenStream tokenStream) {
+        return new ScandinavianFoldingFilter(tokenStream);
+    }
+
+}

+ 44 - 0
src/main/java/org/elasticsearch/index/analysis/ScandinavianNormalizationFilterFactory.java

@@ -0,0 +1,44 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilter;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/**
+ * Factory for {@link ScandinavianNormalizationFilter}
+ */
+public class ScandinavianNormalizationFilterFactory extends AbstractTokenFilterFactory {
+
+    @Inject
+    public ScandinavianNormalizationFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+    }
+
+    @Override
+    public TokenStream create(TokenStream tokenStream) {
+        return new ScandinavianNormalizationFilter(tokenStream);
+    }
+
+}

+ 50 - 0
src/main/java/org/elasticsearch/index/analysis/SoraniAnalyzerProvider.java

@@ -0,0 +1,50 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/**
+ * Provider for {@link SoraniAnalyzer}
+ */
+public class SoraniAnalyzerProvider extends AbstractIndexAnalyzerProvider<SoraniAnalyzer> {
+
+    private final SoraniAnalyzer analyzer;
+
+    @Inject
+    public SoraniAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+        analyzer = new SoraniAnalyzer(version,
+                Analysis.parseStopWords(env, settings, SoraniAnalyzer.getDefaultStopSet(), version),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+    }
+
+    @Override
+    public SoraniAnalyzer get() {
+        return this.analyzer;
+    }
+}

+ 44 - 0
src/main/java/org/elasticsearch/index/analysis/SoraniNormalizationFilterFactory.java

@@ -0,0 +1,44 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ckb.SoraniNormalizationFilter;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/**
+ * Factory for {@link SoraniNormalizationFilter}
+ */
+public class SoraniNormalizationFilterFactory extends AbstractTokenFilterFactory {
+
+    @Inject
+    public SoraniNormalizationFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+    }
+
+    @Override
+    public TokenStream create(TokenStream tokenStream) {
+        return new SoraniNormalizationFilter(tokenStream);
+    }
+
+}

+ 28 - 1
src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java

@@ -23,6 +23,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ar.ArabicStemFilter;
 import org.apache.lucene.analysis.bg.BulgarianStemFilter;
 import org.apache.lucene.analysis.br.BrazilianStemFilter;
+import org.apache.lucene.analysis.ckb.SoraniStemFilter;
 import org.apache.lucene.analysis.cz.CzechStemFilter;
 import org.apache.lucene.analysis.de.GermanLightStemFilter;
 import org.apache.lucene.analysis.de.GermanMinimalStemFilter;
@@ -35,11 +36,15 @@ import org.apache.lucene.analysis.es.SpanishLightStemFilter;
 import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
 import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
 import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter;
+import org.apache.lucene.analysis.gl.GalicianMinimalStemFilter;
+import org.apache.lucene.analysis.gl.GalicianStemFilter;
 import org.apache.lucene.analysis.hi.HindiStemFilter;
 import org.apache.lucene.analysis.hu.HungarianLightStemFilter;
 import org.apache.lucene.analysis.id.IndonesianStemFilter;
 import org.apache.lucene.analysis.it.ItalianLightStemFilter;
 import org.apache.lucene.analysis.lv.LatvianStemFilter;
+import org.apache.lucene.analysis.no.NorwegianLightStemFilter;
+import org.apache.lucene.analysis.no.NorwegianLightStemmer;
 import org.apache.lucene.analysis.no.NorwegianMinimalStemFilter;
 import org.apache.lucene.analysis.pt.PortugueseLightStemFilter;
 import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter;
@@ -137,6 +142,12 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
             return new FrenchLightStemFilter(tokenStream);
         } else if ("minimal_french".equalsIgnoreCase(language) || "minimalFrench".equalsIgnoreCase(language)) {
             return new FrenchMinimalStemFilter(tokenStream);
+            
+            // Galician stemmers
+        } else if ("galician".equalsIgnoreCase(language)) {
+            return new GalicianStemFilter(tokenStream);
+        } else if ("minimal_galician".equalsIgnoreCase(language)) {
+            return new GalicianMinimalStemFilter(tokenStream);
 
             // German stemmers
         } else if ("german".equalsIgnoreCase(language)) {
@@ -161,6 +172,10 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
 
         } else if ("indonesian".equalsIgnoreCase(language)) {
             return new IndonesianStemFilter(tokenStream);
+            
+            // Irish stemmer
+        } else if ("irish".equalsIgnoreCase(language)) {
+            return new SnowballFilter(tokenStream, new IrishStemmer());
 
             // Italian stemmers
         } else if ("italian".equalsIgnoreCase(language)) {
@@ -171,11 +186,19 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
         } else if ("latvian".equalsIgnoreCase(language)) {
             return new LatvianStemFilter(tokenStream);
 
-            // Norwegian stemmers
+            // Norwegian (Bokmål) stemmers
         } else if ("norwegian".equalsIgnoreCase(language)) {
             return new SnowballFilter(tokenStream, new NorwegianStemmer());
+        } else if ("light_norwegian".equalsIgnoreCase(language) || "lightNorwegian".equalsIgnoreCase(language)) {
+            return new NorwegianLightStemFilter(tokenStream);
         } else if ("minimal_norwegian".equalsIgnoreCase(language) || "minimalNorwegian".equals(language)) {
             return new NorwegianMinimalStemFilter(tokenStream);
+            
+            // Norwegian (Nynorsk) stemmers 
+        } else if ("light_nynorsk".equalsIgnoreCase(language) || "lightNynorsk".equalsIgnoreCase(language)) {
+            return new NorwegianLightStemFilter(tokenStream, NorwegianLightStemmer.NYNORSK);
+        } else if ("minimal_nynorsk".equalsIgnoreCase(language) || "minimalNynorsk".equalsIgnoreCase(language)) {
+            return new NorwegianMinimalStemFilter(tokenStream, NorwegianLightStemmer.NYNORSK);
 
             // Portuguese stemmers
         } else if ("portuguese".equalsIgnoreCase(language)) {
@@ -201,6 +224,10 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
             return new SnowballFilter(tokenStream, new SpanishStemmer());
         } else if ("light_spanish".equalsIgnoreCase(language) || "lightSpanish".equalsIgnoreCase(language)) {
             return new SpanishLightStemFilter(tokenStream);
+            
+            // Sorani Kurdish stemmer
+        } else if ("sorani".equalsIgnoreCase(language)) {
+            return new SoraniStemFilter(tokenStream);
 
             // Swedish stemmers
         } else if ("swedish".equalsIgnoreCase(language)) {

+ 46 - 0
src/main/java/org/elasticsearch/index/analysis/ThaiTokenizerFactory.java

@@ -0,0 +1,46 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.th.ThaiTokenizer;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettings;
+
+import java.io.Reader;
+
+/**
+ * Factory for {@link ThaiTokenizer}
+ */
+public class ThaiTokenizerFactory extends AbstractTokenizerFactory {
+
+    @Inject
+    public ThaiTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+    }
+
+    @Override
+    public Tokenizer create(Reader reader) {
+        return new ThaiTokenizer(reader);
+    }
+}
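
A minimal usage sketch (not part of this commit) for the tokenizer this factory creates, assuming Lucene 4.x and a JRE whose BreakIterator supports Thai; the sample string is a placeholder. Unlike the deprecated ThaiWordFilter, ThaiTokenizer reports correct offsets:

```java
import org.apache.lucene.analysis.th.ThaiTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

import java.io.StringReader;

public class ThaiTokenizerSketch {
    public static void main(String[] args) throws Exception {
        // Segment a placeholder Thai sentence into words.
        ThaiTokenizer tok = new ThaiTokenizer(new StringReader("การที่ได้ต้องแสดงว่างานดี"));
        CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = tok.addAttribute(OffsetAttribute.class);
        tok.reset();
        while (tok.incrementToken()) {
            // Each token carries the term text plus correct start/end offsets.
            System.out.println(term + " [" + offset.startOffset() + "," + offset.endOffset() + "]");
        }
        tok.end();
        tok.close();
    }
}
```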

+ 8 - 0
src/main/java/org/elasticsearch/indices/analysis/PreBuiltAnalyzers.java

@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
 import org.apache.lucene.analysis.br.BrazilianAnalyzer;
 import org.apache.lucene.analysis.ca.CatalanAnalyzer;
 import org.apache.lucene.analysis.cjk.CJKAnalyzer;
+import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
 import org.apache.lucene.analysis.cn.ChineseAnalyzer;
 import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.analysis.core.SimpleAnalyzer;
@@ -348,6 +349,13 @@ public enum PreBuiltAnalyzers {
             return new RussianAnalyzer(version.luceneVersion);
         }
     },
+
+    SORANI {
+        @Override
+        protected Analyzer create(Version version) {
+            return new SoraniAnalyzer(version.luceneVersion);
+        }
+    },
 
     SPANISH {
         @Override

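A minimal sketch (not part of this commit) of using the analyzer the new SORANI entry pre-builds, assuming Lucene 4.x; the field name and sample Sorani text are placeholders. SoraniAnalyzer applies Sorani-specific normalization, stopword removal, and stemming on top of standard tokenization:

```java
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

import java.io.StringReader;

public class SoraniAnalyzerSketch {
    public static void main(String[] args) throws Exception {
        // The pre-built SORANI entry does the equivalent of this for the node's Lucene version.
        Analyzer analyzer = new SoraniAnalyzer(Version.LUCENE_47);
        TokenStream ts = analyzer.tokenStream("body", new StringReader("پیاوەکان"));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term);   // normalized, stemmed Sorani tokens
        }
        ts.end();
        ts.close();
    }
}
```
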
+ 16 - 34
src/test/java/org/elasticsearch/index/analysis/AnalysisFactoryTests.java

@@ -43,6 +43,7 @@ public class AnalysisFactoryTests extends ElasticsearchTestCase {
         put("russianletter", Deprecated.class);
         
         // exposed in ES
+        put("classic",       ClassicTokenizerFactory.class);
         put("edgengram",     EdgeNGramTokenizerFactory.class);
         put("keyword",       KeywordTokenizerFactory.class);
         put("letter",        LetterTokenizerFactory.class);
@@ -51,16 +52,10 @@ public class AnalysisFactoryTests extends ElasticsearchTestCase {
         put("pathhierarchy", PathHierarchyTokenizerFactory.class);
         put("pattern",       PatternTokenizerFactory.class);
         put("standard",      StandardTokenizerFactory.class);
+        put("thai",          ThaiTokenizerFactory.class);
         put("uax29urlemail", UAX29URLEmailTokenizerFactory.class);
         put("whitespace",    WhitespaceTokenizerFactory.class);
-                
-        // TODO: these tokenizers are not yet exposed: useful?
         
-        // historical version of standardtokenizer... tries to recognize 
-        // company names and a few other things. not good for asian languages etc.
-        put("classic",       Void.class);
-        // we should add this, the thaiwordfilter is deprecated. this one has correct offsets
-        put("thai",          Void.class);
         // this one "seems to mess up offsets". probably shouldn't be a tokenizer...
         put("wikipedia",     Void.class);
     }};
@@ -80,6 +75,7 @@ public class AnalysisFactoryTests extends ElasticsearchTestCase {
         
         
         // exposed in ES
+        put("apostrophe",                ApostropheFilterFactory.class);
         put("arabicnormalization",       ArabicNormalizationFilterFactory.class);
         put("arabicstem",                ArabicStemTokenFilterFactory.class);
         put("asciifolding",              ASCIIFoldingTokenFilterFactory.class);
@@ -87,6 +83,7 @@ public class AnalysisFactoryTests extends ElasticsearchTestCase {
         put("bulgarianstem",             StemmerTokenFilterFactory.class);
         put("cjkbigram",                 CJKBigramFilterFactory.class);
         put("cjkwidth",                  CJKWidthFilterFactory.class);
+        put("classic",                   ClassicFilterFactory.class);
         put("commongrams",               CommonGramsTokenFilterFactory.class);
         put("commongramsquery",          CommonGramsTokenFilterFactory.class);
         put("czechstem",                 CzechStemTokenFilterFactory.class);
@@ -99,16 +96,21 @@ public class AnalysisFactoryTests extends ElasticsearchTestCase {
         put("finnishlightstem",          StemmerTokenFilterFactory.class);
         put("frenchlightstem",           StemmerTokenFilterFactory.class);
         put("frenchminimalstem",         StemmerTokenFilterFactory.class);
+        put("galicianminimalstem",       StemmerTokenFilterFactory.class);
+        put("galicianstem",              StemmerTokenFilterFactory.class);
         put("germanstem",                GermanStemTokenFilterFactory.class);
         put("germanlightstem",           StemmerTokenFilterFactory.class);
         put("germanminimalstem",         StemmerTokenFilterFactory.class);
+        put("germannormalization",       GermanNormalizationFilterFactory.class);
         put("greeklowercase",            LowerCaseTokenFilterFactory.class);
         put("greekstem",                 StemmerTokenFilterFactory.class);
-        put("hindistem",                 StemmerTokenFilterFactory.class);
+        put("hindinormalization",        HindiNormalizationFilterFactory.class);
         put("hindistem",                 StemmerTokenFilterFactory.class);
         put("hungarianlightstem",        StemmerTokenFilterFactory.class);
         put("hunspellstem",              HunspellTokenFilterFactory.class);
         put("hyphenationcompoundword",   HyphenationCompoundWordTokenFilterFactory.class);
+        put("indicnormalization",        IndicNormalizationFilterFactory.class);
+        put("irishlowercase",            LowerCaseTokenFilterFactory.class);
         put("indonesianstem",            StemmerTokenFilterFactory.class);
         put("italianlightstem",          StemmerTokenFilterFactory.class);
         put("keepword",                  KeepWordFilterFactory.class);
@@ -119,17 +121,23 @@ public class AnalysisFactoryTests extends ElasticsearchTestCase {
         put("limittokencount",           LimitTokenCountFilterFactory.class);
         put("lowercase",                 LowerCaseTokenFilterFactory.class);
         put("ngram",                     NGramTokenFilterFactory.class);
+        put("norwegianlightstem",        StemmerTokenFilterFactory.class);
         put("norwegianminimalstem",      StemmerTokenFilterFactory.class);
         put("patterncapturegroup",       PatternCaptureGroupTokenFilterFactory.class);
         put("patternreplace",            PatternReplaceTokenFilterFactory.class);
         put("persiannormalization",      PersianNormalizationFilterFactory.class);
         put("porterstem",                PorterStemTokenFilterFactory.class);
+        put("portuguesestem",            StemmerTokenFilterFactory.class);
         put("portugueselightstem",       StemmerTokenFilterFactory.class);
         put("portugueseminimalstem",     StemmerTokenFilterFactory.class);
         put("reversestring",             ReverseTokenFilterFactory.class);
         put("russianlightstem",          StemmerTokenFilterFactory.class);
+        put("scandinavianfolding",       ScandinavianFoldingFilterFactory.class);
+        put("scandinaviannormalization", ScandinavianNormalizationFilterFactory.class);
         put("shingle",                   ShingleTokenFilterFactory.class);
         put("snowballporter",            SnowballTokenFilterFactory.class);
+        put("soraninormalization",       SoraniNormalizationFilterFactory.class);
+        put("soranistem",                StemmerTokenFilterFactory.class);
         put("spanishlightstem",          StemmerTokenFilterFactory.class);
         put("standard",                  StandardTokenFilterFactory.class);
         put("stemmeroverride",           StemmerOverrideTokenFilterFactory.class);
@@ -144,46 +152,20 @@ public class AnalysisFactoryTests extends ElasticsearchTestCase {
                 
         // TODO: these tokenfilters are not yet exposed: useful?
         
-        // useful for turkish language
-        put("apostrophe",                Void.class);
         // capitalizes tokens
         put("capitalization",            Void.class);
-        // cleans up after classic tokenizer
-        put("classic",                   Void.class);
         // like length filter (but codepoints)
         put("codepointcount",            Void.class);
-        // galician language stemmers
-        put("galicianminimalstem",       Void.class);
-        put("galicianstem",              Void.class);
-        // o+umlaut=oe type normalization for german
-        put("germannormalization",       Void.class);
-        // hindi text normalization
-        put("hindinormalization",        Void.class);
         // puts hyphenated words back together
         put("hyphenatedwords",           Void.class);
-        // unicode normalization for indian languages
-        put("indicnormalization",        Void.class);
-        // lowercasing for irish: add to LowerCase (has a stemmer, too)
-        put("irishlowercase",            Void.class);
         // repeats anything marked as keyword
         put("keywordrepeat",             Void.class);
         // like limittokencount, but by position
         put("limittokenposition",        Void.class);
         // ???
         put("numericpayload",            Void.class);
-        // RSLP stemmer for portuguese
-        put("portuguesestem",            Void.class);
-        // light stemming for norwegian (has nb/nn options too)
-        put("norwegianlightstem",        Void.class);
         // removes duplicates at the same position (this should be used by the existing factory)
         put("removeduplicates",          Void.class);
-        // accent handling for scandinavian languages
-        put("scandinavianfolding",       Void.class);
-        // less aggressive accent handling for scandinavian languages
-        put("scandinaviannormalization", Void.class);
-        // kurdish language support
-        put("soraninormalization",       Void.class);
-        put("soranistem",                Void.class);
         // ???
         put("tokenoffsetpayload",        Void.class);
         // like a stop filter but by token-type