7 years ago · 891d3bd9c3
--- a/docs/CHANGELOG.asciidoc
+++ b/docs/CHANGELOG.asciidoc
@@ -146,6 +146,9 @@ The new <<mapping-ignored-field,`_ignored`>> field allows to know which fields
 
				 got ignored at index time because of the <<ignore-malformed,`ignore_malformed`>>
			
 
				 option. ({pull}30140[#29658])
			
 
				 
			
 
				+A new analysis plugin called `analysis-nori` that exposes the Lucene Korean
			
 
				+analysis module. ({pull}30397[#30397])
			
 
				+
			
 
				 [float]
			
 
				 === Enhancements
			
 
				 
			
--- a/docs/build.gradle
+++ b/docs/build.gradle
@@ -32,6 +32,7 @@ integTestCluster {
 
				   configFile 'analysis/synonym.txt'
			
 
				   configFile 'analysis/stemmer_override.txt'
			
 
				   configFile 'userdict_ja.txt'
			
 
				+  configFile 'userdict_ko.txt'
			
 
				   configFile 'KeywordTokenizer.rbbi'
			
 
				   extraConfigFile 'hunspell/en_US/en_US.aff', '../server/src/test/resources/indices/analyze/conf_dir/hunspell/en_US/en_US.aff'
			
 
				   extraConfigFile 'hunspell/en_US/en_US.dic', '../server/src/test/resources/indices/analyze/conf_dir/hunspell/en_US/en_US.dic'
			
--- a/docs/plugins/analysis-nori.asciidoc
+++ b/docs/plugins/analysis-nori.asciidoc
@@ -0,0 +1,408 @@
 
				+[[analysis-nori]]
			
 
				+=== Korean (nori) Analysis Plugin
			
 
				+
			
 
				+The Korean (nori) Analysis plugin integrates Lucene nori analysis
			
 
				+module into elasticsearch. It uses the https://bitbucket.org/eunjeon/mecab-ko-dic[mecab-ko-dic dictionary]
			
 
				+to perform morphological analysis of Korean texts.
			
 
				+
			
 
				+:plugin_name: analysis-nori
			
 
				+include::install_remove.asciidoc[]
			
 
				+
			
 
				+[[analysis-nori-analyzer]]
			
 
				+==== `nori` analyzer
			
 
				+
			
 
				+The `nori` analyzer consists of the following tokenizer and token filters:
			
 
				+
			
 
				+* <<analysis-nori-tokenizer,`nori_tokenizer`>>
			
 
				+* <<analysis-nori-speech,`nori_part_of_speech`>> token filter
			
 
				+* <<analysis-nori-readingform,`nori_readingform`>> token filter
			
 
				+* {ref}/analysis-lowercase-tokenfilter.html[`lowercase`] token filter
			
 
				+
			
 
				+It supports the `decompound_mode` and `user_dictionary` settings from
			
 
				+<<analysis-nori-tokenizer,`nori_tokenizer`>> and the `stoptags` setting from
			
 
				+<<analysis-nori-speech,`nori_part_of_speech`>>.
			
 
				+
			
 
				+[[analysis-nori-tokenizer]]
			
 
				+==== `nori_tokenizer`
			
 
				+
			
 
				+The `nori_tokenizer` accepts the following settings:
			
 
				+
			
 
				+`decompound_mode`::
			
 
				++
			
 
				+--
			
 
				+
			
 
				+The decompound mode determines how the tokenizer handles compound tokens.
			
 
				+It can be set to:
			
 
				+
			
 
				+`none`::
			
 
				+
			
 
				+    No decomposition for compounds. Example output:
			
 
				+
			
 
				+    가거도항
			
 
				+    가곡역
			
 
				+
			
 
				+`discard`::
			
 
				+
			
 
				+    Decomposes compounds and discards the original form (*default*). Example output:
			
 
				+
			
 
				+    가곡역 => 가곡, 역
			
 
				+
			
 
				+`mixed`::
			
 
				+
			
 
				+    Decomposes compounds and keeps the original form. Example output:
			
 
				+
			
 
				+    가곡역 => 가곡역, 가곡, 역
			
 
				+--
			
 
				+
			
 
				+`user_dictionary`::
			
 
				++
			
 
				+--
			
 
				+The Nori tokenizer uses the https://bitbucket.org/eunjeon/mecab-ko-dic[mecab-ko-dic dictionary] by default.
			
 
				+A `user_dictionary` with custom nouns (`NNG`) may be appended to the default dictionary.
			
 
				+The dictionary should have the following format:
			
 
				+
			
 
				+[source,txt]
			
 
				+-----------------------
			
 
				+<token> [<token 1> ... <token n>]
			
 
				+-----------------------
			
 
				+
			
 
				+The first token is mandatory and represents the custom noun that should be added to
			
 
				+the dictionary. For compound nouns the custom segmentation can be provided
			
 
				+after the first token (`[<token 1> ... <token n>]`). The segmentation of the
			
 
				+custom compound nouns is controlled by the `decompound_mode` setting.
			
 
				+--
			
 
				+
			
 
				+As a demonstration of how the user dictionary can be used, save the following
			
 
				+dictionary to `$ES_HOME/config/userdict_ko.txt`:
			
 
				+
			
 
				+[source,txt]
			
 
				+-----------------------
			
 
				+c++                 <1>
			
 
				+C샤프
			
 
				+세종
			
 
				+세종시 세종 시        <2>
			
 
				+-----------------------
			
 
				+
			
 
				+
			
 
				+<1> A simple noun
			
 
				+<2> A compound noun (`세종시`) followed by its decomposition: `세종` and `시`.
			
 
				+
			
 
				+
			
 
				+Then create an analyzer as follows:
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+PUT nori_sample
			
 
				+{
			
 
				+  "settings": {
			
 
				+    "index": {
			
 
				+      "analysis": {
			
 
				+        "tokenizer": {
			
 
				+          "nori_user_dict": {
			
 
				+            "type": "nori_tokenizer",
			
 
				+            "decompound_mode": "mixed",
			
 
				+            "user_dictionary": "userdict_ko.txt"
			
 
				+          }
			
 
				+        },
			
 
				+        "analyzer": {
			
 
				+          "my_analyzer": {
			
 
				+            "type": "custom",
			
 
				+            "tokenizer": "nori_user_dict"
			
 
				+          }
			
 
				+        }
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+GET nori_sample/_analyze
			
 
				+{
			
 
				+  "analyzer": "my_analyzer",
			
 
				+  "text": "세종시"  <1>
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// CONSOLE
			
 
				+
			
 
				+<1> Sejong city
			
 
				+
			
 
				+The above `analyze` request returns the following:
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+{
			
 
				+  "tokens" : [ {
			
 
				+    "token" : "세종시",
			
 
				+    "start_offset" : 0,
			
 
				+    "end_offset" : 3,
			
 
				+    "type" : "word",
			
 
				+    "position" : 0,
			
 
				+    "positionLength" : 2    <1>
			
 
				+  }, {
			
 
				+    "token" : "세종",
			
 
				+    "start_offset" : 0,
			
 
				+    "end_offset" : 2,
			
 
				+    "type" : "word",
			
 
				+    "position" : 0
			
 
				+  }, {
			
 
				+    "token" : "시",
			
 
				+    "start_offset" : 2,
			
 
				+    "end_offset" : 3,
			
 
				+    "type" : "word",
			
 
				+    "position" : 1
			
 
				+   }]
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// TESTRESPONSE
			
 
				+
			
 
				+<1> This is a compound token that spans two positions (`mixed` mode).
			
 
				+
			
 
				+The `nori_tokenizer` sets a number of additional attributes per token that are used by token filters
			
 
				+to modify the stream.
			
 
				+You can view all these additional attributes with the following request:
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+GET _analyze
			
 
				+{
			
 
				+  "tokenizer": "nori_tokenizer",
			
 
				+  "text": "뿌리가 깊은 나무는",   <1>
			
 
				+  "attributes" : ["posType", "leftPOS", "rightPOS", "morphemes", "reading"],
			
 
				+  "explain": true
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// CONSOLE
			
 
				+
			
 
				+<1> A tree with deep roots
			
 
				+
			
 
				+Which responds with:
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+{
			
 
				+    "detail": {
			
 
				+        "custom_analyzer": true,
			
 
				+        "charfilters": [],
			
 
				+        "tokenizer": {
			
 
				+            "name": "nori_tokenizer",
			
 
				+            "tokens": [
			
 
				+                {
			
 
				+                    "token": "뿌리",
			
 
				+                    "start_offset": 0,
			
 
				+                    "end_offset": 2,
			
 
				+                    "type": "word",
			
 
				+                    "position": 0,
			
 
				+                    "leftPOS": "NNG(General Noun)",
			
 
				+                    "morphemes": null,
			
 
				+                    "posType": "MORPHEME",
			
 
				+                    "reading": null,
			
 
				+                    "rightPOS": "NNG(General Noun)"
			
 
				+                },
			
 
				+                {
			
 
				+                    "token": "가",
			
 
				+                    "start_offset": 2,
			
 
				+                    "end_offset": 3,
			
 
				+                    "type": "word",
			
 
				+                    "position": 1,
			
 
				+                    "leftPOS": "J(Ending Particle)",
			
 
				+                    "morphemes": null,
			
 
				+                    "posType": "MORPHEME",
			
 
				+                    "reading": null,
			
 
				+                    "rightPOS": "J(Ending Particle)"
			
 
				+                },
			
 
				+                {
			
 
				+                    "token": "깊",
			
 
				+                    "start_offset": 4,
			
 
				+                    "end_offset": 5,
			
 
				+                    "type": "word",
			
 
				+                    "position": 2,
			
 
				+                    "leftPOS": "VA(Adjective)",
			
 
				+                    "morphemes": null,
			
 
				+                    "posType": "MORPHEME",
			
 
				+                    "reading": null,
			
 
				+                    "rightPOS": "VA(Adjective)"
			
 
				+                },
			
 
				+                {
			
 
				+                    "token": "은",
			
 
				+                    "start_offset": 5,
			
 
				+                    "end_offset": 6,
			
 
				+                    "type": "word",
			
 
				+                    "position": 3,
			
 
				+                    "leftPOS": "E(Verbal endings)",
			
 
				+                    "morphemes": null,
			
 
				+                    "posType": "MORPHEME",
			
 
				+                    "reading": null,
			
 
				+                    "rightPOS": "E(Verbal endings)"
			
 
				+                },
			
 
				+                {
			
 
				+                    "token": "나무",
			
 
				+                    "start_offset": 7,
			
 
				+                    "end_offset": 9,
			
 
				+                    "type": "word",
			
 
				+                    "position": 4,
			
 
				+                    "leftPOS": "NNG(General Noun)",
			
 
				+                    "morphemes": null,
			
 
				+                    "posType": "MORPHEME",
			
 
				+                    "reading": null,
			
 
				+                    "rightPOS": "NNG(General Noun)"
			
 
				+                },
			
 
				+                {
			
 
				+                    "token": "는",
			
 
				+                    "start_offset": 9,
			
 
				+                    "end_offset": 10,
			
 
				+                    "type": "word",
			
 
				+                    "position": 5,
			
 
				+                    "leftPOS": "J(Ending Particle)",
			
 
				+                    "morphemes": null,
			
 
				+                    "posType": "MORPHEME",
			
 
				+                    "reading": null,
			
 
				+                    "rightPOS": "J(Ending Particle)"
			
 
				+                }
			
 
				+            ]
			
 
				+        },
			
 
				+        "tokenfilters": []
			
 
				+    }
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// TESTRESPONSE
			
 
				+
			
 
				+[[analysis-nori-speech]]
			
 
				+==== `nori_part_of_speech` token filter
			
 
				+
			
 
				+The `nori_part_of_speech` token filter removes tokens that match a set of
			
 
				+part-of-speech tags. The list of supported tags and their meanings can be found here:
			
 
				+{lucene_version_path}/org/apache/lucene/analysis/ko/POS.Tag.html[Part of speech tags]
			
 
				+
			
 
				+It accepts the following setting:
			
 
				+
			
 
				+`stoptags`::
			
 
				+
			
 
				+    An array of part-of-speech tags that should be removed.
			
 
				+
			
 
				+and defaults to:
			
 
				+
			
 
				+```
			
 
				+"stoptags": [
			
 
				+    "E",
			
 
				+    "IC",
			
 
				+    "J",
			
 
				+    "MAG", "MAJ", "MM",
			
 
				+    "SP", "SSC", "SSO", "SC", "SE",
			
 
				+    "XPN", "XSA", "XSN", "XSV",
			
 
				+    "UNA", "NA", "VSV"
			
 
				+]
			
 
				+```
			
 
				+
			
 
				+For example:
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+PUT nori_sample
			
 
				+{
			
 
				+  "settings": {
			
 
				+    "index": {
			
 
				+      "analysis": {
			
 
				+        "analyzer": {
			
 
				+          "my_analyzer": {
			
 
				+            "tokenizer": "nori_tokenizer",
			
 
				+            "filter": [
			
 
				+              "my_posfilter"
			
 
				+            ]
			
 
				+          }
			
 
				+        },
			
 
				+        "filter": {
			
 
				+          "my_posfilter": {
			
 
				+            "type": "nori_part_of_speech",
			
 
				+            "stoptags": [
			
 
				+              "NR"   <1>
			
 
				+            ]
			
 
				+          }
			
 
				+        }
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+GET nori_sample/_analyze
			
 
				+{
			
 
				+  "analyzer": "my_analyzer",
			
 
				+  "text": "여섯 용이"  <2>
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// CONSOLE
			
 
				+
			
 
				+<1> Korean numerals should be removed (`NR`)
			
 
				+<2> Six dragons
			
 
				+
			
 
				+Which responds with:
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+{
			
 
				+  "tokens" : [ {
			
 
				+    "token" : "용",
			
 
				+    "start_offset" : 3,
			
 
				+    "end_offset" : 4,
			
 
				+    "type" : "word",
			
 
				+    "position" : 1
			
 
				+  }, {
			
 
				+    "token" : "이",
			
 
				+    "start_offset" : 4,
			
 
				+    "end_offset" : 5,
			
 
				+    "type" : "word",
			
 
				+    "position" : 2
			
 
				+  } ]
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// TESTRESPONSE
			
 
				+
			
 
				+[[analysis-nori-readingform]]
			
 
				+==== `nori_readingform` token filter
			
 
				+
			
 
				+The `nori_readingform` token filter rewrites tokens written in Hanja to their Hangul form.
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+PUT nori_sample
			
 
				+{
			
 
				+    "settings": {
			
 
				+        "index":{
			
 
				+            "analysis":{
			
 
				+                "analyzer" : {
			
 
				+                    "my_analyzer" : {
			
 
				+                        "tokenizer" : "nori_tokenizer",
			
 
				+                        "filter" : ["nori_readingform"]
			
 
				+                    }
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+GET nori_sample/_analyze
			
 
				+{
			
 
				+  "analyzer": "my_analyzer",
			
 
				+  "text": "鄕歌" <1>
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// CONSOLE
			
 
				+
			
 
				+<1> Hyangga
			
 
				+
			
 
				+Which responds with:
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+{
			
 
				+  "tokens" : [ {
			
 
				+    "token" : "향가", <2>
			
 
				+    "start_offset" : 0,
			
 
				+    "end_offset" : 2,
			
 
				+    "type" : "word",
			
 
				+    "position" : 0
			
 
				+  }]
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// TESTRESPONSE
			
 
				+
			
 
				+<1> A token written in Hanja.
			
 
				+<2> The Hanja form is replaced by its Hangul form.
			
--- a/docs/plugins/analysis.asciidoc
+++ b/docs/plugins/analysis.asciidoc
@@ -20,6 +20,10 @@ transliteration.
 
				 
			
 
				 Advanced analysis of Japanese using the http://www.atilika.org/[Kuromoji analyzer].
			
 
				 
			
 
				+<<analysis-nori,Nori>>::
			
 
				+
			
 
				+Morphological analysis of Korean using the Lucene Nori analyzer.
			
 
				+
			
 
				 <<analysis-phonetic,Phonetic>>::
			
 
				 
			
 
				 Analyzes tokens into their phonetic equivalent using Soundex, Metaphone,
			
@@ -59,6 +63,8 @@ include::analysis-icu.asciidoc[]
 
				 
			
 
				 include::analysis-kuromoji.asciidoc[]
			
 
				 
			
 
				+include::analysis-nori.asciidoc[]
			
 
				+
			
 
				 include::analysis-phonetic.asciidoc[]
			
 
				 
			
 
				 include::analysis-smartcn.asciidoc[]
			
--- a/docs/reference/cat/plugins.asciidoc
+++ b/docs/reference/cat/plugins.asciidoc
@@ -16,10 +16,11 @@ Might look like:
 
				 name    component               version   description
			
 
				 U7321H6 analysis-icu            {version} The ICU Analysis plugin integrates Lucene ICU module into elasticsearch, adding ICU relates analysis components.
			
 
				 U7321H6 analysis-kuromoji       {version} The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch.
			
 
				+U7321H6 analysis-nori           {version} The Korean (nori) Analysis plugin integrates Lucene nori analysis module into elasticsearch.
			
 
				 U7321H6 analysis-phonetic       {version} The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch.
			
 
				 U7321H6 analysis-smartcn        {version} Smart Chinese Analysis plugin integrates Lucene Smart Chinese analysis module into elasticsearch.
			
 
				 U7321H6 analysis-stempel        {version} The Stempel (Polish) Analysis plugin integrates Lucene stempel (polish) analysis module into elasticsearch.
			
 
				-U7321H6 analysis-ukrainian        {version} The Ukrainian Analysis plugin integrates the Lucene UkrainianMorfologikAnalyzer into elasticsearch.
			
 
				+U7321H6 analysis-ukrainian      {version} The Ukrainian Analysis plugin integrates the Lucene UkrainianMorfologikAnalyzer into elasticsearch.
			
 
				 U7321H6 discovery-azure-classic {version} The Azure Classic Discovery plugin allows to use Azure Classic API for the unicast discovery mechanism
			
 
				 U7321H6 discovery-ec2           {version} The EC2 discovery plugin allows to use AWS API for the unicast discovery mechanism.
			
 
				 U7321H6 discovery-file          {version} Discovery file plugin enables unicast discovery from hosts stored in a file.
			
--- a/docs/src/test/cluster/config/userdict_ko.txt
+++ b/docs/src/test/cluster/config/userdict_ko.txt
@@ -0,0 +1,5 @@
 
				+# Additional nouns
			
 
				+c++
			
 
				+C샤프
			
 
				+세종
			
 
				+세종시 세종 시
			
--- a/plugins/analysis-kuromoji/src/test/resources/rest-api-spec/test/analysis_kuromoji/10_basic.yml
+++ b/plugins/analysis-kuromoji/src/test/resources/rest-api-spec/test/analysis_kuromoji/10_basic.yml
--- a/plugins/analysis-kuromoji/src/test/resources/rest-api-spec/test/analysis_kuromoji/20_search.yml
+++ b/plugins/analysis-kuromoji/src/test/resources/rest-api-spec/test/analysis_kuromoji/20_search.yml
--- a/plugins/analysis-nori/build.gradle
+++ b/plugins/analysis-nori/build.gradle
@@ -0,0 +1,32 @@
 
				+/*
			
 
				+ * Licensed to Elasticsearch under one or more contributor
			
 
				+ * license agreements. See the NOTICE file distributed with
			
 
				+ * this work for additional information regarding copyright
			
 
				+ * ownership. Elasticsearch licenses this file to you under
			
 
				+ * the Apache License, Version 2.0 (the "License"); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing,
			
 
				+ * software distributed under the License is distributed on an
			
 
				+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
			
 
				+ * KIND, either express or implied.  See the License for the
			
 
				+ * specific language governing permissions and limitations
			
 
				+ * under the License.
			
 
				+ */
			
 
				+
			
 
				+esplugin {
			
 
				+  description 'The Korean (nori) Analysis plugin integrates Lucene nori analysis module into elasticsearch.'
			
 
				+  classname 'org.elasticsearch.plugin.analysis.nori.AnalysisNoriPlugin'
			
 
				+}
			
 
				+
			
 
				+dependencies {
			
 
				+  compile "org.apache.lucene:lucene-analyzers-nori:${versions.lucene}"
			
 
				+}
			
 
				+
			
 
				+dependencyLicenses {
			
 
				+  mapping from: /lucene-.*/, to: 'lucene'
			
 
				+}
			
 
				+
			
--- a/plugins/analysis-nori/licenses/lucene-LICENSE.txt
+++ b/plugins/analysis-nori/licenses/lucene-LICENSE.txt
@@ -0,0 +1,475 @@
 
				+
			
 
				+                                 Apache License
			
 
				+                           Version 2.0, January 2004
			
 
				+                        http://www.apache.org/licenses/
			
 
				+
			
 
				+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
			
 
				+
			
 
				+   1. Definitions.
			
 
				+
			
 
				+      "License" shall mean the terms and conditions for use, reproduction,
			
 
				+      and distribution as defined by Sections 1 through 9 of this document.
			
 
				+
			
 
				+      "Licensor" shall mean the copyright owner or entity authorized by
			
 
				+      the copyright owner that is granting the License.
			
 
				+
			
 
				+      "Legal Entity" shall mean the union of the acting entity and all
			
 
				+      other entities that control, are controlled by, or are under common
			
 
				+      control with that entity. For the purposes of this definition,
			
 
				+      "control" means (i) the power, direct or indirect, to cause the
			
 
				+      direction or management of such entity, whether by contract or
			
 
				+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
			
 
				+      outstanding shares, or (iii) beneficial ownership of such entity.
			
 
				+
			
 
				+      "You" (or "Your") shall mean an individual or Legal Entity
			
 
				+      exercising permissions granted by this License.
			
 
				+
			
 
				+      "Source" form shall mean the preferred form for making modifications,
			
 
				+      including but not limited to software source code, documentation
			
 
				+      source, and configuration files.
			
 
				+
			
 
				+      "Object" form shall mean any form resulting from mechanical
			
 
				+      transformation or translation of a Source form, including but
			
 
				+      not limited to compiled object code, generated documentation,
			
 
				+      and conversions to other media types.
			
 
				+
			
 
				+      "Work" shall mean the work of authorship, whether in Source or
			
 
				+      Object form, made available under the License, as indicated by a
			
 
				+      copyright notice that is included in or attached to the work
			
 
				+      (an example is provided in the Appendix below).
			
 
				+
			
 
				+      "Derivative Works" shall mean any work, whether in Source or Object
			
 
				+      form, that is based on (or derived from) the Work and for which the
			
 
				+      editorial revisions, annotations, elaborations, or other modifications
			
 
				+      represent, as a whole, an original work of authorship. For the purposes
			
 
				+      of this License, Derivative Works shall not include works that remain
			
 
				+      separable from, or merely link (or bind by name) to the interfaces of,
			
 
				+      the Work and Derivative Works thereof.
			
 
				+
			
 
				+      "Contribution" shall mean any work of authorship, including
			
 
				+      the original version of the Work and any modifications or additions
			
 
				+      to that Work or Derivative Works thereof, that is intentionally
			
 
				+      submitted to Licensor for inclusion in the Work by the copyright owner
			
 
				+      or by an individual or Legal Entity authorized to submit on behalf of
			
 
				+      the copyright owner. For the purposes of this definition, "submitted"
			
 
				+      means any form of electronic, verbal, or written communication sent
			
 
				+      to the Licensor or its representatives, including but not limited to
			
 
				+      communication on electronic mailing lists, source code control systems,
			
 
				+      and issue tracking systems that are managed by, or on behalf of, the
			
 
				+      Licensor for the purpose of discussing and improving the Work, but
			
 
				+      excluding communication that is conspicuously marked or otherwise
			
 
				+      designated in writing by the copyright owner as "Not a Contribution."
			
 
				+
			
 
				+      "Contributor" shall mean Licensor and any individual or Legal Entity
			
 
				+      on behalf of whom a Contribution has been received by Licensor and
			
 
				+      subsequently incorporated within the Work.
			
 
				+
			
 
				+   2. Grant of Copyright License. Subject to the terms and conditions of
			
 
				+      this License, each Contributor hereby grants to You a perpetual,
			
 
				+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
			
 
				+      copyright license to reproduce, prepare Derivative Works of,
			
 
				+      publicly display, publicly perform, sublicense, and distribute the
			
 
				+      Work and such Derivative Works in Source or Object form.
			
 
				+
			
 
				+   3. Grant of Patent License. Subject to the terms and conditions of
			
 
				+      this License, each Contributor hereby grants to You a perpetual,
			
 
				+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
			
 
				+      (except as stated in this section) patent license to make, have made,
			
 
				+      use, offer to sell, sell, import, and otherwise transfer the Work,
			
 
				+      where such license applies only to those patent claims licensable
			
 
				+      by such Contributor that are necessarily infringed by their
			
 
				+      Contribution(s) alone or by combination of their Contribution(s)
			
 
				+      with the Work to which such Contribution(s) was submitted. If You
			
 
				+      institute patent litigation against any entity (including a
			
 
				+      cross-claim or counterclaim in a lawsuit) alleging that the Work
			
 
				+      or a Contribution incorporated within the Work constitutes direct
			
 
				+      or contributory patent infringement, then any patent licenses
			
 
				+      granted to You under this License for that Work shall terminate
			
 
				+      as of the date such litigation is filed.
			
 
				+
			
 
				+   4. Redistribution. You may reproduce and distribute copies of the
			
 
				+      Work or Derivative Works thereof in any medium, with or without
			
 
				+      modifications, and in Source or Object form, provided that You
			
 
				+      meet the following conditions:
			
 
				+
			
 
				+      (a) You must give any other recipients of the Work or
			
 
				+          Derivative Works a copy of this License; and
			
 
				+
			
 
				+      (b) You must cause any modified files to carry prominent notices
			
 
				+          stating that You changed the files; and
			
 
				+
			
 
				+      (c) You must retain, in the Source form of any Derivative Works
			
 
				+          that You distribute, all copyright, patent, trademark, and
			
 
				+          attribution notices from the Source form of the Work,
			
 
				+          excluding those notices that do not pertain to any part of
			
 
				+          the Derivative Works; and
			
 
				+
			
 
				+      (d) If the Work includes a "NOTICE" text file as part of its
			
 
				+          distribution, then any Derivative Works that You distribute must
			
 
				+          include a readable copy of the attribution notices contained
			
 
				+          within such NOTICE file, excluding those notices that do not
			
 
				+          pertain to any part of the Derivative Works, in at least one
			
 
				+          of the following places: within a NOTICE text file distributed
			
 
				+          as part of the Derivative Works; within the Source form or
			
 
				+          documentation, if provided along with the Derivative Works; or,
			
 
				+          within a display generated by the Derivative Works, if and
			
 
				+          wherever such third-party notices normally appear. The contents
			
 
				+          of the NOTICE file are for informational purposes only and
			
 
				+          do not modify the License. You may add Your own attribution
			
 
				+          notices within Derivative Works that You distribute, alongside
			
 
				+          or as an addendum to the NOTICE text from the Work, provided
			
 
				+          that such additional attribution notices cannot be construed
			
 
				+          as modifying the License.
			
 
				+
			
 
				+      You may add Your own copyright statement to Your modifications and
			
 
				+      may provide additional or different license terms and conditions
			
 
				+      for use, reproduction, or distribution of Your modifications, or
			
 
				+      for any such Derivative Works as a whole, provided Your use,
			
 
				+      reproduction, and distribution of the Work otherwise complies with
			
 
				+      the conditions stated in this License.
			
 
				+
			
 
				+   5. Submission of Contributions. Unless You explicitly state otherwise,
			
 
				+      any Contribution intentionally submitted for inclusion in the Work
			
 
				+      by You to the Licensor shall be under the terms and conditions of
			
 
				+      this License, without any additional terms or conditions.
			
 
				+      Notwithstanding the above, nothing herein shall supersede or modify
			
 
				+      the terms of any separate license agreement you may have executed
			
 
				+      with Licensor regarding such Contributions.
			
 
				+
			
 
				+   6. Trademarks. This License does not grant permission to use the trade
			
 
				+      names, trademarks, service marks, or product names of the Licensor,
			
 
				+      except as required for reasonable and customary use in describing the
			
 
				+      origin of the Work and reproducing the content of the NOTICE file.
			
 
				+
			
 
				+   7. Disclaimer of Warranty. Unless required by applicable law or
			
 
				+      agreed to in writing, Licensor provides the Work (and each
			
 
				+      Contributor provides its Contributions) on an "AS IS" BASIS,
			
 
				+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
			
 
				+      implied, including, without limitation, any warranties or conditions
			
 
				+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
			
 
				+      PARTICULAR PURPOSE. You are solely responsible for determining the
			
 
				+      appropriateness of using or redistributing the Work and assume any
			
 
				+      risks associated with Your exercise of permissions under this License.
			
 
				+
			
 
				+   8. Limitation of Liability. In no event and under no legal theory,
			
 
				+      whether in tort (including negligence), contract, or otherwise,
			
 
				+      unless required by applicable law (such as deliberate and grossly
			
 
				+      negligent acts) or agreed to in writing, shall any Contributor be
			
 
				+      liable to You for damages, including any direct, indirect, special,
			
 
				+      incidental, or consequential damages of any character arising as a
			
 
				+      result of this License or out of the use or inability to use the
			
 
				+      Work (including but not limited to damages for loss of goodwill,
			
 
				+      work stoppage, computer failure or malfunction, or any and all
			
 
				+      other commercial damages or losses), even if such Contributor
			
 
				+      has been advised of the possibility of such damages.
			
 
				+
			
 
				+   9. Accepting Warranty or Additional Liability. While redistributing
			
 
				+      the Work or Derivative Works thereof, You may choose to offer,
			
 
				+      and charge a fee for, acceptance of support, warranty, indemnity,
			
 
				+      or other liability obligations and/or rights consistent with this
			
 
				+      License. However, in accepting such obligations, You may act only
			
 
				+      on Your own behalf and on Your sole responsibility, not on behalf
			
 
				+      of any other Contributor, and only if You agree to indemnify,
			
 
				+      defend, and hold each Contributor harmless for any liability
			
 
				+      incurred by, or claims asserted against, such Contributor by reason
			
 
				+      of your accepting any such warranty or additional liability.
			
 
				+
			
 
				+   END OF TERMS AND CONDITIONS
			
 
				+
			
 
				+   APPENDIX: How to apply the Apache License to your work.
			
 
				+
			
 
				+      To apply the Apache License to your work, attach the following
			
 
				+      boilerplate notice, with the fields enclosed by brackets "[]"
			
 
				+      replaced with your own identifying information. (Don't include
			
 
				+      the brackets!)  The text should be enclosed in the appropriate
			
 
				+      comment syntax for the file format. We also recommend that a
			
 
				+      file or class name and description of purpose be included on the
			
 
				+      same "printed page" as the copyright notice for easier
			
 
				+      identification within third-party archives.
			
 
				+
			
 
				+   Copyright [yyyy] [name of copyright owner]
			
 
				+
			
 
				+   Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+   you may not use this file except in compliance with the License.
			
 
				+   You may obtain a copy of the License at
			
 
				+
			
 
				+       http://www.apache.org/licenses/LICENSE-2.0
			
 
				+
			
 
				+   Unless required by applicable law or agreed to in writing, software
			
 
				+   distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+   See the License for the specific language governing permissions and
			
 
				+   limitations under the License.
			
 
				+
			
 
				+
			
 
				+
			
 
				+Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was
			
 
				+derived from unicode conversion examples available at
			
 
				+http://www.unicode.org/Public/PROGRAMS/CVTUTF.  Here is the copyright
			
 
				+from those sources:
			
 
				+
			
 
				+/*
			
 
				+ * Copyright 2001-2004 Unicode, Inc.
			
 
				+ * 
			
 
				+ * Disclaimer
			
 
				+ * 
			
 
				+ * This source code is provided as is by Unicode, Inc. No claims are
			
 
				+ * made as to fitness for any particular purpose. No warranties of any
			
 
				+ * kind are expressed or implied. The recipient agrees to determine
			
 
				+ * applicability of information provided. If this file has been
			
 
				+ * purchased on magnetic or optical media from Unicode, Inc., the
			
 
				+ * sole remedy for any claim will be exchange of defective media
			
 
				+ * within 90 days of receipt.
			
 
				+ * 
			
 
				+ * Limitations on Rights to Redistribute This Code
			
 
				+ * 
			
 
				+ * Unicode, Inc. hereby grants the right to freely use the information
			
 
				+ * supplied in this file in the creation of products supporting the
			
 
				+ * Unicode Standard, and to make copies of this file in any form
			
 
				+ * for internal or external distribution as long as this notice
			
 
				+ * remains attached.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+Some code in core/src/java/org/apache/lucene/util/ArrayUtil.java was
			
 
				+derived from Python 2.4.2 sources available at
			
 
				+http://www.python.org. Full license is here:
			
 
				+
			
 
				+  http://www.python.org/download/releases/2.4.2/license/
			
 
				+
			
 
				+Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was
			
 
				+derived from Python 3.1.2 sources available at
			
 
				+http://www.python.org. Full license is here:
			
 
				+
			
 
				+  http://www.python.org/download/releases/3.1.2/license/
			
 
				+
			
 
				+Some code in core/src/java/org/apache/lucene/util/automaton was
			
 
				+derived from Brics automaton sources available at
			
 
				+www.brics.dk/automaton/. Here is the copyright from those sources:
			
 
				+
			
 
				+/*
			
 
				+ * Copyright (c) 2001-2009 Anders Moeller
			
 
				+ * All rights reserved.
			
 
				+ * 
			
 
				+ * Redistribution and use in source and binary forms, with or without
			
 
				+ * modification, are permitted provided that the following conditions
			
 
				+ * are met:
			
 
				+ * 1. Redistributions of source code must retain the above copyright
			
 
				+ *    notice, this list of conditions and the following disclaimer.
			
 
				+ * 2. Redistributions in binary form must reproduce the above copyright
			
 
				+ *    notice, this list of conditions and the following disclaimer in the
			
 
				+ *    documentation and/or other materials provided with the distribution.
			
 
				+ * 3. The name of the author may not be used to endorse or promote products
			
 
				+ *    derived from this software without specific prior written permission.
			
 
				+ * 
			
 
				+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
			
 
				+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
			
 
				+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
			
 
				+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
			
 
				+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
			
 
				+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
			
 
				+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				+ */
			
 
				+ 
			
 
				+The levenshtein automata tables in core/src/java/org/apache/lucene/util/automaton 
			
 
				+were automatically generated with the moman/finenight FSA package.
			
 
				+Here is the copyright for those sources:
			
 
				+
			
 
				+# Copyright (c) 2010, Jean-Philippe Barrette-LaPierre, <jpb@rrette.com>
			
 
				+#
			
 
				+# Permission is hereby granted, free of charge, to any person
			
 
				+# obtaining a copy of this software and associated documentation
			
 
				+# files (the "Software"), to deal in the Software without
			
 
				+# restriction, including without limitation the rights to use,
			
 
				+# copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+# copies of the Software, and to permit persons to whom the
			
 
				+# Software is furnished to do so, subject to the following
			
 
				+# conditions:
			
 
				+#
			
 
				+# The above copyright notice and this permission notice shall be
			
 
				+# included in all copies or substantial portions of the Software.
			
 
				+#
			
 
				+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
			
 
				+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
			
 
				+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
			
 
				+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
			
 
				+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
			
 
				+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
			
 
				+# OTHER DEALINGS IN THE SOFTWARE.
			
 
				+
			
 
				+Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was
			
 
				+derived from ICU (http://www.icu-project.org)
			
 
				+The full license is available here: 
			
 
				+  http://source.icu-project.org/repos/icu/icu/trunk/license.html
			
 
				+
			
 
				+/*
			
 
				+ * Copyright (C) 1999-2010, International Business Machines
			
 
				+ * Corporation and others.  All Rights Reserved.
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy 
			
 
				+ * of this software and associated documentation files (the "Software"), to deal
			
 
				+ * in the Software without restriction, including without limitation the rights 
			
 
				+ * to use, copy, modify, merge, publish, distribute, and/or sell copies of the 
			
 
				+ * Software, and to permit persons to whom the Software is furnished to do so, 
			
 
				+ * provided that the above copyright notice(s) and this permission notice appear 
			
 
				+ * in all copies of the Software and that both the above copyright notice(s) and
			
 
				+ * this permission notice appear in supporting documentation.
			
 
				+ * 
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
			
 
				+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE 
			
 
				+ * LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR 
			
 
				+ * ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 
			
 
				+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 
			
 
				+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
			
 
				+ *
			
 
				+ * Except as contained in this notice, the name of a copyright holder shall not 
			
 
				+ * be used in advertising or otherwise to promote the sale, use or other 
			
 
				+ * dealings in this Software without prior written authorization of the 
			
 
				+ * copyright holder.
			
 
				+ */
			
 
				+ 
			
 
				+The following license applies to the Snowball stemmers:
			
 
				+
			
 
				+Copyright (c) 2001, Dr Martin Porter
			
 
				+Copyright (c) 2002, Richard Boulton
			
 
				+All rights reserved.
			
 
				+
			
 
				+Redistribution and use in source and binary forms, with or without
			
 
				+modification, are permitted provided that the following conditions are met:
			
 
				+
			
 
				+    * Redistributions of source code must retain the above copyright notice,
			
 
				+    * this list of conditions and the following disclaimer.
			
 
				+    * Redistributions in binary form must reproduce the above copyright
			
 
				+    * notice, this list of conditions and the following disclaimer in the
			
 
				+    * documentation and/or other materials provided with the distribution.
			
 
				+    * Neither the name of the copyright holders nor the names of its contributors
			
 
				+    * may be used to endorse or promote products derived from this software
			
 
				+    * without specific prior written permission.
			
 
				+
			
 
				+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
			
 
				+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
			
 
				+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
			
 
				+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
			
 
				+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
			
 
				+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
			
 
				+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				+
			
 
				+The following license applies to the KStemmer:
			
 
				+
			
 
				+Copyright © 2003,
			
 
				+Center for Intelligent Information Retrieval,
			
 
				+University of Massachusetts, Amherst.
			
 
				+All rights reserved.
			
 
				+
			
 
				+Redistribution and use in source and binary forms, with or without modification,
			
 
				+are permitted provided that the following conditions are met:
			
 
				+
			
 
				+1. Redistributions of source code must retain the above copyright notice, this
			
 
				+list of conditions and the following disclaimer.
			
 
				+
			
 
				+2. Redistributions in binary form must reproduce the above copyright notice,
			
 
				+this list of conditions and the following disclaimer in the documentation
			
 
				+and/or other materials provided with the distribution.
			
 
				+
			
 
				+3. The names "Center for Intelligent Information Retrieval" and
			
 
				+"University of Massachusetts" must not be used to endorse or promote products
			
 
				+derived from this software without prior written permission. To obtain
			
 
				+permission, contact info@ciir.cs.umass.edu.
			
 
				+
			
 
				+THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
			
 
				+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
			
 
				+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
			
 
				+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
			
 
				+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
			
 
				+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
			
 
				+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
			
 
				+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
			
 
				+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
			
 
				+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
			
 
				+SUCH DAMAGE.
			
 
				+
			
 
				+The following license applies to the Morfologik project:
			
 
				+
			
 
				+Copyright (c) 2006 Dawid Weiss
			
 
				+Copyright (c) 2007-2011 Dawid Weiss, Marcin Miłkowski
			
 
				+All rights reserved.
			
 
				+
			
 
				+Redistribution and use in source and binary forms, with or without modification, 
			
 
				+are permitted provided that the following conditions are met:
			
 
				+
			
 
				+    * Redistributions of source code must retain the above copyright notice, 
			
 
				+    this list of conditions and the following disclaimer.
			
 
				+    
			
 
				+    * Redistributions in binary form must reproduce the above copyright notice, 
			
 
				+    this list of conditions and the following disclaimer in the documentation 
			
 
				+    and/or other materials provided with the distribution.
			
 
				+    
			
 
				+    * Neither the name of Morfologik nor the names of its contributors 
			
 
				+    may be used to endorse or promote products derived from this software 
			
 
				+    without specific prior written permission.
			
 
				+
			
 
				+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
			
 
				+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
			
 
				+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
			
 
				+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 
			
 
				+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
			
 
				+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
			
 
				+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
			
 
				+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
			
 
				+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
			
 
				+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				+
			
 
				+---
			
 
				+
			
 
				+The dictionary comes from Morfologik project. Morfologik uses data from 
			
 
				+Polish ispell/myspell dictionary hosted at http://www.sjp.pl/slownik/en/ and 
			
 
				+is licenced on the terms of (inter alia) LGPL and Creative Commons 
			
 
				+ShareAlike. The part-of-speech tags were added in Morfologik project and
			
 
				+are not found in the data from sjp.pl. The tagset is similar to IPI PAN
			
 
				+tagset.
			
 
				+
			
 
				+---
			
 
				+
			
 
				+The following license applies to the Morfeusz project,
			
 
				+used by org.apache.lucene.analysis.morfologik.
			
 
				+
			
 
				+BSD-licensed dictionary of Polish (SGJP)
			
 
				+http://sgjp.pl/morfeusz/
			
 
				+
			
 
				+Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński, 
			
 
				+             Marcin Woliński, Robert Wołosz
			
 
				+
			
 
				+All rights reserved.
			
 
				+
			
 
				+Redistribution and  use in  source and binary  forms, with  or without
			
 
				+modification, are permitted provided that the following conditions are
			
 
				+met:
			
 
				+
			
 
				+1. Redistributions of source code must retain the above copyright
			
 
				+   notice, this list of conditions and the following disclaimer.
			
 
				+
			
 
				+2. Redistributions in binary form must reproduce the above copyright
			
 
				+   notice, this list of conditions and the following disclaimer in the
			
 
				+   documentation and/or other materials provided with the
			
 
				+   distribution.
			
 
				+
			
 
				+THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
			
 
				+OR  IMPLIED WARRANTIES,  INCLUDING, BUT  NOT LIMITED  TO,  THE IMPLIED
			
 
				+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
			
 
				+DISCLAIMED.  IN NO EVENT  SHALL COPYRIGHT  HOLDERS OR  CONTRIBUTORS BE
			
 
				+LIABLE FOR  ANY DIRECT,  INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR
			
 
				+CONSEQUENTIAL DAMAGES  (INCLUDING, BUT NOT LIMITED  TO, PROCUREMENT OF
			
 
				+SUBSTITUTE  GOODS OR  SERVICES;  LOSS  OF USE,  DATA,  OR PROFITS;  OR
			
 
				+BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF LIABILITY,
			
 
				+WHETHER IN  CONTRACT, STRICT LIABILITY, OR  TORT (INCLUDING NEGLIGENCE
			
 
				+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
			
 
				+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
--- a/plugins/analysis-nori/licenses/lucene-NOTICE.txt
+++ b/plugins/analysis-nori/licenses/lucene-NOTICE.txt
@@ -0,0 +1,204 @@
 
				+Apache Lucene
			
 
				+Copyright 2001-2018 The Apache Software Foundation
			
 
				+
			
 
				+This product includes software developed at
			
 
				+The Apache Software Foundation (http://www.apache.org/).
			
 
				+
			
 
				+Includes software from other Apache Software Foundation projects,
			
 
				+including, but not limited to:
			
 
				+ - Apache Ant
			
 
				+ - Apache Jakarta Regexp
			
 
				+ - Apache Commons
			
 
				+ - Apache Xerces
			
 
				+
			
 
				+ICU4J (under analysis/icu) is licensed under an MIT-style license
			
 
				+and Copyright (c) 1995-2008 International Business Machines Corporation and others
			
 
				+
			
 
				+Some data files (under analysis/icu/src/data) are derived from Unicode data such
			
 
				+as the Unicode Character Database. See http://unicode.org/copyright.html for more
			
 
				+details.
			
 
				+
			
 
				+Brics Automaton (under core/src/java/org/apache/lucene/util/automaton) is
			
 
				+BSD-licensed, created by Anders Møller. See http://www.brics.dk/automaton/
			
 
				+
			
 
				+The levenshtein automata tables (under core/src/java/org/apache/lucene/util/automaton) were
			
 
				+automatically generated with the moman/finenight FSA library, created by
			
 
				+Jean-Philippe Barrette-LaPierre. This library is available under an MIT license,
			
 
				+see http://sites.google.com/site/rrettesite/moman and
			
 
				+http://bitbucket.org/jpbarrette/moman/overview/
			
 
				+
			
 
				+The class org.apache.lucene.util.WeakIdentityMap was derived from
			
 
				+the Apache CXF project and is Apache License 2.0.
			
 
				+
			
 
				+The Google Code Prettify is Apache License 2.0.
			
 
				+See http://code.google.com/p/google-code-prettify/
			
 
				+
			
 
				+JUnit (junit-4.10) is licensed under the Common Public License v. 1.0
			
 
				+See http://junit.sourceforge.net/cpl-v10.html
			
 
				+
			
 
				+This product includes code (JaspellTernarySearchTrie) from Java Spelling Checking
			
 
				+Package (jaspell): http://jaspell.sourceforge.net/
			
 
				+License: The BSD License (http://www.opensource.org/licenses/bsd-license.php)
			
 
				+
			
 
				+The snowball stemmers in
			
 
				+  analysis/common/src/java/net/sf/snowball
			
 
				+were developed by Martin Porter and Richard Boulton.
			
 
				+The snowball stopword lists in
			
 
				+  analysis/common/src/resources/org/apache/lucene/analysis/snowball
			
 
				+were developed by Martin Porter and Richard Boulton.
			
 
				+The full snowball package is available from
			
 
				+  http://snowball.tartarus.org/
			
 
				+
			
 
				+The KStem stemmer in
			
 
				+  analysis/common/src/org/apache/lucene/analysis/en
			
 
				+was developed by Bob Krovetz and Sergio Guzman-Lara (CIIR-UMass Amherst)
			
 
				+under the BSD-license.
			
 
				+
			
 
				+The Arabic,Persian,Romanian,Bulgarian, Hindi and Bengali analyzers (common) come with a default
			
 
				+stopword list that is BSD-licensed created by Jacques Savoy.  These files reside in:
			
 
				+analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt,
			
 
				+analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt,
			
 
				+analysis/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt,
			
 
				+analysis/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt,
			
 
				+analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt,
			
 
				+analysis/common/src/resources/org/apache/lucene/analysis/bn/stopwords.txt
			
 
				+See http://members.unine.ch/jacques.savoy/clef/index.html.
			
 
				+
			
 
				+The German,Spanish,Finnish,French,Hungarian,Italian,Portuguese,Russian and Swedish light stemmers
			
 
				+(common) are based on BSD-licensed reference implementations created by Jacques Savoy and
			
 
				+Ljiljana Dolamic. These files reside in:
			
 
				+analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java
			
 
				+analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java
			
 
				+analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java
			
 
				+analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java
			
 
				+analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java
			
 
				+analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java
			
 
				+analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java
			
 
				+analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java
			
 
				+analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java
			
 
				+analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java
			
 
				+analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java
			
 
				+
			
 
				+The Stempel analyzer (stempel) includes BSD-licensed software developed
			
 
				+by the Egothor project http://egothor.sf.net/, created by Leo Galambos, Martin Kvapil,
			
 
				+and Edmond Nolan.
			
 
				+
			
 
				+The Polish analyzer (stempel) comes with a default
			
 
				+stopword list that is BSD-licensed created by the Carrot2 project. The file resides
			
 
				+in stempel/src/resources/org/apache/lucene/analysis/pl/stopwords.txt.
			
 
				+See http://project.carrot2.org/license.html.
			
 
				+
			
 
				+The SmartChineseAnalyzer source code (smartcn) was
			
 
				+provided by Xiaoping Gao and copyright 2009 by www.imdict.net.
			
 
				+
			
 
				+WordBreakTestUnicode_*.java (under modules/analysis/common/src/test/)
			
 
				+is derived from Unicode data such as the Unicode Character Database.
			
 
				+See http://unicode.org/copyright.html for more details.
			
 
				+
			
 
				+The Morfologik analyzer (morfologik) includes BSD-licensed software
			
 
				+developed by Dawid Weiss and Marcin Miłkowski (http://morfologik.blogspot.com/).
			
 
				+
			
 
				+Morfologik uses data from Polish ispell/myspell dictionary
			
 
				+(http://www.sjp.pl/slownik/en/) licenced on the terms of (inter alia)
			
 
				+LGPL and Creative Commons ShareAlike.
			
 
				+
			
 
				+Morfologik includes data from BSD-licensed dictionary of Polish (SGJP)
			
 
				+(http://sgjp.pl/morfeusz/)
			
 
				+
			
 
				+Servlet-api.jar and javax.servlet-*.jar are under the CDDL license, the original
			
 
				+source code for this can be found at http://www.eclipse.org/jetty/downloads.php
			
 
				+
			
 
				+===========================================================================
			
 
				+Kuromoji Japanese Morphological Analyzer - Apache Lucene Integration
			
 
				+===========================================================================
			
 
				+
			
 
				+This software includes a binary and/or source version of data from
			
 
				+
			
 
				+  mecab-ipadic-2.7.0-20070801
			
 
				+
			
 
				+which can be obtained from
			
 
				+
			
 
				+  http://atilika.com/releases/mecab-ipadic/mecab-ipadic-2.7.0-20070801.tar.gz
			
 
				+
			
 
				+or
			
 
				+
			
 
				+  http://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/mecab-ipadic-2.7.0-20070801.tar.gz
			
 
				+
			
 
				+===========================================================================
			
 
				+mecab-ipadic-2.7.0-20070801 Notice
			
 
				+===========================================================================
			
 
				+
			
 
				+Nara Institute of Science and Technology (NAIST),
			
 
				+the copyright holders, disclaims all warranties with regard to this
			
 
				+software, including all implied warranties of merchantability and
			
 
				+fitness, in no event shall NAIST be liable for
			
 
				+any special, indirect or consequential damages or any damages
			
 
				+whatsoever resulting from loss of use, data or profits, whether in an
			
 
				+action of contract, negligence or other tortious action, arising out
			
 
				+of or in connection with the use or performance of this software.
			
 
				+
			
 
				+A large portion of the dictionary entries
			
 
				+originate from ICOT Free Software.  The following conditions for ICOT
			
 
				+Free Software applies to the current dictionary as well.
			
 
				+
			
 
				+Each User may also freely distribute the Program, whether in its
			
 
				+original form or modified, to any third party or parties, PROVIDED
			
 
				+that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
			
 
				+on, or be attached to, the Program, which is distributed substantially
			
 
				+in the same form as set out herein and that such intended
			
 
				+distribution, if actually made, will neither violate or otherwise
			
 
				+contravene any of the laws and regulations of the countries having
			
 
				+jurisdiction over the User or the intended distribution itself.
			
 
				+
			
 
				+NO WARRANTY
			
 
				+
			
 
				+The program was produced on an experimental basis in the course of the
			
 
				+research and development conducted during the project and is provided
			
 
				+to users as so produced on an experimental basis.  Accordingly, the
			
 
				+program is provided without any warranty whatsoever, whether express,
			
 
				+implied, statutory or otherwise.  The term "warranty" used herein
			
 
				+includes, but is not limited to, any warranty of the quality,
			
 
				+performance, merchantability and fitness for a particular purpose of
			
 
				+the program and the nonexistence of any infringement or violation of
			
 
				+any right of any third party.
			
 
				+
			
 
				+Each user of the program will agree and understand, and be deemed to
			
 
				+have agreed and understood, that there is no warranty whatsoever for
			
 
				+the program and, accordingly, the entire risk arising from or
			
 
				+otherwise connected with the program is assumed by the user.
			
 
				+
			
 
				+Therefore, neither ICOT, the copyright holder, or any other
			
 
				+organization that participated in or was otherwise related to the
			
 
				+development of the program and their respective officials, directors,
			
 
				+officers and other employees shall be held liable for any and all
			
 
				+damages, including, without limitation, general, special, incidental
			
 
				+and consequential damages, arising out of or otherwise in connection
			
 
				+with the use or inability to use the program or any product, material
			
 
				+or result produced or otherwise obtained by using the program,
			
 
				+regardless of whether they have been advised of, or otherwise had
			
 
				+knowledge of, the possibility of such damages at any time during the
			
 
				+project or thereafter.  Each user will be deemed to have agreed to the
			
 
				+foregoing by his or her commencement of use of the program.  The term
			
 
				+"use" as used herein includes, but is not limited to, the use,
			
 
				+modification, copying and distribution of the program and the
			
 
				+production of secondary products from the program.
			
 
				+
			
 
				+In the case where the program, whether in its original form or
			
 
				+modified, was distributed or delivered to or received by a user from
			
 
				+any person, organization or entity other than ICOT, unless it makes or
			
 
				+grants independently of ICOT any specific warranty to the user in
			
 
				+writing, such person, organization or entity, will also be exempted
			
 
				+from and not be held liable to the user for any such damages as noted
			
 
				+above as far as the program is concerned.
			
 
				+
			
 
				+===========================================================================
			
 
				+Nori Korean Morphological Analyzer - Apache Lucene Integration
			
 
				+===========================================================================
			
 
				+
			
 
				+This software includes a binary and/or source version of data from
			
 
				+
			
 
				+  mecab-ko-dic-2.0.3-20170922
			
 
				+
			
 
				+which can be obtained from
			
 
				+
			
 
				+  https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.0.3-20170922.tar.gz
			
--- a/plugins/analysis-nori/licenses/lucene-analyzers-nori-7.4.0-snapshot-1ed95c097b.jar.sha1
+++ b/plugins/analysis-nori/licenses/lucene-analyzers-nori-7.4.0-snapshot-1ed95c097b.jar.sha1
@@ -0,0 +1 @@
 
				+a7daed3dc3a67674862002f315cd9193944de783
			
--- a/plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriAnalyzerProvider.java
+++ b/plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriAnalyzerProvider.java
@@ -0,0 +1,54 @@
 
				+/*
			
 
				+ * Licensed to Elasticsearch under one or more contributor
			
 
				+ * license agreements. See the NOTICE file distributed with
			
 
				+ * this work for additional information regarding copyright
			
 
				+ * ownership. Elasticsearch licenses this file to you under
			
 
				+ * the Apache License, Version 2.0 (the "License"); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing,
			
 
				+ * software distributed under the License is distributed on an
			
 
				+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
			
 
				+ * KIND, either express or implied.  See the License for the
			
 
				+ * specific language governing permissions and limitations
			
 
				+ * under the License.
			
 
				+ */
			
 
				+
			
 
				+package org.elasticsearch.index.analysis;
			
 
				+
			
 
				+import java.util.List;
			
 
				+import java.util.Set;
			
 
				+import org.apache.lucene.analysis.ko.KoreanAnalyzer;
			
 
				+import org.apache.lucene.analysis.ko.KoreanPartOfSpeechStopFilter;
			
 
				+import org.apache.lucene.analysis.ko.KoreanTokenizer;
			
 
				+import org.apache.lucene.analysis.ko.dict.UserDictionary;
			
 
				+import org.apache.lucene.analysis.ko.POS;
			
 
				+import org.elasticsearch.common.settings.Settings;
			
 
				+import org.elasticsearch.env.Environment;
			
 
				+import org.elasticsearch.index.IndexSettings;
			
 
				+
			
 
				+import static org.elasticsearch.index.analysis.NoriPartOfSpeechStopFilterFactory.resolvePOSList;
			
 
				+
			
 
				+
			
 
				+public class NoriAnalyzerProvider extends AbstractIndexAnalyzerProvider<KoreanAnalyzer> {
			
 
				+    /** Built once by the constructor; {@link #get()} returns this same instance on every call. */
			
 
				+    private final KoreanAnalyzer analyzer;
			
 
				+
			
 
				+    /** Configures the analyzer from {@code decompound_mode}, {@code user_dictionary} and {@code stoptags}; the default stop tags apply when no tag list is returned. */
			
 
				+    public NoriAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
			
 
				+        super(indexSettings, name, settings);
			
 
				+        final KoreanTokenizer.DecompoundMode decompoundMode = NoriTokenizerFactory.getMode(settings);
			
 
				+        final UserDictionary dictionary = NoriTokenizerFactory.getUserDictionary(env, settings);
			
 
				+        final List<String> configuredTags = Analysis.getWordList(env, settings, "stoptags");
			
 
				+        final Set<POS.Tag> tags = configuredTags == null ? KoreanPartOfSpeechStopFilter.DEFAULT_STOP_TAGS : resolvePOSList(configuredTags);
			
 
				+        this.analyzer = new KoreanAnalyzer(dictionary, decompoundMode, tags, false);
			
 
				+    }
			
 
				+
			
 
				+    @Override
			
 
				+    public KoreanAnalyzer get() {
			
 
				+        return this.analyzer;
			
 
				+    }
			
 
				+}
			
--- a/plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriPartOfSpeechStopFilterFactory.java
+++ b/plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriPartOfSpeechStopFilterFactory.java
@@ -0,0 +1,55 @@
 
				+/*
			
 
				+ * Licensed to Elasticsearch under one or more contributor
			
 
				+ * license agreements. See the NOTICE file distributed with
			
 
				+ * this work for additional information regarding copyright
			
 
				+ * ownership. Elasticsearch licenses this file to you under
			
 
				+ * the Apache License, Version 2.0 (the "License"); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing,
			
 
				+ * software distributed under the License is distributed on an
			
 
				+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
			
 
				+ * KIND, either express or implied.  See the License for the
			
 
				+ * specific language governing permissions and limitations
			
 
				+ * under the License.
			
 
				+ */
			
 
				+
			
 
				+package org.elasticsearch.index.analysis;
			
 
				+
			
 
				+import org.apache.lucene.analysis.TokenStream;
			
 
				+import org.apache.lucene.analysis.ko.KoreanPartOfSpeechStopFilter;
			
 
				+import org.apache.lucene.analysis.ko.POS;
			
 
				+import org.elasticsearch.common.settings.Settings;
			
 
				+import org.elasticsearch.env.Environment;
			
 
				+import org.elasticsearch.index.IndexSettings;
			
 
				+
			
 
				+import java.util.HashSet;
			
 
				+import java.util.List;
			
 
				+import java.util.Set;
			
 
				+
			
 
				+public class NoriPartOfSpeechStopFilterFactory extends AbstractTokenFilterFactory {
			
 
				+    /** Tags passed to every {@link KoreanPartOfSpeechStopFilter} created by this factory. */
			
 
				+    private final Set<POS.Tag> stopTags;
			
 
				+    /** Reads the {@code stoptags} word list; when none is returned, {@link KoreanPartOfSpeechStopFilter#DEFAULT_STOP_TAGS} is used. */
			
 
				+    public NoriPartOfSpeechStopFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
			
 
				+        super(indexSettings, name, settings);
			
 
				+        final List<String> configuredTags = Analysis.getWordList(env, settings, "stoptags");
			
 
				+        this.stopTags = configuredTags == null ? KoreanPartOfSpeechStopFilter.DEFAULT_STOP_TAGS : resolvePOSList(configuredTags);
			
 
				+    }
			
 
				+
			
 
				+    /** Wraps {@code tokenStream} in a stop filter configured with {@link #stopTags}. */
			
 
				+    @Override
			
 
				+    public TokenStream create(TokenStream tokenStream) {
			
 
				+        return new KoreanPartOfSpeechStopFilter(tokenStream, this.stopTags);
			
 
				+    }
			
 
				+
			
 
				+    /** Converts tag names to {@link POS.Tag} values through {@link POS#resolveTag(String)}; repeated names collapse into one set entry. */
			
 
				+    static Set<POS.Tag> resolvePOSList(List<String> tagList) {
			
 
				+        final Set<POS.Tag> resolved = new HashSet<>();
			
 
				+        tagList.forEach(tagName -> resolved.add(POS.resolveTag(tagName)));
			
 
				+        return resolved;
			
 
				+    }
			
 
				+}
			
--- a/plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriReadingFormFilterFactory.java
+++ b/plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriReadingFormFilterFactory.java
@@ -0,0 +1,37 @@
 
				+/*
			
 
				+ * Licensed to Elasticsearch under one or more contributor
			
 
				+ * license agreements. See the NOTICE file distributed with
			
 
				+ * this work for additional information regarding copyright
			
 
				+ * ownership. Elasticsearch licenses this file to you under
			
 
				+ * the Apache License, Version 2.0 (the "License"); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing,
			
 
				+ * software distributed under the License is distributed on an
			
 
				+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
			
 
				+ * KIND, either express or implied.  See the License for the
			
 
				+ * specific language governing permissions and limitations
			
 
				+ * under the License.
			
 
				+ */
			
 
				+
			
 
				+package org.elasticsearch.index.analysis;
			
 
				+
			
 
				+import org.apache.lucene.analysis.TokenStream;
			
 
				+import org.apache.lucene.analysis.ko.KoreanReadingFormFilter;
			
 
				+import org.elasticsearch.common.settings.Settings;
			
 
				+import org.elasticsearch.env.Environment;
			
 
				+import org.elasticsearch.index.IndexSettings;
			
 
				+
			
 
				+public class NoriReadingFormFilterFactory extends AbstractTokenFilterFactory { // Token filter factory backed by Lucene's KoreanReadingFormFilter.
			
 
				+    public NoriReadingFormFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { // environment is never read in this constructor
			
 
				+        super(indexSettings, name, settings);
			
 
				+    }
			
 
				+    // Returns a new KoreanReadingFormFilter around the given stream on every call; the factory declares no fields of its own.
			
 
				+    @Override
			
 
				+    public TokenStream create(TokenStream tokenStream) {
			
 
				+        return new KoreanReadingFormFilter(tokenStream);
			
 
				+    }
			
 
				+}
			
--- a/plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriTokenizerFactory.java
+++ b/plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriTokenizerFactory.java
@@ -0,0 +1,72 @@
 
				+/*
			
 
				+ * Licensed to Elasticsearch under one or more contributor
			
 
				+ * license agreements. See the NOTICE file distributed with
			
 
				+ * this work for additional information regarding copyright
			
 
				+ * ownership. Elasticsearch licenses this file to you under
			
 
				+ * the Apache License, Version 2.0 (the "License"); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing,
			
 
				+ * software distributed under the License is distributed on an
			
 
				+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
			
 
				+ * KIND, either express or implied.  See the License for the
			
 
				+ * specific language governing permissions and limitations
			
 
				+ * under the License.
			
 
				+ */
			
 
				+
			
 
				+package org.elasticsearch.index.analysis;
			
 
				+
			
 
				+import org.apache.lucene.analysis.Tokenizer;
			
 
				+import org.apache.lucene.analysis.ko.KoreanTokenizer;
			
 
				+import org.apache.lucene.analysis.ko.dict.UserDictionary;
			
 
				+import org.elasticsearch.ElasticsearchException;
			
 
				+import org.elasticsearch.common.settings.Settings;
			
 
				+import org.elasticsearch.env.Environment;
			
 
				+import org.elasticsearch.index.IndexSettings;
			
 
				+
			
 
				+import java.io.IOException;
			
 
				+import java.io.Reader;
			
 
				+import java.util.Locale;
			
 
				+
			
 
				+public class NoriTokenizerFactory extends AbstractTokenizerFactory {
			
 
				+    private static final String USER_DICT_OPTION = "user_dictionary";
			
 
				+    private final UserDictionary userDictionary;
			
 
				+    private final KoreanTokenizer.DecompoundMode decompoundMode;
			
 
				+
			
 
				+    /** Resolves the decompound mode and the optional user dictionary once; {@link #create()} reuses both for every tokenizer. */
			
 
				+    public NoriTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
			
 
				+        super(indexSettings, name, settings);
			
 
				+        decompoundMode = getMode(settings);
			
 
				+        userDictionary = getUserDictionary(env, settings);
			
 
				+    }
			
 
				+
			
 
				+    /** Opens the dictionary referenced by {@code user_dictionary}. Returns null when no reader is provided; an {@link IOException} is rethrown as {@link ElasticsearchException}. */
			
 
				+    public static UserDictionary getUserDictionary(Environment env, Settings settings) {
			
 
				+        try (Reader reader = Analysis.getReaderFromFile(env, settings, USER_DICT_OPTION)) {
			
 
				+            return reader == null ? null : UserDictionary.open(reader);
			
 
				+        } catch (IOException e) {
			
 
				+            throw new ElasticsearchException("failed to load nori user dictionary", e);
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    /** Parses {@code decompound_mode} case-insensitively; absent means {@link KoreanTokenizer#DEFAULT_DECOMPOUND}. An unknown value raises IllegalArgumentException naming the setting and the rejected value. */
			
 
				+    public static KoreanTokenizer.DecompoundMode getMode(Settings settings) {
			
 
				+        final String modeSetting = settings.get("decompound_mode", null);
			
 
				+        if (modeSetting == null) {
			
 
				+            return KoreanTokenizer.DEFAULT_DECOMPOUND;
			
 
				+        }
			
 
				+        try {
			
 
				+            return KoreanTokenizer.DecompoundMode.valueOf(modeSetting.toUpperCase(Locale.ENGLISH));
			
 
				+        } catch (IllegalArgumentException e) {
			
 
				+            throw new IllegalArgumentException("unknown value [" + modeSetting + "] for setting [decompound_mode]", e);
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    @Override // builds a tokenizer from the dictionary and mode captured in the constructor
			
 
				+    public Tokenizer create() {
			
 
				+        return new KoreanTokenizer(KoreanTokenizer.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, userDictionary, decompoundMode, false);
			
 
				+    }
			
 
				+}
			
--- a/plugins/analysis-nori/src/main/java/org/elasticsearch/plugin/analysis/nori/AnalysisNoriPlugin.java
+++ b/plugins/analysis-nori/src/main/java/org/elasticsearch/plugin/analysis/nori/AnalysisNoriPlugin.java
@@ -0,0 +1,57 @@
 
				+/*
			
 
				+ * Licensed to Elasticsearch under one or more contributor
			
 
				+ * license agreements. See the NOTICE file distributed with
			
 
				+ * this work for additional information regarding copyright
			
 
				+ * ownership. Elasticsearch licenses this file to you under
			
 
				+ * the Apache License, Version 2.0 (the "License"); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing,
			
 
				+ * software distributed under the License is distributed on an
			
 
				+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
			
 
				+ * KIND, either express or implied.  See the License for the
			
 
				+ * specific language governing permissions and limitations
			
 
				+ * under the License.
			
 
				+ */
			
 
				+
			
 
				+package org.elasticsearch.plugin.analysis.nori;
			
 
				+
			
 
				+import org.apache.lucene.analysis.Analyzer;
			
 
				+import org.elasticsearch.index.analysis.AnalyzerProvider;
			
 
				+import org.elasticsearch.index.analysis.NoriAnalyzerProvider;
			
 
				+import org.elasticsearch.index.analysis.NoriPartOfSpeechStopFilterFactory;
			
 
				+import org.elasticsearch.index.analysis.NoriReadingFormFilterFactory;
			
 
				+import org.elasticsearch.index.analysis.NoriTokenizerFactory;
			
 
				+import org.elasticsearch.index.analysis.TokenFilterFactory;
			
 
				+import org.elasticsearch.index.analysis.TokenizerFactory;
			
 
				+import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
			
 
				+import org.elasticsearch.plugins.AnalysisPlugin;
			
 
				+import org.elasticsearch.plugins.Plugin;
			
 
				+
			
 
				+import java.util.HashMap;
			
 
				+import java.util.Map;
			
 
				+
			
 
				+import static java.util.Collections.singletonMap;
			
 
				+
			
 
				+public class AnalysisNoriPlugin extends Plugin implements AnalysisPlugin { // Exposes the nori token filters, tokenizer and analyzer under their registered names.
			
 
				+    @Override
			
 
				+    public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
			
 
				+        Map<String, AnalysisProvider<TokenFilterFactory>> extra = new HashMap<>(); // registered filter name -> factory constructor
			
 
				+        extra.put("nori_part_of_speech", NoriPartOfSpeechStopFilterFactory::new);
			
 
				+        extra.put("nori_readingform", NoriReadingFormFilterFactory::new);
			
 
				+        return extra;
			
 
				+    }
			
 
				+    // Single tokenizer: "nori_tokenizer", created by NoriTokenizerFactory.
			
 
				+    @Override
			
 
				+    public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
			
 
				+        return singletonMap("nori_tokenizer", NoriTokenizerFactory::new);
			
 
				+    }
			
 
				+    // Single analyzer: "nori", provided by NoriAnalyzerProvider.
			
 
				+    @Override
			
 
				+    public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
			
 
				+        return singletonMap("nori", NoriAnalyzerProvider::new);
			
 
				+    }
			
 
				+}
			
--- a/plugins/analysis-nori/src/test/java/org/elasticsearch/index/analysis/AnalysisNoriFactoryTests.java
+++ b/plugins/analysis-nori/src/test/java/org/elasticsearch/index/analysis/AnalysisNoriFactoryTests.java
@@ -0,0 +1,48 @@
 
				+/*
			
 
				+ * Licensed to Elasticsearch under one or more contributor
			
 
				+ * license agreements. See the NOTICE file distributed with
			
 
				+ * this work for additional information regarding copyright
			
 
				+ * ownership. Elasticsearch licenses this file to you under
			
 
				+ * the Apache License, Version 2.0 (the "License"); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing,
			
 
				+ * software distributed under the License is distributed on an
			
 
				+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
			
 
				+ * KIND, either express or implied.  See the License for the
			
 
				+ * specific language governing permissions and limitations
			
 
				+ * under the License.
			
 
				+ */
			
 
				+
			
 
				+package org.elasticsearch.index.analysis;
			
 
				+
			
 
				+import org.apache.lucene.analysis.ko.KoreanTokenizerFactory;
			
 
				+import org.elasticsearch.indices.analysis.AnalysisFactoryTestCase;
			
 
				+import org.elasticsearch.plugin.analysis.nori.AnalysisNoriPlugin;
			
 
				+
			
 
				+import java.util.HashMap;
			
 
				+import java.util.Map;
			
 
				+
			
 
				+public class AnalysisNoriFactoryTests extends AnalysisFactoryTestCase {
			
 
				+    public AnalysisNoriFactoryTests() {
			
 
				+        super(new AnalysisNoriPlugin());
			
 
				+    }
			
 
				+    /** Extends the inherited tokenizer table with the {@code korean} entry. NOTE(review): the value is Lucene's factory class, not NoriTokenizerFactory - confirm this is intended. */
			
 
				+    @Override
			
 
				+    protected Map<String, Class<?>> getTokenizers() {
			
 
				+        final Map<String, Class<?>> expected = new HashMap<>(super.getTokenizers());
			
 
				+        expected.put("korean", KoreanTokenizerFactory.class);
			
 
				+        return expected;
			
 
				+    }
			
 
				+    /** Extends the inherited token-filter table with the two Korean entries, mapped to this plugin's factory classes. */
			
 
				+    @Override
			
 
				+    protected Map<String, Class<?>> getTokenFilters() {
			
 
				+        final Map<String, Class<?>> expected = new HashMap<>(super.getTokenFilters());
			
 
				+        expected.put("koreanpartofspeechstop", NoriPartOfSpeechStopFilterFactory.class);
			
 
				+        expected.put("koreanreadingform", NoriReadingFormFilterFactory.class);
			
 
				+        return expected;
			
 
				+    }
			
 
				+}
			
--- a/plugins/analysis-nori/src/test/java/org/elasticsearch/index/analysis/NoriAnalysisTests.java
+++ b/plugins/analysis-nori/src/test/java/org/elasticsearch/index/analysis/NoriAnalysisTests.java
@@ -0,0 +1,147 @@
 
				+/*
			
 
				+ * Licensed to Elasticsearch under one or more contributor
			
 
				+ * license agreements. See the NOTICE file distributed with
			
 
				+ * this work for additional information regarding copyright
			
 
				+ * ownership. Elasticsearch licenses this file to you under
			
 
				+ * the Apache License, Version 2.0 (the "License"); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing,
			
 
				+ * software distributed under the License is distributed on an
			
 
				+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
			
 
				+ * KIND, either express or implied.  See the License for the
			
 
				+ * specific language governing permissions and limitations
			
 
				+ * under the License.
			
 
				+ */
			
 
				+
			
 
				+package org.elasticsearch.index.analysis;
			
 
				+
			
 
				+import org.apache.lucene.analysis.Analyzer;
			
 
				+import org.apache.lucene.analysis.TokenStream;
			
 
				+import org.apache.lucene.analysis.Tokenizer;
			
 
				+import org.apache.lucene.analysis.ko.KoreanAnalyzer;
			
 
				+import org.apache.lucene.analysis.ko.KoreanTokenizer;
			
 
				+import org.elasticsearch.Version;
			
 
				+import org.elasticsearch.cluster.metadata.IndexMetaData;
			
 
				+import org.elasticsearch.common.settings.Settings;
			
 
				+import org.elasticsearch.env.Environment;
			
 
				+import org.elasticsearch.plugin.analysis.nori.AnalysisNoriPlugin;
			
 
				+import org.elasticsearch.test.ESTestCase.TestAnalysis;
			
 
				+import org.elasticsearch.test.ESTokenStreamTestCase;
			
 
				+
			
 
				+import java.io.IOException;
			
 
				+import java.io.InputStream;
			
 
				+import java.io.StringReader;
			
 
				+import java.nio.file.Files;
			
 
				+import java.nio.file.Path;
			
 
				+
			
 
				+import static org.hamcrest.Matchers.instanceOf;
			
 
				+
			
 
				+public class NoriAnalysisTests extends ESTokenStreamTestCase {
			
 
				+    /** With no analysis settings, the plugin's tokenizer, both token filters and the analyzer are available under their registered names. */
			
 
				+    public void testDefaultsNoriAnalysis() throws IOException {
			
 
				+        TestAnalysis analysis = createTestAnalysis(Settings.EMPTY);
			
 
				+
			
 
				+        TokenizerFactory tokenizerFactory = analysis.tokenizer.get("nori_tokenizer");
			
 
				+        assertThat(tokenizerFactory, instanceOf(NoriTokenizerFactory.class));
			
 
				+
			
 
				+        TokenFilterFactory filterFactory = analysis.tokenFilter.get("nori_part_of_speech");
			
 
				+        assertThat(filterFactory, instanceOf(NoriPartOfSpeechStopFilterFactory.class));
			
 
				+
			
 
				+        filterFactory = analysis.tokenFilter.get("nori_readingform");
			
 
				+        assertThat(filterFactory, instanceOf(NoriReadingFormFilterFactory.class));
			
 
				+
			
 
				+        IndexAnalyzers indexAnalyzers = analysis.indexAnalyzers;
			
 
				+        NamedAnalyzer analyzer = indexAnalyzers.get("nori");
			
 
				+        assertThat(analyzer.analyzer(), instanceOf(KoreanAnalyzer.class));
			
 
				+    }
			
 
				+    /** A custom {@code nori} analyzer applies the configured stop tags (NR, SP) and, in {@code mixed} mode, emits 가늠표 together with 가늠 and 표. */
			
 
				+    public void testNoriAnalyzer() throws Exception {
			
 
				+        Settings settings = Settings.builder()
			
 
				+            .put("index.analysis.analyzer.my_analyzer.type", "nori")
			
 
				+            .put("index.analysis.analyzer.my_analyzer.stoptags", "NR, SP")
			
 
				+            .put("index.analysis.analyzer.my_analyzer.decompound_mode", "mixed")
			
 
				+            .build();
			
 
				+        TestAnalysis analysis = createTestAnalysis(settings);
			
 
				+        Analyzer analyzer = analysis.indexAnalyzers.get("my_analyzer");
			
 
				+        try (TokenStream stream = analyzer.tokenStream("", "여섯 용이")) {
			
 
				+            assertTokenStreamContents(stream, new String[] {"용", "이"});
			
 
				+        }
			
 
				+
			
 
				+        try (TokenStream stream = analyzer.tokenStream("", "가늠표")) {
			
 
				+            assertTokenStreamContents(stream, new String[] {"가늠표", "가늠", "표"});
			
 
				+        }
			
 
				+    }
			
 
				+    /** A {@code nori} analyzer configured with {@code user_dict.txt} segments 세종시 and c++world according to the dictionary entries. */
			
 
				+    public void testNoriAnalyzerUserDict() throws Exception {
			
 
				+        Settings settings = Settings.builder()
			
 
				+            .put("index.analysis.analyzer.my_analyzer.type", "nori")
			
 
				+            .put("index.analysis.analyzer.my_analyzer.user_dictionary", "user_dict.txt")
			
 
				+            .build();
			
 
				+        TestAnalysis analysis = createTestAnalysis(settings);
			
 
				+        Analyzer analyzer = analysis.indexAnalyzers.get("my_analyzer");
			
 
				+        try (TokenStream stream = analyzer.tokenStream("", "세종시")) {
			
 
				+            assertTokenStreamContents(stream, new String[] {"세종", "시"});
			
 
				+        }
			
 
				+
			
 
				+        try (TokenStream stream = analyzer.tokenStream("", "c++world")) {
			
 
				+            assertTokenStreamContents(stream, new String[] {"c++", "world"});
			
 
				+        }
			
 
				+    }
			
 
				+    /** The bare {@code nori_tokenizer} in {@code mixed} mode: no tokens are filtered, and 가늠표 is emitted together with 가늠 and 표. */
			
 
				+    public void testNoriTokenizer() throws Exception {
			
 
				+        Settings settings = Settings.builder()
			
 
				+            .put("index.analysis.tokenizer.my_tokenizer.type", "nori_tokenizer")
			
 
				+            .put("index.analysis.tokenizer.my_tokenizer.decompound_mode", "mixed")
			
 
				+            .build();
			
 
				+        TestAnalysis analysis = createTestAnalysis(settings);
			
 
				+        Tokenizer tokenizer = analysis.tokenizer.get("my_tokenizer").create();
			
 
				+        tokenizer.setReader(new StringReader("뿌리가 깊은 나무"));
			
 
				+        assertTokenStreamContents(tokenizer, new String[] {"뿌리", "가", "깊", "은", "나무"});
			
 
				+        tokenizer.setReader(new StringReader("가늠표"));
			
 
				+        assertTokenStreamContents(tokenizer, new String[] {"가늠표", "가늠", "표"});
			
 
				+    }
			
 
				+    /** {@code nori_part_of_speech} with stop tags NR and SP leaves only 용 and 이 from "여섯 용이". */
			
 
				+    public void testNoriPartOfSpeech() throws IOException {
			
 
				+        Settings settings = Settings.builder()
			
 
				+            .put("index.analysis.filter.my_filter.type", "nori_part_of_speech")
			
 
				+            .put("index.analysis.filter.my_filter.stoptags", "NR, SP")
			
 
				+            .build();
			
 
				+        TestAnalysis analysis = createTestAnalysis(settings);
			
 
				+        TokenFilterFactory factory = analysis.tokenFilter.get("my_filter");
			
 
				+        Tokenizer tokenizer = new KoreanTokenizer();
			
 
				+        tokenizer.setReader(new StringReader("여섯 용이"));
			
 
				+        TokenStream stream = factory.create(tokenizer);
			
 
				+        assertTokenStreamContents(stream, new String[] {"용", "이"});
			
 
				+    }
			
 
				+    /** {@code nori_readingform} rewrites the token 鄕歌 to 향가; uses the shared helper like the other tests. */
			
 
				+    public void testNoriReadingForm() throws IOException {
			
 
				+        Settings settings = Settings.builder()
			
 
				+            .put("index.analysis.filter.my_filter.type", "nori_readingform")
			
 
				+            .build();
			
 
				+        TestAnalysis analysis = createTestAnalysis(settings);
			
 
				+        TokenFilterFactory factory = analysis.tokenFilter.get("my_filter");
			
 
				+        Tokenizer tokenizer = new KoreanTokenizer();
			
 
				+        tokenizer.setReader(new StringReader("鄕歌"));
			
 
				+        TokenStream stream = factory.create(tokenizer);
			
 
				+        assertTokenStreamContents(stream, new String[] {"향가"});
			
 
				+    }
			
 
				+    /** Builds a TestAnalysis for the nori plugin over a temp home whose {@code config/} holds {@code user_dict.txt}, merged with {@code analysisSettings}. */
			
 
				+    private TestAnalysis createTestAnalysis(Settings analysisSettings) throws IOException {
			
 
				+        Path home = createTempDir();
			
 
				+        Path config = Files.createDirectory(home.resolve("config"));
			
 
				+        // Files.copy(InputStream, Path) does not close its input, so the classpath stream is closed here.
			
 
				+        try (InputStream dict = NoriAnalysisTests.class.getResourceAsStream("user_dict.txt")) {
			
 
				+            Files.copy(dict, config.resolve("user_dict.txt"));
			
 
				+        }
			
 
				+        Settings settings = Settings.builder()
			
 
				+            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
			
 
				+            .put(Environment.PATH_HOME_SETTING.getKey(), home)
			
 
				+            .put(analysisSettings)
			
 
				+            .build();
			
 
				+        return AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new AnalysisNoriPlugin());
			
 
				+    }
			
 
				+}
			
--- a/plugins/analysis-nori/src/test/java/org/elasticsearch/index/analysis/NoriClientYamlTestSuiteIT.java
+++ b/plugins/analysis-nori/src/test/java/org/elasticsearch/index/analysis/NoriClientYamlTestSuiteIT.java
@@ -0,0 +1,39 @@
 
				+/*
			
 
				+ * Licensed to Elasticsearch under one or more contributor
			
 
				+ * license agreements. See the NOTICE file distributed with
			
 
				+ * this work for additional information regarding copyright
			
 
				+ * ownership. Elasticsearch licenses this file to you under
			
 
				+ * the Apache License, Version 2.0 (the "License"); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing,
			
 
				+ * software distributed under the License is distributed on an
			
 
				+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
			
 
				+ * KIND, either express or implied.  See the License for the
			
 
				+ * specific language governing permissions and limitations
			
 
				+ * under the License.
			
 
				+ */
			
 
				+
			
 
				+package org.elasticsearch.index.analysis;
			
 
				+
			
 
				+import com.carrotsearch.randomizedtesting.annotations.Name;
			
 
				+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
			
 
				+
			
 
				+import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate;
			
 
				+import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase;
			
 
				+
			
 
				+public class NoriClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
			
 
				+    // Passes the YAML test candidate straight through to the base suite.
			
 
				+    public NoriClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) {
			
 
				+        super(testCandidate);
			
 
				+    }
			
 
				+    // Delegates building the parameter list to ESClientYamlSuiteTestCase.createParameters().
			
 
				+    @ParametersFactory
			
 
				+    public static Iterable<Object[]> parameters() throws Exception {
			
 
				+        return ESClientYamlSuiteTestCase.createParameters();
			
 
				+    }
			
 
				+}
			
 
				+
			
--- a/plugins/analysis-nori/src/test/resources/org/elasticsearch/index/analysis/user_dict.txt
+++ b/plugins/analysis-nori/src/test/resources/org/elasticsearch/index/analysis/user_dict.txt
@@ -0,0 +1,5 @@
 
				+# Additional nouns
			
 
				+c++
			
 
				+C샤프
			
 
				+세종
			
 
				+세종시 세종 시
			
--- a/plugins/analysis-nori/src/test/resources/rest-api-spec/test/analysis_nori/10_basic.yml
+++ b/plugins/analysis-nori/src/test/resources/rest-api-spec/test/analysis_nori/10_basic.yml
@@ -0,0 +1,48 @@
 
				+# Integration tests for Korean analysis components
			
 
				+#
			
 
				+---
			
 
				+"Analyzer":  # built-in nori analyzer, no custom settings
			
 
				+    - do:
			
 
				+        indices.analyze:
			
 
				+          body:
			
 
				+            text:         뿌리가 깊은 나무
			
 
				+            analyzer:     nori
			
 
				+    - length: { tokens: 3 }  # 가 and 은, which the bare tokenizer test expects for the same text, are absent here
			
 
				+    - match:  { tokens.0.token: 뿌리 }
			
 
				+    - match:  { tokens.1.token: 깊 }
			
 
				+    - match:  { tokens.2.token: 나무 }
			
 
				+---
			
 
				+"Tokenizer":  # nori_tokenizer alone, no token filters
			
 
				+    - do:
			
 
				+        indices.analyze:
			
 
				+          body:
			
 
				+            text:         뿌리가 깊은 나무
			
 
				+            tokenizer:    nori_tokenizer
			
 
				+    - length: { tokens: 5 }  # every token is kept, including 가 and 은
			
 
				+    - match:  { tokens.0.token: 뿌리 }
			
 
				+    - match:  { tokens.1.token: 가  }
			
 
				+    - match:  { tokens.2.token: 깊  }
			
 
				+    - match:  { tokens.3.token: 은  }
			
 
				+    - match:  { tokens.4.token: 나무 }
			
 
				+---
			
 
				+"Part of speech filter":  # nori_part_of_speech used without a stoptags setting
			
 
				+    - do:
			
 
				+        indices.analyze:
			
 
				+          body:
			
 
				+            text:         뿌리가 깊은 나무
			
 
				+            tokenizer:    nori_tokenizer
			
 
				+            filter:       [nori_part_of_speech]
			
 
				+    - length: { tokens: 3 }  # 가 and 은 from the tokenizer output are removed
			
 
				+    - match:  { tokens.0.token: 뿌리 }
			
 
				+    - match:  { tokens.1.token: 깊  }
			
 
				+    - match:  { tokens.2.token: 나무 }
			
 
				+---
			
 
				+"Reading filter":  # nori_readingform applied after nori_tokenizer
			
 
				+    - do:
			
 
				+        indices.analyze:
			
 
				+          body:
			
 
				+            text:         鄕歌
			
 
				+            tokenizer:    nori_tokenizer
			
 
				+            filter:       [nori_readingform]
			
 
				+    - length: { tokens: 1 }
			
 
				+    - match:  { tokens.0.token: 향가 }  # 鄕歌 is emitted as 향가
			
--- a/plugins/analysis-nori/src/test/resources/rest-api-spec/test/analysis_nori/20_search.yml
+++ b/plugins/analysis-nori/src/test/resources/rest-api-spec/test/analysis_nori/20_search.yml
@@ -0,0 +1,32 @@
 
				+# Integration tests for Korean analysis components
			
 
				+#
			
 
				+---
			
 
				+"Index Korean content":  # end-to-end: index one document with the nori analyzer, then search it
			
 
				+  - do:
			
 
				+      indices.create:
			
 
				+        index: test
			
 
				+        body:
			
 
				+          mappings:
			
 
				+            type:
			
 
				+              properties:
			
 
				+                text:
			
 
				+                  type:     text
			
 
				+                  analyzer: nori
			
 
				+  # Index a single document into the field mapped with the nori analyzer.
			
 
				+  - do:
			
 
				+      index:
			
 
				+        index:  test
			
 
				+        type:   type
			
 
				+        id:     1
			
 
				+        body:   { "text": "뿌리가 깊은 나무는" }
			
 
				+  - do:
			
 
				+      indices.refresh: {}
			
 
				+  # Query with one word taken from the indexed sentence.
			
 
				+  - do:
			
 
				+      search:
			
 
				+        index: test
			
 
				+        body:
			
 
				+          query:
			
 
				+            match:
			
 
				+              text: 나무
			
 
				+  - match: { hits.total: 1 }  # the match query for 나무 must find the one document indexed above