1
0
Эх сурвалжийг харах

Add support for inlined user dictionary in Nori (#36123)

Add support for inlined user dictionary in Nori

This change adds a new option called `user_dictionary_rules` to the
Nori tokenizer. It can be used to set additional tokenization rules
to the Korean tokenizer directly in the settings (instead of using a file).

Closes #35842
Jim Ferenczi 6 жил өмнө
parent
commit
a53e8653f2

+ 37 - 1
docs/plugins/analysis-nori.asciidoc

@@ -70,7 +70,7 @@ The first token is mandatory and represents the custom noun that should be added
 the dictionary. For compound nouns the custom segmentation can be provided
 after the first token (`[<token 1> ... <token n>]`). The segmentation of the
 custom compound nouns is controlled by the `decompound_mode` setting.
---
+
 
 As a demonstration of how the user dictionary can be used, save the following
 dictionary to `$ES_HOME/config/userdict_ko.txt`:
@@ -153,6 +153,42 @@ The above `analyze` request returns the following:
 // TESTRESPONSE
 
 <1> This is a compound token that spans two positions (`mixed` mode).
+--
+
+`user_dictionary_rules`::
++
+--
+
+You can also inline the rules directly in the tokenizer definition using
+the `user_dictionary_rules` option:
+
+[source,js]
+--------------------------------------------------
+PUT nori_sample
+{
+  "settings": {
+    "index": {
+      "analysis": {
+        "tokenizer": {
+          "nori_user_dict": {
+            "type": "nori_tokenizer",
+            "decompound_mode": "mixed",
+            "user_dictionary_rules": ["c++", "C샤프", "세종", "세종시 세종 시"]
+          }
+        },
+        "analyzer": {
+          "my_analyzer": {
+            "type": "custom",
+            "tokenizer": "nori_user_dict"
+          }
+        }
+      }
+    }
+  }
+}
+--------------------------------------------------
+// CONSOLE
+--
 
 The `nori_tokenizer` sets a number of additional attributes per token that are used by token filters
 to modify the stream.

+ 18 - 7
plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriTokenizerFactory.java

@@ -29,10 +29,13 @@ import org.elasticsearch.index.IndexSettings;
 
 import java.io.IOException;
 import java.io.Reader;
+import java.io.StringReader;
+import java.util.List;
 import java.util.Locale;
 
 public class NoriTokenizerFactory extends AbstractTokenizerFactory {
-    private static final String USER_DICT_OPTION = "user_dictionary";
+    private static final String USER_DICT_PATH_OPTION = "user_dictionary";
+    private static final String USER_DICT_RULES_OPTION = "user_dictionary_rules";
 
     private final UserDictionary userDictionary;
     private final KoreanTokenizer.DecompoundMode decompoundMode;
@@ -44,12 +47,20 @@ public class NoriTokenizerFactory extends AbstractTokenizerFactory {
     }
 
     public static UserDictionary getUserDictionary(Environment env, Settings settings) {
-        try (Reader reader = Analysis.getReaderFromFile(env, settings, USER_DICT_OPTION)) {
-            if (reader == null) {
-                return null;
-            } else {
-                return UserDictionary.open(reader);
-            }
+        if (settings.get(USER_DICT_PATH_OPTION) != null && settings.get(USER_DICT_RULES_OPTION) != null) {
+            throw new IllegalArgumentException("It is not allowed to use [" + USER_DICT_PATH_OPTION + "] in conjunction" +
+                " with [" + USER_DICT_RULES_OPTION + "]");
+        }
+        List<String> ruleList = Analysis.getWordList(env, settings, USER_DICT_PATH_OPTION, USER_DICT_RULES_OPTION);
+        StringBuilder sb = new StringBuilder();
+        if (ruleList == null || ruleList.isEmpty()) {
+            return null;
+        }
+        for (String line : ruleList) {
+            sb.append(line).append(System.lineSeparator());
+        }
+        try (Reader rulesReader = new StringReader(sb.toString())) {
+            return UserDictionary.open(rulesReader);
         } catch (IOException e) {
             throw new ElasticsearchException("failed to load nori user dictionary", e);
         }

+ 28 - 0
plugins/analysis-nori/src/test/java/org/elasticsearch/index/analysis/NoriAnalysisTests.java

@@ -38,6 +38,7 @@ import java.io.StringReader;
 import java.nio.file.Files;
 import java.nio.file.Path;
 
+import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.instanceOf;
 
 public class NoriAnalysisTests extends ESTokenStreamTestCase {
@@ -76,6 +77,22 @@ public class NoriAnalysisTests extends ESTokenStreamTestCase {
     }
 
     public void testNoriAnalyzerUserDict() throws Exception {
+        Settings settings = Settings.builder()
+            .put("index.analysis.analyzer.my_analyzer.type", "nori")
+            .putList("index.analysis.analyzer.my_analyzer.user_dictionary_rules", "c++", "C샤프", "세종", "세종시 세종 시")
+            .build();
+        TestAnalysis analysis = createTestAnalysis(settings);
+        Analyzer analyzer = analysis.indexAnalyzers.get("my_analyzer");
+        try (TokenStream stream = analyzer.tokenStream("", "세종시")) {
+            assertTokenStreamContents(stream, new String[]{"세종", "시"});
+        }
+
+        try (TokenStream stream = analyzer.tokenStream("", "c++world")) {
+            assertTokenStreamContents(stream, new String[]{"c++", "world"});
+        }
+    }
+
+    public void testNoriAnalyzerUserDictPath() throws Exception {
         Settings settings = Settings.builder()
             .put("index.analysis.analyzer.my_analyzer.type", "nori")
             .put("index.analysis.analyzer.my_analyzer.user_dictionary", "user_dict.txt")
@@ -91,6 +108,17 @@ public class NoriAnalysisTests extends ESTokenStreamTestCase {
         }
     }
 
+    public void testNoriAnalyzerInvalidUserDictOption() throws Exception {
+        Settings settings = Settings.builder()
+            .put("index.analysis.analyzer.my_analyzer.type", "nori")
+            .put("index.analysis.analyzer.my_analyzer.user_dictionary", "user_dict.txt")
+            .putList("index.analysis.analyzer.my_analyzer.user_dictionary_rules", "c++", "C샤프", "세종", "세종시 세종 시")
+            .build();
+        IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> createTestAnalysis(settings));
+        assertThat(exc.getMessage(), containsString("It is not allowed to use [user_dictionary] in conjunction " +
+            "with [user_dictionary_rules]"));
+    }
+
     public void testNoriTokenizer() throws Exception {
         Settings settings = Settings.builder()
             .put("index.analysis.tokenizer.my_tokenizer.type", "nori_tokenizer")

+ 16 - 5
server/src/main/java/org/elasticsearch/index/analysis/Analysis.java

@@ -221,10 +221,21 @@ public class Analysis {
      *          If the word list cannot be found at either key.
      */
     public static List<String> getWordList(Environment env, Settings settings, String settingPrefix) {
-        String wordListPath = settings.get(settingPrefix + "_path", null);
+        return getWordList(env, settings, settingPrefix + "_path", settingPrefix);
+    }
+
+    /**
+     * Fetches a list of words from the specified settings file. The list should either be available at the key
+     * specified by <code>settingList</code> or in a file specified by <code>settingPath</code>.
+     *
+     * @throws IllegalArgumentException
+     *          If the word list cannot be found at either key.
+     */
+    public static List<String> getWordList(Environment env, Settings settings, String settingPath, String settingList) {
+        String wordListPath = settings.get(settingPath, null);
 
         if (wordListPath == null) {
-            List<String> explicitWordList = settings.getAsList(settingPrefix, null);
+            List<String> explicitWordList = settings.getAsList(settingList, null);
             if (explicitWordList == null) {
                 return null;
             } else {
@@ -238,11 +249,11 @@ public class Analysis {
             return loadWordList(path, "#");
         } catch (CharacterCodingException ex) {
             String message = String.format(Locale.ROOT,
-                "Unsupported character encoding detected while reading %s_path: %s - files must be UTF-8 encoded",
-                settingPrefix, path.toString());
+                "Unsupported character encoding detected while reading %s: %s - files must be UTF-8 encoded",
+                settingPath, path.toString());
             throw new IllegalArgumentException(message, ex);
         } catch (IOException ioe) {
-            String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix, path.toString());
+            String message = String.format(Locale.ROOT, "IOException while reading %s: %s", settingPath, path.toString());
             throw new IllegalArgumentException(message, ioe);
         }
     }