|
@@ -54,6 +54,9 @@ public class NoriAnalysisTests extends ESTokenStreamTestCase {
|
|
|
filterFactory = analysis.tokenFilter.get("nori_readingform");
|
|
|
assertThat(filterFactory, instanceOf(NoriReadingFormFilterFactory.class));
|
|
|
|
|
|
+ filterFactory = analysis.tokenFilter.get("nori_number");
|
|
|
+ assertThat(filterFactory, instanceOf(NoriNumberFilterFactory.class));
|
|
|
+
|
|
|
IndexAnalyzers indexAnalyzers = analysis.indexAnalyzers;
|
|
|
NamedAnalyzer analyzer = indexAnalyzers.get("nori");
|
|
|
assertThat(analyzer.analyzer(), instanceOf(KoreanAnalyzer.class));
|
|
@@ -130,6 +133,33 @@ public class NoriAnalysisTests extends ESTokenStreamTestCase {
|
|
|
assertTokenStreamContents(tokenizer, new String[] {"뿌리", "가", "깊", "은", "나무"});
|
|
|
tokenizer.setReader(new StringReader("가늠표"));
|
|
|
assertTokenStreamContents(tokenizer, new String[] {"가늠표", "가늠", "표"});
|
|
|
+ // discard_punctuation defaults to true, so the "." between the digits is dropped
|
|
|
+ tokenizer.setReader(new StringReader("3.2개"));
|
|
|
+ assertTokenStreamContents(tokenizer, new String[] {"3", "2", "개"});
|
|
|
+ }
|
|
|
+
|
|
|
+    /**
+     * Checks that a tokenizer configured with {@code discard_punctuation} set to "true"
+     * emits no token for the "." in "3.2개".
+     */
+    public void testNoriTokenizerDiscardPunctuationOptionTrue() throws Exception {
+        TestAnalysis testAnalysis = createTestAnalysis(createDiscardPunctuationOption("true"));
+        Tokenizer noriTokenizer = testAnalysis.tokenizer.get("my_tokenizer").create();
+        noriTokenizer.setReader(new StringReader("3.2개"));
+        String[] expectedTokens = {"3", "2", "개"};
+        assertTokenStreamContents(noriTokenizer, expectedTokens);
+    }
|
|
|
+
|
|
|
+    /**
+     * Checks that a tokenizer configured with {@code discard_punctuation} set to "false"
+     * keeps the "." in "3.2개" as its own token.
+     */
+    public void testNoriTokenizerDiscardPunctuationOptionFalse() throws Exception {
+        TestAnalysis testAnalysis = createTestAnalysis(createDiscardPunctuationOption("false"));
+        Tokenizer noriTokenizer = testAnalysis.tokenizer.get("my_tokenizer").create();
+        noriTokenizer.setReader(new StringReader("3.2개"));
+        String[] expectedTokens = {"3", ".", "2", "개"};
+        assertTokenStreamContents(noriTokenizer, expectedTokens);
+    }
|
|
|
+
|
|
|
+    /**
+     * Checks that a non-boolean {@code discard_punctuation} value makes analysis creation fail
+     * with an {@link IllegalArgumentException} whose message names the rejected value.
+     */
+    public void testNoriTokenizerInvalidDiscardPunctuationOption() {
+        final String invalidValue = "wrong";
+        final Settings badSettings = createDiscardPunctuationOption(invalidValue);
+        final String expectedFragment = "Failed to parse value [" + invalidValue
+            + "] as only [true] or [false] are allowed.";
+        IllegalArgumentException failure =
+            expectThrows(IllegalArgumentException.class, () -> createTestAnalysis(badSettings));
+        assertThat(failure.getMessage(), containsString(expectedFragment));
     }
|
|
|
|
|
|
public void testNoriPartOfSpeech() throws IOException {
|
|
@@ -159,6 +189,27 @@ public class NoriAnalysisTests extends ESTokenStreamTestCase {
|
|
|
assertTokenStreamContents(stream, new String[] {"향가"});
|
|
|
}
|
|
|
|
|
|
+    /**
+     * Checks the {@code nori_number} token filter: for the sample sentence the expected stream
+     * carries "102500" in place of the Korean numeral "십만이천오백".
+     */
+    public void testNoriNumber() throws IOException {
+        Settings.Builder builder = Settings.builder();
+        builder.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
+        builder.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString());
+        builder.put("index.analysis.filter.my_filter.type", "nori_number");
+
+        TestAnalysis testAnalysis =
+            AnalysisTestsHelper.createTestAnalysisFromSettings(builder.build(), new AnalysisNoriPlugin());
+        TokenFilterFactory numberFilterFactory = testAnalysis.tokenFilter.get("my_filter");
+
+        // Feed a plain KoreanTokenizer into the filter under test.
+        Tokenizer koreanTokenizer = new KoreanTokenizer();
+        koreanTokenizer.setReader(new StringReader("오늘 십만이천오백원짜리 와인 구입"));
+        TokenStream filtered = numberFilterFactory.create(koreanTokenizer);
+
+        String[] expectedTokens = {"오늘", "102500", "원", "짜리", "와인", "구입"};
+        assertTokenStreamContents(filtered, expectedTokens);
+    }
|
|
|
+
|
|
|
+    /**
+     * Builds settings that declare a {@code nori_tokenizer} named "my_tokenizer" whose
+     * {@code discard_punctuation} option is set to the given raw value.
+     *
+     * @param option value stored under the {@code discard_punctuation} key, passed through unchanged
+     * @return the built settings
+     */
+    private Settings createDiscardPunctuationOption(String option) {
+        Settings.Builder builder = Settings.builder();
+        builder.put("index.analysis.tokenizer.my_tokenizer.type", "nori_tokenizer");
+        builder.put("index.analysis.tokenizer.my_tokenizer.discard_punctuation", option);
+        return builder.build();
+    }
|
|
|
+
|
|
|
private TestAnalysis createTestAnalysis(Settings analysisSettings) throws IOException {
|
|
|
InputStream dict = NoriAnalysisTests.class.getResourceAsStream("user_dict.txt");
|
|
|
Path home = createTempDir();
|