|
@@ -111,36 +111,6 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
|
|
assertThat(analyzeResponse.getTokens().get(0).getPositionLength(), equalTo(1));
|
|
|
}
|
|
|
|
|
|
- public void testAnalyzeWithCharFilters() throws Exception {
|
|
|
- assertAcked(prepareCreate("test").addAlias(new Alias("alias"))
|
|
|
- .setSettings(Settings.builder().put(indexSettings())
|
|
|
- .put("index.analysis.char_filter.custom_mapping.type", "mapping")
|
|
|
- .putArray("index.analysis.char_filter.custom_mapping.mappings", "ph=>f", "qu=>q")
|
|
|
- .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard")
|
|
|
- .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "custom_mapping")));
|
|
|
- ensureGreen();
|
|
|
-
|
|
|
- AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("<h2><b>THIS</b> IS A</h2> <a href=\"#\">TEST</a>").setTokenizer("standard").addCharFilter("html_strip").get();
|
|
|
- assertThat(analyzeResponse.getTokens().size(), equalTo(4));
|
|
|
-
|
|
|
- analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A <b>TEST</b>").setTokenizer("keyword").addTokenFilter("lowercase").addCharFilter("html_strip").get();
|
|
|
- assertThat(analyzeResponse.getTokens().size(), equalTo(1));
|
|
|
- assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test"));
|
|
|
-
|
|
|
- analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "jeff quit phish").setTokenizer("keyword").addTokenFilter("lowercase").addCharFilter("custom_mapping").get();
|
|
|
- assertThat(analyzeResponse.getTokens().size(), equalTo(1));
|
|
|
- assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("jeff qit fish"));
|
|
|
-
|
|
|
- analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "<a href=\"#\">jeff quit fish</a>").setTokenizer("standard").addCharFilter("html_strip").addCharFilter("custom_mapping").get();
|
|
|
- assertThat(analyzeResponse.getTokens().size(), equalTo(3));
|
|
|
- AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0);
|
|
|
- assertThat(token.getTerm(), equalTo("jeff"));
|
|
|
- token = analyzeResponse.getTokens().get(1);
|
|
|
- assertThat(token.getTerm(), equalTo("qit"));
|
|
|
- token = analyzeResponse.getTokens().get(2);
|
|
|
- assertThat(token.getTerm(), equalTo("fish"));
|
|
|
- }
|
|
|
-
|
|
|
public void testAnalyzeWithNonDefaultPostionLength() throws Exception {
|
|
|
assertAcked(prepareCreate("test").addAlias(new Alias("alias"))
|
|
|
.setSettings(Settings.builder().put(indexSettings())
|
|
@@ -263,46 +233,6 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
|
|
assertThat(token.getPositionLength(), equalTo(1));
|
|
|
}
|
|
|
|
|
|
- public void testDetailAnalyze() throws Exception {
|
|
|
- assertAcked(prepareCreate("test").addAlias(new Alias("alias"))
|
|
|
- .setSettings(
|
|
|
- Settings.builder()
|
|
|
- .put("index.analysis.char_filter.my_mapping.type", "mapping")
|
|
|
- .putArray("index.analysis.char_filter.my_mapping.mappings", "PH=>F")
|
|
|
- .put("index.analysis.analyzer.test_analyzer.type", "custom")
|
|
|
- .put("index.analysis.analyzer.test_analyzer.position_increment_gap", "100")
|
|
|
- .put("index.analysis.analyzer.test_analyzer.tokenizer", "standard")
|
|
|
- .putArray("index.analysis.analyzer.test_analyzer.char_filter", "my_mapping")
|
|
|
- .putArray("index.analysis.analyzer.test_analyzer.filter", "snowball")));
|
|
|
- ensureGreen();
|
|
|
-
|
|
|
- for (int i = 0; i < 10; i++) {
|
|
|
- AnalyzeResponse analyzeResponse = admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText("THIS IS A PHISH")
|
|
|
- .setExplain(true).addCharFilter("my_mapping").setTokenizer("keyword").addTokenFilter("lowercase").get();
|
|
|
-
|
|
|
- assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
|
|
|
- //charfilters
|
|
|
- assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
|
|
|
- assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping"));
|
|
|
- assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
|
|
|
- assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("THIS IS A FISH"));
|
|
|
- //tokenizer
|
|
|
- assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword"));
|
|
|
- assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1));
|
|
|
- assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("THIS IS A FISH"));
|
|
|
- assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0));
|
|
|
- assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(15));
|
|
|
- //tokenfilters
|
|
|
- assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
|
|
|
- assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase"));
|
|
|
- assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(1));
|
|
|
- assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("this is a fish"));
|
|
|
- assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getPosition(), equalTo(0));
|
|
|
- assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getStartOffset(), equalTo(0));
|
|
|
- assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getEndOffset(), equalTo(15));
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
public void testDetailAnalyzeWithNoIndex() throws Exception {
|
|
|
//analyzer only
|
|
|
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST")
|
|
@@ -414,90 +344,6 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
|
|
assertThat(token.getPositionLength(), equalTo(1));
|
|
|
}
|
|
|
|
|
|
- public void testDetailAnalyzeWithMultiValuesWithCustomAnalyzer() throws Exception {
|
|
|
- assertAcked(prepareCreate("test").addAlias(new Alias("alias"))
|
|
|
- .setSettings(
|
|
|
- Settings.builder()
|
|
|
- .put("index.analysis.char_filter.my_mapping.type", "mapping")
|
|
|
- .putArray("index.analysis.char_filter.my_mapping.mappings", "PH=>F")
|
|
|
- .put("index.analysis.analyzer.test_analyzer.type", "custom")
|
|
|
- .put("index.analysis.analyzer.test_analyzer.position_increment_gap", "100")
|
|
|
- .put("index.analysis.analyzer.test_analyzer.tokenizer", "standard")
|
|
|
- .putArray("index.analysis.analyzer.test_analyzer.char_filter", "my_mapping")
|
|
|
- .putArray("index.analysis.analyzer.test_analyzer.filter", "snowball", "lowercase")));
|
|
|
- ensureGreen();
|
|
|
-
|
|
|
- client().admin().indices().preparePutMapping("test")
|
|
|
- .setType("document").setSource("simple", "type=text,analyzer=simple,position_increment_gap=100").get();
|
|
|
-
|
|
|
- //only analyzer =
|
|
|
- String[] texts = new String[]{"this is a PHISH", "the troubled text"};
|
|
|
- AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText(texts)
|
|
|
- .setExplain(true).setAnalyzer("test_analyzer").setText(texts).execute().get();
|
|
|
-
|
|
|
- // charfilter
|
|
|
- assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
|
|
|
- assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping"));
|
|
|
- assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(2));
|
|
|
- assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("this is a FISH"));
|
|
|
- assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[1], equalTo("the troubled text"));
|
|
|
-
|
|
|
- // tokenizer
|
|
|
- assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("standard"));
|
|
|
- assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(7));
|
|
|
- AnalyzeResponse.AnalyzeToken token = analyzeResponse.detail().tokenizer().getTokens()[3];
|
|
|
-
|
|
|
- assertThat(token.getTerm(), equalTo("FISH"));
|
|
|
- assertThat(token.getPosition(), equalTo(3));
|
|
|
- assertThat(token.getStartOffset(), equalTo(10));
|
|
|
- assertThat(token.getEndOffset(), equalTo(15));
|
|
|
- assertThat(token.getPositionLength(), equalTo(1));
|
|
|
-
|
|
|
- token = analyzeResponse.detail().tokenizer().getTokens()[5];
|
|
|
- assertThat(token.getTerm(), equalTo("troubled"));
|
|
|
- assertThat(token.getPosition(), equalTo(105));
|
|
|
- assertThat(token.getStartOffset(), equalTo(20));
|
|
|
- assertThat(token.getEndOffset(), equalTo(28));
|
|
|
- assertThat(token.getPositionLength(), equalTo(1));
|
|
|
-
|
|
|
- // tokenfilter(snowball)
|
|
|
- assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(2));
|
|
|
- assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
|
|
|
- assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(7));
|
|
|
- token = analyzeResponse.detail().tokenfilters()[0].getTokens()[3];
|
|
|
-
|
|
|
- assertThat(token.getTerm(), equalTo("FISH"));
|
|
|
- assertThat(token.getPosition(), equalTo(3));
|
|
|
- assertThat(token.getStartOffset(), equalTo(10));
|
|
|
- assertThat(token.getEndOffset(), equalTo(15));
|
|
|
- assertThat(token.getPositionLength(), equalTo(1));
|
|
|
-
|
|
|
- token = analyzeResponse.detail().tokenfilters()[0].getTokens()[5];
|
|
|
- assertThat(token.getTerm(), equalTo("troubl"));
|
|
|
- assertThat(token.getPosition(), equalTo(105));
|
|
|
- assertThat(token.getStartOffset(), equalTo(20));
|
|
|
- assertThat(token.getEndOffset(), equalTo(28));
|
|
|
- assertThat(token.getPositionLength(), equalTo(1));
|
|
|
-
|
|
|
- // tokenfilter(lowercase)
|
|
|
- assertThat(analyzeResponse.detail().tokenfilters()[1].getName(), equalTo("lowercase"));
|
|
|
- assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens().length, equalTo(7));
|
|
|
- token = analyzeResponse.detail().tokenfilters()[1].getTokens()[3];
|
|
|
-
|
|
|
- assertThat(token.getTerm(), equalTo("fish"));
|
|
|
- assertThat(token.getPosition(), equalTo(3));
|
|
|
- assertThat(token.getStartOffset(), equalTo(10));
|
|
|
- assertThat(token.getEndOffset(), equalTo(15));
|
|
|
- assertThat(token.getPositionLength(), equalTo(1));
|
|
|
-
|
|
|
- token = analyzeResponse.detail().tokenfilters()[0].getTokens()[5];
|
|
|
- assertThat(token.getTerm(), equalTo("troubl"));
|
|
|
- assertThat(token.getPosition(), equalTo(105));
|
|
|
- assertThat(token.getStartOffset(), equalTo(20));
|
|
|
- assertThat(token.getEndOffset(), equalTo(28));
|
|
|
- assertThat(token.getPositionLength(), equalTo(1));
|
|
|
- }
|
|
|
-
|
|
|
public void testNonExistTokenizer() {
|
|
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
|
|
() -> client().admin().indices()
|
|
@@ -575,35 +421,6 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
|
|
assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens()[0].getPositionLength(), equalTo(1));
|
|
|
}
|
|
|
|
|
|
-
|
|
|
- public void testCustomCharFilterInRequest() throws Exception {
|
|
|
- Map<String, Object> charFilterSettings = new HashMap<>();
|
|
|
- charFilterSettings.put("type", "mapping");
|
|
|
- charFilterSettings.put("mappings", new String[]{"ph => f", "qu => q"});
|
|
|
- AnalyzeResponse analyzeResponse = client().admin().indices()
|
|
|
- .prepareAnalyze()
|
|
|
- .setText("jeff quit phish")
|
|
|
- .setTokenizer("keyword")
|
|
|
- .addCharFilter(charFilterSettings)
|
|
|
- .setExplain(true)
|
|
|
- .get();
|
|
|
-
|
|
|
- assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
|
|
|
- //charfilters
|
|
|
- assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
|
|
|
- assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("_anonymous_charfilter_[0]"));
|
|
|
- assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
|
|
|
- assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("jeff qit fish"));
|
|
|
- //tokenizer
|
|
|
- assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword"));
|
|
|
- assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1));
|
|
|
- assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("jeff qit fish"));
|
|
|
- assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0));
|
|
|
- assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(15));
|
|
|
- assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getPositionLength(), equalTo(1));
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
public void testCustomTokenizerInRequest() throws Exception {
|
|
|
Map<String, Object> tokenizerSettings = new HashMap<>();
|
|
|
tokenizerSettings.put("type", "nGram");
|