|
@@ -22,11 +22,14 @@ import org.elasticsearch.action.admin.indices.alias.Alias;
|
|
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
|
|
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
|
|
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequestBuilder;
|
|
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequestBuilder;
|
|
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
|
|
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
|
|
|
|
+import org.elasticsearch.common.ParseFieldMatcher;
|
|
import org.elasticsearch.common.bytes.BytesArray;
|
|
import org.elasticsearch.common.bytes.BytesArray;
|
|
import org.elasticsearch.common.bytes.BytesReference;
|
|
import org.elasticsearch.common.bytes.BytesReference;
|
|
|
|
+import org.elasticsearch.common.settings.Settings;
|
|
import org.elasticsearch.common.xcontent.XContentFactory;
|
|
import org.elasticsearch.common.xcontent.XContentFactory;
|
|
import org.elasticsearch.rest.action.admin.indices.analyze.RestAnalyzeAction;
|
|
import org.elasticsearch.rest.action.admin.indices.analyze.RestAnalyzeAction;
|
|
import org.elasticsearch.test.ESIntegTestCase;
|
|
import org.elasticsearch.test.ESIntegTestCase;
|
|
|
|
+import org.hamcrest.core.IsNull;
|
|
|
|
|
|
import java.io.IOException;
|
|
import java.io.IOException;
|
|
|
|
|
|
@@ -36,8 +39,10 @@ import static org.hamcrest.Matchers.equalTo;
|
|
import static org.hamcrest.Matchers.hasSize;
|
|
import static org.hamcrest.Matchers.hasSize;
|
|
import static org.hamcrest.Matchers.instanceOf;
|
|
import static org.hamcrest.Matchers.instanceOf;
|
|
import static org.hamcrest.Matchers.is;
|
|
import static org.hamcrest.Matchers.is;
|
|
|
|
+import static org.hamcrest.Matchers.notNullValue;
|
|
import static org.hamcrest.Matchers.startsWith;
|
|
import static org.hamcrest.Matchers.startsWith;
|
|
|
|
|
|
|
|
+
|
|
/**
|
|
/**
|
|
*
|
|
*
|
|
*/
|
|
*/
|
|
@@ -201,7 +206,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
|
|
|
|
|
AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
|
|
AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
|
|
|
|
|
|
- RestAnalyzeAction.buildFromContent(content, analyzeRequest);
|
|
|
|
|
|
+ RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
|
|
|
|
|
|
assertThat(analyzeRequest.text().length, equalTo(1));
|
|
assertThat(analyzeRequest.text().length, equalTo(1));
|
|
assertThat(analyzeRequest.text(), equalTo(new String[]{"THIS IS A TEST"}));
|
|
assertThat(analyzeRequest.text(), equalTo(new String[]{"THIS IS A TEST"}));
|
|
@@ -213,7 +218,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
|
AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
|
|
AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
|
|
|
|
|
|
try {
|
|
try {
|
|
- RestAnalyzeAction.buildFromContent(new BytesArray("{invalid_json}"), analyzeRequest);
|
|
|
|
|
|
+ RestAnalyzeAction.buildFromContent(new BytesArray("{invalid_json}"), analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
|
|
fail("shouldn't get here");
|
|
fail("shouldn't get here");
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
assertThat(e, instanceOf(IllegalArgumentException.class));
|
|
assertThat(e, instanceOf(IllegalArgumentException.class));
|
|
@@ -230,7 +235,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
|
.endObject().bytes();
|
|
.endObject().bytes();
|
|
|
|
|
|
try {
|
|
try {
|
|
- RestAnalyzeAction.buildFromContent(invalidContent, analyzeRequest);
|
|
|
|
|
|
+ RestAnalyzeAction.buildFromContent(invalidContent, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
|
|
fail("shouldn't get here");
|
|
fail("shouldn't get here");
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
assertThat(e, instanceOf(IllegalArgumentException.class));
|
|
assertThat(e, instanceOf(IllegalArgumentException.class));
|
|
@@ -267,4 +272,235 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
|
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ public void testDetailAnalyze() throws Exception {
|
|
|
|
+ assertAcked(prepareCreate("test").addAlias(new Alias("alias"))
|
|
|
|
+ .setSettings(
|
|
|
|
+ settingsBuilder()
|
|
|
|
+ .put("index.analysis.char_filter.my_mapping.type", "mapping")
|
|
|
|
+ .putArray("index.analysis.char_filter.my_mapping.mappings", "PH=>F")
|
|
|
|
+ .put("index.analysis.analyzer.test_analyzer.type", "custom")
|
|
|
|
+ .put("index.analysis.analyzer.test_analyzer.position_increment_gap", "100")
|
|
|
|
+ .put("index.analysis.analyzer.test_analyzer.tokenizer", "standard")
|
|
|
|
+ .putArray("index.analysis.analyzer.test_analyzer.char_filter", "my_mapping")
|
|
|
|
+ .putArray("index.analysis.analyzer.test_analyzer.filter", "snowball")));
|
|
|
|
+ ensureGreen();
|
|
|
|
+
|
|
|
|
+ for (int i = 0; i < 10; i++) {
|
|
|
|
+ AnalyzeResponse analyzeResponse = admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText("THIS IS A PHISH")
|
|
|
|
+ .setExplain(true).setCharFilters("my_mapping").setTokenizer("keyword").setTokenFilters("lowercase").get();
|
|
|
|
+
|
|
|
|
+ assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
|
|
|
|
+ //charfilters
|
|
|
|
+ // global charfilter is not change text.
|
|
|
|
+ assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
|
|
|
|
+ assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping"));
|
|
|
|
+ assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
|
|
|
|
+ assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("THIS IS A FISH"));
|
|
|
|
+ //tokenizer
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword"));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("THIS IS A FISH"));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(15));
|
|
|
|
+ //tokenfilters
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase"));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(1));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("this is a fish"));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getPosition(), equalTo(0));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getStartOffset(), equalTo(0));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getEndOffset(), equalTo(15));
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public void testDetailAnalyzeWithNoIndex() throws Exception {
|
|
|
|
+ //analyzer only
|
|
|
|
+ AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST")
|
|
|
|
+ .setExplain(true).setAnalyzer("simple").get();
|
|
|
|
+
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenizer(), IsNull.nullValue());
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters(), IsNull.nullValue());
|
|
|
|
+ assertThat(analyzeResponse.detail().charfilters(), IsNull.nullValue());
|
|
|
|
+ assertThat(analyzeResponse.detail().analyzer().getName(), equalTo("simple"));
|
|
|
|
+ assertThat(analyzeResponse.detail().analyzer().getTokens().length, equalTo(4));
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public void testDetailAnalyzeCustomAnalyzerWithNoIndex() throws Exception {
|
|
|
|
+ //analyzer only
|
|
|
|
+ AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST")
|
|
|
|
+ .setExplain(true).setAnalyzer("simple").get();
|
|
|
|
+
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenizer(), IsNull.nullValue());
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters(), IsNull.nullValue());
|
|
|
|
+ assertThat(analyzeResponse.detail().charfilters(), IsNull.nullValue());
|
|
|
|
+ assertThat(analyzeResponse.detail().analyzer().getName(), equalTo("simple"));
|
|
|
|
+ assertThat(analyzeResponse.detail().analyzer().getTokens().length, equalTo(4));
|
|
|
|
+
|
|
|
|
+ //custom analyzer
|
|
|
|
+ analyzeResponse = client().admin().indices().prepareAnalyze("<text>THIS IS A TEST</text>")
|
|
|
|
+ .setExplain(true).setCharFilters("html_strip").setTokenizer("keyword").setTokenFilters("lowercase").get();
|
|
|
|
+ assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
|
|
|
|
+ //charfilters
|
|
|
|
+ // global charfilter is not change text.
|
|
|
|
+ assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
|
|
|
|
+ assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("html_strip"));
|
|
|
|
+ assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
|
|
|
|
+ assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("\nTHIS IS A TEST\n"));
|
|
|
|
+ //tokenizer
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword"));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("\nTHIS IS A TEST\n"));
|
|
|
|
+ //tokenfilters
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase"));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(1));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("\nthis is a test\n"));
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ //check other attributes
|
|
|
|
+ analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled")
|
|
|
|
+ .setExplain(true).setTokenizer("standard").setTokenFilters("snowball").get();
|
|
|
|
+
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubl"));
|
|
|
|
+ String[] expectedAttributesKey = {
|
|
|
|
+ "bytes",
|
|
|
|
+ "positionLength",
|
|
|
|
+ "keyword"};
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().size(), equalTo(expectedAttributesKey.length));
|
|
|
|
+ Object extendedAttribute;
|
|
|
|
+
|
|
|
|
+ for (String key : expectedAttributesKey) {
|
|
|
|
+ extendedAttribute = analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().get(key);
|
|
|
|
+ assertThat(extendedAttribute, notNullValue());
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public void testDetailAnalyzeSpecifyAttributes() throws Exception {
|
|
|
|
+ AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled")
|
|
|
|
+ .setExplain(true).setTokenizer("standard").setTokenFilters("snowball").setAttributes("keyword").get();
|
|
|
|
+
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubl"));
|
|
|
|
+ String[] expectedAttributesKey = {
|
|
|
|
+ "keyword"};
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().size(), equalTo(expectedAttributesKey.length));
|
|
|
|
+ Object extendedAttribute;
|
|
|
|
+
|
|
|
|
+ for (String key : expectedAttributesKey) {
|
|
|
|
+ extendedAttribute = analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().get(key);
|
|
|
|
+ assertThat(extendedAttribute, notNullValue());
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public void testDetailAnalyzeWithMultiValues() throws Exception {
|
|
|
|
+ assertAcked(prepareCreate("test").addAlias(new Alias("alias")));
|
|
|
|
+ ensureGreen();
|
|
|
|
+ client().admin().indices().preparePutMapping("test")
|
|
|
|
+ .setType("document").setSource("simple", "type=string,analyzer=simple,position_increment_gap=100").get();
|
|
|
|
+
|
|
|
|
+ String[] texts = new String[]{"THIS IS A TEST", "THE SECOND TEXT"};
|
|
|
|
+ AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText(texts)
|
|
|
|
+ .setExplain(true).setField("simple").setText(texts).execute().get();
|
|
|
|
+
|
|
|
|
+ assertThat(analyzeResponse.detail().analyzer().getName(), equalTo("simple"));
|
|
|
|
+ assertThat(analyzeResponse.detail().analyzer().getTokens().length, equalTo(7));
|
|
|
|
+ AnalyzeResponse.AnalyzeToken token = analyzeResponse.detail().analyzer().getTokens()[3];
|
|
|
|
+
|
|
|
|
+ assertThat(token.getTerm(), equalTo("test"));
|
|
|
|
+ assertThat(token.getPosition(), equalTo(3));
|
|
|
|
+ assertThat(token.getStartOffset(), equalTo(10));
|
|
|
|
+ assertThat(token.getEndOffset(), equalTo(14));
|
|
|
|
+
|
|
|
|
+ token = analyzeResponse.detail().analyzer().getTokens()[5];
|
|
|
|
+ assertThat(token.getTerm(), equalTo("second"));
|
|
|
|
+ assertThat(token.getPosition(), equalTo(105));
|
|
|
|
+ assertThat(token.getStartOffset(), equalTo(19));
|
|
|
|
+ assertThat(token.getEndOffset(), equalTo(25));
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public void testDetailAnalyzeWithMultiValuesWithCustomAnalyzer() throws Exception {
|
|
|
|
+ assertAcked(prepareCreate("test").addAlias(new Alias("alias"))
|
|
|
|
+ .setSettings(
|
|
|
|
+ settingsBuilder()
|
|
|
|
+ .put("index.analysis.char_filter.my_mapping.type", "mapping")
|
|
|
|
+ .putArray("index.analysis.char_filter.my_mapping.mappings", "PH=>F")
|
|
|
|
+ .put("index.analysis.analyzer.test_analyzer.type", "custom")
|
|
|
|
+ .put("index.analysis.analyzer.test_analyzer.position_increment_gap", "100")
|
|
|
|
+ .put("index.analysis.analyzer.test_analyzer.tokenizer", "standard")
|
|
|
|
+ .putArray("index.analysis.analyzer.test_analyzer.char_filter", "my_mapping")
|
|
|
|
+ .putArray("index.analysis.analyzer.test_analyzer.filter", "snowball", "lowercase")));
|
|
|
|
+ ensureGreen();
|
|
|
|
+
|
|
|
|
+ client().admin().indices().preparePutMapping("test")
|
|
|
|
+ .setType("document").setSource("simple", "type=string,analyzer=simple,position_increment_gap=100").get();
|
|
|
|
+
|
|
|
|
+ //only analyzer =
|
|
|
|
+ String[] texts = new String[]{"this is a PHISH", "the troubled text"};
|
|
|
|
+ AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText(texts)
|
|
|
|
+ .setExplain(true).setAnalyzer("test_analyzer").setText(texts).execute().get();
|
|
|
|
+
|
|
|
|
+ // charfilter
|
|
|
|
+ assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
|
|
|
|
+ assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping"));
|
|
|
|
+ assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(2));
|
|
|
|
+ assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("this is a FISH"));
|
|
|
|
+ assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[1], equalTo("the troubled text"));
|
|
|
|
+
|
|
|
|
+ // tokenizer
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("standard"));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(7));
|
|
|
|
+ AnalyzeResponse.AnalyzeToken token = analyzeResponse.detail().tokenizer().getTokens()[3];
|
|
|
|
+
|
|
|
|
+ assertThat(token.getTerm(), equalTo("FISH"));
|
|
|
|
+ assertThat(token.getPosition(), equalTo(3));
|
|
|
|
+ assertThat(token.getStartOffset(), equalTo(10));
|
|
|
|
+ assertThat(token.getEndOffset(), equalTo(15));
|
|
|
|
+
|
|
|
|
+ token = analyzeResponse.detail().tokenizer().getTokens()[5];
|
|
|
|
+ assertThat(token.getTerm(), equalTo("troubled"));
|
|
|
|
+ assertThat(token.getPosition(), equalTo(105));
|
|
|
|
+ assertThat(token.getStartOffset(), equalTo(20));
|
|
|
|
+ assertThat(token.getEndOffset(), equalTo(28));
|
|
|
|
+
|
|
|
|
+ // tokenfilter(snowball)
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(2));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(7));
|
|
|
|
+ token = analyzeResponse.detail().tokenfilters()[0].getTokens()[3];
|
|
|
|
+
|
|
|
|
+ assertThat(token.getTerm(), equalTo("FISH"));
|
|
|
|
+ assertThat(token.getPosition(), equalTo(3));
|
|
|
|
+ assertThat(token.getStartOffset(), equalTo(10));
|
|
|
|
+ assertThat(token.getEndOffset(), equalTo(15));
|
|
|
|
+
|
|
|
|
+ token = analyzeResponse.detail().tokenfilters()[0].getTokens()[5];
|
|
|
|
+ assertThat(token.getTerm(), equalTo("troubl"));
|
|
|
|
+ assertThat(token.getPosition(), equalTo(105));
|
|
|
|
+ assertThat(token.getStartOffset(), equalTo(20));
|
|
|
|
+ assertThat(token.getEndOffset(), equalTo(28));
|
|
|
|
+
|
|
|
|
+ // tokenfilter(lowercase)
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[1].getName(), equalTo("lowercase"));
|
|
|
|
+ assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens().length, equalTo(7));
|
|
|
|
+ token = analyzeResponse.detail().tokenfilters()[1].getTokens()[3];
|
|
|
|
+
|
|
|
|
+ assertThat(token.getTerm(), equalTo("fish"));
|
|
|
|
+ assertThat(token.getPosition(), equalTo(3));
|
|
|
|
+ assertThat(token.getStartOffset(), equalTo(10));
|
|
|
|
+ assertThat(token.getEndOffset(), equalTo(15));
|
|
|
|
+
|
|
|
|
+ token = analyzeResponse.detail().tokenfilters()[0].getTokens()[5];
|
|
|
|
+ assertThat(token.getTerm(), equalTo("troubl"));
|
|
|
|
+ assertThat(token.getPosition(), equalTo(105));
|
|
|
|
+ assertThat(token.getStartOffset(), equalTo(20));
|
|
|
|
+ assertThat(token.getEndOffset(), equalTo(28));
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ }
|
|
|
|
+
|
|
}
|
|
}
|