1
0
Эх сурвалжийг харах

Analysis: Add multi-valued text support

Add support for array text as a multi-valued field in AnalyzeRequestBuilder
Add support for array text as a multi-valued field in the Analyze REST API
Add docs

Closes #3023
Jun Ohtani 10 жил өмнө
parent
commit
3a1a4d3e89

+ 13 - 0
docs/reference/indices/analyze.asciidoc

@@ -18,6 +18,19 @@ curl -XGET 'localhost:9200/_analyze' -d '
 
 
 coming[2.0.0, body based parameters were added in 2.0.0]
 coming[2.0.0, body based parameters were added in 2.0.0]
 
 
+If the text parameter is provided as an array of strings, it is analyzed as a multi-valued field.
+
+[source,js]
+--------------------------------------------------
+curl -XGET 'localhost:9200/_analyze' -d '
+{
+  "analyzer" : "standard",
+  "text" : ["this is a test", "the second text"]
+}'
+--------------------------------------------------
+
+coming[2.0.0, body based parameters were added in 2.0.0]
+
 Or by building a custom transient analyzer out of tokenizers,
 Or by building a custom transient analyzer out of tokenizers,
 token filters and char filters. Token filters can use the shorter 'filters'
 token filters and char filters. Token filters can use the shorter 'filters'
 parameter name:
 parameter name:

+ 1 - 1
rest-api-spec/api/indices.analyze.json

@@ -37,7 +37,7 @@
           "description" : "With `true`, specify that a local shard should be used if available, with `false`, use a random shard (default: true)"
           "description" : "With `true`, specify that a local shard should be used if available, with `false`, use a random shard (default: true)"
         },
         },
         "text": {
         "text": {
-          "type" : "string",
+          "type" : "list",
           "description" : "The text on which the analysis should be performed (when request body is not used)"
           "description" : "The text on which the analysis should be performed (when request body is not used)"
         },
         },
         "tokenizer": {
         "tokenizer": {

+ 8 - 0
rest-api-spec/test/indices.analyze/10_analyze.yaml

@@ -63,3 +63,11 @@ setup:
           body: { "text": "Bar Foo", "filters": ["lowercase"], "tokenizer": keyword }
           body: { "text": "Bar Foo", "filters": ["lowercase"], "tokenizer": keyword }
     - length: {tokens: 1 }
     - length: {tokens: 1 }
     - match:     { tokens.0.token: bar foo }
     - match:     { tokens.0.token: bar foo }
+---
+"Array text":
+    - do:
+        indices.analyze:
+          body: { "text": ["Foo Bar", "Baz"], "filters": ["lowercase"], "tokenizer": keyword }
+    - length: {tokens: 2 }
+    - match:     { tokens.0.token: foo bar }
+    - match:     { tokens.1.token: baz }

+ 6 - 6
src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java

@@ -36,7 +36,7 @@ import static org.elasticsearch.action.ValidateActions.addValidationError;
  */
  */
 public class AnalyzeRequest extends SingleCustomOperationRequest<AnalyzeRequest> {
 public class AnalyzeRequest extends SingleCustomOperationRequest<AnalyzeRequest> {
 
 
-    private String text;
+    private String[] text;
 
 
     private String analyzer;
     private String analyzer;
 
 
@@ -61,11 +61,11 @@ public class AnalyzeRequest extends SingleCustomOperationRequest<AnalyzeRequest>
         this.index(index);
         this.index(index);
     }
     }
 
 
-    public String text() {
+    public String[] text() {
         return this.text;
         return this.text;
     }
     }
 
 
-    public AnalyzeRequest text(String text) {
+    public AnalyzeRequest text(String... text) {
         this.text = text;
         this.text = text;
         return this;
         return this;
     }
     }
@@ -118,7 +118,7 @@ public class AnalyzeRequest extends SingleCustomOperationRequest<AnalyzeRequest>
     @Override
     @Override
     public ActionRequestValidationException validate() {
     public ActionRequestValidationException validate() {
         ActionRequestValidationException validationException = super.validate();
         ActionRequestValidationException validationException = super.validate();
-        if (text == null) {
+        if (text == null || text.length == 0) {
             validationException = addValidationError("text is missing", validationException);
             validationException = addValidationError("text is missing", validationException);
         }
         }
         if (tokenFilters == null) {
         if (tokenFilters == null) {
@@ -133,7 +133,7 @@ public class AnalyzeRequest extends SingleCustomOperationRequest<AnalyzeRequest>
     @Override
     @Override
     public void readFrom(StreamInput in) throws IOException {
     public void readFrom(StreamInput in) throws IOException {
         super.readFrom(in);
         super.readFrom(in);
-        text = in.readString();
+        text = in.readStringArray();
         analyzer = in.readOptionalString();
         analyzer = in.readOptionalString();
         tokenizer = in.readOptionalString();
         tokenizer = in.readOptionalString();
         tokenFilters = in.readStringArray();
         tokenFilters = in.readStringArray();
@@ -144,7 +144,7 @@ public class AnalyzeRequest extends SingleCustomOperationRequest<AnalyzeRequest>
     @Override
     @Override
     public void writeTo(StreamOutput out) throws IOException {
     public void writeTo(StreamOutput out) throws IOException {
         super.writeTo(out);
         super.writeTo(out);
-        out.writeString(text);
+        out.writeStringArray(text);
         out.writeOptionalString(analyzer);
         out.writeOptionalString(analyzer);
         out.writeOptionalString(tokenizer);
         out.writeOptionalString(tokenizer);
         out.writeStringArray(tokenFilters);
         out.writeStringArray(tokenFilters);

+ 9 - 1
src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java

@@ -30,7 +30,7 @@ public class AnalyzeRequestBuilder extends SingleCustomOperationRequestBuilder<A
         super(client, action, new AnalyzeRequest());
         super(client, action, new AnalyzeRequest());
     }
     }
 
 
-    public AnalyzeRequestBuilder(ElasticsearchClient client, AnalyzeAction action, String index, String text) {
+    public AnalyzeRequestBuilder(ElasticsearchClient client, AnalyzeAction action, String index, String... text) {
         super(client, action, new AnalyzeRequest(index).text(text));
         super(client, action, new AnalyzeRequest(index).text(text));
     }
     }
 
 
@@ -86,4 +86,12 @@ public class AnalyzeRequestBuilder extends SingleCustomOperationRequestBuilder<A
         request.charFilters(charFilters);
         request.charFilters(charFilters);
         return this;
         return this;
     }
     }
+
+    /**
+     * Sets texts to analyze
+     */
+    public AnalyzeRequestBuilder setText(String... texts) {
+        request.text(texts);
+        return this;
+    }
 }
 }

+ 34 - 28
src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java

@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.IOUtils;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.Version;
 import org.elasticsearch.Version;
 import org.elasticsearch.action.support.ActionFilters;
 import org.elasticsearch.action.support.ActionFilters;
@@ -210,36 +211,41 @@ public class TransportAnalyzeAction extends TransportSingleCustomOperationAction
 
 
         List<AnalyzeResponse.AnalyzeToken> tokens = Lists.newArrayList();
         List<AnalyzeResponse.AnalyzeToken> tokens = Lists.newArrayList();
         TokenStream stream = null;
         TokenStream stream = null;
-        try {
-            stream = analyzer.tokenStream(field, request.text());
-            stream.reset();
-            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
-            PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
-            OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
-            TypeAttribute type = stream.addAttribute(TypeAttribute.class);
-
-            int position = -1;
-            while (stream.incrementToken()) {
-                int increment = posIncr.getPositionIncrement();
-                if (increment > 0) {
-                    position = position + increment;
-                }
-                tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), position, offset.startOffset(), offset.endOffset(), type.type()));
-            }
-            stream.end();
-        } catch (IOException e) {
-            throw new ElasticsearchException("failed to analyze", e);
-        } finally {
-            if (stream != null) {
-                try {
-                    stream.close();
-                } catch (IOException e) {
-                    // ignore
+        int lastPosition = -1;
+        int lastOffset = 0;
+        for (String text : request.text()) {
+            try {
+                stream = analyzer.tokenStream(field, text);
+                stream.reset();
+                CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
+                PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
+                OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
+                TypeAttribute type = stream.addAttribute(TypeAttribute.class);
+
+                while (stream.incrementToken()) {
+                    int increment = posIncr.getPositionIncrement();
+                    if (increment > 0) {
+                        lastPosition = lastPosition + increment;
+                    }
+                    tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(), lastOffset + offset.endOffset(), type.type()));
+
                 }
                 }
+                stream.end();
+                lastOffset += offset.endOffset();
+                lastPosition += posIncr.getPositionIncrement();
+
+                lastPosition += analyzer.getPositionIncrementGap(field);
+                lastOffset += analyzer.getOffsetGap(field);
+
+            } catch (IOException e) {
+                throw new ElasticsearchException("failed to analyze", e);
+            } finally {
+                IOUtils.closeWhileHandlingException(stream);
             }
             }
-            if (closeAnalyzer) {
-                analyzer.close();
-            }
+        }
+
+        if (closeAnalyzer) {
+            analyzer.close();
         }
         }
 
 
         return new AnalyzeResponse(tokens);
         return new AnalyzeResponse(tokens);

+ 6 - 0
src/main/java/org/elasticsearch/client/IndicesAdminClient.java

@@ -587,6 +587,12 @@ public interface IndicesAdminClient extends ElasticsearchClient {
      */
      */
     AnalyzeRequestBuilder prepareAnalyze(String text);
     AnalyzeRequestBuilder prepareAnalyze(String text);
 
 
+    /**
+     * Analyze text/texts.
+     *
+     */
+    AnalyzeRequestBuilder prepareAnalyze();
+
     /**
     /**
      * Puts an index template.
      * Puts an index template.
      */
      */

+ 5 - 0
src/main/java/org/elasticsearch/client/support/AbstractClient.java

@@ -1478,6 +1478,11 @@ public abstract class AbstractClient extends AbstractComponent implements Client
             return new AnalyzeRequestBuilder(this, AnalyzeAction.INSTANCE, null, text);
             return new AnalyzeRequestBuilder(this, AnalyzeAction.INSTANCE, null, text);
         }
         }
 
 
+        @Override
+        public AnalyzeRequestBuilder prepareAnalyze() {
+            return new AnalyzeRequestBuilder(this, AnalyzeAction.INSTANCE);
+        }
+
         @Override
         @Override
         public ActionFuture<PutIndexTemplateResponse> putTemplate(final PutIndexTemplateRequest request) {
         public ActionFuture<PutIndexTemplateResponse> putTemplate(final PutIndexTemplateRequest request) {
             return execute(PutIndexTemplateAction.INSTANCE, request);
             return execute(PutIndexTemplateAction.INSTANCE, request);

+ 18 - 8
src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java

@@ -22,6 +22,7 @@ import com.google.common.collect.Lists;
 import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
 import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
 import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
 import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
 import org.elasticsearch.client.Client;
 import org.elasticsearch.client.Client;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.settings.Settings;
@@ -55,10 +56,10 @@ public class RestAnalyzeAction extends BaseRestHandler {
     @Override
     @Override
     public void handleRequest(final RestRequest request, final RestChannel channel, final Client client) {
     public void handleRequest(final RestRequest request, final RestChannel channel, final Client client) {
 
 
-        String text = request.param("text");
+        String[] texts = request.paramAsStringArrayOrEmptyIfAll("text");
 
 
         AnalyzeRequest analyzeRequest = new AnalyzeRequest(request.param("index"));
         AnalyzeRequest analyzeRequest = new AnalyzeRequest(request.param("index"));
-        analyzeRequest.text(text);
+        analyzeRequest.text(texts);
         analyzeRequest.preferLocal(request.paramAsBoolean("prefer_local", analyzeRequest.preferLocalShard()));
         analyzeRequest.preferLocal(request.paramAsBoolean("prefer_local", analyzeRequest.preferLocalShard()));
         analyzeRequest.analyzer(request.param("analyzer"));
         analyzeRequest.analyzer(request.param("analyzer"));
         analyzeRequest.field(request.param("field"));
         analyzeRequest.field(request.param("field"));
@@ -69,9 +70,9 @@ public class RestAnalyzeAction extends BaseRestHandler {
         if (RestActions.hasBodyContent(request)) {
         if (RestActions.hasBodyContent(request)) {
             XContentType type = RestActions.guessBodyContentType(request);
             XContentType type = RestActions.guessBodyContentType(request);
             if (type == null) {
             if (type == null) {
-                if (text == null) {
-                    text = RestActions.getRestContent(request).toUtf8();
-                    analyzeRequest.text(text);
+                if (texts == null || texts.length == 0) {
+                    texts = new String[]{ RestActions.getRestContent(request).toUtf8() };
+                    analyzeRequest.text(texts);
                 }
                 }
             } else {
             } else {
                 // NOTE: if rest request with xcontent body has request parameters, the parameters does not override xcontent values
                 // NOTE: if rest request with xcontent body has request parameters, the parameters does not override xcontent values
@@ -95,7 +96,16 @@ public class RestAnalyzeAction extends BaseRestHandler {
                     } else if ("prefer_local".equals(currentFieldName) && token == XContentParser.Token.VALUE_BOOLEAN) {
                     } else if ("prefer_local".equals(currentFieldName) && token == XContentParser.Token.VALUE_BOOLEAN) {
                         analyzeRequest.preferLocal(parser.booleanValue());
                         analyzeRequest.preferLocal(parser.booleanValue());
                     } else if ("text".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
                     } else if ("text".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
-                            analyzeRequest.text(parser.text());
+                        analyzeRequest.text(parser.text());
+                    } else if ("text".equals(currentFieldName) && token == XContentParser.Token.START_ARRAY) {
+                        List<String> texts = Lists.newArrayList();
+                        while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
+                            if (token.isValue() == false) {
+                                throw new IllegalArgumentException(currentFieldName + " array element should only contain text");
+                            }
+                            texts.add(parser.text());
+                        }
+                        analyzeRequest.text(texts.toArray(Strings.EMPTY_ARRAY));
                     } else if ("analyzer".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
                     } else if ("analyzer".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
                         analyzeRequest.analyzer(parser.text());
                         analyzeRequest.analyzer(parser.text());
                     } else if ("field".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
                     } else if ("field".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
@@ -110,7 +120,7 @@ public class RestAnalyzeAction extends BaseRestHandler {
                             }
                             }
                             filters.add(parser.text());
                             filters.add(parser.text());
                         }
                         }
-                        analyzeRequest.tokenFilters(filters.toArray(new String[0]));
+                        analyzeRequest.tokenFilters(filters.toArray(Strings.EMPTY_ARRAY));
                     } else if ("char_filters".equals(currentFieldName) && token == XContentParser.Token.START_ARRAY) {
                     } else if ("char_filters".equals(currentFieldName) && token == XContentParser.Token.START_ARRAY) {
                         List<String> charFilters = Lists.newArrayList();
                         List<String> charFilters = Lists.newArrayList();
                         while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                         while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
@@ -119,7 +129,7 @@ public class RestAnalyzeAction extends BaseRestHandler {
                             }
                             }
                             charFilters.add(parser.text());
                             charFilters.add(parser.text());
                         }
                         }
-                        analyzeRequest.tokenFilters(charFilters.toArray(new String[0]));
+                        analyzeRequest.tokenFilters(charFilters.toArray(Strings.EMPTY_ARRAY));
                     } else {
                     } else {
                         throw new IllegalArgumentException("Unknown parameter [" + currentFieldName + "] in request body or parameter is of the wrong type[" + token + "] ");
                         throw new IllegalArgumentException("Unknown parameter [" + currentFieldName + "] in request body or parameter is of the wrong type[" + token + "] ");
                     }
                     }

+ 34 - 14
src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionTests.java

@@ -158,18 +158,7 @@ public class AnalyzeActionTests extends ElasticsearchIntegrationTest {
         ensureGreen();
         ensureGreen();
 
 
         client().admin().indices().preparePutMapping("test")
         client().admin().indices().preparePutMapping("test")
-                .setType("document").setSource(
-                "{\n" +
-                        "    \"document\":{\n" +
-                        "        \"properties\":{\n" +
-                        "            \"simple\":{\n" +
-                        "                \"type\":\"string\",\n" +
-                        "                \"analyzer\": \"simple\"\n" +
-                        "            }\n" +
-                        "        }\n" +
-                        "    }\n" +
-                        "}"
-        ).get();
+                .setType("document").setSource("simple", "type=string,analyzer=simple").get();
 
 
         for (int i = 0; i < 10; i++) {
         for (int i = 0; i < 10; i++) {
             final AnalyzeRequestBuilder requestBuilder = client().admin().indices().prepareAnalyze("THIS IS A TEST");
             final AnalyzeRequestBuilder requestBuilder = client().admin().indices().prepareAnalyze("THIS IS A TEST");
@@ -220,7 +209,8 @@ public class AnalyzeActionTests extends ElasticsearchIntegrationTest {
 
 
         RestAnalyzeAction.buildFromContent(content, analyzeRequest);
         RestAnalyzeAction.buildFromContent(content, analyzeRequest);
 
 
-        assertThat(analyzeRequest.text(), equalTo("THIS IS A TEST"));
+        assertThat(analyzeRequest.text().length, equalTo(1));
+        assertThat(analyzeRequest.text(), equalTo(new String[]{"THIS IS A TEST"}));
         assertThat(analyzeRequest.tokenizer(), equalTo("keyword"));
         assertThat(analyzeRequest.tokenizer(), equalTo("keyword"));
         assertThat(analyzeRequest.tokenFilters(), equalTo(new String[]{"lowercase"}));
         assertThat(analyzeRequest.tokenFilters(), equalTo(new String[]{"lowercase"}));
     }
     }
@@ -239,7 +229,6 @@ public class AnalyzeActionTests extends ElasticsearchIntegrationTest {
         }
         }
     }
     }
 
 
-
     @Test
     @Test
     public void testParseXContentForAnalyzeRequestWithUnknownParamThrowsException() throws Exception {
     public void testParseXContentForAnalyzeRequestWithUnknownParamThrowsException() throws Exception {
         AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
         AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
@@ -258,4 +247,35 @@ public class AnalyzeActionTests extends ElasticsearchIntegrationTest {
         }
         }
     }
     }
 
 
+    @Test
+    public void analyzerWithMultiValues() throws Exception {
+
+        assertAcked(prepareCreate("test").addAlias(new Alias("alias")));
+        ensureGreen();
+
+        client().admin().indices().preparePutMapping("test")
+            .setType("document").setSource("simple", "type=string,analyzer=simple,position_offset_gap=100").get();
+
+        String[] texts = new String[]{"THIS IS A TEST", "THE SECOND TEXT"};
+
+        final AnalyzeRequestBuilder requestBuilder = client().admin().indices().prepareAnalyze();
+        requestBuilder.setText(texts);
+        requestBuilder.setIndex(indexOrAlias());
+        requestBuilder.setField("simple");
+        AnalyzeResponse analyzeResponse = requestBuilder.get();
+        assertThat(analyzeResponse.getTokens().size(), equalTo(7));
+        AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(3);
+        assertThat(token.getTerm(), equalTo("test"));
+        assertThat(token.getPosition(), equalTo(3));
+        assertThat(token.getStartOffset(), equalTo(10));
+        assertThat(token.getEndOffset(), equalTo(14));
+
+        token = analyzeResponse.getTokens().get(5);
+        assertThat(token.getTerm(), equalTo("second"));
+        assertThat(token.getPosition(), equalTo(105));
+        assertThat(token.getStartOffset(), equalTo(19));
+        assertThat(token.getEndOffset(), equalTo(25));
+
+    }
+
 }
 }