Browse Source

Make it possible to validate a query on all shards instead of a single random shard (#23697)

This is especially useful when we rewrite the query because the result of the rewrite can be very different on different shards. See #18254 for example.
Igor Motov 8 years ago
parent
commit
f927a2708d

+ 25 - 6
core/src/main/java/org/elasticsearch/action/admin/indices/validate/query/QueryExplanation.java

@@ -19,6 +19,7 @@
 
 package org.elasticsearch.action.admin.indices.validate.query;
 
+import org.elasticsearch.Version;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Streamable;
@@ -27,20 +28,26 @@ import java.io.IOException;
 
 public class QueryExplanation  implements Streamable {
 
+    public static final int RANDOM_SHARD = -1;
+
     private String index;
-    
+
+    private int shard = RANDOM_SHARD;
+
     private boolean valid;
-    
+
     private String explanation;
-    
+
     private String error;
 
     QueryExplanation() {
-        
+
     }
-    
-    public QueryExplanation(String index, boolean valid, String explanation, String error) {
+
+    public QueryExplanation(String index, int shard, boolean valid, String explanation,
+                            String error) {
         this.index = index;
+        this.shard = shard;
         this.valid = valid;
         this.explanation = explanation;
         this.error = error;
@@ -50,6 +57,10 @@ public class QueryExplanation  implements Streamable {
         return this.index;
     }
 
+    public int getShard() {
+        return this.shard;
+    }
+
     public boolean isValid() {
         return this.valid;
     }
@@ -65,6 +76,11 @@ public class QueryExplanation  implements Streamable {
     @Override
     public void readFrom(StreamInput in) throws IOException {
         index = in.readString();
+        if (in.getVersion().onOrAfter(Version.V_5_4_0_UNRELEASED)) {
+            shard = in.readInt();
+        } else {
+            shard = RANDOM_SHARD;
+        }
         valid = in.readBoolean();
         explanation = in.readOptionalString();
         error = in.readOptionalString();
@@ -73,6 +89,9 @@ public class QueryExplanation  implements Streamable {
     @Override
     public void writeTo(StreamOutput out) throws IOException {
         out.writeString(index);
+        if (out.getVersion().onOrAfter(Version.V_5_4_0_UNRELEASED)) {
+            out.writeInt(shard);
+        }
         out.writeBoolean(valid);
         out.writeOptionalString(explanation);
         out.writeOptionalString(error);

+ 10 - 3
core/src/main/java/org/elasticsearch/action/admin/indices/validate/query/TransportValidateQueryAction.java

@@ -89,8 +89,14 @@ public class TransportValidateQueryAction extends TransportBroadcastAction<Valid
 
     @Override
     protected GroupShardsIterator shards(ClusterState clusterState, ValidateQueryRequest request, String[] concreteIndices) {
-        // Hard-code routing to limit request to a single shard, but still, randomize it...
-        Map<String, Set<String>> routingMap = indexNameExpressionResolver.resolveSearchRouting(clusterState, Integer.toString(Randomness.get().nextInt(1000)), request.indices());
+        final String routing;
+        if (request.allShards()) {
+            routing = null;
+        } else {
+            // Random routing to limit request to a single shard
+            routing = Integer.toString(Randomness.get().nextInt(1000));
+        }
+        Map<String, Set<String>> routingMap = indexNameExpressionResolver.resolveSearchRouting(clusterState, routing, request.indices());
         return clusterService.operationRouting().searchShards(clusterState, concreteIndices, routingMap, "_local");
     }
 
@@ -124,12 +130,13 @@ public class TransportValidateQueryAction extends TransportBroadcastAction<Valid
             } else {
                 ShardValidateQueryResponse validateQueryResponse = (ShardValidateQueryResponse) shardResponse;
                 valid = valid && validateQueryResponse.isValid();
-                if (request.explain() || request.rewrite()) {
+                if (request.explain() || request.rewrite() || request.allShards()) {
                     if (queryExplanations == null) {
                         queryExplanations = new ArrayList<>();
                     }
                     queryExplanations.add(new QueryExplanation(
                             validateQueryResponse.getIndex(),
+                            request.allShards() ? validateQueryResponse.getShardId().getId() : QueryExplanation.RANDOM_SHARD,
                             validateQueryResponse.isValid(),
                             validateQueryResponse.getExplanation(),
                             validateQueryResponse.getError()

+ 23 - 1
core/src/main/java/org/elasticsearch/action/admin/indices/validate/query/ValidateQueryRequest.java

@@ -19,6 +19,7 @@
 
 package org.elasticsearch.action.admin.indices.validate.query;
 
+import org.elasticsearch.Version;
 import org.elasticsearch.action.ActionRequestValidationException;
 import org.elasticsearch.action.ValidateActions;
 import org.elasticsearch.action.support.IndicesOptions;
@@ -43,6 +44,7 @@ public class ValidateQueryRequest extends BroadcastRequest<ValidateQueryRequest>
 
     private boolean explain;
     private boolean rewrite;
+    private boolean allShards;
 
     private String[] types = Strings.EMPTY_ARRAY;
 
@@ -125,6 +127,20 @@ public class ValidateQueryRequest extends BroadcastRequest<ValidateQueryRequest>
         return rewrite;
     }
 
+    /**
+     * Indicates whether the query should be validated on all shards instead of one random shard
+     */
+    public void allShards(boolean allShards) {
+        this.allShards = allShards;
+    }
+
+    /**
+     * Indicates whether the query should be validated on all shards instead of one random shard
+     */
+    public boolean allShards() {
+        return allShards;
+    }
+
     @Override
     public void readFrom(StreamInput in) throws IOException {
         super.readFrom(in);
@@ -138,6 +154,9 @@ public class ValidateQueryRequest extends BroadcastRequest<ValidateQueryRequest>
         }
         explain = in.readBoolean();
         rewrite = in.readBoolean();
+        if (in.getVersion().onOrAfter(Version.V_5_4_0_UNRELEASED)) {
+            allShards = in.readBoolean();
+        }
     }
 
     @Override
@@ -150,11 +169,14 @@ public class ValidateQueryRequest extends BroadcastRequest<ValidateQueryRequest>
         }
         out.writeBoolean(explain);
         out.writeBoolean(rewrite);
+        if (out.getVersion().onOrAfter(Version.V_5_4_0_UNRELEASED)) {
+            out.writeBoolean(allShards);
+        }
     }
 
     @Override
     public String toString() {
         return "[" + Arrays.toString(indices) + "]" + Arrays.toString(types) + ", query[" + query + "], explain:" + explain +
-                ", rewrite:" + rewrite;
+                ", rewrite:" + rewrite + ", all_shards:" + allShards;
     }
 }

+ 8 - 0
core/src/main/java/org/elasticsearch/action/admin/indices/validate/query/ValidateQueryRequestBuilder.java

@@ -64,4 +64,12 @@ public class ValidateQueryRequestBuilder extends BroadcastOperationRequestBuilde
         request.rewrite(rewrite);
         return this;
     }
+
+    /**
+     * Indicates whether the query should be validated on all shards
+     */
+    public ValidateQueryRequestBuilder setAllShards(boolean rewrite) {
+        request.allShards(rewrite);
+        return this;
+    }
 }

+ 5 - 0
core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestValidateQueryAction.java

@@ -62,6 +62,7 @@ public class RestValidateQueryAction extends BaseRestHandler {
         validateQueryRequest.explain(request.paramAsBoolean("explain", false));
         validateQueryRequest.types(Strings.splitStringByCommaToArray(request.param("type")));
         validateQueryRequest.rewrite(request.paramAsBoolean("rewrite", false));
+        validateQueryRequest.allShards(request.paramAsBoolean("all_shards", false));
 
         Exception bodyParsingException = null;
         try {
@@ -98,6 +99,9 @@ public class RestValidateQueryAction extends BaseRestHandler {
                                 if (explanation.getIndex() != null) {
                                     builder.field(INDEX_FIELD, explanation.getIndex());
                                 }
+                                if(explanation.getShard() >= 0) {
+                                    builder.field(SHARD_FIELD, explanation.getShard());
+                                }
                                 builder.field(VALID_FIELD, explanation.isValid());
                                 if (explanation.getError() != null) {
                                     builder.field(ERROR_FIELD, explanation.getError());
@@ -132,6 +136,7 @@ public class RestValidateQueryAction extends BaseRestHandler {
     }
 
     private static final String INDEX_FIELD = "index";
+    private static final String SHARD_FIELD = "shard";
     private static final String VALID_FIELD = "valid";
     private static final String EXPLANATIONS_FIELD = "explanations";
     private static final String ERROR_FIELD = "error";

+ 52 - 0
core/src/test/java/org/elasticsearch/validate/SimpleValidateQueryIT.java

@@ -39,11 +39,14 @@ import org.joda.time.format.ISODateTimeFormat;
 
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.List;
 
 import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
 import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
+import static org.hamcrest.Matchers.allOf;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.is;
@@ -252,6 +255,37 @@ public class SimpleValidateQueryIT extends ESIntegTestCase {
                 containsString("field:huge field:pidgin"), true);
     }
 
+    public void testExplainWithRewriteValidateQueryAllShards() throws Exception {
+        client().admin().indices().prepareCreate("test")
+            .addMapping("type1", "field", "type=text,analyzer=whitespace")
+            .setSettings(SETTING_NUMBER_OF_SHARDS, 2).get();
+        // We are relying on specific routing behaviors for the result to be right, so
+        // we cannot randomize the number of shards or change ids here.
+        client().prepareIndex("test", "type1", "1")
+            .setSource("field", "quick lazy huge brown pidgin").get();
+        client().prepareIndex("test", "type1", "2")
+            .setSource("field", "the quick brown fox").get();
+        client().prepareIndex("test", "type1", "3")
+            .setSource("field", "the quick lazy huge brown fox jumps over the tree").get();
+        client().prepareIndex("test", "type1", "4")
+            .setSource("field", "the lazy dog quacks like a duck").get();
+        refresh();
+
+        // prefix queries
+        assertExplanations(QueryBuilders.matchPhrasePrefixQuery("field", "qu"),
+            Arrays.asList(
+                equalTo("field:quick"),
+                allOf(containsString("field:quick"), containsString("field:quacks"))
+            ), true, true);
+        assertExplanations(QueryBuilders.matchPhrasePrefixQuery("field", "ju"),
+            Arrays.asList(
+                equalTo("field:jumps"),
+                equalTo("+MatchNoDocsQuery(\"empty MultiPhraseQuery\") +MatchNoDocsQuery(\"No " +
+                    "terms supplied for org.elasticsearch.common.lucene.search." +
+                    "MultiPhrasePrefixQuery\")")
+            ), true, true);
+    }
+
     public void testIrrelevantPropertiesBeforeQuery() throws IOException {
         createIndex("test");
         ensureGreen();
@@ -280,4 +314,22 @@ public class SimpleValidateQueryIT extends ESIntegTestCase {
         assertThat(response.getQueryExplanation().get(0).getExplanation(), matcher);
         assertThat(response.isValid(), equalTo(true));
     }
+
+    private static void assertExplanations(QueryBuilder queryBuilder,
+                                           List<Matcher<String>> matchers, boolean withRewrite,
+                                           boolean allShards) {
+        ValidateQueryResponse response = client().admin().indices().prepareValidateQuery("test")
+            .setTypes("type1")
+            .setQuery(queryBuilder)
+            .setExplain(true)
+            .setRewrite(withRewrite)
+            .setAllShards(allShards)
+            .execute().actionGet();
+        assertThat(response.getQueryExplanation().size(), equalTo(matchers.size()));
+        for (int i = 0; i < matchers.size(); i++) {
+            assertThat(response.getQueryExplanation().get(i).getError(), nullValue());
+            assertThat(response.getQueryExplanation().get(i).getExplanation(), matchers.get(i));
+            assertThat(response.isValid(), equalTo(true));
+        }
+    }
 }

+ 59 - 33
docs/reference/search/validate.asciidoc

@@ -146,24 +146,24 @@ When the query is valid, the explanation defaults to the string
 representation of that query. With `rewrite` set to `true`, the explanation
 is more detailed showing the actual Lucene query that will be executed.
 
-For Fuzzy Queries:
+For More Like This:
 
 [source,js]
 --------------------------------------------------
 GET twitter/tweet/_validate/query?rewrite=true
 {
   "query": {
-    "match": {
-      "user": {
-        "query": "kimchy",
-        "fuzziness": "auto"
-      }
+    "more_like_this": {
+      "like": {
+        "_id": "2"
+      },
+      "boost_terms": 1
     }
   }
 }
 --------------------------------------------------
 // CONSOLE
-// TEST[skip:https://github.com/elastic/elasticsearch/issues/18254]
+// TEST[skip:the output is randomized depending on which shard we hit]
 
 Response:
 
@@ -180,54 +180,80 @@ Response:
       {
          "index": "twitter",
          "valid": true,
-         "explanation": "+user:kimchy +user:kimchi^0.75 #(ConstantScore(_type:tweet))^0.0"
+         "explanation": "((user:terminator^3.71334 plot:future^2.763601 plot:human^2.8415773 plot:sarah^3.4193945 plot:kyle^3.8244398 plot:cyborg^3.9177752 plot:connor^4.040236 plot:reese^4.7133346 ... )~6) -ConstantScore(_uid:tweet#2)) #(ConstantScore(_type:tweet))^0.0"
       }
    ]
 }
 --------------------------------------------------
 // TESTRESPONSE
 
-For More Like This:
+By default, the request is executed on a single shard only, which is randomly
+selected. The detailed explanation of the query may depend on which shard is
+being hit, and therefore may vary from one request to another. So, in case of
+query rewrite the `all_shards` parameter should be used to get response from
+all available shards.
+
+For Fuzzy Queries:
 
 [source,js]
 --------------------------------------------------
-GET twitter/tweet/_validate/query?rewrite=true
+GET twitter/tweet/_validate/query?rewrite=true&all_shards=true
 {
   "query": {
-    "more_like_this": {
-      "like": {
-        "_id": "2"
-      },
-      "boost_terms": 1
+    "match": {
+      "user": {
+        "query": "kimchy",
+        "fuzziness": "auto"
+      }
     }
   }
 }
 --------------------------------------------------
 // CONSOLE
-// TEST[skip:https://github.com/elastic/elasticsearch/issues/18254]
 
 Response:
 
 [source,js]
 --------------------------------------------------
 {
-   "valid": true,
-   "_shards": {
-      "total": 1,
-      "successful": 1,
-      "failed": 0
-   },
-   "explanations": [
-      {
-         "index": "twitter",
-         "valid": true,
-         "explanation": "((user:terminator^3.71334 plot:future^2.763601 plot:human^2.8415773 plot:sarah^3.4193945 plot:kyle^3.8244398 plot:cyborg^3.9177752 plot:connor^4.040236 plot:reese^4.7133346 ... )~6) -ConstantScore(_uid:tweet#2)) #(ConstantScore(_type:tweet))^0.0"
-      }
-   ]
+  "valid": true,
+  "_shards": {
+    "total": 5,
+    "successful": 5,
+    "failed": 0
+  },
+  "explanations": [
+    {
+      "index": "twitter",
+      "shard": 0,
+      "valid": true,
+      "explanation": "+MatchNoDocsQuery(\"empty BooleanQuery\") #ConstantScore(MatchNoDocsQuery(\"empty BooleanQuery\"))"
+    },
+    {
+      "index": "twitter",
+      "shard": 1,
+      "valid": true,
+      "explanation": "+MatchNoDocsQuery(\"empty BooleanQuery\") #ConstantScore(MatchNoDocsQuery(\"empty BooleanQuery\"))"
+    },
+    {
+      "index": "twitter",
+      "shard": 2,
+      "valid": true,
+      "explanation": "(user:kimchi)^0.8333333"
+    },
+    {
+      "index": "twitter",
+      "shard": 3,
+      "valid": true,
+      "explanation": "user:kimchy"
+    },
+    {
+      "index": "twitter",
+      "shard": 4,
+      "valid": true,
+      "explanation": "+MatchNoDocsQuery(\"empty BooleanQuery\") #ConstantScore(MatchNoDocsQuery(\"empty BooleanQuery\"))"
+    }
+  ]
 }
 --------------------------------------------------
 // TESTRESPONSE
-
-CAUTION: The request is executed on a single shard only, which is randomly
-selected. The detailed explanation of the query may depend on which shard is
-being hit, and therefore may vary from one request to another.

+ 4 - 0
rest-api-spec/src/main/resources/rest-api-spec/api/indices.validate_query.json

@@ -66,6 +66,10 @@
         "rewrite": {
           "type": "boolean",
           "description": "Provide a more detailed explanation showing the actual Lucene query that will be executed."
+        },
+        "all_shards": {
+          "type": "boolean",
+          "description": "Execute validation on all shards instead of one random shard per index"
         }
       }
     },