Browse Source

Merge pull request #19450 from elastic/feature/friendly-index-creation

Makes index creation more friendly
Ali Beyad cách đây 9 năm
mục cha
commit
687e2e12b3
68 tập tin đã thay đổi với 2210 bổ sung và 227 xóa
  1. 0 1
      buildSrc/src/main/resources/checkstyle_suppressions.xml
  2. 29 0
      core/src/main/java/org/elasticsearch/action/ActionListener.java
  3. 12 0
      core/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java
  4. 29 0
      core/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexRequest.java
  5. 20 0
      core/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexRequestBuilder.java
  6. 21 1
      core/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexResponse.java
  7. 6 20
      core/src/main/java/org/elasticsearch/action/admin/indices/create/TransportCreateIndexAction.java
  8. 19 0
      core/src/main/java/org/elasticsearch/action/admin/indices/rollover/RolloverRequest.java
  9. 20 0
      core/src/main/java/org/elasticsearch/action/admin/indices/rollover/RolloverRequestBuilder.java
  10. 33 2
      core/src/main/java/org/elasticsearch/action/admin/indices/rollover/RolloverResponse.java
  11. 27 31
      core/src/main/java/org/elasticsearch/action/admin/indices/rollover/TransportRolloverAction.java
  12. 4 1
      core/src/main/java/org/elasticsearch/action/admin/indices/shards/TransportIndicesShardStoresAction.java
  13. 19 1
      core/src/main/java/org/elasticsearch/action/admin/indices/shrink/ShrinkRequest.java
  14. 20 0
      core/src/main/java/org/elasticsearch/action/admin/indices/shrink/ShrinkRequestBuilder.java
  15. 2 2
      core/src/main/java/org/elasticsearch/action/admin/indices/shrink/ShrinkResponse.java
  16. 3 18
      core/src/main/java/org/elasticsearch/action/admin/indices/shrink/TransportShrinkAction.java
  17. 211 0
      core/src/main/java/org/elasticsearch/action/support/ActiveShardCount.java
  18. 105 0
      core/src/main/java/org/elasticsearch/action/support/ActiveShardsObserver.java
  19. 40 0
      core/src/main/java/org/elasticsearch/cluster/ack/CreateIndexClusterStateUpdateResponse.java
  20. 1 1
      core/src/main/java/org/elasticsearch/cluster/health/ClusterIndexHealth.java
  21. 41 8
      core/src/main/java/org/elasticsearch/cluster/health/ClusterShardHealth.java
  22. 55 3
      core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataCreateIndexService.java
  23. 25 7
      core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java
  24. 115 5
      core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java
  25. 7 4
      core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java
  26. 17 11
      core/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java
  27. 0 2
      core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AbstractAllocateAllocationCommand.java
  28. 2 1
      core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java
  29. 20 13
      core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/Decision.java
  30. 6 4
      core/src/main/java/org/elasticsearch/gateway/PrimaryShardAllocator.java
  31. 24 18
      core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java
  32. 14 0
      core/src/main/java/org/elasticsearch/gateway/TransportNodesListGatewayStartedShards.java
  33. 2 0
      core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestRolloverIndexAction.java
  34. 12 1
      core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestShrinkIndexAction.java
  35. 11 1
      core/src/main/java/org/elasticsearch/rest/action/admin/indices/create/RestCreateIndexAction.java
  36. 7 7
      core/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainIT.java
  37. 2 1
      core/src/test/java/org/elasticsearch/action/admin/indices/create/CreateIndexIT.java
  38. 3 0
      core/src/test/java/org/elasticsearch/action/admin/indices/rollover/TransportRolloverActionTests.java
  39. 4 0
      core/src/test/java/org/elasticsearch/action/admin/indices/shrink/TransportShrinkActionTests.java
  40. 2 1
      core/src/test/java/org/elasticsearch/action/admin/indices/template/put/MetaDataIndexTemplateServiceTests.java
  41. 305 0
      core/src/test/java/org/elasticsearch/action/support/ActiveShardCountTests.java
  42. 155 0
      core/src/test/java/org/elasticsearch/action/support/ActiveShardsObserverIT.java
  43. 24 1
      core/src/test/java/org/elasticsearch/cluster/ClusterHealthIT.java
  44. 2 1
      core/src/test/java/org/elasticsearch/cluster/SimpleDataNodesIT.java
  45. 9 9
      core/src/test/java/org/elasticsearch/cluster/allocation/ClusterRerouteIT.java
  46. 367 7
      core/src/test/java/org/elasticsearch/cluster/health/ClusterStateHealthTests.java
  47. 1 0
      core/src/test/java/org/elasticsearch/cluster/metadata/MetaDataCreateIndexServiceTests.java
  48. 17 7
      core/src/test/java/org/elasticsearch/cluster/routing/PrimaryAllocationIT.java
  49. 18 8
      core/src/test/java/org/elasticsearch/cluster/routing/RoutingTableGenerator.java
  50. 18 3
      core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java
  51. 2 1
      core/src/test/java/org/elasticsearch/cluster/routing/allocation/ClusterRebalanceRoutingTests.java
  52. 169 0
      core/src/test/java/org/elasticsearch/cluster/routing/allocation/DecisionsImpactOnClusterHealthTests.java
  53. 2 1
      core/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java
  54. 54 11
      core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java
  55. 1 1
      core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java
  56. 1 1
      core/src/test/java/org/elasticsearch/indices/cluster/ClusterStateChanges.java
  57. 2 1
      core/src/test/java/org/elasticsearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java
  58. 2 1
      core/src/test/java/org/elasticsearch/indices/flush/FlushIT.java
  59. 2 1
      core/src/test/java/org/elasticsearch/indices/state/SimpleIndexStateIT.java
  60. 5 2
      core/src/test/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java
  61. 2 1
      core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java
  62. 4 0
      rest-api-spec/src/main/resources/rest-api-spec/api/indices.create.json
  63. 4 0
      rest-api-spec/src/main/resources/rest-api-spec/api/indices.rollover.json
  64. 4 0
      rest-api-spec/src/main/resources/rest-api-spec/api/indices.shrink.json
  65. 29 0
      rest-api-spec/src/main/resources/rest-api-spec/test/indices.create/10_basic.yaml
  66. 7 5
      rest-api-spec/src/main/resources/rest-api-spec/test/indices.rollover/10_basic.yaml
  67. 2 0
      rest-api-spec/src/main/resources/rest-api-spec/test/indices.shrink/10_basic.yaml
  68. 12 0
      test/framework/src/main/java/org/elasticsearch/test/hamcrest/ElasticsearchAssertions.java

+ 0 - 1
buildSrc/src/main/resources/checkstyle_suppressions.xml

@@ -740,7 +740,6 @@
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]cluster[/\\]allocation[/\\]ShardsAllocatorModuleIT.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]cluster[/\\]allocation[/\\]SimpleAllocationIT.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]cluster[/\\]health[/\\]ClusterIndexHealthTests.java" checks="LineLength" />
-  <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]cluster[/\\]health[/\\]ClusterStateHealthTests.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]cluster[/\\]metadata[/\\]AutoExpandReplicasTests.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]cluster[/\\]metadata[/\\]DateMathExpressionResolverTests.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]cluster[/\\]metadata[/\\]HumanReadableIndexSettingsTests.java" checks="LineLength" />

+ 29 - 0
core/src/main/java/org/elasticsearch/action/ActionListener.java

@@ -19,6 +19,8 @@
 
 package org.elasticsearch.action;
 
+import java.util.function.Consumer;
+
 /**
  * A listener for action responses or failures.
  */
@@ -33,4 +35,31 @@ public interface ActionListener<Response> {
      * A failure caused by an exception at some phase of the task.
      */
     void onFailure(Exception e);
+
+    /**
+     * Creates a listener that listens for a response (or failure) and executes the
+     * corresponding consumer when the response (or failure) is received.
+     *
+     * @param onResponse the consumer of the response, when the listener receives one
+     * @param onFailure the consumer of the failure, when the listener receives one
+     * @param <Response> the type of the response
+     * @return a listener that listens for responses and invokes the consumer when received
+     */
+    static <Response> ActionListener<Response> wrap(Consumer<Response> onResponse, Consumer<Exception> onFailure) {
+        return new ActionListener<Response>() {
+            @Override
+            public void onResponse(Response response) {
+                try {
+                    onResponse.accept(response);
+                } catch (Exception e) {
+                    onFailure(e);
+                }
+            }
+
+            @Override
+            public void onFailure(Exception e) {
+                onFailure.accept(e);
+            }
+        };
+    }
 }

+ 12 - 0
core/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.action.admin.indices.create;
 
 import org.elasticsearch.action.admin.indices.alias.Alias;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.cluster.ack.ClusterStateUpdateRequest;
 import org.elasticsearch.cluster.block.ClusterBlock;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
@@ -55,6 +56,8 @@ public class CreateIndexClusterStateUpdateRequest extends ClusterStateUpdateRequ
 
     private final Set<ClusterBlock> blocks = new HashSet<>();
 
+    private ActiveShardCount waitForActiveShards = ActiveShardCount.DEFAULT;
+
 
     public CreateIndexClusterStateUpdateRequest(TransportMessage originalMessage, String cause, String index, boolean updateAllTypes) {
         this.originalMessage = originalMessage;
@@ -98,6 +101,11 @@ public class CreateIndexClusterStateUpdateRequest extends ClusterStateUpdateRequ
         return this;
     }
 
+    public CreateIndexClusterStateUpdateRequest waitForActiveShards(ActiveShardCount waitForActiveShards) {
+        this.waitForActiveShards = waitForActiveShards;
+        return this;
+    }
+
     public TransportMessage originalMessage() {
         return originalMessage;
     }
@@ -142,4 +150,8 @@ public class CreateIndexClusterStateUpdateRequest extends ClusterStateUpdateRequ
     public boolean updateAllTypes() {
         return updateAllTypes;
     }
+
+    public ActiveShardCount waitForActiveShards() {
+        return waitForActiveShards;
+    }
 }

+ 29 - 0
core/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexRequest.java

@@ -25,6 +25,7 @@ import org.elasticsearch.action.ActionRequestValidationException;
 import org.elasticsearch.action.IndicesRequest;
 import org.elasticsearch.action.admin.indices.alias.Alias;
 import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.action.support.IndicesOptions;
 import org.elasticsearch.action.support.master.AcknowledgedRequest;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
@@ -77,6 +78,8 @@ public class CreateIndexRequest extends AcknowledgedRequest<CreateIndexRequest>
 
     private boolean updateAllTypes = false;
 
+    private ActiveShardCount waitForActiveShards = ActiveShardCount.DEFAULT;
+
     public CreateIndexRequest() {
     }
 
@@ -440,6 +443,30 @@ public class CreateIndexRequest extends AcknowledgedRequest<CreateIndexRequest>
         return this;
     }
 
+    public ActiveShardCount waitForActiveShards() {
+        return waitForActiveShards;
+    }
+
+    /**
+     * Sets the number of shard copies that should be active for index creation to return.
+     * Defaults to {@link ActiveShardCount#DEFAULT}, which will wait for one shard copy
+     * (the primary) to become active. Set this value to {@link ActiveShardCount#ALL} to
+     * wait for all shards (primary and all replicas) to be active before returning.
+     * Otherwise, use {@link ActiveShardCount#from(int)} to set this value to any
+     * non-negative integer, up to the number of copies per shard (number of replicas + 1),
+     * to wait for the desired amount of shard copies to become active before returning.
+     * Index creation will only wait up until the timeout value for the number of shard copies
+     * to be active before returning.  Check {@link CreateIndexResponse#isShardsAcked()} to
+     * determine if the requisite shard copies were all started before returning or timing out.
+     *
+     * @param waitForActiveShards number of active shard copies to wait on
+     */
+    public CreateIndexRequest waitForActiveShards(ActiveShardCount waitForActiveShards) {
+        this.waitForActiveShards = waitForActiveShards;
+        return this;
+    }
+
+
     @Override
     public void readFrom(StreamInput in) throws IOException {
         super.readFrom(in);
@@ -462,6 +489,7 @@ public class CreateIndexRequest extends AcknowledgedRequest<CreateIndexRequest>
             aliases.add(Alias.read(in));
         }
         updateAllTypes = in.readBoolean();
+        waitForActiveShards = ActiveShardCount.readFrom(in);
     }
 
     @Override
@@ -486,5 +514,6 @@ public class CreateIndexRequest extends AcknowledgedRequest<CreateIndexRequest>
             alias.writeTo(out);
         }
         out.writeBoolean(updateAllTypes);
+        waitForActiveShards.writeTo(out);
     }
 }

+ 20 - 0
core/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexRequestBuilder.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.action.admin.indices.create;
 
 import org.elasticsearch.action.admin.indices.alias.Alias;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.action.support.master.AcknowledgedRequestBuilder;
 import org.elasticsearch.client.ElasticsearchClient;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
@@ -249,4 +250,23 @@ public class CreateIndexRequestBuilder extends AcknowledgedRequestBuilder<Create
         request.updateAllTypes(updateAllTypes);
         return this;
     }
+
+    /**
+     * Sets the number of shard copies that should be active for index creation to return.
+     * Defaults to {@link ActiveShardCount#DEFAULT}, which will wait for one shard copy
+     * (the primary) to become active. Set this value to {@link ActiveShardCount#ALL} to
+     * wait for all shards (primary and all replicas) to be active before returning.
+     * Otherwise, use {@link ActiveShardCount#from(int)} to set this value to any
+     * non-negative integer, up to the number of copies per shard (number of replicas + 1),
+     * to wait for the desired amount of shard copies to become active before returning.
+     * Index creation will only wait up until the timeout value for the number of shard copies
+     * to be active before returning.  Check {@link CreateIndexResponse#isShardsAcked()} to
+     * determine if the requisite shard copies were all started before returning or timing out.
+     *
+     * @param waitForActiveShards number of active shard copies to wait on
+     */
+    public CreateIndexRequestBuilder setWaitForActiveShards(ActiveShardCount waitForActiveShards) {
+        request.waitForActiveShards(waitForActiveShards);
+        return this;
+    }
 }

+ 21 - 1
core/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexResponse.java

@@ -22,6 +22,7 @@ package org.elasticsearch.action.admin.indices.create;
 import org.elasticsearch.action.support.master.AcknowledgedResponse;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.xcontent.XContentBuilder;
 
 import java.io.IOException;
 
@@ -30,22 +31,41 @@ import java.io.IOException;
  */
 public class CreateIndexResponse extends AcknowledgedResponse {
 
+    private boolean shardsAcked;
+
     protected CreateIndexResponse() {
     }
 
-    protected CreateIndexResponse(boolean acknowledged) {
+    protected CreateIndexResponse(boolean acknowledged, boolean shardsAcked) {
         super(acknowledged);
+        assert acknowledged || shardsAcked == false; // if its not acknowledged, then shards acked should be false too
+        this.shardsAcked = shardsAcked;
     }
 
     @Override
     public void readFrom(StreamInput in) throws IOException {
         super.readFrom(in);
         readAcknowledged(in);
+        shardsAcked = in.readBoolean();
     }
 
     @Override
     public void writeTo(StreamOutput out) throws IOException {
         super.writeTo(out);
         writeAcknowledged(out);
+        out.writeBoolean(shardsAcked);
+    }
+
+    /**
+     * Returns true if the requisite number of shards were started before
+     * returning from the index creation operation.  If {@link #isAcknowledged()}
+     * is false, then this also returns false.
+     */
+    public boolean isShardsAcked() {
+        return shardsAcked;
+    }
+
+    public void addCustomFields(XContentBuilder builder) throws IOException {
+        builder.field("shards_acknowledged", isShardsAcked());
     }
 }

+ 6 - 20
core/src/main/java/org/elasticsearch/action/admin/indices/create/TransportCreateIndexAction.java

@@ -23,7 +23,6 @@ import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.support.ActionFilters;
 import org.elasticsearch.action.support.master.TransportMasterNodeAction;
 import org.elasticsearch.cluster.ClusterState;
-import org.elasticsearch.cluster.ack.ClusterStateUpdateResponse;
 import org.elasticsearch.cluster.block.ClusterBlockException;
 import org.elasticsearch.cluster.block.ClusterBlockLevel;
 import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
@@ -31,7 +30,6 @@ import org.elasticsearch.cluster.metadata.MetaDataCreateIndexService;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.indices.IndexAlreadyExistsException;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.TransportService;
 
@@ -77,24 +75,12 @@ public class TransportCreateIndexAction extends TransportMasterNodeAction<Create
         final CreateIndexClusterStateUpdateRequest updateRequest = new CreateIndexClusterStateUpdateRequest(request, cause, indexName, request.updateAllTypes())
                 .ackTimeout(request.timeout()).masterNodeTimeout(request.masterNodeTimeout())
                 .settings(request.settings()).mappings(request.mappings())
-                .aliases(request.aliases()).customs(request.customs());
+                .aliases(request.aliases()).customs(request.customs())
+                .waitForActiveShards(request.waitForActiveShards());
 
-        createIndexService.createIndex(updateRequest, new ActionListener<ClusterStateUpdateResponse>() {
-
-            @Override
-            public void onResponse(ClusterStateUpdateResponse response) {
-                listener.onResponse(new CreateIndexResponse(response.isAcknowledged()));
-            }
-
-            @Override
-            public void onFailure(Exception t) {
-                if (t instanceof IndexAlreadyExistsException) {
-                    logger.trace("[{}] failed to create", t, request.index());
-                } else {
-                    logger.debug("[{}] failed to create", t, request.index());
-                }
-                listener.onFailure(t);
-            }
-        });
+        createIndexService.createIndex(updateRequest, ActionListener.wrap(response ->
+            listener.onResponse(new CreateIndexResponse(response.isAcknowledged(), response.isShardsAcked())),
+            listener::onFailure));
     }
+
 }

+ 19 - 0
core/src/main/java/org/elasticsearch/action/admin/indices/rollover/RolloverRequest.java

@@ -22,6 +22,7 @@ import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.action.ActionRequestValidationException;
 import org.elasticsearch.action.IndicesRequest;
 import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.action.support.IndicesOptions;
 import org.elasticsearch.action.support.master.AcknowledgedRequest;
 import org.elasticsearch.common.ParseField;
@@ -206,4 +207,22 @@ public class RolloverRequest extends AcknowledgedRequest<RolloverRequest> implem
         }
     }
 
+    /**
+     * Sets the number of shard copies that should be active for creation of the
+     * new rollover index to return. Defaults to {@link ActiveShardCount#DEFAULT}, which will
+     * wait for one shard copy (the primary) to become active. Set this value to
+     * {@link ActiveShardCount#ALL} to wait for all shards (primary and all replicas) to be active
+     * before returning. Otherwise, use {@link ActiveShardCount#from(int)} to set this value to any
+     * non-negative integer, up to the number of copies per shard (number of replicas + 1),
+     * to wait for the desired amount of shard copies to become active before returning.
+     * Index creation will only wait up until the timeout value for the number of shard copies
+     * to be active before returning.  Check {@link RolloverResponse#isShardsAcked()} to
+     * determine if the requisite shard copies were all started before returning or timing out.
+     *
+     * @param waitForActiveShards number of active shard copies to wait on
+     */
+    public void setWaitForActiveShards(ActiveShardCount waitForActiveShards) {
+        this.createIndexRequest.waitForActiveShards(waitForActiveShards);
+    }
+
 }

+ 20 - 0
core/src/main/java/org/elasticsearch/action/admin/indices/rollover/RolloverRequestBuilder.java

@@ -19,6 +19,7 @@
 package org.elasticsearch.action.admin.indices.rollover;
 
 import org.elasticsearch.action.admin.indices.alias.Alias;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.action.support.master.MasterNodeOperationRequestBuilder;
 import org.elasticsearch.client.ElasticsearchClient;
 import org.elasticsearch.common.settings.Settings;
@@ -70,4 +71,23 @@ public class RolloverRequestBuilder extends MasterNodeOperationRequestBuilder<Ro
         this.request.getCreateIndexRequest().mapping(type, source);
         return this;
     }
+
+    /**
+     * Sets the number of shard copies that should be active for creation of the
+     * new rollover index to return. Defaults to {@link ActiveShardCount#DEFAULT}, which will
+     * wait for one shard copy (the primary) to become active. Set this value to
+     * {@link ActiveShardCount#ALL} to wait for all shards (primary and all replicas) to be active
+     * before returning. Otherwise, use {@link ActiveShardCount#from(int)} to set this value to any
+     * non-negative integer, up to the number of copies per shard (number of replicas + 1),
+     * to wait for the desired amount of shard copies to become active before returning.
+     * Index creation will only wait up until the timeout value for the number of shard copies
+     * to be active before returning.  Check {@link RolloverResponse#isShardsAcked()} to
+     * determine if the requisite shard copies were all started before returning or timing out.
+     *
+     * @param waitForActiveShards number of active shard copies to wait on
+     */
+    public RolloverRequestBuilder waitForActiveShards(ActiveShardCount waitForActiveShards) {
+        this.request.setWaitForActiveShards(waitForActiveShards);
+        return this;
+    }
 }

+ 33 - 2
core/src/main/java/org/elasticsearch/action/admin/indices/rollover/RolloverResponse.java

@@ -39,22 +39,28 @@ public final class RolloverResponse extends ActionResponse implements ToXContent
     private static final String DRY_RUN = "dry_run";
     private static final String ROLLED_OVER = "rolled_over";
     private static final String CONDITIONS = "conditions";
+    private static final String ACKNOWLEDGED = "acknowledged";
+    private static final String SHARDS_ACKED = "shards_acknowledged";
 
     private String oldIndex;
     private String newIndex;
     private Set<Map.Entry<String, Boolean>> conditionStatus;
     private boolean dryRun;
     private boolean rolledOver;
+    private boolean acknowledged;
+    private boolean shardsAcked;
 
     RolloverResponse() {
     }
 
     RolloverResponse(String oldIndex, String newIndex, Set<Condition.Result> conditionResults,
-                     boolean dryRun, boolean rolledOver) {
+                     boolean dryRun, boolean rolledOver, boolean acknowledged, boolean shardsAcked) {
         this.oldIndex = oldIndex;
         this.newIndex = newIndex;
         this.dryRun = dryRun;
         this.rolledOver = rolledOver;
+        this.acknowledged = acknowledged;
+        this.shardsAcked = shardsAcked;
         this.conditionStatus = conditionResults.stream()
             .map(result -> new AbstractMap.SimpleEntry<>(result.condition.toString(), result.matched))
             .collect(Collectors.toSet());
@@ -89,12 +95,31 @@ public final class RolloverResponse extends ActionResponse implements ToXContent
     }
 
     /**
-     * Returns if the rollover was not simulated and the conditions were met
+     * Returns true if the rollover was not simulated and the conditions were met
      */
     public boolean isRolledOver() {
         return rolledOver;
     }
 
+    /**
+     * Returns true if the creation of the new rollover index and switching of the
+     * alias to the newly created index was successful, and returns false otherwise.
+     * If {@link #isDryRun()} is true, then this will also return false. If this
+     * returns false, then {@link #isShardsAcked()} will also return false.
+     */
+    public boolean isAcknowledged() {
+        return acknowledged;
+    }
+
+    /**
+     * Returns true if the requisite number of shards were started in the newly
+     * created rollover index before returning.  If {@link #isAcknowledged()} is
+     * false, then this will also return false.
+     */
+    public boolean isShardsAcked() {
+        return shardsAcked;
+    }
+
     @Override
     public void readFrom(StreamInput in) throws IOException {
         super.readFrom(in);
@@ -110,6 +135,8 @@ public final class RolloverResponse extends ActionResponse implements ToXContent
         conditionStatus = conditions;
         dryRun = in.readBoolean();
         rolledOver = in.readBoolean();
+        acknowledged = in.readBoolean();
+        shardsAcked = in.readBoolean();
     }
 
     @Override
@@ -124,6 +151,8 @@ public final class RolloverResponse extends ActionResponse implements ToXContent
         }
         out.writeBoolean(dryRun);
         out.writeBoolean(rolledOver);
+        out.writeBoolean(acknowledged);
+        out.writeBoolean(shardsAcked);
     }
 
     @Override
@@ -132,6 +161,8 @@ public final class RolloverResponse extends ActionResponse implements ToXContent
         builder.field(NEW_INDEX, newIndex);
         builder.field(ROLLED_OVER, rolledOver);
         builder.field(DRY_RUN, dryRun);
+        builder.field(ACKNOWLEDGED, acknowledged);
+        builder.field(SHARDS_ACKED, shardsAcked);
         builder.startObject(CONDITIONS);
         for (Map.Entry<String, Boolean> entry : conditionStatus) {
             builder.field(entry.getKey(), entry.getValue());

+ 27 - 31
core/src/main/java/org/elasticsearch/action/admin/indices/rollover/TransportRolloverAction.java

@@ -25,11 +25,12 @@ import org.elasticsearch.action.admin.indices.create.CreateIndexClusterStateUpda
 import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
 import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse;
 import org.elasticsearch.action.support.ActionFilters;
+import org.elasticsearch.action.support.ActiveShardCount;
+import org.elasticsearch.action.support.ActiveShardsObserver;
 import org.elasticsearch.action.support.IndicesOptions;
 import org.elasticsearch.action.support.master.TransportMasterNodeAction;
 import org.elasticsearch.client.Client;
 import org.elasticsearch.cluster.ClusterState;
-import org.elasticsearch.cluster.ack.ClusterStateUpdateResponse;
 import org.elasticsearch.cluster.block.ClusterBlockException;
 import org.elasticsearch.cluster.block.ClusterBlockLevel;
 import org.elasticsearch.cluster.metadata.AliasAction;
@@ -58,6 +59,7 @@ public class TransportRolloverAction extends TransportMasterNodeAction<RolloverR
     private static final Pattern INDEX_NAME_PATTERN = Pattern.compile("^.*-(\\d)+$");
     private final MetaDataCreateIndexService createIndexService;
     private final MetaDataIndexAliasesService indexAliasesService;
+    private final ActiveShardsObserver activeShardsObserver;
     private final Client client;
 
     @Inject
@@ -70,6 +72,7 @@ public class TransportRolloverAction extends TransportMasterNodeAction<RolloverR
         this.createIndexService = createIndexService;
         this.indexAliasesService = indexAliasesService;
         this.client = client;
+        this.activeShardsObserver = new ActiveShardsObserver(settings, clusterService, threadPool);
     }
 
     @Override
@@ -110,42 +113,34 @@ public class TransportRolloverAction extends TransportMasterNodeAction<RolloverR
                         : generateRolloverIndexName(sourceIndexName);
                     if (rolloverRequest.isDryRun()) {
                         listener.onResponse(
-                            new RolloverResponse(sourceIndexName, rolloverIndexName, conditionResults, true, false));
+                            new RolloverResponse(sourceIndexName, rolloverIndexName, conditionResults, true, false, false, false));
                         return;
                     }
                     if (conditionResults.size() == 0 || conditionResults.stream().anyMatch(result -> result.matched)) {
-                        createIndexService.createIndex(prepareCreateIndexRequest(rolloverIndexName, rolloverRequest),
-                            new ActionListener<ClusterStateUpdateResponse>() {
-                                @Override
-                                public void onResponse(ClusterStateUpdateResponse response) {
-                                    // switch the alias to point to the newly created index
-                                    indexAliasesService.indicesAliases(
-                                        prepareRolloverAliasesUpdateRequest(sourceIndexName, rolloverIndexName,
-                                            rolloverRequest),
-                                        new ActionListener<ClusterStateUpdateResponse>() {
-                                            @Override
-                                            public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) {
-                                                listener.onResponse(
-                                                    new RolloverResponse(sourceIndexName, rolloverIndexName,
-                                                        conditionResults, false, true));
-                                            }
-
-                                            @Override
-                                            public void onFailure(Exception e) {
-                                                listener.onFailure(e);
-                                            }
-                                        });
-                                }
-
-                                @Override
-                                public void onFailure(Exception t) {
-                                    listener.onFailure(t);
-                                }
-                            });
+                        CreateIndexClusterStateUpdateRequest updateRequest = prepareCreateIndexRequest(rolloverIndexName, rolloverRequest);
+                        createIndexService.createIndex(updateRequest, ActionListener.wrap(createIndexClusterStateUpdateResponse -> {
+                            // switch the alias to point to the newly created index
+                            indexAliasesService.indicesAliases(
+                                prepareRolloverAliasesUpdateRequest(sourceIndexName, rolloverIndexName,
+                                    rolloverRequest),
+                                ActionListener.wrap(aliasClusterStateUpdateResponse -> {
+                                    if (aliasClusterStateUpdateResponse.isAcknowledged()) {
+                                        activeShardsObserver.waitForActiveShards(rolloverIndexName,
+                                            rolloverRequest.getCreateIndexRequest().waitForActiveShards(),
+                                            rolloverRequest.masterNodeTimeout(),
+                                            isShardsAcked -> listener.onResponse(new RolloverResponse(sourceIndexName, rolloverIndexName,
+                                                                                    conditionResults, false, true, true, isShardsAcked)),
+                                            listener::onFailure);
+                                    } else {
+                                        listener.onResponse(new RolloverResponse(sourceIndexName, rolloverIndexName, conditionResults,
+                                                                                    false, true, false, false));
+                                    }
+                                }, listener::onFailure));
+                        }, listener::onFailure));
                     } else {
                         // conditions not met
                         listener.onResponse(
-                            new RolloverResponse(sourceIndexName, sourceIndexName, conditionResults, false, false)
+                            new RolloverResponse(sourceIndexName, sourceIndexName, conditionResults, false, false, false, false)
                         );
                     }
                 }
@@ -216,6 +211,7 @@ public class TransportRolloverAction extends TransportMasterNodeAction<RolloverR
             .masterNodeTimeout(createIndexRequest.masterNodeTimeout())
             .settings(createIndexRequest.settings())
             .aliases(createIndexRequest.aliases())
+            .waitForActiveShards(ActiveShardCount.NONE) // not waiting for shards here, will wait on the alias switch operation
             .mappings(createIndexRequest.mappings());
     }
 

+ 4 - 1
core/src/main/java/org/elasticsearch/action/admin/indices/shards/TransportIndicesShardStoresAction.java

@@ -28,6 +28,7 @@ import org.elasticsearch.cluster.block.ClusterBlockException;
 import org.elasticsearch.cluster.block.ClusterBlockLevel;
 import org.elasticsearch.cluster.health.ClusterHealthStatus;
 import org.elasticsearch.cluster.health.ClusterShardHealth;
+import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
@@ -93,12 +94,14 @@ public class TransportIndicesShardStoresAction extends TransportMasterNodeReadAc
         logger.trace("using cluster state version [{}] to determine shards", state.version());
         // collect relevant shard ids of the requested indices for fetching store infos
         for (String index : concreteIndices) {
+            IndexMetaData indexMetaData = state.metaData().index(index);
             IndexRoutingTable indexShardRoutingTables = routingTables.index(index);
             if (indexShardRoutingTables == null) {
                 continue;
             }
             for (IndexShardRoutingTable routing : indexShardRoutingTables) {
-                ClusterShardHealth shardHealth = new ClusterShardHealth(routing.shardId().id(), routing);
+                final int shardId = routing.shardId().id();
+                ClusterShardHealth shardHealth = new ClusterShardHealth(shardId, routing, indexMetaData);
                 if (request.shardStatuses().contains(shardHealth.getStatus())) {
                     shardIdsToFetch.add(routing.shardId());
                 }

+ 19 - 1
core/src/main/java/org/elasticsearch/action/admin/indices/shrink/ShrinkRequest.java

@@ -22,6 +22,7 @@ import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.action.ActionRequestValidationException;
 import org.elasticsearch.action.IndicesRequest;
 import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.action.support.IndicesOptions;
 import org.elasticsearch.action.support.master.AcknowledgedRequest;
 import org.elasticsearch.common.ParseField;
@@ -36,7 +37,6 @@ import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.XContentType;
 
 import java.io.IOException;
-import java.util.Map;
 import java.util.Objects;
 
 import static org.elasticsearch.action.ValidateActions.addValidationError;
@@ -126,6 +126,24 @@ public class ShrinkRequest extends AcknowledgedRequest<ShrinkRequest> implements
         return sourceIndex;
     }
 
+    /**
+     * Sets the number of shard copies that should be active for creation of the
+     * new shrunken index to return. Defaults to {@link ActiveShardCount#DEFAULT}, which will
+     * wait for one shard copy (the primary) to become active. Set this value to
+     * {@link ActiveShardCount#ALL} to wait for all shards (primary and all replicas) to be active
+     * before returning. Otherwise, use {@link ActiveShardCount#from(int)} to set this value to any
+     * non-negative integer, up to the number of copies per shard (number of replicas + 1),
+     * to wait for the desired amount of shard copies to become active before returning.
+     * Index creation will only wait up until the timeout value for the number of shard copies
+     * to be active before returning.  Check {@link ShrinkResponse#isShardsAcked()} to
+     * determine if the requisite shard copies were all started before returning or timing out.
+     *
+     * @param waitForActiveShards number of active shard copies to wait on
+     */
+    public void setWaitForActiveShards(ActiveShardCount waitForActiveShards) {
+        this.getShrinkIndexRequest().waitForActiveShards(waitForActiveShards);
+    }
+
     public void source(BytesReference source) {
         XContentType xContentType = XContentFactory.xContentType(source);
         if (xContentType != null) {

+ 20 - 0
core/src/main/java/org/elasticsearch/action/admin/indices/shrink/ShrinkRequestBuilder.java

@@ -19,6 +19,7 @@
 package org.elasticsearch.action.admin.indices.shrink;
 
 import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.action.support.master.AcknowledgedRequestBuilder;
 import org.elasticsearch.client.ElasticsearchClient;
 import org.elasticsearch.common.settings.Settings;
@@ -44,4 +45,23 @@ public class ShrinkRequestBuilder extends AcknowledgedRequestBuilder<ShrinkReque
         this.request.getShrinkIndexRequest().settings(settings);
         return this;
     }
+
+    /**
+     * Sets the number of shard copies that should be active for creation of the
+     * new shrunken index to return. Defaults to {@link ActiveShardCount#DEFAULT}, which will
+     * wait for one shard copy (the primary) to become active. Set this value to
+     * {@link ActiveShardCount#ALL} to wait for all shards (primary and all replicas) to be active
+     * before returning. Otherwise, use {@link ActiveShardCount#from(int)} to set this value to any
+     * non-negative integer, up to the number of copies per shard (number of replicas + 1),
+     * to wait for the desired amount of shard copies to become active before returning.
+     * Index creation will only wait up until the timeout value for the number of shard copies
+     * to be active before returning.  Check {@link ShrinkResponse#isShardsAcked()} to
+     * determine if the requisite shard copies were all started before returning or timing out.
+     *
+     * @param waitForActiveShards number of active shard copies to wait on
+     */
+    public ShrinkRequestBuilder setWaitForActiveShards(ActiveShardCount waitForActiveShards) {
+        this.request.setWaitForActiveShards(waitForActiveShards);
+        return this;
+    }
 }

+ 2 - 2
core/src/main/java/org/elasticsearch/action/admin/indices/shrink/ShrinkResponse.java

@@ -25,7 +25,7 @@ public final class ShrinkResponse extends CreateIndexResponse {
     ShrinkResponse() {
     }
 
-    ShrinkResponse(boolean acknowledged) {
-        super(acknowledged);
+    ShrinkResponse(boolean acknowledged, boolean shardsAcked) {
+        super(acknowledged, shardsAcked);
     }
 }

+ 3 - 18
core/src/main/java/org/elasticsearch/action/admin/indices/shrink/TransportShrinkAction.java

@@ -29,7 +29,6 @@ import org.elasticsearch.action.support.ActionFilters;
 import org.elasticsearch.action.support.master.TransportMasterNodeAction;
 import org.elasticsearch.client.Client;
 import org.elasticsearch.cluster.ClusterState;
-import org.elasticsearch.cluster.ack.ClusterStateUpdateResponse;
 import org.elasticsearch.cluster.block.ClusterBlockException;
 import org.elasticsearch.cluster.block.ClusterBlockLevel;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
@@ -40,7 +39,6 @@ import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.shard.DocsStats;
 import org.elasticsearch.index.shard.ShardId;
-import org.elasticsearch.indices.IndexAlreadyExistsException;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.TransportService;
 
@@ -93,22 +91,8 @@ public class TransportShrinkAction extends TransportMasterNodeAction<ShrinkReque
                         IndexShardStats shard = indicesStatsResponse.getIndex(sourceIndex).getIndexShards().get(i);
                         return shard == null ? null : shard.getPrimary().getDocs();
                     }, indexNameExpressionResolver);
-                createIndexService.createIndex(updateRequest, new ActionListener<ClusterStateUpdateResponse>() {
-                    @Override
-                    public void onResponse(ClusterStateUpdateResponse response) {
-                        listener.onResponse(new ShrinkResponse(response.isAcknowledged()));
-                    }
-
-                    @Override
-                    public void onFailure(Exception t) {
-                        if (t instanceof IndexAlreadyExistsException) {
-                            logger.trace("[{}] failed to create shrink index", t, updateRequest.index());
-                        } else {
-                            logger.debug("[{}] failed to create shrink index", t, updateRequest.index());
-                        }
-                        listener.onFailure(t);
-                    }
-                });
+                createIndexService.createIndex(updateRequest, ActionListener.wrap(response ->
+                    listener.onResponse(new ShrinkResponse(response.isAcknowledged(), response.isShardsAcked())), listener::onFailure));
             }
 
             @Override
@@ -162,6 +146,7 @@ public class TransportShrinkAction extends TransportMasterNodeAction<ShrinkReque
             .settings(targetIndex.settings())
             .aliases(targetIndex.aliases())
             .customs(targetIndex.customs())
+            .waitForActiveShards(targetIndex.waitForActiveShards())
             .shrinkFrom(metaData.getIndex());
     }
 

+ 211 - 0
core/src/main/java/org/elasticsearch/action/support/ActiveShardCount.java

@@ -0,0 +1,211 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.action.support;
+
+import com.carrotsearch.hppc.cursors.IntObjectCursor;
+import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.metadata.IndexMetaData;
+import org.elasticsearch.cluster.routing.IndexRoutingTable;
+import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Writeable;
+
+import java.io.IOException;
+
+/**
+ * A class whose instances represent a value for counting the number
+ * of active shard copies for a given shard in an index.
+ */
+public final class ActiveShardCount implements Writeable {
+
+    private static final int ACTIVE_SHARD_COUNT_DEFAULT = -2;
+    private static final int ALL_ACTIVE_SHARDS = -1;
+
+    public static final ActiveShardCount DEFAULT = new ActiveShardCount(ACTIVE_SHARD_COUNT_DEFAULT);
+    public static final ActiveShardCount ALL = new ActiveShardCount(ALL_ACTIVE_SHARDS);
+    public static final ActiveShardCount NONE = new ActiveShardCount(0);
+    public static final ActiveShardCount ONE = new ActiveShardCount(1);
+
+    private final int value;
+
+    private ActiveShardCount(final int value) {
+        this.value = value;
+    }
+
+    /**
+     * Get an ActiveShardCount instance for the given value.  The value is first validated to ensure
+     * it is a valid shard count and throws an IllegalArgumentException if validation fails.  Valid
+     * values are any non-negative number.  Directly use {@link ActiveShardCount#DEFAULT} for the
+     * default value (which is one shard copy) or {@link ActiveShardCount#ALL} to specify all the shards.
+     */
+    public static ActiveShardCount from(final int value) {
+        if (value < 0) {
+            throw new IllegalArgumentException("shard count cannot be a negative value");
+        }
+        return get(value);
+    }
+
+    private static ActiveShardCount get(final int value) {
+        switch (validateValue(value)) {
+            case ACTIVE_SHARD_COUNT_DEFAULT:
+                return DEFAULT;
+            case ALL_ACTIVE_SHARDS:
+                return ALL;
+            case 1:
+                return ONE;
+            case 0:
+                return NONE;
+            default:
+                return new ActiveShardCount(value);
+        }
+    }
+
+    @Override
+    public void writeTo(final StreamOutput out) throws IOException {
+        out.writeInt(value);
+    }
+
+    public static ActiveShardCount readFrom(final StreamInput in) throws IOException {
+        return get(in.readInt());
+    }
+
+    private static int validateValue(final int value) {
+        if (value < 0 && value != ACTIVE_SHARD_COUNT_DEFAULT && value != ALL_ACTIVE_SHARDS) {
+            throw new IllegalArgumentException("Invalid ActiveShardCount[" + value + "]");
+        }
+        return value;
+    }
+
+    /**
+     * Resolve this instance to an actual integer value for the number of active shard copies.
+     * If {@link ActiveShardCount#ALL} is specified, then the given {@link IndexMetaData} is
+     * used to determine what the actual active shard count should be.  The default value indicates
+     * one active shard.
+     */
+    public int resolve(final IndexMetaData indexMetaData) {
+        if (this == ActiveShardCount.DEFAULT) {
+            return 1;
+        } else if (this == ActiveShardCount.ALL) {
+            return indexMetaData.getNumberOfReplicas() + 1;
+        } else {
+            return value;
+        }
+    }
+
+    /**
+     * Parses the active shard count from the given string.  Valid values are "all" for
+     * all shard copies, null for the default value (which defaults to one shard copy),
+     * or a numeric value greater than or equal to 0. Any other input will throw an
+     * IllegalArgumentException.
+     */
+    public static ActiveShardCount parseString(final String str) {
+        if (str == null) {
+            return ActiveShardCount.DEFAULT;
+        } else if (str.equals("all")) {
+            return ActiveShardCount.ALL;
+        } else {
+            int val;
+            try {
+                val = Integer.parseInt(str);
+            } catch (NumberFormatException e) {
+                throw new IllegalArgumentException("cannot parse ActiveShardCount[" + str + "]", e);
+            }
+            return ActiveShardCount.from(val);
+        }
+    }
+
+    /**
+     * Returns true iff the given cluster state's routing table contains enough active
+     * shards to meet the required shard count represented by this instance.
+     */
+    public boolean enoughShardsActive(final ClusterState clusterState, final String indexName) {
+        if (this == ActiveShardCount.NONE) {
+            // not waiting for any active shards
+            return true;
+        }
+        final IndexMetaData indexMetaData = clusterState.metaData().index(indexName);
+        if (indexMetaData == null) {
+            // it's possible the index was deleted while waiting for active shard copies,
+            // in this case, we'll just consider it that we have enough active shard copies
+            // and we can stop waiting
+            return true;
+        }
+        final IndexRoutingTable indexRoutingTable = clusterState.routingTable().index(indexName);
+        assert indexRoutingTable != null;
+        if (indexRoutingTable.allPrimaryShardsActive() == false) {
+            // not all primary shards are active yet
+            return false;
+        }
+        for (final IntObjectCursor<IndexShardRoutingTable> shardRouting : indexRoutingTable.getShards()) {
+            if (enoughShardsActive(shardRouting.value, indexMetaData) == false) {
+                // not enough active shard copies yet
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Returns true iff the active shard count in the shard routing table is enough
+     * to meet the required shard count represented by this instance.
+     */
+    public boolean enoughShardsActive(final IndexShardRoutingTable shardRoutingTable, final IndexMetaData indexMetaData) {
+        if (shardRoutingTable.activeShards().size() < resolve(indexMetaData)) {
+            // not enough active shard copies yet
+            return false;
+        }
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        return Integer.hashCode(value);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        @SuppressWarnings("unchecked") ActiveShardCount that = (ActiveShardCount) o;
+        return value == that.value;
+    }
+
+    @Override
+    public String toString() {
+        final String valStr;
+        switch (value) {
+            case ALL_ACTIVE_SHARDS:
+                valStr = "ALL";
+                break;
+            case ACTIVE_SHARD_COUNT_DEFAULT:
+                valStr = "DEFAULT";
+                break;
+            default:
+                valStr = Integer.toString(value);
+        }
+        return "ActiveShardCount[" + valStr + "]";
+    }
+
+}

+ 105 - 0
core/src/main/java/org/elasticsearch/action/support/ActiveShardsObserver.java

@@ -0,0 +1,105 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.action.support;
+
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.ClusterStateObserver;
+import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.common.component.AbstractComponent;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.node.NodeClosedException;
+import org.elasticsearch.threadpool.ThreadPool;
+
+import java.util.function.Consumer;
+
+/**
+ * This class provides primitives for waiting for a configured number of shards
+ * to become active before sending a response on an {@link ActionListener}.
+ */
+public class ActiveShardsObserver extends AbstractComponent {
+
+    private final ClusterService clusterService;
+    private final ThreadPool threadPool;
+
+    public ActiveShardsObserver(final Settings settings, final ClusterService clusterService, final ThreadPool threadPool) {
+        super(settings);
+        this.clusterService = clusterService;
+        this.threadPool = threadPool;
+    }
+
+    /**
+     * Waits on the specified number of active shards to be started before executing the result consumer.
+     *
+     * @param indexName the index to wait for active shards on
+     * @param activeShardCount the number of active shards to wait on before returning
+     * @param timeout the timeout value
+     * @param onResult a function that is executed in response to the requisite shards becoming active or a timeout (whichever comes first)
+     * @param onFailure a function that is executed in response to an error occurring during waiting for the active shards
+     */
+    public void waitForActiveShards(final String indexName,
+                                    final ActiveShardCount activeShardCount,
+                                    final TimeValue timeout,
+                                    final Consumer<Boolean> onResult,
+                                    final Consumer<Exception> onFailure) {
+
+        // wait for the configured number of active shards to be allocated before executing the result consumer
+        if (activeShardCount == ActiveShardCount.NONE) {
+            // not waiting, so just run whatever we were to run when the waiting is done
+            onResult.accept(true);
+            return;
+        }
+
+        final ClusterStateObserver observer = new ClusterStateObserver(clusterService, logger, threadPool.getThreadContext());
+        if (activeShardCount.enoughShardsActive(observer.observedState(), indexName)) {
+                onResult.accept(true);
+        } else {
+            final ClusterStateObserver.ChangePredicate shardsAllocatedPredicate =
+                new ClusterStateObserver.ValidationPredicate() {
+                    @Override
+                    protected boolean validate(final ClusterState newState) {
+                        return activeShardCount.enoughShardsActive(newState, indexName);
+                    }
+                };
+
+            final ClusterStateObserver.Listener observerListener = new ClusterStateObserver.Listener() {
+                @Override
+                public void onNewClusterState(ClusterState state) {
+                    onResult.accept(true);
+                }
+
+                @Override
+                public void onClusterServiceClose() {
+                    logger.debug("[{}] cluster service closed while waiting for enough shards to be started.", indexName);
+                    onFailure.accept(new NodeClosedException(clusterService.localNode()));
+                }
+
+                @Override
+                public void onTimeout(TimeValue timeout) {
+                    onResult.accept(false);
+                }
+            };
+
+            observer.waitForNextChange(observerListener, shardsAllocatedPredicate, timeout);
+        }
+    }
+
+}

+ 40 - 0
core/src/main/java/org/elasticsearch/cluster/ack/CreateIndexClusterStateUpdateResponse.java

@@ -0,0 +1,40 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.cluster.ack;
+
+/**
+ * A cluster state update response with specific fields for index creation.
+ */
+public class CreateIndexClusterStateUpdateResponse extends ClusterStateUpdateResponse {
+
+    private final boolean shardsAcked;
+
+    public CreateIndexClusterStateUpdateResponse(boolean acknowledged, boolean shardsAcked) {
+        super(acknowledged);
+        this.shardsAcked = shardsAcked;
+    }
+
+    /**
+     * Returns whether the requisite number of shard copies started before the completion of the operation.
+     */
+    public boolean isShardsAcked() {
+        return shardsAcked;
+    }
+}

+ 1 - 1
core/src/main/java/org/elasticsearch/cluster/health/ClusterIndexHealth.java

@@ -54,7 +54,7 @@ public final class ClusterIndexHealth implements Iterable<ClusterShardHealth>, W
 
         for (IndexShardRoutingTable shardRoutingTable : indexRoutingTable) {
             int shardId = shardRoutingTable.shardId().id();
-            shards.put(shardId, new ClusterShardHealth(shardId, shardRoutingTable));
+            shards.put(shardId, new ClusterShardHealth(shardId, shardRoutingTable, indexMetaData));
         }
 
         // update the index status

+ 41 - 8
core/src/main/java/org/elasticsearch/cluster/health/ClusterShardHealth.java

@@ -19,8 +19,12 @@
 
 package org.elasticsearch.cluster.health;
 
+import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
 import org.elasticsearch.cluster.routing.ShardRouting;
+import org.elasticsearch.cluster.routing.UnassignedInfo;
+import org.elasticsearch.cluster.routing.UnassignedInfo.AllocationStatus;
+import org.elasticsearch.cluster.routing.UnassignedInfo.Reason;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Writeable;
@@ -37,13 +41,12 @@ public final class ClusterShardHealth implements Writeable {
     private final int unassignedShards;
     private final boolean primaryActive;
 
-    public ClusterShardHealth(final int shardId, final IndexShardRoutingTable shardRoutingTable) {
+    public ClusterShardHealth(final int shardId, final IndexShardRoutingTable shardRoutingTable, final IndexMetaData indexMetaData) {
         this.shardId = shardId;
         int computeActiveShards = 0;
         int computeRelocatingShards = 0;
         int computeInitializingShards = 0;
         int computeUnassignedShards = 0;
-        boolean computePrimaryActive = false;
         for (ShardRouting shardRouting : shardRoutingTable) {
             if (shardRouting.active()) {
                 computeActiveShards++;
@@ -51,9 +54,6 @@ public final class ClusterShardHealth implements Writeable {
                     // the shard is relocating, the one it is relocating to will be in initializing state, so we don't count it
                     computeRelocatingShards++;
                 }
-                if (shardRouting.primary()) {
-                    computePrimaryActive = true;
-                }
             } else if (shardRouting.initializing()) {
                 computeInitializingShards++;
             } else if (shardRouting.unassigned()) {
@@ -61,21 +61,22 @@ public final class ClusterShardHealth implements Writeable {
             }
         }
         ClusterHealthStatus computeStatus;
-        if (computePrimaryActive) {
+        final ShardRouting primaryRouting = shardRoutingTable.primaryShard();
+        if (primaryRouting.active()) {
             if (computeActiveShards == shardRoutingTable.size()) {
                 computeStatus = ClusterHealthStatus.GREEN;
             } else {
                 computeStatus = ClusterHealthStatus.YELLOW;
             }
         } else {
-            computeStatus = ClusterHealthStatus.RED;
+            computeStatus = getInactivePrimaryHealth(primaryRouting, indexMetaData);
         }
         this.status = computeStatus;
         this.activeShards = computeActiveShards;
         this.relocatingShards = computeRelocatingShards;
         this.initializingShards = computeInitializingShards;
         this.unassignedShards = computeUnassignedShards;
-        this.primaryActive = computePrimaryActive;
+        this.primaryActive = primaryRouting.active();
     }
 
     public ClusterShardHealth(final StreamInput in) throws IOException {
@@ -126,4 +127,36 @@ public final class ClusterShardHealth implements Writeable {
         out.writeVInt(unassignedShards);
         out.writeBoolean(primaryActive);
     }
+
+    /**
+     * Computes the health status to report for a primary shard that is not active.
+     *
+     * An inactive primary usually means RED. Two situations are reported as YELLOW instead, because
+     * the primary has simply not had a chance to start yet:
+     *   1. Index creation: right after an index is created its primaries are still initializing, so a
+     *      RED status during that short window would wrongly suggest that the cluster is unhealthy
+     *      when it is only a matter of waiting for the primaries to initialize.
+     *   2. Cluster recovery: the shard never had any allocation ids assigned to it, which indicates
+     *      that the cluster was restarted after the index was created but before this shard's primary
+     *      was allocated.
+     *
+     * YELLOW is returned only when the unassigned reason is INDEX_CREATED or CLUSTER_RECOVERED, the last
+     * allocation status is not DECIDERS_NO and {@code allocatedPostIndexCreate} is false for the shard;
+     * every other combination yields RED.
+     *
+     * NB: this method must *not* be called on active shards nor on non-primary shards.
+     */
+    public static ClusterHealthStatus getInactivePrimaryHealth(final ShardRouting shardRouting, final IndexMetaData indexMetaData) {
+        assert shardRouting.primary() : "cannot invoke on a replica shard: " + shardRouting;
+        assert shardRouting.active() == false : "cannot invoke on an active shard: " + shardRouting;
+        assert shardRouting.unassignedInfo() != null : "cannot invoke on a shard with no UnassignedInfo: " + shardRouting;
+        final UnassignedInfo info = shardRouting.unassignedInfo();
+        if (info.getLastAllocationStatus() == AllocationStatus.DECIDERS_NO || shardRouting.allocatedPostIndexCreate(indexMetaData)) {
+            return ClusterHealthStatus.RED; // neither exception can apply once one of these two checks is true
+        }
+        final Reason why = info.getReason();
+        final boolean notStartedYet = why == Reason.INDEX_CREATED || why == Reason.CLUSTER_RECOVERED;
+        return notStartedYet ? ClusterHealthStatus.YELLOW : ClusterHealthStatus.RED;
+    }
+
 }

+ 55 - 3
core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataCreateIndexService.java

@@ -27,9 +27,11 @@ import org.elasticsearch.Version;
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.admin.indices.alias.Alias;
 import org.elasticsearch.action.admin.indices.create.CreateIndexClusterStateUpdateRequest;
+import org.elasticsearch.action.support.ActiveShardsObserver;
 import org.elasticsearch.cluster.AckedClusterStateUpdateTask;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.ack.ClusterStateUpdateResponse;
+import org.elasticsearch.cluster.ack.CreateIndexClusterStateUpdateResponse;
 import org.elasticsearch.cluster.block.ClusterBlock;
 import org.elasticsearch.cluster.block.ClusterBlockLevel;
 import org.elasticsearch.cluster.block.ClusterBlocks;
@@ -68,6 +70,7 @@ import org.elasticsearch.indices.IndexAlreadyExistsException;
 import org.elasticsearch.indices.IndexCreationException;
 import org.elasticsearch.indices.IndicesService;
 import org.elasticsearch.indices.InvalidIndexNameException;
+import org.elasticsearch.threadpool.ThreadPool;
 import org.joda.time.DateTime;
 import org.joda.time.DateTimeZone;
 
@@ -108,13 +111,15 @@ public class MetaDataCreateIndexService extends AbstractComponent {
     private final Environment env;
     private final NodeServicesProvider nodeServicesProvider;
     private final IndexScopedSettings indexScopedSettings;
-
+    private final ActiveShardsObserver activeShardsObserver;
 
     @Inject
     public MetaDataCreateIndexService(Settings settings, ClusterService clusterService,
                                       IndicesService indicesService, AllocationService allocationService,
                                       AliasValidator aliasValidator,
-                                      Set<IndexTemplateFilter> indexTemplateFilters, Environment env, NodeServicesProvider nodeServicesProvider, IndexScopedSettings indexScopedSettings) {
+                                      Set<IndexTemplateFilter> indexTemplateFilters, Environment env,
+                                      NodeServicesProvider nodeServicesProvider, IndexScopedSettings indexScopedSettings,
+                                      ThreadPool threadPool) {
         super(settings);
         this.clusterService = clusterService;
         this.indicesService = indicesService;
@@ -135,6 +140,7 @@ public class MetaDataCreateIndexService extends AbstractComponent {
             }
             this.indexTemplateFilter = new IndexTemplateFilter.Compound(templateFilters);
         }
+        this.activeShardsObserver = new ActiveShardsObserver(settings, clusterService, threadPool);
     }
 
     public void validateIndexName(String index, ClusterState state) {
@@ -176,7 +182,38 @@ public class MetaDataCreateIndexService extends AbstractComponent {
         }
     }
 
-    public void createIndex(final CreateIndexClusterStateUpdateRequest request, final ActionListener<ClusterStateUpdateResponse> listener) {
+    /**
+     * Creates an index in the cluster state and waits for the specified number of shard copies to
+     * become active (see {@link CreateIndexClusterStateUpdateRequest#waitForActiveShards()}) before
+     * responding on the listener. If the cluster state update was not acknowledged, no waiting occurs
+     * and both {@link CreateIndexClusterStateUpdateResponse#isAcknowledged()} and
+     * {@link CreateIndexClusterStateUpdateResponse#isShardsAcked()} are false. Otherwise the response
+     * is acknowledged, and {@link CreateIndexClusterStateUpdateResponse#isShardsAcked()} is true if the
+     * requisite shard copies were started before the timeout and false if the wait timed out.
+     *
+     * @param request the index creation cluster state update request
+     * @param listener the listener on which to send the index creation cluster state update response
+     */
+    public void createIndex(final CreateIndexClusterStateUpdateRequest request,
+                            final ActionListener<CreateIndexClusterStateUpdateResponse> listener) {
+        onlyCreateIndex(request, ActionListener.wrap(response -> {
+            if (response.isAcknowledged()) {
+                activeShardsObserver.waitForActiveShards(request.index(), request.waitForActiveShards(), request.ackTimeout(),
+                    shardsAcked -> {
+                        if (shardsAcked == false) { // the timeout message only applies when the wait was not satisfied
+                            logger.debug("[{}] index created, but the operation timed out while waiting for " +
+                                             "enough shards to be started.", request.index());
+                        }
+                        listener.onResponse(new CreateIndexClusterStateUpdateResponse(response.isAcknowledged(), shardsAcked));
+                    }, listener::onFailure);
+            } else {
+                listener.onResponse(new CreateIndexClusterStateUpdateResponse(false, false));
+            }
+        }, listener::onFailure));
+    }
+
+    private void onlyCreateIndex(final CreateIndexClusterStateUpdateRequest request,
+                                 final ActionListener<ClusterStateUpdateResponse> listener) {
         Settings.Builder updatedSettingsBuilder = Settings.builder();
         updatedSettingsBuilder.put(request.settings()).normalizePrefix(IndexMetaData.INDEX_SETTING_PREFIX);
         indexScopedSettings.validate(updatedSettingsBuilder);
@@ -308,6 +345,11 @@ public class MetaDataCreateIndexService extends AbstractComponent {
                                 .setRoutingNumShards(routingNumShards);
                             // Set up everything, now locally create the index to see that things are ok, and apply
                             final IndexMetaData tmpImd = tmpImdBuilder.settings(actualIndexSettings).build();
+                            if (request.waitForActiveShards().resolve(tmpImd) > tmpImd.getNumberOfReplicas() + 1) {
+                                throw new IllegalArgumentException("invalid wait_for_active_shards[" + request.waitForActiveShards() +
+                                                                   "]: cannot be greater than number of shard copies [" +
+                                                                   (tmpImd.getNumberOfReplicas() + 1) + "]");
+                            }
                             // create the index here (on the master) to validate it can be created, as well as adding the mapping
                             final IndexService indexService = indicesService.createIndex(nodeServicesProvider, tmpImd, Collections.emptyList());
                             createdIndex = indexService.index();
@@ -408,6 +450,16 @@ public class MetaDataCreateIndexService extends AbstractComponent {
                             }
                         }
                     }
+
+                    @Override
+                    public void onFailure(String source, Exception e) {
+                        if ((e instanceof IndexAlreadyExistsException) == false) {
+                            logger.debug("[{}] failed to create", e, request.index());
+                        } else { // an index that already exists is logged at trace rather than debug
+                            logger.trace("[{}] failed to create", e, request.index());
+                        }
+                        super.onFailure(source, e);
+                    }
                 });
     }
 

+ 25 - 7
core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java

@@ -25,6 +25,7 @@ import org.apache.lucene.util.CollectionUtil;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.metadata.MetaData;
 import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.cluster.routing.UnassignedInfo.AllocationStatus;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.Randomness;
 import org.elasticsearch.common.collect.Tuple;
@@ -641,14 +642,27 @@ public class RoutingNodes implements Iterable<RoutingNode> {
          * Should be used with caution, typically,
          * the correct usage is to removeAndIgnore from the iterator.
          * @see #ignored()
-         * @see UnassignedIterator#removeAndIgnore()
+         * @see UnassignedIterator#removeAndIgnore(AllocationStatus)
          * @see #isIgnoredEmpty()
+         * @return true iff the decision caused a change to the unassigned info
          */
-        public void ignoreShard(ShardRouting shard) {
+        public boolean ignoreShard(ShardRouting shard, AllocationStatus allocationStatus) {
+            boolean changed = false; // remains false unless a primary's unassigned info is replaced below
             if (shard.primary()) {
                 ignoredPrimaries++;
+                UnassignedInfo currInfo = shard.unassignedInfo();
+                assert currInfo != null;
+                if (allocationStatus.equals(currInfo.getLastAllocationStatus()) == false) {
+                    UnassignedInfo newInfo = new UnassignedInfo(currInfo.getReason(), currInfo.getMessage(), currInfo.getFailure(),
+                                                                currInfo.getNumFailedAllocations(), currInfo.getUnassignedTimeInNanos(),
+                                                                currInfo.getUnassignedTimeInMillis(), currInfo.isDelayed(),
+                                                                allocationStatus); // all other fields are carried over from currInfo
+                    shard = shard.updateUnassignedInfo(newInfo); // the updated routing is the one added to the ignored list
+                    changed = true; // the recorded allocation status differed from the new one
+                }
             }
             ignored.add(shard);
+            return changed;
         }
 
         public class UnassignedIterator implements Iterator<ShardRouting> {
@@ -685,10 +699,13 @@ public class RoutingNodes implements Iterable<RoutingNode> {
              * will be added back to unassigned once the metadata is constructed again).
              * Typically this is used when an allocation decision prevents a shard from being allocated such
              * that subsequent consumers of this API won't try to allocate this shard again.
+             *
+             * @param attempt the result of the allocation attempt
+             * @return true iff the decision caused an update to the unassigned info
              */
-            public void removeAndIgnore() {
+            public boolean removeAndIgnore(AllocationStatus attempt) {
                 innerRemove();
-                ignoreShard(current);
+                return ignoreShard(current, attempt); // passes through whether ignoreShard replaced the unassigned info
             }
 
             private void updateShardRouting(ShardRouting shardRouting) {
@@ -721,7 +738,7 @@ public class RoutingNodes implements Iterable<RoutingNode> {
             }
 
             /**
-             * Unsupported operation, just there for the interface. Use {@link #removeAndIgnore()} or
+             * Unsupported operation, just there for the interface. Use {@link #removeAndIgnore(AllocationStatus)} or
              * {@link #initialize(String, String, long)}.
              */
             @Override
@@ -747,8 +764,8 @@ public class RoutingNodes implements Iterable<RoutingNode> {
 
         /**
          * Returns <code>true</code> iff any unassigned shards are marked as temporarily ignored.
-         * @see UnassignedShards#ignoreShard(ShardRouting)
-         * @see UnassignedIterator#removeAndIgnore()
+         * @see UnassignedShards#ignoreShard(ShardRouting, AllocationStatus)
+         * @see UnassignedIterator#removeAndIgnore(AllocationStatus)
          */
         public boolean isIgnoredEmpty() {
             return ignored.isEmpty();
@@ -878,6 +895,7 @@ public class RoutingNodes implements Iterable<RoutingNode> {
         assert inactiveShardCount == routingNodes.inactiveShardCount :
                 "Inactive Shard count [" + inactiveShardCount + "] but RoutingNodes returned inactive shards [" + routingNodes.inactiveShardCount + "]";
         assert routingNodes.getRelocatingShardCount() == relocating : "Relocating shards mismatch [" + routingNodes.getRelocatingShardCount() + "] but expected [" + relocating + "]";
+
         return true;
     }
 

+ 115 - 5
core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java

@@ -22,6 +22,7 @@ package org.elasticsearch.cluster.routing;
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.metadata.MetaData;
+import org.elasticsearch.cluster.routing.allocation.decider.Decision;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
@@ -36,6 +37,8 @@ import org.elasticsearch.common.xcontent.ToXContent;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 
 import java.io.IOException;
+import java.util.Locale;
+import java.util.Objects;
 
 /**
  * Holds additional information as to why the shard is in unassigned state.
@@ -105,7 +108,94 @@ public final class UnassignedInfo implements ToXContent, Writeable {
         /**
          * Unassigned as a result of a failed primary while the replica was initializing.
          */
-        PRIMARY_FAILED;
+        PRIMARY_FAILED
+    }
+
+    /**
+     * Describes why the most recent attempt to allocate a shard did not succeed,
+     * leaving the shard in the unassigned state.
+     *
+     * Note, ordering of the enum is important, make sure to add new values
+     * at the end and handle version serialization properly.
+     */
+    public enum AllocationStatus implements Writeable {
+        /**
+         * The allocation deciders all returned a NO decision, so the shard was denied allocation to a node
+         */
+        DECIDERS_NO((byte) 0),
+        /**
+         * No valid shard copies were found for the shard, so it was denied allocation to a node;
+         * this can happen on node restart with gateway allocation
+         */
+        NO_VALID_SHARD_COPY((byte) 1),
+        /**
+         * The allocation deciders throttled the allocation attempt for the shard
+         */
+        DECIDERS_THROTTLED((byte) 2),
+        /**
+         * Shard data is still being fetched from all nodes, so no decision on where to allocate has been made
+         */
+        FETCHING_SHARD_DATA((byte) 3),
+        /**
+         * The allocation decision has been delayed
+         */
+        DELAYED_ALLOCATION((byte) 4),
+        /**
+         * No attempt to allocate the shard has been made so far
+         */
+        NO_ATTEMPT((byte) 5);
+
+        // id written to and read back from the stream
+        private final byte id;
+
+        AllocationStatus(byte id) {
+            this.id = id;
+        }
+
+        // package private for testing
+        byte getId() {
+            return id;
+        }
+
+        @Override
+        public void writeTo(StreamOutput out) throws IOException {
+            out.writeByte(id);
+        }
+
+        /**
+         * Reads the single-byte id written by {@link #writeTo(StreamOutput)} and returns the matching status.
+         * @throws IllegalArgumentException if the id does not belong to any status
+         */
+        public static AllocationStatus readFrom(StreamInput in) throws IOException {
+            final byte wireId = in.readByte();
+            for (AllocationStatus candidate : values()) {
+                if (candidate.id == wireId) {
+                    return candidate;
+                }
+            }
+            throw new IllegalArgumentException("Unknown AllocationStatus value [" + wireId + "]");
+        }
+
+        /**
+         * Translates a NO or THROTTLE decision into its status; any other decision type is rejected.
+         */
+        public static AllocationStatus fromDecision(Decision decision) {
+            Objects.requireNonNull(decision);
+            final Decision.Type type = decision.type();
+            switch (type) {
+                case NO:
+                    return DECIDERS_NO;
+                case THROTTLE:
+                    return DECIDERS_THROTTLED;
+                default:
+                    throw new IllegalArgumentException("no allocation attempt from decision[" + type + "]");
+            }
+        }
+
+        /** Returns the constant's name in lower case (root locale). */
+        public String value() {
+            return name().toLowerCase(Locale.ROOT);
+        }
     }
 
     private final Reason reason;
@@ -115,6 +205,7 @@ public final class UnassignedInfo implements ToXContent, Writeable {
     private final String message;
     private final Exception failure;
     private final int failedAllocations;
+    private final AllocationStatus lastAllocationStatus; // result of the last allocation attempt for this shard
 
     /**
      * creates an UnassignedInfo object based on **current** time
@@ -123,7 +214,8 @@ public final class UnassignedInfo implements ToXContent, Writeable {
      * @param message more information about cause.
      **/
     public UnassignedInfo(Reason reason, String message) {
-        this(reason, message, null, reason == Reason.ALLOCATION_FAILED ? 1 : 0, System.nanoTime(), System.currentTimeMillis(), false);
+        this(reason, message, null, reason == Reason.ALLOCATION_FAILED ? 1 : 0, System.nanoTime(), System.currentTimeMillis(), false,
+             AllocationStatus.NO_ATTEMPT); // an info built through this constructor starts with no allocation attempt recorded
     }
 
     /**
@@ -133,16 +225,18 @@ public final class UnassignedInfo implements ToXContent, Writeable {
      * @param unassignedTimeNanos  the time to use as the base for any delayed re-assignment calculation
      * @param unassignedTimeMillis the time of unassignment used to display to in our reporting.
      * @param delayed              if allocation of this shard is delayed due to INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.
+     * @param lastAllocationStatus the result of the last allocation attempt for this shard
      */
     public UnassignedInfo(Reason reason, @Nullable String message, @Nullable Exception failure, int failedAllocations,
-                          long unassignedTimeNanos, long unassignedTimeMillis, boolean delayed) {
-        this.reason = reason;
+                          long unassignedTimeNanos, long unassignedTimeMillis, boolean delayed, AllocationStatus lastAllocationStatus) {
+        this.reason = Objects.requireNonNull(reason);
         this.unassignedTimeMillis = unassignedTimeMillis;
         this.unassignedTimeNanos = unassignedTimeNanos;
         this.delayed = delayed;
         this.message = message;
         this.failure = failure;
         this.failedAllocations = failedAllocations;
+        this.lastAllocationStatus = Objects.requireNonNull(lastAllocationStatus);
         assert (failedAllocations > 0) == (reason == Reason.ALLOCATION_FAILED) :
             "failedAllocations: " + failedAllocations + " for reason " + reason;
         assert !(message == null && failure != null) : "provide a message if a failure exception is provided";
@@ -159,6 +253,7 @@ public final class UnassignedInfo implements ToXContent, Writeable {
         this.message = in.readOptionalString();
         this.failure = in.readException();
         this.failedAllocations = in.readVInt();
+        this.lastAllocationStatus = AllocationStatus.readFrom(in);
     }
 
     public void writeTo(StreamOutput out) throws IOException {
@@ -169,6 +264,7 @@ public final class UnassignedInfo implements ToXContent, Writeable {
         out.writeOptionalString(message);
         out.writeException(failure);
         out.writeVInt(failedAllocations);
+        lastAllocationStatus.writeTo(out);
     }
 
     public UnassignedInfo readFrom(StreamInput in) throws IOException {
@@ -240,6 +336,13 @@ public final class UnassignedInfo implements ToXContent, Writeable {
         return message + (failure == null ? "" : ", failure " + ExceptionsHelper.detailedMessage(failure));
     }
 
+    /**
+     * Returns the status of the most recent allocation attempt for this shard (NO_ATTEMPT if none was made yet).
+     */
+    public AllocationStatus getLastAllocationStatus() {
+        return lastAllocationStatus;
+    }
+
     /**
      * Calculates the delay left based on current time (in nanoseconds) and the delay defined by the index settings.
      * Only relevant if shard is effectively delayed (see {@link #isDelayed()})
@@ -302,6 +405,7 @@ public final class UnassignedInfo implements ToXContent, Writeable {
         if (details != null) {
             sb.append(", details[").append(details).append("]");
         }
+        sb.append(", allocation_status[").append(lastAllocationStatus.value()).append("]");
         return sb.toString();
     }
 
@@ -323,6 +427,7 @@ public final class UnassignedInfo implements ToXContent, Writeable {
         if (details != null) {
             builder.field("details", details);
         }
+        builder.field("allocation_status", lastAllocationStatus.value());
         builder.endObject();
         return builder;
     }
@@ -353,17 +458,22 @@ public final class UnassignedInfo implements ToXContent, Writeable {
         if (message != null ? !message.equals(that.message) : that.message != null) {
             return false;
         }
+        if (lastAllocationStatus != that.lastAllocationStatus) {
+            return false;
+        }
         return !(failure != null ? !failure.equals(that.failure) : that.failure != null);
     }
 
     @Override
     public int hashCode() {
-        int result = reason != null ? reason.hashCode() : 0;
+        int result = reason.hashCode(); // reason is required non-null by the full constructor
         result = 31 * result + Boolean.hashCode(delayed);
         result = 31 * result + Integer.hashCode(failedAllocations);
         result = 31 * result + Long.hashCode(unassignedTimeMillis);
         result = 31 * result + (message != null ? message.hashCode() : 0);
         result = 31 * result + (failure != null ? failure.hashCode() : 0);
+        result = 31 * result + lastAllocationStatus.hashCode(); // kept in step with the lastAllocationStatus check in equals
         return result;
     }
+
 }

+ 7 - 4
core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java

@@ -34,6 +34,7 @@ import org.elasticsearch.cluster.routing.RoutingNodes;
 import org.elasticsearch.cluster.routing.RoutingTable;
 import org.elasticsearch.cluster.routing.ShardRouting;
 import org.elasticsearch.cluster.routing.UnassignedInfo;
+import org.elasticsearch.cluster.routing.UnassignedInfo.AllocationStatus;
 import org.elasticsearch.cluster.routing.allocation.allocator.ShardsAllocator;
 import org.elasticsearch.cluster.routing.allocation.command.AllocationCommands;
 import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders;
@@ -232,7 +233,7 @@ public class AllocationService extends AbstractComponent {
             UnassignedInfo unassignedInfo = failedShard.shard.unassignedInfo();
             final int failedAllocations = unassignedInfo != null ? unassignedInfo.getNumFailedAllocations() : 0;
             changed |= applyFailedShard(allocation, failedShard.shard, true, new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, failedShard.message, failedShard.failure,
-                    failedAllocations + 1, currentNanoTime, System.currentTimeMillis(), false));
+                    failedAllocations + 1, currentNanoTime, System.currentTimeMillis(), false, AllocationStatus.NO_ATTEMPT));
         }
         if (!changed) {
             return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData());
@@ -259,7 +260,8 @@ public class AllocationService extends AbstractComponent {
                 if (newComputedLeftDelayNanos == 0) {
                     changed = true;
                     unassignedIterator.updateUnassignedInfo(new UnassignedInfo(unassignedInfo.getReason(), unassignedInfo.getMessage(), unassignedInfo.getFailure(),
-                        unassignedInfo.getNumFailedAllocations(), unassignedInfo.getUnassignedTimeInNanos(), unassignedInfo.getUnassignedTimeInMillis(), false));
+                        unassignedInfo.getNumFailedAllocations(), unassignedInfo.getUnassignedTimeInNanos(), unassignedInfo.getUnassignedTimeInMillis(), false,
+                        unassignedInfo.getLastAllocationStatus()));
                 }
             }
         }
@@ -417,7 +419,7 @@ public class AllocationService extends AbstractComponent {
                 final IndexMetaData indexMetaData = allocation.metaData().getIndexSafe(shardRouting.index());
                 boolean delayed = INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexMetaData.getSettings()).nanos() > 0;
                 UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "node_left[" + node.nodeId() + "]",
-                    null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis(), delayed);
+                    null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis(), delayed, AllocationStatus.NO_ATTEMPT);
                 applyFailedShard(allocation, shardRouting, false, unassignedInfo);
             }
             // its a dead node, remove it, note, its important to remove it *after* we apply failed shard
@@ -438,7 +440,8 @@ public class AllocationService extends AbstractComponent {
         for (ShardRouting routing : replicas) {
             changed |= applyFailedShard(allocation, routing, false,
                     new UnassignedInfo(UnassignedInfo.Reason.PRIMARY_FAILED, "primary failed while replica initializing",
-                            null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis(), false));
+                            null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis(), false,
+                            AllocationStatus.NO_ATTEMPT));
         }
         return changed;
     }

+ 17 - 11
core/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java

@@ -28,6 +28,7 @@ import org.elasticsearch.cluster.routing.RoutingNode;
 import org.elasticsearch.cluster.routing.RoutingNodes;
 import org.elasticsearch.cluster.routing.ShardRouting;
 import org.elasticsearch.cluster.routing.ShardRoutingState;
+import org.elasticsearch.cluster.routing.UnassignedInfo;
 import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
 import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders;
 import org.elasticsearch.cluster.routing.allocation.decider.Decision;
@@ -647,11 +648,12 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
                 for (int i = 0; i < primaryLength; i++) {
                     ShardRouting shard = primary[i];
                     if (!shard.primary()) {
-                        boolean drop = deciders.canAllocate(shard, allocation).type() == Type.NO;
-                        if (drop) {
-                            unassigned.ignoreShard(shard);
+                        final Decision decision = deciders.canAllocate(shard, allocation);
+                        if (decision.type() == Type.NO) {
+                            UnassignedInfo.AllocationStatus allocationStatus = UnassignedInfo.AllocationStatus.fromDecision(decision);
+                            changed |= unassigned.ignoreShard(shard, allocationStatus);
                             while(i < primaryLength-1 && comparator.compare(primary[i], primary[i+1]) == 0) {
-                                unassigned.ignoreShard(primary[++i]);
+                                changed |= unassigned.ignoreShard(primary[++i], allocationStatus);
                             }
                             continue;
                         } else {
@@ -701,9 +703,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
                                                 final int minNodeHigh = minNode.highestPrimary(shard.getIndexName());
                                                 if ((((nodeHigh > repId && minNodeHigh > repId) || (nodeHigh < repId && minNodeHigh < repId)) && (nodeHigh < minNodeHigh))
                                                         || (nodeHigh > minNodeHigh && nodeHigh > repId && minNodeHigh < repId)) {
-                                                    minNode = node;
-                                                    minWeight = currentWeight;
-                                                    decision = currentDecision;
+                                                    // nothing to set here; the minNode, minWeight, and decision get set below
                                                 } else {
                                                     break NOUPDATE;
                                                 }
@@ -719,7 +719,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
                             }
                         }
                     }
-                    assert decision != null && minNode != null || decision == null && minNode == null;
+                    assert (decision == null) == (minNode == null);
                     if (minNode != null) {
                         final long shardSize = DiskThresholdDecider.getExpectedShardSize(shard, allocation,
                             ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE);
@@ -735,10 +735,12 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
                         } else {
                             minNode.addShard(shard.initialize(minNode.getNodeId(), null, shardSize));
                             final RoutingNode node = minNode.getRoutingNode();
-                            if (deciders.canAllocate(node, allocation).type() != Type.YES) {
+                            final Decision.Type nodeLevelDecision = deciders.canAllocate(node, allocation).type();
+                            if (nodeLevelDecision != Type.YES) {
                                 if (logger.isTraceEnabled()) {
                                     logger.trace("Can not allocate on node [{}] remove from round decision [{}]", node, decision.type());
                                 }
+                                assert nodeLevelDecision == Type.NO;
                                 throttledNodes.add(minNode);
                             }
                         }
@@ -748,10 +750,14 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
                     } else if (logger.isTraceEnabled()) {
                         logger.trace("No Node found to assign shard [{}]", shard);
                     }
-                    unassigned.ignoreShard(shard);
+                    assert decision == null || decision.type() == Type.THROTTLE;
+                    UnassignedInfo.AllocationStatus allocationStatus =
+                        decision == null ? UnassignedInfo.AllocationStatus.DECIDERS_NO :
+                                           UnassignedInfo.AllocationStatus.fromDecision(decision);
+                    changed |= unassigned.ignoreShard(shard, allocationStatus);
                     if (!shard.primary()) { // we could not allocate it and we are a replica - check if we can ignore the other replicas
                         while(secondaryLength > 0 && comparator.compare(shard, secondary[secondaryLength-1]) == 0) {
-                            unassigned.ignoreShard(secondary[--secondaryLength]);
+                            changed |= unassigned.ignoreShard(secondary[--secondaryLength], allocationStatus);
                         }
                     }
                 }

+ 0 - 2
core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AbstractAllocateAllocationCommand.java

@@ -39,8 +39,6 @@ import org.elasticsearch.common.xcontent.XContentParser;
 
 import java.io.IOException;
 import java.util.Objects;
-import java.util.function.Consumer;
-import java.util.function.Function;
 
 /**
  * Abstract base class for allocating an unassigned shard to a node

+ 2 - 1
core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java

@@ -125,7 +125,8 @@ public class AllocateEmptyPrimaryAllocationCommand extends BasePrimaryAllocation
             // we need to move the unassigned info back to treat it as if it was index creation
             unassignedInfoToUpdate = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED,
                 "force empty allocation from previous reason " + shardRouting.unassignedInfo().getReason() + ", " + shardRouting.unassignedInfo().getMessage(),
-                shardRouting.unassignedInfo().getFailure(), 0, System.nanoTime(), System.currentTimeMillis(), false);
+                shardRouting.unassignedInfo().getFailure(), 0, System.nanoTime(), System.currentTimeMillis(), false,
+                shardRouting.unassignedInfo().getLastAllocationStatus());
         }
 
         initializeUnassignedShard(allocation, routingNodes, routingNode, shardRouting, unassignedInfoToUpdate);

+ 20 - 13
core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/Decision.java

@@ -19,6 +19,7 @@
 
 package org.elasticsearch.cluster.routing.allocation.decider;
 
+import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.xcontent.ToXContent;
@@ -29,11 +30,12 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Locale;
+import java.util.Objects;
 
 /**
  * This abstract class defines the basic {@link Decision} used during the shard
  * allocation process.
- * 
+ *
  * @see AllocationDecider
  */
 public abstract class Decision implements ToXContent {
@@ -44,7 +46,7 @@ public abstract class Decision implements ToXContent {
     public static final Decision THROTTLE = new Single(Type.THROTTLE);
 
     /**
-     * Creates a simple decision 
+     * Creates a simple decision
      * @param type {@link Type} of the decision
      * @param label label for the Decider that produced this decision
      * @param explanation explanation of the decision
@@ -95,10 +97,10 @@ public abstract class Decision implements ToXContent {
     }
 
     /**
-     * This enumeration defines the 
-     * possible types of decisions 
+     * This enumeration defines the
+     * possible types of decisions
      */
-    public static enum Type {
+    public enum Type {
         YES,
         NO,
         THROTTLE;
@@ -144,6 +146,7 @@ public abstract class Decision implements ToXContent {
      */
     public abstract Type type();
 
+    @Nullable
     public abstract String label();
 
     /**
@@ -166,7 +169,7 @@ public abstract class Decision implements ToXContent {
         }
 
         /**
-         * Creates a new {@link Single} decision of a given type 
+         * Creates a new {@link Single} decision of a given type
          * @param type {@link Type} of the decision
          */
         public Single(Type type) {
@@ -175,12 +178,12 @@ public abstract class Decision implements ToXContent {
 
         /**
          * Creates a new {@link Single} decision of a given type
-         *  
+         *
          * @param type {@link Type} of the decision
          * @param explanation An explanation of this {@link Decision}
          * @param explanationParams A set of additional parameters
          */
-        public Single(Type type, String label, String explanation, Object... explanationParams) {
+        public Single(Type type, @Nullable String label, @Nullable String explanation, @Nullable Object... explanationParams) {
             this.type = type;
             this.label = label;
             this.explanation = explanation;
@@ -193,6 +196,7 @@ public abstract class Decision implements ToXContent {
         }
 
         @Override
+        @Nullable
         public String label() {
             return this.label;
         }
@@ -205,6 +209,7 @@ public abstract class Decision implements ToXContent {
         /**
          * Returns the explanation string, fully formatted. Only formats the string once
          */
+        @Nullable
         public String getExplanation() {
             if (explanationString == null && explanation != null) {
                 explanationString = String.format(Locale.ROOT, explanation, explanationParams);
@@ -224,15 +229,16 @@ public abstract class Decision implements ToXContent {
 
             Decision.Single s = (Decision.Single) object;
             return this.type == s.type &&
-                    this.label.equals(s.label) &&
-                    this.getExplanation().equals(s.getExplanation());
+                       Objects.equals(label, s.label) &&
+                       Objects.equals(getExplanation(), s.getExplanation());
         }
 
         @Override
         public int hashCode() {
-            int result = this.type.hashCode();
-            result = 31 * result + this.label.hashCode();
-            result = 31 * result + this.getExplanation().hashCode();
+            int result = type.hashCode();
+            result = 31 * result + (label == null ? 0 : label.hashCode());
+            String explanationStr = getExplanation();
+            result = 31 * result + (explanationStr == null ? 0 : explanationStr.hashCode());
             return result;
         }
 
@@ -288,6 +294,7 @@ public abstract class Decision implements ToXContent {
         }
 
         @Override
+        @Nullable
         public String label() {
             // Multi decisions have no labels
             return null;

+ 6 - 4
core/src/main/java/org/elasticsearch/gateway/PrimaryShardAllocator.java

@@ -27,6 +27,7 @@ import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.routing.RoutingNode;
 import org.elasticsearch.cluster.routing.RoutingNodes;
 import org.elasticsearch.cluster.routing.ShardRouting;
+import org.elasticsearch.cluster.routing.UnassignedInfo.AllocationStatus;
 import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
 import org.elasticsearch.cluster.routing.allocation.decider.Decision;
 import org.elasticsearch.common.component.AbstractComponent;
@@ -106,7 +107,7 @@ public abstract class PrimaryShardAllocator extends AbstractComponent {
             if (shardState.hasData() == false) {
                 logger.trace("{}: ignoring allocation, still fetching shard started state", shard);
                 allocation.setHasPendingAsyncFetch();
-                unassignedIterator.removeAndIgnore();
+                changed |= unassignedIterator.removeAndIgnore(AllocationStatus.FETCHING_SHARD_DATA);
                 continue;
             }
 
@@ -147,7 +148,7 @@ public abstract class PrimaryShardAllocator extends AbstractComponent {
                     logger.debug("[{}][{}]: missing local data, recover from any node", shard.index(), shard.id());
                 } else {
                     // we can't really allocate, so ignore it and continue
-                    unassignedIterator.removeAndIgnore();
+                    changed |= unassignedIterator.removeAndIgnore(AllocationStatus.NO_VALID_SHARD_COPY);
                     logger.debug("[{}][{}]: not allocating, number_of_allocated_shards_found [{}]", shard.index(), shard.id(), nodeShardsResult.allocationsFound);
                 }
                 continue;
@@ -167,7 +168,7 @@ public abstract class PrimaryShardAllocator extends AbstractComponent {
             } else {
                 // we are throttling this, but we have enough to allocate to this node, ignore it for now
                 logger.debug("[{}][{}]: throttling allocation [{}] to [{}] on primary allocation", shard.index(), shard.id(), shard, nodesToAllocate.throttleNodeShards);
-                unassignedIterator.removeAndIgnore();
+                changed |= unassignedIterator.removeAndIgnore(AllocationStatus.DECIDERS_THROTTLED);
             }
         }
         return changed;
@@ -384,7 +385,8 @@ public abstract class PrimaryShardAllocator extends AbstractComponent {
         final List<NodeGatewayStartedShards> throttleNodeShards;
         final List<NodeGatewayStartedShards> noNodeShards;
 
-        public NodesToAllocate(List<NodeGatewayStartedShards> yesNodeShards, List<NodeGatewayStartedShards> throttleNodeShards,
+        public NodesToAllocate(List<NodeGatewayStartedShards> yesNodeShards,
+                               List<NodeGatewayStartedShards> throttleNodeShards,
                                List<NodeGatewayStartedShards> noNodeShards) {
             this.yesNodeShards = yesNodeShards;
             this.throttleNodeShards = throttleNodeShards;

+ 24 - 18
core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java

@@ -23,7 +23,6 @@ import com.carrotsearch.hppc.ObjectLongHashMap;
 import com.carrotsearch.hppc.ObjectLongMap;
 import com.carrotsearch.hppc.cursors.ObjectCursor;
 import com.carrotsearch.hppc.cursors.ObjectLongCursor;
-import org.elasticsearch.cluster.ClusterChangedEvent;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.cluster.metadata.MetaData;
 import org.elasticsearch.cluster.node.DiscoveryNode;
@@ -31,6 +30,7 @@ import org.elasticsearch.cluster.routing.RoutingNode;
 import org.elasticsearch.cluster.routing.RoutingNodes;
 import org.elasticsearch.cluster.routing.ShardRouting;
 import org.elasticsearch.cluster.routing.UnassignedInfo;
+import org.elasticsearch.cluster.routing.UnassignedInfo.AllocationStatus;
 import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
 import org.elasticsearch.cluster.routing.allocation.decider.Decision;
 import org.elasticsearch.common.Nullable;
@@ -118,7 +118,7 @@ public abstract class ReplicaShardAllocator extends AbstractComponent {
                                 currentNode, nodeWithHighestMatch);
                         UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.REALLOCATED_REPLICA,
                             "existing allocation of replica to [" + currentNode + "] cancelled, sync id match found on node ["+ nodeWithHighestMatch + "]",
-                            null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis(), false);
+                            null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis(), false, UnassignedInfo.AllocationStatus.NO_ATTEMPT);
                         // don't cancel shard in the loop as it will cause a ConcurrentModificationException
                         recoveriesToCancel.add(new Tuple<>(shard, unassignedInfo));
                         changed = true;
@@ -150,9 +150,10 @@ public abstract class ReplicaShardAllocator extends AbstractComponent {
             }
 
             // pre-check if it can be allocated to any node that currently exists, so we won't list the store for it for nothing
-            if (canBeAllocatedToAtLeastOneNode(shard, allocation) == false) {
+            Decision decision = canBeAllocatedToAtLeastOneNode(shard, allocation);
+            if (decision.type() != Decision.Type.YES) {
                 logger.trace("{}: ignoring allocation, can't be allocated on any node", shard);
-                unassignedIterator.removeAndIgnore();
+                changed |= unassignedIterator.removeAndIgnore(UnassignedInfo.AllocationStatus.fromDecision(decision));
                 continue;
             }
 
@@ -160,7 +161,7 @@ public abstract class ReplicaShardAllocator extends AbstractComponent {
             if (shardStores.hasData() == false) {
                 logger.trace("{}: ignoring allocation, still fetching shard stores", shard);
                 allocation.setHasPendingAsyncFetch();
-                unassignedIterator.removeAndIgnore();
+                changed |= unassignedIterator.removeAndIgnore(AllocationStatus.FETCHING_SHARD_DATA);
                 continue; // still fetching
             }
 
@@ -181,11 +182,11 @@ public abstract class ReplicaShardAllocator extends AbstractComponent {
             if (matchingNodes.getNodeWithHighestMatch() != null) {
                 RoutingNode nodeWithHighestMatch = allocation.routingNodes().node(matchingNodes.getNodeWithHighestMatch().getId());
                 // we only check on THROTTLE since we checked before on NO
-                Decision decision = allocation.deciders().canAllocate(shard, nodeWithHighestMatch, allocation);
+                decision = allocation.deciders().canAllocate(shard, nodeWithHighestMatch, allocation);
                 if (decision.type() == Decision.Type.THROTTLE) {
                     logger.debug("[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store", shard.index(), shard.id(), shard, nodeWithHighestMatch.node());
                     // we are throttling this, but we have enough to allocate to this node, ignore it for now
-                    unassignedIterator.removeAndIgnore();
+                    changed |= unassignedIterator.removeAndIgnore(UnassignedInfo.AllocationStatus.fromDecision(decision));
                 } else {
                     logger.debug("[{}][{}]: allocating [{}] to [{}] in order to reuse its unallocated persistent store", shard.index(), shard.id(), shard, nodeWithHighestMatch.node());
                     // we found a match
@@ -194,7 +195,7 @@ public abstract class ReplicaShardAllocator extends AbstractComponent {
                 }
             } else if (matchingNodes.hasAnyData() == false) {
                 // if we didn't manage to find *any* data (regardless of matching sizes), check if the allocation of the replica shard needs to be delayed
-                ignoreUnassignedIfDelayed(unassignedIterator, shard);
+                changed |= ignoreUnassignedIfDelayed(unassignedIterator, shard);
             }
         }
         return changed;
@@ -210,22 +211,25 @@ public abstract class ReplicaShardAllocator extends AbstractComponent {
      *
      * @param unassignedIterator iterator over unassigned shards
      * @param shard the shard which might be delayed
+     * @return true iff there was a change to the unassigned info
      */
-    public void ignoreUnassignedIfDelayed(RoutingNodes.UnassignedShards.UnassignedIterator unassignedIterator, ShardRouting shard) {
+    public boolean ignoreUnassignedIfDelayed(RoutingNodes.UnassignedShards.UnassignedIterator unassignedIterator, ShardRouting shard) {
         if (shard.unassignedInfo().isDelayed()) {
             logger.debug("{}: allocation of [{}] is delayed", shard.shardId(), shard);
-            /**
-             * mark it as changed, since we want to kick a publishing to schedule future allocation,
-             * see {@link org.elasticsearch.cluster.routing.RoutingService#clusterChanged(ClusterChangedEvent)}).
-             */
-            unassignedIterator.removeAndIgnore();
+            return unassignedIterator.removeAndIgnore(AllocationStatus.DELAYED_ALLOCATION);
         }
+        return false;
     }
 
     /**
-     * Can the shard be allocated on at least one node based on the allocation deciders.
+     * Determines if the shard can be allocated on at least one node based on the allocation deciders.
+     *
+     * Returns the best allocation decision for allocating the shard on any node (i.e. YES if at least one
+     * node decided YES, THROTTLE if no node decided YES but at least one node decided THROTTLE, and NO if
+     * none of the nodes decided YES or THROTTLE).
      */
-    private boolean canBeAllocatedToAtLeastOneNode(ShardRouting shard, RoutingAllocation allocation) {
+    private Decision canBeAllocatedToAtLeastOneNode(ShardRouting shard, RoutingAllocation allocation) {
+        Decision madeDecision = Decision.NO;
         for (ObjectCursor<DiscoveryNode> cursor : allocation.nodes().getDataNodes().values()) {
             RoutingNode node = allocation.routingNodes().node(cursor.value.getId());
             if (node == null) {
@@ -235,10 +239,12 @@ public abstract class ReplicaShardAllocator extends AbstractComponent {
             // cases for only allocating a replica after a primary
             Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
             if (decision.type() == Decision.Type.YES) {
-                return true;
+                return decision;
+            } else if (madeDecision.type() == Decision.Type.NO && decision.type() == Decision.Type.THROTTLE) {
+                madeDecision = decision;
             }
         }
-        return false;
+        return madeDecision;
     }
 
     /**

+ 14 - 0
core/src/main/java/org/elasticsearch/gateway/TransportNodesListGatewayStartedShards.java

@@ -336,5 +336,19 @@ public class TransportNodesListGatewayStartedShards extends
             result = 31 * result + (storeException != null ? storeException.hashCode() : 0);
             return result;
         }
+
+        @Override
+        public String toString() {
+            StringBuilder buf = new StringBuilder();
+            buf.append("NodeGatewayStartedShards[")
+               .append("allocationId=").append(allocationId)
+               .append(",primary=").append(primary)
+               .append(",legacyVersion=").append(legacyVersion);
+            if (storeException != null) {
+                buf.append(",storeException=").append(storeException);
+            }
+            buf.append("]");
+            return buf.toString();
+        }
     }
 }

+ 2 - 0
core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestRolloverIndexAction.java

@@ -21,6 +21,7 @@ package org.elasticsearch.rest.action.admin.indices;
 
 import org.elasticsearch.action.admin.indices.rollover.RolloverRequest;
 import org.elasticsearch.client.node.NodeClient;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.rest.BaseRestHandler;
@@ -51,6 +52,7 @@ public class RestRolloverIndexAction extends BaseRestHandler {
         rolloverIndexRequest.dryRun(request.paramAsBoolean("dry_run", false));
         rolloverIndexRequest.timeout(request.paramAsTime("timeout", rolloverIndexRequest.timeout()));
         rolloverIndexRequest.masterNodeTimeout(request.paramAsTime("master_timeout", rolloverIndexRequest.masterNodeTimeout()));
+        rolloverIndexRequest.setWaitForActiveShards(ActiveShardCount.parseString(request.param("wait_for_active_shards")));
         client.admin().indices().rolloverIndex(rolloverIndexRequest, new RestToXContentListener<>(channel));
     }
 }

+ 12 - 1
core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestShrinkIndexAction.java

@@ -21,14 +21,19 @@ package org.elasticsearch.rest.action.admin.indices;
 
 import org.elasticsearch.action.admin.indices.shrink.ShrinkRequest;
 import org.elasticsearch.client.node.NodeClient;
+import org.elasticsearch.action.admin.indices.shrink.ShrinkResponse;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.rest.BaseRestHandler;
 import org.elasticsearch.rest.RestChannel;
 import org.elasticsearch.rest.RestController;
 import org.elasticsearch.rest.RestRequest;
 import org.elasticsearch.rest.action.support.AcknowledgedRestListener;
 
+import java.io.IOException;
+
 /**
  *
  */
@@ -56,6 +61,12 @@ public class RestShrinkIndexAction extends BaseRestHandler {
         }
         shrinkIndexRequest.timeout(request.paramAsTime("timeout", shrinkIndexRequest.timeout()));
         shrinkIndexRequest.masterNodeTimeout(request.paramAsTime("master_timeout", shrinkIndexRequest.masterNodeTimeout()));
-        client.admin().indices().shrinkIndex(shrinkIndexRequest, new AcknowledgedRestListener<>(channel));
+        shrinkIndexRequest.setWaitForActiveShards(ActiveShardCount.parseString(request.param("wait_for_active_shards")));
+        client.admin().indices().shrinkIndex(shrinkIndexRequest, new AcknowledgedRestListener<ShrinkResponse>(channel) {
+            @Override
+            public void addCustomFields(XContentBuilder builder, ShrinkResponse response) throws IOException {
+                response.addCustomFields(builder);
+            }
+        });
     }
 }

+ 11 - 1
core/src/main/java/org/elasticsearch/rest/action/admin/indices/create/RestCreateIndexAction.java

@@ -22,14 +22,18 @@ package org.elasticsearch.rest.action.admin.indices.create;
 import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
 import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
 import org.elasticsearch.client.node.NodeClient;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.rest.BaseRestHandler;
 import org.elasticsearch.rest.RestChannel;
 import org.elasticsearch.rest.RestController;
 import org.elasticsearch.rest.RestRequest;
 import org.elasticsearch.rest.action.support.AcknowledgedRestListener;
 
+import java.io.IOException;
+
 /**
  *
  */
@@ -52,6 +56,12 @@ public class RestCreateIndexAction extends BaseRestHandler {
         createIndexRequest.updateAllTypes(request.paramAsBoolean("update_all_types", false));
         createIndexRequest.timeout(request.paramAsTime("timeout", createIndexRequest.timeout()));
         createIndexRequest.masterNodeTimeout(request.paramAsTime("master_timeout", createIndexRequest.masterNodeTimeout()));
-        client.admin().indices().create(createIndexRequest, new AcknowledgedRestListener<CreateIndexResponse>(channel));
+        createIndexRequest.waitForActiveShards(ActiveShardCount.parseString(request.param("wait_for_active_shards")));
+        client.admin().indices().create(createIndexRequest, new AcknowledgedRestListener<CreateIndexResponse>(channel) {
+            @Override
+            public void addCustomFields(XContentBuilder builder, CreateIndexResponse response) throws IOException {
+                response.addCustomFields(builder);
+            }
+        });
     }
 }

+ 7 - 7
core/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainIT.java

@@ -21,6 +21,7 @@ package org.elasticsearch.action.admin.cluster.allocation;
 
 import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse;
 import org.elasticsearch.action.admin.indices.shards.IndicesShardStoresResponse;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.client.Requests;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.cluster.node.DiscoveryNode;
@@ -29,10 +30,10 @@ import org.elasticsearch.cluster.routing.allocation.decider.Decision;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.test.ESIntegTestCase;
 
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
@@ -54,11 +55,11 @@ public final class ClusterAllocationExplainIT extends ESIntegTestCase {
         });
 
         logger.info("--> creating 'test' index");
-        prepareCreate("test").setSettings(Settings.builder()
+        assertAcked(prepareCreate("test").setSettings(Settings.builder()
                 .put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "1m")
                 .put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 5)
-                .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 1)).get();
-        ensureGreen("test");
+                .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 1))
+                .setWaitForActiveShards(ActiveShardCount.ALL).get());
 
         logger.info("--> stopping a random node");
         assertTrue(internalCluster().stopRandomDataNode());
@@ -89,6 +90,7 @@ public final class ClusterAllocationExplainIT extends ESIntegTestCase {
                 .setSettings(Settings.builder()
                         .put("index.number_of_shards", 5)
                         .put("index.number_of_replicas", 1))
+                .setWaitForActiveShards(ActiveShardCount.ALL)  // wait on all shards
                 .get();
 
         client().admin().indices().prepareCreate("only-baz")
@@ -96,6 +98,7 @@ public final class ClusterAllocationExplainIT extends ESIntegTestCase {
                         .put("index.routing.allocation.include.bar", "baz")
                         .put("index.number_of_shards", 5)
                         .put("index.number_of_replicas", 1))
+                .setWaitForActiveShards(ActiveShardCount.ALL)
                 .get();
 
         client().admin().indices().prepareCreate("only-foo")
@@ -105,9 +108,6 @@ public final class ClusterAllocationExplainIT extends ESIntegTestCase {
                         .put("index.number_of_replicas", 1))
                 .get();
 
-        ensureGreen("anywhere", "only-baz");
-        ensureYellow("only-foo");
-
         ClusterAllocationExplainResponse resp = client().admin().cluster().prepareAllocationExplain()
                 .setIndex("only-foo")
                 .setShard(0)

+ 2 - 1
core/src/test/java/org/elasticsearch/action/admin/indices/create/CreateIndexIT.java

@@ -25,6 +25,7 @@ import org.elasticsearch.action.admin.cluster.reroute.ClusterRerouteResponse;
 import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
 import org.elasticsearch.action.admin.indices.delete.DeleteIndexResponse;
 import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.action.support.IndicesOptions;
 import org.elasticsearch.cluster.ClusterInfoService;
 import org.elasticsearch.cluster.ClusterState;
@@ -289,7 +290,7 @@ public class CreateIndexIT extends ESIntegTestCase {
     public void testRestartIndexCreationAfterFullClusterRestart() throws Exception {
         client().admin().cluster().prepareUpdateSettings().setTransientSettings(Settings.builder().put("cluster.routing.allocation.enable",
             "none")).get();
-        client().admin().indices().prepareCreate("test").setSettings(indexSettings()).get();
+        client().admin().indices().prepareCreate("test").setWaitForActiveShards(ActiveShardCount.NONE).setSettings(indexSettings()).get();
         internalCluster().fullRestart();
         ensureGreen("test");
     }

+ 3 - 0
core/src/test/java/org/elasticsearch/action/admin/indices/rollover/TransportRolloverActionTests.java

@@ -22,6 +22,7 @@ package org.elasticsearch.action.admin.indices.rollover;
 import org.elasticsearch.Version;
 import org.elasticsearch.action.admin.indices.alias.IndicesAliasesClusterStateUpdateRequest;
 import org.elasticsearch.action.admin.indices.create.CreateIndexClusterStateUpdateRequest;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.cluster.metadata.AliasAction;
 import org.elasticsearch.cluster.metadata.AliasMetaData;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
@@ -166,6 +167,8 @@ public class TransportRolloverActionTests extends ESTestCase {
         String alias = randomAsciiOfLength(10);
         String rolloverIndex = randomAsciiOfLength(10);
         final RolloverRequest rolloverRequest = new RolloverRequest(alias, randomAsciiOfLength(10));
+        final ActiveShardCount activeShardCount = randomBoolean() ? ActiveShardCount.ALL : ActiveShardCount.ONE;
+        rolloverRequest.setWaitForActiveShards(activeShardCount);
         final Settings settings = Settings.builder()
             .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
             .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())

+ 4 - 0
core/src/test/java/org/elasticsearch/action/admin/indices/shrink/TransportShrinkActionTests.java

@@ -22,6 +22,7 @@ package org.elasticsearch.action.admin.indices.shrink;
 import org.apache.lucene.index.IndexWriter;
 import org.elasticsearch.Version;
 import org.elasticsearch.action.admin.indices.create.CreateIndexClusterStateUpdateRequest;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.cluster.ClusterName;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.EmptyClusterInfoService;
@@ -130,6 +131,8 @@ public class TransportShrinkActionTests extends ESTestCase {
         int numSourceShards = clusterState.metaData().index(indexName).getNumberOfShards();
         DocsStats stats = new DocsStats(randomIntBetween(0, (IndexWriter.MAX_DOCS) / numSourceShards), randomIntBetween(1, 1000));
         ShrinkRequest target = new ShrinkRequest("target", indexName);
+        final ActiveShardCount activeShardCount = randomBoolean() ? ActiveShardCount.ALL : ActiveShardCount.ONE;
+        target.setWaitForActiveShards(activeShardCount);
         CreateIndexClusterStateUpdateRequest request = TransportShrinkAction.prepareCreateIndexRequest(
             target, clusterState, (i) -> stats,
             new IndexNameExpressionResolver(Settings.EMPTY));
@@ -137,6 +140,7 @@ public class TransportShrinkActionTests extends ESTestCase {
         assertEquals(indexName, request.shrinkFrom().getName());
         assertEquals("1", request.settings().get("index.number_of_shards"));
         assertEquals("shrink_index", request.cause());
+        assertEquals(request.waitForActiveShards(), activeShardCount);
     }
 
     private DiscoveryNode newNode(String nodeId) {

+ 2 - 1
core/src/test/java/org/elasticsearch/action/admin/indices/template/put/MetaDataIndexTemplateServiceTests.java

@@ -160,7 +160,7 @@ public class MetaDataIndexTemplateServiceTests extends ESSingleNodeTestCase {
                 null,
                 new HashSet<>(),
                 null,
-                null, null);
+                null, null, null);
         MetaDataIndexTemplateService service = new MetaDataIndexTemplateService(Settings.EMPTY, null, createIndexService, new AliasValidator(Settings.EMPTY), null, null);
 
         final List<Throwable> throwables = new ArrayList<>();
@@ -191,6 +191,7 @@ public class MetaDataIndexTemplateServiceTests extends ESSingleNodeTestCase {
             new HashSet<>(),
             null,
             nodeServicesProvider,
+            null,
             null);
         MetaDataIndexTemplateService service = new MetaDataIndexTemplateService(
             Settings.EMPTY, clusterService, createIndexService, new AliasValidator(Settings.EMPTY), indicesService, nodeServicesProvider);

+ 305 - 0
core/src/test/java/org/elasticsearch/action/support/ActiveShardCountTests.java

@@ -0,0 +1,305 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.action.support;
+
+import com.carrotsearch.hppc.cursors.ObjectCursor;
+import org.elasticsearch.Version;
+import org.elasticsearch.cluster.ClusterName;
+import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.metadata.IndexMetaData;
+import org.elasticsearch.cluster.metadata.MetaData;
+import org.elasticsearch.cluster.routing.IndexRoutingTable;
+import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
+import org.elasticsearch.cluster.routing.RoutingTable;
+import org.elasticsearch.cluster.routing.ShardRouting;
+import org.elasticsearch.common.UUIDs;
+import org.elasticsearch.common.io.stream.ByteBufferStreamInput;
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
+import org.elasticsearch.test.ESTestCase;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import static org.hamcrest.Matchers.equalTo;
+
+/**
+ * Tests for the {@link ActiveShardCount} class
+ */
+public class ActiveShardCountTests extends ESTestCase {
+
+    /**
+     * Verifies {@link ActiveShardCount#from(int)}: zero yields the shared {@link ActiveShardCount#NONE}
+     * instance, a positive value resolves to itself, and a negative value is rejected with an
+     * {@link IllegalArgumentException}.
+     */
+    public void testFromIntValue() {
+        // expected value first, so that a failure message reports expected/actual the right way round
+        assertSame(ActiveShardCount.NONE, ActiveShardCount.from(0));
+        final int value = randomIntBetween(1, 50);
+        IndexMetaData indexMetaData = IndexMetaData.builder("test")
+                                                   .settings(settings(Version.CURRENT))
+                                                   .numberOfShards(1)
+                                                   .numberOfReplicas(0)
+                                                   .build();
+        assertEquals(value, ActiveShardCount.from(value).resolve(indexMetaData));
+        expectThrows(IllegalArgumentException.class, () -> ActiveShardCount.from(randomIntBetween(-10, -1)));
+    }
+
+    /**
+     * Checks what each kind of {@link ActiveShardCount} resolves to, first against an index with one
+     * primary and no replicas, then against an index with one primary and a random number of replicas.
+     */
+    public void testResolve() {
+        // a single primary without replicas
+        final IndexMetaData withoutReplicas = IndexMetaData.builder("test")
+                                                           .settings(settings(Version.CURRENT))
+                                                           .numberOfShards(1)
+                                                           .numberOfReplicas(0)
+                                                           .build();
+        assertThat(ActiveShardCount.ALL.resolve(withoutReplicas), equalTo(1));
+        assertThat(ActiveShardCount.DEFAULT.resolve(withoutReplicas), equalTo(1));
+        assertThat(ActiveShardCount.NONE.resolve(withoutReplicas), equalTo(0));
+        final int explicitCount = randomIntBetween(2, 20);
+        assertThat(ActiveShardCount.from(explicitCount).resolve(withoutReplicas), equalTo(explicitCount));
+
+        // a single primary plus a random number of replicas, i.e. more than one shard copy
+        final int replicaCount = randomIntBetween(1, 20);
+        final IndexMetaData withReplicas = IndexMetaData.builder("test")
+                                                        .settings(settings(Version.CURRENT))
+                                                        .numberOfShards(1)
+                                                        .numberOfReplicas(replicaCount)
+                                                        .build();
+        // ALL counts the primary as well as every replica
+        assertThat(ActiveShardCount.ALL.resolve(withReplicas), equalTo(replicaCount + 1));
+        assertThat(ActiveShardCount.DEFAULT.resolve(withReplicas), equalTo(1));
+        assertThat(ActiveShardCount.NONE.resolve(withReplicas), equalTo(0));
+        assertThat(ActiveShardCount.from(explicitCount).resolve(withReplicas), equalTo(explicitCount));
+    }
+
+    /**
+     * Round-trips the three named constants and one random explicit count through the stream format.
+     */
+    public void testSerialization() throws IOException {
+        final ActiveShardCount[] namedCounts = { ActiveShardCount.ALL, ActiveShardCount.DEFAULT, ActiveShardCount.NONE };
+        for (final ActiveShardCount namedCount : namedCounts) {
+            doWriteRead(namedCount);
+        }
+        final int explicitCount = randomIntBetween(1, 50);
+        doWriteRead(ActiveShardCount.from(explicitCount));
+    }
+
+    /**
+     * Verifies {@link ActiveShardCount#parseString(String)}: {@code "all"}, {@code null} and {@code "0"}
+     * map to the shared constants, a positive number parses to the equivalent explicit count, and
+     * non-numeric or negative input is rejected with an {@link IllegalArgumentException}.
+     */
+    public void testParseString() {
+        // expected value first, so that a failure message reports expected/actual the right way round
+        assertSame(ActiveShardCount.ALL, ActiveShardCount.parseString("all"));
+        assertSame(ActiveShardCount.DEFAULT, ActiveShardCount.parseString(null));
+        assertSame(ActiveShardCount.NONE, ActiveShardCount.parseString("0"));
+        int value = randomIntBetween(1, 50);
+        assertEquals(ActiveShardCount.from(value), ActiveShardCount.parseString(Integer.toString(value)));
+        expectThrows(IllegalArgumentException.class, () -> ActiveShardCount.parseString(randomAsciiOfLengthBetween(4, 8)));
+        expectThrows(IllegalArgumentException.class, () -> ActiveShardCount.parseString("-1")); // magic numbers not exposed through API
+        expectThrows(IllegalArgumentException.class, () -> ActiveShardCount.parseString("-2"));
+        expectThrows(IllegalArgumentException.class, () -> ActiveShardCount.parseString(Integer.toString(randomIntBetween(-10, -3))));
+    }
+
+    /**
+     * Serializes the given {@link ActiveShardCount}, reads it back, and checks the round trip.
+     * The named constants {@code DEFAULT}, {@code ALL} and {@code NONE} must come back as the very same
+     * instance; any other value only needs to be equal.
+     *
+     * @param activeShardCount the value to write and read back
+     * @throws IOException if writing to or reading from the in-memory stream fails
+     */
+    private void doWriteRead(ActiveShardCount activeShardCount) throws IOException {
+        final BytesStreamOutput out = new BytesStreamOutput();
+        activeShardCount.writeTo(out);
+        // Only the [offset, offset + length) window of the backing array holds the serialized bytes, so
+        // wrap exactly that window instead of the whole array.
+        final org.apache.lucene.util.BytesRef written = out.bytes().toBytesRef();
+        final ByteBufferStreamInput in = new ByteBufferStreamInput(
+            ByteBuffer.wrap(written.bytes, written.offset, written.length));
+        ActiveShardCount readActiveShardCount = ActiveShardCount.readFrom(in);
+        if (activeShardCount == ActiveShardCount.DEFAULT
+                || activeShardCount == ActiveShardCount.ALL
+                || activeShardCount == ActiveShardCount.NONE) {
+            assertSame(activeShardCount, readActiveShardCount);
+        } else {
+            assertEquals(activeShardCount, readActiveShardCount);
+        }
+    }
+
+    /**
+     * A wait count of zero must be satisfied at every stage: right after the index is created, after the
+     * primaries start, after some replicas start, and once every shard copy is started.
+     */
+    public void testEnoughShardsActiveZero() {
+        final String index = "test-idx";
+        final int shardCount = randomIntBetween(1, 5);
+        final int replicaCount = randomIntBetween(4, 7);
+        final ActiveShardCount waitForNone = ActiveShardCount.from(0);
+
+        ClusterState state = initializeWithNewIndex(index, shardCount, replicaCount);
+        assertTrue(waitForNone.enoughShardsActive(state, index));
+
+        state = startPrimaries(state, index);
+        assertTrue(waitForNone.enoughShardsActive(state, index));
+
+        state = startLessThanWaitOnShards(state, index, waitForNone);
+        assertTrue(waitForNone.enoughShardsActive(state, index));
+
+        state = startWaitOnShards(state, index, waitForNone);
+        assertTrue(waitForNone.enoughShardsActive(state, index));
+
+        state = startAllShards(state, index);
+        assertTrue(waitForNone.enoughShardsActive(state, index));
+    }
+
+    /**
+     * Runs the shared one-active-shard scenario with {@link ActiveShardCount#ONE}.
+     */
+    public void testEnoughShardsActiveLevelOne() {
+        runTestForOneActiveShard(ActiveShardCount.ONE);
+    }
+
+    /**
+     * Runs the shared one-active-shard scenario with {@link ActiveShardCount#DEFAULT}.
+     */
+    public void testEnoughShardsActiveLevelDefault() {
+        // default is 1
+        runTestForOneActiveShard(ActiveShardCount.DEFAULT);
+    }
+
+    /**
+     * With a random wait count between 2 and the number of replicas, the check must stay unsatisfied
+     * until that many copies of each shard are started, and remain satisfied once all copies are started.
+     */
+    public void testEnoughShardsActiveRandom() {
+        final String index = "test-idx";
+        final int shardCount = randomIntBetween(1, 5);
+        final int replicaCount = randomIntBetween(4, 7);
+        final ActiveShardCount waitFor = ActiveShardCount.from(randomIntBetween(2, replicaCount));
+
+        // nothing started yet
+        ClusterState state = initializeWithNewIndex(index, shardCount, replicaCount);
+        assertFalse(waitFor.enoughShardsActive(state, index));
+
+        // primaries alone are not enough because the wait count is at least 2
+        state = startPrimaries(state, index);
+        assertFalse(waitFor.enoughShardsActive(state, index));
+
+        // still one copy short
+        state = startLessThanWaitOnShards(state, index, waitFor);
+        assertFalse(waitFor.enoughShardsActive(state, index));
+
+        // exactly the required number of copies
+        state = startWaitOnShards(state, index, waitFor);
+        assertTrue(waitFor.enoughShardsActive(state, index));
+
+        // every copy started
+        state = startAllShards(state, index);
+        assertTrue(waitFor.enoughShardsActive(state, index));
+    }
+
+    /**
+     * Waiting for all copies, expressed either as {@link ActiveShardCount#ALL} or as the explicit count
+     * {@code replicas + 1}, must only be satisfied once every shard copy is started.
+     */
+    public void testEnoughShardsActiveLevelAll() {
+        final String index = "test-idx";
+        final int shardCount = randomIntBetween(1, 5);
+        final int replicaCount = randomIntBetween(4, 7);
+        // both forms should represent "all"
+        final ActiveShardCount waitForAll;
+        if (randomBoolean()) {
+            waitForAll = ActiveShardCount.from(replicaCount + 1);
+        } else {
+            waitForAll = ActiveShardCount.ALL;
+        }
+
+        ClusterState state = initializeWithNewIndex(index, shardCount, replicaCount);
+        assertFalse(waitForAll.enoughShardsActive(state, index));
+
+        state = startPrimaries(state, index);
+        assertFalse(waitForAll.enoughShardsActive(state, index));
+
+        state = startLessThanWaitOnShards(state, index, waitForAll);
+        assertFalse(waitForAll.enoughShardsActive(state, index));
+
+        state = startAllShards(state, index);
+        assertTrue(waitForAll.enoughShardsActive(state, index));
+    }
+
+    /**
+     * Shared scenario for wait counts that resolve to a single active copy: the check is unsatisfied on a
+     * freshly created index and satisfied from the moment the primaries are started.
+     *
+     * @param activeShardCount must be {@link ActiveShardCount#ONE} or {@link ActiveShardCount#DEFAULT}
+     */
+    private void runTestForOneActiveShard(final ActiveShardCount activeShardCount) {
+        final String index = "test-idx";
+        final int shardCount = randomIntBetween(1, 5);
+        final int replicaCount = randomIntBetween(4, 7);
+        assert activeShardCount == ActiveShardCount.ONE || activeShardCount == ActiveShardCount.DEFAULT;
+
+        ClusterState state = initializeWithNewIndex(index, shardCount, replicaCount);
+        assertFalse(activeShardCount.enoughShardsActive(state, index));
+
+        state = startPrimaries(state, index);
+        assertTrue(activeShardCount.enoughShardsActive(state, index));
+
+        state = startLessThanWaitOnShards(state, index, activeShardCount);
+        assertTrue(activeShardCount.enoughShardsActive(state, index));
+
+        state = startWaitOnShards(state, index, activeShardCount);
+        assertTrue(activeShardCount.enoughShardsActive(state, index));
+
+        state = startAllShards(state, index);
+        assertTrue(activeShardCount.enoughShardsActive(state, index));
+    }
+
+    /**
+     * Builds a cluster state named {@code test_cluster} that contains only the given index, added to the
+     * routing table as a newly created index.
+     *
+     * @param indexName   name of the index to create
+     * @param numShards   number of primary shards
+     * @param numReplicas number of replicas per primary
+     * @return the cluster state holding the new index's metadata and routing table
+     */
+    private ClusterState initializeWithNewIndex(final String indexName, final int numShards, final int numReplicas) {
+        // metadata of the new index, with a random index UUID
+        final IndexMetaData newIndex = IndexMetaData.builder(indexName)
+                                           .settings(settings(Version.CURRENT)
+                                                         .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()))
+                                           .numberOfShards(numShards)
+                                           .numberOfReplicas(numReplicas)
+                                           .build();
+        return ClusterState.builder(new ClusterName("test_cluster"))
+                   .metaData(MetaData.builder().put(newIndex, true).build())
+                   .routingTable(RoutingTable.builder().addAsNew(newIndex).build())
+                   .build();
+    }
+
+    /**
+     * Returns a copy of the cluster state in which every primary of the index has been initialized (with a
+     * random 8-character identifier) and moved to started; all other shard copies are carried over unchanged.
+     */
+    private ClusterState startPrimaries(final ClusterState clusterState, final String indexName) {
+        final RoutingTable currentRouting = clusterState.routingTable();
+        final IndexRoutingTable currentIndexRouting = currentRouting.index(indexName);
+        final IndexRoutingTable.Builder updatedIndexRouting = IndexRoutingTable.builder(currentIndexRouting.getIndex());
+        for (final ObjectCursor<IndexShardRoutingTable> cursor : currentIndexRouting.getShards().values()) {
+            for (final ShardRouting copy : cursor.value.getShards()) {
+                final ShardRouting updatedCopy = copy.primary()
+                    ? copy.initialize(randomAsciiOfLength(8), null, copy.getExpectedShardSize()).moveToStarted()
+                    : copy;
+                updatedIndexRouting.addShard(updatedCopy);
+            }
+        }
+        final RoutingTable updatedRouting = RoutingTable.builder(currentRouting).add(updatedIndexRouting).build();
+        return ClusterState.builder(clusterState).routingTable(updatedRouting).build();
+    }
+
+    /**
+     * Returns a copy of the cluster state in which, for each shard of the index, at most
+     * {@code resolvedWaitCount - 2} replicas have been started. Together with the already-started primary
+     * this leaves each shard one active copy short of the wait count (nothing is started when the resolved
+     * count is 2 or less). Expects the primaries to be active already.
+     */
+    private ClusterState startLessThanWaitOnShards(final ClusterState clusterState, final String indexName,
+                                                   final ActiveShardCount waitForActiveShards) {
+        final RoutingTable currentRouting = clusterState.routingTable();
+        final IndexRoutingTable currentIndexRouting = currentRouting.index(indexName);
+        final IndexRoutingTable.Builder updatedIndexRouting = IndexRoutingTable.builder(currentIndexRouting.getIndex());
+        for (final ObjectCursor<IndexShardRoutingTable> cursor : currentIndexRouting.getShards().values()) {
+            final IndexShardRoutingTable shardCopies = cursor.value;
+            assert shardCopies.getSize() > 2;
+            // stop one copy short of the wait count; the primary is already started, hence "- 2"
+            int replicasToStart = waitForActiveShards.resolve(clusterState.metaData().index(indexName)) - 2;
+            for (ShardRouting copy : shardCopies.getShards()) {
+                if (copy.primary()) {
+                    assertTrue(copy.active());
+                } else if (replicasToStart > 0) {
+                    copy = copy.initialize(randomAsciiOfLength(8), null, copy.getExpectedShardSize()).moveToStarted();
+                    replicasToStart--;
+                }
+                updatedIndexRouting.addShard(copy);
+            }
+        }
+        final RoutingTable updatedRouting = RoutingTable.builder(currentRouting).add(updatedIndexRouting).build();
+        return ClusterState.builder(clusterState).routingTable(updatedRouting).build();
+    }
+
+    /**
+     * Returns a copy of the cluster state in which, for each shard of the index, enough replicas are
+     * started that {@code resolvedWaitCount - 1} replicas are active, counting replicas that were active
+     * already. Expects the primaries to be active already.
+     */
+    private ClusterState startWaitOnShards(final ClusterState clusterState, final String indexName,
+                                           final ActiveShardCount waitForActiveShards) {
+        final RoutingTable currentRouting = clusterState.routingTable();
+        final IndexRoutingTable currentIndexRouting = currentRouting.index(indexName);
+        final IndexRoutingTable.Builder updatedIndexRouting = IndexRoutingTable.builder(currentIndexRouting.getIndex());
+        for (final ObjectCursor<IndexShardRoutingTable> cursor : currentIndexRouting.getShards().values()) {
+            final IndexShardRoutingTable shardCopies = cursor.value;
+            assert shardCopies.getSize() > 2;
+            // the primary is already started, so one fewer replica than the wait count is needed
+            int replicasStillNeeded = waitForActiveShards.resolve(clusterState.metaData().index(indexName)) - 1;
+            for (ShardRouting copy : shardCopies.getShards()) {
+                if (copy.primary()) {
+                    assertTrue(copy.active());
+                } else if (copy.active()) {
+                    // an already-active replica counts towards the target
+                    replicasStillNeeded--;
+                } else if (replicasStillNeeded > 0) {
+                    copy = copy.initialize(randomAsciiOfLength(8), null, copy.getExpectedShardSize()).moveToStarted();
+                    replicasStillNeeded--;
+                }
+                updatedIndexRouting.addShard(copy);
+            }
+        }
+        final RoutingTable updatedRouting = RoutingTable.builder(currentRouting).add(updatedIndexRouting).build();
+        return ClusterState.builder(clusterState).routingTable(updatedRouting).build();
+    }
+
+    /**
+     * Returns a copy of the cluster state in which every replica of the index that is not yet active has
+     * been initialized and moved to started. Expects the primaries to be active already.
+     */
+    private ClusterState startAllShards(final ClusterState clusterState, final String indexName) {
+        final RoutingTable currentRouting = clusterState.routingTable();
+        final IndexRoutingTable currentIndexRouting = currentRouting.index(indexName);
+        final IndexRoutingTable.Builder updatedIndexRouting = IndexRoutingTable.builder(currentIndexRouting.getIndex());
+        for (final ObjectCursor<IndexShardRoutingTable> cursor : currentIndexRouting.getShards().values()) {
+            for (ShardRouting copy : cursor.value.getShards()) {
+                if (copy.primary()) {
+                    assertTrue(copy.active());
+                } else if (copy.active() == false) {
+                    copy = copy.initialize(randomAsciiOfLength(8), null, copy.getExpectedShardSize()).moveToStarted();
+                }
+                updatedIndexRouting.addShard(copy);
+            }
+        }
+        final RoutingTable updatedRouting = RoutingTable.builder(currentRouting).add(updatedIndexRouting).build();
+        return ClusterState.builder(clusterState).routingTable(updatedRouting).build();
+    }
+
+}

+ 155 - 0
core/src/test/java/org/elasticsearch/action/support/ActiveShardsObserverIT.java

@@ -0,0 +1,155 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.action.support;
+
+import org.elasticsearch.action.ListenableActionFuture;
+import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.test.ESIntegTestCase;
+
+import static org.elasticsearch.cluster.metadata.IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING;
+import static org.elasticsearch.cluster.metadata.IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+
+/**
+ * Tests that the index creation operation waits for the appropriate
+ * number of active shards to be started before returning.
+ */
+public class ActiveShardsObserverIT extends ESIntegTestCase {
+
+    /**
+     * Creates an index whose allocation excludes every current node by name, waits for one or all shard
+     * copies with a 100ms timeout, and expects the response to report that the shards were not acked.
+     */
+    public void testCreateIndexNoActiveShardsTimesOut() throws Exception {
+        final String index = "test-idx";
+        final Settings.Builder builder = Settings.builder()
+                                             .put(indexSettings())
+                                             .put(INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), randomIntBetween(1, 5))
+                                             .put(INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0);
+        if (internalCluster().getNodeNames().length > 0) {
+            // exclude all nodes from allocation, so the shards are expected to stay unassigned
+            builder.put("index.routing.allocation.exclude._name", String.join(",", internalCluster().getNodeNames()));
+        }
+        final CreateIndexResponse response = prepareCreate(index)
+                                                 .setSettings(builder.build())
+                                                 .setWaitForActiveShards(randomBoolean() ? ActiveShardCount.from(1) : ActiveShardCount.ALL)
+                                                 .setTimeout("100ms")
+                                                 .get();
+        assertFalse(response.isShardsAcked());
+    }
+
+    /**
+     * Creates an index whose allocation excludes every current node by name, but waits for zero active
+     * shards, and expects the creation itself to be acknowledged.
+     */
+    public void testCreateIndexNoActiveShardsNoWaiting() throws Exception {
+        final String index = "test-idx";
+        final Settings.Builder builder = Settings.builder()
+                                             .put(indexSettings())
+                                             .put(INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), randomIntBetween(1, 5))
+                                             .put(INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0);
+        if (internalCluster().getNodeNames().length > 0) {
+            // exclude all nodes from allocation, so the shards are expected to stay unassigned
+            builder.put("index.routing.allocation.exclude._name", String.join(",", internalCluster().getNodeNames()));
+        }
+        assertTrue(prepareCreate(index)
+                       .setSettings(builder.build())
+                       .setWaitForActiveShards(ActiveShardCount.from(0))
+                       .get()
+                       .isAcknowledged());
+    }
+
+    /**
+     * Requests at least as many replicas as there are data nodes and waits for more active copies than
+     * there are data nodes; with a 100ms timeout the response must report that the shards were not acked.
+     */
+    public void testCreateIndexNotEnoughActiveShardsTimesOut() throws Exception {
+        final String index = "test-idx";
+        final int dataNodes = internalCluster().numDataNodes();
+        final int replicas = dataNodes + randomInt(4);
+        final Settings indexConfig = Settings.builder()
+                                         .put(indexSettings())
+                                         .put(INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), randomIntBetween(1, 7))
+                                         .put(INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), replicas)
+                                         .build();
+        final CreateIndexResponse response = prepareCreate(index)
+                                                 .setSettings(indexConfig)
+                                                 .setWaitForActiveShards(ActiveShardCount.from(randomIntBetween(dataNodes + 1, replicas + 1)))
+                                                 .setTimeout("100ms")
+                                                 .get();
+        assertFalse(response.isShardsAcked());
+    }
+
+    /**
+     * Waits for a number of active copies no larger than the number of data nodes and expects the index
+     * creation to be acked, even though at least as many replicas as data nodes are configured.
+     */
+    public void testCreateIndexEnoughActiveShards() throws Exception {
+        final String index = "test-idx";
+        final Settings indexConfig = Settings.builder()
+                                         .put(indexSettings())
+                                         .put(INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), randomIntBetween(1, 7))
+                                         .put(INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), internalCluster().numDataNodes() + randomIntBetween(0, 3))
+                                         .build();
+        final int copiesToWaitFor = randomIntBetween(0, internalCluster().numDataNodes());
+        final ActiveShardCount waitFor = ActiveShardCount.from(copiesToWaitFor);
+        assertAcked(prepareCreate(index).setSettings(indexConfig).setWaitForActiveShards(waitFor).get());
+    }
+
+    /**
+     * Exercises {@link ActiveShardCount#ALL} twice: first with at least as many replicas as data nodes,
+     * where waiting must time out, then (after deleting the index if it exists) with one replica fewer
+     * than data nodes, where the creation must be acked.
+     */
+    public void testCreateIndexWaitsForAllActiveShards() throws Exception {
+        final String index = "test-idx";
+
+        // not enough data nodes, index creation times out
+        final int tooManyReplicas = internalCluster().numDataNodes() + randomInt(4);
+        final Settings unsatisfiable = Settings.builder()
+                                           .put(indexSettings())
+                                           .put(INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), randomIntBetween(1, 5))
+                                           .put(INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), tooManyReplicas)
+                                           .build();
+        final CreateIndexResponse timedOut = prepareCreate(index)
+                                                 .setSettings(unsatisfiable)
+                                                 .setWaitForActiveShards(ActiveShardCount.ALL)
+                                                 .setTimeout("100ms")
+                                                 .get();
+        assertFalse(timedOut.isShardsAcked());
+        if (client().admin().indices().prepareExists(index).get().isExists()) {
+            assertAcked(client().admin().indices().prepareDelete(index));
+        }
+
+        // enough data nodes, all shards are active
+        final Settings satisfiable = Settings.builder()
+                                         .put(indexSettings())
+                                         .put(INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), randomIntBetween(1, 7))
+                                         .put(INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), internalCluster().numDataNodes() - 1)
+                                         .build();
+        assertAcked(prepareCreate(index).setSettings(satisfiable).setWaitForActiveShards(ActiveShardCount.ALL).get());
+    }
+
+    /**
+     * Starts an asynchronous index creation that waits for all shard copies, deletes the index as soon as
+     * it appears in the cluster state, and checks that the pending create request still completes and is acked.
+     */
+    public void testCreateIndexStopsWaitingWhenIndexDeleted() throws Exception {
+        final String index = "test-idx";
+        final Settings indexConfig = Settings.builder()
+                                         .put(indexSettings())
+                                         .put(INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), randomIntBetween(1, 5))
+                                         .put(INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), internalCluster().numDataNodes() - 1)
+                                         .build();
+
+        logger.info("--> start the index creation process");
+        final ListenableActionFuture<CreateIndexResponse> pendingCreate = prepareCreate(index)
+                                                                              .setSettings(indexConfig)
+                                                                              .setWaitForActiveShards(ActiveShardCount.ALL)
+                                                                              .execute();
+
+        logger.info("--> wait until the cluster state contains the new index");
+        assertBusy(() -> assertTrue(client().admin().cluster().prepareState().get().getState().metaData().hasIndex(index)));
+
+        logger.info("--> delete the index");
+        assertAcked(client().admin().indices().prepareDelete(index));
+
+        logger.info("--> ensure the create index request completes");
+        assertAcked(pendingCreate.get());
+    }
+
+}

+ 24 - 1
core/src/test/java/org/elasticsearch/cluster/ClusterHealthIT.java

@@ -23,10 +23,13 @@ import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
 import org.elasticsearch.cluster.health.ClusterHealthStatus;
 import org.elasticsearch.common.Priority;
 import org.elasticsearch.test.ESIntegTestCase;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.not;
 
 public class ClusterHealthIT extends ESIntegTestCase {
+
     public void testSimpleLocalHealth() {
         createIndex("test");
         ensureGreen(); // master should thing it's green now.
@@ -68,4 +71,24 @@ public class ClusterHealthIT extends ESIntegTestCase {
         assertThat(healthResponse.getIndices().get("test1").getStatus(), equalTo(ClusterHealthStatus.GREEN));
         assertThat(healthResponse.getIndices().size(), equalTo(1));
     }
-}
+
+    /**
+     * Polls cluster health from a background thread while ten indices are created, asserting on every
+     * poll that the reported status is not {@link ClusterHealthStatus#RED}.
+     */
+    public void testHealthOnIndexCreation() throws Exception {
+        final AtomicBoolean finished = new AtomicBoolean(false);
+        Thread clusterHealthThread = new Thread() {
+            @Override
+            public void run() {
+                while (finished.get() == false) {
+                    ClusterHealthResponse health = client().admin().cluster().prepareHealth().get();
+                    assertThat(health.getStatus(), not(equalTo(ClusterHealthStatus.RED)));
+                }
+            }
+        };
+        clusterHealthThread.start();
+        try {
+            for (int i = 0; i < 10; i++) {
+                createIndex("test" + i);
+            }
+        } finally {
+            // always stop and join the poller, even if index creation throws, so the thread cannot outlive the test
+            finished.set(true);
+            clusterHealthThread.join();
+        }
+    }
+
+}

+ 2 - 1
core/src/test/java/org/elasticsearch/cluster/SimpleDataNodesIT.java

@@ -21,6 +21,7 @@ package org.elasticsearch.cluster;
 
 import org.elasticsearch.action.UnavailableShardsException;
 import org.elasticsearch.action.index.IndexResponse;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.client.Requests;
 import org.elasticsearch.common.Priority;
 import org.elasticsearch.common.settings.Settings;
@@ -40,7 +41,7 @@ import static org.hamcrest.Matchers.equalTo;
 public class SimpleDataNodesIT extends ESIntegTestCase {
     public void testDataNodes() throws Exception {
         internalCluster().startNode(Settings.builder().put(Node.NODE_DATA_SETTING.getKey(), false).build());
-        client().admin().indices().create(createIndexRequest("test")).actionGet();
+        client().admin().indices().create(createIndexRequest("test").waitForActiveShards(ActiveShardCount.NONE)).actionGet();
         try {
             client().index(Requests.indexRequest("test").type("type1").id("1").source(source("1", "test")).timeout(timeValueSeconds(1))).actionGet();
             fail("no allocation should happen");

+ 9 - 9
core/src/test/java/org/elasticsearch/cluster/allocation/ClusterRerouteIT.java

@@ -22,6 +22,7 @@ package org.elasticsearch.cluster.allocation;
 import org.apache.lucene.util.IOUtils;
 import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
 import org.elasticsearch.action.admin.cluster.reroute.ClusterRerouteResponse;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.action.support.WriteRequest.RefreshPolicy;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.health.ClusterHealthStatus;
@@ -91,7 +92,7 @@ public class ClusterRerouteIT extends ESIntegTestCase {
         final String node_2 = nodesIds.get(1);
 
         logger.info("--> create an index with 1 shard, 1 replica, nothing should allocate");
-        client().admin().indices().prepareCreate("test")
+        client().admin().indices().prepareCreate("test").setWaitForActiveShards(ActiveShardCount.NONE)
                 .setSettings(Settings.builder().put("index.number_of_shards", 1))
                 .execute().actionGet();
 
@@ -203,7 +204,7 @@ public class ClusterRerouteIT extends ESIntegTestCase {
         assertThat(healthResponse.isTimedOut(), equalTo(false));
 
         logger.info("--> create an index with 1 shard, 1 replica, nothing should allocate");
-        client().admin().indices().prepareCreate("test")
+        client().admin().indices().prepareCreate("test").setWaitForActiveShards(ActiveShardCount.NONE)
                 .setSettings(Settings.builder().put("index.number_of_shards", 1))
                 .execute().actionGet();
 
@@ -253,14 +254,13 @@ public class ClusterRerouteIT extends ESIntegTestCase {
         assertThat(state.getRoutingNodes().unassigned().size(), equalTo(1));
         assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_1).getId()).iterator().next().state(), equalTo(ShardRoutingState.INITIALIZING));
 
-        healthResponse = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForYellowStatus().execute().actionGet();
-        assertThat(healthResponse.isTimedOut(), equalTo(false));
-
         logger.info("--> get the state, verify shard 1 primary allocated");
-        state = client().admin().cluster().prepareState().execute().actionGet().getState();
-        assertThat(state.getRoutingNodes().unassigned().size(), equalTo(1));
-        assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_1).getId()).iterator().next().state(), equalTo(ShardRoutingState.STARTED));
-
+        final String nodeToCheck = node_1;
+        assertBusy(() -> {
+            ClusterState clusterState = client().admin().cluster().prepareState().execute().actionGet().getState();
+            String nodeId = clusterState.nodes().resolveNode(nodeToCheck).getId();
+            assertThat(clusterState.getRoutingNodes().node(nodeId).iterator().next().state(), equalTo(ShardRoutingState.STARTED));
+        });
     }
 
     public void testRerouteExplain() {

+ 367 - 7
core/src/test/java/org/elasticsearch/cluster/health/ClusterStateHealthTests.java

@@ -18,6 +18,8 @@
  */
 package org.elasticsearch.cluster.health;
 
+import com.carrotsearch.hppc.cursors.IntObjectCursor;
+import com.carrotsearch.hppc.cursors.ObjectCursor;
 import org.elasticsearch.Version;
 import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest;
 import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
@@ -32,12 +34,18 @@ import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
 import org.elasticsearch.cluster.metadata.MetaData;
 import org.elasticsearch.cluster.routing.IndexRoutingTable;
+import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
 import org.elasticsearch.cluster.routing.RoutingTable;
 import org.elasticsearch.cluster.routing.RoutingTableGenerator;
+import org.elasticsearch.cluster.routing.ShardRouting;
+import org.elasticsearch.cluster.routing.UnassignedInfo;
 import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.common.UUIDs;
+import org.elasticsearch.common.collect.ImmutableOpenIntMap;
 import org.elasticsearch.common.io.stream.BytesStreamOutput;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.set.Sets;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.test.gateway.NoopGatewayAllocator;
 import org.elasticsearch.test.transport.CapturingTransport;
@@ -50,7 +58,10 @@ import org.junit.Before;
 import org.junit.BeforeClass;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
@@ -65,14 +76,13 @@ import static org.hamcrest.Matchers.lessThanOrEqualTo;
 public class ClusterStateHealthTests extends ESTestCase {
     private final IndexNameExpressionResolver indexNameExpressionResolver = new IndexNameExpressionResolver(Settings.EMPTY);
 
-
     private static ThreadPool threadPool;
 
     private ClusterService clusterService;
     private TransportService transportService;
 
     @BeforeClass
-    public static void beforeClass() {
+    public static void setupThreadPool() {
         threadPool = new TestThreadPool("ClusterStateHealthTests");
     }
 
@@ -94,7 +104,7 @@ public class ClusterStateHealthTests extends ESTestCase {
     }
 
     @AfterClass
-    public static void afterClass() {
+    public static void terminateThreadPool() {
         ThreadPool.terminate(threadPool, 30, TimeUnit.SECONDS);
         threadPool = null;
     }
@@ -129,7 +139,6 @@ public class ClusterStateHealthTests extends ESTestCase {
         TransportClusterHealthAction action = new TransportClusterHealthAction(Settings.EMPTY, transportService,
             clusterService, threadPool, new ActionFilters(new HashSet<>()), indexNameExpressionResolver, NoopGatewayAllocator.INSTANCE);
         PlainActionFuture<ClusterHealthResponse> listener = new PlainActionFuture<>();
-
         action.execute(new ClusterHealthRequest(), listener);
 
         assertFalse(listener.isDone());
@@ -138,7 +147,6 @@ public class ClusterStateHealthTests extends ESTestCase {
         listener.get();
     }
 
-
     public void testClusterHealth() throws IOException {
         RoutingTableGenerator routingTableGenerator = new RoutingTableGenerator();
         RoutingTableGenerator.ShardCounter counter = new RoutingTableGenerator.ShardCounter();
@@ -157,14 +165,123 @@ public class ClusterStateHealthTests extends ESTestCase {
             metaData.put(indexMetaData, true);
             routingTable.add(indexRoutingTable);
         }
-        ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(routingTable.build()).build();
-        String[] concreteIndices = indexNameExpressionResolver.concreteIndexNames(clusterState, IndicesOptions.strictExpand(), (String[]) null);
+        ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
+                                                .metaData(metaData)
+                                                .routingTable(routingTable.build())
+                                                .build();
+        String[] concreteIndices = indexNameExpressionResolver.concreteIndexNames(
+            clusterState, IndicesOptions.strictExpand(), (String[]) null
+        );
         ClusterStateHealth clusterStateHealth = new ClusterStateHealth(clusterState, concreteIndices);
         logger.info("cluster status: {}, expected {}", clusterStateHealth.getStatus(), counter.status());
         clusterStateHealth = maybeSerialize(clusterStateHealth);
         assertClusterHealth(clusterStateHealth, counter);
     }
 
+    public void testClusterHealthOnIndexCreation() {
+        final String indexName = "test-idx";
+        final String[] indices = new String[] { indexName };
+        final List<ClusterState> clusterStates = simulateIndexCreationStates(indexName, false);
+        for (int i = 0; i < clusterStates.size(); i++) {
+            // make sure cluster health is always YELLOW, up until the last state where it should be GREEN
+            final ClusterState clusterState = clusterStates.get(i);
+            final ClusterStateHealth health = new ClusterStateHealth(clusterState, indices);
+            if (i < clusterStates.size() - 1) {
+                assertThat(health.getStatus(), equalTo(ClusterHealthStatus.YELLOW));
+            } else {
+                assertThat(health.getStatus(), equalTo(ClusterHealthStatus.GREEN));
+            }
+        }
+    }
+
+    public void testClusterHealthOnIndexCreationWithFailedAllocations() {
+        final String indexName = "test-idx";
+        final String[] indices = new String[] { indexName };
+        final List<ClusterState> clusterStates = simulateIndexCreationStates(indexName, true);
+        for (int i = 0; i < clusterStates.size(); i++) {
+            // make sure cluster health is YELLOW up until the final cluster state, which contains failed
+            // primary shard allocations that should make the cluster health RED
+            final ClusterState clusterState = clusterStates.get(i);
+            final ClusterStateHealth health = new ClusterStateHealth(clusterState, indices);
+            if (i < clusterStates.size() - 1) {
+                assertThat(health.getStatus(), equalTo(ClusterHealthStatus.YELLOW));
+            } else {
+                assertThat(health.getStatus(), equalTo(ClusterHealthStatus.RED));
+            }
+        }
+    }
+
+    public void testClusterHealthOnClusterRecovery() {
+        final String indexName = "test-idx";
+        final String[] indices = new String[] { indexName };
+        final List<ClusterState> clusterStates = simulateClusterRecoveryStates(indexName, false, false);
+        for (int i = 0; i < clusterStates.size(); i++) {
+            // make sure cluster health is YELLOW up until the final cluster state, when it turns GREEN
+            final ClusterState clusterState = clusterStates.get(i);
+            final ClusterStateHealth health = new ClusterStateHealth(clusterState, indices);
+            if (i < clusterStates.size() - 1) {
+                assertThat(health.getStatus(), equalTo(ClusterHealthStatus.YELLOW));
+            } else {
+                assertThat(health.getStatus(), equalTo(ClusterHealthStatus.GREEN));
+            }
+        }
+    }
+
+    public void testClusterHealthOnClusterRecoveryWithFailures() {
+        final String indexName = "test-idx";
+        final String[] indices = new String[] { indexName };
+        final List<ClusterState> clusterStates = simulateClusterRecoveryStates(indexName, false, true);
+        for (int i = 0; i < clusterStates.size(); i++) {
+            // make sure cluster health is YELLOW up until the final cluster state, which contains failed
+            // primary shard allocations that should make the cluster health RED
+            final ClusterState clusterState = clusterStates.get(i);
+            final ClusterStateHealth health = new ClusterStateHealth(clusterState, indices);
+            if (i < clusterStates.size() - 1) {
+                assertThat(health.getStatus(), equalTo(ClusterHealthStatus.YELLOW));
+            } else {
+                assertThat(health.getStatus(), equalTo(ClusterHealthStatus.RED));
+            }
+        }
+    }
+
+    public void testClusterHealthOnClusterRecoveryWithPreviousAllocationIds() {
+        final String indexName = "test-idx";
+        final String[] indices = new String[] { indexName };
+        final List<ClusterState> clusterStates = simulateClusterRecoveryStates(indexName, true, false);
+        for (int i = 0; i < clusterStates.size(); i++) {
+            // because there were previous allocation ids, we should be RED until the primaries are started,
+            // then move to YELLOW, and the last state should be GREEN when all shards have been started
+            final ClusterState clusterState = clusterStates.get(i);
+            final ClusterStateHealth health = new ClusterStateHealth(clusterState, indices);
+            if (i < clusterStates.size() - 1) {
+                // if the inactive primaries are due solely to recovery (not failed allocation or previously being allocated),
+                // then cluster health is YELLOW, otherwise RED
+                if (primaryInactiveDueToRecovery(indexName, clusterState)) {
+                    assertThat(health.getStatus(), equalTo(ClusterHealthStatus.YELLOW));
+                } else {
+                    assertThat(health.getStatus(), equalTo(ClusterHealthStatus.RED));
+                }
+            } else {
+                assertThat(health.getStatus(), equalTo(ClusterHealthStatus.GREEN));
+            }
+        }
+    }
+
+    public void testClusterHealthOnClusterRecoveryWithPreviousAllocationIdsAndAllocationFailures() {
+        final String indexName = "test-idx";
+        final String[] indices = new String[] { indexName };
+        for (final ClusterState clusterState : simulateClusterRecoveryStates(indexName, true, true)) {
+            final ClusterStateHealth health = new ClusterStateHealth(clusterState, indices);
+            // if the inactive primaries are due solely to recovery (not failed allocation or previously being allocated)
+            // then cluster health is YELLOW, otherwise RED
+            if (primaryInactiveDueToRecovery(indexName, clusterState)) {
+                assertThat(health.getStatus(), equalTo(ClusterHealthStatus.YELLOW));
+            } else {
+                assertThat(health.getStatus(), equalTo(ClusterHealthStatus.RED));
+            }
+        }
+    }
+
     ClusterStateHealth maybeSerialize(ClusterStateHealth clusterStateHealth) throws IOException {
         if (randomBoolean()) {
             BytesStreamOutput out = new BytesStreamOutput();
@@ -175,6 +292,249 @@ public class ClusterStateHealthTests extends ESTestCase {
         return clusterStateHealth;
     }
 
+    private List<ClusterState> simulateIndexCreationStates(final String indexName, final boolean withPrimaryAllocationFailures) {
+        final int numberOfShards = randomIntBetween(1, 5);
+        final int numberOfReplicas = randomIntBetween(1, numberOfShards);
+        // initial index creation and new routing table info
+        final IndexMetaData indexMetaData = IndexMetaData.builder(indexName)
+                                                         .settings(settings(Version.CURRENT)
+                                                                       .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()))
+                                                         .numberOfShards(numberOfShards)
+                                                         .numberOfReplicas(numberOfReplicas)
+                                                         .build();
+        final MetaData metaData = MetaData.builder().put(indexMetaData, true).build();
+        final RoutingTable routingTable = RoutingTable.builder().addAsNew(indexMetaData).build();
+
+        ClusterState clusterState = ClusterState.builder(new ClusterName("test_cluster"))
+                                                .metaData(metaData)
+                                                .routingTable(routingTable)
+                                                .build();
+        return generateClusterStates(clusterState, indexName, numberOfReplicas, withPrimaryAllocationFailures);
+    }
+
+    private List<ClusterState> simulateClusterRecoveryStates(final String indexName,
+                                                             final boolean withPreviousAllocationIds,
+                                                             final boolean withPrimaryAllocationFailures) {
+        final int numberOfShards = randomIntBetween(1, 5);
+        final int numberOfReplicas = randomIntBetween(1, numberOfShards);
+        // index metadata and routing table info for an index that is being recovered
+        IndexMetaData indexMetaData = IndexMetaData.builder(indexName)
+                                                   .settings(settings(Version.CURRENT)
+                                                                 .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()))
+                                                   .numberOfShards(numberOfShards)
+                                                   .numberOfReplicas(numberOfReplicas)
+                                                   .state(IndexMetaData.State.OPEN)
+                                                   .build();
+        if (withPreviousAllocationIds) {
+            final IndexMetaData.Builder idxMetaWithAllocationIds = IndexMetaData.builder(indexMetaData);
+            boolean atLeastOne = false;
+            for (int i = 0; i < numberOfShards; i++) {
+                if (atLeastOne == false || randomBoolean()) {
+                    idxMetaWithAllocationIds.putActiveAllocationIds(i, Sets.newHashSet(UUIDs.randomBase64UUID()));
+                    atLeastOne = true;
+                }
+            }
+            indexMetaData = idxMetaWithAllocationIds.build();
+        }
+        final MetaData metaData = MetaData.builder().put(indexMetaData, true).build();
+        final RoutingTable routingTable = RoutingTable.builder().addAsRecovery(indexMetaData).build();
+
+        ClusterState clusterState = ClusterState.builder(new ClusterName("test_cluster"))
+                                                .metaData(metaData)
+                                                .routingTable(routingTable)
+                                                .build();
+        return generateClusterStates(clusterState, indexName, numberOfReplicas, withPrimaryAllocationFailures);
+    }
+
+    private List<ClusterState> generateClusterStates(final ClusterState originalClusterState,
+                                                     final String indexName,
+                                                     final int numberOfReplicas,
+                                                     final boolean withPrimaryAllocationFailures) {
+        // generate random node ids
+        final List<String> nodeIds = new ArrayList<>();
+        final int numNodes = randomIntBetween(numberOfReplicas + 1, 10);
+        for (int i = 0; i < numNodes; i++) {
+            nodeIds.add(randomAsciiOfLength(8));
+        }
+
+        final List<ClusterState> clusterStates = new ArrayList<>();
+        clusterStates.add(originalClusterState);
+        ClusterState clusterState = originalClusterState;
+
+        // initialize primaries
+        RoutingTable routingTable = originalClusterState.routingTable();
+        IndexRoutingTable indexRoutingTable = routingTable.index(indexName);
+        IndexRoutingTable.Builder newIndexRoutingTable = IndexRoutingTable.builder(indexRoutingTable.getIndex());
+        for (final ObjectCursor<IndexShardRoutingTable> shardEntry : indexRoutingTable.getShards().values()) {
+            final IndexShardRoutingTable shardRoutingTable = shardEntry.value;
+            for (final ShardRouting shardRouting : shardRoutingTable.getShards()) {
+                if (shardRouting.primary()) {
+                    newIndexRoutingTable.addShard(
+                        shardRouting.initialize(nodeIds.get(randomIntBetween(0, numNodes - 1)), null, shardRouting.getExpectedShardSize())
+                    );
+                } else {
+                    newIndexRoutingTable.addShard(shardRouting);
+                }
+            }
+        }
+        routingTable = RoutingTable.builder(routingTable).add(newIndexRoutingTable).build();
+        clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
+        clusterStates.add(clusterState);
+
+        // some primaries started
+        indexRoutingTable = routingTable.index(indexName);
+        newIndexRoutingTable = IndexRoutingTable.builder(indexRoutingTable.getIndex());
+        ImmutableOpenIntMap.Builder<Set<String>> allocationIds = ImmutableOpenIntMap.<Set<String>>builder();
+        for (final ObjectCursor<IndexShardRoutingTable> shardEntry : indexRoutingTable.getShards().values()) {
+            final IndexShardRoutingTable shardRoutingTable = shardEntry.value;
+            for (final ShardRouting shardRouting : shardRoutingTable.getShards()) {
+                if (shardRouting.primary() && randomBoolean()) {
+                    final ShardRouting newShardRouting = shardRouting.moveToStarted();
+                    allocationIds.fPut(newShardRouting.getId(), Sets.newHashSet(newShardRouting.allocationId().getId()));
+                    newIndexRoutingTable.addShard(newShardRouting);
+                } else {
+                    newIndexRoutingTable.addShard(shardRouting);
+                }
+            }
+        }
+        routingTable = RoutingTable.builder(routingTable).add(newIndexRoutingTable).build();
+        IndexMetaData.Builder idxMetaBuilder = IndexMetaData.builder(clusterState.metaData().index(indexName));
+        for (final IntObjectCursor<Set<String>> entry : allocationIds.build()) {
+            idxMetaBuilder.putActiveAllocationIds(entry.key, entry.value);
+        }
+        MetaData.Builder metaDataBuilder = MetaData.builder(clusterState.metaData()).put(idxMetaBuilder);
+        clusterState = ClusterState.builder(clusterState).routingTable(routingTable).metaData(metaDataBuilder).build();
+        clusterStates.add(clusterState);
+
+        if (withPrimaryAllocationFailures) {
+            boolean alreadyFailedPrimary = false;
+            // some primaries failed to allocate
+            indexRoutingTable = routingTable.index(indexName);
+            newIndexRoutingTable = IndexRoutingTable.builder(indexRoutingTable.getIndex());
+            for (final ObjectCursor<IndexShardRoutingTable> shardEntry : indexRoutingTable.getShards().values()) {
+                final IndexShardRoutingTable shardRoutingTable = shardEntry.value;
+                for (final ShardRouting shardRouting : shardRoutingTable.getShards()) {
+                    if (shardRouting.primary() && (shardRouting.started() == false || alreadyFailedPrimary == false)) {
+                        newIndexRoutingTable.addShard(shardRouting.moveToUnassigned(
+                            new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, "unlucky shard")));
+                        alreadyFailedPrimary = true;
+                    } else {
+                        newIndexRoutingTable.addShard(shardRouting);
+                    }
+                }
+            }
+            routingTable = RoutingTable.builder(routingTable).add(newIndexRoutingTable).build();
+            clusterStates.add(ClusterState.builder(clusterState).routingTable(routingTable).build());
+            return clusterStates;
+        }
+
+        // all primaries started
+        indexRoutingTable = routingTable.index(indexName);
+        newIndexRoutingTable = IndexRoutingTable.builder(indexRoutingTable.getIndex());
+        allocationIds = ImmutableOpenIntMap.<Set<String>>builder();
+        for (final ObjectCursor<IndexShardRoutingTable> shardEntry : indexRoutingTable.getShards().values()) {
+            final IndexShardRoutingTable shardRoutingTable = shardEntry.value;
+            for (final ShardRouting shardRouting : shardRoutingTable.getShards()) {
+                if (shardRouting.primary() && shardRouting.started() == false) {
+                    final ShardRouting newShardRouting = shardRouting.moveToStarted();
+                    allocationIds.fPut(newShardRouting.getId(), Sets.newHashSet(newShardRouting.allocationId().getId()));
+                    newIndexRoutingTable.addShard(newShardRouting);
+                } else {
+                    newIndexRoutingTable.addShard(shardRouting);
+                }
+            }
+        }
+        routingTable = RoutingTable.builder(routingTable).add(newIndexRoutingTable).build();
+        idxMetaBuilder = IndexMetaData.builder(clusterState.metaData().index(indexName));
+        for (final IntObjectCursor<Set<String>> entry : allocationIds.build()) {
+            idxMetaBuilder.putActiveAllocationIds(entry.key, entry.value);
+        }
+        metaDataBuilder = MetaData.builder(clusterState.metaData()).put(idxMetaBuilder);
+        clusterState = ClusterState.builder(clusterState).routingTable(routingTable).metaData(metaDataBuilder).build();
+        clusterStates.add(clusterState);
+
+        // initialize replicas
+        indexRoutingTable = routingTable.index(indexName);
+        newIndexRoutingTable = IndexRoutingTable.builder(indexRoutingTable.getIndex());
+        for (final ObjectCursor<IndexShardRoutingTable> shardEntry : indexRoutingTable.getShards().values()) {
+            final IndexShardRoutingTable shardRoutingTable = shardEntry.value;
+            for (final ShardRouting shardRouting : shardRoutingTable.getShards()) {
+                if (shardRouting.primary() == false) {
+                    // give the replica a different node id than the primary
+                    final String primaryNodeId = shardRoutingTable.primaryShard().currentNodeId();
+                    String replicaNodeId;
+                    do {
+                        replicaNodeId = nodeIds.get(randomIntBetween(0, numNodes - 1));
+                    } while (primaryNodeId.equals(replicaNodeId));
+                    newIndexRoutingTable.addShard(
+                        shardRouting.initialize(replicaNodeId, null, shardRouting.getExpectedShardSize())
+                    );
+                } else {
+                    newIndexRoutingTable.addShard(shardRouting);
+                }
+            }
+        }
+        routingTable = RoutingTable.builder(routingTable).add(newIndexRoutingTable).build();
+        clusterStates.add(ClusterState.builder(clusterState).routingTable(routingTable).build());
+
+        // some replicas started
+        indexRoutingTable = routingTable.index(indexName);
+        newIndexRoutingTable = IndexRoutingTable.builder(indexRoutingTable.getIndex());
+        for (final ObjectCursor<IndexShardRoutingTable> shardEntry : indexRoutingTable.getShards().values()) {
+            final IndexShardRoutingTable shardRoutingTable = shardEntry.value;
+            for (final ShardRouting shardRouting : shardRoutingTable.getShards()) {
+                if (shardRouting.primary() == false && randomBoolean()) {
+                    newIndexRoutingTable.addShard(shardRouting.moveToStarted());
+                } else {
+                    newIndexRoutingTable.addShard(shardRouting);
+                }
+            }
+        }
+        routingTable = RoutingTable.builder(routingTable).add(newIndexRoutingTable).build();
+        clusterStates.add(ClusterState.builder(clusterState).routingTable(routingTable).build());
+
+        // all replicas started
+        boolean replicaStateChanged = false;
+        indexRoutingTable = routingTable.index(indexName);
+        newIndexRoutingTable = IndexRoutingTable.builder(indexRoutingTable.getIndex());
+        for (final ObjectCursor<IndexShardRoutingTable> shardEntry : indexRoutingTable.getShards().values()) {
+            final IndexShardRoutingTable shardRoutingTable = shardEntry.value;
+            for (final ShardRouting shardRouting : shardRoutingTable.getShards()) {
+                if (shardRouting.primary() == false && shardRouting.started() == false) {
+                    newIndexRoutingTable.addShard(shardRouting.moveToStarted());
+                    replicaStateChanged = true;
+                } else {
+                    newIndexRoutingTable.addShard(shardRouting);
+                }
+            }
+        }
+        // all of the replicas may have moved to started in the previous phase already
+        if (replicaStateChanged) {
+            routingTable = RoutingTable.builder(routingTable).add(newIndexRoutingTable).build();
+            clusterStates.add(ClusterState.builder(clusterState).routingTable(routingTable).build());
+        }
+
+        return clusterStates;
+    }
+
+    // returns true if the inactive primaries in the index are only due to cluster recovery
+    // (not because of allocation failure or previously having allocation ids assigned)
+    private boolean primaryInactiveDueToRecovery(final String indexName, final ClusterState clusterState) {
+        for (final IntObjectCursor<IndexShardRoutingTable> shardRouting : clusterState.routingTable().index(indexName).shards()) {
+            final ShardRouting primaryShard = shardRouting.value.primaryShard();
+            if (primaryShard.active() == false) {
+                if (clusterState.metaData().index(indexName).activeAllocationIds(shardRouting.key).isEmpty() == false) {
+                    return false;
+                }
+                if (primaryShard.unassignedInfo() != null &&
+                        primaryShard.unassignedInfo().getReason() == UnassignedInfo.Reason.ALLOCATION_FAILED) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
     private void assertClusterHealth(ClusterStateHealth clusterStateHealth, RoutingTableGenerator.ShardCounter counter) {
         assertThat(clusterStateHealth.getStatus(), equalTo(counter.status()));
         assertThat(clusterStateHealth.getActiveShards(), equalTo(counter.active));

+ 1 - 0
core/src/test/java/org/elasticsearch/cluster/metadata/MetaDataCreateIndexServiceTests.java

@@ -216,6 +216,7 @@ public class MetaDataCreateIndexServiceTests extends ESTestCase {
             new HashSet<>(),
             null,
             null,
+            null,
             null);
     }
 }

+ 17 - 7
core/src/test/java/org/elasticsearch/cluster/routing/PrimaryAllocationIT.java

@@ -22,6 +22,7 @@ package org.elasticsearch.cluster.routing;
 import com.carrotsearch.hppc.cursors.IntObjectCursor;
 import org.elasticsearch.action.admin.cluster.reroute.ClusterRerouteRequestBuilder;
 import org.elasticsearch.action.admin.indices.shards.IndicesShardStoresResponse;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.routing.allocation.command.AllocateEmptyPrimaryAllocationCommand;
 import org.elasticsearch.cluster.routing.allocation.command.AllocateStalePrimaryAllocationCommand;
@@ -108,7 +109,7 @@ public class PrimaryAllocationIT extends ESIntegTestCase {
         logger.info("--> check that old primary shard does not get promoted to primary again");
         // kick reroute and wait for all shard states to be fetched
         client(master).admin().cluster().prepareReroute().get();
-        assertBusy(new Runnable() { 
+        assertBusy(new Runnable() {
             @Override
             public void run() {
                 assertThat(internalCluster().getInstance(GatewayAllocator.class, master).getNumberOfInFlightFetch(), equalTo(0));
@@ -157,7 +158,8 @@ public class PrimaryAllocationIT extends ESIntegTestCase {
         createStaleReplicaScenario();
 
         logger.info("--> explicitly promote old primary shard");
-        ImmutableOpenIntMap<List<IndicesShardStoresResponse.StoreStatus>> storeStatuses = client().admin().indices().prepareShardStores("test").get().getStoreStatuses().get("test");
+        final String idxName = "test";
+        ImmutableOpenIntMap<List<IndicesShardStoresResponse.StoreStatus>> storeStatuses = client().admin().indices().prepareShardStores(idxName).get().getStoreStatuses().get(idxName);
         ClusterRerouteRequestBuilder rerouteBuilder = client().admin().cluster().prepareReroute();
         for (IntObjectCursor<List<IndicesShardStoresResponse.StoreStatus>> shardStoreStatuses : storeStatuses) {
             int shardId = shardStoreStatuses.key;
@@ -165,22 +167,30 @@ public class PrimaryAllocationIT extends ESIntegTestCase {
             logger.info("--> adding allocation command for shard {}", shardId);
             // force allocation based on node id
             if (useStaleReplica) {
-                rerouteBuilder.add(new AllocateStalePrimaryAllocationCommand("test", shardId, storeStatus.getNode().getId(), true));
+                rerouteBuilder.add(new AllocateStalePrimaryAllocationCommand(idxName, shardId, storeStatus.getNode().getId(), true));
             } else {
-                rerouteBuilder.add(new AllocateEmptyPrimaryAllocationCommand("test", shardId, storeStatus.getNode().getId(), true));
+                rerouteBuilder.add(new AllocateEmptyPrimaryAllocationCommand(idxName, shardId, storeStatus.getNode().getId(), true));
             }
         }
         rerouteBuilder.get();
 
         logger.info("--> check that the stale primary shard gets allocated and that documents are available");
-        ensureYellow("test");
+        ensureYellow(idxName);
+
+        if (useStaleReplica == false) {
+            // When invoking AllocateEmptyPrimaryAllocationCommand, due to the UnassignedInfo.Reason being changed to INDEX_CREATION,
+            // it's possible that the shard has not completed initialization, even though the cluster health is yellow, so the
+            // search can throw an "all shards failed" exception.  We will wait until the shard initialization has completed before
+            // verifying the search hit count.
+            assertBusy(() -> assertTrue(clusterService().state().routingTable().index(idxName).allPrimaryShardsActive()));
 
-        assertHitCount(client().prepareSearch("test").setSize(0).setQuery(matchAllQuery()).get(), useStaleReplica ? 1L : 0L);
+        }
+        assertHitCount(client().prepareSearch(idxName).setSize(0).setQuery(matchAllQuery()).get(), useStaleReplica ? 1L : 0L);
     }
 
     public void testForcePrimaryShardIfAllocationDecidersSayNoAfterIndexCreation() throws ExecutionException, InterruptedException {
         String node = internalCluster().startNode();
-        client().admin().indices().prepareCreate("test").setSettings(Settings.builder()
+        client().admin().indices().prepareCreate("test").setWaitForActiveShards(ActiveShardCount.NONE).setSettings(Settings.builder()
             .put("index.routing.allocation.exclude._name", node)
             .put("index.number_of_shards", 1).put("index.number_of_replicas", 0)).get();
 

+ 18 - 8
core/src/test/java/org/elasticsearch/cluster/routing/RoutingTableGenerator.java

@@ -24,6 +24,8 @@ import org.elasticsearch.cluster.health.ClusterHealthStatus;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.index.shard.ShardId;
 
+import static org.elasticsearch.cluster.health.ClusterShardHealth.getInactivePrimaryHealth;
+
 public class RoutingTableGenerator {
     private static int node_id = 1;
 
@@ -56,14 +58,15 @@ public class RoutingTableGenerator {
 
     }
 
-    public IndexShardRoutingTable genShardRoutingTable(String index, int shardId, int replicas, ShardCounter counter) {
+    public IndexShardRoutingTable genShardRoutingTable(IndexMetaData indexMetaData, int shardId, ShardCounter counter) {
+        final String index = indexMetaData.getIndex().getName();
         IndexShardRoutingTable.Builder builder = new IndexShardRoutingTable.Builder(new ShardId(index, "_na_", shardId));
         ShardRouting shardRouting = genShardRouting(index, shardId, true);
-        counter.update(shardRouting);
+        counter.update(shardRouting, indexMetaData);
         builder.addShard(shardRouting);
-        for (; replicas > 0; replicas--) {
+        for (int replicas = indexMetaData.getNumberOfReplicas(); replicas > 0; replicas--) {
             shardRouting = genShardRouting(index, shardId, false);
-            counter.update(shardRouting);
+            counter.update(shardRouting, indexMetaData);
             builder.addShard(shardRouting);
         }
 
@@ -73,8 +76,7 @@ public class RoutingTableGenerator {
     public IndexRoutingTable genIndexRoutingTable(IndexMetaData indexMetaData, ShardCounter counter) {
         IndexRoutingTable.Builder builder = IndexRoutingTable.builder(indexMetaData.getIndex());
         for (int shard = 0; shard < indexMetaData.getNumberOfShards(); shard++) {
-            builder.addIndexShard(genShardRoutingTable(indexMetaData.getIndex().getName(), shard,
-                                  indexMetaData.getNumberOfReplicas(), counter));
+            builder.addIndexShard(genShardRoutingTable(indexMetaData, shard, counter));
         }
         return builder.build();
     }
@@ -86,10 +88,15 @@ public class RoutingTableGenerator {
         public int unassigned;
         public int primaryActive;
         public int primaryInactive;
+        private boolean inactivePrimaryCausesRed = false;
 
         public ClusterHealthStatus status() {
             if (primaryInactive > 0) {
-                return ClusterHealthStatus.RED;
+                if (inactivePrimaryCausesRed) {
+                    return ClusterHealthStatus.RED;
+                } else {
+                    return ClusterHealthStatus.YELLOW;
+                }
             }
             if (unassigned > 0 || initializing > 0) {
                 return ClusterHealthStatus.YELLOW;
@@ -97,7 +104,7 @@ public class RoutingTableGenerator {
             return ClusterHealthStatus.GREEN;
         }
 
-        public void update(ShardRouting shardRouting) {
+        public void update(ShardRouting shardRouting, IndexMetaData indexMetaData) {
             if (shardRouting.active()) {
                 active++;
                 if (shardRouting.primary()) {
@@ -111,6 +118,9 @@ public class RoutingTableGenerator {
 
             if (shardRouting.primary()) {
                 primaryInactive++;
+                if (inactivePrimaryCausesRed == false) {
+                    inactivePrimaryCausesRed = getInactivePrimaryHealth(shardRouting, indexMetaData) == ClusterHealthStatus.RED;
+                }
             }
             if (shardRouting.initializing()) {
                 initializing++;

+ 18 - 3
core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java

@@ -26,19 +26,22 @@ import org.elasticsearch.cluster.ClusterName;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.cluster.metadata.MetaData;
+import org.elasticsearch.cluster.routing.UnassignedInfo.AllocationStatus;
 import org.elasticsearch.common.UUIDs;
+import org.elasticsearch.common.io.stream.ByteBufferStreamInput;
 import org.elasticsearch.snapshots.SnapshotId;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
 import org.elasticsearch.cluster.routing.allocation.AllocationService;
 import org.elasticsearch.cluster.routing.allocation.FailedRerouteAllocation;
 import org.elasticsearch.common.io.stream.BytesStreamOutput;
-import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.snapshots.Snapshot;
 import org.elasticsearch.test.ESAllocationTestCase;
 
+import java.io.IOException;
+import java.nio.ByteBuffer;
 import java.util.Collections;
 
 import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING;
@@ -76,7 +79,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
     public void testSerialization() throws Exception {
         UnassignedInfo.Reason reason = RandomPicks.randomFrom(random(), UnassignedInfo.Reason.values());
         UnassignedInfo meta = reason == UnassignedInfo.Reason.ALLOCATION_FAILED ?
-            new UnassignedInfo(reason, randomBoolean() ? randomAsciiOfLength(4) : null, null, randomIntBetween(1, 100), System.nanoTime(), System.currentTimeMillis(), false):
+            new UnassignedInfo(reason, randomBoolean() ? randomAsciiOfLength(4) : null, null, randomIntBetween(1, 100), System.nanoTime(),
+                               System.currentTimeMillis(), false, AllocationStatus.NO_ATTEMPT):
             new UnassignedInfo(reason, randomBoolean() ? randomAsciiOfLength(4) : null);
         BytesStreamOutput out = new BytesStreamOutput();
         meta.writeTo(out);
@@ -265,7 +269,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
      */
     public void testRemainingDelayCalculation() throws Exception {
         final long baseTime = System.nanoTime();
-        UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "test", null, 0, baseTime, System.currentTimeMillis(), randomBoolean());
+        UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "test", null, 0, baseTime,
+                                                           System.currentTimeMillis(), randomBoolean(), AllocationStatus.NO_ATTEMPT);
         final long totalDelayNanos = TimeValue.timeValueMillis(10).nanos();
         final Settings indexSettings = Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), TimeValue.timeValueNanos(totalDelayNanos)).build();
         long delay = unassignedInfo.getRemainingDelay(baseTime, indexSettings);
@@ -340,4 +345,14 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
 
         assertThat(UnassignedInfo.findNextDelayedAllocation(baseTime + delta, clusterState), equalTo(expectMinDelaySettingsNanos - delta));
     }
+
+    public void testAllocationStatusSerialization() throws IOException {
+        for (AllocationStatus allocationStatus : AllocationStatus.values()) {
+            BytesStreamOutput out = new BytesStreamOutput();
+            allocationStatus.writeTo(out);
+            ByteBufferStreamInput in = new ByteBufferStreamInput(ByteBuffer.wrap(out.bytes().toBytesRef().bytes));
+            AllocationStatus readStatus = AllocationStatus.readFrom(in);
+            assertThat(readStatus, equalTo(allocationStatus));
+        }
+    }
 }

+ 2 - 1
core/src/test/java/org/elasticsearch/cluster/routing/allocation/ClusterRebalanceRoutingTests.java

@@ -27,6 +27,7 @@ import org.elasticsearch.cluster.node.DiscoveryNodes;
 import org.elasticsearch.cluster.routing.RoutingNodes;
 import org.elasticsearch.cluster.routing.RoutingTable;
 import org.elasticsearch.cluster.routing.ShardRouting;
+import org.elasticsearch.cluster.routing.UnassignedInfo;
 import org.elasticsearch.cluster.routing.allocation.decider.ClusterRebalanceAllocationDecider;
 import org.elasticsearch.common.logging.ESLogger;
 import org.elasticsearch.common.logging.Loggers;
@@ -642,7 +643,7 @@ public class ClusterRebalanceRoutingTests extends ESAllocationTestCase {
                     while (iterator.hasNext()) {
                         ShardRouting next = iterator.next();
                         if ("test1".equals(next.index().getName())) {
-                            iterator.removeAndIgnore();
+                            iterator.removeAndIgnore(UnassignedInfo.AllocationStatus.NO_ATTEMPT);
                         }
 
                     }

+ 169 - 0
core/src/test/java/org/elasticsearch/cluster/routing/allocation/DecisionsImpactOnClusterHealthTests.java

@@ -0,0 +1,169 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.cluster.routing.allocation;
+
+import org.elasticsearch.Version;
+import org.elasticsearch.cluster.ClusterName;
+import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.EmptyClusterInfoService;
+import org.elasticsearch.cluster.health.ClusterHealthStatus;
+import org.elasticsearch.cluster.health.ClusterStateHealth;
+import org.elasticsearch.cluster.metadata.IndexMetaData;
+import org.elasticsearch.cluster.metadata.MetaData;
+import org.elasticsearch.cluster.node.DiscoveryNodes;
+import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
+import org.elasticsearch.cluster.routing.RoutingNode;
+import org.elasticsearch.cluster.routing.RoutingTable;
+import org.elasticsearch.cluster.routing.ShardRouting;
+import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator;
+import org.elasticsearch.cluster.routing.allocation.decider.AllocationDecider;
+import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders;
+import org.elasticsearch.cluster.routing.allocation.decider.Decision;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.test.ESAllocationTestCase;
+import org.elasticsearch.test.gateway.NoopGatewayAllocator;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Set;
+
+import static org.hamcrest.Matchers.equalTo;
+
+/**
+ * This class of tests exercises various scenarios of
+ * primary shard allocation and assert the cluster health
+ * has the correct status based on those allocation decisions.
+ */
+public class DecisionsImpactOnClusterHealthTests extends ESAllocationTestCase {
+
+    public void testPrimaryShardNoDecisionOnIndexCreation() throws IOException {
+        final String indexName = "test-idx";
+        Settings settings = Settings.builder()
+                                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toAbsolutePath().toString())
+                                .build();
+        AllocationDecider decider = new TestAllocateDecision(Decision.NO);
+        // if deciders say NO to allocating a primary shard, then the cluster health should be RED
+        runAllocationTest(
+            settings, indexName, Collections.singleton(decider), ClusterHealthStatus.RED
+        );
+    }
+
+    public void testPrimaryShardThrottleDecisionOnIndexCreation() throws IOException {
+        final String indexName = "test-idx";
+        Settings settings = Settings.builder()
+                                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toAbsolutePath().toString())
+                                .build();
+        AllocationDecider decider = new TestAllocateDecision(Decision.THROTTLE) {
+            // the only allocation decider that implements this is ShardsLimitAllocationDecider and it always
+            // returns only YES or NO, never THROTTLE
+            @Override
+            public Decision canAllocate(RoutingNode node, RoutingAllocation allocation) {
+                return randomBoolean() ? Decision.YES : Decision.NO;
+            }
+        };
+        // if deciders THROTTLE allocating a primary shard, stay in YELLOW state
+        runAllocationTest(
+            settings, indexName, Collections.singleton(decider), ClusterHealthStatus.YELLOW
+        );
+    }
+
+    public void testPrimaryShardYesDecisionOnIndexCreation() throws IOException {
+        final String indexName = "test-idx";
+        Settings settings = Settings.builder()
+                                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toAbsolutePath().toString())
+                                .build();
+        AllocationDecider decider = new TestAllocateDecision(Decision.YES) {
+            @Override
+            public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
+                if (node.getByShardId(shardRouting.shardId()) == null) {
+                    return Decision.YES;
+                } else {
+                    return Decision.NO;
+                }
+            }
+        };
+        // if deciders say YES to allocating primary shards, stay in YELLOW state
+        ClusterState clusterState = runAllocationTest(
+            settings, indexName, Collections.singleton(decider), ClusterHealthStatus.YELLOW
+        );
+        // make sure primaries are initialized
+        RoutingTable routingTable = clusterState.routingTable();
+        for (IndexShardRoutingTable indexShardRoutingTable : routingTable.index(indexName)) {
+            assertTrue(indexShardRoutingTable.primaryShard().initializing());
+        }
+    }
+
+    private ClusterState runAllocationTest(final Settings settings,
+                                           final String indexName,
+                                           final Set<AllocationDecider> allocationDeciders,
+                                           final ClusterHealthStatus expectedStatus) throws IOException {
+
+        final String clusterName = "test-cluster";
+        final AllocationService allocationService = newAllocationService(settings, allocationDeciders);
+
+        logger.info("Building initial routing table");
+        final int numShards = randomIntBetween(1, 5);
+        MetaData metaData = MetaData.builder()
+                                .put(IndexMetaData.builder(indexName)
+                                         .settings(settings(Version.CURRENT))
+                                         .numberOfShards(numShards)
+                                         .numberOfReplicas(1))
+                                .build();
+
+        RoutingTable routingTable = RoutingTable.builder()
+                                        .addAsNew(metaData.index(indexName))
+                                        .build();
+
+        ClusterState clusterState = ClusterState.builder(new ClusterName(clusterName))
+                                        .metaData(metaData)
+                                        .routingTable(routingTable)
+                                        .build();
+
+        logger.info("--> adding nodes");
+        // we need at least as many nodes as shards for the THROTTLE case, because
+        // once a shard has been throttled on a node, that node no longer accepts
+        // any allocations on it
+        final DiscoveryNodes.Builder discoveryNodes = DiscoveryNodes.builder();
+        for (int i = 0; i < numShards; i++) {
+            discoveryNodes.put(newNode("node" + i));
+        }
+        clusterState = ClusterState.builder(clusterState).nodes(discoveryNodes).build();
+
+        logger.info("--> do the reroute");
+        routingTable = allocationService.reroute(clusterState, "reroute").routingTable();
+        clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
+
+        logger.info("--> assert cluster health");
+        ClusterStateHealth health = new ClusterStateHealth(clusterState);
+        assertThat(health.getStatus(), equalTo(expectedStatus));
+
+        return clusterState;
+    }
+
+    private static AllocationService newAllocationService(Settings settings, Set<AllocationDecider> deciders) {
+        return new AllocationService(settings,
+                                     new AllocationDeciders(settings, deciders),
+                                     NoopGatewayAllocator.INSTANCE,
+                                     new BalancedShardsAllocator(settings),
+                                     EmptyClusterInfoService.INSTANCE);
+    }
+
+}

+ 2 - 1
core/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java

@@ -24,6 +24,7 @@ import org.elasticsearch.Version;
 import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
 import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
 import org.elasticsearch.action.get.GetResponse;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.client.Client;
 import org.elasticsearch.client.Requests;
 import org.elasticsearch.cluster.ClusterState;
@@ -205,7 +206,7 @@ public class GatewayIndexStateIT extends ESIntegTestCase {
         internalCluster().startNode(Settings.builder().put(Node.NODE_DATA_SETTING.getKey(), false).build());
 
         logger.info("--> create an index");
-        client().admin().indices().prepareCreate("test").execute().actionGet();
+        client().admin().indices().prepareCreate("test").setWaitForActiveShards(ActiveShardCount.NONE).execute().actionGet();
 
         logger.info("--> closing master node");
         internalCluster().closeNonSharedNodes(false);

+ 54 - 11
core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java

@@ -21,7 +21,10 @@ package org.elasticsearch.gateway;
 
 import org.apache.lucene.index.CorruptIndexException;
 import org.elasticsearch.Version;
+import org.elasticsearch.cluster.ClusterName;
 import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.health.ClusterHealthStatus;
+import org.elasticsearch.cluster.health.ClusterStateHealth;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.cluster.metadata.MetaData;
 import org.elasticsearch.cluster.node.DiscoveryNode;
@@ -50,6 +53,7 @@ import java.util.Map;
 
 import static org.hamcrest.Matchers.anyOf;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.lessThanOrEqualTo;
 
 /**
  */
@@ -77,6 +81,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         assertThat(changed, equalTo(false));
         assertThat(allocation.routingNodes().unassigned().size(), equalTo(1));
         assertThat(allocation.routingNodes().unassigned().iterator().next().shardId(), equalTo(shardId));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     /**
@@ -90,9 +95,10 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
             allocation = routingAllocationWithOnePrimaryNoReplicas(yesAllocationDeciders(), false, Version.V_2_1_0);
         }
         boolean changed = testAllocator.allocateUnassigned(allocation);
-        assertThat(changed, equalTo(false));
+        assertThat(changed, equalTo(true));
         assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1));
         assertThat(allocation.routingNodes().unassigned().ignored().get(0).shardId(), equalTo(shardId));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     /**
@@ -108,9 +114,10 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         }
         testAllocator.addData(node1, ShardStateMetaData.NO_VERSION, null, randomBoolean());
         boolean changed = testAllocator.allocateUnassigned(allocation);
-        assertThat(changed, equalTo(false));
+        assertThat(changed, equalTo(true));
         assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1));
         assertThat(allocation.routingNodes().unassigned().ignored().get(0).shardId(), equalTo(shardId));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     /**
@@ -120,9 +127,10 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         RoutingAllocation allocation = routingAllocationWithOnePrimaryNoReplicas(yesAllocationDeciders(), false, Version.CURRENT, "id2");
         testAllocator.addData(node1, ShardStateMetaData.NO_VERSION, "id1", randomBoolean());
         boolean changed = testAllocator.allocateUnassigned(allocation);
-        assertThat(changed, equalTo(false));
+        assertThat(changed, equalTo(true));
         assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1));
         assertThat(allocation.routingNodes().unassigned().ignored().get(0).shardId(), equalTo(shardId));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     /**
@@ -137,6 +145,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         assertThat(allocation.routingNodes().unassigned().ignored().isEmpty(), equalTo(true));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(), equalTo(node1.getId()));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     /**
@@ -152,9 +161,10 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
             testAllocator.addData(node1, 3, null, randomBoolean(), new CorruptIndexException("test", "test"));
         }
         boolean changed = testAllocator.allocateUnassigned(allocation);
-        assertThat(changed, equalTo(false));
+        assertThat(changed, equalTo(true));
         assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1));
         assertThat(allocation.routingNodes().unassigned().ignored().get(0).shardId(), equalTo(shardId));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     /**
@@ -179,6 +189,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
             // check that allocation id is reused
             assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).allocationId().getId(), equalTo("allocId1"));
         }
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     /**
@@ -197,6 +208,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
         DiscoveryNode allocatedNode = node1HasPrimaryShard ? node1 : node2;
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(), equalTo(allocatedNode.getId()));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     /**
@@ -213,9 +225,10 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
             testAllocator.addData(node1, 3, null, randomBoolean());
         }
         boolean changed = testAllocator.allocateUnassigned(allocation);
-        assertThat(changed, equalTo(false));
+        assertThat(changed, equalTo(true));
         assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1));
         assertThat(allocation.routingNodes().unassigned().ignored().get(0).shardId(), equalTo(shardId));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     /**
@@ -236,6 +249,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         assertThat(allocation.routingNodes().unassigned().ignored().isEmpty(), equalTo(true));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(), equalTo(node1.getId()));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     /**
@@ -249,6 +263,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         assertThat(allocation.routingNodes().unassigned().ignored().isEmpty(), equalTo(true));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(), equalTo(node2.getId()));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     /**
@@ -268,6 +283,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(), equalTo(node2.getId()));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).allocationId().getId(), equalTo("some allocId"));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     /**
@@ -285,6 +301,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         assertThat(changed, equalTo(true));
         assertThat(allocation.routingNodes().unassigned().ignored().isEmpty(), equalTo(true));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     /**
@@ -299,8 +316,9 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         RoutingAllocation allocation = getRestoreRoutingAllocation(throttleAllocationDeciders(), clusterHasActiveAllocationIds);
         testAllocator.addData(node1, legacyVersion, allocationId, randomBoolean());
         boolean changed = testAllocator.allocateUnassigned(allocation);
-        assertThat(changed, equalTo(false));
+        assertThat(changed, equalTo(true));
         assertThat(allocation.routingNodes().unassigned().ignored().isEmpty(), equalTo(false));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     /**
@@ -318,6 +336,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         assertThat(changed, equalTo(true));
         assertThat(allocation.routingNodes().unassigned().ignored().isEmpty(), equalTo(true));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     /**
@@ -331,6 +350,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         assertThat(changed, equalTo(false));
         assertThat(allocation.routingNodes().unassigned().ignored().isEmpty(), equalTo(true));
         assertThat(allocation.routingNodes().unassigned().size(), equalTo(1));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     private RoutingAllocation getRestoreRoutingAllocation(AllocationDeciders allocationDeciders, boolean hasActiveAllocation) {
@@ -365,6 +385,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         assertThat(changed, equalTo(true));
         assertThat(allocation.routingNodes().unassigned().ignored().isEmpty(), equalTo(true));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.RED);
     }
 
     /**
@@ -378,8 +399,9 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         RoutingAllocation allocation = getRecoverOnAnyNodeRoutingAllocation(throttleAllocationDeciders(), hasActiveAllocation);
         testAllocator.addData(node1, legacyVersion, allocationId, randomBoolean());
         boolean changed = testAllocator.allocateUnassigned(allocation);
-        assertThat(changed, equalTo(false));
+        assertThat(changed, equalTo(true));
         assertThat(allocation.routingNodes().unassigned().ignored().isEmpty(), equalTo(false));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     /**
@@ -396,6 +418,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         assertThat(changed, equalTo(true));
         assertThat(allocation.routingNodes().unassigned().ignored().isEmpty(), equalTo(true));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.RED);
     }
 
     /**
@@ -409,6 +432,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         assertThat(changed, equalTo(false));
         assertThat(allocation.routingNodes().unassigned().ignored().isEmpty(), equalTo(true));
         assertThat(allocation.routingNodes().unassigned().size(), equalTo(1));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.YELLOW);
     }
 
     private RoutingAllocation getRecoverOnAnyNodeRoutingAllocation(AllocationDeciders allocationDeciders, boolean hasActiveAllocation) {
@@ -448,18 +472,20 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
 
         RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false);
         boolean changed = testAllocator.allocateUnassigned(allocation);
-        assertThat(changed, equalTo(false));
+        assertThat(changed, equalTo(true));
         assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1));
         assertThat(allocation.routingNodes().unassigned().ignored().get(0).shardId(), equalTo(shardId));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.RED);
 
         testAllocator.addData(node1, 1, null, randomBoolean());
         allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false);
         changed = testAllocator.allocateUnassigned(allocation);
-        assertThat(changed, equalTo(false));
+        assertThat(changed, equalTo(true));
         assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1));
         assertThat(allocation.routingNodes().unassigned().ignored().get(0).shardId(), equalTo(shardId));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.RED);
 
         testAllocator.addData(node2, 1, null, randomBoolean());
         allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false);
@@ -469,6 +495,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(), anyOf(equalTo(node2.getId()), equalTo(node1.getId())));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.RED);
     }
 
     /**
@@ -489,18 +516,20 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
 
         RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false);
         boolean changed = testAllocator.allocateUnassigned(allocation);
-        assertThat(changed, equalTo(false));
+        assertThat(changed, equalTo(true));
         assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1));
         assertThat(allocation.routingNodes().unassigned().ignored().get(0).shardId(), equalTo(shardId));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.RED);
 
         testAllocator.addData(node1, 1, null, randomBoolean());
         allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false);
         changed = testAllocator.allocateUnassigned(allocation);
-        assertThat(changed, equalTo(false));
+        assertThat(changed, equalTo(true));
         assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1));
         assertThat(allocation.routingNodes().unassigned().ignored().get(0).shardId(), equalTo(shardId));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.RED);
 
         testAllocator.addData(node2, 2, null, randomBoolean());
         allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false);
@@ -510,6 +539,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
         assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(), equalTo(node2.getId()));
+        assertClusterHealthStatus(allocation, ClusterHealthStatus.RED);
     }
 
     private RoutingAllocation routingAllocationWithOnePrimaryNoReplicas(AllocationDeciders deciders, boolean asNew, Version version, String... activeAllocationIds) {
@@ -530,6 +560,19 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
         return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, null, System.nanoTime(), false);
     }
 
+    private void assertClusterHealthStatus(RoutingAllocation allocation, ClusterHealthStatus expectedStatus) { // asserts the health computed from this allocation's routing nodes has an ordinal <= expectedStatus's ordinal
+        RoutingTable oldRoutingTable = allocation.routingTable();
+        RoutingNodes newRoutingNodes = allocation.routingNodes();
+        final RoutingTable newRoutingTable = new RoutingTable.Builder() // build a routing table from the allocation's routing nodes
+                                                             .updateNodes(oldRoutingTable.version(), newRoutingNodes) // passes the old table's version along with the nodes
+                                                             .build();
+        ClusterState clusterState = ClusterState.builder(new ClusterName("test-cluster")) // cluster state built here with just a name and the routing table
+                                                .routingTable(newRoutingTable)
+                                                .build();
+        ClusterStateHealth clusterStateHealth = new ClusterStateHealth(clusterState);
+        assertThat(clusterStateHealth.getStatus().ordinal(), lessThanOrEqualTo(expectedStatus.ordinal())); // not an equality check: any status with a lower ordinal also passes. NOTE(review): presumably the enum is declared GREEN, YELLOW, RED - confirm
+    }
+
     class TestAllocator extends PrimaryShardAllocator {
 
         private Map<DiscoveryNode, TransportNodesListGatewayStartedShards.NodeGatewayStartedShards> data;

+ 1 - 1
core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java

@@ -299,7 +299,7 @@ public class ReplicaShardAllocatorTests extends ESAllocationTestCase {
                                         .addShard(primaryShard)
                                         .addShard(ShardRouting.newUnassigned(shardId, null, false,
                                             new UnassignedInfo(reason, null, null, failedAllocations, System.nanoTime(),
-                                                System.currentTimeMillis(), delayed)))
+                                                System.currentTimeMillis(), delayed, UnassignedInfo.AllocationStatus.NO_ATTEMPT)))
                                         .build())
                 )
                 .build();

+ 1 - 1
core/src/test/java/org/elasticsearch/indices/cluster/ClusterStateChanges.java

@@ -158,7 +158,7 @@ public class ClusterStateChanges {
             allocationService, IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, indicesService, nodeServicesProvider);
         MetaDataCreateIndexService createIndexService = new MetaDataCreateIndexService(settings, clusterService, indicesService,
             allocationService, new AliasValidator(settings), Collections.emptySet(), environment,
-            nodeServicesProvider, IndexScopedSettings.DEFAULT_SCOPED_SETTINGS);
+            nodeServicesProvider, IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, threadPool);
 
         transportCloseIndexAction = new TransportCloseIndexAction(settings, transportService, clusterService, threadPool,
             indexStateService, clusterSettings, actionFilters, indexNameExpressionResolver, destructiveOperations);

+ 2 - 1
core/src/test/java/org/elasticsearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java

@@ -26,6 +26,7 @@ import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
 import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
 import org.elasticsearch.action.admin.indices.open.OpenIndexRequest;
 import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.action.support.replication.ClusterStateCreationUtils;
 import org.elasticsearch.cluster.ClusterChangedEvent;
 import org.elasticsearch.cluster.ClusterState;
@@ -140,7 +141,7 @@ public class IndicesClusterStateServiceRandomUpdatesTests extends AbstractIndice
             CreateIndexRequest request = new CreateIndexRequest(name, Settings.builder()
                 .put(SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 3))
                 .put(SETTING_NUMBER_OF_REPLICAS, randomInt(2))
-                .build());
+                .build()).waitForActiveShards(ActiveShardCount.NONE);
             state = cluster.createIndex(state, request);
             assertTrue(state.metaData().hasIndex(name));
         }

+ 2 - 1
core/src/test/java/org/elasticsearch/indices/flush/FlushIT.java

@@ -23,6 +23,7 @@ import org.elasticsearch.action.admin.indices.flush.FlushResponse;
 import org.elasticsearch.action.admin.indices.flush.SyncedFlushResponse;
 import org.elasticsearch.action.admin.indices.stats.IndexStats;
 import org.elasticsearch.action.admin.indices.stats.ShardStats;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.cluster.routing.ShardRouting;
@@ -213,7 +214,7 @@ public class FlushIT extends ESIntegTestCase {
 
     public void testUnallocatedShardsDoesNotHang() throws InterruptedException {
         //  create an index but disallow allocation
-        prepareCreate("test").setSettings(Settings.builder().put("index.routing.allocation.include._name", "nonexistent")).get();
+        prepareCreate("test").setWaitForActiveShards(ActiveShardCount.NONE).setSettings(Settings.builder().put("index.routing.allocation.include._name", "nonexistent")).get();
 
         // this should not hang but instead immediately return with empty result set
         List<ShardsSyncedFlushResult> shardsResult = client().admin().indices().prepareSyncedFlush("test").get().getShardsResultPerIndex().get("test");

+ 2 - 1
core/src/test/java/org/elasticsearch/indices/state/SimpleIndexStateIT.java

@@ -24,6 +24,7 @@ import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
 import org.elasticsearch.action.admin.indices.close.CloseIndexResponse;
 import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
 import org.elasticsearch.action.admin.indices.open.OpenIndexResponse;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.cluster.health.ClusterHealthStatus;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.cluster.routing.ShardRoutingState;
@@ -98,7 +99,7 @@ public class SimpleIndexStateIT extends ESIntegTestCase {
 
     public void testFastCloseAfterCreateContinuesCreateAfterOpen() {
         logger.info("--> creating test index that cannot be allocated");
-        client().admin().indices().prepareCreate("test").setSettings(Settings.builder()
+        client().admin().indices().prepareCreate("test").setWaitForActiveShards(ActiveShardCount.NONE).setSettings(Settings.builder()
                 .put("index.routing.allocation.include.tag", "no_such_node").build()).get();
 
         ClusterHealthResponse health = client().admin().cluster().prepareHealth("test").setWaitForNodes(">=2").get();

+ 5 - 2
core/src/test/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java

@@ -29,7 +29,9 @@ import org.elasticsearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse
 import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse;
 import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus;
 import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse;
+import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
 import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.client.Client;
 import org.elasticsearch.client.node.NodeClient;
 import org.elasticsearch.cluster.ClusterState;
@@ -45,6 +47,7 @@ import org.elasticsearch.common.unit.ByteSizeUnit;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.discovery.zen.ZenDiscovery;
 import org.elasticsearch.discovery.zen.elect.ElectMasterService;
+import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.store.IndexStore;
 import org.elasticsearch.indices.recovery.RecoveryState;
 import org.elasticsearch.indices.ttl.IndicesTTLService;
@@ -443,9 +446,9 @@ public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTest
         logger.info("--> create an index that will have no allocated shards");
         assertAcked(prepareCreate("test-idx-none", 1, Settings.builder().put("number_of_shards", 6)
                 .put("index.routing.allocation.include.tag", "nowhere")
-                .put("number_of_replicas", 0)));
+                .put("number_of_replicas", 0)).setWaitForActiveShards(ActiveShardCount.NONE).get());
+        assertTrue(client().admin().indices().prepareExists("test-idx-none").get().isExists());
 
-        logger.info("--> create repository");
         logger.info("--> creating repository");
         PutRepositoryResponse putRepositoryResponse = client().admin().cluster().preparePutRepository("test-repo")
                 .setType("fs").setSettings(Settings.builder().put("location", randomRepoPath())).execute().actionGet();

+ 2 - 1
core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java

@@ -39,6 +39,7 @@ import org.elasticsearch.action.admin.indices.settings.get.GetSettingsResponse;
 import org.elasticsearch.action.admin.indices.template.get.GetIndexTemplatesResponse;
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.client.Client;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.ClusterStateUpdateTask;
@@ -755,7 +756,7 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas
                         .put("location", randomRepoPath())));
 
         logger.info("-->  creating index that cannot be allocated");
-        prepareCreate("test-idx", 2, Settings.builder().put(IndexMetaData.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + ".tag", "nowhere").put("index.number_of_shards", 3)).get();
+        prepareCreate("test-idx", 2, Settings.builder().put(IndexMetaData.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + ".tag", "nowhere").put("index.number_of_shards", 3)).setWaitForActiveShards(ActiveShardCount.NONE).get();
 
         logger.info("--> snapshot");
         CreateSnapshotResponse createSnapshotResponse = client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(true).setIndices("test-idx").get();

+ 4 - 0
rest-api-spec/src/main/resources/rest-api-spec/api/indices.create.json

@@ -13,6 +13,10 @@
         }
       },
       "params": {
+        "wait_for_active_shards": {
+          "type" : "string",
+          "description" : "Set the number of active shards to wait for before the operation returns." 
+        },
         "timeout": {
           "type" : "time",
           "description" : "Explicit operation timeout"

+ 4 - 0
rest-api-spec/src/main/resources/rest-api-spec/api/indices.rollover.json

@@ -25,6 +25,10 @@
         "master_timeout": {
           "type" : "time",
           "description" : "Specify timeout for connection to master"
+        },
+        "wait_for_active_shards": {
+          "type" : "string",
+          "description" : "Set the number of active shards to wait for on the newly created rollover index before the operation returns."
         }
       }
     },

+ 4 - 0
rest-api-spec/src/main/resources/rest-api-spec/api/indices.shrink.json

@@ -25,6 +25,10 @@
         "master_timeout": {
           "type" : "time",
           "description" : "Specify timeout for connection to master"
+        },
+        "wait_for_active_shards": {
+          "type" : "string",
+          "description" : "Set the number of active shards to wait for on the shrunken index before the operation returns."
         }
       }
     },

+ 29 - 0
rest-api-spec/src/main/resources/rest-api-spec/test/indices.create/10_basic.yaml

@@ -30,6 +30,35 @@
 
   - match: { test_index.settings.index.number_of_replicas: "0"}
 
+---
+"Create index with too large wait_for_active_shards":
+
+  - do:
+      indices.create:
+        index: test_index
+        timeout: 100ms # short timeouts; presumably so the request returns quickly rather than waiting for shards - confirm
+        master_timeout: 100ms
+        wait_for_active_shards: 6 # with number_of_replicas: 5 below this presumably means every copy (1 primary + 5 replicas) - confirm
+        body:
+          settings:
+            number_of_replicas: 5
+
+  - match: { shards_acknowledged: false } # the response must report that the requested shard copies were not acknowledged
+
+---
+"Create index with wait_for_active_shards set to all":
+
+  - do:
+      indices.create:
+        index: test_index
+        wait_for_active_shards: all # symbolic value instead of a number
+        body:
+          settings:
+            number_of_replicas: "0" # no replicas, so "all" presumably amounts to the primaries only - confirm
+
+  - match: { acknowledged: true } # both flags of the create-index response are expected to be true
+  - match: { shards_acknowledged: true }
+
 ---
 "Create index with aliases":
 

+ 7 - 5
rest-api-spec/src/main/resources/rest-api-spec/test/indices.rollover/10_basic.yaml

@@ -4,6 +4,7 @@
   - do:
       indices.create:
         index: logs-1
+        wait_for_active_shards: 1
         body:
           aliases:
             logs_index: {}
@@ -30,11 +31,12 @@
 
   # perform alias rollover
   - do:
-      indices.rollover:
-        alias: "logs_search"
-        body:
-          conditions:
-            max_docs: 1
+      indices.rollover: # nested under "do" like every other step, not a sibling key of it
+        alias: "logs_search"
+        wait_for_active_shards: 1
+        body:
+          conditions:
+            max_docs: 1
 
   - match: { old_index: logs-1 }
   - match: { new_index: logs-2 }

+ 2 - 0
rest-api-spec/src/main/resources/rest-api-spec/test/indices.shrink/10_basic.yaml

@@ -6,6 +6,7 @@
   - do:
       indices.create:
         index: source
+        wait_for_active_shards: 1
         body:
           settings:
             number_of_replicas: "0"
@@ -54,6 +55,7 @@
       indices.shrink:
         index: "source"
         target: "target"
+        wait_for_active_shards: 1
         body:
           settings:
             index.number_of_replicas: 0

+ 12 - 0
test/framework/src/main/java/org/elasticsearch/test/hamcrest/ElasticsearchAssertions.java

@@ -30,6 +30,7 @@ import org.elasticsearch.action.ShardOperationFailedException;
 import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequestBuilder;
 import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
 import org.elasticsearch.action.admin.indices.alias.exists.AliasesExistResponse;
+import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
 import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequestBuilder;
 import org.elasticsearch.action.admin.indices.delete.DeleteIndexResponse;
 import org.elasticsearch.action.admin.indices.template.get.GetIndexTemplatesResponse;
@@ -124,6 +125,17 @@ public class ElasticsearchAssertions {
         assertVersionSerializable(response);
     }
 
+    /**
+     * Assert that an index creation was fully acknowledged, meaning that both the index creation cluster
+     * state update was successful and that the requisite number of shard copies were started before returning.
+     */
+    public static void assertAcked(CreateIndexResponse response) {
+        assertThat(response.getClass().getSimpleName() + " failed - not acked", response.isAcknowledged(), equalTo(true)); // first check: isAcknowledged() must be true
+        assertVersionSerializable(response);
+        assertTrue(response.getClass().getSimpleName() + " failed - index creation acked but not all shards were started",
+            response.isShardsAcked()); // checked last, so a failure here means isAcknowledged() was true but isShardsAcked() was false
+    }
+
     /**
      * Executes the request and fails if the request has not been blocked.
      *