@@ -27,6 +27,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDeci
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.concurrent.TaskExecutionTimeTrackingEsThreadPoolExecutor;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.shard.IndexShard;
@@ -59,6 +60,8 @@ import java.util.concurrent.atomic.AtomicBoolean;
import static java.util.Collections.emptySet;
import static java.util.Collections.singletonList;
import static java.util.Collections.unmodifiableSet;
+import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS;
+import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.common.util.set.Sets.newHashSet;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.CoreMatchers.equalTo;
@@ -346,6 +349,8 @@ public class ClusterInfoServiceIT extends ESIntegTestCase {
                WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(),
                WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED
            )
+            // Manually control cluster info refreshes
+            .put(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_UPDATE_INTERVAL_SETTING.getKey(), "60m")
            .build();
        var masterName = internalCluster().startMasterOnlyNode(settings);
        var dataNodeName = internalCluster().startDataOnlyNode(settings);
@@ -369,11 +374,8 @@ public class ClusterInfoServiceIT extends ESIntegTestCase {
            }
        );

-        // Do some writes to create some write thread pool activity.
-        final String indexName = randomIdentifier();
-        for (int i = 0; i < randomIntBetween(1, 1000); i++) {
-            index(indexName, Integer.toString(i), Collections.singletonMap("foo", "bar"));
-        }
+        // Generate some writes to get some non-zero write thread pool stats.
+        doALotOfDataNodeWrites();

        // Force a refresh of the ClusterInfo state to collect fresh info from the data nodes.
        final InternalClusterInfoService masterClusterInfoService = asInstanceOf(
@@ -387,7 +389,7 @@

        final Map<String, NodeUsageStatsForThreadPools> usageStatsForThreadPools = clusterInfo.getNodeUsageStatsForThreadPools();
        logger.info("---> Thread pool usage stats reported by data nodes to the master: " + usageStatsForThreadPools);
-        assertThat(usageStatsForThreadPools.size(), equalTo(1)); // only stats from data nodes should be collectedg
+        assertThat(usageStatsForThreadPools.size(), equalTo(1)); // only stats from data nodes should be collected
        var dataNodeId = getNodeId(dataNodeName);
        var nodeUsageStatsForThreadPool = usageStatsForThreadPools.get(dataNodeId);
        assertNotNull(nodeUsageStatsForThreadPool);
@@ -400,4 +402,184 @@
        assertThat(writeThreadPoolStats.averageThreadPoolUtilization(), greaterThan(0f));
        assertThat(writeThreadPoolStats.maxThreadPoolQueueLatencyMillis(), greaterThanOrEqualTo(0L));
    }
+
+    /**
+     * The {@link TransportNodeUsageStatsForThreadPoolsAction} returns the max value of two kinds of queue latencies:
+     * {@link TaskExecutionTimeTrackingEsThreadPoolExecutor#getMaxQueueLatencyMillisSinceLastPollAndReset()} and
+     * {@link TaskExecutionTimeTrackingEsThreadPoolExecutor#peekMaxQueueLatencyInQueueMillis()}. The latter looks at currently queued
+     * tasks, while the former tracks the queue latency of tasks at the moment they are taken off the queue to start execution.
+     */
+    public void testMaxQueueLatenciesInClusterInfo() throws Exception {
+        var settings = Settings.builder()
+            .put(
+                WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(),
+                WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED
+            )
+            // Manually control cluster info refreshes
+            .put(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_UPDATE_INTERVAL_SETTING.getKey(), "60m")
+            .build();
+        var masterName = internalCluster().startMasterOnlyNode(settings);
+        var dataNodeName = internalCluster().startDataOnlyNode(settings);
+        ensureStableCluster(2);
+        assertEquals(internalCluster().getMasterName(), masterName);
+        assertNotEquals(internalCluster().getMasterName(), dataNodeName);
+        logger.info("---> master node: " + masterName + ", data node: " + dataNodeName);
+
+        // Block indexing on the data node by submitting write thread pool tasks equal to the number of write threads.
+        var barrier = blockDataNodeIndexing(dataNodeName);
+        try {
+            // Submit an arbitrary number of tasks; they will queue because all the write threads are already occupied. Strictly,
+            // only a single queued task is needed.
+            int numberOfTasks = randomIntBetween(1, 5);
+            Thread[] threadsToJoin = new Thread[numberOfTasks];
+            String indexName = randomIdentifier();
+            createIndex(
+                indexName,
+                // NB: Set 0 replicas so that there aren't any stray GlobalCheckpointSyncAction tasks on the write thread pool.
+                Settings.builder().put(SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 5)).put(SETTING_NUMBER_OF_REPLICAS, 0).build()
+            );
+            for (int i = 0; i < numberOfTasks; ++i) {
+                threadsToJoin[i] = startParallelSingleWrite(indexName);
+            }
+
+            // Reach into the data node's write thread pool to check that tasks have reached the queue.
+            var dataNodeThreadPool = internalCluster().getInstance(ThreadPool.class, dataNodeName);
+            var writeExecutor = dataNodeThreadPool.executor(ThreadPool.Names.WRITE);
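+            // The WRITE executor is expected to be a TaskExecutionTimeTrackingEsThreadPoolExecutor, which exposes the queue latency
+            // instrumentation inspected below.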
+            assert writeExecutor instanceof TaskExecutionTimeTrackingEsThreadPoolExecutor;
+            var trackingWriteExecutor = (TaskExecutionTimeTrackingEsThreadPoolExecutor) writeExecutor;
+            assertBusy(
+                // Wait for the parallel threads' writes to get queued in the write thread pool.
+                () -> assertThat(
+                    "Write thread pool dump: " + trackingWriteExecutor,
+                    trackingWriteExecutor.peekMaxQueueLatencyInQueueMillis(),
+                    greaterThan(0L)
+                )
+            );
+
+            // Force a refresh of the ClusterInfo state to collect fresh info from the data node.
+            final InternalClusterInfoService masterClusterInfoService = asInstanceOf(
+                InternalClusterInfoService.class,
+                internalCluster().getCurrentMasterNodeInstance(ClusterInfoService.class)
+            );
+            final ClusterInfo clusterInfo = ClusterInfoServiceUtils.refresh(masterClusterInfoService);
+
+            // Since tasks are actively queued right now, #peekMaxQueueLatencyInQueueMillis, which is called from the
+            // TransportNodeUsageStatsForThreadPoolsAction that a ClusterInfoService refresh initiates, should return a max queue
+            // latency > 0.
+            {
+                final Map<String, NodeUsageStatsForThreadPools> usageStatsForThreadPools = clusterInfo.getNodeUsageStatsForThreadPools();
+                logger.info("---> Thread pool usage stats reported by data nodes to the master: " + usageStatsForThreadPools);
+                assertThat(usageStatsForThreadPools.size(), equalTo(1)); // only stats from data node should be collected
+                var dataNodeId = getNodeId(dataNodeName);
+                var nodeUsageStatsForThreadPool = usageStatsForThreadPools.get(dataNodeId);
+                assertNotNull(nodeUsageStatsForThreadPool);
+                logger.info("---> Data node's thread pool stats: " + nodeUsageStatsForThreadPool);
+
+                assertEquals(dataNodeId, nodeUsageStatsForThreadPool.nodeId());
+                var writeThreadPoolStats = nodeUsageStatsForThreadPool.threadPoolUsageStatsMap().get(ThreadPool.Names.WRITE);
+                assertNotNull("Expected to find stats for the WRITE thread pool", writeThreadPoolStats);
+                assertThat(writeThreadPoolStats.totalThreadPoolThreads(), greaterThan(0));
+                assertThat(writeThreadPoolStats.averageThreadPoolUtilization(), greaterThanOrEqualTo(0f));
+                assertThat(writeThreadPoolStats.maxThreadPoolQueueLatencyMillis(), greaterThan(0L));
+            }
+
+            // Now release the data node's indexing and drain the queued tasks. The max queue latency of executed (not queued) tasks
+            // is reset by each TransportNodeUsageStatsForThreadPoolsAction call (#getMaxQueueLatencyMillisSinceLastPollAndReset),
+            // so the new queue latencies will be present in the next call. There will be nothing in the queue to peek at now, so
+            // the max queue latency reported by TransportNodeUsageStatsForThreadPoolsAction will reflect
+            // #getMaxQueueLatencyMillisSinceLastPollAndReset and not #peekMaxQueueLatencyInQueueMillis.
+            barrier.await();
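+            // All write threads resume once the barrier trips; joining the writer threads ensures the queued tasks have drained.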
+            for (int i = 0; i < numberOfTasks; ++i) {
+                threadsToJoin[i].join();
+            }
+            assertThat(
+                "Unexpectedly found a task queued for the write thread pool. Write thread pool dump: " + trackingWriteExecutor,
+                trackingWriteExecutor.peekMaxQueueLatencyInQueueMillis(),
+                equalTo(0L)
+            );
+
+            final ClusterInfo nextClusterInfo = ClusterInfoServiceUtils.refresh(masterClusterInfoService);
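+            // This refresh picks up the queue latencies recorded as the drained tasks left the queue, and resets that tracking for
+            // the next poll.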
+            {
+                final Map<String, NodeUsageStatsForThreadPools> usageStatsForThreadPools = nextClusterInfo
+                    .getNodeUsageStatsForThreadPools();
+                logger.info("---> Thread pool usage stats reported by data nodes to the master: " + usageStatsForThreadPools);
+                assertThat(usageStatsForThreadPools.size(), equalTo(1)); // only stats from data nodes should be collected
+                var dataNodeId = getNodeId(dataNodeName);
+                var nodeUsageStatsForThreadPool = usageStatsForThreadPools.get(dataNodeId);
+                assertNotNull(nodeUsageStatsForThreadPool);
+                logger.info("---> Data node's thread pool stats: " + nodeUsageStatsForThreadPool);
+
+                assertEquals(dataNodeId, nodeUsageStatsForThreadPool.nodeId());
+                var writeThreadPoolStats = nodeUsageStatsForThreadPool.threadPoolUsageStatsMap().get(ThreadPool.Names.WRITE);
+                assertNotNull("Expected to find stats for the WRITE thread pool", writeThreadPoolStats);
+                assertThat(writeThreadPoolStats.totalThreadPoolThreads(), greaterThan(0));
+                assertThat(writeThreadPoolStats.averageThreadPoolUtilization(), greaterThan(0f));
+                assertThat(writeThreadPoolStats.maxThreadPoolQueueLatencyMillis(), greaterThanOrEqualTo(0L));
+            }
+        } finally {
+            // Ensure the write threads have been released by resetting the barrier, which breaks it for any callers still waiting
+            // on it. If the callers have already all been successfully released, then there is nothing left to unblock.
+            logger.info("---> Ensuring release of the barrier on write thread pool tasks");
+            barrier.reset();
+        }
+
+        // Now that there's nothing in the queue, and no activity since the last ClusterInfo refresh, the max latency returned should be
+        // zero. Verify this.
+        final InternalClusterInfoService masterClusterInfoService = asInstanceOf(
+            InternalClusterInfoService.class,
+            internalCluster().getCurrentMasterNodeInstance(ClusterInfoService.class)
+        );
+        final ClusterInfo clusterInfo = ClusterInfoServiceUtils.refresh(masterClusterInfoService);
+        {
+            final Map<String, NodeUsageStatsForThreadPools> usageStatsForThreadPools = clusterInfo.getNodeUsageStatsForThreadPools();
+            logger.info("---> Thread pool usage stats reported by data nodes to the master: " + usageStatsForThreadPools);
+            assertThat(usageStatsForThreadPools.size(), equalTo(1)); // only stats from data nodes should be collected
+            var dataNodeId = getNodeId(dataNodeName);
+            var nodeUsageStatsForThreadPool = usageStatsForThreadPools.get(dataNodeId);
+            assertNotNull(nodeUsageStatsForThreadPool);
+            logger.info("---> Data node's thread pool stats: " + nodeUsageStatsForThreadPool);
+
+            assertEquals(dataNodeId, nodeUsageStatsForThreadPool.nodeId());
+            var writeThreadPoolStats = nodeUsageStatsForThreadPool.threadPoolUsageStatsMap().get(ThreadPool.Names.WRITE);
+            assertNotNull("Expected to find stats for the WRITE thread pool", writeThreadPoolStats);
+            assertThat(writeThreadPoolStats.totalThreadPoolThreads(), greaterThan(0));
+            assertThat(writeThreadPoolStats.averageThreadPoolUtilization(), equalTo(0f));
+            assertThat(writeThreadPoolStats.maxThreadPoolQueueLatencyMillis(), equalTo(0L));
+        }
+    }
+
+    /**
+     * Do some writes to create some write thread pool activity.
+     */
+    private void doALotOfDataNodeWrites() {
+        final String indexName = randomIdentifier();
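+        // A few hundred documents is assumed to be enough to produce non-zero write thread pool utilization stats.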
+        final int numberOfDocs = randomIntBetween(500, 1000);
+        for (int i = 0; i < numberOfDocs; i++) {
+            index(indexName, Integer.toString(i), Collections.singletonMap("foo", "bar"));
+        }
+    }
+
+    /**
+     * Starts a single index request on a parallel thread and returns the reference so {@link Thread#join()} can be called eventually.
+     */
+    private Thread startParallelSingleWrite(String indexName) {
+        Thread running = new Thread(() -> doSingleWrite(indexName));
+        running.start();
+        return running;
+    }
+
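+    /**
+     * Indexes a single document with a random id. Id collisions between parallel writers are harmless here: each request still
+     * occupies a write thread pool task, which is all these tests need.
+     */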
+    private void doSingleWrite(String indexName) {
+        final int randomId = randomIntBetween(500, 1000);
+        index(indexName, Integer.toString(randomId), Collections.singletonMap("foo", "bar"));
+    }
}