Selaa lähdekoodia

Reserve bytes before fetching page (#105432)

Running heap-attack tests with multiple nodes can still lead to OOM 
errors. This is because the transport response messages are not tracked
by the circuit breaker. In heap attack tests, pages can be very large
(30MB — I will chunk them later), and for each exchange, we use three
concurrent channels, resulting in 100MB of untracked memory. This pull
request reserves extra bytes for exchange messages. Although this check
doesn't fully prevent OOM errors, it makes them unlikely in such cases.
Nhat Nguyen 1 vuosi sitten
vanhempi
commit
ce8f5441bf

+ 8 - 0
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeResponse.java

@@ -65,6 +65,14 @@ public final class ExchangeResponse extends TransportResponse implements Releasa
         return page;
     }
 
+    public long ramBytesUsedByPage() {
+        if (page != null) {
+            return page.ramBytesUsedByBlocks();
+        } else {
+            return 0;
+        }
+    }
+
     /**
      * Returns true if the {@link RemoteSink} is already completed. In this case, the {@link ExchangeSourceHandler}
      * can stop polling pages and finish itself.

+ 36 - 9
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java

@@ -38,6 +38,7 @@ import org.elasticsearch.transport.Transports;
 import java.io.IOException;
 import java.util.Map;
 import java.util.concurrent.Executor;
+import java.util.concurrent.atomic.AtomicLong;
 
 /**
  * {@link ExchangeService} is responsible for exchanging pages between exchange sinks and sources on the same or different nodes.
@@ -237,17 +238,40 @@ public final class ExchangeService extends AbstractLifecycleComponent {
         return new TransportRemoteSink(transportService, blockFactory, conn, parentTask, exchangeId, executor);
     }
 
-    record TransportRemoteSink(
-        TransportService transportService,
-        BlockFactory blockFactory,
-        Transport.Connection connection,
-        Task parentTask,
-        String exchangeId,
-        Executor responseExecutor
-    ) implements RemoteSink {
+    static final class TransportRemoteSink implements RemoteSink {
+        final TransportService transportService;
+        final BlockFactory blockFactory;
+        final Transport.Connection connection;
+        final Task parentTask;
+        final String exchangeId;
+        final Executor responseExecutor;
+
+        final AtomicLong estimatedPageSizeInBytes = new AtomicLong(0L);
+
+        TransportRemoteSink(
+            TransportService transportService,
+            BlockFactory blockFactory,
+            Transport.Connection connection,
+            Task parentTask,
+            String exchangeId,
+            Executor responseExecutor
+        ) {
+            this.transportService = transportService;
+            this.blockFactory = blockFactory;
+            this.connection = connection;
+            this.parentTask = parentTask;
+            this.exchangeId = exchangeId;
+            this.responseExecutor = responseExecutor;
+        }
 
         @Override
         public void fetchPageAsync(boolean allSourcesFinished, ActionListener<ExchangeResponse> listener) {
+            final long reservedBytes = estimatedPageSizeInBytes.get();
+            if (reservedBytes > 0) {
+                // This doesn't fully protect ESQL from OOM, but reduces the likelihood.
+                blockFactory.breaker().addEstimateBytesAndMaybeBreak(reservedBytes, "fetch page");
+                listener = ActionListener.runAfter(listener, () -> blockFactory.breaker().addWithoutBreaking(-reservedBytes));
+            }
             transportService.sendChildRequest(
                 connection,
                 EXCHANGE_ACTION_NAME,
@@ -256,7 +280,10 @@ public final class ExchangeService extends AbstractLifecycleComponent {
                 TransportRequestOptions.EMPTY,
                 new ActionListenerResponseHandler<>(listener, in -> {
                     try (BlockStreamInput bsi = new BlockStreamInput(in, blockFactory)) {
-                        return new ExchangeResponse(bsi);
+                        final ExchangeResponse resp = new ExchangeResponse(bsi);
+                        final long responseBytes = resp.ramBytesUsedByPage();
+                        estimatedPageSizeInBytes.getAndUpdate(curr -> Math.max(responseBytes, curr / 2));
+                        return resp;
                     }
                 }, responseExecutor)
             );