
Tests: Add logging to FieldSortIT (#131558) (#131613)

In order to better understand the infrequent failures in #129445, and
in the hope of making the issue easier to reproduce, this PR adds logging
of which shards and nodes the documents end up on in
FieldSortIT#testSortMixedFieldTypes. It also raises the log level for
this test suite to TRACE in the o.e.search packages and to DEBUG in some
action.search packages, to make it clearer where exceptions are thrown
and how resources are released afterwards.

Relates to #129445
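
For reference, the shard-location check the test adds boils down to a single _cat/shards call over the low-level REST client. Below is a minimal standalone sketch of that call (not part of the commit), assuming a cluster reachable on localhost:9200 and the three test indices used by testSortMixedFieldTypes; inside the integration test the client comes from createRestClient() and the response body goes to the test logger rather than stdout.

import org.apache.http.HttpHost;
import org.apache.http.util.EntityUtils;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;

import java.io.IOException;

public class CatShardsDebug {
    public static void main(String[] args) throws IOException {
        // Low-level REST client pointed at a local node; the test obtains its client from createRestClient().
        try (RestClient restClient = RestClient.builder(new HttpHost("localhost", 9200, "http")).build()) {
            // Ask _cat/shards for a JSON listing of which node holds each shard of the test indices.
            Request checkShardsRequest = new Request(
                "GET",
                "/_cat/shards/index_long,index_double,index_keyword?format=json&h=index,node,shard,prirep,state,docs"
            );
            Response response = restClient.performRequest(checkShardsRequest);
            // Logging this body on every run means a failing run records the document distribution.
            System.out.println(EntityUtils.toString(response.getEntity()));
        }
    }
}

The same information could be read from the internal cluster state, but going through the REST layer keeps the output in the exact _cat/shards format, which is why the test also overrides addMockHttpTransport() to enable the real HTTP transport.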
Christoph Büscher, 2 months ago
commit ff6016407d

+ 26 - 1
server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java

@@ -9,6 +9,7 @@
 
 package org.elasticsearch.search.sort;
 
+import org.apache.http.util.EntityUtils;
 import org.apache.lucene.tests.util.TestUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.UnicodeUtil;
@@ -20,6 +21,9 @@ import org.elasticsearch.action.search.SearchPhaseExecutionException;
 import org.elasticsearch.action.search.SearchRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.action.search.ShardSearchFailure;
+import org.elasticsearch.client.Request;
+import org.elasticsearch.client.Response;
+import org.elasticsearch.client.RestClient;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.Strings;
@@ -36,6 +40,7 @@ import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.search.SearchHits;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.InternalSettingsPlugin;
+import org.elasticsearch.test.junit.annotations.TestIssueLogging;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentFactory;
 import org.elasticsearch.xcontent.XContentType;
@@ -82,6 +87,12 @@ import static org.hamcrest.Matchers.lessThanOrEqualTo;
 import static org.hamcrest.Matchers.not;
 import static org.hamcrest.Matchers.nullValue;
 
+@TestIssueLogging(
+    issueUrl = "https://github.com/elastic/elasticsearch/issues/129445",
+    value = "org.elasticsearch.action.search.SearchQueryThenFetchAsyncAction:DEBUG,"
+        + "org.elasticsearch.action.search.SearchPhaseController:DEBUG,"
+        + "org.elasticsearch.search:TRACE"
+)
 public class FieldSortIT extends ESIntegTestCase {
     public static class CustomScriptPlugin extends MockScriptPlugin {
         @Override
@@ -110,6 +121,10 @@ public class FieldSortIT extends ESIntegTestCase {
         return Arrays.asList(InternalSettingsPlugin.class, CustomScriptPlugin.class);
     }
 
+    protected boolean addMockHttpTransport() {
+        return false;
+    }
+
     public void testIssue8226() {
         int numIndices = between(5, 10);
         final boolean useMapping = randomBoolean();
@@ -2242,7 +2257,7 @@ public class FieldSortIT extends ESIntegTestCase {
         );
     }
 
-    public void testSortMixedFieldTypes() {
+    public void testSortMixedFieldTypes() throws IOException {
         assertAcked(
             prepareCreate("index_long").setMapping("foo", "type=long"),
             prepareCreate("index_integer").setMapping("foo", "type=integer"),
@@ -2256,6 +2271,16 @@ public class FieldSortIT extends ESIntegTestCase {
         prepareIndex("index_keyword").setId("1").setSource("foo", "123").get();
         refresh();
 
+        // for debugging, we try to see where the documents are located
+        try (RestClient restClient = createRestClient()) {
+            Request checkShardsRequest = new Request(
+                "GET",
+                "/_cat/shards/index_long,index_double,index_keyword?format=json&h=index,node,shard,prirep,state,docs,index"
+            );
+            Response response = restClient.performRequest(checkShardsRequest);
+            logger.info("FieldSortIT#testSortMixedFieldTypes document distribution: " + EntityUtils.toString(response.getEntity()));
+        }
+
         { // mixing long and integer types is ok, as we convert integer sort to long sort
             assertNoFailures(prepareSearch("index_long", "index_integer").addSort(new FieldSortBuilder("foo")).setSize(10));
         }

+ 20 - 11
server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java

@@ -9,6 +9,8 @@
 
 package org.elasticsearch.action.search;
 
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Sort;
@@ -72,6 +74,8 @@ import static org.elasticsearch.search.SearchService.DEFAULT_SIZE;
 
 public final class SearchPhaseController {
 
+    private static final Logger logger = LogManager.getLogger(SearchPhaseController.class);
+
     private final BiFunction<
         Supplier<Boolean>,
         AggregatorFactories.Builder,
@@ -153,17 +157,22 @@ public final class SearchPhaseController {
             return topDocs;
         }
         final TopDocs mergedTopDocs;
-        if (topDocs instanceof TopFieldGroups firstTopDocs) {
-            final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields);
-            TopFieldGroups[] shardTopDocs = topDocsList.toArray(new TopFieldGroups[0]);
-            mergedTopDocs = TopFieldGroups.merge(sort, from, topN, shardTopDocs, false);
-        } else if (topDocs instanceof TopFieldDocs firstTopDocs) {
-            TopFieldDocs[] shardTopDocs = topDocsList.toArray(new TopFieldDocs[0]);
-            final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields);
-            mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs);
-        } else {
-            final TopDocs[] shardTopDocs = topDocsList.toArray(new TopDocs[0]);
-            mergedTopDocs = TopDocs.merge(from, topN, shardTopDocs);
+        try {
+            if (topDocs instanceof TopFieldGroups firstTopDocs) {
+                final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields);
+                TopFieldGroups[] shardTopDocs = topDocsList.toArray(new TopFieldGroups[0]);
+                mergedTopDocs = TopFieldGroups.merge(sort, from, topN, shardTopDocs, false);
+            } else if (topDocs instanceof TopFieldDocs firstTopDocs) {
+                TopFieldDocs[] shardTopDocs = topDocsList.toArray(new TopFieldDocs[0]);
+                final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields);
+                mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs);
+            } else {
+                final TopDocs[] shardTopDocs = topDocsList.toArray(new TopDocs[0]);
+                mergedTopDocs = TopDocs.merge(from, topN, shardTopDocs);
+            }
+        } catch (IllegalArgumentException e) {
+            logger.debug("Failed to merge top docs: ", e);
+            throw e;
         }
         return mergedTopDocs;
     }

+ 2 - 0
server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java

@@ -454,6 +454,7 @@ public class SearchQueryThenFetchAsyncAction extends AbstractSearchAsyncAction<S
                 executeAsSingleRequest(routing, request.shards.get(0));
                 return;
             }
+            String nodeId = routing.nodeId();
             final Transport.Connection connection;
             try {
                 connection = getConnection(routing.clusterAlias(), routing.nodeId());
@@ -503,6 +504,7 @@ public class SearchQueryThenFetchAsyncAction extends AbstractSearchAsyncAction<S
                     @Override
                     public void handleException(TransportException e) {
                         Exception cause = (Exception) ExceptionsHelper.unwrapCause(e);
+                        logger.debug("handling node search exception coming from [" + nodeId + "]", cause);
                         if (e instanceof SendRequestTransportException || cause instanceof TaskCancelledException) {
                             // two possible special cases here where we do not want to fail the phase:
                             // failure to send out the request -> handle things the same way a shard would fail with unbatched execution