Selaa lähdekoodia

ESQL: Allow using the same index in FROM and LOOKUP (#118768) (#118933)

Changed the logic for collecting the plan's concrete indices: instead of gathering all the indices of the plan and then subtracting the lookup ones, the lookup side of a LOOKUP JOIN is now skipped directly during the initial calculation.
Iván Cea Fontenla 9 kuukautta sitten
vanhempi
commit
222627c658

+ 16 - 1
x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

@@ -182,6 +182,22 @@ language_code:integer | language_name:keyword       | country:keyword
 2                     | [German, German, German]    | [Austria, Germany, Switzerland]
 ;
 
+repeatedIndexOnFrom
+required_capability: join_lookup_v7
+required_capability: join_lookup_repeated_index_from
+
+FROM languages_lookup
+| LOOKUP JOIN languages_lookup ON language_code
+| SORT language_code
+;
+
+language_code:integer | language_name:keyword
+1                     | English
+2                     | French
+3                     | Spanish
+4                     | German
+;
+
 ###############################################
 # Filtering tests with languages_lookup index
 ###############################################
@@ -1061,4 +1077,3 @@ ignoreOrder:true
 2023-10-23T12:27:28.948Z | 172.21.2.113      | 2764889             | QA              | null
 2023-10-23T12:15:03.360Z | 172.21.2.162      | 3450233             | QA              | null
 ;
-

+ 5 - 0
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

@@ -545,6 +545,11 @@ public class EsqlCapabilities {
          */
         JOIN_LOOKUP_V7(Build.current().isSnapshot()),
 
+        /**
+         * LOOKUP JOIN with the same index as the FROM
+         */
+        JOIN_LOOKUP_REPEATED_INDEX_FROM(JOIN_LOOKUP_V7.isEnabled()),
+
         /**
          * Fix for https://github.com/elastic/elasticsearch/issues/117054
          */

+ 23 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java

@@ -22,6 +22,7 @@ import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException;
 import org.elasticsearch.xpack.esql.core.expression.AttributeSet;
 import org.elasticsearch.xpack.esql.core.expression.Expression;
 import org.elasticsearch.xpack.esql.core.expression.predicate.Predicates;
+import org.elasticsearch.xpack.esql.core.tree.Node;
 import org.elasticsearch.xpack.esql.core.tree.Source;
 import org.elasticsearch.xpack.esql.core.type.DataType;
 import org.elasticsearch.xpack.esql.core.util.Holder;
@@ -40,6 +41,7 @@ import org.elasticsearch.xpack.esql.plan.physical.ExchangeExec;
 import org.elasticsearch.xpack.esql.plan.physical.ExchangeSinkExec;
 import org.elasticsearch.xpack.esql.plan.physical.ExchangeSourceExec;
 import org.elasticsearch.xpack.esql.plan.physical.FragmentExec;
+import org.elasticsearch.xpack.esql.plan.physical.LookupJoinExec;
 import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan;
 import org.elasticsearch.xpack.esql.planner.mapper.LocalMapper;
 import org.elasticsearch.xpack.esql.planner.mapper.Mapper;
@@ -48,9 +50,12 @@ import org.elasticsearch.xpack.esql.stats.SearchContextStats;
 import org.elasticsearch.xpack.esql.stats.SearchStats;
 
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Set;
+import java.util.function.Consumer;
+import java.util.function.Function;
 
 import static java.util.Arrays.asList;
 import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.DOC_VALUES;
@@ -105,10 +110,27 @@ public class PlannerUtils {
             return Set.of();
         }
         var indices = new LinkedHashSet<String>();
-        plan.forEachUp(FragmentExec.class, f -> f.fragment().forEachUp(EsRelation.class, r -> indices.addAll(r.index().concreteIndices())));
+        // TODO: This only works for LEFT join, we still need to support RIGHT join
+        forEachUpWithChildren(plan, node -> {
+            if (node instanceof FragmentExec f) {
+                f.fragment().forEachUp(EsRelation.class, r -> indices.addAll(r.index().concreteIndices()));
+            }
+        }, node -> node instanceof LookupJoinExec join ? List.of(join.left()) : node.children());
         return indices;
     }
 
+    /**
+     * Similar to {@link Node#forEachUp(Consumer)}, but with a custom callback to get the node children.
+     */
+    private static <T extends Node<T>> void forEachUpWithChildren(
+        T node,
+        Consumer<? super T> action,
+        Function<? super T, Collection<T>> childrenGetter
+    ) {
+        childrenGetter.apply(node).forEach(c -> forEachUpWithChildren(c, action, childrenGetter));
+        action.accept(node);
+    }
+
     /**
      * Returns the original indices specified in the FROM command of the query. We need the original query to resolve alias filters.
      */

+ 3 - 46
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java

@@ -63,12 +63,8 @@ import org.elasticsearch.xpack.esql.action.EsqlSearchShardsAction;
 import org.elasticsearch.xpack.esql.core.expression.Attribute;
 import org.elasticsearch.xpack.esql.enrich.EnrichLookupService;
 import org.elasticsearch.xpack.esql.enrich.LookupFromIndexService;
-import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
-import org.elasticsearch.xpack.esql.plan.logical.join.Join;
 import org.elasticsearch.xpack.esql.plan.physical.ExchangeSinkExec;
 import org.elasticsearch.xpack.esql.plan.physical.ExchangeSourceExec;
-import org.elasticsearch.xpack.esql.plan.physical.FragmentExec;
-import org.elasticsearch.xpack.esql.plan.physical.LookupJoinExec;
 import org.elasticsearch.xpack.esql.plan.physical.OutputExec;
 import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan;
 import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders;
@@ -81,7 +77,6 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -167,11 +162,9 @@ public class ComputeService {
         Map<String, OriginalIndices> clusterToConcreteIndices = transportService.getRemoteClusterService()
             .groupIndices(SearchRequest.DEFAULT_INDICES_OPTIONS, PlannerUtils.planConcreteIndices(physicalPlan).toArray(String[]::new));
         QueryPragmas queryPragmas = configuration.pragmas();
-        Set<String> lookupIndexNames = findLookupIndexNames(physicalPlan);
-        Set<String> concreteIndexNames = selectConcreteIndices(clusterToConcreteIndices, lookupIndexNames);
         if (dataNodePlan == null) {
-            if (concreteIndexNames.isEmpty() == false) {
-                String error = "expected no concrete indices without data node plan; got " + concreteIndexNames;
+            if (clusterToConcreteIndices.values().stream().allMatch(v -> v.indices().length == 0) == false) {
+                String error = "expected no concrete indices without data node plan; got " + clusterToConcreteIndices;
                 assert false : error;
                 listener.onFailure(new IllegalStateException(error));
                 return;
@@ -194,7 +187,7 @@ public class ComputeService {
                 return;
             }
         } else {
-            if (concreteIndexNames.isEmpty()) {
+            if (clusterToConcreteIndices.values().stream().allMatch(v -> v.indices().length == 0)) {
                 var error = "expected concrete indices with data node plan but got empty; data node plan " + dataNodePlan;
                 assert false : error;
                 listener.onFailure(new IllegalStateException(error));
@@ -268,42 +261,6 @@ public class ComputeService {
         }
     }
 
-    private Set<String> selectConcreteIndices(Map<String, OriginalIndices> clusterToConcreteIndices, Set<String> indexesToIgnore) {
-        Set<String> concreteIndexNames = new HashSet<>();
-        clusterToConcreteIndices.forEach((clusterAlias, concreteIndices) -> {
-            for (String index : concreteIndices.indices()) {
-                if (indexesToIgnore.contains(index) == false) {
-                    concreteIndexNames.add(index);
-                }
-            }
-        });
-        return concreteIndexNames;
-    }
-
-    private Set<String> findLookupIndexNames(PhysicalPlan physicalPlan) {
-        Set<String> lookupIndexNames = new HashSet<>();
-        // When planning JOIN on the coordinator node: "LookupJoinExec.lookup()->FragmentExec.fragment()->EsRelation.index()"
-        physicalPlan.forEachDown(
-            LookupJoinExec.class,
-            lookupJoinExec -> lookupJoinExec.lookup()
-                .forEachDown(
-                    FragmentExec.class,
-                    frag -> frag.fragment().forEachDown(EsRelation.class, esRelation -> lookupIndexNames.add(esRelation.index().name()))
-                )
-        );
-        // When planning JOIN on the data node: "FragmentExec.fragment()->Join.right()->EsRelation.index()"
-        // TODO this only works for LEFT join, so we still need to support RIGHT join
-        physicalPlan.forEachDown(
-            FragmentExec.class,
-            fragmentExec -> fragmentExec.fragment()
-                .forEachDown(
-                    Join.class,
-                    join -> join.right().forEachDown(EsRelation.class, esRelation -> lookupIndexNames.add(esRelation.index().name()))
-                )
-        );
-        return lookupIndexNames;
-    }
-
     // For queries like: FROM logs* | LIMIT 0 (including cross-cluster LIMIT 0 queries)
     private static void updateShardCountForCoordinatorOnlyQuery(EsqlExecutionInfo execInfo) {
         if (execInfo.isCrossClusterSearch()) {