Browse Source

Add second level of field collapsing (#31808)

* Put second level collapse under inner_hits

Closes #24855
Mayya Sharipova 7 years ago
parent
commit
80492cacfc

+ 102 - 0
docs/reference/search/request/collapse.asciidoc

@@ -116,3 +116,105 @@ The default is based on the number of data nodes and the default search thread p
 
 WARNING: `collapse` cannot be used in conjunction with <<search-request-scroll, scroll>>,
 <<search-request-rescore, rescore>> or <<search-request-search-after, search after>>.
+
+==== Second level of collapsing
+
+Second level of collapsing is also supported and is applied to `inner_hits`.
+For example, the following request finds the top scored tweets for
+each country, and within each country finds the top scored tweets
+for each user.
+
+[source,js]
+--------------------------------------------------
+GET /twitter/_search
+{
+    "query": {
+        "match": {
+            "message": "elasticsearch"
+        }
+    },
+    "collapse" : {
+        "field" : "country",
+        "inner_hits" : {
+            "name": "by_location",
+            "collapse" : {"field" : "user"},
+            "size": 3
+        }
+    }
+}
+--------------------------------------------------
+// NOTCONSOLE
+
+
+Response:
+[source,js]
+--------------------------------------------------
+{
+    ...
+    "hits": [
+        {
+            "_index": "twitter",
+            "_type": "_doc",
+            "_id": "9",
+            "_score": ...,
+            "_source": {...},
+            "fields": {"country": ["UK"]},
+            "inner_hits":{
+                "by_location": {
+                    "hits": {
+                       ...,
+                       "hits": [
+                          {
+                            ...
+                            "fields": {"user" : ["user124"]}
+                          },
+                          {
+                            ...
+                            "fields": {"user" : ["user589"]}
+                          },
+                          {
+                            ...
+                             "fields": {"user" : ["user001"]}
+                          }
+                       ]
+                    }
+                 }
+            }
+        },
+        {
+            "_index": "twitter",
+            "_type": "_doc",
+            "_id": "1",
+            "_score": ...,
+            "_source": {...},
+            "fields": {"country": ["Canada"]},
+            "inner_hits":{
+                "by_location": {
+                    "hits": {
+                       ...,
+                       "hits": [
+                          {
+                            ...
+                            "fields": {"user" : ["user444"]}
+                          },
+                          {
+                            ...
+                            "fields": {"user" : ["user1111"]}
+                          },
+                          {
+                            ...
+                             "fields": {"user" : ["user999"]}
+                          }
+                       ]
+                    }
+                 }
+            }
+
+        },
+        ...
+    ]
+}
+--------------------------------------------------
+// NOTCONSOLE
+
+NOTE: Second level of collapsing doesn't allow `inner_hits`.

+ 141 - 0
rest-api-spec/src/main/resources/rest-api-spec/test/search/115_multiple_field_collapsing.yml

@@ -0,0 +1,141 @@
+---
+"two levels fields collapsing":
+    - skip:
+        version: " - 6.99.99"
+        reason: using multiple field collapsing from 7.0 on
+    - do:
+        indices.create:
+          index: addresses
+          body:
+            settings:
+              number_of_shards: 1
+              number_of_replicas: 1
+            mappings:
+              _doc:
+                properties:
+                  country: {"type": "keyword"}
+                  city: {"type": "keyword"}
+                  address: {"type": "text"}
+
+    - do:
+        bulk:
+          refresh: true
+          body:
+            - '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "1" } }'
+            - '{"country" : "Canada", "city" : "Saskatoon", "address" : "701 Victoria Avenue" }'
+            - '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "2" } }'
+            - '{"country" : "Canada", "city" : "Toronto", "address" : "74 Victoria Street, Suite, 74 Victoria Street, Suite 300" }'
+            - '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "3" } }'
+            - '{"country" : "Canada", "city" : "Toronto", "address" : "350 Victoria St" }'
+            - '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "4" } }'
+            - '{"country" : "Canada", "city" : "Toronto", "address" : "20 Victoria Street" }'
+            - '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "5" } }'
+            - '{"country" : "UK", "city" : "London", "address" : "58 Victoria Street" }'
+            - '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "6" } }'
+            - '{"country" : "UK", "city" : "London", "address" : "Victoria Street Victoria Palace Theatre" }'
+            - '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "7" } }'
+            - '{"country" : "UK", "city" : "Manchester", "address" : "75 Victoria street Westminster" }'
+            - '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "8" } }'
+            - '{"country" : "UK", "city" : "London", "address" : "Victoria Station Victoria Arcade" }'
+
+
+    # ************* error if internal collapse contains inner_hits
+    - do:
+        catch:  /parse_exception/
+        search:
+          index: addresses
+          body:
+            query: { "match" : { "address" : "victoria" }}
+            collapse:
+              field: country
+              inner_hits:
+                collapse:
+                  field : city
+                  inner_hits: {}
+
+
+    # ************* error if internal collapse contains another collapse
+    - do:
+        catch:  /parse_exception/
+        search:
+          index: addresses
+          body:
+            query: { "match" : { "address" : "victoria" }}
+            collapse:
+              field: country
+              inner_hits:
+                collapse:
+                  field : city
+                  collapse:  { field: city }
+
+
+
+    # ************* top scored
+    - do:
+        search:
+          index: addresses
+          body:
+            query: { "match" : { "address" : "victoria" }}
+            collapse:
+              field: country
+              inner_hits:
+                name: by_location
+                size: 3
+                collapse:
+                  field : city
+
+    - match: { hits.total: 8 }
+    - length: { hits.hits: 2 }
+    - match: { hits.hits.0.fields.country: ["UK"] }
+    - match: { hits.hits.0.inner_hits.by_location.hits.total: 4 }
+    # 2 inner hits returned instead of requested 3 as they are collapsed by city
+    - length: { hits.hits.0.inner_hits.by_location.hits.hits : 2}
+    - match: { hits.hits.0.inner_hits.by_location.hits.hits.0._id: "8" }
+    - match: { hits.hits.0.inner_hits.by_location.hits.hits.0.fields.city: ["London"] }
+    - match: { hits.hits.0.inner_hits.by_location.hits.hits.1._id: "7" }
+    - match: { hits.hits.0.inner_hits.by_location.hits.hits.1.fields.city: ["Manchester"] }
+
+    - match: { hits.hits.1.fields.country: ["Canada"] }
+    - match: { hits.hits.1.inner_hits.by_location.hits.total: 4 }
+    # 2 inner hits returned instead of requested 3 as they are collapsed by city
+    - length: { hits.hits.1.inner_hits.by_location.hits.hits : 2 }
+    - match: { hits.hits.1.inner_hits.by_location.hits.hits.0._id: "1" }
+    - match: { hits.hits.1.inner_hits.by_location.hits.hits.0.fields.city: ["Saskatoon"] }
+    - match: { hits.hits.1.inner_hits.by_location.hits.hits.1._id: "3" }
+    - match: { hits.hits.1.inner_hits.by_location.hits.hits.1.fields.city: ["Toronto"] }
+
+
+    # ************* sorted
+    - do:
+        search:
+          index: addresses
+          body:
+            query: { "match" : { "address" : "victoria" }}
+            collapse:
+              field: country
+              inner_hits:
+                name: by_location
+                size: 3
+                sort: [{ "city": "desc" }]
+                collapse:
+                  field : city
+
+    - match: { hits.total: 8 }
+    - length: { hits.hits: 2 }
+    - match: { hits.hits.0.fields.country: ["UK"] }
+    - match: { hits.hits.0.inner_hits.by_location.hits.total: 4 }
+    # 2 inner hits returned instead of requested 3 as they are collapsed by city
+    - length: { hits.hits.0.inner_hits.by_location.hits.hits : 2}
+    - match: { hits.hits.0.inner_hits.by_location.hits.hits.0._id: "7" }
+    - match: { hits.hits.0.inner_hits.by_location.hits.hits.0.fields.city: ["Manchester"] }
+    - match: { hits.hits.0.inner_hits.by_location.hits.hits.1._id: "5" }
+    - match: { hits.hits.0.inner_hits.by_location.hits.hits.1.fields.city: ["London"] }
+
+    - match: { hits.hits.1.fields.country: ["Canada"] }
+    - match: { hits.hits.1.inner_hits.by_location.hits.total: 4 }
+    # 2 inner hits returned instead of requested 3 as they are collapsed by city
+    - length: { hits.hits.1.inner_hits.by_location.hits.hits : 2 }
+    - match: { hits.hits.1.inner_hits.by_location.hits.hits.0._id: "2" }
+    - match: { hits.hits.1.inner_hits.by_location.hits.hits.0.fields.city: ["Toronto"] }
+    - match: { hits.hits.1.inner_hits.by_location.hits.hits.1._id: "1" }
+    - match: { hits.hits.1.inner_hits.by_location.hits.hits.1.fields.city: ["Saskatoon"] }

+ 6 - 2
server/src/main/java/org/elasticsearch/action/search/ExpandSearchPhase.java

@@ -87,7 +87,8 @@ final class ExpandSearchPhase extends SearchPhase {
                     groupQuery.must(origQuery);
                 }
                 for (InnerHitBuilder innerHitBuilder : innerHitBuilders) {
-                    SearchSourceBuilder sourceBuilder = buildExpandSearchSourceBuilder(innerHitBuilder)
+                    CollapseBuilder innerCollapseBuilder = innerHitBuilder.getInnerCollapseBuilder();
+                    SearchSourceBuilder sourceBuilder = buildExpandSearchSourceBuilder(innerHitBuilder, innerCollapseBuilder)
                         .query(groupQuery)
                         .postFilter(searchRequest.source().postFilter());
                     SearchRequest groupRequest = buildExpandSearchRequest(searchRequest, sourceBuilder);
@@ -135,7 +136,7 @@ final class ExpandSearchPhase extends SearchPhase {
         return groupRequest;
     }
 
-    private SearchSourceBuilder buildExpandSearchSourceBuilder(InnerHitBuilder options) {
+    private SearchSourceBuilder buildExpandSearchSourceBuilder(InnerHitBuilder options, CollapseBuilder innerCollapseBuilder) {
         SearchSourceBuilder groupSource = new SearchSourceBuilder();
         groupSource.from(options.getFrom());
         groupSource.size(options.getSize());
@@ -167,6 +168,9 @@ final class ExpandSearchPhase extends SearchPhase {
         groupSource.explain(options.isExplain());
         groupSource.trackScores(options.isTrackScores());
         groupSource.version(options.isVersion());
+        if (innerCollapseBuilder != null) {
+            groupSource.collapse(innerCollapseBuilder);
+        }
         return groupSource;
     }
 }

+ 47 - 2
server/src/main/java/org/elasticsearch/index/query/InnerHitBuilder.java

@@ -37,6 +37,7 @@ import org.elasticsearch.search.fetch.subphase.DocValueFieldsContext.FieldAndFor
 import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
 import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
 import org.elasticsearch.search.sort.SortBuilder;
+import org.elasticsearch.search.collapse.CollapseBuilder;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -55,6 +56,8 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
     public static final ParseField NAME_FIELD = new ParseField("name");
     public static final ParseField IGNORE_UNMAPPED = new ParseField("ignore_unmapped");
     public static final QueryBuilder DEFAULT_INNER_HIT_QUERY = new MatchAllQueryBuilder();
+    public static final ParseField COLLAPSE_FIELD = new ParseField("collapse");
+    public static final ParseField FIELD_FIELD = new ParseField("field");
 
     private static final ObjectParser<InnerHitBuilder, Void> PARSER = new ObjectParser<>("inner_hits", InnerHitBuilder::new);
 
@@ -91,6 +94,28 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
         }, SearchSourceBuilder._SOURCE_FIELD, ObjectParser.ValueType.OBJECT_ARRAY_BOOLEAN_OR_STRING);
         PARSER.declareObject(InnerHitBuilder::setHighlightBuilder, (p, c) -> HighlightBuilder.fromXContent(p),
                 SearchSourceBuilder.HIGHLIGHT_FIELD);
+        PARSER.declareField((parser, builder, context) -> {
+            Boolean isParsedCorrectly = false;
+            String field;
+            if (parser.currentToken() == XContentParser.Token.START_OBJECT) {
+                if (parser.nextToken() == XContentParser.Token.FIELD_NAME) {
+                    if (FIELD_FIELD.match(parser.currentName(), parser.getDeprecationHandler())) {
+                        if (parser.nextToken() == XContentParser.Token.VALUE_STRING){
+                            field = parser.text();
+                            if (parser.nextToken() == XContentParser.Token.END_OBJECT){
+                                isParsedCorrectly = true;
+                                CollapseBuilder cb = new CollapseBuilder(field);
+                                builder.setInnerCollapse(cb);
+                            }
+                        }
+                    }
+                }
+            }
+            if (isParsedCorrectly == false) {
+                throw new ParsingException(parser.getTokenLocation(), "Invalid token in the inner collapse");
+            }
+
+        }, COLLAPSE_FIELD, ObjectParser.ValueType.OBJECT);
     }
 
     private String name;
@@ -109,6 +134,7 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
     private Set<ScriptField> scriptFields;
     private HighlightBuilder highlightBuilder;
     private FetchSourceContext fetchSourceContext;
+    private CollapseBuilder innerCollapseBuilder = null;
 
     public InnerHitBuilder() {
         this.name = null;
@@ -173,6 +199,9 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
             boolean hasChildren = in.readBoolean();
             assert hasChildren == false;
         }
+        if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+            this.innerCollapseBuilder = in.readOptionalWriteable(CollapseBuilder::new);
+        }
     }
 
     @Override
@@ -218,6 +247,9 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
             }
         }
         out.writeOptionalWriteable(highlightBuilder);
+        if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+            out.writeOptionalWriteable(innerCollapseBuilder);
+        }
     }
 
     /**
@@ -501,6 +533,15 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
         return query;
     }
 
+    public InnerHitBuilder setInnerCollapse(CollapseBuilder innerCollapseBuilder) {
+        this.innerCollapseBuilder = innerCollapseBuilder;
+        return this;
+    }
+
+    public CollapseBuilder getInnerCollapseBuilder() {
+        return innerCollapseBuilder;
+    }
+
     @Override
     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject();
@@ -550,6 +591,9 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
         if (highlightBuilder != null) {
             builder.field(SearchSourceBuilder.HIGHLIGHT_FIELD.getPreferredName(), highlightBuilder, params);
         }
+        if (innerCollapseBuilder != null) {
+            builder.field(COLLAPSE_FIELD.getPreferredName(), innerCollapseBuilder);
+        }
         builder.endObject();
         return builder;
     }
@@ -572,13 +616,14 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
                 Objects.equals(scriptFields, that.scriptFields) &&
                 Objects.equals(fetchSourceContext, that.fetchSourceContext) &&
                 Objects.equals(sorts, that.sorts) &&
-                Objects.equals(highlightBuilder, that.highlightBuilder);
+                Objects.equals(highlightBuilder, that.highlightBuilder) &&
+                Objects.equals(innerCollapseBuilder, that.innerCollapseBuilder);
     }
 
     @Override
     public int hashCode() {
         return Objects.hash(name, ignoreUnmapped, from, size, explain, version, trackScores,
-                storedFieldsContext, docValueFields, scriptFields, fetchSourceContext, sorts, highlightBuilder);
+                storedFieldsContext, docValueFields, scriptFields, fetchSourceContext, sorts, highlightBuilder, innerCollapseBuilder);
     }
 
     public static InnerHitBuilder fromXContent(XContentParser parser) throws IOException {