Browse Source

Integration tests for diversified sampler (#77810)

Adds and integration test for the `diversified_sampler` aggregator.
Nik Everett 4 years ago
parent
commit
3e5dbb0808

+ 229 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/410_diversified_sampler.yml

@@ -0,0 +1,229 @@
+setup:
+  - do:
+      indices.create:
+          index: test
+          body:
+            settings:
+              number_of_shards: 1
+            mappings:
+              properties:
+                tags:
+                  type: text
+                author:
+                  type: keyword
+                number:
+                  type: integer
+                class:
+                  type: integer
+
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - '{"index": {}}'
+          - '{"tags": "kibana",     "author": "alice", "number": 1, "class": 1}'
+          - '{"index": {}}'
+          - '{"tags": "kibana",     "author": "bob",   "number": 2, "class": 1}'
+          - '{"index": {}}'
+          - '{"tags": "kibana",     "author": "alice", "number": 3, "class": 2}'
+          - '{"index": {}}'
+          - '{"tags": "javascript", "author": "alice", "number": 4, "class": 1}'
+
+---
+small shard_size:
+  - do:
+      search:
+        body:
+          size: 0
+          query:
+            query_string:
+              query: 'tags:kibana OR tags:javascript'
+          aggs:
+            diversified:
+              diversified_sampler:
+                field: author
+                shard_size: 1
+              aggs:
+                min_number:
+                  min:
+                    field: number
+                max_number:
+                  max:
+                    field: number
+
+
+  - match: { hits.total.value: 4 }
+  - match: { aggregations.diversified.doc_count: 1 }
+  # The most relevant document has a value of 4 so we only aggregate that.
+  - match: { aggregations.diversified.min_number.value: 4.0 }
+  - match: { aggregations.diversified.max_number.value: 4.0 }
+
+---
+defaults:
+  - do:
+      search:
+        body:
+          size: 0
+          query:
+            query_string:
+              query: 'tags:kibana OR tags:javascript'
+          aggs:
+            diversified:
+              diversified_sampler:
+                field: author
+              aggs:
+                min_number:
+                  min:
+                    field: number
+                max_number:
+                  max:
+                    field: number
+
+  - match: { hits.total.value: 4 }
+  # The max_docs_per_value defaults to 1 so we just get one doc for each of the two authors
+  - match: { aggregations.diversified.doc_count: 2 }
+  # Bob's only document is 2 so we get that as the min.
+  - match: { aggregations.diversified.min_number.value: 2.0 }
+  # Alice's most relevant document is 2 so we get that as the max.
+  - match: { aggregations.diversified.max_number.value: 4.0 }
+
+---
+override max_docs_per_value:
+  - do:
+      search:
+        body:
+          size: 0
+          query:
+            query_string:
+              query: 'tags:kibana OR tags:javascript'
+          aggs:
+            diversified:
+              diversified_sampler:
+                field: author
+                max_docs_per_value: 3
+              aggs:
+                min_number:
+                  min:
+                    field: number
+                max_number:
+                  max:
+                    field: number
+
+  - match: { hits.total.value: 4 }
+  # We've bumped the max_docs_per_value high enough to get all docs
+  - match: { aggregations.diversified.doc_count: 4 }
+  - match: { aggregations.diversified.min_number.value: 1.0 }
+  - match: { aggregations.diversified.max_number.value: 4.0 }
+
+---
+run on number:
+  - do:
+      search:
+        body:
+          size: 0
+          query:
+            query_string:
+              query: 'tags:kibana OR tags:javascript'
+          aggs:
+            diversified:
+              diversified_sampler:
+                field: class
+              aggs:
+                min_number:
+                  min:
+                    field: number
+                max_number:
+                  max:
+                    field: number
+
+  - match: { hits.total.value: 4 }
+  # The max_docs_per_value defaults to 1 so we just get one doc for each of the two classes
+  - match: { aggregations.diversified.doc_count: 2 }
+  - match: { aggregations.diversified.min_number.value: 3.0 }
+  - match: { aggregations.diversified.max_number.value: 4.0 }
+
+---
+force map mode:
+  - do:
+      search:
+        body:
+          size: 0
+          query:
+            query_string:
+              query: 'tags:kibana OR tags:javascript'
+          aggs:
+            diversified:
+              diversified_sampler:
+                field: author
+                execution_hint: map
+              aggs:
+                min_number:
+                  min:
+                    field: number
+                max_number:
+                  max:
+                    field: number
+
+  - match: { hits.total.value: 4 }
+  - match: { aggregations.diversified.doc_count: 2 }
+  - match: { aggregations.diversified.min_number.value: 2.0 }
+  - match: { aggregations.diversified.max_number.value: 4.0 }
+
+---
+force global ordinals mode:
+  - do:
+      search:
+        body:
+          size: 0
+          query:
+            query_string:
+              query: 'tags:kibana OR tags:javascript'
+          aggs:
+            diversified:
+              diversified_sampler:
+                field: author
+                execution_hint: global_ordinals
+              aggs:
+                min_number:
+                  min:
+                    field: number
+                max_number:
+                  max:
+                    field: number
+
+  - match: { hits.total.value: 4 }
+  - match: { aggregations.diversified.doc_count: 2 }
+  - match: { aggregations.diversified.min_number.value: 2.0 }
+  - match: { aggregations.diversified.max_number.value: 4.0 }
+
+---
+enable hash mode mode:
+  - do:
+      search:
+        body:
+          size: 0
+          query:
+            query_string:
+              query: 'tags:kibana OR tags:javascript'
+          aggs:
+            diversified:
+              diversified_sampler:
+                field: author
+                execution_hint: bytes_hash
+              aggs:
+                min_number:
+                  min:
+                    field: number
+                max_number:
+                  max:
+                    field: number
+
+  # This mode can have hash collisions. The hash is seeded with tests.seed
+  # so we have to have weaker constraints on these hits
+  - match: { hits.total.value: 4 }
+  - lte: { aggregations.diversified.doc_count: 2 }
+  - gte: { aggregations.diversified.doc_count: 1 }
+  - gte: { aggregations.diversified.min_number.value: 2.0 }
+  - lte: { aggregations.diversified.min_number.value: 4.0 }
+  - match: { aggregations.diversified.max_number.value: 4.0 }