3 years ago · e590e900a4
--- a/docs/reference/aggregations/bucket.asciidoc
+++ b/docs/reference/aggregations/bucket.asciidoc
@@ -36,6 +36,8 @@ include::bucket/filter-aggregation.asciidoc[]
 
															 include::bucket/filters-aggregation.asciidoc[]
														
 
															+include::bucket/frequent-items-aggregation.asciidoc[]
														
 
															+
														
 
															 include::bucket/geodistance-aggregation.asciidoc[]
														
 
															 include::bucket/geohashgrid-aggregation.asciidoc[]
														
--- a/docs/reference/aggregations/bucket/frequent-items-aggregation.asciidoc
+++ b/docs/reference/aggregations/bucket/frequent-items-aggregation.asciidoc
@@ -0,0 +1,298 @@
 
															+[[search-aggregations-bucket-frequent-items-aggregation]]
														
 
															+=== Frequent items aggregation
														
 
															+++++
														
 
															+<titleabbrev>Frequent items</titleabbrev>
														
 
															+++++
														
 
															+
														
 
															+experimental::[]
														
 
															+
														
 
															+A bucket aggregation which finds frequent item sets. It
														
 
															+is a form of association rules mining that identifies items that often occur 
														
 
															+together. It also helps you to discover relationships between different data 
														
 
															+points (items). Items that are frequently purchased together or log events that 
														
 
															+tend to co-occur are examples of frequent item sets. Finding frequent item sets 
														
 
															+helps to discover relationships between different data points (items).
														
 
															+
														
 
															+The aggregation reports closed item sets. A frequent item set is called closed 
														
 
															+if no superset exists with the same ratio of documents (also known as its 
														
 
															+<<frequent-items-minimum-support,support value>>). For example, we have the two 
														
 
															+following candidates for a frequent item set, which have the same support value:
														
 
															+1. `apple, orange, banana`
														
 
															+2. `apple, orange, banana, tomato`.
														
 
															+Only the second item set (`apple, orange, banana, tomato`) is returned, and the 
														
 
															+first set – which is a subset of the second one – is skipped. Both item sets 
														
 
															+might be returned if their support values are different.
														
 
															+
														
 
															+
														
 
															+==== Syntax
														
 
															+
														
 
															+A `frequent_items` aggregation looks like this in isolation:
														
 
															+
														
 
															+[source,js]
														
 
															+--------------------------------------------------
														
 
															+"frequent_items": {
														
 
															+  "minimum_set_size": 3,
														
 
															+  "fields": [
														
 
															+    {"field": "my_field_1"},
														
 
															+    {"field": "my_field_2"}
														
 
															+  ]
														
 
															+}
														
 
															+--------------------------------------------------
														
 
															+// NOTCONSOLE
														
 
															+
														
 
															+.`frequent_items` Parameters
														
 
															+|===
														
 
															+|Parameter Name |Description |Required |Default Value
														
 
															+|`fields` |(array) Fields to analyze. | Required |
														
 
															+|`minimum_set_size` | (integer) The <<frequent-items-minimum-set-size,minimum size>> of one item set. | Optional | `1`
														
 
															+|`minimum_support` | (integer) The <<frequent-items-minimum-support,minimum support>> of one item set. | Optional | `0.1`
														
 
															+|`size` | (integer) The number of top item sets to return. | Optional | `10`
														
 
															+|===
														
 
															+
														
 
															+[discrete]
														
 
															+[[frequent-items-minimum-set-size]]
														
 
															+==== Minimum set size
														
 
															+
														
 
															+The minimum set size is the minimum number of items the set needs to contain. A 
														
 
															+value of 1 returns the frequency of single items. The higher the minimum set 
														
 
															+size the less items are returned. For example, the item set `orange, banana, 
														
 
															+apple` is only returned if the minimum set size is 3 or lower.
														
 
															+
														
 
															+[discrete]
														
 
															+[[frequent-items-minimum-support]]
														
 
															+==== Minimum support
														
 
															+
														
 
															+The minimum support value is the ratio of documents that an item set must exist 
														
 
															+in to be considered "frequent". In particular, it is a normalized value between 
														
 
															+0 and 1. It is calculated by dividing the number of documents containing the 
														
 
															+item set by the total number of documents.
														
 
															+
														
 
															+For example, if a given item set is contained by five documents and the total 
														
 
															+number of documents is 20, then the support of the item set is 5/20 = 0.25. 
														
 
															+Therefore, this set is returned only if the minimum support is 0.25 or lower. 
														
 
															+As a higher minimum support prunes more items, the calculation is less resource 
														
 
															+intensive. The `minimum_support` parameter has an effect on the required memory 
														
 
															+and the runtime of the aggregation.
														
 
															+
														
 
															+
														
 
															+[discrete]
														
 
															+[[frequent-items-size]]
														
 
															+==== Size
														
 
															+
														
 
															+This parameter defines the maximum number of item sets to return. The result 
														
 
															+contains top-k item sets; the item sets with the highest support values. This 
														
 
															+parameter has a significant effect on the required memory and the runtime of the 
														
 
															+aggregation.
														
 
															+
														
 
															+
														
 
															+[discrete]
														
 
															+[[frequent-items-example]]
														
 
															+==== Examples
														
 
															+
														
 
															+In the following examples, we use the e-commerce {kib} sample data set.
														
 
															+
														
 
															+In the first example, the goal is to find out based on transaction data (1.) 
														
 
															+from what product categories the customers purchase products frequently together 
														
 
															+and (2.) from which cities they make those purchases. We are interested in sets 
														
 
															+with three or more items, and want to see the first three frequent item sets 
														
 
															+with the highest support.
														
 
															+
														
 
															+[source,console]
														
 
															+-------------------------------------------------
														
 
															+GET kibana_sample_data_ecommerce /_search 
														
 
															+{
														
 
															+  "size": 0,
														
 
															+  "aggs": {
														
 
															+    "my_agg": {
														
 
															+      "frequent_items": {
														
 
															+        "minimum_set_size": 3,
														
 
															+        "fields": [
														
 
															+          { "field": "category.keyword" },
														
 
															+          { "field": "geoip.city_name" }
														
 
															+        ],
														
 
															+        "size": 3
														
 
															+      }
														
 
															+    }
														
 
															+  }
														
 
															+}
														
 
															+-------------------------------------------------
														
 
															+// TEST[skip:setup kibana sample data]
														
 
															+
														
 
															+The API returns a response similar to the following one:
														
 
															+
														
 
															+[source,console-result]
														
 
															+-------------------------------------------------
														
 
															+(...)
														
 
															+"aggregations" : {
														
 
															+    "my_agg" : {
														
 
															+      "buckets" : [
														
 
															+        {
														
 
															+          "key" : {
														
 
															+            "category.keyword" : [
														
 
															+              "Women's Clothing",
														
 
															+              "Women's Shoes"
														
 
															+            ],
														
 
															+            "geoip.city_name" : [
														
 
															+              "New York"
														
 
															+            ]
														
 
															+          },
														
 
															+          "doc_count" : 217,
														
 
															+          "support" : 0.04641711229946524
														
 
															+        },
														
 
															+        {
														
 
															+          "key" : {
														
 
															+            "category.keyword" : [
														
 
															+              "Women's Clothing",
														
 
															+              "Women's Accessories"
														
 
															+            ],
														
 
															+            "geoip.city_name" : [
														
 
															+              "New York"
														
 
															+            ]
														
 
															+          },
														
 
															+          "doc_count" : 135,
														
 
															+          "support" : 0.028877005347593583
														
 
															+        },
														
 
															+        {
														
 
															+          "key" : {
														
 
															+            "category.keyword" : [
														
 
															+              "Men's Clothing",
														
 
															+              "Men's Shoes"
														
 
															+            ],
														
 
															+            "geoip.city_name" : [
														
 
															+              "Cairo"
														
 
															+            ]
														
 
															+          },
														
 
															+          "doc_count" : 123,
														
 
															+          "support" : 0.026310160427807486
														
 
															+        }
														
 
															+      ],
														
 
															+    (...) 
														
 
															+  }
														
 
															+}
														
 
															+-------------------------------------------------
														
 
															+// TEST[skip:setup kibana sample data]
														
 
															+
														
 
															+The response shows that the categories customers purchase from most frequently 
														
 
															+together are `Women's Clothing` and `Women's Shoes` and customers from New York 
														
 
															+tend to buy items from these categories frequently togeher. In other words, 
														
 
															+customers who buy products labelled Women's Clothing more likely buy products 
														
 
															+also from the Women's Shoes category and customers from New York most likely buy 
														
 
															+products from these categories together. The item set with the second highest 
														
 
															+support is `Women's Clothing` and `Women's Accessories` with customers mostly 
														
 
															+from New York. Finally, the item set with the third highest support is 
														
 
															+`Men's Clothing` and `Men's Shoes` with customers mostly from Cairo.
														
 
															+
														
 
															+The frequent items aggregation enables you to bucket numeric values by using 
														
 
															+<<runtime,runtime fields>>. The next example demonstrates how to use a script to 
														
 
															+add a runtime field to your documents that called `price_range` which is 
														
 
															+calculated from the taxful total price of the individual transactions. The 
														
 
															+runtime field then can be used in the frequent items aggregation as a field to 
														
 
															+analyze.
														
 
															+
														
 
															+
														
 
															+[source,console]
														
 
															+-------------------------------------------------
														
 
															+GET kibana_sample_data_ecommerce/_search
														
 
															+{
														
 
															+  "runtime_mappings": {
														
 
															+    "price_range": {
														
 
															+      "type": "keyword",
														
 
															+      "script": {
														
 
															+        "source": """
														
 
															+           def bucket_start = (long) Math.floor(doc['taxful_total_price'].value / 50) * 50;
														
 
															+           def bucket_end = bucket_start + 50;
														
 
															+           emit(bucket_start.toString() + "-" + bucket_end.toString());
														
 
															+        """
														
 
															+      }
														
 
															+    }
														
 
															+  },
														
 
															+  "size": 0,
														
 
															+  "aggs": {
														
 
															+    "my_agg": {
														
 
															+      "frequent_items": {
														
 
															+        "minimum_set_size": 4,
														
 
															+        "fields": [
														
 
															+          {
														
 
															+            "field": "category.keyword"
														
 
															+          },
														
 
															+          {
														
 
															+            "field": "price_range"
														
 
															+          },
														
 
															+          {
														
 
															+            "field": "geoip.city_name"
														
 
															+          }
														
 
															+        ],
														
 
															+        "size": 3
														
 
															+      }
														
 
															+    }
														
 
															+  }
														
 
															+}
														
 
															+-------------------------------------------------
														
 
															+// TEST[skip:setup kibana sample data]
														
 
															+
														
 
															+The API returns a response similar to the following one:
														
 
															+
														
 
															+[source,console-result]
														
 
															+-------------------------------------------------
														
 
															+(...)
														
 
															+"aggregations" : {
														
 
															+    "my_agg" : {
														
 
															+      "buckets" : [
														
 
															+        {
														
 
															+          "key" : {
														
 
															+            "category.keyword" : [
														
 
															+              "Women's Clothing",
														
 
															+              "Women's Shoes"
														
 
															+            ],
														
 
															+            "price_range" : [
														
 
															+              "50-100"
														
 
															+            ],
														
 
															+            "geoip.city_name" : [
														
 
															+              "New York"
														
 
															+            ]
														
 
															+          },
														
 
															+          "doc_count" : 100,
														
 
															+          "support" : 0.0213903743315508
														
 
															+        },
														
 
															+        {
														
 
															+          "key" : {
														
 
															+            "category.keyword" : [
														
 
															+              "Women's Clothing",
														
 
															+              "Women's Shoes"
														
 
															+            ],
														
 
															+            "price_range" : [
														
 
															+              "50-100"
														
 
															+            ],
														
 
															+            "geoip.city_name" : [
														
 
															+              "Dubai"
														
 
															+            ]
														
 
															+          },
														
 
															+          "doc_count" : 59,
														
 
															+          "support" : 0.012620320855614974
														
 
															+        },
														
 
															+        {
														
 
															+          "key" : {
														
 
															+            "category.keyword" : [
														
 
															+              "Men's Clothing",
														
 
															+              "Men's Shoes"
														
 
															+            ],
														
 
															+            "price_range" : [
														
 
															+              "50-100"
														
 
															+            ],
														
 
															+            "geoip.city_name" : [
														
 
															+              "Marrakesh"
														
 
															+            ]
														
 
															+          },
														
 
															+          "doc_count" : 53,
														
 
															+          "support" : 0.011336898395721925
														
 
															+        }
														
 
															+      ],
														
 
															+    (...)
														
 
															+    }
														
 
															+  }
														
 
															+-------------------------------------------------
														
 
															+// TEST[skip:setup kibana sample data]
														
 
															+
														
 
															+The response shows the categories that customers purchase from most frequently 
														
 
															+together, the location of the customers who tend to buy items from these 
														
 
															+categories, and the most frequent price ranges of these purchases.