3 gadi atpakaļ · e590e900a4
--- a/docs/reference/aggregations/bucket.asciidoc
+++ b/docs/reference/aggregations/bucket.asciidoc
@@ -36,6 +36,8 @@ include::bucket/filter-aggregation.asciidoc[]
 
				 
			
 
				 include::bucket/filters-aggregation.asciidoc[]
			
 
				 
			
 
				+include::bucket/frequent-items-aggregation.asciidoc[]
			
 
				+
			
 
				 include::bucket/geodistance-aggregation.asciidoc[]
			
 
				 
			
 
				 include::bucket/geohashgrid-aggregation.asciidoc[]
			
--- a/docs/reference/aggregations/bucket/frequent-items-aggregation.asciidoc
+++ b/docs/reference/aggregations/bucket/frequent-items-aggregation.asciidoc
@@ -0,0 +1,298 @@
 
				+[[search-aggregations-bucket-frequent-items-aggregation]]
			
 
				+=== Frequent items aggregation
			
 
				+++++
			
 
				+<titleabbrev>Frequent items</titleabbrev>
			
 
				+++++
			
 
				+
			
 
				+experimental::[]
			
 
				+
			
 
				+A bucket aggregation which finds frequent item sets. It
			
 
				+is a form of association rules mining that identifies items that often occur 
			
 
				+together. It also helps you to discover relationships between different data 
			
 
				+points (items). Items that are frequently purchased together or log events that 
			
 
				+tend to co-occur are examples of frequent item sets. Finding frequent item sets 
			
 
				+helps to discover relationships between different data points (items).
			
 
				+
			
 
				+The aggregation reports closed item sets. A frequent item set is called closed 
			
 
				+if no superset exists with the same ratio of documents (also known as its 
			
 
				+<<frequent-items-minimum-support,support value>>). For example, we have the two 
			
 
				+following candidates for a frequent item set, which have the same support value:
			
 
				+1. `apple, orange, banana`
			
 
				+2. `apple, orange, banana, tomato`.
			
 
				+Only the second item set (`apple, orange, banana, tomato`) is returned, and the 
			
 
				+first set – which is a subset of the second one – is skipped. Both item sets 
			
 
				+might be returned if their support values are different.
			
 
				+
			
 
				+
			
 
				+==== Syntax
			
 
				+
			
 
				+A `frequent_items` aggregation looks like this in isolation:
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+"frequent_items": {
			
 
				+  "minimum_set_size": 3,
			
 
				+  "fields": [
			
 
				+    {"field": "my_field_1"},
			
 
				+    {"field": "my_field_2"}
			
 
				+  ]
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// NOTCONSOLE
			
 
				+
			
 
				+.`frequent_items` Parameters
			
 
				+|===
			
 
				+|Parameter Name |Description |Required |Default Value
			
 
				+|`fields` |(array) Fields to analyze. | Required |
			
 
				+|`minimum_set_size` | (integer) The <<frequent-items-minimum-set-size,minimum size>> of one item set. | Optional | `1`
			
 
				+|`minimum_support` | (integer) The <<frequent-items-minimum-support,minimum support>> of one item set. | Optional | `0.1`
			
 
				+|`size` | (integer) The number of top item sets to return. | Optional | `10`
			
 
				+|===
			
 
				+
			
 
				+[discrete]
			
 
				+[[frequent-items-minimum-set-size]]
			
 
				+==== Minimum set size
			
 
				+
			
 
				+The minimum set size is the minimum number of items the set needs to contain. A 
			
 
				+value of 1 returns the frequency of single items. The higher the minimum set 
			
 
				+size the less items are returned. For example, the item set `orange, banana, 
			
 
				+apple` is only returned if the minimum set size is 3 or lower.
			
 
				+
			
 
				+[discrete]
			
 
				+[[frequent-items-minimum-support]]
			
 
				+==== Minimum support
			
 
				+
			
 
				+The minimum support value is the ratio of documents that an item set must exist 
			
 
				+in to be considered "frequent". In particular, it is a normalized value between 
			
 
				+0 and 1. It is calculated by dividing the number of documents containing the 
			
 
				+item set by the total number of documents.
			
 
				+
			
 
				+For example, if a given item set is contained by five documents and the total 
			
 
				+number of documents is 20, then the support of the item set is 5/20 = 0.25. 
			
 
				+Therefore, this set is returned only if the minimum support is 0.25 or lower. 
			
 
				+As a higher minimum support prunes more items, the calculation is less resource 
			
 
				+intensive. The `minimum_support` parameter has an effect on the required memory 
			
 
				+and the runtime of the aggregation.
			
 
				+
			
 
				+
			
 
				+[discrete]
			
 
				+[[frequent-items-size]]
			
 
				+==== Size
			
 
				+
			
 
				+This parameter defines the maximum number of item sets to return. The result 
			
 
				+contains top-k item sets; the item sets with the highest support values. This 
			
 
				+parameter has a significant effect on the required memory and the runtime of the 
			
 
				+aggregation.
			
 
				+
			
 
				+
			
 
				+[discrete]
			
 
				+[[frequent-items-example]]
			
 
				+==== Examples
			
 
				+
			
 
				+In the following examples, we use the e-commerce {kib} sample data set.
			
 
				+
			
 
				+In the first example, the goal is to find out based on transaction data (1.) 
			
 
				+from what product categories the customers purchase products frequently together 
			
 
				+and (2.) from which cities they make those purchases. We are interested in sets 
			
 
				+with three or more items, and want to see the first three frequent item sets 
			
 
				+with the highest support.
			
 
				+
			
 
				+[source,console]
			
 
				+-------------------------------------------------
			
 
				+GET kibana_sample_data_ecommerce /_search 
			
 
				+{
			
 
				+  "size": 0,
			
 
				+  "aggs": {
			
 
				+    "my_agg": {
			
 
				+      "frequent_items": {
			
 
				+        "minimum_set_size": 3,
			
 
				+        "fields": [
			
 
				+          { "field": "category.keyword" },
			
 
				+          { "field": "geoip.city_name" }
			
 
				+        ],
			
 
				+        "size": 3
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+-------------------------------------------------
			
 
				+// TEST[skip:setup kibana sample data]
			
 
				+
			
 
				+The API returns a response similar to the following one:
			
 
				+
			
 
				+[source,console-result]
			
 
				+-------------------------------------------------
			
 
				+(...)
			
 
				+"aggregations" : {
			
 
				+    "my_agg" : {
			
 
				+      "buckets" : [
			
 
				+        {
			
 
				+          "key" : {
			
 
				+            "category.keyword" : [
			
 
				+              "Women's Clothing",
			
 
				+              "Women's Shoes"
			
 
				+            ],
			
 
				+            "geoip.city_name" : [
			
 
				+              "New York"
			
 
				+            ]
			
 
				+          },
			
 
				+          "doc_count" : 217,
			
 
				+          "support" : 0.04641711229946524
			
 
				+        },
			
 
				+        {
			
 
				+          "key" : {
			
 
				+            "category.keyword" : [
			
 
				+              "Women's Clothing",
			
 
				+              "Women's Accessories"
			
 
				+            ],
			
 
				+            "geoip.city_name" : [
			
 
				+              "New York"
			
 
				+            ]
			
 
				+          },
			
 
				+          "doc_count" : 135,
			
 
				+          "support" : 0.028877005347593583
			
 
				+        },
			
 
				+        {
			
 
				+          "key" : {
			
 
				+            "category.keyword" : [
			
 
				+              "Men's Clothing",
			
 
				+              "Men's Shoes"
			
 
				+            ],
			
 
				+            "geoip.city_name" : [
			
 
				+              "Cairo"
			
 
				+            ]
			
 
				+          },
			
 
				+          "doc_count" : 123,
			
 
				+          "support" : 0.026310160427807486
			
 
				+        }
			
 
				+      ],
			
 
				+    (...) 
			
 
				+  }
			
 
				+}
			
 
				+-------------------------------------------------
			
 
				+// TEST[skip:setup kibana sample data]
			
 
				+
			
 
				+The response shows that the categories customers purchase from most frequently 
			
 
				+together are `Women's Clothing` and `Women's Shoes` and customers from New York 
			
 
				+tend to buy items from these categories frequently togeher. In other words, 
			
 
				+customers who buy products labelled Women's Clothing more likely buy products 
			
 
				+also from the Women's Shoes category and customers from New York most likely buy 
			
 
				+products from these categories together. The item set with the second highest 
			
 
				+support is `Women's Clothing` and `Women's Accessories` with customers mostly 
			
 
				+from New York. Finally, the item set with the third highest support is 
			
 
				+`Men's Clothing` and `Men's Shoes` with customers mostly from Cairo.
			
 
				+
			
 
				+The frequent items aggregation enables you to bucket numeric values by using 
			
 
				+<<runtime,runtime fields>>. The next example demonstrates how to use a script to 
			
 
				+add a runtime field to your documents that called `price_range` which is 
			
 
				+calculated from the taxful total price of the individual transactions. The 
			
 
				+runtime field then can be used in the frequent items aggregation as a field to 
			
 
				+analyze.
			
 
				+
			
 
				+
			
 
				+[source,console]
			
 
				+-------------------------------------------------
			
 
				+GET kibana_sample_data_ecommerce/_search
			
 
				+{
			
 
				+  "runtime_mappings": {
			
 
				+    "price_range": {
			
 
				+      "type": "keyword",
			
 
				+      "script": {
			
 
				+        "source": """
			
 
				+           def bucket_start = (long) Math.floor(doc['taxful_total_price'].value / 50) * 50;
			
 
				+           def bucket_end = bucket_start + 50;
			
 
				+           emit(bucket_start.toString() + "-" + bucket_end.toString());
			
 
				+        """
			
 
				+      }
			
 
				+    }
			
 
				+  },
			
 
				+  "size": 0,
			
 
				+  "aggs": {
			
 
				+    "my_agg": {
			
 
				+      "frequent_items": {
			
 
				+        "minimum_set_size": 4,
			
 
				+        "fields": [
			
 
				+          {
			
 
				+            "field": "category.keyword"
			
 
				+          },
			
 
				+          {
			
 
				+            "field": "price_range"
			
 
				+          },
			
 
				+          {
			
 
				+            "field": "geoip.city_name"
			
 
				+          }
			
 
				+        ],
			
 
				+        "size": 3
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+-------------------------------------------------
			
 
				+// TEST[skip:setup kibana sample data]
			
 
				+
			
 
				+The API returns a response similar to the following one:
			
 
				+
			
 
				+[source,console-result]
			
 
				+-------------------------------------------------
			
 
				+(...)
			
 
				+"aggregations" : {
			
 
				+    "my_agg" : {
			
 
				+      "buckets" : [
			
 
				+        {
			
 
				+          "key" : {
			
 
				+            "category.keyword" : [
			
 
				+              "Women's Clothing",
			
 
				+              "Women's Shoes"
			
 
				+            ],
			
 
				+            "price_range" : [
			
 
				+              "50-100"
			
 
				+            ],
			
 
				+            "geoip.city_name" : [
			
 
				+              "New York"
			
 
				+            ]
			
 
				+          },
			
 
				+          "doc_count" : 100,
			
 
				+          "support" : 0.0213903743315508
			
 
				+        },
			
 
				+        {
			
 
				+          "key" : {
			
 
				+            "category.keyword" : [
			
 
				+              "Women's Clothing",
			
 
				+              "Women's Shoes"
			
 
				+            ],
			
 
				+            "price_range" : [
			
 
				+              "50-100"
			
 
				+            ],
			
 
				+            "geoip.city_name" : [
			
 
				+              "Dubai"
			
 
				+            ]
			
 
				+          },
			
 
				+          "doc_count" : 59,
			
 
				+          "support" : 0.012620320855614974
			
 
				+        },
			
 
				+        {
			
 
				+          "key" : {
			
 
				+            "category.keyword" : [
			
 
				+              "Men's Clothing",
			
 
				+              "Men's Shoes"
			
 
				+            ],
			
 
				+            "price_range" : [
			
 
				+              "50-100"
			
 
				+            ],
			
 
				+            "geoip.city_name" : [
			
 
				+              "Marrakesh"
			
 
				+            ]
			
 
				+          },
			
 
				+          "doc_count" : 53,
			
 
				+          "support" : 0.011336898395721925
			
 
				+        }
			
 
				+      ],
			
 
				+    (...)
			
 
				+    }
			
 
				+  }
			
 
				+-------------------------------------------------
			
 
				+// TEST[skip:setup kibana sample data]
			
 
				+
			
 
				+The response shows the categories that customers purchase from most frequently 
			
 
				+together, the location of the customers who tend to buy items from these 
			
 
				+categories, and the most frequent price ranges of these purchases.