Browse Source

[DOCS] Adds data frame analytics APIs to the ML APIs (#43875)

This PR adds the reference documentation pages of the data frame analytics APIs (PUT, START, STOP, GET, GET stats, DELETE, Evaluate) to the ML APIs pool.
István Zoltán Szabó 6 years ago
parent
commit
cccf5bac43

+ 39 - 0
docs/build.gradle

@@ -1146,3 +1146,42 @@ buildRestTests.setups['kibana_sample_data_ecommerce'] = '''
               number_of_shards: 1
               number_of_replicas: 0
 '''
+buildRestTests.setups['setup_logdata'] = '''
+  - do:
+        indices.create:
+          index: logdata
+          body:
+            settings:
+              number_of_shards: 1
+              number_of_replicas: 1
+            mappings:
+              properties:
+                grade:
+                  type: byte
+  - do:
+        bulk:
+          index: logdata
+          refresh: true
+          body: |
+            {"index":{}}
+            {"grade": 100, "weight": 2}
+            {"index":{}}
+            {"grade": 50, "weight": 3}
+'''
+buildRestTests.setups['logdata_job'] = buildRestTests.setups['setup_logdata'] + '''
+  - do:
+      ml.put_data_frame_analytics:
+        id: "loganalytics"
+        body:  >
+          {
+            "source": { 
+              "index": "logdata"
+              },
+            "dest": {
+              "index": "logdata_out"
+              },
+            "analysis": {
+              "outlier_detection": {}
+            }
+          }
+'''

+ 52 - 0
docs/reference/ml/apis/delete-dfanalytics.asciidoc

@@ -0,0 +1,52 @@
+[role="xpack"]
+[testenv="platinum"]
+[[delete-dfanalytics]]
+=== Delete {dfanalytics-jobs} API
+[subs="attributes"]
+++++
+<titleabbrev>Delete {dfanalytics-jobs}</titleabbrev>
+++++
+
+experimental[]
+
+Deletes an existing {dfanalytics-job}.
+
+[[ml-delete-dfanalytics-request]]
+==== {api-request-title}
+
+`DELETE _ml/data_frame/analytics/<data_frame_analytics_id>`
+
+[[ml-delete-dfanalytics-prereq]]
+==== {api-prereq-title}
+
+* You must have `machine_learning_admin` built-in role to use this API. For more 
+information, see {stack-ov}/security-privileges.html[Security privileges] and 
+{stack-ov}/built-in-roles.html[Built-in roles].
+
+[[ml-delete-dfanalytics-path-params]]
+==== {api-path-parms-title}
+
+`<data_frame_analytics_id>` (Required)::
+  (string) Identifier for the {dfanalytics-job} you want to delete.
+
+[[ml-delete-dfanalytics-example]]
+==== {api-examples-title}
+
+The following example deletes the `loganalytics` {dfanalytics-job}:
+
+[source,js]
+--------------------------------------------------
+DELETE _ml/data_frame/analytics/loganalytics
+--------------------------------------------------
+// CONSOLE
+// TEST[skip:TBD]
+
+The API returns the following result:
+
+[source,js]
+----
+{
+  "acknowledged" : true
+}
+----
+// TESTRESPONSE

+ 105 - 0
docs/reference/ml/apis/evaluate-dfanalytics.asciidoc

@@ -0,0 +1,105 @@
+[role="xpack"]
+[testenv="platinum"]
+[[evaluate-dfanalytics]]
+=== Evaluate {dfanalytics} API
+
+[subs="attributes"]
+++++
+<titleabbrev>Evaluate {dfanalytics}</titleabbrev>
+++++
+
+experimental[]
+
+Evaluates the executed analysis on an index that is already annotated with a 
+field that contains the results of the analytics (the `ground truth`) for each 
+{dataframe} row. Evaluation is typically done via calculating a set of metrics 
+that capture various aspects of the quality of the results over the data for 
+which we have the `ground truth`. For different types of analyses different 
+metrics are suitable. This API packages together commonly used metrics for 
+various analyses.
+
+[[ml-evaluate-dfanalytics-request]]
+==== {api-request-title}
+
+`POST _ml/data_frame/_evaluate`
+
+[[ml-evaluate-dfanalytics-prereq]]
+==== {api-prereq-title}
+
+* You must have `monitor_ml` privilege to use this API. For more 
+information, see {stack-ov}/security-privileges.html[Security privileges] and 
+{stack-ov}/built-in-roles.html[Built-in roles].
+
+[[ml-evaluate-dfanalytics-request-body]]
+==== {api-request-body-title}
+
+`index` (Required)::
+  (object) Defines the `index` in which the evaluation will be performed.
+  
+`evaluation` (Required)::
+  (object) Defines the type of evaluation you want to perform. For example: 
+  `binary_soft_classification`.
+  See Evaluate API resources.
+
+[[ml-evaluate-dfanalytics-example]]
+==== {api-examples-title}
+
+[source,js]
+--------------------------------------------------
+POST _ml/data_frame/_evaluate
+{
+  "index": "my_analytics_dest_index",
+  "evaluation": {
+    "binary_soft_classification": {
+      "actual_field": "is_outlier",
+      "predicted_probability_field": "ml.outlier_score"
+    }
+  }
+}
+--------------------------------------------------
+// CONSOLE
+// TEST[skip:TBD]
+
+The API returns the following results:
+
+[source,js]
+----
+{
+  "binary_soft_classification": {
+    "auc_roc": {
+      "score": 0.92584757746414444
+    },
+    "confusion_matrix": {
+      "0.25": {
+          "tp": 5,
+          "fp": 9,
+          "tn": 204,
+          "fn": 5
+      },
+      "0.5": {
+          "tp": 1,
+          "fp": 5,
+          "tn": 208,
+          "fn": 9
+      },
+      "0.75": {
+          "tp": 0,
+          "fp": 4,
+          "tn": 209,
+          "fn": 10
+      }
+    },
+    "precision": {
+        "0.25": 0.35714285714285715,
+        "0.5": 0.16666666666666666,
+        "0.75": 0
+    },
+    "recall": {
+        "0.25": 0.5,
+        "0.5": 0.1,
+        "0.75": 0
+    }
+  }
+}
+----
+// TESTRESPONSE

+ 91 - 0
docs/reference/ml/apis/get-dfanalytics-stats.asciidoc

@@ -0,0 +1,91 @@
+[role="xpack"]
+[testenv="platinum"]
+[[get-dfanalytics-stats]]
+=== Get {dfanalytics-jobs} statistics API
+[subs="attributes"]
+++++
+<titleabbrev>Get {dfanalytics-jobs} stats</titleabbrev>
+++++
+
+experimental[]
+
+Retrieves usage information for {dfanalytics-jobs}.
+
+[[ml-get-dfanalytics-stats-request]]
+==== {api-request-title}
+
+`GET _ml/data_frame/analytics/<data_frame_analytics_id>/_stats` +
+
+`GET _ml/data_frame/analytics/<data_frame_analytics_id>,<data_frame_analytics_id>/_stats` +
+
+`GET _ml/data_frame/analytics/_stats` +
+
+`GET _ml/data_frame/analytics/_all/_stats` +
+
+`GET _ml/data_frame/analytics/*/_stats`
+
+[[ml-get-dfanalytics-stats-prereq]]
+==== {api-prereq-title}
+
+* You must have `monitor_ml` privilege to use this API. For more 
+information, see {stack-ov}/security-privileges.html[Security privileges] and 
+{stack-ov}/built-in-roles.html[Built-in roles].
+
+[[ml-get-dfanalytics-stats-path-params]]
+==== {api-path-parms-title}
+
+`<data_frame_analytics_id>` (Optional)::
+  (string) Identifier for the {dfanalytics-job}. If you do not specify one of 
+  these options, the API returns information for the first hundred
+  {dfanalytics-jobs}.
+  
+`allow_no_match` (Optional)::
+  (boolean) If `false` and the `data_frame_analytics_id` does not match any 
+  {dfanalytics-job} an error will be returned. The default value is `true`.
+
+[[ml-get-dfanalytics-stats-query-params]]
+==== {api-query-parms-title}
+
+`from` (Optional)::
+  (integer) Skips the specified number of {dfanalytics-jobs}. The default value 
+  is `0`.
+
+`size` (Optional)::
+  (integer) Specifies the maximum number of {dfanalytics-jobs} to obtain. The 
+  default value is `100`.
+
+[discrete]
+[[ml-get-dfanalytics-stats-response-body]]
+==== {api-response-body-title}
+
+The API returns the following information:
+
+`data_frame_analytics`::
+  (array) An array of statistics objects for {dfanalytics-jobs}, which are
+  sorted by the `id` value in ascending order.
+
+[[ml-get-dfanalytics-stats-example]]
+==== {api-examples-title}
+
+[source,js]
+--------------------------------------------------
+GET _ml/data_frame/analytics/loganalytics/_stats
+--------------------------------------------------
+// CONSOLE
+// TEST[skip:TBD]
+
+The API returns the following results:
+
+[source,js]
+----
+{
+    "count": 1,
+    "data_frame_analytics": [
+        {
+            "id": "loganalytics",
+            "state": "stopped"
+        }
+    ]
+}
+----
+// TESTRESPONSE

+ 106 - 0
docs/reference/ml/apis/get-dfanalytics.asciidoc

@@ -0,0 +1,106 @@
+[role="xpack"]
+[testenv="platinum"]
+[[get-dfanalytics]]
+=== Get {dfanalytics-jobs} API
+[subs="attributes"]
+++++
+<titleabbrev>Get {dfanalytics-jobs}</titleabbrev>
+++++
+
+experimental[]
+
+Retrieves configuration information for {dfanalytics-jobs}.
+
+[[ml-get-dfanalytics-request]]
+==== {api-request-title}
+
+`GET _ml/data_frame/analytics/<data_frame_analytics_id>` +
+
+`GET _ml/data_frame/analytics/<data_frame_analytics_id>,<data_frame_analytics_id>` +
+
+`GET _ml/data_frame/analytics/` +
+
+`GET _ml/data_frame/analytics/_all`
+
+[[ml-get-dfanalytics-prereq]]
+==== {api-prereq-title}
+
+* You must have `monitor_ml` privilege to use this API. For more 
+information, see {stack-ov}/security-privileges.html[Security privileges] and 
+{stack-ov}/built-in-roles.html[Built-in roles].
+
+[[ml-get-dfanalytics-desc]]
+==== {api-description-title}
+
+You can get information for multiple {dfanalytics-jobs} in a single API request 
+by using a comma-separated list of {dfanalytics-jobs} or a wildcard expression. 
+You can get information for all {dfanalytics-jobs} by using `_all`, by specifying 
+`*` as the `<data_frame_analytics_id>`, or by omitting the
+`<data_frame_analytics_id>`.
+
+[[ml-get-dfanalytics-path-params]]
+==== {api-path-parms-title}
+
+`<data_frame_analytics_id>` (Optional)::
+  (string) Identifier for the {dfanalytics-job}. If you do not specify one of 
+  these options, the API returns information for the first hundred
+  {dfanalytics-jobs}.
+  
+`allow_no_match` (Optional)::
+  (boolean) If `false` and the `data_frame_analytics_id` does not match any 
+  {dfanalytics-job} an error will be returned. The default value is `true`.
+
+[[ml-get-dfanalytics-query-params]]
+==== {api-query-parms-title}
+
+`from` (Optional)::
+  (integer) Skips the specified number of {dfanalytics-jobs}. The default value 
+  is `0`.
+
+`size` (Optional)::
+  (integer) Specifies the maximum number of {dfanalytics-jobs} to obtain. The 
+  default value is `100`.
+
+[[ml-get-dfanalytics-example]]
+==== {api-examples-title}
+
+The following example gets configuration information for the `loganalytics` 
+{dfanalytics-job}:
+
+[source,js]
+--------------------------------------------------
+GET _ml/data_frame/analytics/loganalytics
+--------------------------------------------------
+// CONSOLE
+// TEST[skip:TBD]
+
+The API returns the following results:
+
+[source,js]
+----
+{
+    "count": 1,
+    "data_frame_analytics": [
+        {
+            "id": "loganalytics",
+            "source": {
+                "index": "logdata",
+                "query": {
+                    "match_all": {}
+                }
+            },
+            "dest": {
+                "index": "logdata_out",
+                "results_field": "ml"
+            },
+            "analysis": {
+                "outlier_detection": {}
+            },
+            "model_memory_limit": "1gb",
+            "create_time": 1562265491319,
+            "version" : "8.0.0"
+        }
+    ]
+}
+----
+// TESTRESPONSE

+ 17 - 0
docs/reference/ml/apis/ml-api.asciidoc

@@ -34,7 +34,16 @@ machine learning APIs and in advanced job configuration options in Kibana.
 * <<ml-preview-datafeed,Preview {dfeed}>>
 * <<ml-update-datafeed,Update {dfeed}>>
 
+[float]
+[[ml-api-dfanalytics-endpoint]]
+=== {dfanalytics-cap} APIs
 
+* <<put-dfanalytics,Create>> or 
+<<delete-dfanalytics,delete {dfanalytics-jobs}>>
+* <<get-dfanalytics,Get {dfanalytics-jobs} info>> or
+<<get-dfanalytics-stats,statistics>>
+* <<start-dfanalytics,Start>> or <<stop-dfanalytics,stop {dfanalytics-jobs}>>
+* <<evaluate-dfanalytics,Evaluate {dfanalytics}>>
 
 [float]
 [[ml-api-job-endpoint]]
@@ -104,6 +113,7 @@ include::put-calendar.asciidoc[]
 include::put-datafeed.asciidoc[]
 include::put-filter.asciidoc[]
 include::put-job.asciidoc[]
+include::put-dfanalytics.asciidoc[]
 //DELETE
 include::delete-calendar.asciidoc[]
 include::delete-datafeed.asciidoc[]
@@ -114,6 +124,9 @@ include::delete-job.asciidoc[]
 include::delete-calendar-job.asciidoc[]
 include::delete-snapshot.asciidoc[]
 include::delete-expired-data.asciidoc[]
+include::delete-dfanalytics.asciidoc[]
+//EVALUATE
+include::evaluate-dfanalytics.asciidoc[]
 //FIND
 include::find-file-structure.asciidoc[]
 //FLUSH
@@ -135,6 +148,8 @@ include::get-snapshot.asciidoc[]
 include::get-calendar-event.asciidoc[]
 include::get-filter.asciidoc[]
 include::get-record.asciidoc[]
+include::get-dfanalytics.asciidoc[]
+include::get-dfanalytics-stats.asciidoc[]
 //OPEN
 include::open-job.asciidoc[]
 //POST
@@ -146,7 +161,9 @@ include::revert-snapshot.asciidoc[]
 //SET/START/STOP
 include::set-upgrade-mode.asciidoc[]
 include::start-datafeed.asciidoc[]
+include::start-dfanalytics.asciidoc[]
 include::stop-datafeed.asciidoc[]
+include::stop-dfanalytics.asciidoc[]
 //UPDATE
 include::update-datafeed.asciidoc[]
 include::update-filter.asciidoc[]

+ 128 - 0
docs/reference/ml/apis/put-dfanalytics.asciidoc

@@ -0,0 +1,128 @@
+[role="xpack"]
+[testenv="platinum"]
+[[put-dfanalytics]]
+=== Create {dfanalytics-jobs} API
+[subs="attributes"]
+++++
+<titleabbrev>Create {dfanalytics-jobs}</titleabbrev>
+++++
+
+experimental[]
+
+Instantiates a {dfanalytics-job}.
+
+[[ml-put-dfanalytics-request]]
+==== {api-request-title}
+
+`PUT _ml/data_frame/analytics/<data_frame_analytics_id>`
+
+[[ml-put-dfanalytics-prereq]]
+==== {api-prereq-title}
+
+* You must have `machine_learning_admin` built-in role to use this API. You must 
+also have `read` and `view_index_metadata` privileges on the source index and 
+`read`, `create_index`, and `index` privileges on the destination index. For 
+more information, see {stack-ov}/security-privileges.html[Security privileges] 
+and {stack-ov}/built-in-roles.html[Built-in roles].
+
+[[ml-put-dfanalytics-desc]]
+==== {api-description-title}
+
+This API creates a {dfanalytics-job} that performs an analysis on the source 
+index and stores the outcome in a destination index.
+
+The destination index will be automatically created if it does not exist. The 
+`index.number_of_shards` and `index.number_of_replicas` settings of the source 
+index will be copied over to the destination index. When the source index matches 
+multiple indices, these settings will be set to the maximum values found in the 
+source indices.
+
+The API also attempts to copy the mappings of the source indices over to the 
+destination index. However, if the mappings of any of the fields don't match 
+among the source indices, the attempt fails with an error message.
+
+If the destination index already exists, then it will be used as is. This makes 
+it possible to set up the destination index in advance with custom settings 
+and mappings.
+
+[[ml-put-dfanalytics-path-params]]
+==== {api-path-parms-title}
+
+`<data_frame_analytics_id>` (Required)::
+  (string) A character string that uniquely identifies the 
+  {dfanalytics-job}. This identifier can contain lowercase alphanumeric characters 
+  (a-z and 0-9), hyphens, and underscores. It must start and end with alphanumeric 
+  characters.
+
+[[ml-put-dfanalytics-request-body]]
+==== {api-request-body-title}
+
+`source` (Required)::
+  (object) The source configuration, consisting of `index` and optionally a 
+  `query`.
+  
+`dest` (Required)::
+  (object) The destination configuration, consisting of `index` and optionally 
+  `results_field` (`ml` by default).
+  
+`analysis` (Required)::
+  (object) Defines the type of {dfanalytics} you want to perform on your source 
+  index. For example: `outlier_detection`. 
+  See {oldetection} resources.
+  
+`analyzed_fields` (Optional)::
+  (object) You can specify `includes` patterns, `excludes` patterns, or both. If 
+  `analyzed_fields` is not set, only the relevant fields will be included. For 
+  example, all the numeric fields for {oldetection}.
+
+[[ml-put-dfanalytics-example]]
+==== {api-examples-title}
+
+The following example creates the `loganalytics` {dfanalytics-job}, the analysis 
+type is `outlier_detection`:
+
+[source,js]
+--------------------------------------------------
+PUT _ml/data_frame/analytics/loganalytics
+{
+  "source": {
+    "index": "logdata"
+  },
+  "dest": {
+    "index": "logdata_out"
+  },
+  "analysis": {
+    "outlier_detection": {
+    }
+  }
+}
+--------------------------------------------------
+// CONSOLE
+// TEST[setup:setup_logdata]
+
+The API returns the following result:
+
+[source,js]
+----
+{
+    "id": "loganalytics",
+    "source": {
+        "index": ["logdata"],
+        "query": {
+            "match_all": {}
+        }
+    },
+    "dest": {
+        "index": "logdata_out",
+        "results_field": "ml"
+    },
+    "analysis": {
+        "outlier_detection": {}
+    },
+    "model_memory_limit": "1gb",
+    "create_time" : 1562265491319,
+    "version" : "8.0.0"
+}
+----
+// TESTRESPONSE[s/1562265491319/$body.$_path/]
+// TESTRESPONSE[s/"version" : "8.0.0"/"version" : $body.version/]

+ 61 - 0
docs/reference/ml/apis/start-dfanalytics.asciidoc

@@ -0,0 +1,61 @@
+[role="xpack"]
+[testenv="platinum"]
+[[start-dfanalytics]]
+=== Start {dfanalytics-jobs} API
+
+[subs="attributes"]
+++++
+<titleabbrev>Start {dfanalytics-jobs}</titleabbrev>
+++++
+
+experimental[]
+
+Starts a {dfanalytics-job}.
+
+[[ml-start-dfanalytics-request]]
+==== {api-request-title}
+
+`POST _ml/data_frame/analytics/<data_frame_analytics_id>/_start`
+
+[[ml-start-dfanalytics-prereq]]
+==== {api-prereq-title}
+
+* You must have `machine_learning_admin` built-in role to use this API. You must 
+also have `read` and `view_index_metadata` privileges on the source index and 
+`read`, `create_index`, and `index` privileges on the destination index. For 
+more information, see {stack-ov}/security-privileges.html[Security privileges] 
+and {stack-ov}/built-in-roles.html[Built-in roles].
+
+[[ml-start-dfanalytics-path-params]]
+==== {api-path-parms-title}
+
+`<data_frame_analytics_id>` (Required)::
+  (string) Identifier for the {dfanalytics-job}. This identifier can contain
+  lowercase alphanumeric characters (a-z and 0-9), hyphens, and underscores. It
+  must start and end with alphanumeric characters.
+  
+`timeout` (Optional)::
+  (time) Controls the amount of time to wait until the {dfanalytics-job} starts. 
+  The default value is 20 seconds.
+
+[[ml-start-dfanalytics-example]]
+==== {api-examples-title}
+
+The following example starts the `loganalytics` {dfanalytics-job}:
+
+[source,js]
+--------------------------------------------------
+POST _ml/data_frame/analytics/loganalytics/_start
+--------------------------------------------------
+// CONSOLE
+// TEST[skip:setup:logdata_job]
+
+When the {dfanalytics-job} starts, you receive the following results:
+
+[source,js]
+----
+{
+  "acknowledged" : true
+}
+----
+// TESTRESPONSE

+ 81 - 0
docs/reference/ml/apis/stop-dfanalytics.asciidoc

@@ -0,0 +1,81 @@
+[role="xpack"]
+[testenv="platinum"]
+[[stop-dfanalytics]]
+=== Stop {dfanalytics-jobs} API
+
+[subs="attributes"]
+++++
+<titleabbrev>Stop {dfanalytics-jobs}</titleabbrev>
+++++
+
+experimental[]
+
+Stops one or more {dfanalytics-jobs}.
+
+[[ml-stop-dfanalytics-request]]
+==== {api-request-title}
+
+`POST _ml/data_frame/analytics/<data_frame_analytics_id>/_stop` +
+
+`POST _ml/data_frame/analytics/<data_frame_analytics_id>,<data_frame_analytics_id>/_stop` +
+
+`POST _ml/data_frame/analytics/_all/_stop`
+
+[[ml-stop-dfanalytics-prereq]]
+==== {api-prereq-title}
+
+* You must have `machine_learning_admin` built-in role to use this API. For more 
+information, see {stack-ov}/security-privileges.html[Security privileges] and 
+{stack-ov}/built-in-roles.html[Built-in roles].
+
+[[ml-stop-dfanalytics-desc]]
+==== {api-description-title}
+
+A {dfanalytics-job} can be started and stopped multiple times throughout its 
+lifecycle.
+
+You can stop multiple {dfanalytics-jobs} in a single API request by using a 
+comma-separated list of {dfanalytics-jobs} or a wildcard expression. You can 
+stop all {dfanalytics-jobs} by using `_all` or by specifying `*` as the 
+`<data_frame_analytics_id>`.
+
+[[ml-stop-dfanalytics-path-params]]
+==== {api-path-parms-title}
+
+`<data_frame_analytics_id>` (Required)::
+  (string) Identifier for the {dfanalytics-job}. This identifier can contain
+  lowercase alphanumeric characters (a-z and 0-9), hyphens, and underscores. It
+  must start and end with alphanumeric characters.
+  
+`timeout` (Optional)::
+  (time) Controls the amount of time to wait until the {dfanalytics-job} stops. 
+  The default value is 20 seconds.
+  
+`force` (Optional)::
+  (boolean) If true, the {dfanalytics-job} is stopped forcefully.
+  
+`allow_no_match` (Optional)::
+  (boolean) If `false` and the `data_frame_analytics_id` does not match any 
+  {dfanalytics-job} an error will be returned. The default value is `true`.
+
+[[ml-stop-dfanalytics-example]]
+==== {api-examples-title}
+
+The following example stops the `loganalytics` {dfanalytics-job}:
+
+[source,js]
+--------------------------------------------------
+POST _ml/data_frame/analytics/loganalytics/_stop
+--------------------------------------------------
+// CONSOLE
+// TEST[skip:TBD]
+
+When the {dfanalytics-job} stops, you receive the following results:
+
+[source,js]
+----
+{
+  "stopped" : true
+}
+----
+// TESTRESPONSE