6 years ago · 74c16efe2a
--- a/docs/reference/ml/apis/dfanalyticsresources.asciidoc
+++ b/docs/reference/ml/apis/dfanalyticsresources.asciidoc
@@ -0,0 +1,108 @@
 
				+[role="xpack"]
			
 
				+[testenv="platinum"]
			
 
				+[[ml-dfanalytics-resources]]
			
 
				+=== {dfanalytics-cap} job resources
			
 
				+
			
 
				+{dfanalytics-cap} resources relate to APIs such as <<put-dfanalytics>> and
			
 
				+<<get-dfanalytics>>.	
			
 
				+
			
 
				+[discrete]	
			
 
				+[[ml-dfanalytics-properties]]	
			
 
				+==== {api-definitions-title}
			
 
				+
			
 
				+`analysis`::
			
 
				+  (object) The type of analysis that is performed on the `source`. For example: 
			
 
				+  `outlier_detection`. For more information, see <<dfanalytics-types>>.
			
 
				+  
			
 
				+`analyzed_fields`::
			
 
				+  (object) You can specify both `includes` and/or `excludes` patterns. If 
			
 
				+  `analyzed_fields` is not set, only the relevant fields will be included. For 
			
 
				+  example, all the numeric fields for {oldetection}.
			
 
				+
			
 
				+`dest`::
			
 
				+  (object) The destination configuration of the analysis. For more information, 
			
 
				+  see <<dfanalytics-dest-resources>>.
			
 
				+
			
 
				+`id`::
			
 
				+  (string) The unique identifier for the {dfanalytics-job}. This identifier can 
			
 
				+  contain lowercase alphanumeric characters (a-z and 0-9), hyphens, and 
			
 
				+  underscores. It must start and end with alphanumeric characters. This property 
			
 
				+  is informational; you cannot change the identifier for existing jobs.
			
 
				+  
			
 
				+`model_memory_limit`::
			
 
				+  (string) The approximate maximum amount of memory resources that are 
			
 
				+  permitted for analytical processing. The default value for {dfanalytics-jobs} 
			
 
				+  is `1gb`. If your `elasticsearch.yml` file contains an 
			
 
				+  `xpack.ml.max_model_memory_limit` setting, an error occurs when you try to 
			
 
				+  create {dfanalytics-jobs} that have `model_memory_limit` values greater than 
			
 
				+  that setting. For more information, see <<ml-settings>>.
			
 
				+
			
 
				+`source`::
			
 
				+  (object) The source configuration, consisting of `index` and optionally a 
			
 
				+  `query`. For more information, see <<dfanalytics-source-resources>>.
			
 
				+
			
 
				+[[dfanalytics-types]]
			
 
				+==== Analysis objects
			
 
				+
			
 
				+{dfanalytics-cap} resources contain `analysis` objects. For example, when you
			
 
				+create a {dfanalytics-job}, you must define the type of analysis it performs.
			
 
				+  
			
 
				+[discrete]
			
 
				+[[oldetection-resources]]
			
 
				+===== {oldetection-cap} configuration objects 
			
 
				+
			
 
				+An {oldetection} configuration object has the following properties:
			
 
				+
			
 
				+[discrete]
			
 
				+[[oldetection-properties]]
			
 
				+===== {api-definitions-title}
			
 
				+
			
 
				+`n_neighbors`::
			
 
				+  (integer) Defines the value for how many nearest neighbors each method of 
			
 
				+  {oldetection} will use to calculate its {olscore}. When the value is 
			
 
				+  not set, the system will dynamically detect an appropriate value.
			
 
				+
			
 
				+`method`::
			
 
				+  (string) Sets the method that {oldetection} uses. If the method is not set, 
			
 
				+  {oldetection} uses an ensemble of different methods and normalizes and 
			
 
				+  combines their individual {olscores} to obtain the overall {olscore}. 
			
 
				+  Available methods are `lof`, `ldof`, `distance_kth_nn`, `distance_knn`.
			
 
				+
			
 
				+`feature_influence_threshold`:: 
			
 
				+  (double) The minimum {olscore} that a document needs to have in order to 
			
 
				+  calculate its {fiscore}. 
			
 
				+  Value range: 0-1 (`0.1` by default).
			
 
				+  
			
 
				+[[dfanalytics-dest-resources]]
			
 
				+==== Dest configuration objects
			
 
				+
			
 
				+{dfanalytics-cap} resources contain `dest` objects. For example, when you
			
 
				+create a {dfanalytics-job}, you must define its destination.
			
 
				+
			
 
				+[discrete]
			
 
				+[[dfanalytics-dest-properties]]
			
 
				+==== {api-definitions-title}
			
 
				+
			
 
				+`index`::
			
 
				+  (string) The name of the index in which to store the results of the 
			
 
				+  {dfanalytics-job}.
			
 
				+
			
 
				+`results_field`::
			
 
				+  (string) The name of the field in which to store the results of the analysis. 
			
 
				+  The default value is `ml`.
			
 
				+
			
 
				+[[dfanalytics-source-resources]]
			
 
				+==== Source configuration objects
			
 
				+
			
 
				+The `source` configuration object has the following properties:
			
 
				+
			
 
				+`index`::
			
 
				+  (array) An array of index names on which to perform the analysis. It can be a 
			
 
				+  single index or index pattern as well as an array of indices or patterns.
			
 
				+  
			
 
				+`query`::
			
 
				+  (object) The {es} query domain-specific language (DSL). This value
			
 
				+  corresponds to the query object in an {es} search POST body. All the
			
 
				+  options that are supported by {es} can be used, as this object is
			
 
				+  passed verbatim to {es}. By default, this property has the following
			
 
				+  value: `{"match_all": {"boost": 1}}`.
			
--- a/docs/reference/ml/apis/evaluate-dfanalytics.asciidoc
+++ b/docs/reference/ml/apis/evaluate-dfanalytics.asciidoc
@@ -8,15 +8,9 @@
 
				 <titleabbrev>Evaluate {dfanalytics}</titleabbrev>
			
 
				 ++++
			
 
				 
			
 
				-experimental[]
			
 
				+Evaluates the {dfanalytics} for an annotated index.
			
 
				 
			
 
				-Evaluates the executed analysis on an index that is already annotated with a 
			
 
				-field that contains the results of the analytics (the `ground truth`) for each 
			
 
				-{dataframe} row. Evaluation is typically done via calculating a set of metrics 
			
 
				-that capture various aspects of the quality of the results over the data for 
			
 
				-which we have the `ground truth`. For different types of analyses different 
			
 
				-metrics are suitable. This API packages together commonly used metrics for 
			
 
				-various analyses.
			
 
				+experimental[]
			
 
				 
			
 
				 [[ml-evaluate-dfanalytics-request]]
			
 
				 ==== {api-request-title}
			
@@ -30,6 +24,19 @@ various analyses.
 
				 information, see {stack-ov}/security-privileges.html[Security privileges] and 
			
 
				 {stack-ov}/built-in-roles.html[Built-in roles].
			
 
				 
			
 
				+[[ml-evaluate-dfanalytics-desc]]
			
 
				+==== {api-description-title}
			
 
				+
			
 
				+This API evaluates the executed analysis on an index that is already annotated
			
 
				+with a field that contains the results of the analytics (the `ground truth`)
			
 
				+for each {dataframe} row.
			
 
				+
			
 
				+Evaluation is typically done by calculating a set of metrics that capture various aspects of the quality of the results over the data for which you have the
			
 
				+`ground truth`.
			
 
				+
			
 
				+For different types of analyses, different metrics are suitable. This API
			
 
				+packages together commonly used metrics for various analyses.
			
 
				+
			
 
				 [[ml-evaluate-dfanalytics-request-body]]
			
 
				 ==== {api-request-body-title}
			
 
				 
			
@@ -38,8 +45,22 @@ information, see {stack-ov}/security-privileges.html[Security privileges] and
 
				   
			
 
				 `evaluation` (Required)::
			
 
				   (object) Defines the type of evaluation you want to perform. For example: 
			
 
				-  `binary_soft_classification`.
			
 
				-  See Evaluate API resources.
			
 
				+  `binary_soft_classification`. See <<ml-evaluate-dfanalytics-resources>>.
			
 
				+  
			
 
				+[[ml-evaluate-dfanalytics-results]]
			
 
				+==== {api-response-body-title}
			
 
				+
			
 
				+`binary_soft_classification`::
			
 
				+  (object) If you chose to do binary soft classification, the API returns the
			
 
				+  following evaluation metrics:
			
 
				+  
			
 
				+`auc_roc`::: TBD
			
 
				+
			
 
				+`confusion_matrix`::: TBD
			
 
				+  
			
 
				+`precision`::: TBD
			
 
				+
			
 
				+`recall`::: TBD
			
 
				 
			
 
				 [[ml-evaluate-dfanalytics-example]]
			
 
				 ==== {api-examples-title}
			
--- a/docs/reference/ml/apis/evaluateresources.asciidoc
+++ b/docs/reference/ml/apis/evaluateresources.asciidoc
@@ -0,0 +1,63 @@
 
				+[role="xpack"]
			
 
				+[testenv="platinum"]
			
 
				+[[ml-evaluate-dfanalytics-resources]]
			
 
				+=== {dfanalytics-cap} evaluation resources
			
 
				+
			
 
				+Evaluation configuration objects relate to the <<evaluate-dfanalytics>>.
			
 
				+
			
 
				+[discrete]
			
 
				+[[ml-evaluate-dfanalytics-properties]]
			
 
				+==== {api-definitions-title}
			
 
				+
			
 
				+`evaluation`::
			
 
				+  (object) Defines the type of evaluation you want to perform. The value of this 
			
 
				+  object can be different depending on the type of evaluation you want to 
			
 
				+  perform. For example, it can contain <<binary-sc-resources>>.
			
 
				+
			
 
				+[[binary-sc-resources]]
			
 
				+==== Binary soft classification configuration objects
			
 
				+
			
 
				+Binary soft classification evaluates the results of an analysis which outputs 
			
 
				+the probability that each {dataframe} row belongs to a certain class. For 
			
 
				+example, in the context of outlier detection, the analysis outputs the 
			
 
				+probability that each row is an outlier.
			
 
				+
			
 
				+[discrete]
			
 
				+[[binary-sc-resources-properties]]
			
 
				+===== {api-definitions-title}
			
 
				+
			
 
				+`actual_field`::
			
 
				+  (string) The field of the `index` which contains the `ground 
			
 
				+  truth`. The data type of this field can be boolean or integer. If the data 
			
 
				+  type is integer, the value has to be either `0` (false) or `1` (true).
			
 
				+
			
 
				+`predicted_probability_field`::
			
 
				+  (string) The field of the `index` that defines the probability of whether the 
			
 
				+  item belongs to the class in question or not. It's the field that contains the 
			
 
				+  results of the analysis.
			
 
				+
			
 
				+`metrics`::
			
 
				+  (object) Specifies the metrics that are used for the evaluation. Available 
			
 
				+  metrics:
			
 
				+  
			
 
				+  `auc_roc`::
			
 
				+    (object) The AUC ROC (area under the curve of the receiver operating 
			
 
				+    characteristic) score and optionally the curve.
			
 
				+    Default value is `{"includes_curve": false}`.
			
 
				+    
			
 
				+  `precision`::
			
 
				+    (object) Set the different thresholds of the {olscore} at which the metric 
			
 
				+    is calculated.
			
 
				+    Default value is `{"at": [0.25, 0.50, 0.75]}`.
			
 
				+  
			
 
				+  `recall`::
			
 
				+    (object) Set the different thresholds of the {olscore} at which the metric 
			
 
				+    is calculated.
			
 
				+    Default value is `{"at": [0.25, 0.50, 0.75]}`.
			
 
				+  
			
 
				+  `confusion_matrix`::
			
 
				+    (object) Set the different thresholds of the {olscore} at which the metrics 
			
 
				+    (`tp` - true positive, `fp` - false positive, `tn` - true negative, `fn` - 
			
 
				+    false negative) are calculated.
			
 
				+    Default value is `{"at": [0.25, 0.50, 0.75]}`.
			
 
				+  
			
--- a/docs/reference/ml/apis/get-dfanalytics.asciidoc
+++ b/docs/reference/ml/apis/get-dfanalytics.asciidoc
@@ -45,6 +45,10 @@ You can get information for all {dfanalytics-jobs} by using _all, by specifying
 
				   (string) Identifier for the {dfanalytics-job}. If you do not specify one of 
			
 
				   these options, the API returns information for the first hundred
			
 
				   {dfanalytics-jobs}.
			
 
				+  
			
 
				+`allow_no_match` (Optional)::
			
 
				+  (boolean) If `false` and the `data_frame_analytics_id` does not match any 
			
 
				+  {dfanalytics-job}, an error will be returned. The default value is `true`.
			
 
				 
			
 
				 [[ml-get-dfanalytics-query-params]]
			
 
				 ==== {api-query-parms-title}
			
@@ -60,6 +64,13 @@ You can get information for all {dfanalytics-jobs} by using _all, by specifying
 
				 `size` (Optional)::
			
 
				   (integer) Specifies the maximum number of {dfanalytics-jobs} to obtain. The 
			
 
				   default value is `100`.
			
 
				+  
			
 
				+[[ml-get-dfanalytics-results]]
			
 
				+==== {api-response-body-title}
			
 
				+
			
 
				+`data_frame_analytics`::
			
 
				+  (array) An array of {dfanalytics-job} resources. For more information, see
			
 
				+  <<ml-dfanalytics-resources>>.
			
 
				 
			
 
				 [[ml-get-dfanalytics-example]]
			
 
				 ==== {api-examples-title}
			
--- a/docs/reference/ml/apis/put-dfanalytics.asciidoc
+++ b/docs/reference/ml/apis/put-dfanalytics.asciidoc
@@ -56,24 +56,23 @@ and mappings.
 
				 
			
 
				 [[ml-put-dfanalytics-request-body]]
			
 
				 ==== {api-request-body-title}
			
 
				-  
			
 
				+
			
 
				 `analysis` (Required)::
			
 
				   (object) Defines the type of {dfanalytics} you want to perform on your source 
			
 
				-  index. For example: `outlier_detection`. 
			
 
				-  See {oldetection} resources.
			
 
				+  index. For example: `outlier_detection`. See <<dfanalytics-types>>.
			
 
				   
			
 
				 `analyzed_fields` (Optional)::
			
 
				   (object) You can specify both `includes` and/or `excludes` patterns. If 
			
 
				-  `analyzed_fields` is not set, only the relevant fileds will be included. For 
			
 
				-  example all the numeric fields for {oldetection}.
			
 
				+  `analyzed_fields` is not set, only the relevant fields will be included. For 
			
 
				+  example, all the numeric fields for {oldetection}.
			
 
				   
			
 
				 `dest` (Required)::
			
 
				   (object) The destination configuration, consisting of `index` and optionally 
			
 
				-  `results_field` (`ml` by default).
			
 
				+  `results_field` (`ml` by default). See <<dfanalytics-dest-resources>>.
			
 
				   
			
 
				 `source` (Required)::
			
 
				   (object) The source configuration, consisting of `index` and optionally a 
			
 
				-  `query`.
			
 
				+  `query`. See <<dfanalytics-source-resources>>. 
			
 
				 
			
 
				 [[ml-put-dfanalytics-example]]
			
 
				 ==== {api-examples-title}
			
--- a/docs/reference/rest-api/defs.asciidoc
+++ b/docs/reference/rest-api/defs.asciidoc
@@ -8,7 +8,9 @@ These resource definitions are used in APIs related to {ml-features} and
 
				 * <<ml-calendar-resource,Calendars>>
			
 
				 * <<ml-datafeed-resource,{dfeeds-cap}>>
			
 
				 * <<ml-datafeed-counts,{dfeed-cap} counts>>
			
 
				+* <<ml-dfanalytics-resources,{dfanalytics-cap}>>
			
 
				 * <<data-frame-transform-resource,{dataframe-transforms-cap}>>
			
 
				+* <<ml-evaluate-dfanalytics-resources,Evaluate {dfanalytics}>>
			
 
				 * <<ml-filter-resource,Filters>>
			
 
				 * <<ml-job-resource,Jobs>>
			
 
				 * <<ml-jobstats,Job statistics>>
			
@@ -19,7 +21,9 @@ These resource definitions are used in APIs related to {ml-features} and
 
				 
			
 
				 include::{es-repo-dir}/ml/apis/calendarresource.asciidoc[]
			
 
				 include::{es-repo-dir}/ml/apis/datafeedresource.asciidoc[]
			
 
				+include::{es-repo-dir}/ml/apis/dfanalyticsresources.asciidoc[]
			
 
				 include::{es-repo-dir}/data-frames/apis/transformresource.asciidoc[]
			
 
				+include::{es-repo-dir}/ml/apis/evaluateresources.asciidoc[]
			
 
				 include::{es-repo-dir}/ml/apis/filterresource.asciidoc[]
			
 
				 include::{es-repo-dir}/ml/apis/jobresource.asciidoc[]
			
 
				 include::{es-repo-dir}/ml/apis/jobcounts.asciidoc[]