
[DOCS] Adds cat anomaly detectors API (#52866)

Lisa Cawley 5 years ago
parent
commit
b6534834f9

+ 2 - 0
docs/reference/cat.asciidoc

@@ -227,6 +227,8 @@ include::cat/alias.asciidoc[]
 
 include::cat/allocation.asciidoc[]
 
+include::cat/anomaly-detectors.asciidoc[]
+
 include::cat/count.asciidoc[]
 
 include::cat/dataframeanalytics.asciidoc[]

+ 280 - 0
docs/reference/cat/anomaly-detectors.asciidoc

@@ -0,0 +1,280 @@
+[role="xpack"]
+[testenv="platinum"]
+[[cat-anomaly-detectors]]
+=== cat anomaly detectors API
+++++
+<titleabbrev>cat anomaly detectors</titleabbrev>
+++++
+
+Returns configuration and usage information about {anomaly-jobs}.
+
+[[cat-anomaly-detectors-request]]
+==== {api-request-title}
+
+`GET /_cat/ml/anomaly_detectors/<job_id>` +
+
+`GET /_cat/ml/anomaly_detectors`
+
+[[cat-anomaly-detectors-prereqs]]
+==== {api-prereq-title}
+
+* If the {es} {security-features} are enabled, you must have `monitor_ml`,
+`monitor`, `manage_ml`, or `manage` cluster privileges to use this API. See
+<<security-privileges>> and {ml-docs}/setup.html[Set up {ml-features}].
+
+
+[[cat-anomaly-detectors-desc]]
+==== {api-description-title}
+
+See {ml-docs}/ml-jobs.html[{anomaly-jobs-cap}].
+
+NOTE: This API returns a maximum of 10,000 jobs.
+
+[[cat-anomaly-detectors-path-params]]
+==== {api-path-parms-title}
+
+`<job_id>`::
+(Optional, string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
+
+[[cat-anomaly-detectors-query-params]]
+==== {api-query-parms-title}
+
+`allow_no_jobs`::
+(Optional, boolean)
+include::{docdir}/ml/ml-shared.asciidoc[tag=allow-no-jobs]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=bytes]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=http-format]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-h]
++
+If you do not specify which columns to include, the API returns the default
+columns. If you explicitly specify one or more columns, it returns only the
+specified columns.
++
+Valid columns are:
+
+`assignment_explanation`, `ae`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation-anomaly-jobs]
+
+`buckets.count`, `bc`, `bucketsCount`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count-anomaly-jobs]
+
+`buckets.time.exp_avg`, `btea`, `bucketsTimeExpAvg`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-exponential-average]
+
+`buckets.time.exp_avg_hour`, `bteah`, `bucketsTimeExpAvgHour`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-exponential-average-hour]
+
+`buckets.time.max`, `btmax`, `bucketsTimeMax`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-maximum]
+
+`buckets.time.min`, `btmin`, `bucketsTimeMin`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-minimum]
+
+`buckets.time.total`, `btt`, `bucketsTimeTotal`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-total]
+
+`data.buckets`, `db`, `dataBuckets`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count]
+
+`data.earliest_record`, `der`, `dataEarliestRecord`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=earliest-record-timestamp]
+
+`data.empty_buckets`, `deb`, `dataEmptyBuckets`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=empty-bucket-count]
+
+`data.input_bytes`, `dib`, `dataInputBytes`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=input-bytes]
+
+`data.input_fields`, `dif`, `dataInputFields`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=input-field-count]
+
+`data.input_records`, `dir`, `dataInputRecords`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=input-record-count]
+
+`data.invalid_dates`, `did`, `dataInvalidDates`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=invalid-date-count]
+
+`data.last`, `dl`, `dataLast`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=last-data-time]
+
+`data.last_empty_bucket`, `dleb`, `dataLastEmptyBucket`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=latest-empty-bucket-timestamp]
+
+`data.last_sparse_bucket`, `dlsb`, `dataLastSparseBucket`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=latest-sparse-record-timestamp]
+
+`data.latest_record`, `dlr`, `dataLatestRecord`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=latest-record-timestamp]
+
+`data.missing_fields`, `dmf`, `dataMissingFields`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=missing-field-count]
+
+`data.out_of_order_timestamps`, `doot`, `dataOutOfOrderTimestamps`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=out-of-order-timestamp-count]
+
+`data.processed_fields`, `dpf`, `dataProcessedFields`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=processed-field-count]
+
+`data.processed_records`, `dpr`, `dataProcessedRecords`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=processed-record-count]
+
+`data.sparse_buckets`, `dsb`, `dataSparseBuckets`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=sparse-bucket-count]
+
+`forecasts.memory.avg`, `fmavg`, `forecastsMemoryAvg`:::
+The average memory usage in bytes for forecasts related to the {anomaly-job}.
+
+`forecasts.memory.max`, `fmmax`, `forecastsMemoryMax`:::
+The maximum memory usage in bytes for forecasts related to the {anomaly-job}.
+
+`forecasts.memory.min`, `fmmin`, `forecastsMemoryMin`:::
+The minimum memory usage in bytes for forecasts related to the {anomaly-job}.
+
+`forecasts.memory.total`, `fmt`, `forecastsMemoryTotal`:::
+The total memory usage in bytes for forecasts related to the {anomaly-job}.
+
+`forecasts.records.avg`, `fravg`, `forecastsRecordsAvg`:::
+The average number of `model_forecast` documents written for forecasts related
+to the {anomaly-job}.
+
+`forecasts.records.max`, `frmax`, `forecastsRecordsMax`:::
+The maximum number of `model_forecast` documents written for forecasts related
+to the {anomaly-job}.
+
+`forecasts.records.min`, `frmin`, `forecastsRecordsMin`:::
+The minimum number of `model_forecast` documents written for forecasts related
+to the {anomaly-job}.
+
+`forecasts.records.total`, `frt`, `forecastsRecordsTotal`:::
+The total number of `model_forecast` documents written for forecasts related to
+the {anomaly-job}.
+
+`forecasts.time.avg`, `ftavg`, `forecastsTimeAvg`:::
+The average runtime in milliseconds for forecasts related to the {anomaly-job}.
+
+`forecasts.time.max`, `ftmax`, `forecastsTimeMax`:::
+The maximum runtime in milliseconds for forecasts related to the {anomaly-job}.
+
+`forecasts.time.min`, `ftmin`, `forecastsTimeMin`:::
+The minimum runtime in milliseconds for forecasts related to the {anomaly-job}.
+
+`forecasts.time.total`, `ftt`, `forecastsTimeTotal`:::
+The total runtime in milliseconds for forecasts related to the {anomaly-job}.
+
+`forecasts.total`, `ft`, `forecastsTotal`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=forecast-total]
+
+`id`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
+
+`model.bucket_allocation_failures`, `mbaf`, `modelBucketAllocationFailures`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-allocation-failures-count]
+
+`model.by_fields`, `mbf`, `modelByFields`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=total-by-field-count]
+
+`model.bytes`, `mb`, `modelBytes`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-bytes]
+
+`model.bytes_exceeded`, `mbe`, `modelBytesExceeded`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-bytes-exceeded]
+
+`model.categorization_status`, `mcs`, `modelCategorizationStatus`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=categorization-status]
+
+`model.categorized_doc_count`, `mcdc`, `modelCategorizedDocCount`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=categorized-doc-count]
+
+`model.dead_category_count`, `mdcc`, `modelDeadCategoryCount`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=dead-category-count]
+
+`model.frequent_category_count`, `mfcc`, `modelFrequentCategoryCount`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=frequent-category-count]
+
+`model.log_time`, `mlt`, `modelLogTime`:::
+The timestamp when the model stats were gathered, according to server time.
+
+`model.memory_limit`, `mml`, `modelMemoryLimit`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-memory-limit-anomaly-jobs]
+
+`model.memory_status`, `mms`, `modelMemoryStatus`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-memory-status]
+
+`model.over_fields`, `mof`, `modelOverFields`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=total-over-field-count]
+
+`model.partition_fields`, `mpf`, `modelPartitionFields`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=total-partition-field-count]
+
+`model.rare_category_count`, `mrcc`, `modelRareCategoryCount`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=rare-category-count]
+
+`model.timestamp`, `mt`, `modelTimestamp`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-timestamp]
+
+`model.total_category_count`, `mtcc`, `modelTotalCategoryCount`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=total-category-count]
+
+`node.address`, `na`, `nodeAddress`:::
+The network address of the node.
++
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-jobs]
+
+`node.ephemeral_id`, `ne`, `nodeEphemeralId`:::
+The ephemeral ID of the node.
++
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-jobs]
+
+`node.id`, `ni`, `nodeId`:::
+The unique identifier of the node.
++
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-jobs]
+
+`node.name`, `nn`, `nodeName`:::
+The node name.
++
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-jobs]
+
+`opened_time`, `ot`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=open-time]
+
+`state`, `s`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=state-anomaly-job]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=help]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-s]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=time]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-v]
+
+[[cat-anomaly-detectors-example]]
+==== {api-examples-title}
+
+[source,console]
+--------------------------------------------------
+GET _cat/ml/anomaly_detectors?h=id,s,dpr,mb&v
+--------------------------------------------------
+// TEST[skip:kibana sample data]
+
+[source,console-result]
+----
+id                        s dpr   mb
+high_sum_total_sales closed 14022 1.5mb
+low_request_rate     closed 1216  40.5kb
+response_code_rates  closed 28146 132.7kb
+url_scanning         closed 28146 501.6kb
+----
+// TESTRESPONSE[skip:kibana sample data]
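
A minimal sketch combining the column names and aliases documented above; the
job list and figures depend on the jobs in your cluster, so the output is
illustrative only:

[source,console]
----
GET _cat/ml/anomaly_detectors?h=id,state,buckets.count,forecasts.total&v
----
// TEST[skip:illustrative example]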

+ 42 - 66
docs/reference/cat/datafeeds.asciidoc

@@ -22,12 +22,14 @@ Returns configuration and usage information about {dfeeds}.
 `monitor`, `manage_ml`, or `manage` cluster privileges to use this API. See
 <<security-privileges>> and {ml-docs}/setup.html[Set up {ml-features}].
 
-////
+
 [[cat-datafeeds-desc]]
 ==== {api-description-title}
 
-TBD: This API returns a maximum of 10,000 {dfeeds}. 
-////
+{dfeeds-cap} retrieve data from {es} for analysis by {anomaly-jobs}. For more 
+information, see {ml-docs}/ml-dfeeds.html[{dfeeds-cap}].
+
+NOTE: This API returns a maximum of 10,000 {dfeeds}.
 
 [[cat-datafeeds-path-params]]
 ==== {api-path-parms-title}
@@ -46,94 +48,68 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=allow-no-datafeeds]
 include::{docdir}/rest-api/common-parms.asciidoc[tag=http-format]
 
 include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-h]
-
-include::{docdir}/rest-api/common-parms.asciidoc[tag=help]
-
-include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-s]
-
-include::{docdir}/rest-api/common-parms.asciidoc[tag=time]
-
-include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-v]
-
-[[cat-datafeeds-results]]
-==== {api-response-body-title}
-
-`assignment_explanation`::
-include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation]
 +
-To retrieve this information, specify the `ae` column in the `h` query parameter. 
+If you do not specify which columns to include, the API returns the default
+columns. If you explicitly specify one or more columns, it returns only the
+specified columns.
++
+Valid columns are:
 
-`bucket.count`:: 
+`assignment_explanation`, `ae`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation-datafeeds]
+
+`buckets.count`, `bc`, `bucketsCount`:::
+(Default)
 include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count]
-+
-To retrieve this information, specify the `bc` or `bucketCount` column in the
-`h` query parameter.
 
-`id`::
+`id`:::
+(Default)
 include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id]
-+
-To retrieve this information, specify the `id` column in the `h` query parameter.
-  
-`node.address`::
+
+`node.address`, `na`, `nodeAddress`:::
 The network address of the node.
-+ 
-include::{docdir}/ml/ml-shared.asciidoc[tag=node]
 +
-To retrieve this information, specify the `na` or `nodeAddress` column in the
-`h` query parameter.
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-datafeeds]
   
-`node.ephemeral_id`::
+`node.ephemeral_id`, `ne`, `nodeEphemeralId`:::
 The ephemeral ID of the node.
 +
-include::{docdir}/ml/ml-shared.asciidoc[tag=node]
-+
-To retrieve this information, specify the `ne` or `nodeEphemeralId` column in
-the `h` query parameter.
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-datafeeds]
   
-`node.id`::
+`node.id`, `ni`, `nodeId`:::
 The unique identifier of the node.
 +
-include::{docdir}/ml/ml-shared.asciidoc[tag=node]
-+
-To retrieve this information, specify the `ni` or `nodeId` column in the `h`
-query parameter.
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-datafeeds]
 
-`node.name`::
+`node.name`, `nn`, `nodeName`:::
 The node name.
 +
-include::{docdir}/ml/ml-shared.asciidoc[tag=node]
-+
-To retrieve this information, specify the `nn` or `nodeName` column in the `h`
-query parameter.
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-datafeeds]
 
-`search.bucket_avg`::
+`search.bucket_avg`, `sba`, `searchBucketAvg`:::
 include::{docdir}/ml/ml-shared.asciidoc[tag=search-bucket-avg]
-+
-To retrieve this information, specify the `sba` or `searchBucketAvg` column in
-the `h` query parameter.
   
-`search.count`::
+`search.count`, `sc`, `searchCount`:::
+(Default)
 include::{docdir}/ml/ml-shared.asciidoc[tag=search-count]
-+
-To retrieve this information, specify the `sc` or `searchCount` column in the
-`h` query parameter.
 
-`search.exp_avg_hour`::
+`search.exp_avg_hour`, `seah`, `searchExpAvgHour`:::
 include::{docdir}/ml/ml-shared.asciidoc[tag=search-exp-avg-hour]
-+
-To retrieve this information, specify the `seah` or `searchExpAvgHour` column in
-the `h` query parameter.
 
-`search.time`::
+`search.time`, `st`, `searchTime`:::
 include::{docdir}/ml/ml-shared.asciidoc[tag=search-time]
-+
-To retrieve this information, specify the `st` or `searchTime` column in the `h`
-query parameter.
 
-`state`::
+`state`, `s`:::
+(Default)
 include::{docdir}/ml/ml-shared.asciidoc[tag=state-datafeed]
-+
-To retrieve this information, specify the `s` column in the `h` query parameter. 
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=help]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-s]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=time]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-v]
 
 [[cat-datafeeds-example]]
 ==== {api-examples-title}
@@ -146,7 +122,7 @@ GET _cat/ml/datafeeds?v
 
 [source,console-result]
 ----
-id                              state bucket.count search.count
+id                              state buckets.count search.count
 datafeed-high_sum_total_sales stopped 743          7
 datafeed-low_request_rate     stopped 1457         3
 datafeed-response_code_rates  stopped 1460         18
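
As a hedged illustration of the rename, the column can be requested by its full
name or by the updated alias; both of the following should return the same
column:

[source,console]
----
GET _cat/ml/datafeeds?h=id,state,buckets.count&v

GET _cat/ml/datafeeds?h=id,s,bc&v
----
// TEST[skip:illustrative example]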

+ 10 - 4
docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc

@@ -69,7 +69,7 @@ informational; you cannot update their values.
 
 `assignment_explanation`::
 (string)
-include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation]
+include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation-datafeeds]
 
 `datafeed_id`::
 (string)
@@ -77,10 +77,16 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id]
 
 `node`::
 (object)
-include::{docdir}/ml/ml-shared.asciidoc[tag=node]
-`node`.`id`::: The unique identifier of the node. For example, "0-o0tOoRTwKFZifatTWKNw".
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-datafeeds]
+
+`node`.`id`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-id]
+
 `node`.`name`::: The node name. For example, `0-o0tOo`.
-`node`.`ephemeral_id`::: The node ephemeral ID.
+
+`node`.`ephemeral_id`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-ephemeral-id]
+
 `node`.`transport_address`::: The host and port where transport HTTP connections are
 accepted. For example, `127.0.0.1:9300`.
 `node`.`attributes`::: For example, `{"ml.machine_memory": "17179869184"}`.
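
For orientation, a minimal request against this API; the datafeed ID is
borrowed from the cat datafeeds example and is illustrative:

[source,console]
----
GET _ml/datafeeds/datafeed-low_request_rate/_stats
----
// TEST[skip:illustrative example]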

+ 89 - 128
docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc

@@ -57,8 +57,8 @@ The API returns the following information about the operational progress of a
 job:
 
 `assignment_explanation`::
-(string) For open jobs only, contains messages relating to the selection
-of a node to run the job.
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation-anomaly-jobs]
 
 [[datacounts]]`data_counts`::
 (object) An object that describes the quantity of input to the job and any
@@ -67,85 +67,73 @@ a job. If a model snapshot is reverted or old results are deleted, the job
 counts are not reset.
 
 `data_counts`.`bucket_count`:::
-(long) The number of bucket results produced by the job.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count-anomaly-jobs]
 
 `data_counts`.`earliest_record_timestamp`:::
-(date) The timestamp of the earliest chronologically input document.
+(date)
+include::{docdir}/ml/ml-shared.asciidoc[tag=earliest-record-timestamp]
 
 `data_counts`.`empty_bucket_count`:::
-(long) The number of buckets which did not contain any data. If your data
-contains many empty buckets, consider increasing your `bucket_span` or using
-functions that are tolerant to gaps in data such as `mean`, `non_null_sum` or
-`non_zero_count`.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=empty-bucket-count]
 
 `data_counts`.`input_bytes`:::
-(long) The number of bytes of input data posted to the job.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=input-bytes]
 
 `data_counts`.`input_field_count`:::
-(long) The total number of fields in input documents posted to the job. This
-count includes fields that are not used in the analysis. However, be aware that
-if you are using a {dfeed}, it extracts only the required fields from the
-documents it retrieves before posting them to the job.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=input-field-count]
 
 `data_counts`.`input_record_count`:::
-(long) The number of input documents posted to the job.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=input-record-count]
 
 `data_counts`.`invalid_date_count`:::
-(long) The number of input documents with either a missing date field or a date
-that could not be parsed.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=invalid-date-count]
 
 `data_counts`.`job_id`:::
 (string)
 include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
 
 `data_counts`.`last_data_time`:::
-(date) The timestamp at which data was last analyzed, according to server time.
+(date)
+include::{docdir}/ml/ml-shared.asciidoc[tag=last-data-time]
 
 `data_counts`.`latest_empty_bucket_timestamp`:::
-(date) The timestamp of the last bucket that did not contain any data.
+(date)
+include::{docdir}/ml/ml-shared.asciidoc[tag=latest-empty-bucket-timestamp]
 
 `data_counts`.`latest_record_timestamp`:::
-(date) The timestamp of the latest chronologically input document.
+(date)
+include::{docdir}/ml/ml-shared.asciidoc[tag=latest-record-timestamp]
 
 `data_counts`.`latest_sparse_bucket_timestamp`:::
-(date) The timestamp of the last bucket that was considered sparse.
+(date)
+include::{docdir}/ml/ml-shared.asciidoc[tag=latest-sparse-record-timestamp]
 
 `data_counts`.`missing_field_count`:::
-(long) The number of input documents that are missing a field that the job is
-configured to analyze. Input documents with missing fields are still processed
-because it is possible that not all fields are missing. The value of
-`processed_record_count` includes this count.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=missing-field-count]
 +
---
-NOTE: If you are using {dfeeds} or posting data to the job in JSON format, a
-high `missing_field_count` is often not an indication of data issues. It is not
-necessarily a cause for concern.
-
---
+The value of `processed_record_count` includes this count.
 
 `data_counts`.`out_of_order_timestamp_count`:::
-(long) The number of input documents that are out of time sequence and outside
-of the latency window. This information is applicable only when you provide data
-to the job by using the <<ml-post-data,post data API>>. These out of order
-documents are  discarded, since jobs require time series data to be in ascending
-chronological order.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=out-of-order-timestamp-count]
 
 `data_counts`.`processed_field_count`:::
-(long) The total number of fields in all the documents that have been processed
-by the job. Only fields that are specified in the detector configuration object
-contribute to this count. The timestamp is not included in this count.
+include::{docdir}/ml/ml-shared.asciidoc[tag=processed-field-count]
 
 `data_counts`.`processed_record_count`:::
-(long) The number of input documents that have been processed by the job. This
-value includes documents with missing fields, since they are nonetheless
-analyzed. If you use {dfeeds} and have aggregations in your search query, the
-`processed_record_count` will be the number of aggregation results processed,
-not the number of {es} documents.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=processed-record-count]
 
 `data_counts`.`sparse_bucket_count`:::
-(long) The number of buckets that contained few data points compared to the
-expected number of data points. If your data contains many sparse buckets,
-consider using a longer `bucket_span`.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=sparse-bucket-count]
 
 [[forecastsstats]]`forecasts_stats`::
 (object) An object that provides statistical information about forecasts 
@@ -167,8 +155,9 @@ value of `1` indicates that at least one forecast exists.
 related to this job. If there are no forecasts, this property is omitted.
 
 `forecasts_stats`.`records`:::
-(object) The `avg`, `min`, `max` and `total` number of model_forecast documents 
-written for forecasts related to this job. If there are no forecasts, this property is omitted.
+(object) The `avg`, `min`, `max` and `total` number of `model_forecast` documents 
+written for forecasts related to this job. If there are no forecasts, this
+property is omitted.
 
 `forecasts_stats`.`processing_time_ms`:::
 (object) The `avg`, `min`, `max` and `total` runtime in milliseconds for 
@@ -179,8 +168,8 @@ forecasts related to this job. If there are no forecasts, this property is omitt
 {"finished" : 2, "started" : 1}. If there are no forecasts, this property is omitted.
 
 `forecasts_stats`.`total`:::
-(long) The number of individual forecasts currently available for this job. A 
-value of `1` or more indicates that forecasts exist.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=forecast-total]
 
 `job_id`::
 (string)
@@ -191,38 +180,24 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
 model. It has the following properties:
  
 `model_size_stats`.`bucket_allocation_failures_count`:::
-(long) The number of buckets for which new entities in incoming data were not
-processed due to insufficient model memory. This situation is also signified
-by a `hard_limit: memory_status` property value.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-allocation-failures-count]
 
 `model_size_stats`.`categorized_doc_count`:::
-(long) The number of documents that have had a field categorized.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=categorized-doc-count]
 
 `model_size_stats`.`categorization_status`:::
-(string) The status of categorization for this job.
-Contains one of the following values.
-+
---
-* `ok`: Categorization is performing acceptably well (or not being
-used at all).
-* `warn`: Categorization is detecting a distribution of categories
-that suggests the input data is inappropriate for categorization.
-Problems could be that there is only one category, more than 90% of
-categories are rare, the number of categories is greater than 50% of
-the number of categorized documents, there are no frequently
-matched categories, or more than 50% of categories are dead.
-
---
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=categorization-status]
 
 `model_size_stats`.`dead_category_count`:::
-(long) The number of categories created by categorization that will
-never be assigned again because another category's definition
-makes it a superset of the dead category.  (Dead categories are a
-side effect of the way categorization has no prior training.)
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=dead-category-count]
 
 `model_size_stats`.`frequent_category_count`:::
-(long) The number of categories that match more than 1% of categorized
-documents.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=frequent-category-count]
 
 `model_size_stats`.`job_id`:::
 (string)
@@ -232,53 +207,47 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
 (date) The timestamp of the `model_size_stats` according to server time.
 
 `model_size_stats`.`memory_status`:::
-(string) The status of the mathematical models. This property can have one of
-the following values:
-+
---
-* `ok`: The models stayed below the configured value.
-* `soft_limit`: The models used more than 60% of the configured memory limit
-and older unused models will be pruned to free up space.
-* `hard_limit`: The models used more space than the configured memory limit.
-As a result, not all incoming data was processed.
---
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-memory-status]
 
 `model_size_stats`.`model_bytes`:::
-(long) The number of bytes of memory used by the models. This is the maximum
-value since the last time the model was persisted. If the job is closed,
-this value indicates the latest size.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-bytes]
 
 `model_size_stats`.`model_bytes_exceeded`:::
- (long) The number of bytes over the high limit for memory usage at the last
- allocation failure.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-bytes-exceeded]
 
 `model_size_stats`.`model_bytes_memory_limit`:::
-(long) The upper limit for memory usage, checked on increasing values.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-memory-limit-anomaly-jobs]
 
 `model_size_stats`.`rare_category_count`:::
-(long) The number of categories that match just one categorized document.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=rare-category-count]
 
 `model_size_stats`.`result_type`:::
 (string) For internal use. The type of result.
 
 `model_size_stats`.`total_by_field_count`:::
-(long) The number of `by` field values that were analyzed by the models. This 
-value is cumulative for all detectors.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=total-by-field-count]
 
 `model_size_stats`.`total_category_count`:::
-(long) The number of categories created by categorization.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=total-category-count]
 
 `model_size_stats`.`total_over_field_count`:::
-(long) The number of `over` field values that were analyzed by the models. This 
-value is cumulative for all detectors.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=total-over-field-count]
 
 `model_size_stats`.`total_partition_field_count`:::
-(long) The number of `partition` field values that were analyzed by the models. 
-This value is cumulative for all detectors.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=total-partition-field-count]
 
 `model_size_stats`.`timestamp`:::
-(date) The timestamp of the `model_size_stats` according to the timestamp of the
-data.
+(date)
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-timestamp]
 
 [[stats-node]]`node`::
 (object) Contains properties for the node that runs the job. This information is
@@ -289,10 +258,12 @@ available only for open jobs.
 `{"ml.machine_memory": "17179869184", "ml.max_open_jobs" : "20"}`.
   
 `node`.`ephemeral_id`:::
-(string) The ephemeral ID of the node.
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-ephemeral-id]
 
 `node`.`id`:::
-(string) The unique identifier of the node.
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-id]
 
 `node`.`name`:::
 (string) The node name.
@@ -301,26 +272,12 @@ available only for open jobs.
 (string) The host and port where transport HTTP connections are accepted.
 
 `open_time`::
-(string) For open jobs only, the elapsed time for which the job has been open.
-For example, `28746386s`.
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=open-time]
 
 `state`::
-(string) The status of the job, which can be one of the following values:
-+
---
-* `closed`: The job finished successfully with its model state persisted. The
-job must be opened before it can accept further data.
-* `closing`: The job close action is in progress and has not yet completed. A
-closing job cannot accept further data.
-* `failed`: The job did not finish successfully due to an error. This situation
-can occur due to invalid input data, a fatal error occurring during the analysis,
-or an external interaction such as the process being killed by the Linux out of
-memory (OOM) killer. If the job had irrevocably failed, it must be force closed
-and then deleted. If the {dfeed} can be corrected, the job can be closed and
-then re-opened.
-* `opened`: The job is available to receive and process data.
-* `opening`: The job open action is in progress and has not yet completed.
---
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=state-anomaly-job]
 
 [[timingstats]]`timing_stats`::
 (object) An object that provides statistical information about timing aspect of
@@ -330,28 +287,32 @@ this job. It has the following properties:
 (double) Average of all bucket processing times in milliseconds.
 
 `timing_stats`.`bucket_count`:::
-(long) The number of buckets processed.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count]
 
 `timing_stats`.`exponential_average_bucket_processing_time_ms`:::
-(double) Exponential moving average of all bucket processing times in
-milliseconds.
+(double)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-exponential-average]
 
 `timing_stats`.`exponential_average_bucket_processing_time_per_hour_ms`:::
-(double) Exponentially-weighted moving average of bucket processing times
-calculated in a 1 hour time window.
+(double)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-exponential-average-hour]
 
 `timing_stats`.`job_id`:::
 (string)
 include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
 
 `timing_stats`.`maximum_bucket_processing_time_ms`:::
-(double) Maximum among all bucket processing times in milliseconds.
+(double)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-maximum]
 
 `timing_stats`.`minimum_bucket_processing_time_ms`:::
-(double) Minimum among all bucket processing times in milliseconds.
+(double)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-minimum]
 
 `timing_stats`.`total_bucket_processing_time_ms`:::
-(double) Sum of all bucket processing times in milliseconds.
+(double)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-total]
 
 [[ml-get-job-stats-response-codes]]
 ==== {api-response-codes-title}
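
For orientation, a minimal request against this API, whose response contains
the `data_counts`, `model_size_stats`, and `timing_stats` objects described
above; the job ID is borrowed from the cat example and is illustrative:

[source,console]
----
GET _ml/anomaly_detectors/low_request_rate/_stats
----
// TEST[skip:illustrative example]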

+ 263 - 11
docs/reference/ml/ml-shared.asciidoc

@@ -137,9 +137,14 @@ tag::analyzed-fields-includes[]
 An array of strings that defines the fields that will be included in the analysis.
 end::analyzed-fields-includes[]
 
-tag::assignment-explanation[]
+tag::assignment-explanation-anomaly-jobs[]
+For open {anomaly-jobs} only, contains messages relating to the selection
+of a node to run the job.
+end::assignment-explanation-anomaly-jobs[]
+
+tag::assignment-explanation-datafeeds[]
 For started {dfeeds} only, contains messages relating to the selection of a node.
-end::assignment-explanation[]
+end::assignment-explanation-datafeeds[]
 
 tag::assignment-explanation-dfanalytics[]
 Contains messages relating to the selection of a node.
@@ -158,10 +163,20 @@ so do not set the `background_persist_interval` value too low.
 --
 end::background-persist-interval[]
 
+tag::bucket-allocation-failures-count[]
+The number of buckets for which new entities in incoming data were not processed
+due to insufficient model memory. This situation is also signified by a
+`hard_limit: memory_status` property value.
+end::bucket-allocation-failures-count[]
+
 tag::bucket-count[]
 The number of buckets processed.
 end::bucket-count[]
 
+tag::bucket-count-anomaly-jobs[]
+The number of bucket results produced by the job.
+end::bucket-count-anomaly-jobs[]
+
 tag::bucket-span[]
 The size of the interval that the analysis is aggregated into, typically between
 `5m` and `1h`. The default value is `5m`. If the {anomaly-job} uses a {dfeed}
@@ -175,6 +190,27 @@ The length of the bucket in seconds. This value matches the `bucket_span`
 that is specified in the job.
 end::bucket-span-results[]
 
+tag::bucket-time-exponential-average[]
+Exponential moving average of all bucket processing times, in milliseconds.
+end::bucket-time-exponential-average[]
+
+tag::bucket-time-exponential-average-hour[]
+Exponentially-weighted moving average of bucket processing times
+calculated in a 1 hour time window, in milliseconds.
+end::bucket-time-exponential-average-hour[]
+
+tag::bucket-time-maximum[]
+Maximum among all bucket processing times, in milliseconds.
+end::bucket-time-maximum[]
+
+tag::bucket-time-minimum[]
+Minimum among all bucket processing times, in milliseconds.
+end::bucket-time-minimum[]
+
+tag::bucket-time-total[]
+Sum of all bucket processing times, in milliseconds.
+end::bucket-time-total[]
+
 tag::by-field-name[]
 The field used to split the data. In particular, this property is used for 
 analyzing the splits with respect to their own history. It is used for finding 
@@ -252,6 +288,24 @@ customize the tokenizer or post-tokenization filtering, use the
 `pattern_replace` character filters. The effect is exactly the same.
 end::categorization-filters[]
 
+tag::categorization-status[]
+The status of categorization for the job. Contains one of the following values:
++
+--
+* `ok`: Categorization is performing acceptably well (or not being used at all).
+* `warn`: Categorization is detecting a distribution of categories that suggests
+the input data is inappropriate for categorization. Problems could be that there
+is only one category, more than 90% of categories are rare, the number of
+categories is greater than 50% of the number of categorized documents, there are
+no frequently matched categories, or more than 50% of categories are dead.
+
+--
+end::categorization-status[]
+
+tag::categorized-doc-count[]
+The number of documents that have had a field categorized.
+end::categorized-doc-count[]
+
 tag::char-filter[]
 One or more <<analysis-charfilters,character filters>>. In addition to the
 built-in character filters, other plugins can provide more character filters.
@@ -263,7 +317,6 @@ add them here as
 <<analysis-pattern-replace-charfilter,pattern replace character filters>>.
 end::char-filter[]
 
-
 tag::compute-feature-influence[]
 If `true`, the feature influence calculation is enabled. Defaults to `true`.
 end::compute-feature-influence[]
@@ -484,6 +537,13 @@ Identifier for the {dfeed}. It can be a {dfeed} identifier or a wildcard
 expression.
 end::datafeed-id-wildcard[]
 
+tag::dead-category-count[]
+The number of categories created by categorization that will never be assigned
+again because another category's definition makes it a superset of the dead
+category. (Dead categories are a side effect of the way categorization has no
+prior training.)
+end::dead-category-count[]
+
 tag::decompress-definition[]
 Specifies whether the included model definition should be returned as a JSON map (`true`) or 
 in a custom compressed format (`false`). Defaults to `true`.
@@ -564,6 +624,17 @@ A unique identifier for the detector. This identifier is based on the order of
 the detectors in the `analysis_config`, starting at zero.
 end::detector-index[]
 
+tag::earliest-record-timestamp[]
+The timestamp of the earliest chronologically input document.
+end::earliest-record-timestamp[]
+
+tag::empty-bucket-count[]
+The number of buckets which did not contain any data. If your data
+contains many empty buckets, consider increasing your `bucket_span` or using
+functions that are tolerant to gaps in data such as `mean`, `non_null_sum` or
+`non_zero_count`.
+end::empty-bucket-count[]
+
 tag::eta[]
 Advanced configuration option. The shrinkage applied to the weights. Smaller
 values result in larger forests which have better generalization error. However,
@@ -630,6 +701,11 @@ tag::filter-id[]
 A string that uniquely identifies a filter.
 end::filter-id[]
 
+tag::forecast-total[]
+The number of individual forecasts currently available for the job. A value of
+`1` or more indicates that forecasts exist.
+end::forecast-total[]
+
 tag::frequency[]
 The interval at which scheduled queries are made while the {dfeed} runs in real
 time. The default value is either the bucket span for short bucket spans, or,
@@ -640,6 +716,10 @@ bucket results. If the {dfeed} uses aggregations, this value must be divisible
 by the interval of the date histogram aggregation.
 end::frequency[]
 
+tag::frequent-category-count[]
+The number of categories that match more than 1% of categorized documents.
+end::frequent-category-count[]
+
 tag::from[]
 Skips the specified number of {dfanalytics-jobs}. The default value is `0`.
 end::from[]
@@ -700,6 +780,26 @@ is available as part of the input data. When you use multiple detectors, the use
 of influencers is recommended as it aggregates results for each influencer entity.
 end::influencers[]
 
+tag::input-bytes[]
+The number of bytes of input data posted to the {anomaly-job}.
+end::input-bytes[]
+
+tag::input-field-count[]
+The total number of fields in input documents posted to the {anomaly-job}. This
+count includes fields that are not used in the analysis. However, be aware that
+if you are using a {dfeed}, it extracts only the required fields from the
+documents it retrieves before posting them to the job.
+end::input-field-count[]
+
+tag::input-record-count[]
+The number of input documents posted to the {anomaly-job}.
+end::input-record-count[]
+
+tag::invalid-date-count[]
+The number of input documents with either a missing date field or a date that
+could not be parsed.
+end::invalid-date-count[]
+
 tag::is-interim[]
 If `true`, this is an interim result. In other words, the results are calculated
 based on partial input data.
@@ -765,6 +865,10 @@ relevant relationships between the features and the {depvar}. The smaller this
 parameter the larger individual trees will be and the longer train will take.
 end::lambda[]
 
+tag::last-data-time[]
+The timestamp at which data was last analyzed, according to server time.
+end::last-data-time[]
+
 tag::latency[]
 The size of the window in which to expect data that is out of time order. The 
 default value is 0 (no latency). If you specify a non-zero value, it must be 
@@ -778,6 +882,18 @@ the <<ml-post-data,post data>> API.
 --
 end::latency[]
 
+tag::latest-empty-bucket-timestamp[]
+The timestamp of the last bucket that did not contain any data.
+end::latest-empty-bucket-timestamp[]
+
+tag::latest-record-timestamp[]
+The timestamp of the latest chronologically input document.
+end::latest-record-timestamp[]
+
+tag::latest-sparse-record-timestamp[]
+The timestamp of the last bucket that was considered sparse.
+end::latest-sparse-record-timestamp[]
+
 tag::max-empty-searches[]
 If a real-time {dfeed} has never seen any data (including during any initial
 training period) then it will automatically stop itself and close its associated
@@ -815,6 +931,19 @@ ensemble method. Available methods are `lof`, `ldof`, `distance_kth_nn`,
 `distance_knn`.
 end::method[]
 
+tag::missing-field-count[]
+The number of input documents that are missing a field that the {anomaly-job} is
+configured to analyze. Input documents with missing fields are still processed
+because it is possible that not all fields are missing.
++
+--
+NOTE: If you are using {dfeeds} or posting data to the job in JSON format, a
+high `missing_field_count` is often not an indication of data issues. It is not
+necessarily a cause for concern.
+
+--
+end::missing-field-count[]
+
 tag::mode[]
 There are three available modes: 
 +
@@ -826,6 +955,17 @@ recommended value.
 --
 end::mode[]
 
+tag::model-bytes[]
+The number of bytes of memory used by the models. This is the maximum value
+since the last time the model was persisted. If the job is closed, this value
+indicates the latest size.
+end::model-bytes[]
+
+tag::model-bytes-exceeded[]
+The number of bytes over the high limit for memory usage at the last allocation
+failure.
+end::model-bytes-exceeded[]
+
 tag::model-id[]
 The unique identifier of the trained {infer} model.
 end::model-id[]
@@ -855,6 +995,10 @@ see <<ml-settings>>.
 --
 end::model-memory-limit[]
 
+tag::model-memory-limit-anomaly-jobs[]
+The upper limit for model memory usage, checked on increasing values.
+end::model-memory-limit-anomaly-jobs[]
+
 tag::model-memory-limit-dfa[]
 The approximate maximum amount of memory resources that are permitted for 
 analytical processing. The default value for {dfanalytics-jobs} is `1gb`. If 
@@ -864,6 +1008,19 @@ setting, an error occurs when you try to create {dfanalytics-jobs} that have
 <<ml-settings>>.
 end::model-memory-limit-dfa[]
 
+tag::model-memory-status[]
+The status of the mathematical models, which can have one of the following
+values:
++
+--
+* `ok`: The models stayed below the configured value.
+* `soft_limit`: The models used more than 60% of the configured memory limit
+and older unused models will be pruned to free up space.
+* `hard_limit`: The models used more space than the configured memory limit.
+As a result, not all incoming data was processed.
+--
+end::model-memory-status[]
+
 tag::model-plot-config[]
 This advanced configuration option stores model information along with the
 results. It provides a more detailed view into {anomaly-detect}.
@@ -906,6 +1063,10 @@ The default value is `1`, which means snapshots that are one day (twenty-four ho
 older than the newest snapshot are deleted.
 end::model-snapshot-retention-days[]
 
+tag::model-timestamp[]
+The timestamp of the last record when the model stats were gathered.
+end::model-timestamp[]
+
 tag::multivariate-by-fields[]
 This functionality is reserved for internal use. It is not supported for use in 
 customer environments and is not subject to the support SLA of official GA 
@@ -936,10 +1097,27 @@ improve diversity in the ensemble. Therefore, only override this if you are
 confident that the value you choose is appropriate for the data set.
 end::n-neighbors[]
 
-tag::node[]
+tag::node-address[]
+The network address of the node.
+end::node-address[]
+
+tag::node-datafeeds[]
 For started {dfeeds} only, this information pertains to the node upon which the
 {dfeed} is started.
-end::node[]
+end::node-datafeeds[]
+
+tag::node-ephemeral-id[]
+The ephemeral ID of the node.
+end::node-ephemeral-id[]
+
+tag::node-id[]
+The unique identifier of the node.
+end::node-id[]
+
+tag::node-jobs[]
+Contains properties for the node that runs the job. This information is
+available only for open jobs.
+end::node-jobs[]
 
 tag::num-top-classes[]
 Defines the number of categories for which the predicted 
@@ -948,12 +1126,17 @@ total number of categories (in the {version} version of the {stack}, it's two)
 to predict then we will report all category probabilities. Defaults to 2.
 end::num-top-classes[]
 
-tag::over-field-name[]
-The field used to split the data. In particular, this property is used for 
-analyzing the splits with respect to the history of all splits. It is used for 
-finding unusual values in the population of all splits. For more information,
-see {ml-docs}/ml-configuring-pop.html[Performing population analysis].
-end::over-field-name[]
+tag::open-time[]
+For open jobs only, the elapsed time for which the job has been open.
+end::open-time[]
+
+tag::out-of-order-timestamp-count[]
+The number of input documents that are out of time sequence and outside
+of the latency window. This information is applicable only when you provide data
+to the {anomaly-job} by using the <<ml-post-data,post data API>>. These out of
+order documents are discarded, since jobs require time series data to be in
+ascending chronological order.
+end::out-of-order-timestamp-count[]
 
 tag::outlier-fraction[]
 Sets the proportion of the data set that is assumed to be outlying prior to 
@@ -961,6 +1144,13 @@ Sets the proportion of the data set that is assumed to be outlying prior to
 outliers and 95% are inliers.
 end::outlier-fraction[]
 
+tag::over-field-name[]
+The field used to split the data. In particular, this property is used for 
+analyzing the splits with respect to the history of all splits. It is used for 
+finding unusual values in the population of all splits. For more information,
+see {ml-docs}/ml-configuring-pop.html[Performing population analysis].
+end::over-field-name[]
+
 tag::partition-field-name[]
 The field used to segment the analysis. When you use this property, you have 
 completely independent baselines for each value of this field.
@@ -971,6 +1161,20 @@ Defines the name of the prediction field in the results.
 Defaults to `<dependent_variable>_prediction`.
 end::prediction-field-name[]
 
+tag::processed-field-count[]
+The total number of fields in all the documents that have been processed by the
+{anomaly-job}. Only fields that are specified in the detector configuration
+object contribute to this count. The timestamp is not included in this count.
+end::processed-field-count[]
+
+tag::processed-record-count[]
+The number of input documents that have been processed by the {anomaly-job}.
+This value includes documents with missing fields, since they are nonetheless
+analyzed. If you use {dfeeds} and have aggregations in your search query, the
+`processed_record_count` is the number of aggregation results processed, not the
+number of {es} documents.
+end::processed-record-count[]
+
 tag::randomize-seed[]
 Defines the seed to the random generator that is used to pick which documents 
 will be used for training. By default it is randomly generated. Set it to a 
@@ -995,6 +1199,10 @@ multiple jobs running on the same node. For more information, see
 {ml-docs}/ml-delayed-data-detection.html[Handling delayed data].
 end::query-delay[]
 
+tag::rare-category-count[]
+The number of categories that match just one categorized document.
+end::rare-category-count[]
+
 tag::renormalization-window-days[]
 Advanced configuration option. The period over which adjustments to the score
 are applied, as new data is seen. The default value is the longer of 30 days or
@@ -1088,6 +1296,12 @@ The configuration of how to source the analysis data. It requires an
       excluded from the destination.
 end::source-put-dfa[]
 
+tag::sparse-bucket-count[]
+The number of buckets that contained few data points compared to the expected
+number of data points. If your data contains many sparse buckets, consider using
+a longer `bucket_span`.
+end::sparse-bucket-count[]
+
 tag::standardization-enabled[]
 If `true`, then the following operation is performed on the columns before 
 computing outlier scores: (x_i - mean(x_i)) / sd(x_i). Defaults to `true`. For 
@@ -1095,6 +1309,25 @@ more information, see
 https://en.wikipedia.org/wiki/Feature_scaling#Standardization_(Z-score_Normalization)[this wiki page about standardization].
 end::standardization-enabled[]
 
+tag::state-anomaly-job[]
+The status of the {anomaly-job}, which can be one of the following values:
++
+--
+* `closed`: The job finished successfully with its model state persisted. The
+job must be opened before it can accept further data.
+* `closing`: The job close action is in progress and has not yet completed. A
+closing job cannot accept further data.
+* `failed`: The job did not finish successfully due to an error. This situation
+can occur due to invalid input data, a fatal error occurring during the analysis,
+or an external interaction such as the process being killed by the Linux out of
+memory (OOM) killer. If the job has irrevocably failed, it must be force closed
+and then deleted. If the {dfeed} can be corrected, the job can be closed and
+then re-opened.
+* `opened`: The job is available to receive and process data.
+* `opening`: The job open action is in progress and has not yet completed.
+--
+end::state-anomaly-job[]
+
 tag::state-datafeed[]
 The status of the {dfeed}, which can be one of the following values:
 +
@@ -1170,6 +1403,25 @@ that tokenizer but change the character or token filters, specify
 `"tokenizer": "ml_classic"` in your `categorization_analyzer`.
 end::tokenizer[]
 
+tag::total-by-field-count[]
+The number of `by` field values that were analyzed by the models. This value is
+cumulative for all detectors in the job.
+end::total-by-field-count[]
+
+tag::total-category-count[]
+The number of categories created by categorization.
+end::total-category-count[]
+
+tag::total-over-field-count[]
+The number of `over` field values that were analyzed by the models. This value
+is cumulative for all detectors in the job.
+end::total-over-field-count[]
+
+tag::total-partition-field-count[]
+The number of `partition` field values that were analyzed by the models. This
+value is cumulative for all detectors in the job.
+end::total-partition-field-count[]
+
 tag::training-percent[]
 Defines what percentage of the eligible documents that will 
 be used for training. Documents that are ignored by the analysis (for example 

+ 2 - 2
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatDatafeedsAction.java

@@ -78,9 +78,9 @@ public class RestCatDatafeedsAction extends AbstractCatAction {
                 .build());
 
         // Timing stats
-        table.addCell("bucket.count",
+        table.addCell("buckets.count",
             TableColumnAttributeBuilder.builder("bucket count")
-                .setAliases("bc", "bucketCount")
+                .setAliases("bc", "bucketsCount")
                 .build());
         table.addCell("search.count",
             TableColumnAttributeBuilder.builder("number of searches ran by the datafeed")

+ 38 - 38
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java

@@ -93,7 +93,7 @@ public class RestCatJobsAction extends AbstractCatAction {
             .build());
         table.addCell("data.processed_fields",
             TableColumnAttributeBuilder.builder("number of processed fields", false)
-                .setAliases("dpr", "dataProcessedFields")
+                .setAliases("dpf", "dataProcessedFields")
                 .build());
         table.addCell("data.input_bytes",
             TableColumnAttributeBuilder.builder("total input bytes", false)
@@ -219,55 +219,55 @@ public class RestCatJobsAction extends AbstractCatAction {
                 .build());
 
         // Forecast Stats
-        table.addCell("forecast." + ForecastStats.Fields.TOTAL,
-            TableColumnAttributeBuilder.builder("total number of forecasts").setAliases("ft", "forecastTotal").build());
-        table.addCell("forecast.memory.min",
+        table.addCell("forecasts." + ForecastStats.Fields.TOTAL,
+            TableColumnAttributeBuilder.builder("total number of forecasts").setAliases("ft", "forecastsTotal").build());
+        table.addCell("forecasts.memory.min",
             TableColumnAttributeBuilder.builder("minimum memory used by forecasts", false)
-                .setAliases("fmmin", "forecastMemoryMin")
+                .setAliases("fmmin", "forecastsMemoryMin")
                 .build());
-        table.addCell("forecast.memory.max",
+        table.addCell("forecasts.memory.max",
             TableColumnAttributeBuilder.builder("maximum memory used by forecasts", false)
                 .setAliases("fmmax", "forecastsMemoryMax")
                 .build());
-        table.addCell("forecast.memory.avg",
+        table.addCell("forecasts.memory.avg",
             TableColumnAttributeBuilder.builder("average memory used by forecasts", false)
-                .setAliases("fmavg", "forecastMemoryAvg")
+                .setAliases("fmavg", "forecastsMemoryAvg")
                 .build());
-        table.addCell("forecast.memory.total",
+        table.addCell("forecasts.memory.total",
             TableColumnAttributeBuilder.builder("total memory used by all forecasts", false)
-                .setAliases("fmt", "forecastMemoryTotal")
+                .setAliases("fmt", "forecastsMemoryTotal")
                 .build());
-        table.addCell("forecast." + ForecastStats.Fields.RECORDS + ".min",
+        table.addCell("forecasts." + ForecastStats.Fields.RECORDS + ".min",
             TableColumnAttributeBuilder.builder("minimum record count for forecasts", false)
-                .setAliases("frmin", "forecastRecordsMin")
+                .setAliases("frmin", "forecastsRecordsMin")
                 .build());
-        table.addCell("forecast." + ForecastStats.Fields.RECORDS + ".max",
+        table.addCell("forecasts." + ForecastStats.Fields.RECORDS + ".max",
             TableColumnAttributeBuilder.builder("maximum record count for forecasts", false)
-                .setAliases("frmax", "forecastRecordsMax")
+                .setAliases("frmax", "forecastsRecordsMax")
                 .build());
-        table.addCell("forecast." + ForecastStats.Fields.RECORDS + ".avg",
+        table.addCell("forecasts." + ForecastStats.Fields.RECORDS + ".avg",
             TableColumnAttributeBuilder.builder("average record count for forecasts", false)
-                .setAliases("fravg", "forecastRecordsAvg")
+                .setAliases("fravg", "forecastsRecordsAvg")
                 .build());
-        table.addCell("forecast." + ForecastStats.Fields.RECORDS + ".total",
+        table.addCell("forecasts." + ForecastStats.Fields.RECORDS + ".total",
             TableColumnAttributeBuilder.builder("total record count for all forecasts", false)
-                .setAliases("frt", "forecastRecordsTotal")
+                .setAliases("frt", "forecastsRecordsTotal")
                 .build());
-        table.addCell("forecast.time.min",
+        table.addCell("forecasts.time.min",
             TableColumnAttributeBuilder.builder("minimum runtime for forecasts", false)
-                .setAliases("ftmin", "forecastTimeMin")
+                .setAliases("ftmin", "forecastsTimeMin")
                 .build());
-        table.addCell("forecast.time.max",
+        table.addCell("forecasts.time.max",
             TableColumnAttributeBuilder.builder("maximum run time for forecasts", false)
-                .setAliases("ftmax", "forecastTimeMax")
+                .setAliases("ftmax", "forecastsTimeMax")
                 .build());
-        table.addCell("forecast.time.avg",
+        table.addCell("forecasts.time.avg",
             TableColumnAttributeBuilder.builder("average runtime for all forecasts (milliseconds)", false)
-                .setAliases("ftavg", "forecastTimeAvg")
+                .setAliases("ftavg", "forecastsTimeAvg")
                 .build());
-        table.addCell("forecast.time.total",
+        table.addCell("forecasts.time.total",
             TableColumnAttributeBuilder.builder("total runtime for all forecasts", false)
-                .setAliases("ftt", "forecastTimeTotal").build());
+                .setAliases("ftt", "forecastsTimeTotal").build());
 
         //Node info
         table.addCell("node.id",
@@ -288,29 +288,29 @@ public class RestCatJobsAction extends AbstractCatAction {
                 .build());
 
         //Timing Stats
-        table.addCell("bucket.count",
+        table.addCell("buckets.count",
             TableColumnAttributeBuilder.builder("bucket count")
-                .setAliases("bc", "bucketCount")
+                .setAliases("bc", "bucketsCount")
                 .build());
-        table.addCell("bucket.time.total",
+        table.addCell("buckets.time.total",
             TableColumnAttributeBuilder.builder("total bucket processing time", false)
-                .setAliases("btt", "bucketTimeTotal")
+                .setAliases("btt", "bucketsTimeTotal")
                 .build());
-        table.addCell("bucket.time.min",
+        table.addCell("buckets.time.min",
             TableColumnAttributeBuilder.builder("minimum bucket processing time", false)
-                .setAliases("btmin", "bucketTimeMin")
+                .setAliases("btmin", "bucketsTimeMin")
                 .build());
-        table.addCell("bucket.time.max",
+        table.addCell("buckets.time.max",
             TableColumnAttributeBuilder.builder("maximum bucket processing time", false)
-                .setAliases("btmax", "bucketTimeMax")
+                .setAliases("btmax", "bucketsTimeMax")
                 .build());
-        table.addCell("bucket.time.exp_avg",
+        table.addCell("buckets.time.exp_avg",
             TableColumnAttributeBuilder.builder("exponential average bucket processing time (milliseconds)", false)
-                .setAliases("btea", "bucketTimeExpAvg")
+                .setAliases("btea", "bucketsTimeExpAvg")
                 .build());
-        table.addCell("bucket.time.exp_avg_hour",
+        table.addCell("buckets.time.exp_avg_hour",
             TableColumnAttributeBuilder.builder("exponential average bucket processing time by hour (milliseconds)", false)
-                .setAliases("bteah", "bucketTimeExpAvgHour")
+                .setAliases("bteah", "bucketsTimeExpAvgHour")
                 .build());
 
         table.endHeaders();
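
Likewise, the `forecast.*` and `bucket.*` job columns become `forecasts.*` and
`buckets.*`, with matching camel-case aliases. A sketch using the renamed
columns; illustrative, and assumes ML jobs exist on the cluster:

[source,console]
----
GET _cat/ml/anomaly_detectors?h=id,forecasts.total,forecasts.memory.avg,buckets.time.exp_avg&v
----
// TEST[skip:illustrative example]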

+ 1 - 1
x-pack/plugin/src/test/resources/rest-api-spec/api/cat.ml_jobs.json

@@ -1,7 +1,7 @@
 {
   "cat.ml_jobs":{
     "documentation":{
-      "url":"http://www.elastic.co/guide/en/elasticsearch/reference/current/ml-get-job-stats.html"
+      "url":"http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-anomaly-detectors.html"
     },
     "stability":"stable",
     "url":{

+ 2 - 2
x-pack/plugin/src/test/resources/rest-api-spec/test/ml/datafeed_cat_apis.yml

@@ -86,7 +86,7 @@ setup:
         datafeed_id: datafeed-job-stats-test
   - match:
       $body: |
-        / #id                             state    bucket.count     search.count
+        / #id                             state    buckets.count     search.count
         ^ (datafeed\-job\-stats\-test \s+ \w+ \s+  \d+         \s+  \d+         \n)+  $/
 
   - do:
@@ -95,7 +95,7 @@ setup:
         datafeed_id: datafeed-job-stats-test
   - match:
       $body: |
-        /^  id                          \s+  state \s+ bucket\.count \s+ search\.count \n
+        /^  id                          \s+  state \s+ buckets\.count \s+ search\.count \n
            (datafeed\-job\-stats\-test  \s+  \w+   \s+ \d+           \s+ \d+           \n)+  $/
 
   - do:

+ 2 - 2
x-pack/plugin/src/test/resources/rest-api-spec/test/ml/job_cat_apis.yml

@@ -90,7 +90,7 @@ setup:
         job_id: job-stats-test
   - match:
       $body: |
-        / #id                    state    data.processed_records     model.bytes    model.memory_status     forecast.total     bucket.count
+        / #id                    state    data.processed_records     model.bytes    model.memory_status     forecasts.total     buckets.count
         ^ (job\-stats\-test \s+  \w+  \s+ \d+                   \s+  .*?        \s+ \w+                 \s+ \d+           \s+  \d+         \n)+  $/
 
   - do:
@@ -99,7 +99,7 @@ setup:
         job_id: job-stats-test
   - match:
       $body: |
-        /^  id                \s+  state \s+ data\.processed_records \s+ model\.bytes \s+ model\.memory_status \s+ forecast\.total \s+ bucket\.count  \n
+        /^  id                \s+  state \s+ data\.processed_records \s+ model\.bytes \s+ model\.memory_status \s+ forecasts\.total \s+ buckets\.count  \n
            (job\-stats\-test  \s+  \w+   \s+ \d+                     \s+ .*?         \s+ \w+                  \s+ \d+             \s+ \d+            \n)+  $/
 
   - do: