12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615 |
- tag::aggregations[]
- If set, the {dfeed} performs aggregation searches. Support for aggregations is
- limited and should be used only with low cardinality data. For more information,
- see
- {ml-docs}/ml-configuring-aggregation.html[Aggregating data for faster performance].
- end::aggregations[]
- tag::allow-lazy-open[]
- Advanced configuration option. Specifies whether this job can open when there is
- insufficient {ml} node capacity for it to be immediately assigned to a node. The
- default value is `false`; if a {ml} node with capacity to run the job cannot
- immediately be found, the <<ml-open-job,open {anomaly-jobs} API>> returns an
- error. However, this is also subject to the cluster-wide
- `xpack.ml.max_lazy_ml_nodes` setting; see <<advanced-ml-settings>>. If this
- option is set to `true`, the <<ml-open-job,open {anomaly-jobs} API>> does not
- return an error and the job waits in the `opening` state until sufficient {ml}
- node capacity is available.
- end::allow-lazy-open[]
- tag::allow-no-datafeeds[]
- Specifies what to do when the request:
- +
- --
- * Contains wildcard expressions and there are no {dfeeds} that match.
- * Contains the `_all` string or no identifiers and there are no matches.
- * Contains wildcard expressions and there are only partial matches.
- The default value is `true`, which returns an empty `datafeeds` array when
- there are no matches and the subset of results when there are partial matches.
- If this parameter is `false`, the request returns a `404` status code when there
- are no matches or only partial matches.
- --
- end::allow-no-datafeeds[]
- tag::allow-no-jobs[]
- Specifies what to do when the request:
- +
- --
- * Contains wildcard expressions and there are no jobs that match.
- * Contains the `_all` string or no identifiers and there are no matches.
- * Contains wildcard expressions and there are only partial matches.
- The default value is `true`, which returns an empty `jobs` array
- when there are no matches and the subset of results when there are partial
- matches. If this parameter is `false`, the request returns a `404` status code
- when there are no matches or only partial matches.
- --
- end::allow-no-jobs[]
- tag::allow-no-match[]
- Specifies what to do when the request:
- +
- --
- * Contains wildcard expressions and there are no {dfanalytics-jobs} that match.
- * Contains the `_all` string or no identifiers and there are no matches.
- * Contains wildcard expressions and there are only partial matches.
- The default value is `true`, which returns an empty `data_frame_analytics` array
- when there are no matches and the subset of results when there are partial
- matches. If this parameter is `false`, the request returns a `404` status code
- when there are no matches or only partial matches.
- --
- end::allow-no-match[]
- tag::allow-no-match-models[]
- Specifies what to do when the request:
- +
- --
- * Contains wildcard expressions and there are no models that match.
- * Contains the `_all` string or no identifiers and there are no matches.
- * Contains wildcard expressions and there are only partial matches.
- The default value is `true`, which returns an empty array when there are no
- matches and the subset of results when there are partial matches. If this
- parameter is `false`, the request returns a `404` status code when there are no
- matches or only partial matches.
- --
- end::allow-no-match-models[]
- tag::analysis[]
- Defines the type of {dfanalytics} you want to perform on your source index. For
- example: `outlier_detection`. See <<ml-dfa-analysis-objects>>.
- end::analysis[]
- tag::analysis-config[]
- The analysis configuration, which specifies how to analyze the data. After you
- create a job, you cannot change the analysis configuration; all the properties
- are informational.
- end::analysis-config[]
- tag::analysis-limits[]
- Limits can be applied for the resources required to hold the mathematical models
- in memory. These limits are approximate and can be set per job. They do not
- control the memory used by other processes, for example the {es} Java processes.
- end::analysis-limits[]
- tag::assignment-explanation-anomaly-jobs[]
- For open {anomaly-jobs} only, contains messages relating to the selection
- of a node to run the job.
- end::assignment-explanation-anomaly-jobs[]
- tag::assignment-explanation-datafeeds[]
- For started {dfeeds} only, contains messages relating to the selection of a
- node.
- end::assignment-explanation-datafeeds[]
- tag::assignment-explanation-dfanalytics[]
- Contains messages relating to the selection of a node.
- end::assignment-explanation-dfanalytics[]
- tag::assignment-memory-basis[]
- Where should the memory requirement used for deciding which node the job
- will run on come from? The possible values are:
- +
- --
- * `model_memory_limit`: The job's memory requirement will be calculated on
- the basis that its model memory will grow to the `model_memory_limit`
- specified in the `analysis_limits` of its config.
- * `current_model_bytes`: The job's memory requirement will be calculated on
- the basis that its current model memory size is a good reflection of what
- it will be in the future.
- * `peak_model_bytes`: The job's memory requirement will be calculated on
- the basis that its peak model memory size is a good reflection of what
- the model size will be in the future.
- --
- end::assignment-memory-basis[]
- tag::background-persist-interval[]
- Advanced configuration option. The time between each periodic persistence of the
- model. The default value is a randomized value between 3 and 4 hours, which
- avoids all jobs persisting at exactly the same time. The smallest allowed value
- is 1 hour.
- +
- --
- TIP: For very large models (several GB), persistence could take 10-20 minutes,
- so do not set the `background_persist_interval` value too low.
- --
- end::background-persist-interval[]
- tag::bucket-allocation-failures-count[]
- The number of buckets for which new entities in incoming data were not processed
- due to insufficient model memory. This situation is also signified by a
- `memory_status: hard_limit` property value.
- end::bucket-allocation-failures-count[]
- tag::bucket-count[]
- The number of buckets processed.
- end::bucket-count[]
- tag::bucket-count-anomaly-jobs[]
- The number of bucket results produced by the job.
- end::bucket-count-anomaly-jobs[]
- tag::bucket-span[]
- The size of the interval that the analysis is aggregated into, typically between
- `5m` and `1h`. The default value is `5m`. If the {anomaly-job} uses a {dfeed}
- with {ml-docs}/ml-configuring-aggregation.html[aggregations], this value must be
- divisible by the interval of the date histogram aggregation. For more
- information, see {ml-docs}/ml-buckets.html[Buckets].
- end::bucket-span[]
- tag::bucket-span-results[]
- The length of the bucket in seconds. This value matches the `bucket_span`
- that is specified in the job.
- end::bucket-span-results[]
- tag::bucket-time-exponential-average[]
- Exponential moving average of all bucket processing times, in milliseconds.
- end::bucket-time-exponential-average[]
- tag::bucket-time-exponential-average-hour[]
- Exponentially-weighted moving average of bucket processing times
- calculated in a 1 hour time window, in milliseconds.
- end::bucket-time-exponential-average-hour[]
- tag::bucket-time-maximum[]
- Maximum among all bucket processing times, in milliseconds.
- end::bucket-time-maximum[]
- tag::bucket-time-minimum[]
- Minimum among all bucket processing times, in milliseconds.
- end::bucket-time-minimum[]
- tag::bucket-time-total[]
- Sum of all bucket processing times, in milliseconds.
- end::bucket-time-total[]
- tag::by-field-name[]
- The field used to split the data. In particular, this property is used for
- analyzing the splits with respect to their own history. It is used for finding
- unusual values in the context of the split.
- end::by-field-name[]
- tag::calendar-id[]
- A string that uniquely identifies a calendar.
- end::calendar-id[]
- tag::categorization-analyzer[]
- If `categorization_field_name` is specified, you can also define the analyzer
- that is used to interpret the categorization field. This property cannot be used
- at the same time as `categorization_filters`. The categorization analyzer
- specifies how the `categorization_field` is interpreted by the categorization
- process. The syntax is very similar to that used to define the `analyzer` in the
- <<indices-analyze,Analyze endpoint>>. For more information, see
- {ml-docs}/ml-configuring-categories.html[Categorizing log messages].
- +
- The `categorization_analyzer` field can be specified either as a string or as an
- object. If it is a string it must refer to a
- <<analysis-analyzers,built-in analyzer>> or one added by another plugin. If it
- is an object it has the following properties:
- +
- .Properties of `categorization_analyzer`
- [%collapsible%open]
- =====
- `char_filter`::::
- (array of strings or objects)
- include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=char-filter]
- `tokenizer`::::
- (string or object)
- include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=tokenizer]
- `filter`::::
- (array of strings or objects)
- include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=filter]
- =====
- end::categorization-analyzer[]
- tag::categorization-examples-limit[]
- The maximum number of examples stored per category in memory and in the results
- data store. The default value is 4. If you increase this value, more examples
- are available, however it requires that you have more storage available. If you
- set this value to `0`, no examples are stored.
- +
- NOTE: The `categorization_examples_limit` only applies to analysis that uses
- categorization. For more information, see
- {ml-docs}/ml-configuring-categories.html[Categorizing log messages].
- end::categorization-examples-limit[]
- tag::categorization-field-name[]
- If this property is specified, the values of the specified field will be
- categorized. The resulting categories must be used in a detector by setting
- `by_field_name`, `over_field_name`, or `partition_field_name` to the keyword
- `mlcategory`. For more information, see
- {ml-docs}/ml-configuring-categories.html[Categorizing log messages].
- end::categorization-field-name[]
- tag::categorization-filters[]
- If `categorization_field_name` is specified, you can also define optional
- filters. This property expects an array of regular expressions. The expressions
- are used to filter out matching sequences from the categorization field values.
- You can use this functionality to fine tune the categorization by excluding
- sequences from consideration when categories are defined. For example, you can
- exclude SQL statements that appear in your log files. For more information, see
- {ml-docs}/ml-configuring-categories.html[Categorizing log messages]. This
- property cannot be used at the same time as `categorization_analyzer`. If you
- only want to define simple regular expression filters that are applied prior to
- tokenization, setting this property is the easiest method. If you also want to
- customize the tokenizer or post-tokenization filtering, use the
- `categorization_analyzer` property instead and include the filters as
- `pattern_replace` character filters. The effect is exactly the same.
- end::categorization-filters[]
- tag::categorization-status[]
- The status of categorization for the job. Contains one of the following values:
- +
- --
- * `ok`: Categorization is performing acceptably well (or not being used at all).
- * `warn`: Categorization is detecting a distribution of categories that suggests
- the input data is inappropriate for categorization. Problems could be that there
- is only one category, more than 90% of categories are rare, the number of
- categories is greater than 50% of the number of categorized documents, there are
- no frequently matched categories, or more than 50% of categories are dead.
- --
- end::categorization-status[]
- tag::categorized-doc-count[]
- The number of documents that have had a field categorized.
- end::categorized-doc-count[]
- tag::char-filter[]
- One or more <<analysis-charfilters,character filters>>. In addition to the
- built-in character filters, other plugins can provide more character filters.
- This property is optional. If it is not specified, no character filters are
- applied prior to categorization. If you are customizing some other aspect of the
- analyzer and you need to achieve the equivalent of `categorization_filters`
- (which are not permitted when some other aspect of the analyzer is customized),
- add them here as
- <<analysis-pattern-replace-charfilter,pattern replace character filters>>.
- end::char-filter[]
- tag::chunking-config[]
- {dfeeds-cap} might be required to search over long time periods, for several
- months or years. This search is split into time chunks in order to ensure the
- load on {es} is managed. Chunking configuration controls how the size of these
- time chunks is calculated and is an advanced configuration option.
- +
- .Properties of `chunking_config`
- [%collapsible%open]
- ====
- `mode`:::
- (string)
- include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=mode]
- `time_span`:::
- (<<time-units,time units>>)
- include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=time-span]
- ====
- end::chunking-config[]
- tag::class-assignment-objective[]
- Defines the objective to optimize when assigning class labels:
- `maximize_accuracy` or `maximize_minimum_recall`. When maximizing accuracy,
- class labels are chosen to maximize the number of correct predictions. When
- maximizing minimum recall, labels are chosen to maximize the minimum recall for
- any class. Defaults to `maximize_minimum_recall`.
- end::class-assignment-objective[]
- tag::compute-feature-influence[]
- Specifies whether the feature influence calculation is enabled. Defaults to
- `true`.
- end::compute-feature-influence[]
- tag::custom-preprocessor[]
- (Optional, Boolean)
- Boolean value indicating if the analytics job created the preprocessor
- or if a user provided it. This adjusts the feature importance calculation.
- When `true`, the feature importance calculation returns importance for the
- processed feature. When `false`, the total importance of the original field
- is returned. Default is `false`.
- end::custom-preprocessor[]
- tag::custom-rules[]
- An array of custom rule objects, which enable you to customize the way detectors
- operate. For example, a rule may dictate to the detector conditions under which
- results should be skipped. For more examples, see
- {ml-docs}/ml-configuring-detector-custom-rules.html[Customizing detectors with custom rules].
- end::custom-rules[]
- tag::custom-rules-actions[]
- The set of actions to be triggered when the rule applies. If
- more than one action is specified the effects of all actions are combined. The
- available actions include:
- * `skip_result`: The result will not be created. This is the default value.
- Unless you also specify `skip_model_update`, the model will be updated as usual
- with the corresponding series value.
- * `skip_model_update`: The value for that series will not be used to update the
- model. Unless you also specify `skip_result`, the results will be created as
- usual. This action is suitable when certain values are expected to be
- consistently anomalous and they affect the model in a way that negatively
- impacts the rest of the results.
- end::custom-rules-actions[]
- tag::custom-rules-scope[]
- An optional scope of series where the rule applies. A rule must either
- have a non-empty scope or at least one condition. By default, the scope includes
- all series. Scoping is allowed for any of the fields that are also specified in
- `by_field_name`, `over_field_name`, or `partition_field_name`. To add a scope
- for a field, add the field name as a key in the scope object and set its value
- to an object with the following properties:
- end::custom-rules-scope[]
- tag::custom-rules-scope-filter-id[]
- The id of the filter to be used.
- end::custom-rules-scope-filter-id[]
- tag::custom-rules-scope-filter-type[]
- Either `include` (the rule applies for values in the filter) or `exclude` (the
- rule applies for values not in the filter). Defaults to `include`.
- end::custom-rules-scope-filter-type[]
- tag::custom-rules-conditions[]
- An optional array of numeric conditions when the rule applies. A rule must
- either have a non-empty scope or at least one condition. Multiple conditions are
- combined together with a logical `AND`. A condition has the following
- properties:
- end::custom-rules-conditions[]
- tag::custom-rules-conditions-applies-to[]
- Specifies the result property to which the condition applies. The available
- options are `actual`, `typical`, `diff_from_typical`, `time`. If your detector
- uses `lat_long`, `metric`, `rare`, or `freq_rare` functions, you can only
- specify conditions that apply to `time`.
- end::custom-rules-conditions-applies-to[]
- tag::custom-rules-conditions-operator[]
- Specifies the condition operator. The available options are `gt` (greater than),
- `gte` (greater than or equals), `lt` (less than) and `lte` (less than or
- equals).
- end::custom-rules-conditions-operator[]
- tag::custom-rules-conditions-value[]
- The value that is compared against the `applies_to` field using the `operator`.
- end::custom-rules-conditions-value[]
- tag::custom-settings[]
- Advanced configuration option. Contains custom meta data about the job. For
- example, it can contain custom URL information as shown in
- {ml-docs}/ml-configuring-url.html[Adding custom URLs to {ml} results].
- end::custom-settings[]
- tag::daily-model-snapshot-retention-after-days[]
- Advanced configuration option, which affects the automatic removal of old model
- snapshots for this job. It specifies a period of time (in days) after which only
- the first snapshot per day is retained. This period is relative to the timestamp
- of the most recent snapshot for this job. Valid values range from `0` to
- `model_snapshot_retention_days`. For new jobs, the default value is `1`. For
- jobs created before version 7.8.0, the default value matches
- `model_snapshot_retention_days`. For more information, refer to
- {ml-docs}/ml-model-snapshots.html[Model snapshots].
- end::daily-model-snapshot-retention-after-days[]
- tag::data-description[]
- The data description defines the format of the input data when you send data to
- the job by using the <<ml-post-data,post data>> API. Note that when you configure
- a {dfeed}, these properties are automatically set. When data is received via
- the <<ml-post-data,post data>> API, it is not stored in {es}. Only the results
- for {anomaly-detect} are retained.
- +
- .Properties of `data_description`
- [%collapsible%open]
- ====
- `format`:::
- (string) Only `JSON` format is supported at this time.
- `time_field`:::
- (string) The name of the field that contains the timestamp.
- The default value is `time`.
- `time_format`:::
- (string)
- include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=time-format]
- ====
- end::data-description[]
- tag::datafeed-id[]
- A numerical character string that uniquely identifies the
- {dfeed}. This identifier can contain lowercase alphanumeric characters (a-z
- and 0-9), hyphens, and underscores. It must start and end with alphanumeric
- characters.
- end::datafeed-id[]
- tag::datafeed-id-wildcard[]
- Identifier for the {dfeed}. It can be a {dfeed} identifier or a wildcard
- expression.
- end::datafeed-id-wildcard[]
- tag::dead-category-count[]
- The number of categories created by categorization that will never be assigned
- again because another category's definition makes it a superset of the dead
- category. (Dead categories are a side effect of the way categorization has no
- prior training.)
- end::dead-category-count[]
- tag::delayed-data-check-config[]
- Specifies whether the {dfeed} checks for missing data and the size of the
- window. For example: `{"enabled": true, "check_window": "1h"}`.
- +
- The {dfeed} can optionally search over indices that have already been read in
- an effort to determine whether any data has subsequently been added to the
- index. If missing data is found, it is a good indication that the `query_delay`
- option is set too low and the data is being indexed after the {dfeed} has passed
- that moment in time. See
- {ml-docs}/ml-delayed-data-detection.html[Working with delayed data].
- +
- This check runs only on real-time {dfeeds}.
- +
- .Properties of `delayed_data_check_config`
- [%collapsible%open]
- ====
- `check_window`::
- (<<time-units,time units>>) The window of time that is searched for late data.
- This window of time ends with the latest finalized bucket. It defaults to
- `null`, which causes an appropriate `check_window` to be calculated when the
- real-time {dfeed} runs. In particular, the default `check_window` span
- calculation is based on the maximum of `2h` or `8 * bucket_span`.
- `enabled`::
- (Boolean) Specifies whether the {dfeed} periodically checks for delayed data.
- Defaults to `true`.
- ====
- end::delayed-data-check-config[]
- tag::dependent-variable[]
- Defines which field of the document is to be predicted.
- This parameter is supplied by field name and must match one of the fields in
- the index being used to train. If this field is missing from a document, then
- that document will not be used for training, but a prediction with the trained
- model will be generated for it. It is also known as continuous target variable.
- end::dependent-variable[]
- tag::desc-results[]
- If true, the results are sorted in descending order.
- end::desc-results[]
- tag::description-dfa[]
- A description of the job.
- end::description-dfa[]
- tag::dest[]
- The destination configuration, consisting of `index` and optionally
- `results_field` (`ml` by default).
- +
- .Properties of `dest`
- [%collapsible%open]
- ====
- `index`:::
- (Required, string) Defines the _destination index_ to store the results of the
- {dfanalytics-job}.
- `results_field`:::
- (Optional, string) Defines the name of the field in which to store the results
- of the analysis. Defaults to `ml`.
- ====
- end::dest[]
- tag::detector-description[]
- A description of the detector. For example, `Low event rate`.
- end::detector-description[]
- tag::detector-field-name[]
- The field that the detector uses in the function. If you use an event rate
- function such as `count` or `rare`, do not specify this field.
- +
- --
- NOTE: The `field_name` cannot contain double quotes or backslashes.
- --
- end::detector-field-name[]
- tag::detector-index[]
- A unique identifier for the detector. This identifier is based on the order of
- the detectors in the `analysis_config`, starting at zero.
- end::detector-index[]
- tag::dfas-alpha[]
- Advanced configuration option. {ml-cap} uses loss guided tree growing, which
- means that the decision trees grow where the regularized loss decreases most
- quickly. This parameter affects loss calculations by acting as a multiplier of
- the tree depth. Higher alpha values result in shallower trees and faster
- training times. By default, this value is calculated during hyperparameter
- optimization. It must be greater than or equal to zero.
- end::dfas-alpha[]
- tag::dfas-downsample-factor[]
- Advanced configuration option. Controls the fraction of data that is used to
- compute the derivatives of the loss function for tree training. A small value
- results in the use of a small fraction of the data. If this value is set to be
- less than 1, accuracy typically improves. However, too small a value may result
- in poor convergence for the ensemble and so require more trees. For more
- information about downsampling, refer to
- {wikipedia}/Gradient_boosting#Stochastic_gradient_boosting[this wiki article].
- By default, this value is calculated during hyperparameter optimization. It
- must be greater than zero and less than or equal to 1.
- end::dfas-downsample-factor[]
- tag::dfas-early-stopping-enabled[]
- Advanced configuration option.
- Specifies whether the training process should finish if it is not finding any
- better performing models. If disabled, the training process can take significantly
- longer and the chance of finding a better performing model is unremarkable.
- By default, early stopping is enabled.
- end::dfas-early-stopping-enabled[]
- tag::dfas-eta-growth[]
- Advanced configuration option. Specifies the rate at which `eta` increases for
- each new tree that is added to the forest. For example, a rate of 1.05
- increases `eta` by 5% for each extra tree. By default, this value is calculated
- during hyperparameter optimization. It must be between 0.5 and 2.
- end::dfas-eta-growth[]
- tag::dfas-feature-bag-fraction[]
- The fraction of features that is used when selecting a random bag for each
- candidate split.
- end::dfas-feature-bag-fraction[]
- tag::dfas-feature-processors[]
- Advanced configuration option.
- A collection of feature preprocessors that modify one or more included fields.
- The analysis uses the resulting one or more features instead of the
- original document field. Multiple `feature_processors` entries can refer to the
- same document fields. Automatic categorical
- {ml-docs}/ml-feature-encoding.html[feature encoding] still occurs for the fields
- that are unprocessed by a custom processor or that have categorical values.
- Only use this if you want to override the automatic feature encoding of the
- specified fields. Refer to
- {ml-docs}/ml-feature-processors.html[{dfanalytics} feature processors] to learn
- more.
- end::dfas-feature-processors[]
- tag::dfas-feature-processors-feat-name[]
- The resulting feature name.
- end::dfas-feature-processors-feat-name[]
- tag::dfas-feature-processors-field[]
- The name of the field to encode.
- end::dfas-feature-processors-field[]
- tag::dfas-feature-processors-frequency[]
- The configuration information necessary to perform frequency encoding.
- end::dfas-feature-processors-frequency[]
- tag::dfas-feature-processors-frequency-map[]
- The resulting frequency map for the field value. If the field value is missing
- from the `frequency_map`, the resulting value is `0`.
- end::dfas-feature-processors-frequency-map[]
- tag::dfas-feature-processors-multi[]
- The configuration information necessary to perform multi encoding. It allows
- multiple processors to be chained together. This way the output of a processor
- can then be passed to another as an input.
- end::dfas-feature-processors-multi[]
- tag::dfas-feature-processors-multi-proc[]
- The ordered array of custom processors to execute. The array must contain more
- than one processor.
- end::dfas-feature-processors-multi-proc[]
- tag::dfas-feature-processors-ngram[]
- The configuration information necessary to perform n-gram encoding. Features
- written out by this encoder have the following name format:
- `<feature_prefix>.<ngram><string position>`. For example, if the
- `feature_prefix` is `f`, the feature name for the second unigram in a string is
- `f.11`.
- end::dfas-feature-processors-ngram[]
- tag::dfas-feature-processors-ngram-feat-pref[]
- The feature name prefix. Defaults to `ngram_<start>_<length>`.
- end::dfas-feature-processors-ngram-feat-pref[]
- tag::dfas-feature-processors-ngram-field[]
- The name of the text field to encode.
- end::dfas-feature-processors-ngram-field[]
- tag::dfas-feature-processors-ngram-length[]
- Specifies the length of the n-gram substring. Defaults to `50`. Must be greater
- than `0`.
- end::dfas-feature-processors-ngram-length[]
- tag::dfas-feature-processors-ngram-ngrams[]
- Specifies which n-grams to gather. It's an array of integer values where the
- minimum value is 1 and the maximum value is 5.
- end::dfas-feature-processors-ngram-ngrams[]
- tag::dfas-feature-processors-ngram-start[]
- Specifies the zero-indexed start of the n-gram substring. Negative values are
- allowed for encoding n-grams of string suffixes. Defaults to `0`.
- end::dfas-feature-processors-ngram-start[]
- tag::dfas-feature-processors-one-hot[]
- The configuration information necessary to perform one hot encoding.
- end::dfas-feature-processors-one-hot[]
- tag::dfas-feature-processors-one-hot-map[]
- The one hot map that maps the field value to the column name.
- end::dfas-feature-processors-one-hot-map[]
- tag::dfas-feature-processors-target-mean[]
- The configuration information necessary to perform target mean encoding.
- end::dfas-feature-processors-target-mean[]
- tag::dfas-feature-processors-target-mean-default[]
- The default value if the field value is not found in the `target_map`.
- end::dfas-feature-processors-target-mean-default[]
- tag::dfas-feature-processors-target-mean-map[]
- The field value to target mean transition map.
- end::dfas-feature-processors-target-mean-map[]
- tag::dfas-iteration[]
- The number of iterations on the analysis.
- end::dfas-iteration[]
- tag::dfas-max-attempts[]
- If the algorithm fails to determine a non-trivial tree (more than a single
- leaf), this parameter determines how many of such consecutive failures are
- tolerated. Once the number of attempts exceeds the threshold, the forest
- training stops.
- end::dfas-max-attempts[]
- tag::dfas-max-optimization-rounds[]
- Advanced configuration option.
- A multiplier responsible for determining the maximum number of
- hyperparameter optimization steps in the Bayesian optimization procedure.
- The maximum number of steps is determined based on the number of undefined
- hyperparameters times the maximum optimization rounds per hyperparameter.
- By default, this value is calculated during hyperparameter optimization.
- end::dfas-max-optimization-rounds[]
- tag::dfas-num-folds[]
- The maximum number of folds for the cross-validation procedure.
- end::dfas-num-folds[]
- tag::dfas-num-splits[]
- Determines the maximum number of splits for every feature that can occur in a
- decision tree when the tree is trained.
- end::dfas-num-splits[]
- tag::dfas-soft-limit[]
- Advanced configuration option. {ml-cap} uses loss guided tree growing, which
- means that the decision trees grow where the regularized loss decreases most
- quickly. This soft limit combines with the `soft_tree_depth_tolerance` to
- penalize trees that exceed the specified depth; the regularized loss increases
- quickly beyond this depth. By default, this value is calculated during
- hyperparameter optimization. It must be greater than or equal to 0.
- end::dfas-soft-limit[]
- tag::dfas-soft-tolerance[]
- Advanced configuration option. This option controls how quickly the regularized
- loss increases when the tree depth exceeds `soft_tree_depth_limit`. By default,
- this value is calculated during hyperparameter optimization. It must be greater
- than or equal to 0.01.
- end::dfas-soft-tolerance[]
- tag::dfas-timestamp[]
- The timestamp when the statistics were reported in milliseconds since the epoch.
- end::dfas-timestamp[]
- tag::dfas-timing-stats[]
- An object containing time statistics about the {dfanalytics-job}.
- end::dfas-timing-stats[]
- tag::dfas-timing-stats-elapsed[]
- Runtime of the analysis in milliseconds.
- end::dfas-timing-stats-elapsed[]
- tag::dfas-timing-stats-iteration[]
- Runtime of the latest iteration of the analysis in milliseconds.
- end::dfas-timing-stats-iteration[]
- tag::dfas-validation-loss[]
- An object containing information about validation loss.
- end::dfas-validation-loss[]
- tag::dfas-validation-loss-fold[]
- Validation loss values for every added decision tree during the forest growing
- procedure.
- end::dfas-validation-loss-fold[]
- tag::dfas-validation-loss-type[]
- The type of the loss metric. For example, `binomial_logistic`.
- end::dfas-validation-loss-type[]
- tag::earliest-record-timestamp[]
- The timestamp of the chronologically earliest input document.
- end::earliest-record-timestamp[]
- tag::empty-bucket-count[]
- The number of buckets which did not contain any data. If your data
- contains many empty buckets, consider increasing your `bucket_span` or using
- functions that are tolerant to gaps in data such as `mean`, `non_null_sum` or
- `non_zero_count`.
- end::empty-bucket-count[]
- tag::eta[]
- Advanced configuration option. The shrinkage applied to the weights. Smaller
- values result in larger forests which have a better generalization error.
- However, larger forests cause slower training. For more information about
- shrinkage, refer to
- {wikipedia}/Gradient_boosting#Shrinkage[this wiki article].
- By default, this value is calculated during hyperparameter optimization. It must
- be a value between 0.001 and 1.
- end::eta[]
- tag::exclude-frequent[]
- Contains one of the following values: `all`, `none`, `by`, or `over`. If set,
- frequent entities are excluded from influencing the anomaly results. Entities
- can be considered frequent over time or frequent in a population. If you are
- working with both over and by fields, then you can set `exclude_frequent` to
- `all` for both fields, or to `by` or `over` for those specific fields.
- end::exclude-frequent[]
- tag::exclude-interim-results[]
- If `true`, the output excludes interim results. By default, interim results are
- included.
- end::exclude-interim-results[]
- tag::failed-category-count[]
- The number of times that categorization wanted to create a new category but
- couldn't because the job had hit its `model_memory_limit`. This count does not
- track which specific categories failed to be created. Therefore you cannot use
- this value to determine the number of unique categories that were missed.
- end::failed-category-count[]
- tag::feature-bag-fraction[]
- Advanced configuration option. Defines the fraction of features that will be
- used when selecting a random bag for each candidate split. By default, this
- value is calculated during hyperparameter optimization.
- end::feature-bag-fraction[]
- tag::feature-influence-threshold[]
- The minimum {olscore} that a document needs to have in order to calculate its
- {fiscore}. Value range: 0-1 (`0.1` by default).
- end::feature-influence-threshold[]
- tag::filter[]
- One or more <<analysis-tokenfilters,token filters>>. In addition to the built-in
- token filters, other plugins can provide more token filters. This property is
- optional. If it is not specified, no token filters are applied prior to
- categorization.
- end::filter[]
- tag::filter-id[]
- A string that uniquely identifies a filter.
- end::filter-id[]
- tag::forecast-total[]
- The number of individual forecasts currently available for the job. A value of
- `1` or more indicates that forecasts exist.
- end::forecast-total[]
- tag::exclude-generated[]
- Indicates if certain fields should be removed from the configuration on
- retrieval. This allows the configuration to be in an acceptable format to be retrieved
- and then added to another cluster. Default is false.
- end::exclude-generated[]
- tag::frequency[]
- The interval at which scheduled queries are made while the {dfeed} runs in real
- time. The default value is either the bucket span for short bucket spans, or,
- for longer bucket spans, a sensible fraction of the bucket span. For example:
- `150s`. When `frequency` is shorter than the bucket span, interim results for
- the last (partial) bucket are written then eventually overwritten by the full
- bucket results. If the {dfeed} uses aggregations, this value must be divisible
- by the interval of the date histogram aggregation.
- end::frequency[]
- tag::frequent-category-count[]
- The number of categories that match more than 1% of categorized documents.
- end::frequent-category-count[]
- tag::from[]
- Skips the specified number of {dfanalytics-jobs}. The default value is `0`.
- end::from[]
- tag::from-models[]
- Skips the specified number of models. The default value is `0`.
- end::from-models[]
- tag::function[]
- The analysis function that is used. For example, `count`, `rare`, `mean`, `min`,
- `max`, and `sum`. For more information, see
- {ml-docs}/ml-functions.html[Function reference].
- end::function[]
- tag::gamma[]
- Advanced configuration option. Regularization parameter to prevent overfitting
- on the training data set. Multiplies a linear penalty associated with the size
- of individual trees in the forest. A high gamma value causes training to prefer
- small trees. A small gamma value results in larger individual trees and slower
- training. By default, this value is calculated during hyperparameter
- optimization. It must be a nonnegative value.
- end::gamma[]
- tag::groups[]
- A list of job groups. A job can belong to no groups or many.
- end::groups[]
- tag::indices[]
- An array of index names. Wildcards are supported. For example:
- `["it_ops_metrics", "server*"]`.
- +
- --
- NOTE: If any indices are in remote clusters then `node.remote_cluster_client`
- must not be set to `false` on any {ml} nodes.
- --
- end::indices[]
- tag::indices-options[]
- Specifies index expansion options that are used during search.
- +
- --
- For example:
- ```
- {
- "expand_wildcards": ["all"],
- "ignore_unavailable": true,
- "allow_no_indices": false,
- "ignore_throttled": true
- }
- ```
- For more information about these options, see <<multi-index>>.
- --
- end::indices-options[]
- tag::runtime-mappings[]
- Specifies runtime fields for the datafeed search.
- +
- --
- For example:
- ```
- {
- "day_of_week": {
- "type": "keyword",
- "script": {
- "source": "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT))"
- }
- }
- }
- ```
- --
- end::runtime-mappings[]
- tag::inference-config-classification-num-top-classes[]
- Specifies the number of top class predictions to return. Defaults to 0.
- end::inference-config-classification-num-top-classes[]
- tag::inference-config-classification-num-top-feature-importance-values[]
- Specifies the maximum number of
- {ml-docs}/ml-feature-importance.html[{feat-imp}] values per document. By
- default, it is zero and no {feat-imp} calculation occurs.
- end::inference-config-classification-num-top-feature-importance-values[]
- tag::inference-config-classification-top-classes-results-field[]
- Specifies the field to which the top classes are written. Defaults to
- `top_classes`.
- end::inference-config-classification-top-classes-results-field[]
- tag::inference-config-classification-prediction-field-type[]
- Specifies the type of the predicted field to write.
- Acceptable values are: `string`, `number`, `boolean`. When `boolean` is provided
- `1.0` is transformed to `true` and `0.0` to `false`.
- end::inference-config-classification-prediction-field-type[]
- tag::inference-config-regression-num-top-feature-importance-values[]
- Specifies the maximum number of
- {ml-docs}/ml-feature-importance.html[{feat-imp}] values per document.
- By default, it is zero and no {feat-imp} calculation occurs.
- end::inference-config-regression-num-top-feature-importance-values[]
- tag::inference-config-results-field[]
- The field that is added to incoming documents to contain the inference
- prediction. Defaults to `predicted_value`.
- end::inference-config-results-field[]
- tag::inference-config-results-field-processor[]
- The field that is added to incoming documents to contain the inference
- prediction. Defaults to the `results_field` value of the {dfanalytics-job} that was
- used to train the model, which defaults to `<dependent_variable>_prediction`.
- end::inference-config-results-field-processor[]
- tag::inference-metadata-feature-importance-feature-name[]
- The feature for which this importance was calculated.
- end::inference-metadata-feature-importance-feature-name[]
- tag::inference-metadata-feature-importance-magnitude[]
- The average magnitude of this feature across all the training data.
- This value is the average of the absolute values of the importance
- for this feature.
- end::inference-metadata-feature-importance-magnitude[]
- tag::inference-metadata-feature-importance-max[]
- The maximum importance value across all the training data for this
- feature.
- end::inference-metadata-feature-importance-max[]
- tag::inference-metadata-feature-importance-min[]
- The minimum importance value across all the training data for this
- feature.
- end::inference-metadata-feature-importance-min[]
- tag::influencers[]
- A comma-separated list of influencer field names. Typically these can be the by,
- over, or partition fields that are used in the detector configuration. You might
- also want to use a field name that is not specifically named in a detector, but
- is available as part of the input data. When you use multiple detectors, the use
- of influencers is recommended as it aggregates results for each influencer
- entity.
- end::influencers[]
- tag::input-bytes[]
- The number of bytes of input data posted to the {anomaly-job}.
- end::input-bytes[]
- tag::input-field-count[]
- The total number of fields in input documents posted to the {anomaly-job}. This
- count includes fields that are not used in the analysis. However, be aware that
- if you are using a {dfeed}, it extracts only the required fields from the
- documents it retrieves before posting them to the job.
- end::input-field-count[]
- tag::input-record-count[]
- The number of input documents posted to the {anomaly-job}.
- end::input-record-count[]
- tag::invalid-date-count[]
- The number of input documents with either a missing date field or a date that
- could not be parsed.
- end::invalid-date-count[]
- tag::is-interim[]
- If `true`, this is an interim result. In other words, the results are calculated
- based on partial input data.
- end::is-interim[]
- tag::job-id-anomaly-detection[]
- Identifier for the {anomaly-job}.
- end::job-id-anomaly-detection[]
- tag::job-id-data-frame-analytics[]
- Identifier for the {dfanalytics-job}.
- end::job-id-data-frame-analytics[]
- tag::job-id-anomaly-detection-default[]
- Identifier for the {anomaly-job}. It can be a job identifier, a group name, or a
- wildcard expression. If you do not specify one of these options, the API returns
- information for all {anomaly-jobs}.
- end::job-id-anomaly-detection-default[]
- tag::job-id-data-frame-analytics-default[]
- Identifier for the {dfanalytics-job}. If you do not specify this option, the API
- returns information for the first hundred {dfanalytics-jobs}.
- end::job-id-data-frame-analytics-default[]
- tag::job-id-anomaly-detection-list[]
- An identifier for the {anomaly-jobs}. It can be a job
- identifier, a group name, or a comma-separated list of jobs or groups.
- end::job-id-anomaly-detection-list[]
- tag::job-id-anomaly-detection-wildcard[]
- Identifier for the {anomaly-job}. It can be a job identifier, a group name, or a
- wildcard expression.
- end::job-id-anomaly-detection-wildcard[]
- tag::job-id-anomaly-detection-wildcard-list[]
- Identifier for the {anomaly-job}. It can be a job identifier, a group name, a
- comma-separated list of jobs or groups, or a wildcard expression.
- end::job-id-anomaly-detection-wildcard-list[]
- tag::job-id-anomaly-detection-define[]
- Identifier for the {anomaly-job}. This identifier can contain lowercase
- alphanumeric characters (a-z and 0-9), hyphens, and underscores. It must start
- and end with alphanumeric characters.
- end::job-id-anomaly-detection-define[]
- tag::job-id-data-frame-analytics-define[]
- Identifier for the {dfanalytics-job}. This identifier can contain lowercase
- alphanumeric characters (a-z and 0-9), hyphens, and underscores. It must start
- and end with alphanumeric characters.
- end::job-id-data-frame-analytics-define[]
- tag::job-id-datafeed[]
- The unique identifier for the job to which the {dfeed} sends data.
- end::job-id-datafeed[]
- tag::lambda[]
- Advanced configuration option. Regularization parameter to prevent overfitting
- on the training data set. Multiplies an L2 regularization term which applies to
- leaf weights of the individual trees in the forest. A high lambda value causes
- training to favor small leaf weights. This behavior makes the prediction
- function smoother at the expense of potentially not being able to capture
- relevant relationships between the features and the {depvar}. A small lambda
- value results in large individual trees and slower training. By default, this
- value is calculated during hyperparameter optimization. It must be a nonnegative
- value.
- end::lambda[]
- tag::last-data-time[]
- The timestamp at which data was last analyzed, according to server time.
- end::last-data-time[]
- tag::latency[]
- The size of the window in which to expect data that is out of time order. The
- default value is 0 (no latency). If you specify a non-zero value, it must be
- greater than or equal to one second. For more information about time units, see
- <<time-units>>.
- +
- --
- NOTE: Latency is only applicable when you send data by using
- the <<ml-post-data,post data>> API.
- --
- end::latency[]
- tag::latest-empty-bucket-timestamp[]
- The timestamp of the last bucket that did not contain any data.
- end::latest-empty-bucket-timestamp[]
- tag::latest-record-timestamp[]
- The timestamp of the chronologically latest input document.
- end::latest-record-timestamp[]
- tag::latest-sparse-record-timestamp[]
- The timestamp of the last bucket that was considered sparse.
- end::latest-sparse-record-timestamp[]
- tag::max-empty-searches[]
- If a real-time {dfeed} has never seen any data (including during any initial
- training period) then it will automatically stop itself and close its associated
- job after this many real-time searches that return no documents. In other words,
- it will stop after `frequency` times `max_empty_searches` of real-time
- operation. If not set then a {dfeed} with no end time that sees no data will
- remain started until it is explicitly stopped. By default this setting is not
- set.
- end::max-empty-searches[]
- tag::max-trees[]
- Advanced configuration option. Defines the maximum number of decision trees in
- the forest. The maximum value is 2000. By default, this value is calculated
- during hyperparameter optimization.
- end::max-trees[]
- tag::method[]
- The method that {oldetection} uses. Available methods are `lof`, `ldof`,
- `distance_kth_nn`, `distance_knn`, and `ensemble`. The default value is
- `ensemble`, which means that {oldetection} uses an ensemble of different methods
- and normalizes and combines their individual {olscores} to obtain the overall
- {olscore}.
- end::method[]
- tag::missing-field-count[]
- The number of input documents that are missing a field that the {anomaly-job} is
- configured to analyze. Input documents with missing fields are still processed
- because it is possible that not all fields are missing.
- +
- --
- NOTE: If you are using {dfeeds} or posting data to the job in JSON format, a
- high `missing_field_count` is often not an indication of data issues. It is not
- necessarily a cause for concern.
- --
- end::missing-field-count[]
- tag::mode[]
- There are three available modes:
- +
- --
- * `auto`: The chunk size is dynamically calculated. This is the default and
- recommended value when the {dfeed} does not use aggregations.
- * `manual`: Chunking is applied according to the specified `time_span`. Use this
- mode when the {dfeed} uses aggregations.
- * `off`: No chunking is applied.
- --
- end::mode[]
- tag::model-bytes[]
- The number of bytes of memory used by the models. This is the maximum value
- since the last time the model was persisted. If the job is closed, this value
- indicates the latest size.
- end::model-bytes[]
- tag::model-bytes-exceeded[]
- The number of bytes over the high limit for memory usage at the last allocation
- failure.
- end::model-bytes-exceeded[]
- tag::model-id[]
- The unique identifier of the trained model.
- end::model-id[]
- tag::model-memory-limit[]
- The approximate maximum amount of memory resources that are required for
- analytical processing. Once this limit is approached, data pruning becomes
- more aggressive. Upon exceeding this limit, new entities are not modeled. The
- default value for jobs created in version 6.1 and later is `1024mb`.
- This value will need to be increased for jobs that are expected to analyze high
- cardinality fields, but the default is set to a relatively small size to ensure
- that high resource usage is a conscious decision. The default value for jobs
- created in versions earlier than 6.1 is `4096mb`.
- +
- If you specify a number instead of a string, the units are assumed to be MiB.
- Specifying a string is recommended for clarity. If you specify a byte size unit
- of `b` or `kb` and the number does not equate to a discrete number of megabytes,
- it is rounded down to the closest MiB. The minimum valid value is 1 MiB. If you
- specify a value less than 1 MiB, an error occurs. For more information about
- supported byte size units, see <<byte-units>>.
- +
- If your `elasticsearch.yml` file contains an `xpack.ml.max_model_memory_limit`
- setting, an error occurs when you try to create jobs that have
- `model_memory_limit` values greater than that setting. For more information,
- see <<ml-settings>>.
- end::model-memory-limit[]
- tag::model-memory-limit-anomaly-jobs[]
- The upper limit for model memory usage, checked on increasing values.
- end::model-memory-limit-anomaly-jobs[]
- tag::model-memory-status[]
- The status of the mathematical models, which can have one of the following
- values:
- +
- --
- * `ok`: The models stayed below the configured value.
- * `soft_limit`: The models used more than 60% of the configured memory limit
- and older unused models will be pruned to free up space. Additionally, in
- categorization jobs no further category examples will be stored.
- * `hard_limit`: The models used more space than the configured memory limit.
- As a result, not all incoming data was processed.
- --
- end::model-memory-status[]
- tag::model-plot-config[]
- This advanced configuration option stores model information along with the
- results. It provides a more detailed view into {anomaly-detect}.
- +
- --
- WARNING: If you enable model plot it can add considerable overhead to the
- performance of the system; it is not feasible for jobs with many entities.
- Model plot provides a simplified and indicative view of the model and its
- bounds. It does not display complex features such as multivariate correlations
- or multimodal data. As such, anomalies may occasionally be reported which cannot
- be seen in the model plot.
- Model plot config can be configured when the job is created or updated later. It
- must be disabled if performance issues are experienced.
- --
- end::model-plot-config[]
- tag::model-plot-config-annotations-enabled[]
- If true, enables calculation and storage of the model change annotations
- for each entity that is being analyzed. By default, it has the same value as
- the `enabled` property.
- end::model-plot-config-annotations-enabled[]
- tag::model-plot-config-enabled[]
- If true, enables calculation and storage of the model bounds for each entity
- that is being analyzed. By default, this is not enabled.
- end::model-plot-config-enabled[]
- tag::model-plot-config-terms[]
- Limits data collection to this comma-separated list of partition or by field
- values. If terms are not specified or it is an empty string, no filtering is
- applied. For example, "CPU,NetworkIn,DiskWrites". Wildcards are not supported.
- Only the specified `terms` can be viewed when using the Single Metric Viewer.
- end::model-plot-config-terms[]
- tag::model-snapshot-id[]
- A numerical character string that uniquely identifies the model snapshot. For
- example, `1575402236000`.
- end::model-snapshot-id[]
- tag::model-snapshot-retention-days[]
- Advanced configuration option, which affects the automatic removal of old model
- snapshots for this job. It specifies the maximum period of time (in days) that
- snapshots are retained. This period is relative to the timestamp of the most
- recent snapshot for this job. The default value is `10`, which means snapshots
- ten days older than the newest snapshot are deleted. For more information, refer
- to {ml-docs}/ml-model-snapshots.html[Model snapshots].
- end::model-snapshot-retention-days[]
- tag::model-timestamp[]
- The timestamp of the last record when the model stats were gathered.
- end::model-timestamp[]
- tag::multivariate-by-fields[]
- This functionality is reserved for internal use. It is not supported for use in
- customer environments and is not subject to the support SLA of official GA
- features.
- +
- --
- If set to `true`, the analysis will automatically find correlations between
- metrics for a given `by` field value and report anomalies when those
- correlations cease to hold. For example, suppose CPU and memory usage on host A
- is usually highly correlated with the same metrics on host B. Perhaps this
- correlation occurs because they are running a load-balanced application.
- If you enable this property, then anomalies will be reported when, for example,
- CPU usage on host A is high and the value of CPU usage on host B is low. That
- is to say, you'll see an anomaly when the CPU of host A is unusual given
- the CPU of host B.
- NOTE: To use the `multivariate_by_fields` property, you must also specify
- `by_field_name` in your detector.
- --
- end::multivariate-by-fields[]
- tag::n-neighbors[]
- Defines the value for how many nearest neighbors each method of {oldetection}
- uses to calculate its {olscore}. When the value is not set, different values are
- used for different ensemble members. This default behavior helps improve the
- diversity in the ensemble; only override it if you are confident that the value
- you choose is appropriate for the data set.
- end::n-neighbors[]
- tag::node-address[]
- The network address of the node.
- end::node-address[]
- tag::node-attributes[]
- Lists node attributes such as `ml.machine_memory` or `ml.max_open_jobs` settings.
- end::node-attributes[]
- tag::node-datafeeds[]
- For started {dfeeds} only, this information pertains to the node upon which the
- {dfeed} is started.
- end::node-datafeeds[]
- tag::node-ephemeral-id[]
- The ephemeral ID of the node.
- end::node-ephemeral-id[]
- tag::node-id[]
- The unique identifier of the node.
- end::node-id[]
- tag::node-jobs[]
- Contains properties for the node that runs the job. This information is
- available only for open jobs.
- end::node-jobs[]
- tag::node-transport-address[]
- The host and port where transport HTTP connections are accepted.
- end::node-transport-address[]
- tag::open-time[]
- For open jobs only, the elapsed time for which the job has been open.
- end::open-time[]
- tag::out-of-order-timestamp-count[]
- The number of input documents that are out of time sequence and outside
- of the latency window. This information is applicable only when you provide data
- to the {anomaly-job} by using the <<ml-post-data,post data API>>. These out of
- order documents are discarded, since jobs require time series data to be in
- ascending chronological order.
- end::out-of-order-timestamp-count[]
- tag::outlier-fraction[]
- The proportion of the data set that is assumed to be outlying prior to
- {oldetection}. For example, 0.05 means it is assumed that 5% of values are real
- outliers and 95% are inliers.
- end::outlier-fraction[]
- tag::over-field-name[]
- The field used to split the data. In particular, this property is used for
- analyzing the splits with respect to the history of all splits. It is used for
- finding unusual values in the population of all splits. For more information,
- see {ml-docs}/ml-configuring-populations.html[Performing population analysis].
- end::over-field-name[]
- tag::partition-field-name[]
- The field used to segment the analysis. When you use this property, you have
- completely independent baselines for each value of this field.
- end::partition-field-name[]
- tag::peak-model-bytes[]
- The peak number of bytes of memory ever used by the models.
- end::peak-model-bytes[]
- tag::per-partition-categorization[]
- Settings related to how categorization interacts with partition fields.
- end::per-partition-categorization[]
- tag::per-partition-categorization-enabled[]
- To enable this setting, you must also set the `partition_field_name` property to
- the same value in every detector that uses the keyword `mlcategory`. Otherwise,
- job creation fails.
- end::per-partition-categorization-enabled[]
- tag::per-partition-categorization-stop-on-warn[]
- This setting can be set to true only if per-partition categorization is enabled.
- If true, both categorization and subsequent anomaly detection stop for
- partitions where the categorization status changes to `warn`. This setting makes
- it viable to have a job where it is expected that categorization works well for
- some partitions but not others; you do not pay the cost of bad categorization
- forever in the partitions where it works badly.
- end::per-partition-categorization-stop-on-warn[]
- tag::prediction-field-name[]
- Defines the name of the prediction field in the results.
- Defaults to `<dependent_variable>_prediction`.
- end::prediction-field-name[]
- tag::processed-field-count[]
- The total number of fields in all the documents that have been processed by the
- {anomaly-job}. Only fields that are specified in the detector configuration
- object contribute to this count. The timestamp is not included in this count.
- end::processed-field-count[]
- tag::processed-record-count[]
- The number of input documents that have been processed by the {anomaly-job}.
- This value includes documents with missing fields, since they are nonetheless
- analyzed. If you use {dfeeds} and have aggregations in your search query, the
- `processed_record_count` is the number of aggregation results processed, not the
- number of {es} documents.
- end::processed-record-count[]
- tag::randomize-seed[]
- Defines the seed for the random generator that is used to pick training data. By
- default, it is randomly generated. Set it to a specific value to use the same
- training data each time you start a job (assuming other related parameters such
- as `source` and `analyzed_fields` are the same).
- end::randomize-seed[]
- tag::query[]
- The {es} query domain-specific language (DSL). This value corresponds to the
- query object in an {es} search POST body. All the options that are supported by
- {es} can be used, as this object is passed verbatim to {es}. By default, this
- property has the following value: `{"match_all": {"boost": 1}}`.
- end::query[]
- tag::query-delay[]
- The number of seconds behind real time that data is queried. For example, if
- data from 10:04 a.m. might not be searchable in {es} until 10:06 a.m., set this
- property to 120 seconds. The default value is randomly selected between `60s`
- and `120s`. This randomness improves the query performance when there are
- multiple jobs running on the same node. For more information, see
- {ml-docs}/ml-delayed-data-detection.html[Handling delayed data].
- end::query-delay[]
- tag::rare-category-count[]
- The number of categories that match just one categorized document.
- end::rare-category-count[]
- tag::renormalization-window-days[]
- Advanced configuration option. The period over which adjustments to the score
- are applied, as new data is seen. The default value is the longer of 30 days or
- 100 `bucket_spans`.
- end::renormalization-window-days[]
- tag::results-index-name[]
- A text string that affects the name of the {ml} results index. The default value
- is `shared`, which generates an index named `.ml-anomalies-shared`.
- end::results-index-name[]
- tag::results-retention-days[]
- Advanced configuration option. The period of time (in days) that results are
- retained. Age is calculated relative to the timestamp of the latest bucket
- result. If this property has a non-null value, once per day at 00:30 (server
- time), results that are the specified number of days older than the latest
- bucket result are deleted from {es}. The default value is null, which means all
- results are retained.
- end::results-retention-days[]
- tag::retain[]
- If `true`, this snapshot will not be deleted during automatic cleanup of
- snapshots older than `model_snapshot_retention_days`. However, this snapshot
- will be deleted when the job is deleted. The default value is `false`.
- end::retain[]
- tag::script-fields[]
- Specifies scripts that evaluate custom expressions and return script fields to
- the {dfeed}. The detector configuration objects in a job can contain functions
- that use these script fields. For more information, see
- {ml-docs}/ml-configuring-transform.html[Transforming data with script fields]
- and <<script-fields,Script fields>>.
- end::script-fields[]
- tag::scroll-size[]
- The `size` parameter that is used in {es} searches when the {dfeed} does not use
- aggregations. The default value is `1000`. The maximum value is the value of
- `index.max_result_window`, which is 10,000 by default.
- end::scroll-size[]
- tag::search-bucket-avg[]
- The average search time per bucket, in milliseconds.
- end::search-bucket-avg[]
- tag::search-count[]
- The number of searches run by the {dfeed}.
- end::search-count[]
- tag::search-exp-avg-hour[]
- The exponential average search time per hour, in milliseconds.
- end::search-exp-avg-hour[]
- tag::search-time[]
- The total time the {dfeed} spent searching, in milliseconds.
- end::search-time[]
- tag::size[]
- Specifies the maximum number of {dfanalytics-jobs} to obtain. The default value
- is `100`.
- end::size[]
- tag::size-models[]
- Specifies the maximum number of models to obtain. The default value
- is `100`.
- end::size-models[]
- tag::snapshot-id[]
- Identifier for the model snapshot.
- end::snapshot-id[]
- tag::sparse-bucket-count[]
- The number of buckets that contained few data points compared to the expected
- number of data points. If your data contains many sparse buckets, consider using
- a longer `bucket_span`.
- end::sparse-bucket-count[]
- tag::standardization-enabled[]
- If `true`, the following operation is performed on the columns before computing
- outlier scores: (x_i - mean(x_i)) / sd(x_i). Defaults to `true`. For more
- information about this concept, see
- {wikipedia}/Feature_scaling#Standardization_(Z-score_Normalization)[Wikipedia].
- end::standardization-enabled[]
- tag::state-anomaly-job[]
- The status of the {anomaly-job}, which can be one of the following values:
- +
- --
- * `closed`: The job finished successfully with its model state persisted. The
- job must be opened before it can accept further data.
- * `closing`: The job close action is in progress and has not yet completed. A
- closing job cannot accept further data.
- * `failed`: The job did not finish successfully due to an error. This situation
- can occur due to invalid input data, a fatal error occurring during the
- analysis, or an external interaction such as the process being killed by the
- Linux out of memory (OOM) killer. If the job has irrevocably failed, it must be
- force closed and then deleted. If the {dfeed} can be corrected, the job can be
- closed and then re-opened.
- * `opened`: The job is available to receive and process data.
- * `opening`: The job open action is in progress and has not yet completed.
- --
- end::state-anomaly-job[]
- tag::state-datafeed[]
- The status of the {dfeed}, which can be one of the following values:
- +
- --
- * `starting`: The {dfeed} has been requested to start but has not yet started.
- * `started`: The {dfeed} is actively receiving data.
- * `stopping`: The {dfeed} has been requested to stop gracefully and is
- completing its final action.
- * `stopped`: The {dfeed} is stopped and will not receive data until it is
- re-started.
- --
- end::state-datafeed[]
- tag::summary-count-field-name[]
- If this property is specified, the data that is fed to the job is expected to be
- pre-summarized. This property value is the name of the field that contains the
- count of raw data points that have been summarized. The same
- `summary_count_field_name` applies to all detectors in the job.
- +
- --
- NOTE: The `summary_count_field_name` property cannot be used with the `metric`
- function.
- --
- end::summary-count-field-name[]
- tag::tags[]
- A comma-delimited string of tags. A trained model can have many tags, or none.
- When supplied, only trained models that contain all the supplied tags are
- returned.
- end::tags[]
- tag::timeout-start[]
- Controls the amount of time to wait until the {dfanalytics-job} starts. Defaults
- to 20 seconds.
- end::timeout-start[]
- tag::timeout-stop[]
- Controls the amount of time to wait until the {dfanalytics-job} stops. Defaults
- to 20 seconds.
- end::timeout-stop[]
- tag::time-format[]
- The time format, which can be `epoch`, `epoch_ms`, or a custom pattern. The
- default value is `epoch`, which refers to UNIX or Epoch time (the number of
- seconds since 1 Jan 1970). The value `epoch_ms` indicates that time is measured
- in milliseconds since the epoch. The `epoch` and `epoch_ms` time formats accept
- either integer or real values. +
- +
- NOTE: Custom patterns must conform to the Java `DateTimeFormatter` class.
- When you use date-time formatting patterns, it is recommended that you provide
- the full date, time and time zone. For example: `yyyy-MM-dd'T'HH:mm:ssX`.
- If the pattern that you specify is not sufficient to produce a complete
- timestamp, job creation fails.
- end::time-format[]
- tag::time-span[]
- The time span that each search will be querying. This setting is only applicable
- when the mode is set to `manual`. For example: `3h`.
- end::time-span[]
- tag::timestamp-results[]
- The start time of the bucket for which these results were calculated.
- end::timestamp-results[]
- tag::tokenizer[]
- The name or definition of the <<analysis-tokenizers,tokenizer>> to use after
- character filters are applied. This property is compulsory if
- `categorization_analyzer` is specified as an object. Machine learning provides a
- tokenizer called `ml_classic` that tokenizes in the same way as the
- non-customizable tokenizer in older versions of the product. If you want to use
- that tokenizer but change the character or token filters, specify
- `"tokenizer": "ml_classic"` in your `categorization_analyzer`.
- end::tokenizer[]
- tag::total-by-field-count[]
- The number of `by` field values that were analyzed by the models. This value is
- cumulative for all detectors in the job.
- end::total-by-field-count[]
- tag::total-category-count[]
- The number of categories created by categorization.
- end::total-category-count[]
- tag::total-over-field-count[]
- The number of `over` field values that were analyzed by the models. This value
- is cumulative for all detectors in the job.
- end::total-over-field-count[]
- tag::total-partition-field-count[]
- The number of `partition` field values that were analyzed by the models. This
- value is cumulative for all detectors in the job.
- end::total-partition-field-count[]
- tag::training-percent[]
- Defines the percentage of the eligible documents that will
- be used for training. Documents that are ignored by the analysis (for example
- those that contain arrays with more than one value) won’t be included in the
- calculation for used percentage. Defaults to `100`.
- end::training-percent[]
- tag::use-null[]
- Defines whether a new series is used as the null series when there is no value
- for the by or partition fields. The default value is `false`.
- end::use-null[]
- tag::verbose[]
- Defines whether the stats response should be verbose. The default value is `false`.
- end::verbose[]
|