|
@@ -509,6 +509,335 @@ tag::data-frame-analytics-stats[]
|
|
|
An array of statistics objects for {dfanalytics-jobs}, which are
|
|
|
sorted by the `id` value in ascending order.
|
|
|
|
|
|
+//Begin analysis_stats
|
|
|
+`analysis_stats`::
|
|
|
+(object)
|
|
|
+An object containing statistical data about the analysis.
|
|
|
++
|
|
|
+.Properties of `analysis_stats`
|
|
|
+[%collapsible%open]
|
|
|
+====
|
|
|
+//Begin classification_stats
|
|
|
+`classification_stats`:::
|
|
|
+(object)
|
|
|
+An object containing statistical data about the {classanalysis}.
|
|
|
++
|
|
|
+.Properties of `classification_stats`
|
|
|
+[%collapsible%open]
|
|
|
+=====
|
|
|
+//Begin class_hyperparameters
|
|
|
+`hyperparameters`::::
|
|
|
+(object)
|
|
|
+An object containing the parameters of the {classanalysis}.
|
|
|
++
|
|
|
+.Properties of `hyperparameters`
|
|
|
+[%collapsible%open]
|
|
|
+======
|
|
|
+tag::dfas-alpha[]
|
|
|
+`alpha`::::
|
|
|
+(double)
|
|
|
+Regularization factor to penalize deeper trees when training decision trees.
|
|
|
+end::dfas-alpha[]
|
|
|
+
|
|
|
+`class_assignment_objective`::::
|
|
|
+(string)
|
|
|
+Defines whether class assignment maximizes the accuracy or the minimum recall
|
|
|
+metric. Possible values are `maximize_accuracy` and `maximize_minimum_recall`.
|
|
|
+
|
|
|
+tag::dfas-downsample-factor[]
|
|
|
+`downsample_factor`::::
|
|
|
+(double)
|
|
|
+The value of the downsample factor.
|
|
|
+end::dfas-downsample-factor[]
|
|
|
+
|
|
|
+tag::dfas-eta[]
|
|
|
+`eta`::::
|
|
|
+(double)
|
|
|
+The value of the eta hyperparameter.
|
|
|
+end::dfas-eta[]
|
|
|
+
|
|
|
+tag::dfas-eta-growth[]
|
|
|
+`eta_growth_rate_per_tree`::::
|
|
|
+(double)
|
|
|
+Specifies the rate at which `eta` increases for each new tree that is added to the
|
|
|
+forest. For example, a rate of `1.05` increases `eta` by 5%.
|
|
|
+end::dfas-eta-growth[]
|
|
|
+
|
|
|
+tag::dfas-feature-bag-fraction[]
|
|
|
+`feature_bag_fraction`::::
|
|
|
+(double)
|
|
|
+The fraction of features that is used when selecting a random bag for each
|
|
|
+candidate split.
|
|
|
+end::dfas-feature-bag-fraction[]
|
|
|
+
|
|
|
+tag::dfas-gamma[]
|
|
|
+`gamma`::::
|
|
|
+(double)
|
|
|
+Regularization factor to penalize trees with large numbers of nodes.
|
|
|
+end::dfas-gamma[]
|
|
|
+
|
|
|
+tag::dfas-lambda[]
|
|
|
+`lambda`::::
|
|
|
+(double)
|
|
|
+Regularization factor to penalize large leaf weights.
|
|
|
+end::dfas-lambda[]
|
|
|
+
|
|
|
+tag::dfas-max-attempts[]
|
|
|
+`max_attempts_to_add_tree`::::
|
|
|
+(integer)
|
|
|
+If the algorithm fails to determine a non-trivial tree (more than a single
|
|
|
+leaf), this parameter determines how many such consecutive failures are
|
|
|
+tolerated. Once the number of attempts exceeds the threshold, the forest
|
|
|
+training stops.
|
|
|
+end::dfas-max-attempts[]
|
|
|
+
|
|
|
+tag::dfas-max-optimization-rounds[]
|
|
|
+`max_optimization_rounds_per_hyperparameter`::::
|
|
|
+(integer)
|
|
|
+A multiplier responsible for determining the maximum number of
|
|
|
+hyperparameter optimization steps in the Bayesian optimization procedure.
|
|
|
+The maximum number of steps is determined based on the number of undefined hyperparameters
|
|
|
+times the maximum optimization rounds per hyperparameter.
|
|
|
+end::dfas-max-optimization-rounds[]
|
|
|
+
|
|
|
+tag::dfas-max-trees[]
|
|
|
+`max_trees`::::
|
|
|
+(integer)
|
|
|
+The maximum number of trees in the forest.
|
|
|
+end::dfas-max-trees[]
|
|
|
+
|
|
|
+tag::dfas-num-folds[]
|
|
|
+`num_folds`::::
|
|
|
+(integer)
|
|
|
+The maximum number of folds for the cross-validation procedure.
|
|
|
+end::dfas-num-folds[]
|
|
|
+
|
|
|
+tag::dfas-num-splits[]
|
|
|
+`num_splits_per_feature`::::
|
|
|
+(integer)
|
|
|
+Determines the maximum number of splits for every feature that can occur in a
|
|
|
+decision tree when the tree is trained.
|
|
|
+end::dfas-num-splits[]
|
|
|
+
|
|
|
+tag::dfas-soft-limit[]
|
|
|
+`soft_tree_depth_limit`::::
|
|
|
+(double)
|
|
|
+Tree depth limit is used for calculating the tree depth penalty. This is a soft
|
|
|
+limit; it can be exceeded.
|
|
|
+end::dfas-soft-limit[]
|
|
|
+
|
|
|
+tag::dfas-soft-tolerance[]
|
|
|
+`soft_tree_depth_tolerance`::::
|
|
|
+(double)
|
|
|
+Tree depth tolerance is used for calculating the tree depth penalty. This is a
|
|
|
+soft limit; it can be exceeded.
|
|
|
+end::dfas-soft-tolerance[]
|
|
|
+======
|
|
|
+//End class_hyperparameters
|
|
|
+
|
|
|
+tag::dfas-iteration[]
|
|
|
+`iteration`::::
|
|
|
+(integer)
|
|
|
+The number of iterations on the analysis.
|
|
|
+end::dfas-iteration[]
|
|
|
+
|
|
|
+tag::dfas-timestamp[]
|
|
|
+`timestamp`::::
|
|
|
+(date)
|
|
|
+The timestamp when the statistics were reported, in milliseconds since the epoch.
|
|
|
+end::dfas-timestamp[]
|
|
|
+
|
|
|
+//Begin class_timing_stats
|
|
|
+tag::dfas-timing-stats[]
|
|
|
+`timing_stats`::::
|
|
|
+(object)
|
|
|
+An object containing time statistics about the {dfanalytics-job}.
|
|
|
+end::dfas-timing-stats[]
|
|
|
++
|
|
|
+.Properties of `timing_stats`
|
|
|
+[%collapsible%open]
|
|
|
+======
|
|
|
+tag::dfas-timing-stats-elapsed[]
|
|
|
+`elapsed_time`::::
|
|
|
+(integer)
|
|
|
+Runtime of the analysis in milliseconds.
|
|
|
+end::dfas-timing-stats-elapsed[]
|
|
|
+
|
|
|
+tag::dfas-timing-stats-iteration[]
|
|
|
+`iteration_time`::::
|
|
|
+(integer)
|
|
|
+Runtime of the latest iteration of the analysis in milliseconds.
|
|
|
+end::dfas-timing-stats-iteration[]
|
|
|
+======
|
|
|
+//End class_timing_stats
|
|
|
+
|
|
|
+//Begin class_validation_loss
|
|
|
+tag::dfas-validation-loss[]
|
|
|
+`validation_loss`::::
|
|
|
+(object)
|
|
|
+An object containing information about validation loss.
|
|
|
+end::dfas-validation-loss[]
|
|
|
++
|
|
|
+.Properties of `validation_loss`
|
|
|
+[%collapsible%open]
|
|
|
+======
|
|
|
+tag::dfas-validation-loss-type[]
|
|
|
+`loss_type`::::
|
|
|
+(string)
|
|
|
+The type of the loss metric. For example, `binomial_logistic`.
|
|
|
+end::dfas-validation-loss-type[]
|
|
|
+
|
|
|
+tag::dfas-validation-loss-fold[]
|
|
|
+`fold_values`::::
|
|
|
+(array of strings)
|
|
|
+Validation loss values for every added decision tree during the forest growing
|
|
|
+procedure.
|
|
|
+end::dfas-validation-loss-fold[]
|
|
|
+======
|
|
|
+//End class_validation_loss
|
|
|
+=====
|
|
|
+//End classification_stats
|
|
|
+
|
|
|
+//Begin outlier_detection_stats
|
|
|
+`outlier_detection_stats`:::
|
|
|
+(object)
|
|
|
+An object containing statistical data about the {oldetection} job.
|
|
|
++
|
|
|
+.Properties of `outlier_detection_stats`
|
|
|
+[%collapsible%open]
|
|
|
+=====
|
|
|
+//Begin parameters
|
|
|
+`parameters`::::
|
|
|
+(object)
|
|
|
+The list of job parameters specified by the user or determined by algorithmic
|
|
|
+heuristics.
|
|
|
++
|
|
|
+.Properties of `parameters`
|
|
|
+[%collapsible%open]
|
|
|
+======
|
|
|
+`compute_feature_influence`::::
|
|
|
+(boolean)
|
|
|
+If true, feature influence calculation is enabled.
|
|
|
+
|
|
|
+`feature_influence_threshold`::::
|
|
|
+(double)
|
|
|
+The minimum {olscore} that a document needs to have to calculate its feature
|
|
|
+influence score.
|
|
|
+
|
|
|
+`method`::::
|
|
|
+(string)
|
|
|
+The method that {oldetection} uses. Possible values are `lof`, `ldof`,
|
|
|
+`distance_kth_nn`, `distance_knn`, and `ensemble`.
|
|
|
+
|
|
|
+`n_neighbors`::::
|
|
|
+(integer)
|
|
|
+The value for how many nearest neighbors each method of {oldetection} uses to
|
|
|
+calculate its outlier score.
|
|
|
+
|
|
|
+`outlier_fraction`::::
|
|
|
+(double)
|
|
|
+The proportion of the data set that is assumed to be outlying prior to
|
|
|
+{oldetection}.
|
|
|
+
|
|
|
+`standardization_enabled`::::
|
|
|
+(boolean)
|
|
|
+If true, then the following operation is performed on the columns before
|
|
|
+computing {olscores}: (x_i - mean(x_i)) / sd(x_i).
|
|
|
+======
|
|
|
+//End parameters
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-timestamp]
|
|
|
+
|
|
|
+//Begin od_timing_stats
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-timing-stats]
|
|
|
++
|
|
|
+.Property of `timing_stats`
|
|
|
+[%collapsible%open]
|
|
|
+======
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-timing-stats-elapsed]
|
|
|
+======
|
|
|
+//End od_timing_stats
|
|
|
+=====
|
|
|
+//End outlier_detection_stats
|
|
|
+
|
|
|
+//Begin regression_stats
|
|
|
+`regression_stats`:::
|
|
|
+(object)
|
|
|
+An object containing statistical data about the {reganalysis}.
|
|
|
++
|
|
|
+.Properties of `regression_stats`
|
|
|
+[%collapsible%open]
|
|
|
+=====
|
|
|
+//Begin reg_hyperparameters
|
|
|
+`hyperparameters`::::
|
|
|
+(object)
|
|
|
+An object containing the parameters of the {reganalysis}.
|
|
|
++
|
|
|
+.Properties of `hyperparameters`
|
|
|
+[%collapsible%open]
|
|
|
+======
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-alpha]
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-downsample-factor]
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-eta]
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-eta-growth]
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-feature-bag-fraction]
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-gamma]
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-lambda]
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-max-attempts]
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-max-optimization-rounds]
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-max-trees]
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-num-folds]
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-num-splits]
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-soft-limit]
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-soft-tolerance]
|
|
|
+======
|
|
|
+//End reg_hyperparameters
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-iteration]
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-timestamp]
|
|
|
+
|
|
|
+//Begin reg_timing_stats
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-timing-stats]
|
|
|
++
|
|
|
+.Properties of `timing_stats`
|
|
|
+[%collapsible%open]
|
|
|
+======
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-timing-stats-elapsed]
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-timing-stats-iteration]
|
|
|
+======
|
|
|
+//End reg_timing_stats
|
|
|
+
|
|
|
+//Begin reg_validation_loss
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-validation-loss]
|
|
|
++
|
|
|
+.Properties of `validation_loss`
|
|
|
+[%collapsible%open]
|
|
|
+======
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-validation-loss-type]
|
|
|
+
|
|
|
+include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-validation-loss-fold]
|
|
|
+======
|
|
|
+//End reg_validation_loss
|
|
|
+=====
|
|
|
+//End regression_stats
|
|
|
+====
|
|
|
+//End analysis_stats
|
|
|
+
|
|
|
`assignment_explanation`:::
|
|
|
(string)
|
|
|
For running jobs only, contains messages relating to the selection of a node to
|