
Add docs for HLRC for Estimate memory usage API (#45538)

Przemysław Witek, 6 years ago
commit c6a25a818d

+ 69 - 0
client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java

@@ -48,6 +48,7 @@ import org.elasticsearch.client.ml.DeleteForecastRequest;
 import org.elasticsearch.client.ml.DeleteJobRequest;
 import org.elasticsearch.client.ml.DeleteJobResponse;
 import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
+import org.elasticsearch.client.ml.EstimateMemoryUsageResponse;
 import org.elasticsearch.client.ml.EvaluateDataFrameRequest;
 import org.elasticsearch.client.ml.EvaluateDataFrameResponse;
 import org.elasticsearch.client.ml.FindFileStructureRequest;
@@ -194,11 +195,13 @@ import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 
+import static org.hamcrest.Matchers.allOf;
 import static org.hamcrest.Matchers.closeTo;
 import static org.hamcrest.Matchers.containsInAnyOrder;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
 import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.lessThan;
 import static org.hamcrest.core.Is.is;
 
 public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
@@ -3262,6 +3265,72 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
         }
     }
 
+    public void testEstimateMemoryUsage() throws Exception {
+        createIndex("estimate-test-source-index");
+        BulkRequest bulkRequest =
+            new BulkRequest("estimate-test-source-index")
+                .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
+        for (int i = 0; i < 10; ++i) {
+            bulkRequest.add(new IndexRequest().source(XContentType.JSON, "timestamp", 123456789L, "total", 10L));
+        }
+        RestHighLevelClient client = highLevelClient();
+        client.bulk(bulkRequest, RequestOptions.DEFAULT);
+        {
+            // tag::estimate-memory-usage-request
+            DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder()
+                .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build())
+                .setAnalysis(OutlierDetection.createDefault())
+                .build();
+            PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config); // <1>
+            // end::estimate-memory-usage-request
+
+            // tag::estimate-memory-usage-execute
+            EstimateMemoryUsageResponse response = client.machineLearning().estimateMemoryUsage(request, RequestOptions.DEFAULT);
+            // end::estimate-memory-usage-execute
+
+            // tag::estimate-memory-usage-response
+            ByteSizeValue expectedMemoryWithoutDisk = response.getExpectedMemoryWithoutDisk(); // <1>
+            ByteSizeValue expectedMemoryWithDisk = response.getExpectedMemoryWithDisk(); // <2>
+            // end::estimate-memory-usage-response
+
+            // We are pretty liberal here as this test does not aim at verifying concrete numbers but rather at exercising the end-to-end user workflow.
+            ByteSizeValue lowerBound = new ByteSizeValue(1, ByteSizeUnit.KB);
+            ByteSizeValue upperBound = new ByteSizeValue(1, ByteSizeUnit.GB);
+            assertThat(expectedMemoryWithoutDisk, allOf(greaterThan(lowerBound), lessThan(upperBound)));
+            assertThat(expectedMemoryWithDisk, allOf(greaterThan(lowerBound), lessThan(upperBound)));
+        }
+        {
+            DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder()
+                .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build())
+                .setAnalysis(OutlierDetection.createDefault())
+                .build();
+            PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config);
+            // tag::estimate-memory-usage-execute-listener
+            ActionListener<EstimateMemoryUsageResponse> listener = new ActionListener<>() {
+                @Override
+                public void onResponse(EstimateMemoryUsageResponse response) {
+                    // <1>
+                }
+
+                @Override
+                public void onFailure(Exception e) {
+                    // <2>
+                }
+            };
+            // end::estimate-memory-usage-execute-listener
+
+            // Replace the empty listener by a blocking listener in test
+            final CountDownLatch latch = new CountDownLatch(1);
+            listener = new LatchedActionListener<>(listener, latch);
+
+            // tag::estimate-memory-usage-execute-async
+            client.machineLearning().estimateMemoryUsageAsync(request, RequestOptions.DEFAULT, listener); // <1>
+            // end::estimate-memory-usage-execute-async
+
+            assertTrue(latch.await(30L, TimeUnit.SECONDS));
+        }
+    }
+
     public void testCreateFilter() throws Exception {
         RestHighLevelClient client = highLevelClient();
         {
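
The `LatchedActionListener` above exists only so the test can block until the asynchronous call completes. Below is a minimal sketch of the same asynchronous call in application code, where the listener acts on the response directly; the `org.elasticsearch.client.ml.dataframe` package for the config classes is assumed, as those imports sit outside this hunk.

["source","java"]
--------------------------------------------------
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.ml.EstimateMemoryUsageResponse;
import org.elasticsearch.client.ml.PutDataFrameAnalyticsRequest;
// Assumed package for the analytics config classes; their imports are not part of this hunk.
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsSource;
import org.elasticsearch.client.ml.dataframe.OutlierDetection;

public class EstimateMemoryUsageAsyncSketch {

    static void estimateAsync(RestHighLevelClient client, String sourceIndex) {
        // Same request as in the test: a config with only a source index and an analysis.
        DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder()
            .setSource(DataFrameAnalyticsSource.builder().setIndex(sourceIndex).build())
            .setAnalysis(OutlierDetection.createDefault())
            .build();
        PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config);

        client.machineLearning().estimateMemoryUsageAsync(request, RequestOptions.DEFAULT,
            new ActionListener<EstimateMemoryUsageResponse>() {
                @Override
                public void onResponse(EstimateMemoryUsageResponse response) {
                    // Both estimates come back as ByteSizeValue objects.
                    System.out.println("Expected memory without disk: " + response.getExpectedMemoryWithoutDisk());
                    System.out.println("Expected memory with disk: " + response.getExpectedMemoryWithDisk());
                }

                @Override
                public void onFailure(Exception e) {
                    // React to the failure, e.g. log it.
                    e.printStackTrace();
                }
            });
    }
}
--------------------------------------------------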

+ 35 - 0
docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc

@@ -0,0 +1,35 @@
+--
+:api: estimate-memory-usage
+:request: PutDataFrameAnalyticsRequest
+:response: EstimateMemoryUsageResponse
+--
+[id="{upid}-{api}"]
+=== Estimate memory usage API
+
+The Estimate memory usage API is used to estimate the memory usage of {dfanalytics}.
+The estimation results can be used later on when deciding the appropriate value for the `model_memory_limit` setting.
+
+The API accepts an +{request}+ object and returns an +{response}+.
+
+[id="{upid}-{api}-request"]
+==== Estimate memory usage Request
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-request]
+--------------------------------------------------
+<1> Constructing a new request containing a {dataframe-analytics-config} for which memory usage estimation should be performed.
+
+include::../execution.asciidoc[]
+
+[id="{upid}-{api}-response"]
+==== Response
+
+The returned +{response}+ contains the memory usage estimates.
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-response]
+--------------------------------------------------
+<1> Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory (i.e. without overflowing to disk).
+<2> Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}.
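
The page above notes that the estimate feeds into `model_memory_limit`. Below is a minimal sketch of that workflow with the blocking call, assuming the HLRC config builder also exposes `setId`, `setDest` and `setModelMemoryLimit` and that the job is then created via `putDataFrameAnalytics`; none of those calls appear in this commit, and the job and index names are made up for illustration.

["source","java"]
--------------------------------------------------
import java.io.IOException;

import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.ml.EstimateMemoryUsageResponse;
import org.elasticsearch.client.ml.PutDataFrameAnalyticsRequest;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsDest;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsSource;
import org.elasticsearch.client.ml.dataframe.OutlierDetection;
import org.elasticsearch.common.unit.ByteSizeValue;

public class ModelMemoryLimitFromEstimateSketch {

    static void createJobWithEstimatedLimit(RestHighLevelClient client, String sourceIndex) throws IOException {
        // 1. Ask for the estimate using a config that only carries source and analysis.
        DataFrameAnalyticsConfig estimateConfig = DataFrameAnalyticsConfig.builder()
            .setSource(DataFrameAnalyticsSource.builder().setIndex(sourceIndex).build())
            .setAnalysis(OutlierDetection.createDefault())
            .build();
        EstimateMemoryUsageResponse estimate = client.machineLearning()
            .estimateMemoryUsage(new PutDataFrameAnalyticsRequest(estimateConfig), RequestOptions.DEFAULT);
        ByteSizeValue limit = estimate.getExpectedMemoryWithoutDisk();

        // 2. Reuse the in-memory estimate as model_memory_limit when creating the job.
        //    setId, setDest and setModelMemoryLimit are assumed builder methods; the names below are hypothetical.
        DataFrameAnalyticsConfig jobConfig = DataFrameAnalyticsConfig.builder()
            .setId("outlier-detection-job")
            .setSource(DataFrameAnalyticsSource.builder().setIndex(sourceIndex).build())
            .setDest(DataFrameAnalyticsDest.builder().setIndex("outlier-detection-results").build())
            .setAnalysis(OutlierDetection.createDefault())
            .setModelMemoryLimit(limit)
            .build();
        client.machineLearning().putDataFrameAnalytics(new PutDataFrameAnalyticsRequest(jobConfig), RequestOptions.DEFAULT);
    }
}
--------------------------------------------------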

+ 2 - 0
docs/java-rest/high-level/supported-apis.asciidoc

@@ -295,6 +295,7 @@ The Java High Level REST Client supports the following Machine Learning APIs:
 * <<{upid}-start-data-frame-analytics>>
 * <<{upid}-stop-data-frame-analytics>>
 * <<{upid}-evaluate-data-frame>>
+* <<{upid}-estimate-memory-usage>>
 * <<{upid}-put-filter>>
 * <<{upid}-get-filters>>
 * <<{upid}-update-filter>>
@@ -346,6 +347,7 @@ include::ml/delete-data-frame-analytics.asciidoc[]
 include::ml/start-data-frame-analytics.asciidoc[]
 include::ml/stop-data-frame-analytics.asciidoc[]
 include::ml/evaluate-data-frame.asciidoc[]
+include::ml/estimate-memory-usage.asciidoc[]
 include::ml/put-filter.asciidoc[]
 include::ml/get-filters.asciidoc[]
 include::ml/update-filter.asciidoc[]

+ 6 - 6
docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc

@@ -42,14 +42,14 @@ Serves as an advice on how to set `model_memory_limit` when creating {dfanalytic
 [[ml-estimate-memory-usage-dfanalytics-results]]
 ==== {api-response-body-title}
 
-`expected_memory_usage_with_one_partition`::
+`expected_memory_without_disk`::
   (string) Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory
   (i.e. without overflowing to disk).
   
-`expected_memory_usage_with_max_partitions`::
+`expected_memory_with_disk`::
   (string) Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}.
-  `expected_memory_usage_with_max_partitions` is usually smaller than `expected_memory_usage_with_one_partition`
-  as using disk allows to limit the main memory needed to perform {dfanalytics}.
+  `expected_memory_with_disk` is usually smaller than `expected_memory_without_disk` because using the disk
+  allows limiting the main memory needed to perform {dfanalytics}.
 
 [[ml-estimate-memory-usage-dfanalytics-example]]
 ==== {api-examples-title}
@@ -76,8 +76,8 @@ The API returns the following results:
 [source,js]
 ----
 {
-  "expected_memory_usage_with_one_partition": "128MB",
-  "expected_memory_usage_with_max_partitions": "32MB"
+  "expected_memory_without_disk": "128MB",
+  "expected_memory_with_disk": "32MB"
 }
 ----
 // TESTRESPONSE
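
For completeness, the renamed fields in this example can be checked against the statement above that the `_with_disk` figure is usually the smaller one. A small sketch follows, assuming `ByteSizeValue.parseBytesSizeValue` accepts the strings from the example response; the HLRC getters `getExpectedMemoryWithoutDisk()` and `getExpectedMemoryWithDisk()` correspond to these JSON fields.

["source","java"]
--------------------------------------------------
import org.elasticsearch.common.unit.ByteSizeValue;

public class CompareEstimateFieldsSketch {

    public static void main(String[] args) {
        // Values copied from the example response above.
        ByteSizeValue withoutDisk = ByteSizeValue.parseBytesSizeValue("128MB", "expected_memory_without_disk");
        ByteSizeValue withDisk = ByteSizeValue.parseBytesSizeValue("32MB", "expected_memory_with_disk");

        System.out.println("expected_memory_without_disk: " + withoutDisk.getBytes() + " bytes");
        System.out.println("expected_memory_with_disk:    " + withDisk.getBytes() + " bytes");
        // The with-disk estimate is expected to be the smaller of the two.
        System.out.println("with-disk estimate is smaller: " + (withDisk.getBytes() < withoutDisk.getBytes()));
    }
}
--------------------------------------------------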