Przeglądaj źródła

[ML] Fix monitoring if orphaned anomaly detector persistent tasks exist (#57235)

Since #51888 the ML job stats endpoint has returned entries for
jobs that have a persistent task but no job config. Such
orphaned tasks caused monitoring to fail.

This change ignores any such corrupt jobs for monitoring purposes.
David Roberts 5 lat temu
rodzic
commit
4bfd07697f

+ 6 - 1
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningUsageTransportAction.java

@@ -184,9 +184,14 @@ public class MachineLearningUsageTransportAction extends XPackUsageFeatureTransp
         Map<String, Long> allJobsCreatedBy = jobs.stream().map(this::jobCreatedBy)
             .collect(Collectors.groupingBy(item -> item, Collectors.counting()));;
         for (GetJobsStatsAction.Response.JobStats jobStats : jobsStats) {
-            ModelSizeStats modelSizeStats = jobStats.getModelSizeStats();
             Job job = jobMap.get(jobStats.getJobId());
+            if (job == null) {
+                // It's possible we can get job stats without a corresponding job config, if a
+                // persistent task is orphaned. Omit these corrupt jobs from the usage info.
+                continue;
+            }
             int detectorsCount = job.getAnalysisConfig().getDetectors().size();
+            ModelSizeStats modelSizeStats = jobStats.getModelSizeStats();
             double modelSize = modelSizeStats == null ? 0.0
                 : jobStats.getModelSizeStats().getModelBytes();
 

+ 39 - 0
x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java

@@ -327,6 +327,45 @@ public class MachineLearningInfoTransportActionTests extends ESTestCase {
         }
     }
 
+    /**
+     * Supplies stats for three jobs but configs for only two of them, then checks
+     * that every aggregate in the rendered usage reflects just the two jobs that
+     * have a config.
+     */
+    public void testUsageWithOrphanedTask() throws Exception {
+        when(licenseState.isAllowed(XPackLicenseState.Feature.MACHINE_LEARNING)).thenReturn(true);
+        Settings nodeSettings = Settings.builder()
+            .put(commonSettings)
+            .put("xpack.ml.enabled", true)
+            .build();
+
+        // "opened1" and "closed1" have both a config and stats
+        Job openedJob = buildJob(
+            "opened1",
+            Collections.singletonList(buildMinDetector("foo")),
+            Collections.singletonMap("created_by", randomFrom("a-cool-module", "a_cool_module", "a cool module"))
+        );
+        GetJobsStatsAction.Response.JobStats openedStats = buildJobStats("opened1", JobState.OPENED, 100L, 3L);
+        // "opened2" is the orphan: stats only, deliberately no Job config
+        GetJobsStatsAction.Response.JobStats orphanStats = buildJobStats("opened2", JobState.OPENED, 200L, 8L);
+        Job closedJob = buildJob(
+            "closed1",
+            Arrays.asList(buildMinDetector("foo"), buildMinDetector("bar"), buildMinDetector("foobar"))
+        );
+        GetJobsStatsAction.Response.JobStats closedStats = buildJobStats("closed1", JobState.CLOSED, 300L, 0);
+        givenJobs(Arrays.asList(openedJob, closedJob), Arrays.asList(openedStats, orphanStats, closedStats));
+
+        var action = newUsageAction(nodeSettings);
+        PlainActionFuture<XPackUsageFeatureResponse> listener = new PlainActionFuture<>();
+        action.masterOperation(null, null, ClusterState.EMPTY_STATE, listener);
+        XPackUsageFeatureResponse response = listener.get();
+        XPackFeatureSet.Usage usage = response.getUsage();
+
+        // Render the usage to JSON so values can be looked up by dotted path
+        XContentSource rendered;
+        try (XContentBuilder json = XContentFactory.jsonBuilder()) {
+            usage.toXContent(json, ToXContent.EMPTY_PARAMS);
+            rendered = new XContentSource(json);
+        }
+
+        // Only the two jobs with configs may contribute to any of these figures
+        assertThat(rendered.getValue("jobs._all.count"), equalTo(2));
+
+        assertThat(rendered.getValue("jobs._all.detectors.min"), equalTo(1.0));
+        assertThat(rendered.getValue("jobs._all.detectors.max"), equalTo(3.0));
+        assertThat(rendered.getValue("jobs._all.detectors.total"), equalTo(4.0));
+        assertThat(rendered.getValue("jobs._all.detectors.avg"), equalTo(2.0));
+
+        assertThat(rendered.getValue("jobs._all.model_size.min"), equalTo(100.0));
+        assertThat(rendered.getValue("jobs._all.model_size.max"), equalTo(300.0));
+        assertThat(rendered.getValue("jobs._all.model_size.total"), equalTo(400.0));
+        assertThat(rendered.getValue("jobs._all.model_size.avg"), equalTo(200.0));
+
+        assertThat(rendered.getValue("jobs._all.created_by.a_cool_module"), equalTo(1));
+        assertThat(rendered.getValue("jobs._all.created_by.unknown"), equalTo(1));
+    }
+
     public void testUsageDisabledML() throws Exception {
         when(licenseState.isAllowed(XPackLicenseState.Feature.MACHINE_LEARNING)).thenReturn(true);
         Settings.Builder settings = Settings.builder().put(commonSettings);