Browse Source

[ML] fixing and unmuting testHRDSplit test (#55349)

This fixes the long muted testHRDSplit. Some minor adjustments for modern day elasticsearch changes :). 

The cause of the failure is that a new `by` field entering the model with an exceptionally high count does not cause an anomaly. We have since stopped combining the `rare` and `by` in this manner. New entries in a `by` field are not anomalous because we have no history on them yet. 

closes https://github.com/elastic/elasticsearch/issues/32966
Benjamin Trent 5 years ago
parent
commit
7adbc35bbf

+ 68 - 50
x-pack/plugin/ml/qa/single-node-tests/src/test/java/org/elasticsearch/xpack/ml/transforms/PainlessDomainSplitIT.java

@@ -14,7 +14,7 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.test.rest.ESRestTestCase;
 import org.elasticsearch.xpack.ml.MachineLearning;
 
-import java.time.Clock;
+import java.time.ZoneOffset;
 import java.time.ZonedDateTime;
 import java.time.format.DateTimeFormatter;
 import java.util.ArrayList;
@@ -25,6 +25,7 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 
 public class PainlessDomainSplitIT extends ESRestTestCase {
@@ -239,7 +240,6 @@ public class PainlessDomainSplitIT extends ESRestTestCase {
         }
     }
 
-    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/32966")
     public void testHRDSplit() throws Exception {
         // Create job
         Request createJobRequest = new Request("PUT", MachineLearning.BASE_PATH + "anomaly_detectors/hrd-split-job");
@@ -268,7 +268,7 @@ public class PainlessDomainSplitIT extends ESRestTestCase {
                 "\"time\": { \"type\": \"date\" } }");
 
         // Index some data
-        ZonedDateTime baseTime = ZonedDateTime.now(Clock.systemDefaultZone()).minusYears(1);
+        ZonedDateTime baseTime = ZonedDateTime.now(ZoneOffset.UTC).minusYears(1);
         TestConfiguration test = tests.get(randomInt(tests.size()-1));
 
         // domainSplit() tests had subdomain, testHighestRegisteredDomainCases() did not, so we need a special case for sub
@@ -276,20 +276,18 @@ public class PainlessDomainSplitIT extends ESRestTestCase {
         String expectedHRD = test.domainExpected.replace(".", "\\.");
         Pattern pattern = Pattern.compile("domain_split\":\\[\"(" + expectedSub + "),(" + expectedHRD +")\"[,\\]]");
 
-        for (int i = 0; i < 100; i++) {
-
+        for (int i = 1; i <= 100; i++) {
             ZonedDateTime time = baseTime.plusHours(i);
-            if (i == 64) {
+            String formattedTime = time.format(DateTimeFormatter.ISO_DATE_TIME);
+            if (i % 50 == 0) {
                 // Anomaly has 100 docs, but we don't care about the value
                 for (int j = 0; j < 100; j++) {
-                    String formattedTime = time.format(DateTimeFormatter.ISO_DATE_TIME);
-                    Request createDocRequest = new Request("PUT", "/painless/_doc/" + formattedTime + "_" + j);
+                    Request createDocRequest = new Request("POST", "/painless/_doc");
                     createDocRequest.setJsonEntity("{\"domain\": \"" + "bar.bar.com\", \"time\": \"" + formattedTime + "\"}");
                     client().performRequest(createDocRequest);
                 }
             } else {
                 // Non-anomalous values will be what's seen when the anomaly is reported
-                String formattedTime = time.format(DateTimeFormatter.ISO_DATE_TIME);
                 Request createDocRequest = new Request("PUT", "/painless/_doc/" + formattedTime);
                 createDocRequest.setJsonEntity("{\"domain\": \"" + test.hostName + "\", \"time\": \"" + formattedTime + "\"}");
                 client().performRequest(createDocRequest);
@@ -304,7 +302,6 @@ public class PainlessDomainSplitIT extends ESRestTestCase {
                 "{\n" +
                 "   \"job_id\":\"hrd-split-job\",\n" +
                 "   \"indexes\":[\"painless\"],\n" +
-                "   \"types\":[\"_doc\"],\n" +
                 "   \"script_fields\": {\n" +
                 "      \"domain_split\": {\n" +
                 "         \"script\": \"return domainSplit(doc['domain'].value, params);\"\n" +
@@ -313,46 +310,67 @@ public class PainlessDomainSplitIT extends ESRestTestCase {
                 "}");
 
         client().performRequest(createFeedRequest);
-        client().performRequest(new Request("POST", MachineLearning.BASE_PATH + "datafeeds/hrd-split-datafeed/_start"));
-
-        try {
-            assertBusy(() -> {
-                client().performRequest(new Request("POST", "/_refresh"));
-
-                Response response = client().performRequest(new Request("GET",
-                    MachineLearning.BASE_PATH + "anomaly_detectors/hrd-split-job/results/records"));
-                String responseBody = EntityUtils.toString(response.getEntity());
-
-                if (responseBody.contains("\"count\":2")) {
-                    Matcher m = pattern.matcher(responseBody);
-
-                    String actualSubDomain = "";
-                    String actualDomain = "";
-                    if (m.find()) {
-                        actualSubDomain = m.group(1).replace("\"", "");
-                        actualDomain = m.group(2).replace("\"", "");
-                    }
-
-                    String expectedTotal = "[" + test.subDomainExpected + "," + test.domainExpected + "]";
-                    String actualTotal = "[" + actualSubDomain + "," + actualDomain + "]";
-
-                    // domainSplit() tests had subdomain, testHighestRegisteredDomainCases() do not
-                    if (test.subDomainExpected != null) {
-                        assertThat("Expected subdomain [" + test.subDomainExpected + "] but found [" + actualSubDomain
-                            + "]. Actual " + actualTotal + " vs Expected " + expectedTotal, actualSubDomain,
-                            equalTo(test.subDomainExpected));
-                    }
-
-                    assertThat("Expected domain [" + test.domainExpected + "] but found [" + actualDomain + "].  Actual "
-                        + actualTotal + " vs Expected " + expectedTotal, actualDomain, equalTo(test.domainExpected));
-                } else {
-                    logger.error(responseBody);
-                    fail("Response body didn't contain [\"count\":2]");
-                }
-            }, 5, TimeUnit.SECONDS);
-        } catch (Exception e) {
-            logger.error(e.getMessage());
-            fail("Anomaly records were not found within 5 seconds");
+        Request startDatafeedRequest = new Request("POST", MachineLearning.BASE_PATH + "datafeeds/hrd-split-datafeed/_start");
+        startDatafeedRequest.addParameter("start", baseTime.format(DateTimeFormatter.ISO_DATE_TIME));
+        startDatafeedRequest.addParameter("end", ZonedDateTime.now(ZoneOffset.UTC).format(DateTimeFormatter.ISO_DATE_TIME));
+        client().performRequest(startDatafeedRequest);
+
+        waitUntilDatafeedIsStopped("hrd-split-datafeed");
+        waitUntilJobIsClosed("hrd-split-job");
+
+        client().performRequest(new Request("POST", "/.ml-anomalies-*/_refresh"));
+
+        Response records = client().performRequest(new Request("GET",
+            MachineLearning.BASE_PATH + "anomaly_detectors/hrd-split-job/results/records"));
+        String responseBody = EntityUtils.toString(records.getEntity());
+        assertThat("response body [" + responseBody + "] did not contain [\"count\":2]",
+            responseBody,
+            containsString("\"count\":2"));
+
+        Matcher m = pattern.matcher(responseBody);
+        String actualSubDomain = "";
+        String actualDomain = "";
+        if (m.find()) {
+            actualSubDomain = m.group(1).replace("\"", "");
+            actualDomain = m.group(2).replace("\"", "");
+        }
+
+        String expectedTotal = "[" + test.subDomainExpected + "," + test.domainExpected + "]";
+        String actualTotal = "[" + actualSubDomain + "," + actualDomain + "]";
+
+        // domainSplit() tests had subdomain, testHighestRegisteredDomainCases() do not
+        if (test.subDomainExpected != null) {
+            assertThat("Expected subdomain [" + test.subDomainExpected + "] but found [" + actualSubDomain
+                + "]. Actual " + actualTotal + " vs Expected " + expectedTotal, actualSubDomain,
+                equalTo(test.subDomainExpected));
         }
+
+        assertThat("Expected domain [" + test.domainExpected + "] but found [" + actualDomain + "].  Actual "
+           + actualTotal + " vs Expected " + expectedTotal, actualDomain, equalTo(test.domainExpected));
+    }
+
+    private void waitUntilJobIsClosed(String jobId) throws Exception {
+        assertBusy(() -> {
+            try {
+                Response jobStatsResponse = client().performRequest(new Request("GET",
+                    MachineLearning.BASE_PATH + "anomaly_detectors/" + jobId + "/_stats"));
+                assertThat(EntityUtils.toString(jobStatsResponse.getEntity()), containsString("\"state\":\"closed\""));
+            } catch (Exception e) {
+                throw new RuntimeException(e);
+            }
+        });
+    }
+
+    private void waitUntilDatafeedIsStopped(String dfId) throws Exception {
+        assertBusy(() -> {
+            try {
+                Response datafeedStatsResponse = client().performRequest(new Request("GET",
+                    MachineLearning.BASE_PATH + "datafeeds/" + dfId + "/_stats"));
+                assertThat(EntityUtils.toString(datafeedStatsResponse.getEntity()),
+                    containsString("\"state\":\"stopped\""));
+            } catch (Exception e) {
+                throw new RuntimeException(e);
+            }
+        }, 60, TimeUnit.SECONDS);
     }
 }