瀏覽代碼

Handle nested and aliased fields correctly when copying mapping. (#50918)

Przemysław Witek 5 年之前
父節點
當前提交
3c6f649ab6

+ 80 - 6
x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java

@@ -63,6 +63,9 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
     private static final String NUMERICAL_FIELD = "numerical-field";
     private static final String DISCRETE_NUMERICAL_FIELD = "discrete-numerical-field";
     private static final String KEYWORD_FIELD = "keyword-field";
+    private static final String NESTED_FIELD = "outer-field.inner-field";
+    private static final String ALIAS_TO_KEYWORD_FIELD = "alias-to-keyword-field";
+    private static final String ALIAS_TO_NESTED_FIELD = "alias-to-nested-field";
     private static final List<Boolean> BOOLEAN_FIELD_VALUES = Collections.unmodifiableList(Arrays.asList(false, true));
     private static final List<Double> NUMERICAL_FIELD_VALUES = Collections.unmodifiableList(Arrays.asList(1.0, 2.0));
     private static final List<Integer> DISCRETE_NUMERICAL_FIELD_VALUES = Collections.unmodifiableList(Arrays.asList(10, 20));
@@ -301,7 +304,6 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
         assertInferenceModelPersisted(jobId);
         assertMlResultsFieldMappings(predictedClassField, "keyword");
         assertEvaluation(KEYWORD_FIELD, KEYWORD_FIELD_VALUES, "ml." + predictedClassField);
-
     }
 
     public void testDependentVariableCardinalityTooHighError() throws Exception {
@@ -343,6 +345,63 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
         assertProgress(jobId, 100, 100, 100, 100);
     }
 
+    public void testDependentVariableIsNested() throws Exception {
+        initialize("dependent_variable_is_nested");
+        String predictedClassField = NESTED_FIELD + "_prediction";
+        indexData(sourceIndex, 100, 0, NESTED_FIELD);
+
+        DataFrameAnalyticsConfig config = buildAnalytics(jobId, sourceIndex, destIndex, null, new Classification(NESTED_FIELD));
+        registerAnalytics(config);
+        putAnalytics(config);
+        startAnalytics(jobId);
+        waitUntilAnalyticsIsStopped(jobId);
+
+        assertProgress(jobId, 100, 100, 100, 100);
+        assertThat(searchStoredProgress(jobId).getHits().getTotalHits().value, equalTo(1L));
+        assertModelStatePersisted(stateDocId());
+        assertInferenceModelPersisted(jobId);
+        assertMlResultsFieldMappings(predictedClassField, "keyword");
+        assertEvaluation(NESTED_FIELD, KEYWORD_FIELD_VALUES, "ml." + predictedClassField);
+    }
+
+    public void testDependentVariableIsAliasToKeyword() throws Exception {
+        initialize("dependent_variable_is_alias");
+        String predictedClassField = ALIAS_TO_KEYWORD_FIELD + "_prediction";
+        indexData(sourceIndex, 100, 0, KEYWORD_FIELD);
+
+        DataFrameAnalyticsConfig config = buildAnalytics(jobId, sourceIndex, destIndex, null, new Classification(ALIAS_TO_KEYWORD_FIELD));
+        registerAnalytics(config);
+        putAnalytics(config);
+        startAnalytics(jobId);
+        waitUntilAnalyticsIsStopped(jobId);
+
+        assertProgress(jobId, 100, 100, 100, 100);
+        assertThat(searchStoredProgress(jobId).getHits().getTotalHits().value, equalTo(1L));
+        assertModelStatePersisted(stateDocId());
+        assertInferenceModelPersisted(jobId);
+        assertMlResultsFieldMappings(predictedClassField, "keyword");
+        assertEvaluation(ALIAS_TO_KEYWORD_FIELD, KEYWORD_FIELD_VALUES, "ml." + predictedClassField);
+    }
+
+    public void testDependentVariableIsAliasToNested() throws Exception {
+        initialize("dependent_variable_is_alias_to_nested");
+        String predictedClassField = ALIAS_TO_NESTED_FIELD + "_prediction";
+        indexData(sourceIndex, 100, 0, NESTED_FIELD);
+
+        DataFrameAnalyticsConfig config = buildAnalytics(jobId, sourceIndex, destIndex, null, new Classification(ALIAS_TO_NESTED_FIELD));
+        registerAnalytics(config);
+        putAnalytics(config);
+        startAnalytics(jobId);
+        waitUntilAnalyticsIsStopped(jobId);
+
+        assertProgress(jobId, 100, 100, 100, 100);
+        assertThat(searchStoredProgress(jobId).getHits().getTotalHits().value, equalTo(1L));
+        assertModelStatePersisted(stateDocId());
+        assertInferenceModelPersisted(jobId);
+        assertMlResultsFieldMappings(predictedClassField, "keyword");
+        assertEvaluation(ALIAS_TO_NESTED_FIELD, KEYWORD_FIELD_VALUES, "ml." + predictedClassField);
+    }
+
     public void testTwoJobsWithSameRandomizeSeedUseSameTrainingSet() throws Exception {
         String sourceIndex = "classification_two_jobs_with_same_randomize_seed_source";
         String dependentVariable = KEYWORD_FIELD;
@@ -434,7 +493,10 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
                 BOOLEAN_FIELD, "type=boolean",
                 NUMERICAL_FIELD, "type=double",
                 DISCRETE_NUMERICAL_FIELD, "type=integer",
-                KEYWORD_FIELD, "type=keyword")
+                KEYWORD_FIELD, "type=keyword",
+                NESTED_FIELD, "type=keyword",
+                ALIAS_TO_KEYWORD_FIELD, "type=alias,path=" + KEYWORD_FIELD,
+                ALIAS_TO_NESTED_FIELD, "type=alias,path=" + NESTED_FIELD)
             .get();
     }
 
@@ -446,7 +508,8 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
                 BOOLEAN_FIELD, BOOLEAN_FIELD_VALUES.get(i % BOOLEAN_FIELD_VALUES.size()),
                 NUMERICAL_FIELD, NUMERICAL_FIELD_VALUES.get(i % NUMERICAL_FIELD_VALUES.size()),
                 DISCRETE_NUMERICAL_FIELD, DISCRETE_NUMERICAL_FIELD_VALUES.get(i % DISCRETE_NUMERICAL_FIELD_VALUES.size()),
-                KEYWORD_FIELD, KEYWORD_FIELD_VALUES.get(i % KEYWORD_FIELD_VALUES.size()));
+                KEYWORD_FIELD, KEYWORD_FIELD_VALUES.get(i % KEYWORD_FIELD_VALUES.size()),
+                NESTED_FIELD, KEYWORD_FIELD_VALUES.get(i % KEYWORD_FIELD_VALUES.size()));
             IndexRequest indexRequest = new IndexRequest(sourceIndex).source(source.toArray());
             bulkRequestBuilder.add(indexRequest);
         }
@@ -465,6 +528,9 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
             if (KEYWORD_FIELD.equals(dependentVariable) == false) {
                 source.addAll(List.of(KEYWORD_FIELD, KEYWORD_FIELD_VALUES.get(i % KEYWORD_FIELD_VALUES.size())));
             }
+            if (NESTED_FIELD.equals(dependentVariable) == false) {
+                source.addAll(List.of(NESTED_FIELD, KEYWORD_FIELD_VALUES.get(i % KEYWORD_FIELD_VALUES.size())));
+            }
             IndexRequest indexRequest = new IndexRequest(sourceIndex).source(source.toArray());
             bulkRequestBuilder.add(indexRequest);
         }
@@ -487,10 +553,12 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
     }
 
     /**
-     * Wrapper around extractValue with implicit casting to the appropriate type.
+     * Wrapper around extractValue that:
+     * - allows dots (".") in the path elements provided as arguments
+     * - supports implicit casting to the appropriate type
      */
     private static <T> T getFieldValue(Map<String, Object> doc, String... path) {
-        return (T)extractValue(doc, path);
+        return (T)extractValue(String.join(".", path), doc);
     }
 
     private static <T> void assertTopClasses(Map<String, Object> resultsObject,
@@ -582,8 +650,14 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
                 .mappings()
                 .get(destIndex)
                 .sourceAsMap();
-        assertThat(getFieldValue(mappings, "properties", "ml", "properties", predictedClassField, "type"), equalTo(expectedType));
         assertThat(
+            mappings.toString(),
+            getFieldValue(
+                mappings,
+                "properties", "ml", "properties", String.join(".properties.", predictedClassField.split("\\.")), "type"),
+            equalTo(expectedType));
+        assertThat(
+            mappings.toString(),
             getFieldValue(mappings, "properties", "ml", "properties", "top_classes", "properties", "class_name", "type"),
             equalTo(expectedType));
     }

+ 21 - 4
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/DataFrameAnalyticsIndex.java

@@ -24,6 +24,8 @@ import org.elasticsearch.cluster.metadata.MappingMetaData;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.IndexSortConfig;
+import org.elasticsearch.index.mapper.FieldAliasMapper;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.search.sort.SortOrder;
 import org.elasticsearch.xpack.core.ClientHelper;
 import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
@@ -38,6 +40,7 @@ import java.util.Map;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.Supplier;
 
+import static org.elasticsearch.common.xcontent.support.XContentMapValues.extractValue;
 import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN;
 
 /**
@@ -155,21 +158,36 @@ public final class DataFrameAnalyticsIndex {
         return maxValue;
     }
 
+    @SuppressWarnings("unchecked")
     private static Map<String, Object> createAdditionalMappings(DataFrameAnalyticsConfig config, Map<String, Object> mappingsProperties) {
         Map<String, Object> properties = new HashMap<>();
-        properties.put(ID_COPY, Map.of("type", "keyword"));
+        properties.put(ID_COPY, Map.of("type", KeywordFieldMapper.CONTENT_TYPE));
         for (Map.Entry<String, String> entry
                 : config.getAnalysis().getExplicitlyMappedFields(config.getDest().getResultsField()).entrySet()) {
             String destFieldPath = entry.getKey();
             String sourceFieldPath = entry.getValue();
-            Object sourceFieldMapping = mappingsProperties.get(sourceFieldPath);
-            if (sourceFieldMapping != null) {
+            Object sourceFieldMapping = extractMapping(sourceFieldPath, mappingsProperties);
+            if (sourceFieldMapping instanceof Map) {
+                Map<String, Object> sourceFieldMappingAsMap = (Map) sourceFieldMapping;
+                // If the source field is an alias, fetch the concrete field that the alias points to.
+                if (FieldAliasMapper.CONTENT_TYPE.equals(sourceFieldMappingAsMap.get("type"))) {
+                    String path = (String) sourceFieldMappingAsMap.get(FieldAliasMapper.Names.PATH);
+                    sourceFieldMapping = extractMapping(path, mappingsProperties);
+                }
+            }
+            // We may have updated the value of {@code sourceFieldMapping} in the "if" block above.
+            // Hence, we need to check the "instanceof" condition again.
+            if (sourceFieldMapping instanceof Map) {
                 properties.put(destFieldPath, sourceFieldMapping);
             }
         }
         return properties;
     }
 
+    private static Object extractMapping(String path, Map<String, Object> mappingsProperties) {
+        return extractValue(String.join("." + PROPERTIES + ".", path.split("\\.")), mappingsProperties);
+    }
+
     private static Map<String, Object> createMetaData(String analyticsId, Clock clock) {
         Map<String, Object> metadata = new HashMap<>();
         metadata.put(CREATION_DATE_MILLIS, clock.millis());
@@ -227,4 +245,3 @@ public final class DataFrameAnalyticsIndex {
         }
     }
 }
-

+ 74 - 24
x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/DataFrameAnalyticsIndexTests.java

@@ -71,7 +71,11 @@ public class DataFrameAnalyticsIndexTests extends ESTestCase {
     private static final String ANALYTICS_ID = "some-analytics-id";
     private static final String[] SOURCE_INDEX = new String[] {"source-index"};
     private static final String DEST_INDEX = "dest-index";
-    private static final String DEPENDENT_VARIABLE = "dep_var";
+    private static final String NUMERICAL_FIELD = "numerical-field";
+    private static final String OUTER_FIELD = "outer-field";
+    private static final String INNER_FIELD = "inner-field";
+    private static final String ALIAS_TO_NUMERICAL_FIELD = "alias-to-numerical-field";
+    private static final String ALIAS_TO_NESTED_FIELD = "alias-to-nested-field";
     private static final int CURRENT_TIME_MILLIS = 123456789;
     private static final String CREATED_BY = "data-frame-analytics";
 
@@ -116,17 +120,18 @@ public class DataFrameAnalyticsIndexTests extends ESTestCase {
         doAnswer(callListenerOnResponse(getSettingsResponse))
             .when(client).execute(eq(GetSettingsAction.INSTANCE), getSettingsRequestCaptor.capture(), any());
 
-        Map<String, Object> index1Mappings =
+        Map<String, Object> indexMappings =
             Map.of(
                 "properties",
-                Map.of("field_1", "field_1_mappings", "field_2", "field_2_mappings", DEPENDENT_VARIABLE, Map.of("type", "integer")));
-        MappingMetaData index1MappingMetaData = new MappingMetaData("_doc", index1Mappings);
-
-        Map<String, Object> index2Mappings =
-            Map.of(
-                "properties",
-                Map.of("field_1", "field_1_mappings", "field_2", "field_2_mappings", DEPENDENT_VARIABLE, Map.of("type", "integer")));
-        MappingMetaData index2MappingMetaData = new MappingMetaData("_doc", index2Mappings);
+                Map.of(
+                    "field_1", "field_1_mappings",
+                    "field_2", "field_2_mappings",
+                    NUMERICAL_FIELD, Map.of("type", "integer"),
+                    OUTER_FIELD, Map.of("properties", Map.of(INNER_FIELD, Map.of("type", "integer"))),
+                    ALIAS_TO_NUMERICAL_FIELD, Map.of("type", "alias", "path", NUMERICAL_FIELD),
+                    ALIAS_TO_NESTED_FIELD, Map.of("type", "alias", "path", "outer-field.inner-field")));
+        MappingMetaData index1MappingMetaData = new MappingMetaData("_doc", indexMappings);
+        MappingMetaData index2MappingMetaData = new MappingMetaData("_doc", indexMappings);
 
         ImmutableOpenMap.Builder<String, MappingMetaData> mappings = ImmutableOpenMap.builder();
         mappings.put("index_1", index1MappingMetaData);
@@ -143,7 +148,9 @@ public class DataFrameAnalyticsIndexTests extends ESTestCase {
             config,
             ActionListener.wrap(
                 response -> {},
-                e -> fail(e.getMessage())));
+                e -> fail(e.getMessage())
+            )
+        );
 
         GetSettingsRequest capturedGetSettingsRequest = getSettingsRequestCaptor.getValue();
         assertThat(capturedGetSettingsRequest.indices(), equalTo(SOURCE_INDEX));
@@ -166,6 +173,10 @@ public class DataFrameAnalyticsIndexTests extends ESTestCase {
             assertThat(extractValue("_doc.properties.ml__id_copy.type", map), equalTo("keyword"));
             assertThat(extractValue("_doc.properties.field_1", map), equalTo("field_1_mappings"));
             assertThat(extractValue("_doc.properties.field_2", map), equalTo("field_2_mappings"));
+            assertThat(extractValue("_doc.properties.numerical-field.type", map), equalTo("integer"));
+            assertThat(extractValue("_doc.properties.outer-field.properties.inner-field.type", map), equalTo("integer"));
+            assertThat(extractValue("_doc.properties.alias-to-numerical-field.type", map), equalTo("alias"));
+            assertThat(extractValue("_doc.properties.alias-to-nested-field.type", map), equalTo("alias"));
             assertThat(extractValue("_doc._meta.analytics", map), equalTo(ANALYTICS_ID));
             assertThat(extractValue("_doc._meta.creation_date_in_millis", map), equalTo(CURRENT_TIME_MILLIS));
             assertThat(extractValue("_doc._meta.created_by", map), equalTo(CREATED_BY));
@@ -178,13 +189,31 @@ public class DataFrameAnalyticsIndexTests extends ESTestCase {
     }
 
     public void testCreateDestinationIndex_Regression() throws IOException {
-        Map<String, Object> map = testCreateDestinationIndex(new Regression(DEPENDENT_VARIABLE));
-        assertThat(extractValue("_doc.properties.ml.dep_var_prediction.type", map), equalTo("integer"));
+        Map<String, Object> map = testCreateDestinationIndex(new Regression(NUMERICAL_FIELD));
+        assertThat(extractValue("_doc.properties.ml.numerical-field_prediction.type", map), equalTo("integer"));
     }
 
     public void testCreateDestinationIndex_Classification() throws IOException {
-        Map<String, Object> map = testCreateDestinationIndex(new Classification(DEPENDENT_VARIABLE));
-        assertThat(extractValue("_doc.properties.ml.dep_var_prediction.type", map), equalTo("integer"));
+        Map<String, Object> map = testCreateDestinationIndex(new Classification(NUMERICAL_FIELD));
+        assertThat(extractValue("_doc.properties.ml.numerical-field_prediction.type", map), equalTo("integer"));
+        assertThat(extractValue("_doc.properties.ml.top_classes.class_name.type", map), equalTo("integer"));
+    }
+
+    public void testCreateDestinationIndex_Classification_DependentVariableIsNested() throws IOException {
+        Map<String, Object> map = testCreateDestinationIndex(new Classification(OUTER_FIELD + "." + INNER_FIELD));
+        assertThat(extractValue("_doc.properties.ml.outer-field.inner-field_prediction.type", map), equalTo("integer"));
+        assertThat(extractValue("_doc.properties.ml.top_classes.class_name.type", map), equalTo("integer"));
+    }
+
+    public void testCreateDestinationIndex_Classification_DependentVariableIsAlias() throws IOException {
+        Map<String, Object> map = testCreateDestinationIndex(new Classification(ALIAS_TO_NUMERICAL_FIELD));
+        assertThat(extractValue("_doc.properties.ml.alias-to-numerical-field_prediction.type", map), equalTo("integer"));
+        assertThat(extractValue("_doc.properties.ml.top_classes.class_name.type", map), equalTo("integer"));
+    }
+
+    public void testCreateDestinationIndex_Classification_DependentVariableIsAliasToNested() throws IOException {
+        Map<String, Object> map = testCreateDestinationIndex(new Classification(ALIAS_TO_NESTED_FIELD));
+        assertThat(extractValue("_doc.properties.ml.alias-to-nested-field_prediction.type", map), equalTo("integer"));
         assertThat(extractValue("_doc.properties.ml.top_classes.class_name.type", map), equalTo("integer"));
     }
 
@@ -213,10 +242,15 @@ public class DataFrameAnalyticsIndexTests extends ESTestCase {
         );
     }
 
-    private Map<String, Object> testUpdateMappingsToDestIndex(DataFrameAnalysis analysis,
-                                                              Map<String, Object> properties) throws IOException {
+    private Map<String, Object> testUpdateMappingsToDestIndex(DataFrameAnalysis analysis) throws IOException {
         DataFrameAnalyticsConfig config = createConfig(analysis);
 
+        Map<String, Object> properties = Map.of(
+            NUMERICAL_FIELD, Map.of("type", "integer"),
+            OUTER_FIELD, Map.of("properties", Map.of(INNER_FIELD, Map.of("type", "integer"))),
+            ALIAS_TO_NUMERICAL_FIELD, Map.of("type", "alias", "path", NUMERICAL_FIELD),
+            ALIAS_TO_NESTED_FIELD, Map.of("type", "alias", "path", OUTER_FIELD + "." + INNER_FIELD)
+        );
         ImmutableOpenMap.Builder<String, MappingMetaData> mappings = ImmutableOpenMap.builder();
         mappings.put("", new MappingMetaData("_doc", Map.of("properties", properties)));
         GetIndexResponse getIndexResponse =
@@ -252,19 +286,35 @@ public class DataFrameAnalyticsIndexTests extends ESTestCase {
     }
 
     public void testUpdateMappingsToDestIndex_OutlierDetection() throws IOException {
-        testUpdateMappingsToDestIndex(new OutlierDetection.Builder().build(), Map.of(DEPENDENT_VARIABLE, Map.of("type", "integer")));
+        testUpdateMappingsToDestIndex(new OutlierDetection.Builder().build());
     }
 
     public void testUpdateMappingsToDestIndex_Regression() throws IOException {
-        Map<String, Object> map =
-            testUpdateMappingsToDestIndex(new Regression(DEPENDENT_VARIABLE), Map.of(DEPENDENT_VARIABLE, Map.of("type", "integer")));
-        assertThat(extractValue("properties.ml.dep_var_prediction.type", map), equalTo("integer"));
+        Map<String, Object> map = testUpdateMappingsToDestIndex(new Regression(NUMERICAL_FIELD));
+        assertThat(extractValue("properties.ml.numerical-field_prediction.type", map), equalTo("integer"));
     }
 
     public void testUpdateMappingsToDestIndex_Classification() throws IOException {
-        Map<String, Object> map =
-            testUpdateMappingsToDestIndex(new Classification(DEPENDENT_VARIABLE), Map.of(DEPENDENT_VARIABLE, Map.of("type", "integer")));
-        assertThat(extractValue("properties.ml.dep_var_prediction.type", map), equalTo("integer"));
+        Map<String, Object> map = testUpdateMappingsToDestIndex(new Classification(NUMERICAL_FIELD));
+        assertThat(extractValue("properties.ml.numerical-field_prediction.type", map), equalTo("integer"));
+        assertThat(extractValue("properties.ml.top_classes.class_name.type", map), equalTo("integer"));
+    }
+
+    public void testUpdateMappingsToDestIndex_Classification_DependentVariableIsNested() throws IOException {
+        Map<String, Object> map = testUpdateMappingsToDestIndex(new Classification(OUTER_FIELD + "." + INNER_FIELD));
+        assertThat(extractValue("properties.ml.outer-field.inner-field_prediction.type", map), equalTo("integer"));
+        assertThat(extractValue("properties.ml.top_classes.class_name.type", map), equalTo("integer"));
+    }
+
+    public void testUpdateMappingsToDestIndex_Classification_DependentVariableIsAlias() throws IOException {
+        Map<String, Object> map = testUpdateMappingsToDestIndex(new Classification(ALIAS_TO_NUMERICAL_FIELD));
+        assertThat(extractValue("properties.ml.alias-to-numerical-field_prediction.type", map), equalTo("integer"));
+        assertThat(extractValue("properties.ml.top_classes.class_name.type", map), equalTo("integer"));
+    }
+
+    public void testUpdateMappingsToDestIndex_Classification_DependentVariableIsAliasToNested() throws IOException {
+        Map<String, Object> map = testUpdateMappingsToDestIndex(new Classification(ALIAS_TO_NESTED_FIELD));
+        assertThat(extractValue("properties.ml.alias-to-nested-field_prediction.type", map), equalTo("integer"));
         assertThat(extractValue("properties.ml.top_classes.class_name.type", map), equalTo("integer"));
     }