Browse Source

[Transform] Handle multi-fields properly when creating destination index. (#66273)

Przemysław Witek 4 years ago
parent
commit
81343ac7e3

+ 2 - 2
x-pack/plugin/src/test/resources/rest-api-spec/test/transform/preview_transforms.yml

@@ -101,8 +101,8 @@ setup:
   - match: { generated_dest_index.mappings.properties.airline.type: "keyword" }
   - match: { generated_dest_index.mappings.properties.by-hour.type: "date" }
   - match: { generated_dest_index.mappings.properties.avg_response.type: "double" }
-  - match: { generated_dest_index.mappings.properties.time\.max.type: "date" }
-  - match: { generated_dest_index.mappings.properties.time\.min.type: "date" }
+  - match: { generated_dest_index.mappings.properties.time.properties.max.type: "date" }
+  - match: { generated_dest_index.mappings.properties.time.properties.min.type: "date" }
 
   - do:
       ingest.put_pipeline:

+ 51 - 31
x-pack/plugin/transform/qa/multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/transform/integration/LatestIT.java

@@ -26,7 +26,9 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.stream.Stream;
 
+import static java.util.Collections.singletonMap;
 import static java.util.stream.Collectors.toList;
+import static java.util.stream.Collectors.toMap;
 import static org.hamcrest.Matchers.containsInAnyOrder;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.hasSize;
@@ -55,8 +57,10 @@ public class LatestIT extends TransformIntegTestCase {
     private static final String BUSINESS_ID = "business_id";
     private static final String COUNT = "count";
     private static final String STARS = "stars";
+    private static final String COMMENT = "comment";
 
-    private static final Map<String, Object> row(String userId, String businessId, int count, int stars, String timestamp) {
+    private static final Map<String, Object> row(
+            String userId, String businessId, int count, int stars, String timestamp, String comment) {
         return new HashMap<>() {{
             if (userId != null) {
                 put(USER_ID, userId);
@@ -65,40 +69,43 @@ public class LatestIT extends TransformIntegTestCase {
             put(COUNT, count);
             put(STARS, stars);
             put(TIMESTAMP, timestamp);
+            put(COMMENT, comment);
+            put("regular_object", singletonMap("foo", 42));
+            put("nested_object", singletonMap("bar", 43));
         }};
     }
 
     private static final Object[] EXPECTED_DEST_INDEX_ROWS =
         new Object[] {
-            row("user_0", "business_37", 87, 2, "2017-04-04T12:30:00Z"),
-            row("user_1", "business_38", 88, 3, "2017-04-05T12:30:00Z"),
-            row("user_2", "business_39", 89, 4, "2017-04-06T12:30:00Z"),
-            row("user_3", "business_40", 90, 0, "2017-04-07T12:30:00Z"),
-            row("user_4", "business_41", 91, 1, "2017-04-08T12:30:00Z"),
-            row("user_5", "business_42", 92, 2, "2017-04-09T12:30:00Z"),
-            row("user_6", "business_43", 93, 3, "2017-04-10T12:30:00Z"),
-            row("user_7", "business_44", 94, 4, "2017-04-11T12:30:00Z"),
-            row("user_8", "business_45", 95, 0, "2017-04-12T12:30:00Z"),
-            row("user_9", "business_46", 96, 1, "2017-04-13T12:30:00Z"),
-            row("user_10", "business_47", 97, 2, "2017-04-14T12:30:00Z"),
-            row("user_11", "business_48", 98, 3, "2017-04-15T12:30:00Z"),
-            row("user_12", "business_49", 99, 4, "2017-04-16T12:30:00Z"),
-            row("user_13", "business_21", 71, 1, "2017-03-16T12:30:00Z"),
-            row("user_14", "business_22", 72, 2, "2017-03-17T12:30:00Z"),
-            row("user_15", "business_23", 73, 3, "2017-03-18T12:30:00Z"),
-            row("user_16", "business_24", 74, 4, "2017-03-19T12:30:00Z"),
-            row("user_17", "business_25", 75, 0, "2017-03-20T12:30:00Z"),
-            row("user_18", "business_26", 76, 1, "2017-03-21T12:30:00Z"),
-            row("user_19", "business_27", 77, 2, "2017-03-22T12:30:00Z"),
-            row("user_20", "business_28", 78, 3, "2017-03-23T12:30:00Z"),
-            row("user_21", "business_29", 79, 4, "2017-03-24T12:30:00Z"),
-            row("user_22", "business_30", 80, 0, "2017-03-25T12:30:00Z"),
-            row("user_23", "business_31", 81, 1, "2017-03-26T12:30:00Z"),
-            row("user_24", "business_32", 82, 2, "2017-03-27T12:30:00Z"),
-            row("user_25", "business_33", 83, 3, "2017-03-28T12:30:00Z"),
-            row("user_26", "business_34", 84, 4, "2017-04-01T12:30:00Z"),
-            row("user_27", "business_35", 85, 0, "2017-04-02T12:30:00Z"),
-            row(null, "business_36", 86, 1, "2017-04-03T12:30:00Z")
+            row("user_0", "business_37", 87, 2, "2017-04-04T12:30:00Z", "Great stuff, deserves 2 stars"),
+            row("user_1", "business_38", 88, 3, "2017-04-05T12:30:00Z", "Great stuff, deserves 3 stars"),
+            row("user_2", "business_39", 89, 4, "2017-04-06T12:30:00Z", "Great stuff, deserves 4 stars"),
+            row("user_3", "business_40", 90, 0, "2017-04-07T12:30:00Z", "Great stuff, deserves 0 stars"),
+            row("user_4", "business_41", 91, 1, "2017-04-08T12:30:00Z", "Great stuff, deserves 1 stars"),
+            row("user_5", "business_42", 92, 2, "2017-04-09T12:30:00Z", "Great stuff, deserves 2 stars"),
+            row("user_6", "business_43", 93, 3, "2017-04-10T12:30:00Z", "Great stuff, deserves 3 stars"),
+            row("user_7", "business_44", 94, 4, "2017-04-11T12:30:00Z", "Great stuff, deserves 4 stars"),
+            row("user_8", "business_45", 95, 0, "2017-04-12T12:30:00Z", "Great stuff, deserves 0 stars"),
+            row("user_9", "business_46", 96, 1, "2017-04-13T12:30:00Z", "Great stuff, deserves 1 stars"),
+            row("user_10", "business_47", 97, 2, "2017-04-14T12:30:00Z", "Great stuff, deserves 2 stars"),
+            row("user_11", "business_48", 98, 3, "2017-04-15T12:30:00Z", "Great stuff, deserves 3 stars"),
+            row("user_12", "business_49", 99, 4, "2017-04-16T12:30:00Z", "Great stuff, deserves 4 stars"),
+            row("user_13", "business_21", 71, 1, "2017-03-16T12:30:00Z", "Great stuff, deserves 1 stars"),
+            row("user_14", "business_22", 72, 2, "2017-03-17T12:30:00Z", "Great stuff, deserves 2 stars"),
+            row("user_15", "business_23", 73, 3, "2017-03-18T12:30:00Z", "Great stuff, deserves 3 stars"),
+            row("user_16", "business_24", 74, 4, "2017-03-19T12:30:00Z", "Great stuff, deserves 4 stars"),
+            row("user_17", "business_25", 75, 0, "2017-03-20T12:30:00Z", "Great stuff, deserves 0 stars"),
+            row("user_18", "business_26", 76, 1, "2017-03-21T12:30:00Z", "Great stuff, deserves 1 stars"),
+            row("user_19", "business_27", 77, 2, "2017-03-22T12:30:00Z", "Great stuff, deserves 2 stars"),
+            row("user_20", "business_28", 78, 3, "2017-03-23T12:30:00Z", "Great stuff, deserves 3 stars"),
+            row("user_21", "business_29", 79, 4, "2017-03-24T12:30:00Z", "Great stuff, deserves 4 stars"),
+            row("user_22", "business_30", 80, 0, "2017-03-25T12:30:00Z", "Great stuff, deserves 0 stars"),
+            row("user_23", "business_31", 81, 1, "2017-03-26T12:30:00Z", "Great stuff, deserves 1 stars"),
+            row("user_24", "business_32", 82, 2, "2017-03-27T12:30:00Z", "Great stuff, deserves 2 stars"),
+            row("user_25", "business_33", 83, 3, "2017-03-28T12:30:00Z", "Great stuff, deserves 3 stars"),
+            row("user_26", "business_34", 84, 4, "2017-04-01T12:30:00Z", "Great stuff, deserves 4 stars"),
+            row("user_27", "business_35", 85, 0, "2017-04-02T12:30:00Z", "Great stuff, deserves 0 stars"),
+            row(null, "business_36", 86, 1, "2017-04-03T12:30:00Z", "Great stuff, deserves 1 stars")
         };
 
     @After
@@ -162,11 +169,24 @@ public class LatestIT extends TransformIntegTestCase {
             GetMappingsResponse sourceIndexMapping =
                 restClient.indices().getMapping(new GetMappingsRequest().indices(SOURCE_INDEX_NAME), RequestOptions.DEFAULT);
             assertThat(
-                previewResponse.getMappings().get("properties"),
+                // Mappings we get from preview sometimes contain redundant { "type": "object" } entries.
+                // We clear them here to be able to compare with the GetMappingsAction output.
+                clearDefaultObjectType(previewResponse.getMappings().get("properties")),
                 is(equalTo(sourceIndexMapping.mappings().get(SOURCE_INDEX_NAME).sourceAsMap().get("properties"))));
             // Verify preview contents
             assertThat(previewResponse.getDocs(), hasSize(NUM_USERS + 1));
             assertThat(previewResponse.getDocs(), containsInAnyOrder(EXPECTED_DEST_INDEX_ROWS));
         }
     }
+
+    private static Object clearDefaultObjectType(Object obj) {
+        if (obj instanceof Map == false) {
+            return obj;
+        }
+        @SuppressWarnings("unchecked")
+        Map<String, Object> map = (Map<String, Object>) obj;
+        return map.entrySet().stream()
+            .filter(entry -> (entry.getKey().equals("type") && entry.getValue().equals("object")) == false)
+            .collect(toMap(entry -> entry.getKey(), entry -> clearDefaultObjectType(entry.getValue())));
+    }
 }

+ 18 - 0
x-pack/plugin/transform/qa/multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/transform/integration/TransformIntegTestCase.java

@@ -342,6 +342,20 @@ abstract class TransformIntegTestCase extends ESRestTestCase {
                         .startObject("stars")
                         .field("type", "integer")
                         .endObject()
+                        .startObject("regular_object")
+                        .field("type", "object")
+                        .endObject()
+                        .startObject("nested_object")
+                        .field("type", "nested")
+                        .endObject()
+                        .startObject("comment")
+                        .field("type", "text")
+                        .startObject("fields")
+                        .startObject("keyword")
+                        .field("type", "keyword")
+                        .endObject()
+                        .endObject()
+                        .endObject()
                         .endObject();
                 }
                 builder.endObject();
@@ -374,6 +388,10 @@ abstract class TransformIntegTestCase extends ESRestTestCase {
                     .append(business)
                     .append("\",\"stars\":")
                     .append(stars)
+                    .append(",\"comment\":")
+                    .append("\"Great stuff, deserves " + stars + " stars\"")
+                    .append(",\"regular_object\":{\"foo\": 42}")
+                    .append(",\"nested_object\":{\"bar\": 43}")
                     .append(",\"timestamp\":\"")
                     .append(dateString)
                     .append("\"}");

+ 36 - 3
x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/persistence/TransformIndex.java

@@ -15,21 +15,36 @@ import org.elasticsearch.action.admin.indices.create.CreateIndexAction;
 import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
 import org.elasticsearch.client.Client;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.mapper.ObjectMapper;
 import org.elasticsearch.xpack.core.transform.TransformField;
 import org.elasticsearch.xpack.core.transform.TransformMessages;
 import org.elasticsearch.xpack.core.transform.transforms.TransformConfig;
 import org.elasticsearch.xpack.core.transform.transforms.TransformDestIndexSettings;
 
 import java.time.Clock;
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import static java.util.Map.Entry.comparingByKey;
+
 public final class TransformIndex {
     private static final Logger logger = LogManager.getLogger(TransformIndex.class);
 
+    /**
+     * The list of object types used in the mappings.
+     * We include {@code null} as an alternative for "object", which is the default.
+     */
+    private static final Set<String> OBJECT_TYPES =
+        new HashSet<>(Arrays.asList(null, ObjectMapper.CONTENT_TYPE, ObjectMapper.NESTED_CONTENT_TYPE));
     private static final String PROPERTIES = "properties";
+    private static final String FIELDS = "fields";
     private static final String META = "_meta";
 
     private TransformIndex() {}
@@ -134,10 +149,28 @@ public final class TransformIndex {
      * }
      * @param mappings A Map of the form {"fieldName": "fieldType"}
      */
-    private static Map<String, Object> createMappingsFromStringMap(Map<String, String> mappings) {
+    static Map<String, Object> createMappingsFromStringMap(Map<String, String> mappings) {
+        List<Map.Entry<String, String>> sortedMappingsEntries = new ArrayList<>(mappings.entrySet());
+        // We sort the entry list to make sure that for each (parent, parent.child) pair, parent entry will be processed before child entry.
+        sortedMappingsEntries.sort(comparingByKey());
         Map<String, Object> fieldMappings = new HashMap<>();
-        mappings.forEach((k, v) -> fieldMappings.put(k, Map.of("type", v)));
-
+        for (Map.Entry<String, String> entry : sortedMappingsEntries) {
+            String[] parts = Strings.tokenizeToStringArray(entry.getKey(), ".");
+            String type = entry.getValue();
+            Map<String, Object> current = fieldMappings;
+            current = diveInto(current, parts[0]);
+            for (int j = 1; j < parts.length; ++j) {
+                // Here we decide whether a dot ('.') means inner object or a multi-field.
+                current = diveInto(current, OBJECT_TYPES.contains(current.get("type")) ? PROPERTIES : FIELDS);
+                current = diveInto(current, parts[j]);
+            }
+            current.put("type", type);
+        }
         return fieldMappings;
     }
+
+    @SuppressWarnings("unchecked")
+    private static Map<String, Object> diveInto(Map<String, Object> map, String key) {
+        return (Map<String, Object>) map.computeIfAbsent(key, k -> new HashMap<>());
+    }
 }

+ 100 - 0
x-pack/plugin/transform/src/test/java/org/elasticsearch/xpack/transform/persistence/TransformIndexTests.java

@@ -23,8 +23,12 @@ import java.time.ZoneId;
 import java.util.HashMap;
 import java.util.Map;
 
+import static java.util.Collections.emptyMap;
+import static java.util.Collections.singletonMap;
 import static org.elasticsearch.common.xcontent.support.XContentMapValues.extractValue;
+import static org.hamcrest.Matchers.anEmptyMap;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.is;
 import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.eq;
 import static org.mockito.Mockito.doAnswer;
@@ -68,4 +72,100 @@ public class TransformIndexTests extends ESTestCase {
             assertThat(extractValue("_doc._meta.created_by", map), equalTo(CREATED_BY));
         }
     }
+
+    public void testCreateMappingsFromStringMap() {
+        assertThat(TransformIndex.createMappingsFromStringMap(emptyMap()), is(anEmptyMap()));
+        assertThat(
+            TransformIndex.createMappingsFromStringMap(singletonMap("a", "long")),
+            is(equalTo(singletonMap("a", singletonMap("type", "long"))))
+        );
+        assertThat(
+            TransformIndex.createMappingsFromStringMap(new HashMap<>() {{
+                put("a", "long");
+                put("b", "keyword");
+            }}),
+            is(equalTo(new HashMap<>() {{
+                put("a", singletonMap("type", "long"));
+                put("b", singletonMap("type", "keyword"));
+            }}))
+        );
+        assertThat(
+            TransformIndex.createMappingsFromStringMap(new HashMap<>() {{
+                put("a", "long");
+                put("a.b", "keyword");
+            }}),
+            is(equalTo(new HashMap<>() {{
+                put("a", new HashMap<>() {{
+                    put("type", "long");
+                    put("fields", singletonMap("b", singletonMap("type", "keyword")));
+                }});
+            }}))
+        );
+        assertThat(
+            TransformIndex.createMappingsFromStringMap(new HashMap<>() {{
+                put("a", "long");
+                put("a.b", "text");
+                put("a.b.c", "keyword");
+            }}),
+            is(equalTo(new HashMap<>() {{
+                put("a", new HashMap<>() {{
+                    put("type", "long");
+                    put("fields", new HashMap<>() {{
+                        put("b", new HashMap<>() {{
+                            put("type", "text");
+                            put("fields", new HashMap<>() {{
+                                put("c", singletonMap("type", "keyword"));
+                            }});
+                        }});
+                    }});
+                }});
+            }}))
+        );
+        assertThat(
+            TransformIndex.createMappingsFromStringMap(new HashMap<>() {{
+                put("a", "object");
+                put("a.b", "long");
+                put("c", "nested");
+                put("c.d", "boolean");
+                put("f", "object");
+                put("f.g", "object");
+                put("f.g.h", "text");
+                put("f.g.h.i", "text");
+            }}),
+            is(equalTo(new HashMap<>() {{
+                put("a", new HashMap<>() {{
+                    put("type", "object");
+                    put("properties", new HashMap<>() {{
+                        put("b", new HashMap<>() {{
+                            put("type", "long");
+                        }});
+                    }});
+                }});
+                put("c", new HashMap<>() {{
+                    put("type", "nested");
+                    put("properties", new HashMap<>() {{
+                        put("d", new HashMap<>() {{
+                            put("type", "boolean");
+                        }});
+                    }});
+                }});
+                put("f", new HashMap<>() {{
+                    put("type", "object");
+                    put("properties", new HashMap<>() {{
+                        put("g", new HashMap<>() {{
+                            put("type", "object");
+                            put("properties", new HashMap<>() {{
+                                put("h", new HashMap<>() {{
+                                    put("type", "text");
+                                    put("fields", new HashMap<>() {{
+                                        put("i", singletonMap("type", "text"));
+                                    }});
+                                }});
+                            }});
+                        }});
+                    }});
+                }});
+            }}))
+        );
+    }
 }