Browse Source

LogsDB data generator - support nested object field (#111206)

Oleksandr Kolomiiets 1 year ago
parent
commit
930cf6f0d0

+ 1 - 1
test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGenerator.java

@@ -22,7 +22,7 @@ public class DataGenerator {
     private final FieldDataGenerator topLevelGenerator;
 
     public DataGenerator(DataGeneratorSpecification specification) {
-        this.topLevelGenerator = new ObjectFieldDataGenerator(specification, 0);
+        this.topLevelGenerator = new ObjectFieldDataGenerator(specification);
     }
 
     public void writeMapping(XContentBuilder mapping) throws IOException {

+ 50 - 4
test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java

@@ -13,13 +13,59 @@ import org.elasticsearch.logsdb.datageneration.arbitrary.RandomBasedArbitrary;
 
 /**
  * Allows configuring behavior of {@link  DataGenerator}.
+ * @param arbitrary provides arbitrary values used during generation
  * @param maxFieldCountPerLevel maximum number of fields that an individual object in mapping has.
  *                              Applies to subobjects.
  * @param maxObjectDepth maximum depth of nested objects
- * @param arbitrary provides arbitrary values used during generation
+ * @param nestedFieldsLimit how many total nested fields can be present in a produced mapping
  */
-public record DataGeneratorSpecification(int maxFieldCountPerLevel, int maxObjectDepth, Arbitrary arbitrary) {
-    public DataGeneratorSpecification() {
-        this(50, 3, new RandomBasedArbitrary());
+public record DataGeneratorSpecification(Arbitrary arbitrary, int maxFieldCountPerLevel, int maxObjectDepth, int nestedFieldsLimit) {
+
+    /**
+     * Creates a builder whose settings all start at their default values.
+     * @return a fresh {@link Builder}
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    /**
+     * Creates a specification in which every setting keeps its default value.
+     * @return specification produced by an unmodified {@link Builder}
+     */
+    public static DataGeneratorSpecification buildDefault() {
+        return new Builder().build();
+    }
+
+    /**
+     * Mutable builder for {@link DataGeneratorSpecification}.
+     * Each {@code with*} method overrides a single setting and returns the same builder for chaining.
+     */
+    public static class Builder {
+        private Arbitrary arbitrary = new RandomBasedArbitrary();
+        // Simply sufficiently big numbers to get some permutations
+        private int maxFieldCountPerLevel = 50;
+        private int maxObjectDepth = 3;
+        // Default value of index.mapping.nested_fields.limit
+        private int nestedFieldsLimit = 50;
+
+        public Builder() {
+            // Defaults are assigned by the field initializers above.
+        }
+
+        public Builder withArbitrary(Arbitrary arbitrary) {
+            this.arbitrary = arbitrary;
+            return this;
+        }
+
+        public Builder withMaxFieldCountPerLevel(int maxFieldCountPerLevel) {
+            this.maxFieldCountPerLevel = maxFieldCountPerLevel;
+            return this;
+        }
+
+        public Builder withMaxObjectDepth(int maxObjectDepth) {
+            this.maxObjectDepth = maxObjectDepth;
+            return this;
+        }
+
+        public Builder withNestedFieldsLimit(int nestedFieldsLimit) {
+            this.nestedFieldsLimit = nestedFieldsLimit;
+            return this;
+        }
+
+        /**
+         * Snapshots the current settings into an immutable specification.
+         * @return specification holding the values configured so far
+         */
+        public DataGeneratorSpecification build() {
+            return new DataGeneratorSpecification(arbitrary, maxFieldCountPerLevel, maxObjectDepth, nestedFieldsLimit);
+        }
     }
 }

+ 2 - 0
test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/Arbitrary.java

@@ -16,6 +16,8 @@ import org.elasticsearch.logsdb.datageneration.FieldType;
 public interface Arbitrary {
     boolean generateSubObject();
 
+    boolean generateNestedObject();
+
     int childFieldCount(int lowerBound, int upperBound);
 
     String fieldName(int lengthLowerBound, int lengthUpperBound);

+ 6 - 0
test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/RandomBasedArbitrary.java

@@ -23,6 +23,12 @@ public class RandomBasedArbitrary implements Arbitrary {
         return randomDouble() <= 0.1;
     }
 
+    @Override
+    public boolean generateNestedObject() {
+        // Using a static 10% chance, this is just a chosen value that can be tweaked.
+        return randomDouble() <= 0.1;
+    }
+
     @Override
     public int childFieldCount(int lowerBound, int upperBound) {
         return randomIntBetween(lowerBound, upperBound);

+ 49 - 0
test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/Context.java

@@ -0,0 +1,49 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.fields;
+
+import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification;
+
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Generation state handed down from an object-like field generator to its children.
+ * Every instance knows the specification, the depth of the object it belongs to, and a counter
+ * of nested fields that is shared by all contexts derived from the same root context.
+ */
+class Context {
+    private final DataGeneratorSpecification specification;
+    private final int objectDepth;
+    // Atomicity is not needed here. The counter is held by reference so that nested fields created
+    // in sibling subtrees are all accumulated into one total; a plain int copied into each child
+    // would only count nested ancestors on the current path and the limit would never be reached.
+    private final AtomicInteger nestedFieldsCount;
+
+    /**
+     * Creates the root context: depth zero and no nested fields generated yet.
+     * @param specification settings that drive generation
+     */
+    Context(DataGeneratorSpecification specification) {
+        this(specification, 0, new AtomicInteger(0));
+    }
+
+    private Context(DataGeneratorSpecification specification, int objectDepth, AtomicInteger nestedFieldsCount) {
+        this.specification = specification;
+        this.objectDepth = objectDepth;
+        this.nestedFieldsCount = nestedFieldsCount;
+    }
+
+    public DataGeneratorSpecification specification() {
+        return specification;
+    }
+
+    /**
+     * @return context for a child "object" field: one level deeper, same shared nested counter
+     */
+    public Context subObject() {
+        return new Context(specification, objectDepth + 1, nestedFieldsCount);
+    }
+
+    /**
+     * Registers one more nested field in the shared total and descends one level.
+     * @return context for the newly added "nested" field
+     */
+    public Context nestedObject() {
+        nestedFieldsCount.incrementAndGet();
+        return new Context(specification, objectDepth + 1, nestedFieldsCount);
+    }
+
+    /**
+     * The arbitrary is consulted first, so it is invoked even when the depth limit forbids a sub-object.
+     * @return true if an "object" child should be added at this level
+     */
+    public boolean shouldAddObjectField() {
+        return specification.arbitrary().generateSubObject() && objectDepth < specification.maxObjectDepth();
+    }
+
+    /**
+     * The arbitrary is consulted first, so it is invoked even when a limit forbids a nested field.
+     * @return true if a "nested" child should be added at this level, i.e. the arbitrary asks for one,
+     *         the depth limit is not reached, and the total of nested fields is still below the limit
+     */
+    public boolean shouldAddNestedField() {
+        return specification.arbitrary().generateNestedObject()
+            && objectDepth < specification.maxObjectDepth()
+            && nestedFieldsCount.get() < specification.nestedFieldsLimit();
+    }
+}

+ 107 - 0
test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/GenericSubObjectFieldDataGenerator.java

@@ -0,0 +1,107 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.fields;
+
+import org.elasticsearch.core.CheckedConsumer;
+import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.FieldType;
+import org.elasticsearch.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Generic generator for any type of object field (e.g. "object", "nested").
+ */
+public class GenericSubObjectFieldDataGenerator {
+    private final Context context;
+
+    private final List<ChildField> childFields;
+
+    public GenericSubObjectFieldDataGenerator(Context context) {
+        this.context = context;
+
+        childFields = new ArrayList<>();
+        generateChildFields();
+    }
+
+    public CheckedConsumer<XContentBuilder, IOException> mappingWriter(
+        CheckedConsumer<XContentBuilder, IOException> customMappingParameters
+    ) {
+        return b -> {
+            b.startObject();
+            customMappingParameters.accept(b);
+
+            b.startObject("properties");
+            for (var childField : childFields) {
+                b.field(childField.fieldName);
+                childField.generator.mappingWriter().accept(b);
+            }
+            b.endObject();
+
+            b.endObject();
+        };
+    }
+
+    public CheckedConsumer<XContentBuilder, IOException> fieldValueGenerator() {
+        return b -> {
+            b.startObject();
+
+            for (var childField : childFields) {
+                b.field(childField.fieldName);
+                childField.generator.fieldValueGenerator().accept(b);
+            }
+
+            b.endObject();
+        };
+    }
+
+    private void generateChildFields() {
+        var existingFields = new HashSet<String>();
+        // no child fields is legal
+        var childFieldsCount = context.specification().arbitrary().childFieldCount(0, context.specification().maxFieldCountPerLevel());
+
+        for (int i = 0; i < childFieldsCount; i++) {
+            var fieldName = generateFieldName(existingFields);
+
+            if (context.shouldAddObjectField()) {
+                childFields.add(new ChildField(fieldName, new ObjectFieldDataGenerator(context.subObject())));
+            } else if (context.shouldAddNestedField()) {
+                childFields.add(new ChildField(fieldName, new NestedFieldDataGenerator(context.nestedObject())));
+            } else {
+                var fieldType = context.specification().arbitrary().fieldType();
+                addLeafField(fieldType, fieldName);
+            }
+        }
+    }
+
+    private void addLeafField(FieldType type, String fieldName) {
+        var generator = switch (type) {
+            case LONG -> new LongFieldDataGenerator(context.specification().arbitrary());
+            case KEYWORD -> new KeywordFieldDataGenerator(context.specification().arbitrary());
+        };
+
+        childFields.add(new ChildField(fieldName, generator));
+    }
+
+    private String generateFieldName(Set<String> existingFields) {
+        var fieldName = context.specification().arbitrary().fieldName(1, 10);
+        while (existingFields.contains(fieldName)) {
+            fieldName = context.specification().arbitrary().fieldName(1, 10);
+        }
+        existingFields.add(fieldName);
+
+        return fieldName;
+    }
+
+    private record ChildField(String fieldName, FieldDataGenerator generator) {}
+}

+ 33 - 0
test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/NestedFieldDataGenerator.java

@@ -0,0 +1,33 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.fields;
+
+import org.elasticsearch.core.CheckedConsumer;
+import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
+import org.elasticsearch.xcontent.XContentBuilder;
+
+import java.io.IOException;
+
+public class NestedFieldDataGenerator implements FieldDataGenerator {
+    private final GenericSubObjectFieldDataGenerator delegate;
+
+    public NestedFieldDataGenerator(Context context) {
+        this.delegate = new GenericSubObjectFieldDataGenerator(context);
+    }
+
+    @Override
+    public CheckedConsumer<XContentBuilder, IOException> mappingWriter() {
+        return delegate.mappingWriter(b -> b.field("type", "nested"));
+    }
+
+    @Override
+    public CheckedConsumer<XContentBuilder, IOException> fieldValueGenerator() {
+        return delegate.fieldValueGenerator();
+    }
+}

+ 8 - 71
test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java

@@ -11,91 +11,28 @@ package org.elasticsearch.logsdb.datageneration.fields;
 import org.elasticsearch.core.CheckedConsumer;
 import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification;
 import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
-import org.elasticsearch.logsdb.datageneration.FieldType;
 import org.elasticsearch.xcontent.XContentBuilder;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
 
 public class ObjectFieldDataGenerator implements FieldDataGenerator {
-    private final DataGeneratorSpecification specification;
-    private final int depth;
+    private final GenericSubObjectFieldDataGenerator delegate;
 
-    private final List<ChildField> childFields;
+    public ObjectFieldDataGenerator(DataGeneratorSpecification specification) {
+        this(new Context(specification));
+    }
 
-    public ObjectFieldDataGenerator(DataGeneratorSpecification specification, int depth) {
-        this.specification = specification;
-        this.depth = depth;
-        this.childFields = new ArrayList<>();
-        generateChildFields();
+    ObjectFieldDataGenerator(Context context) {
+        this.delegate = new GenericSubObjectFieldDataGenerator(context);
     }
 
     @Override
     public CheckedConsumer<XContentBuilder, IOException> mappingWriter() {
-        return b -> {
-            b.startObject().startObject("properties");
-
-            for (var childField : childFields) {
-                b.field(childField.fieldName);
-                childField.generator.mappingWriter().accept(b);
-            }
-
-            b.endObject().endObject();
-        };
+        return delegate.mappingWriter(b -> {});
     }
 
     @Override
     public CheckedConsumer<XContentBuilder, IOException> fieldValueGenerator() {
-        return b -> {
-            b.startObject();
-
-            for (var childField : childFields) {
-                b.field(childField.fieldName);
-                childField.generator.fieldValueGenerator().accept(b);
-            }
-
-            b.endObject();
-        };
-    }
-
-    private void generateChildFields() {
-        var existingFields = new HashSet<String>();
-        // no child fields is legal
-        var childFieldsCount = specification.arbitrary().childFieldCount(0, specification.maxFieldCountPerLevel());
-
-        for (int i = 0; i < childFieldsCount; i++) {
-            var fieldName = generateFieldName(existingFields);
-
-            if (specification.arbitrary().generateSubObject() && depth < specification.maxObjectDepth()) {
-                childFields.add(new ChildField(fieldName, new ObjectFieldDataGenerator(specification, depth + 1)));
-            } else {
-                var fieldType = specification.arbitrary().fieldType();
-                addLeafField(fieldType, fieldName);
-            }
-        }
-    }
-
-    private void addLeafField(FieldType type, String fieldName) {
-        var generator = switch (type) {
-            case LONG -> new LongFieldDataGenerator(specification.arbitrary());
-            case KEYWORD -> new KeywordFieldDataGenerator(specification.arbitrary());
-        };
-
-        childFields.add(new ChildField(fieldName, generator));
+        return delegate.fieldValueGenerator();
     }
-
-    private String generateFieldName(Set<String> existingFields) {
-        var fieldName = specification.arbitrary().fieldName(1, 10);
-        while (existingFields.contains(fieldName)) {
-            fieldName = specification.arbitrary().fieldName(1, 10);
-        }
-        existingFields.add(fieldName);
-
-        return fieldName;
-    }
-
-    private record ChildField(String fieldName, FieldDataGenerator generator) {}
 }

+ 56 - 20
test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java

@@ -16,7 +16,13 @@ import org.elasticsearch.xcontent.XContentType;
 
 public class DataGeneratorSnapshotTests extends ESTestCase {
     public void testSnapshot() throws Exception {
-        var dataGenerator = new DataGenerator(new DataGeneratorSpecification(5, 2, new TestArbitrary()));
+        var dataGenerator = new DataGenerator(
+            DataGeneratorSpecification.builder()
+                .withArbitrary(new TestArbitrary())
+                .withMaxFieldCountPerLevel(5)
+                .withMaxObjectDepth(2)
+                .build()
+        );
 
         var mapping = XContentBuilder.builder(XContentType.JSON.xContent()).prettyPrint();
         dataGenerator.writeMapping(mapping);
@@ -31,22 +37,45 @@ public class DataGeneratorSnapshotTests extends ESTestCase {
                   "f1" : {
                     "properties" : {
                       "f2" : {
-                        "type" : "keyword"
-                      },
-                      "f3" : {
                         "properties" : {
+                          "f3" : {
+                            "type" : "keyword"
+                          },
                           "f4" : {
                             "type" : "long"
-                          },
-                          "f5" : {
+                          }
+                        }
+                      },
+                      "f5" : {
+                        "properties" : {
+                          "f6" : {
                             "type" : "keyword"
+                          },
+                          "f7" : {
+                            "type" : "long"
                           }
                         }
                       }
                     }
                   },
-                  "f6" : {
-                    "type" : "long"
+                  "f8" : {
+                    "type" : "nested",
+                    "properties" : {
+                      "f9" : {
+                        "type" : "nested",
+                        "properties" : {
+                          "f10" : {
+                            "type" : "keyword"
+                          },
+                          "f11" : {
+                            "type" : "long"
+                          }
+                        }
+                      },
+                      "f12" : {
+                        "type" : "keyword"
+                      }
+                    }
                   }
                 }
               }
@@ -55,13 +84,22 @@ public class DataGeneratorSnapshotTests extends ESTestCase {
         var expectedDocument = """
             {
               "f1" : {
-                "f2" : "string1",
-                "f3" : {
-                  "f4" : 0,
-                  "f5" : "string2"
+                "f2" : {
+                  "f3" : "string1",
+                  "f4" : 0
+                },
+                "f5" : {
+                  "f6" : "string2",
+                  "f7" : 1
                 }
               },
-              "f6" : 1
+              "f8" : {
+                "f9" : {
+                  "f10" : "string3",
+                  "f11" : 2
+                },
+                "f12" : "string4"
+              }
             }""";
 
         assertEquals(expectedMapping, Strings.toString(mapping));
@@ -69,7 +107,6 @@ public class DataGeneratorSnapshotTests extends ESTestCase {
     }
 
     private class TestArbitrary implements Arbitrary {
-        private boolean generateSubObject = true;
         private int generatedFields = 0;
         private FieldType fieldType = FieldType.KEYWORD;
         private long longValue = 0;
@@ -77,13 +114,12 @@ public class DataGeneratorSnapshotTests extends ESTestCase {
 
         @Override
         public boolean generateSubObject() {
-            if (generateSubObject) {
-                generateSubObject = false;
-                return true;
-            }
+            return generatedFields < 6;
+        }
 
-            generateSubObject = true;
-            return false;
+        @Override
+        public boolean generateNestedObject() {
+            return generatedFields > 6 && generatedFields < 12;
         }
 
         @Override

+ 64 - 6
test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java

@@ -12,7 +12,6 @@ import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.index.mapper.MapperServiceTestCase;
 import org.elasticsearch.index.mapper.SourceToParse;
 import org.elasticsearch.logsdb.datageneration.arbitrary.Arbitrary;
-import org.elasticsearch.logsdb.datageneration.arbitrary.RandomBasedArbitrary;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentType;
@@ -21,7 +20,7 @@ import java.io.IOException;
 
 public class DataGeneratorTests extends ESTestCase {
     public void testDataGeneratorSanity() throws IOException {
-        var dataGenerator = new DataGenerator(new DataGeneratorSpecification());
+        var dataGenerator = new DataGenerator(DataGeneratorSpecification.buildDefault());
 
         var mapping = XContentBuilder.builder(XContentType.JSON.xContent());
         dataGenerator.writeMapping(mapping);
@@ -33,8 +32,60 @@ public class DataGeneratorTests extends ESTestCase {
     }
 
     public void testDataGeneratorProducesValidMappingAndDocument() throws IOException {
-        // Let's keep number of fields under 1000 field limit
-        var dataGenerator = new DataGenerator(new DataGeneratorSpecification(10, 3, new RandomBasedArbitrary()));
+        // Make sure objects, nested objects and all field types are covered.
+        var testArbitrary = new Arbitrary() {
+            private boolean subObjectCovered = false;
+            private boolean nestedCovered = false;
+            private int generatedFields = 0;
+
+            @Override
+            public boolean generateSubObject() {
+                if (subObjectCovered == false) {
+                    subObjectCovered = true;
+                    return true;
+                }
+
+                return false;
+            }
+
+            @Override
+            public boolean generateNestedObject() {
+                if (nestedCovered == false) {
+                    nestedCovered = true;
+                    return true;
+                }
+
+                return false;
+            }
+
+            @Override
+            public int childFieldCount(int lowerBound, int upperBound) {
+                // Make sure to generate enough fields to go through all field types.
+                return 20;
+            }
+
+            @Override
+            public String fieldName(int lengthLowerBound, int lengthUpperBound) {
+                return "f" + generatedFields++;
+            }
+
+            @Override
+            public FieldType fieldType() {
+                return FieldType.values()[generatedFields % FieldType.values().length];
+            }
+
+            @Override
+            public long longValue() {
+                return randomLong();
+            }
+
+            @Override
+            public String stringValue(int lengthLowerBound, int lengthUpperBound) {
+                return randomAlphaOfLengthBetween(lengthLowerBound, lengthUpperBound);
+            }
+        };
+
+        var dataGenerator = new DataGenerator(DataGeneratorSpecification.builder().withArbitrary(testArbitrary).build());
 
         var mapping = XContentBuilder.builder(XContentType.JSON.xContent());
         dataGenerator.writeMapping(mapping);
@@ -49,7 +100,7 @@ public class DataGeneratorTests extends ESTestCase {
     }
 
     public void testDataGeneratorStressTest() throws IOException {
-        // Let's generate 1000000 fields to test an extreme case (2 levels of nested objects + 1 leaf level with 100 fields per object).
+        // Let's generate 1000000 fields to test an extreme case (2 levels of objects + 1 leaf level with 100 fields per object).
         var arbitrary = new Arbitrary() {
             private int generatedFields = 0;
 
@@ -58,6 +109,11 @@ public class DataGeneratorTests extends ESTestCase {
                 return true;
             }
 
+            @Override
+            public boolean generateNestedObject() {
+                return false;
+            }
+
             @Override
             public int childFieldCount(int lowerBound, int upperBound) {
                 return upperBound;
@@ -83,7 +139,9 @@ public class DataGeneratorTests extends ESTestCase {
                 return "";
             }
         };
-        var dataGenerator = new DataGenerator(new DataGeneratorSpecification(100, 2, arbitrary));
+        var dataGenerator = new DataGenerator(
+            DataGeneratorSpecification.builder().withArbitrary(arbitrary).withMaxFieldCountPerLevel(100).withMaxObjectDepth(2).build()
+        );
 
         var mapping = XContentBuilder.builder(XContentType.JSON.xContent());
         dataGenerator.writeMapping(mapping);