Browse Source

Synthetic _source: support histogram field (#89833)

Adds support for the `histogram` field type to synthetic _source.

![image](https://user-images.githubusercontent.com/215970/188691249-9d23d1dc-64ab-49a4-8b24-f60fc966c0ac.png)
Nik Everett 3 years ago
parent
commit
b667aa33f0

+ 5 - 0
docs/changelog/89833.yaml

@@ -0,0 +1,5 @@
+pr: 89833
+summary: "Synthetic _source: support histogram field"
+area: TSDB
+type: enhancement
+issues: []

+ 1 - 0
docs/reference/mapping/fields/synthetic-source.asciidoc

@@ -35,6 +35,7 @@ types:
 ** <<numeric-synthetic-source,`float`>>
 ** <<geo-point-synthetic-source,`geo_point`>>
 ** <<numeric-synthetic-source,`half_float`>>
+** <<histogram-synthetic-source,`histogram`>>
 ** <<numeric-synthetic-source,`integer`>>
 ** <<ip-synthetic-source,`ip`>>
 ** <<keyword-synthetic-source,`keyword`>>

+ 6 - 0
docs/reference/mapping/types/histogram.asciidoc

@@ -85,6 +85,12 @@ The histogram field is "algorithm agnostic" and does not store data specific to
 means the field can technically be aggregated with either algorithm, in practice the user should choose one algorithm and
 index data in that manner (e.g. centroids for T-Digest or intervals for HDRHistogram) to ensure best accuracy.
 
+[[histogram-synthetic-source]]
+==== Synthetic source preview:[]
+`histogram` fields support <<synthetic-source,synthetic `_source`>> in their
+default configuration. Synthetic `_source` cannot be used together with
+<<ignore-malformed,`ignore_malformed`>> or <<copy-to,`copy_to`>>.
+
 [[histogram-ex]]
 ==== Examples
 

+ 28 - 0
test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java

@@ -13,6 +13,7 @@ import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.IndexableFieldType;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.search.FieldExistsQuery;
@@ -71,6 +72,7 @@ import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.instanceOf;
 import static org.hamcrest.Matchers.matchesPattern;
+import static org.hamcrest.Matchers.nullValue;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 
@@ -910,6 +912,32 @@ public abstract class MapperTestCase extends MapperServiceTestCase {
         assertThat(syntheticSource(mapper, b -> b.startArray("field").endArray()), equalTo("{}"));
     }
 
+    /**
+     * Indexes a single document whose {@code field} is an empty array and
+     * asserts that the field's synthetic loader hands back a {@code null}
+     * doc values loader for the resulting leaf.
+     * <p>
+     * Skipped (via assumption) when {@link #supportsEmptyInputArray()} is false.
+     *
+     * @throws IOException propagated from building the mapper, indexing, or reading
+     */
+    public final void testSyntheticEmptyListNoDocValuesLoader() throws IOException {
+        assumeTrue("Field does not support [] as input", supportsEmptyInputArray());
+        // Only the example's mapping is used below; its values are never indexed.
+        SyntheticSourceExample syntheticSourceExample = syntheticSourceSupport().example(5);
+        DocumentMapper mapper = createDocumentMapper(syntheticSourceMapping(b -> {
+            b.startObject("field");
+            syntheticSourceExample.mapping().accept(b);
+            b.endObject();
+        }));
+        try (Directory directory = newDirectory()) {
+            RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
+            // The only document in the index has "field": [].
+            LuceneDocument doc = mapper.parse(source(b -> b.startArray("field").endArray())).rootDoc();
+            iw.addDocument(doc);
+            // The writer is closed before the reader is opened.
+            iw.close();
+            try (DirectoryReader reader = DirectoryReader.open(directory)) {
+                LeafReader leafReader = getOnlyLeafReader(reader);
+                SourceLoader.SyntheticFieldLoader fieldLoader = mapper.mapping().getRoot().getMapper("field").syntheticFieldLoader();
+                /*
+                 * null means "there are no values for this field, don't call me".
+                 * Empty fields are common enough that we need to make sure this
+                 * optimization kicks in.
+                 */
+                assertThat(fieldLoader.docValuesLoader(leafReader, new int[] { 0 }), nullValue());
+            }
+        }
+    }
+
     public final void testSyntheticSourceInvalid() throws IOException {
         List<SyntheticSourceInvalidExample> examples = new ArrayList<>(syntheticSourceSupport().invalidExample());
         if (supportsCopyTo()) {

+ 74 - 0
x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java

@@ -10,6 +10,7 @@ import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SortField;
@@ -31,6 +32,7 @@ import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.MapperBuilderContext;
 import org.elasticsearch.index.mapper.MapperParsingException;
+import org.elasticsearch.index.mapper.SourceLoader;
 import org.elasticsearch.index.mapper.SourceValueFetcher;
 import org.elasticsearch.index.mapper.TextSearchInfo;
 import org.elasticsearch.index.mapper.TimeSeriesParams;
@@ -42,6 +44,7 @@ import org.elasticsearch.search.MultiValueMode;
 import org.elasticsearch.search.sort.BucketedSort;
 import org.elasticsearch.search.sort.SortOrder;
 import org.elasticsearch.xcontent.ParseField;
+import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentParser;
 import org.elasticsearch.xcontent.XContentSubParser;
 import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType;
@@ -49,6 +52,7 @@ import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSou
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Map;
+import java.util.stream.Stream;
 
 import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
 
@@ -447,4 +451,74 @@ public class HistogramFieldMapper extends FieldMapper {
             return count;
         }
     }
+
+    /**
+     * Builds the loader that rebuilds this field's value for synthetic
+     * {@code _source} from the binary doc values stored under
+     * {@code fieldType().name()}.
+     * <p>
+     * The rebuilt value is an object named {@code simpleName()} holding a
+     * {@code values} array followed by a {@code counts} array.
+     *
+     * @return a loader that reads binary doc values only (no stored fields)
+     * @throws IllegalArgumentException if the field has {@code ignore_malformed}
+     *         enabled or declares {@code copy_to}
+     */
+    @Override
+    public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
+        // Reject configurations this loader does not handle before building it.
+        if (ignoreMalformed.value()) {
+            throw new IllegalArgumentException(
+                "field [" + name() + "] of type [histogram] doesn't support synthetic source because it ignores malformed histograms"
+            );
+        }
+        if (copyTo.copyToFields().isEmpty() != true) {
+            throw new IllegalArgumentException(
+                "field [" + name() + "] of type [histogram] doesn't support synthetic source because it declares copy_to"
+            );
+        }
+        return new SourceLoader.SyntheticFieldLoader() {
+            // Decoder reused for every document; re-pointed at the current bytes in write().
+            private final InternalHistogramValue value = new InternalHistogramValue();
+            // Encoded histogram of the most recently advanced document, or null if it has none.
+            private BytesRef binaryValue;
+
+            @Override
+            public Stream<Map.Entry<String, StoredFieldLoader>> storedFieldLoaders() {
+                // Nothing is loaded from stored fields; the value comes from doc values.
+                return Stream.of();
+            }
+
+            @Override
+            public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
+                BinaryDocValues docValues = leafReader.getBinaryDocValues(fieldType().name());
+                if (docValues == null) {
+                    // No values in this leaf
+                    binaryValue = null;
+                    return null;
+                }
+                // Per-document callback: records the bytes when the doc has a value, clears them otherwise.
+                return docId -> {
+                    if (docValues.advanceExact(docId)) {
+                        binaryValue = docValues.binaryValue();
+                        return true;
+                    }
+                    binaryValue = null;
+                    return false;
+                };
+            }
+
+            @Override
+            public boolean hasValue() {
+                return binaryValue != null;
+            }
+
+            @Override
+            public void write(XContentBuilder b) throws IOException {
+                // Writes nothing at all when the current document has no histogram.
+                if (binaryValue == null) {
+                    return;
+                }
+                b.startObject(simpleName());
+
+                // First pass over the encoded buckets: emit every bucket value.
+                value.reset(binaryValue);
+                b.startArray("values");
+                while (value.next()) {
+                    b.value(value.value());
+                }
+                b.endArray();
+
+                // Second pass over the same bytes: emit every bucket count.
+                // NOTE(review): presumably reset(...) rewinds the decoder to the first bucket - confirm.
+                value.reset(binaryValue);
+                b.startArray("counts");
+                while (value.next()) {
+                    b.value(value.count());
+                }
+                b.endArray();
+
+                b.endObject();
+            }
+        };
+    }
 }

+ 53 - 3
x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java

@@ -19,11 +19,14 @@ import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
 import org.junit.AssumptionViolatedException;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collection;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 
 import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.matchesPattern;
 import static org.hamcrest.Matchers.notNullValue;
 import static org.hamcrest.Matchers.nullValue;
 
@@ -347,12 +350,59 @@ public class HistogramFieldMapperTests extends MapperTestCase {
     }
 
     @Override
-    protected SyntheticSourceSupport syntheticSourceSupport() {
+    protected IngestScriptSupport ingestScriptSupport() {
         throw new AssumptionViolatedException("not supported");
     }
 
     @Override
-    protected IngestScriptSupport ingestScriptSupport() {
-        throw new AssumptionViolatedException("not supported");
+    protected SyntheticSourceSupport syntheticSourceSupport() {
+        return new HistogramFieldSyntheticSourceSupport();
+    }
+
+    /**
+     * Supplies randomized {@code histogram} documents and known-invalid
+     * mappings for the synthetic {@code _source} tests.
+     */
+    private static class HistogramFieldSyntheticSourceSupport implements SyntheticSourceSupport {
+        /**
+         * Builds one random histogram example with a {@code values} list and a
+         * {@code counts} list of the same length.
+         *
+         * @param maxVals upper bound on the number of buckets in the multi-bucket case
+         * @return an example that passes the same map as both of its value arguments
+         */
+        @Override
+        public SyntheticSourceExample example(int maxVals) {
+            // Half of the time: a histogram with exactly one bucket.
+            if (randomBoolean()) {
+                Map<String, Object> value = new LinkedHashMap<>();
+                value.put("values", List.of(randomDouble()));
+                value.put("counts", List.of(randomCount()));
+                return new SyntheticSourceExample(value, value, this::mapping);
+            }
+            // Otherwise: up to `size` buckets, each value drawn from the range above the previous one.
+            int size = between(1, maxVals);
+            List<Double> values = new ArrayList<>(size);
+            double prev = randomDouble();
+            values.add(prev);
+            // Stops early if Double.MAX_VALUE is reached, since no larger value can follow it.
+            while (values.size() < size && prev != Double.MAX_VALUE) {
+                // NOTE(review): the `false` argument presumably excludes `prev` itself,
+                // keeping values strictly increasing - confirm against randomDoubleBetween.
+                prev = randomDoubleBetween(prev, Double.MAX_VALUE, false);
+                values.add(prev);
+            }
+            Map<String, Object> value = new LinkedHashMap<>();
+            value.put("values", values);
+            // Exactly one count per value.
+            value.put("counts", randomList(values.size(), values.size(), this::randomCount));
+            return new SyntheticSourceExample(value, value, this::mapping);
+        }
+
+        /** Returns a random bucket count taken from {@code between(1, Integer.MAX_VALUE)}. */
+        private int randomCount() {
+            return between(1, Integer.MAX_VALUE);
+        }
+
+        /** Writes the minimal mapping for the field: just {@code "type": "histogram"}. */
+        private void mapping(XContentBuilder b) throws IOException {
+            b.field("type", "histogram");
+        }
+
+        /**
+         * Lists mappings that must be rejected together with a matcher for the
+         * expected error message. Only the {@code ignore_malformed: true} case
+         * is listed here.
+         */
+        @Override
+        public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
+            return List.of(
+                new SyntheticSourceInvalidExample(
+                    matchesPattern(
+                        "field \\[field] of type \\[histogram] doesn't support synthetic source because it ignores malformed histograms"
+                    ),
+                    b -> {
+                        b.field("type", "histogram");
+                        b.field("ignore_malformed", true);
+                    }
+                )
+            );
+        }
+    }
 }

+ 46 - 0
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/histogram.yml

@@ -249,3 +249,49 @@ histogram with wrong time series mappings:
                           latency:
                             type: histogram
                             time_series_metric: counter
+
+---
+histogram with synthetic source:
+  - skip:
+      version: " - 8.4.99"
+      reason: introduced in 8.5.0
+
+  - do:
+      indices.create:
+        index: histo_synthetic
+        body:
+          mappings:
+            _source:
+              mode: synthetic
+            properties:
+              latency:
+                type: histogram
+  - do:
+      bulk:
+        index: histo_synthetic
+        refresh: true
+        body:
+          - '{"index": {"_id": 1}}'
+          - '{"latency": {"values" : [0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 7, 23, 12, 6]}}'
+          - '{"index": {"_id": 2}}'
+          - '{"latency": {"values" : [0, 0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 2, 5, 10, 1, 8]}}'
+
+  - do:
+      get:
+        index: histo_synthetic
+        id: 1
+  - match:
+      _source:
+        latency:
+          values: [0.1, 0.2, 0.3, 0.4, 0.5]
+          counts: [3, 7, 23, 12, 6]
+
+  - do:
+      get:
+        index: histo_synthetic
+        id: 2
+  - match:
+      _source:
+        latency:
+          values: [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
+          counts: [3, 2, 5, 10, 1, 8]