Browse Source

Add implementation for exponential histogram merging and percentiles (#131220)

Jonas Kunz 2 months ago
parent
commit
37b6f8fd7a
29 changed files with 3454 additions and 0 deletions
  1. 1 0
      benchmarks/build.gradle
  2. 98 0
      benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java
  3. 112 0
      benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java
  4. 1 0
      build-conventions/src/main/java/org/elasticsearch/gradle/internal/conventions/precommit/LicenseHeadersTask.java
  5. 5 0
      gradle/verification-metadata.xml
  6. 201 0
      libs/exponential-histogram/LICENSE.txt
  7. 24 0
      libs/exponential-histogram/NOTICES.txt
  8. 32 0
      libs/exponential-histogram/build.gradle
  9. 106 0
      libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/Base2ExponentialHistogramIndexer.java
  10. 73 0
      libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketIterator.java
  11. 36 0
      libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/CopyableBucketIterator.java
  12. 112 0
      libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java
  13. 160 0
      libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java
  14. 138 0
      libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java
  15. 186 0
      libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java
  16. 165 0
      libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java
  17. 267 0
      libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java
  18. 261 0
      libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java
  19. 109 0
      libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java
  20. 95 0
      libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java
  21. 172 0
      libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java
  22. 180 0
      libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/package-info.java
  23. 88 0
      libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java
  24. 45 0
      libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java
  25. 173 0
      libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java
  26. 211 0
      libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java
  27. 60 0
      libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogramTests.java
  28. 310 0
      libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java
  29. 33 0
      libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java

+ 1 - 0
benchmarks/build.gradle

@@ -49,6 +49,7 @@ dependencies {
   api(project(':x-pack:plugin:esql:compute'))
   implementation project(path: ':libs:native')
   implementation project(path: ':libs:simdvec')
+  implementation project(path: ':libs:exponential-histogram')
   expression(project(path: ':modules:lang-expression', configuration: 'zip'))
   painless(project(path: ':modules:lang-painless', configuration: 'zip'))
   nativeLib(project(':libs:native'))

+ 98 - 0
benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java

@@ -0,0 +1,98 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.benchmark.exponentialhistogram;
+
+import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.profile.GCProfiler;
+import org.openjdk.jmh.profile.StackProfiler;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import java.util.Random;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeUnit;
+import java.util.function.DoubleSupplier;
+
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@Warmup(iterations = 3, time = 3, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS)
+@Fork(1)
+@Threads(1)
+@State(Scope.Thread)
+public class ExponentialHistogramGenerationBench {
+
+    @Param({ "100", "500", "1000", "5000", "10000", "20000" })
+    int bucketCount;
+
+    @Param({ "NORMAL", "GAUSSIAN" })
+    String distribution;
+
+    Random random;
+    ExponentialHistogramGenerator histoGenerator;
+
+    double[] data = new double[1000000];
+
+    int index;
+
+    @Setup
+    public void setUp() {
+        random = ThreadLocalRandom.current();
+        histoGenerator = new ExponentialHistogramGenerator(bucketCount);
+
+        DoubleSupplier nextRandom = () -> distribution.equals("GAUSSIAN") ? random.nextGaussian() : random.nextDouble();
+
+        // Make sure that we start with a non-empty histogram, as this distorts initial additions
+        for (int i = 0; i < 10000; ++i) {
+            histoGenerator.add(nextRandom.getAsDouble());
+        }
+
+        for (int i = 0; i < data.length; ++i) {
+            data[i] = nextRandom.getAsDouble();
+        }
+
+        index = 0;
+    }
+
+    @Benchmark
+    @BenchmarkMode(Mode.AverageTime)
+    @OutputTimeUnit(TimeUnit.MICROSECONDS)
+    public void add() {
+        if (index >= data.length) {
+            index = 0;
+        }
+        histoGenerator.add(data[index++]);
+    }
+
+    public static void main(String[] args) throws RunnerException {
+        Options opt = new OptionsBuilder().include(".*" + ExponentialHistogramGenerationBench.class.getSimpleName() + ".*")
+            .warmupIterations(5)
+            .measurementIterations(5)
+            .addProfiler(GCProfiler.class)
+            .addProfiler(StackProfiler.class)
+            .build();
+
+        new Runner(opt).run();
+    }
+}

+ 112 - 0
benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java

@@ -0,0 +1,112 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.benchmark.exponentialhistogram;

import org.elasticsearch.exponentialhistogram.BucketIterator;
import org.elasticsearch.exponentialhistogram.ExponentialHistogram;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramMerger;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;

import java.util.List;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;

/**
 * Benchmarks merging pre-built exponential histograms of varying sizes into an
 * accumulator capped at {@link #bucketCount} buckets.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 3, time = 3, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS)
@Fork(1)
@Threads(1)
@State(Scope.Thread)
public class ExponentialHistogramMergeBench {

    /** Bucket capacity of the merge target. */
    @Param({ "1000", "2000", "5000" })
    int bucketCount;

    /** Size of each merged histogram, as a fraction (or multiple) of {@link #bucketCount}. */
    @Param({ "0.01", "0.1", "0.25", "0.5", "1.0", "2.0" })
    double mergedHistoSizeFactor;

    Random random;
    ExponentialHistogramMerger histoMerger;

    // Histograms are pre-built so that add() measures merging only, not generation.
    ExponentialHistogram[] toMerge = new ExponentialHistogram[10_000];

    int index;

    @Setup
    public void setUp() {
        random = ThreadLocalRandom.current();
        histoMerger = new ExponentialHistogramMerger(bucketCount);

        // Seed the merger with a histogram that already occupies all bucketCount buckets,
        // so the measured merges operate on a full accumulator.
        ExponentialHistogramGenerator initial = new ExponentialHistogramGenerator(bucketCount);
        for (int j = 0; j < bucketCount; j++) {
            initial.add(Math.pow(1.001, j));
        }
        ExponentialHistogram initialHisto = initial.get();
        int cnt = getBucketCount(initialHisto);
        if (cnt < bucketCount) {
            // Sanity check of our own setup, not of a caller-supplied argument.
            throw new IllegalStateException("Expected bucket count to be at least " + bucketCount + ", but was " + cnt);
        }
        histoMerger.add(initialHisto);

        int dataPointSize = (int) Math.round(bucketCount * mergedHistoSizeFactor);

        for (int i = 0; i < toMerge.length; i++) {
            ExponentialHistogramGenerator generator = new ExponentialHistogramGenerator(dataPointSize);

            // Spread dataPointSize values over distinct, strictly increasing bucket indices.
            int bucketIndex = 0;
            for (int j = 0; j < dataPointSize; j++) {
                bucketIndex += 1 + random.nextInt(bucketCount) % (Math.max(1, bucketCount / dataPointSize));
                generator.add(Math.pow(1.001, bucketIndex));
            }
            toMerge[i] = generator.get();
            cnt = getBucketCount(toMerge[i]);
            if (cnt < dataPointSize) {
                throw new IllegalStateException("Expected bucket count to be at least " + dataPointSize + ", but was " + cnt);
            }
        }

        index = 0;
    }

    /** Counts the populated buckets across the negative and positive ranges of the histogram. */
    private static int getBucketCount(ExponentialHistogram histo) {
        int cnt = 0;
        for (BucketIterator it : List.of(histo.negativeBuckets().iterator(), histo.positiveBuckets().iterator())) {
            while (it.hasNext()) {
                cnt++;
                it.advance();
            }
        }
        return cnt;
    }

    /** Measures merging a single pre-built histogram; cycles through the prepared set. */
    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.MICROSECONDS)
    public void add() {
        if (index >= toMerge.length) {
            index = 0;
        }
        histoMerger.add(toMerge[index++]);
    }
}

+ 1 - 0
build-conventions/src/main/java/org/elasticsearch/gradle/internal/conventions/precommit/LicenseHeadersTask.java

@@ -173,6 +173,7 @@ public abstract class LicenseHeadersTask extends DefaultTask {
         matchers.add(subStringMatcher("BSD4 ", "Original BSD License (with advertising clause)", "All advertising materials"));
         // Apache
         matchers.add(subStringMatcher("AL   ", "Apache", "Licensed to Elasticsearch B.V. under one or more contributor"));
+        matchers.add(subStringMatcher("AL   ", "Apache", "Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V."));
         // Apache lz4-java
         matchers.add(subStringMatcher("ALLZ4", "Apache LZ4-Java", "Copyright 2020 Adrien Grand and the lz4-java contributors"));
         // Generated resources

+ 5 - 0
gradle/verification-metadata.xml

@@ -66,6 +66,11 @@
             <sha256 value="3366d2c88fb576e486d830f521184e8f1839f8c15dcd2151a3f6e1f62b0b37a0" origin="Generated by Gradle"/>
          </artifact>
       </component>
+      <component group="ch.obermuhlner" name="big-math" version="2.3.2">
+         <artifact name="big-math-2.3.2.jar">
+            <sha256 value="693e1bb7c7f5184b448f03c2a2c0c45d07d8e89e4641fdc31ab0a8057027f43d" origin="Generated by Gradle"/>
+         </artifact>
+      </component>
       <component group="ch.randelshofer" name="fastdoubleparser" version="0.8.0">
          <artifact name="fastdoubleparser-0.8.0.jar">
             <sha256 value="10fe288fd7a2cdaf5175332b73529f9abf8fd54dcfff317d6967c0c35ffb133b" origin="Generated by Gradle"/>

+ 201 - 0
libs/exponential-histogram/LICENSE.txt

@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

+ 24 - 0
libs/exponential-histogram/NOTICES.txt

@@ -0,0 +1,24 @@
+Elastic-exponential-histogram
+
+Copyright 2025 Elasticsearch B.V.
+
+--
+Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+under one or more license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright
+ownership. Elasticsearch B.V. licenses this file to you under
+the Apache License, Version 2.0 (the "License"); you may
+not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ 	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+
+This project is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+--

+ 32 - 0
libs/exponential-histogram/build.gradle

@@ -0,0 +1,32 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
// TODO: publish this when ready?
//apply plugin: 'elasticsearch.publish'
apply plugin: 'elasticsearch.build'

// Test-only dependencies; the main sources are intentionally dependency-free.
// big-math and commons-math3 are presumably high-precision / statistical reference
// implementations for the accuracy tests — confirm against the test sources.
dependencies {
  testImplementation(project(":test:framework"))
  testImplementation('ch.obermuhlner:big-math:2.3.2')
  testImplementation('org.apache.commons:commons-math3:3.6.1')
}

tasks.named('forbiddenApisMain').configure {
  // this lib does not depend on core, so only jdk signatures should be checked
  replaceSignatureFiles 'jdk-signatures'
}

// Unlike most of the repository, this library is licensed under Apache 2.0
// (it derives from OpenTelemetry code — see NOTICES.txt and the file headers).
ext.projectLicenses.set(['The Apache Software License, Version 2.0': providers.provider(() -> 'http://www.apache.org/licenses/LICENSE-2.0')])
licenseFile.set(layout.settingsDirectory.file('licenses/APACHE-LICENSE-2.0.txt').asFile)

// Accept the Apache header used by the copied sources in addition to generated/vendored code.
tasks.named("licenseHeaders").configure {
  approvedLicenses = ['Apache', 'Generated', 'Vendored']
}



+ 106 - 0
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/Base2ExponentialHistogramIndexer.java

@@ -0,0 +1,106 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+/**
+ * The code in this class was copied and slightly adapted from the
+ * <a href="https://github.com/open-telemetry/opentelemetry-java/blob/78a917da2e8f4bc3645f4fb10361e3e844aab9fb/sdk/metrics/src/main/java/io/opentelemetry/sdk/metrics/internal/aggregator/Base2ExponentialHistogramIndexer.java">OpenTelemetry Base2ExponentialHistogramIndexer implementation</a>,
+ * licensed under the Apache License 2.0.
+ */
/**
 * Maps double values to base-2 exponential histogram bucket indices.
 * The code in this class was copied and slightly adapted from the
 * <a href="https://github.com/open-telemetry/opentelemetry-java/blob/78a917da2e8f4bc3645f4fb10361e3e844aab9fb/sdk/metrics/src/main/java/io/opentelemetry/sdk/metrics/internal/aggregator/Base2ExponentialHistogramIndexer.java">OpenTelemetry Base2ExponentialHistogramIndexer implementation</a>,
 * licensed under the Apache License 2.0.
 */
class Base2ExponentialHistogramIndexer {

    /** Isolates the exponent bits of an IEEE 754 double-precision number. */
    private static final long EXPONENT_BIT_MASK = 0x7FF0000000000000L;

    /** Isolates the significand bits of an IEEE 754 double-precision number. */
    private static final long SIGNIFICAND_BIT_MASK = 0xFFFFFFFFFFFFFL;

    /** Bias applied to the stored exponent of an IEEE 754 double-precision number. */
    private static final int EXPONENT_BIAS = 1023;

    /** Bit width of the significand (excluding the implicit leading bit). */
    private static final int SIGNIFICAND_WIDTH = 52;

    /** Bit width of the exponent field. */
    private static final int EXPONENT_WIDTH = 11;

    /** 1 / ln(2), used to convert natural logarithms to base-2 logarithms. */
    private static final double LOG_BASE2_E = 1D / Math.log(2);

    /**
     * Computes the bucket index of {@code value} at the given scale.
     * Positive scales use a logarithm-based computation, which may be inaccurate
     * near bucket boundaries; scale zero and negative scales are exact, based on
     * extracting the IEEE 754 exponent.
     */
    static long computeIndex(double value, int scale) {
        double magnitude = Math.abs(value);
        if (scale > 0) {
            // Logarithm-based: simpler, but can misplace values right at bucket boundaries.
            return getIndexByLogarithm(magnitude, scale);
        }
        // scale <= 0: exact exponent extraction; for scale 0 the shift amount is zero,
        // for negative scales the index is shifted right by -scale.
        return mapToIndexScaleZero(magnitude) >> -scale;
    }

    /**
     * Compute the bucket index using a logarithm based approach.
     *
     * @see <a
     * href="https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md#all-scales-use-the-logarithm-function">All
     * Scales: Use the Logarithm Function</a>
     */
    private static long getIndexByLogarithm(double value, int scale) {
        double scaledLog2 = Math.scalb(Math.log(value) * LOG_BASE2_E, scale);
        return (long) Math.ceil(scaledLog2) - 1;
    }

    /**
     * Compute the exact bucket index for scale zero by extracting the exponent.
     *
     * @see <a
     * href="https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md#scale-zero-extract-the-exponent">Scale
     * Zero: Extract the Exponent</a>
     */
    private static long mapToIndexScaleZero(double value) {
        long bits = Double.doubleToLongBits(value);
        long biasedExponent = (bits & EXPONENT_BIT_MASK) >> SIGNIFICAND_WIDTH;
        long significand = bits & SIGNIFICAND_BIT_MASK;
        if (biasedExponent == 0) {
            // Subnormal number: derive the effective exponent from the position
            // of the highest set significand bit.
            biasedExponent -= Long.numberOfLeadingZeros(significand - 1) - EXPONENT_WIDTH - 1;
        }
        int ieeeExponent = (int) (biasedExponent - EXPONENT_BIAS);
        // Exact powers of two sit on a bucket boundary and belong to the lower bucket.
        return significand == 0 ? ieeeExponent - 1 : ieeeExponent;
    }
}

+ 73 - 0
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketIterator.java

@@ -0,0 +1,73 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+/**
+ * An iterator over the non-empty buckets of the histogram for either the positive or negative range.
+ * <ul>
+ *     <li>The iterator always iterates from the lowest bucket index to the highest.</li>
+ *     <li>The iterator never returns duplicate buckets (buckets with the same index).</li>
+ *     <li>The iterator never returns empty buckets ({@link #peekCount()} is never zero).</li>
+ * </ul>
+ */
+public interface BucketIterator {
+    /**
+     * Checks if there are any buckets remaining to be visited by this iterator.
+     * If the end has been reached, it is illegal to call {@link #peekCount()}, {@link #peekIndex()}, or {@link #advance()}.
+     *
+     * @return {@code true} if the iterator has more elements, {@code false} otherwise
+     */
+    boolean hasNext();
+
+    /**
+     * The number of items in the bucket at the current iterator position. Does not advance the iterator.
+     * Must not be called if {@link #hasNext()} returns {@code false}.
+     *
+     * @return the number of items in the bucket, always greater than zero
+     */
+    long peekCount();
+
+    /**
+     * The index of the bucket at the current iterator position. Does not advance the iterator.
+     * Must not be called if {@link #hasNext()} returns {@code false}.
+     *
+     * @return the index of the bucket, guaranteed to be in the range
+     *         [{@link ExponentialHistogram#MIN_INDEX},
+     *          {@link ExponentialHistogram#MAX_INDEX}]
+     */
+    long peekIndex();
+
+    /**
+     * Moves the iterator to the next, non-empty bucket.
+     * Must not be called if {@link #hasNext()} returns {@code false}.
+     * If {@link #hasNext()} is {@code true} after calling {@link #advance()}, {@link #peekIndex()} is guaranteed to return a value
+     * greater than the value returned prior to the {@link #advance()} call.
+     */
+    void advance();
+
+    /**
+     * Provides the scale that can be used to convert indices returned by {@link #peekIndex()} to the bucket boundaries,
+     * e.g., via {@link ExponentialScaleUtils#getLowerBucketBoundary(long, int)}.
+     *
+     * @return the scale, which is guaranteed to be constant over the lifetime of this iterator
+     */
+    int scale();
+}

+ 36 - 0
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/CopyableBucketIterator.java

@@ -0,0 +1,36 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+/**
+ * A {@link BucketIterator} that can be copied.
+ */
+public interface CopyableBucketIterator extends BucketIterator {
+
+    /**
+     * Creates a copy of this bucket iterator, pointing at the same position within the same range of buckets.
+     * Calling {@link #advance()} on the copied iterator does not affect this instance and vice-versa.
+     *
+     * @return a copy of this iterator
+     */
+    CopyableBucketIterator copy();
+}

+ 112 - 0
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java

@@ -0,0 +1,112 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import java.util.Arrays;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX;
+
+/**
+ * A data structure for efficiently computing the required scale reduction for a histogram to reach a target number of buckets.
+ * This works by examining pairs of neighboring buckets and determining at which scale reduction they would merge into a single bucket.
+ */
+class DownscaleStats {
+
+    // collapsedBucketCount[i] stores the number of additional
+    // collapsed buckets when increasing the scale by (i+1) instead of just by (i)
+    int[] collapsedBucketCount = new int[MAX_INDEX_BITS];
+
+    /**
+     * Resets the data structure to its initial state.
+     */
+    void reset() {
+        Arrays.fill(collapsedBucketCount, 0);
+    }
+
+    /**
+     * Adds a pair of neighboring bucket indices to track for potential merging.
+     * Pairs must be provided with {@code previousBucketIndex < currentBucketIndex}; a pair of indices with
+     * different signs is ignored, because no amount of downscaling can ever make such buckets collapse.
+     *
+     * @param previousBucketIndex the index of the previous bucket
+     * @param currentBucketIndex the index of the current bucket
+     */
+    void add(long previousBucketIndex, long currentBucketIndex) {
+        assert currentBucketIndex > previousBucketIndex;
+        assert previousBucketIndex >= MIN_INDEX && previousBucketIndex <= MAX_INDEX;
+        assert currentBucketIndex <= MAX_INDEX;
+        /*
+         * Below is an efficient variant of the following algorithm:
+         * for (int i=0; i<63; i++) {
+         *     if (prevIndex>>(i+1) == currIndex>>(i+1)) {
+         *         collapsedBucketCount[i]++;
+         *         break;
+         *     }
+         * }
+         * So we find the smallest scale reduction required to make the two buckets collapse into one.
+         */
+        long bitXor = previousBucketIndex ^ currentBucketIndex;
+        int numEqualLeadingBits = Long.numberOfLeadingZeros(bitXor);
+        // if there are zero equal leading bits, the indices have a different sign.
+        // Therefore right-shifting will never make the buckets combine
+        if (numEqualLeadingBits > 0) {
+            // Valid indices fit into MAX_INDEX_BITS bits plus sign (asserted above), so two same-sign indices
+            // share at least two leading bits; requiredScaleChange is therefore at most MAX_INDEX_BITS and the
+            // array access below stays in bounds.
+            int requiredScaleChange = 64 - numEqualLeadingBits;
+            collapsedBucketCount[requiredScaleChange - 1]++;
+        }
+    }
+
+    /**
+     * Returns the number of buckets that will be merged after applying the given scale reduction.
+     *
+     * @param reduction the scale reduction factor
+     * @return the number of buckets that will be merged
+     */
+    int getCollapsedBucketCountAfterScaleReduction(int reduction) {
+        assert reduction >= 0 && reduction <= MAX_INDEX_BITS;
+        int totalCollapsed = 0;
+        for (int i = 0; i < reduction; i++) {
+            totalCollapsed += collapsedBucketCount[i];
+        }
+        return totalCollapsed;
+    }
+
+    /**
+     * Returns the required scale reduction to reduce the number of buckets by at least the given amount.
+     *
+     * @param desiredCollapsedBucketCount the target number of buckets to collapse
+     * @return the required scale reduction
+     * @throws IllegalStateException if no scale reduction can collapse the desired number of buckets
+     */
+    int getRequiredScaleReductionToReduceBucketCountBy(int desiredCollapsedBucketCount) {
+        assert desiredCollapsedBucketCount >= 0;
+        if (desiredCollapsedBucketCount == 0) {
+            return 0;
+        }
+        int totalCollapsed = 0;
+        for (int i = 0; i < collapsedBucketCount.length; i++) {
+            totalCollapsed += collapsedBucketCount[i];
+            if (totalCollapsed >= desiredCollapsedBucketCount) {
+                return i + 1;
+            }
+        }
+        throw new IllegalStateException("Cannot reduce the bucket count by " + desiredCollapsedBucketCount);
+    }
+}

+ 160 - 0
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java

@@ -0,0 +1,160 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.OptionalLong;
+
+/**
+ * Interface for implementations of exponential histograms adhering to the
+ * <a href="https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram">OpenTelemetry definition</a>.
+ * This interface supports sparse implementations, allowing iteration over buckets without requiring direct index access.<br>
+ * The most important properties are:
+ * <ul>
+ *     <li>The histogram has a scale parameter, which defines the accuracy. A higher scale implies a higher accuracy.
+ *     The {@code base} for the buckets is defined as {@code base = 2^(2^-scale)}.</li>
+ *     <li>The histogram bucket at index {@code i} has the range {@code (base^i, base^(i+1)]}</li>
+ *     <li>Negative values are represented by a separate negative range of buckets with the boundaries {@code (-base^(i+1), -base^i]}</li>
+ *     <li>Histograms are perfectly subsetting: increasing the scale by one merges each pair of neighboring buckets</li>
+ *     <li>A special {@link ZeroBucket} is used to handle zero and close-to-zero values</li>
+ * </ul>
+ *
+ * <br>
+ * Additionally, all algorithms assume that samples within a bucket are located at a single point: the point of least relative error
+ * (see {@link ExponentialScaleUtils#getPointOfLeastRelativeError(long, int)}).
+ */
+public interface ExponentialHistogram {
+
+    // TODO(b/128622): support min/max/sum/count storage and merging.
+    // TODO(b/128622): Add special positive and negative infinity buckets
+    // to allow representation of explicit bucket histograms with open boundaries.
+
+    // A scale of 38 is the largest scale where we don't run into problems at the borders due to floating-point precision when computing
+    // indices for double values.
+    // Theoretically, a MAX_SCALE of 51 would work and would still cover the entire range of double values.
+    // For that to work, the math for converting from double to indices and back would need to be reworked.
+    // One option would be to use "Quadruple": https://github.com/m-vokhm/Quadruple
+    int MAX_SCALE = 38;
+
+    // At this scale, all double values fall into a single bucket.
+    int MIN_SCALE = -11;
+
+    // Only use 62 bits (plus the sign bit) at max to allow computing the difference between the smallest and largest index without causing
+    // an overflow.
+    // The extra bit also provides room for compact storage tricks.
+    int MAX_INDEX_BITS = 62;
+    long MAX_INDEX = (1L << MAX_INDEX_BITS) - 1;
+    long MIN_INDEX = -MAX_INDEX;
+
+    /**
+     * The scale of the histogram. Higher scales result in higher accuracy but potentially more buckets.
+     * Must be less than or equal to {@link #MAX_SCALE} and greater than or equal to {@link #MIN_SCALE}.
+     *
+     * @return the scale of the histogram
+     */
+    int scale();
+
+    /**
+     * @return the {@link ZeroBucket} representing the number of zero (or close-to-zero) values and its threshold
+     */
+    ZeroBucket zeroBucket();
+
+    /**
+     * @return a {@link Buckets} instance for the populated buckets covering the positive value range of this histogram.
+     * The {@link BucketIterator#scale()} of iterators obtained via {@link Buckets#iterator()} must be the same as {@link #scale()}.
+     */
+    Buckets positiveBuckets();
+
+    /**
+     * @return a {@link Buckets} instance for the populated buckets covering the negative value range of this histogram.
+     * The {@link BucketIterator#scale()} of iterators obtained via {@link Buckets#iterator()} must be the same as {@link #scale()}.
+     */
+    Buckets negativeBuckets();
+
+    /**
+     * Represents a bucket range of an {@link ExponentialHistogram}, either the positive or the negative range.
+     */
+    interface Buckets {
+
+        /**
+         * @return a {@link BucketIterator} for the populated buckets of this bucket range.
+         * The {@link BucketIterator#scale()} of the returned iterator must be the same as {@link #scale()}.
+         */
+        CopyableBucketIterator iterator();
+
+        /**
+         * @return the highest populated bucket index, or an empty optional if no buckets are populated
+         */
+        OptionalLong maxBucketIndex();
+
+        /**
+         * @return the sum of the counts across all buckets of this range
+         */
+        long valueCount();
+
+    }
+
+    /**
+     * Creates a histogram representing the distribution of the given values with at most the given number of buckets.
+     * If the given {@code maxBucketCount} is greater than or equal to the number of values, the resulting histogram will have a
+     * relative error of less than {@code 2^(2^-MAX_SCALE) - 1}.
+     *
+     * @param maxBucketCount the maximum number of buckets
+     * @param values the values to be added to the histogram
+     * @return a new {@link ExponentialHistogram}
+     */
+    static ExponentialHistogram create(int maxBucketCount, double... values) {
+        ExponentialHistogramGenerator generator = new ExponentialHistogramGenerator(maxBucketCount);
+        for (double val : values) {
+            generator.add(val);
+        }
+        return generator.get();
+    }
+
+    /**
+     * Merges the provided exponential histograms to a new, single histogram with at most the given amount of buckets.
+     *
+     * @param maxBucketCount the maximum number of buckets the result histogram is allowed to have
+     * @param histograms the histograms to merge
+     * @return the merged histogram
+     */
+    static ExponentialHistogram merge(int maxBucketCount, Iterator<ExponentialHistogram> histograms) {
+        ExponentialHistogramMerger merger = new ExponentialHistogramMerger(maxBucketCount);
+        while (histograms.hasNext()) {
+            merger.add(histograms.next());
+        }
+        return merger.get();
+    }
+
+    /**
+     * Merges the provided exponential histograms to a new, single histogram with at most the given amount of buckets.
+     *
+     * @param maxBucketCount the maximum number of buckets the result histogram is allowed to have
+     * @param histograms the histograms to merge
+     * @return the merged histogram
+     */
+    static ExponentialHistogram merge(int maxBucketCount, ExponentialHistogram... histograms) {
+        return merge(maxBucketCount, List.of(histograms).iterator());
+    }
+
+}

+ 138 - 0
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java

@@ -0,0 +1,138 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import java.util.Arrays;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex;
+
+/**
+ * Only intended for use in tests currently.
+ * A class for accumulating raw values into an {@link ExponentialHistogram} with a given maximum number of buckets.
+ *
+ * If the number of values is less than or equal to the bucket capacity, the resulting histogram is guaranteed
+ * to represent the exact raw values with a relative error less than {@code 2^(2^-MAX_SCALE) - 1}.
+ */
+public class ExponentialHistogramGenerator {
+
+    // Merging individual values into a histogram would be way too slow with our sparse, array-backed histogram representation.
+    // Therefore, for a bucket capacity of c, we first buffer c raw values to be inserted.
+    // We then turn those into an "exact" histogram, which in turn we merge with our actual result accumulator.
+    // This yields an amortized runtime of O(log(c)).
+    private final double[] rawValueBuffer;
+    // number of values currently buffered in rawValueBuffer
+    int valueCount;
+
+    private final ExponentialHistogramMerger resultMerger;
+    private final FixedCapacityExponentialHistogram valueBuffer;
+
+    private boolean isFinished = false;
+
+    /**
+     * Creates a new instance with the specified maximum number of buckets.
+     *
+     * @param maxBucketCount the maximum number of buckets for the generated histogram
+     */
+    public ExponentialHistogramGenerator(int maxBucketCount) {
+        rawValueBuffer = new double[maxBucketCount];
+        valueCount = 0;
+        valueBuffer = new FixedCapacityExponentialHistogram(maxBucketCount);
+        resultMerger = new ExponentialHistogramMerger(maxBucketCount);
+    }
+
+    /**
+     * Adds the given value to the histogram.
+     * Must not be called after {@link #get()} has been called.
+     *
+     * @param value the value to add
+     * @throws IllegalStateException if {@link #get()} has already been called
+     */
+    public void add(double value) {
+        if (isFinished) {
+            throw new IllegalStateException("get() has already been called");
+        }
+        // buffer full: flush the buffered values into the result accumulator before accepting more
+        if (valueCount == rawValueBuffer.length) {
+            mergeValuesToHistogram();
+        }
+        rawValueBuffer[valueCount] = value;
+        valueCount++;
+    }
+
+    /**
+     * Returns the histogram representing the distribution of all accumulated values.
+     *
+     * @return the histogram representing the distribution of all accumulated values
+     */
+    public ExponentialHistogram get() {
+        isFinished = true;
+        mergeValuesToHistogram();
+        return resultMerger.get();
+    }
+
+    // Flushes the buffered raw values into the exact temporary histogram (valueBuffer)
+    // and merges that into the result accumulator.
+    private void mergeValuesToHistogram() {
+        if (valueCount == 0) {
+            return;
+        }
+        // Sorting partitions the buffer into negative values, zeros, and positive values,
+        // and makes values with equal bucket indices adjacent.
+        Arrays.sort(rawValueBuffer, 0, valueCount);
+        int negativeValuesCount = 0;
+        while (negativeValuesCount < valueCount && rawValueBuffer[negativeValuesCount] < 0) {
+            negativeValuesCount++;
+        }
+
+        valueBuffer.reset();
+        int scale = valueBuffer.scale();
+
+        // Buckets must be provided with their indices in ascending order.
+        // For the negative range, higher bucket indices correspond to bucket boundaries closer to -INF
+        // and smaller bucket indices correspond to bucket boundaries closer to zero.
+        // Therefore we have to iterate the negative values in the sorted rawValueBuffer in reverse order,
+        // from the value closest to -INF to the value closest to zero.
+        // Note that i here is the index of the value in the rawValueBuffer array
+        // and is unrelated to the histogram bucket index for the value.
+        for (int i = negativeValuesCount - 1; i >= 0; i--) {
+            long count = 1;
+            long index = computeIndex(rawValueBuffer[i], scale);
+            // collapse adjacent values mapping to the same bucket index into a single bucket
+            while ((i - 1) >= 0 && computeIndex(rawValueBuffer[i - 1], scale) == index) {
+                i--;
+                count++;
+            }
+            valueBuffer.tryAddBucket(index, count, false);
+        }
+
+        int zeroCount = 0;
+        while ((negativeValuesCount + zeroCount) < valueCount && rawValueBuffer[negativeValuesCount + zeroCount] == 0) {
+            zeroCount++;
+        }
+        valueBuffer.setZeroBucket(ZeroBucket.minimalWithCount(zeroCount));
+        // the remaining values are positive; again collapse adjacent values with equal bucket indices
+        for (int i = negativeValuesCount + zeroCount; i < valueCount; i++) {
+            long count = 1;
+            long index = computeIndex(rawValueBuffer[i], scale);
+            while ((i + 1) < valueCount && computeIndex(rawValueBuffer[i + 1], scale) == index) {
+                i++;
+                count++;
+            }
+            valueBuffer.tryAddBucket(index, count, true);
+        }
+
+        resultMerger.add(valueBuffer);
+        valueCount = 0;
+    }
+
+}

+ 186 - 0
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java

@@ -0,0 +1,186 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import java.util.OptionalLong;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getMaximumScaleIncrease;
+
+/**
+ * Allows accumulating multiple {@link ExponentialHistogram} into a single one
+ * while keeping the bucket count in the result below a given limit.
+ */
+public class ExponentialHistogramMerger {
+
+    // Our algorithm is not in-place, therefore we use two histograms and ping-pong between them
+    private FixedCapacityExponentialHistogram result;
+    private FixedCapacityExponentialHistogram buffer;
+
+    private final DownscaleStats downscaleStats;
+
+    private boolean isFinished;
+
+    /**
+     * Creates a new instance with the specified bucket limit.
+     *
+     * @param bucketLimit the maximum number of buckets the result histogram is allowed to have
+     */
+    public ExponentialHistogramMerger(int bucketLimit) {
+        downscaleStats = new DownscaleStats();
+        result = new FixedCapacityExponentialHistogram(bucketLimit);
+        buffer = new FixedCapacityExponentialHistogram(bucketLimit);
+    }
+
+    // Only intended for testing, using this in production means an unnecessary reduction of precision
+    private ExponentialHistogramMerger(int bucketLimit, int minScale) {
+        this(bucketLimit);
+        result.resetBuckets(minScale);
+        buffer.resetBuckets(minScale);
+    }
+
+    static ExponentialHistogramMerger createForTesting(int bucketLimit, int minScale) {
+        return new ExponentialHistogramMerger(bucketLimit, minScale);
+    }
+
+    /**
+     * Merges the given histogram into the current result.
+     * Must not be called after {@link #get()} has been called.
+     *
+     * @param toAdd the histogram to merge
+     * @throws IllegalStateException if {@link #get()} has already been called
+     */
+    public void add(ExponentialHistogram toAdd) {
+        if (isFinished) {
+            throw new IllegalStateException("get() has already been called");
+        }
+        doMerge(toAdd);
+    }
+
+    /**
+     * Returns the merged histogram.
+     *
+     * @return the merged histogram
+     */
+    public ExponentialHistogram get() {
+        isFinished = true;
+        return result;
+    }
+
+    // TODO(b/128622): this algorithm is very efficient if b has roughly as many buckets as a
+    // However, if b is much smaller we still have to iterate over all buckets of a which is very wasteful.
+    // This can be optimized by buffering multiple histograms to accumulate first,
+    // then in O(log(n)) turn them into a single, merged histogram.
+    // (n is the number of buffered buckets)
+
+    // Merges histogram b into the current result (a), writing the merged histogram into the
+    // scratch buffer and then swapping buffer and result.
+    private void doMerge(ExponentialHistogram b) {
+
+        ExponentialHistogram a = result;
+
+        CopyableBucketIterator posBucketsA = a.positiveBuckets().iterator();
+        CopyableBucketIterator negBucketsA = a.negativeBuckets().iterator();
+        CopyableBucketIterator posBucketsB = b.positiveBuckets().iterator();
+        CopyableBucketIterator negBucketsB = b.negativeBuckets().iterator();
+
+        ZeroBucket zeroBucket = a.zeroBucket().merge(b.zeroBucket());
+        zeroBucket = zeroBucket.collapseOverlappingBucketsForAll(posBucketsA, negBucketsA, posBucketsB, negBucketsB);
+
+        buffer.setZeroBucket(zeroBucket);
+
+        // We attempt to bring everything to the scale of A.
+        // This might involve increasing the scale for B, which would increase its indices.
+        // We need to ensure that we do not exceed MAX_INDEX / MIN_INDEX in this case.
+        int targetScale = a.scale();
+        if (targetScale > b.scale()) {
+            if (negBucketsB.hasNext()) {
+                long smallestIndex = negBucketsB.peekIndex();
+                OptionalLong maximumIndex = b.negativeBuckets().maxBucketIndex();
+                assert maximumIndex.isPresent()
+                    : "We checked that the negative bucket range is not empty, therefore the maximum index should be present";
+                int maxScaleIncrease = Math.min(getMaximumScaleIncrease(smallestIndex), getMaximumScaleIncrease(maximumIndex.getAsLong()));
+                targetScale = Math.min(targetScale, b.scale() + maxScaleIncrease);
+            }
+            if (posBucketsB.hasNext()) {
+                long smallestIndex = posBucketsB.peekIndex();
+                OptionalLong maximumIndex = b.positiveBuckets().maxBucketIndex();
+                assert maximumIndex.isPresent()
+                    : "We checked that the positive bucket range is not empty, therefore the maximum index should be present";
+                int maxScaleIncrease = Math.min(getMaximumScaleIncrease(smallestIndex), getMaximumScaleIncrease(maximumIndex.getAsLong()));
+                targetScale = Math.min(targetScale, b.scale() + maxScaleIncrease);
+            }
+        }
+
+        // Now we are sure that everything fits numerically into targetScale.
+        // However, we might exceed our limit for the total number of buckets.
+        // Therefore, we try the merge optimistically. If we fail, we reduce the target scale to make everything fit.
+
+        // The first attempt works on copies, so the original (un-advanced) iterators remain usable for a retry.
+        MergingBucketIterator positiveMerged = new MergingBucketIterator(posBucketsA.copy(), posBucketsB.copy(), targetScale);
+        MergingBucketIterator negativeMerged = new MergingBucketIterator(negBucketsA.copy(), negBucketsB.copy(), targetScale);
+
+        buffer.resetBuckets(targetScale);
+        downscaleStats.reset();
+        int overflowCount = putBuckets(buffer, negativeMerged, false, downscaleStats);
+        overflowCount += putBuckets(buffer, positiveMerged, true, downscaleStats);
+
+        if (overflowCount > 0) {
+            // UDD-sketch approach: decrease the scale and retry.
+            int reduction = downscaleStats.getRequiredScaleReductionToReduceBucketCountBy(overflowCount);
+            targetScale -= reduction;
+            buffer.resetBuckets(targetScale);
+            positiveMerged = new MergingBucketIterator(posBucketsA, posBucketsB, targetScale);
+            negativeMerged = new MergingBucketIterator(negBucketsA, negBucketsB, targetScale);
+            overflowCount = putBuckets(buffer, negativeMerged, false, null);
+            overflowCount += putBuckets(buffer, positiveMerged, true, null);
+
+            assert overflowCount == 0 : "Should never happen, the histogram should have had enough space";
+        }
+        // ping-pong: the freshly built buffer becomes the new result, the old result is reused as scratch space
+        FixedCapacityExponentialHistogram temp = result;
+        result = buffer;
+        buffer = temp;
+    }
+
+    /**
+     * Copies the given buckets into the output histogram.
+     *
+     * @param output the histogram to copy the buckets into
+     * @param buckets the buckets to copy, visited in ascending index order
+     * @param isPositive whether the buckets belong to the positive or the negative range
+     * @param downscaleStats if not {@code null}, records for each pair of neighboring buckets the scale reduction
+     *                       required to collapse them, so a smaller scale can be chosen if the output overflows
+     * @return the number of buckets that did not fit into the output histogram
+     */
+    private static int putBuckets(
+        FixedCapacityExponentialHistogram output,
+        BucketIterator buckets,
+        boolean isPositive,
+        DownscaleStats downscaleStats
+    ) {
+        boolean collectDownScaleStatsOnNext = false;
+        // initial value is never read: collectDownScaleStatsOnNext is false on the first iteration
+        long prevIndex = 0;
+        int overflowCount = 0;
+        while (buckets.hasNext()) {
+            long idx = buckets.peekIndex();
+            if (collectDownScaleStatsOnNext) {
+                downscaleStats.add(prevIndex, idx);
+            } else {
+                collectDownScaleStatsOnNext = downscaleStats != null;
+            }
+
+            if (output.tryAddBucket(idx, buckets.peekCount(), isPositive) == false) {
+                overflowCount++;
+            }
+
+            prevIndex = idx;
+            buckets.advance();
+        }
+        return overflowCount;
+    }
+
+}

+ 165 - 0
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java

@@ -0,0 +1,165 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import java.util.OptionalLong;
+
+/**
+ * Provides quantile estimation for {@link ExponentialHistogram} instances.
+ */
+public class ExponentialHistogramQuantile {
+
+    /**
+     * Estimates a quantile for the distribution represented by the given histogram.
+     *
+     * It returns the value of the element at rank {@code max(0, min(n - 1, (quantile * (n + 1)) - 1))}, where n is the total number of
+     * values and rank starts at 0. If the rank is fractional, the result is linearly interpolated from the values of the two
+     * neighboring ranks.
+     *
+     * @param histo    the histogram representing the distribution
+     * @param quantile the quantile to query, in the range [0, 1]
+     * @return the estimated quantile value, or {@link Double#NaN} if the histogram is empty
+     */
+    public static double getQuantile(ExponentialHistogram histo, double quantile) {
+        if (quantile < 0 || quantile > 1) {
+            throw new IllegalArgumentException("quantile must be in range [0, 1]");
+        }
+
+        long zeroCount = histo.zeroBucket().count();
+        long negCount = histo.negativeBuckets().valueCount();
+        long posCount = histo.positiveBuckets().valueCount();
+
+        long totalCount = zeroCount + negCount + posCount;
+        if (totalCount == 0) {
+            // Can't compute quantile on an empty histogram
+            return Double.NaN;
+        }
+
+        // The rank exactly as documented above: (quantile * (n + 1)) - 1, clamped into [0, n - 1]
+        double exactRank = Math.max(0, Math.min(totalCount - 1, quantile * (totalCount + 1) - 1));
+        long lowerRank = (long) Math.floor(exactRank);
+        long upperRank = (long) Math.ceil(exactRank);
+        double upperFactor = exactRank - lowerRank;
+
+        // A single scan yields both the element at upperRank and the one at the rank before it
+        ValueAndPreviousValue values = getElementAtRank(histo, upperRank);
+
+        double result;
+        if (lowerRank == upperRank) {
+            result = values.valueAtRank();
+        } else {
+            // linear interpolation between the two neighboring ranks
+            result = values.valueAtPreviousRank() * (1 - upperFactor) + values.valueAtRank() * upperFactor;
+        }
+        return removeNegativeZero(result);
+    }
+
+    /**
+     * Maps a negative zero to positive zero, leaving all other values (including NaN) unchanged.
+     */
+    private static double removeNegativeZero(double result) {
+        // -0.0 == 0.0 is true in Java, so this replaces -0.0 with +0.0
+        return result == 0.0 ? 0.0 : result;
+    }
+
+    /**
+     * Locates the element at the given rank, treating the histogram contents as a sorted sequence:
+     * the negative bucket values first (ascending, i.e. largest magnitude first), then the zero-bucket
+     * values, then the positive bucket values. Also provides the element at the preceding rank,
+     * which may fall into a different range.
+     *
+     * @param histo the histogram to query
+     * @param rank  the zero-based rank of the desired element
+     * @return the value at the given rank and the value at the previous rank (NaN if rank is 0)
+     */
+    private static ValueAndPreviousValue getElementAtRank(ExponentialHistogram histo, long rank) {
+        long negativeValuesCount = histo.negativeBuckets().valueCount();
+        long zeroCount = histo.zeroBucket().count();
+        if (rank < negativeValuesCount) {
+            if (rank == 0) {
+                // first element overall, so there is no previous element
+                return new ValueAndPreviousValue(Double.NaN, -getLastBucketMidpoint(histo.negativeBuckets()));
+            } else {
+                // ranks within the negative range are mirrored: overall rank r corresponds to
+                // magnitude rank (negativeValuesCount - r); negateAndSwap restores sign and order
+                return getBucketMidpointForRank(histo.negativeBuckets().iterator(), negativeValuesCount - rank).negateAndSwap();
+            }
+        } else if (rank < (negativeValuesCount + zeroCount)) {
+            if (rank == negativeValuesCount) {
+                // the element at the previous rank falls into the negative bucket range
+                return new ValueAndPreviousValue(-getFirstBucketMidpoint(histo.negativeBuckets()), 0.0);
+            } else {
+                return new ValueAndPreviousValue(0.0, 0.0);
+            }
+        } else {
+            ValueAndPreviousValue result = getBucketMidpointForRank(
+                histo.positiveBuckets().iterator(),
+                rank - negativeValuesCount - zeroCount
+            );
+            if ((rank - 1) < negativeValuesCount) {
+                // previous value falls into the negative bucket range or has rank -1 and therefore doesn't exist
+                return new ValueAndPreviousValue(-getFirstBucketMidpoint(histo.negativeBuckets()), result.valueAtRank);
+            } else if ((rank - 1) < (negativeValuesCount + zeroCount)) {
+                // previous value falls into the zero bucket
+                return new ValueAndPreviousValue(0.0, result.valueAtRank);
+            } else {
+                return result;
+            }
+        }
+    }
+
+    /**
+     * @return the point of least relative error of the smallest-index bucket, or NaN if there are no buckets
+     */
+    private static double getFirstBucketMidpoint(ExponentialHistogram.Buckets buckets) {
+        CopyableBucketIterator iterator = buckets.iterator();
+        if (iterator.hasNext()) {
+            return ExponentialScaleUtils.getPointOfLeastRelativeError(iterator.peekIndex(), iterator.scale());
+        } else {
+            return Double.NaN;
+        }
+    }
+
+    /**
+     * @return the point of least relative error of the highest-index bucket, or NaN if there are no buckets
+     */
+    private static double getLastBucketMidpoint(ExponentialHistogram.Buckets buckets) {
+        OptionalLong highestIndex = buckets.maxBucketIndex();
+        if (highestIndex.isPresent()) {
+            return ExponentialScaleUtils.getPointOfLeastRelativeError(highestIndex.getAsLong(), buckets.iterator().scale());
+        } else {
+            return Double.NaN;
+        }
+    }
+
+    /**
+     * Scans the buckets in ascending index order until the bucket containing the element at the given
+     * rank is found, returning the midpoints of the buckets holding that element and its predecessor.
+     *
+     * @param buckets the bucket iterator to scan, consumed by this call
+     * @param rank    the zero-based rank within the value range covered by the iterator
+     * @return midpoint of the bucket at the previous rank (NaN for rank 0) and at the desired rank
+     * @throws IllegalStateException if the buckets hold fewer than {@code rank + 1} values
+     */
+    private static ValueAndPreviousValue getBucketMidpointForRank(BucketIterator buckets, long rank) {
+        long prevIndex = Long.MIN_VALUE;
+        long seenCount = 0;
+        while (buckets.hasNext()) {
+            seenCount += buckets.peekCount();
+            if (rank < seenCount) {
+                double center = ExponentialScaleUtils.getPointOfLeastRelativeError(buckets.peekIndex(), buckets.scale());
+                double prevCenter;
+                if (rank > 0) {
+                    if ((rank - 1) >= (seenCount - buckets.peekCount())) {
+                        // element at previous rank is in same bucket
+                        prevCenter = center;
+                    } else {
+                        // element at previous rank is in the previous bucket
+                        prevCenter = ExponentialScaleUtils.getPointOfLeastRelativeError(prevIndex, buckets.scale());
+                    }
+                } else {
+                    // there is no previous element
+                    prevCenter = Double.NaN;
+                }
+                return new ValueAndPreviousValue(prevCenter, center);
+            }
+            prevIndex = buckets.peekIndex();
+            buckets.advance();
+        }
+        throw new IllegalStateException("The total number of elements in the buckets is less than the desired rank.");
+    }
+
+    /**
+     * @param valueAtPreviousRank the value at the rank before the desired rank, NaN if not applicable.
+     * @param valueAtRank         the value at the desired rank
+     */
+    private record ValueAndPreviousValue(double valueAtPreviousRank, double valueAtRank) {
+        /**
+         * Converts a result computed on magnitudes of negative values back to the actual
+         * negative values: both values are negated, which also reverses their order.
+         */
+        ValueAndPreviousValue negateAndSwap() {
+            return new ValueAndPreviousValue(-valueAtRank, -valueAtPreviousRank);
+        }
+    }
+}

+ 267 - 0
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java

@@ -0,0 +1,267 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE;
+
+/**
+ * A collection of utility methods for working with indices and scales of exponential bucket histograms.
+ */
+public class ExponentialScaleUtils {
+
+    private static final double LN_2 = Math.log(2);
+
+    /**
+     * This table is visible for testing to ensure it is up-to-date.
+     * <br>
+     * For each scale from {@link ExponentialHistogram#MIN_SCALE} to {@link ExponentialHistogram#MAX_SCALE},
+     * the table contains a pre-computed constant for up-scaling bucket indices.
+     * The constant is computed using the following formula:
+     * {@code 2^63 * (1 + 2^scale * (1 - log2(1 + 2^(2^-scale))))}
+     */
+    static final long[] SCALE_UP_CONSTANT_TABLE = new long[] {
+        4503599627370495L,
+        9007199254740991L,
+        18014398509481983L,
+        36028797018963967L,
+        72057594037927935L,
+        144115188075855871L,
+        288230376054894118L,
+        576448062320457790L,
+        1146436840887505800L,
+        2104167428150631728L,
+        3127054724296373505L,
+        3828045265094622256L,
+        4214097751025163417L,
+        4412149414858430624L,
+        4511824212543271281L,
+        4561743405547877994L,
+        4586713247558758689L,
+        4599199449917992829L,
+        4605442711287634239L,
+        4608564361996858084L,
+        4610125189854540715L,
+        4610905604096266504L,
+        4611295811256239977L,
+        4611490914841115537L,
+        4611588466634164420L,
+        4611637242530765249L,
+        4611661630479075212L,
+        4611673824453231387L,
+        4611679921440309624L,
+        4611682969933848761L,
+        4611684494180618332L,
+        4611685256304003118L,
+        4611685637365695511L,
+        4611685827896541707L,
+        4611685923161964805L,
+        4611685970794676354L,
+        4611685994611032129L,
+        4611686006519210016L,
+        4611686012473298960L,
+        4611686015450343432L,
+        4611686016938865668L,
+        4611686017683126786L,
+        4611686018055257345L,
+        4611686018241322624L,
+        4611686018334355264L,
+        4611686018380871584L,
+        4611686018404129744L,
+        4611686018415758824L,
+        4611686018421573364L,
+        4611686018424480634L };
+
+    /**
+     * Computes the new index for a bucket when adjusting the scale of the histogram.
+     * This method supports both down-scaling (reducing the scale) and up-scaling.
+     * When up-scaling, it returns the bucket containing the point of least error of the original bucket.
+     *
+     * @param index           the current bucket index to be adjusted
+     * @param currentScale    the current scale
+     * @param scaleAdjustment the adjustment to make; the new scale will be {@code currentScale + scaleAdjustment}
+     * @return the index of the bucket in the new scale
+     */
+    static long adjustScale(long index, int currentScale, int scaleAdjustment) {
+        checkIndexAndScaleBounds(index, currentScale);
+
+        int newScale = currentScale + scaleAdjustment;
+        assert newScale >= MIN_SCALE && newScale <= MAX_SCALE
+            : "adjusted scale must be in the range [" + MIN_SCALE + ", " + MAX_SCALE + "]";
+
+        if (scaleAdjustment <= 0) {
+            // down-scaling halves the bucket resolution per step, which is a simple arithmetic right shift
+            return index >> -scaleAdjustment;
+        } else {
+            assert scaleAdjustment <= MAX_INDEX_BITS : "Scaling up more than " + MAX_INDEX_BITS + " does not make sense";
+            // When scaling up, we want to return the bucket containing the point of least relative error.
+            // This bucket index can be computed as (index << adjustment) + offset.
+            // The offset is a constant that depends only on the scale and adjustment, not the index.
+            // The mathematically correct formula for the offset is:
+            // 2^adjustment * (1 + 2^currentScale * (1 - log2(1 + 2^(2^-currentScale))))
+            // This is hard to compute with double-precision floating-point numbers due to rounding errors and is also expensive.
+            // Therefore, we precompute 2^63 * (1 + 2^currentScale * (1 - log2(1 + 2^(2^-currentScale)))) and store it
+            // in SCALE_UP_CONSTANT_TABLE for each scale.
+            // This can then be converted to the correct offset by dividing with (2^(63-adjustment)),
+            // which is equivalent to a right shift with (63-adjustment)
+            long offset = SCALE_UP_CONSTANT_TABLE[currentScale - MIN_SCALE] >> (63 - scaleAdjustment);
+            return (index << scaleAdjustment) + offset;
+        }
+    }
+
+    /**
+     * Compares the lower boundaries of two buckets, which may have different scales.
+     * This is equivalent to a mathematically correct comparison of the lower bucket boundaries.
+     * Note that this method allows for scales and indices of the full numeric range of the types.
+     *
+     * @param idxA           the index of the first bucket
+     * @param scaleA         the scale of the first bucket
+     * @param idxB           the index of the second bucket
+     * @param scaleB         the scale of the second bucket
+     * @return a negative integer, zero, or a positive integer as the first bucket's lower boundary is
+     *         less than, equal to, or greater than the second bucket's lower boundary
+     */
+    public static int compareExponentiallyScaledValues(long idxA, int scaleA, long idxB, int scaleB) {
+        if (scaleA > scaleB) {
+            // normalize so that the first argument always has the smaller (or equal) scale
+            return -compareExponentiallyScaledValues(idxB, scaleB, idxA, scaleA);
+        }
+        // scaleA <= scaleB
+        int shifts = scaleB - scaleA;
+
+        long scaledDownB = idxB >> shifts;
+        int result = Long.compare(idxA, scaledDownB);
+        if (result == 0) {
+            // the scaled down values are equal
+            // this means that b is bigger if it has a "fractional" part, which corresponds to the bits that were removed on the right-shift
+            assert (1L << shifts) > 0;
+            long shiftedAway = idxB & ((1L << shifts) - 1);
+            if (shiftedAway > 0) {
+                return -1;
+            } else {
+                return 0;
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Returns the maximum permissible scale increase that does not cause the index to grow out
+     * of the [{@link ExponentialHistogram#MIN_INDEX}, {@link ExponentialHistogram#MAX_INDEX}] range.
+     *
+     * @param index the index to check
+     * @return the maximum permissible scale increase
+     */
+    public static int getMaximumScaleIncrease(long index) {
+        checkIndexBounds(index);
+        // Scale increase by one corresponds to a left shift, which in turn is the same as multiplying by two.
+        // Because we know that MIN_INDEX = -MAX_INDEX, we can just compute the maximum increase of the absolute index.
+        // This allows us to reason only about non-negative indices further below.
+        index = Math.abs(index);
+        // the maximum scale increase is defined by how many left-shifts we can do without growing beyond MAX_INDEX
+        // MAX_INDEX is defined as a number where the lowest MAX_INDEX_BITS bits are all ones.
+        // So in other words, we must ensure that the leftmost (64 - MAX_INDEX_BITS) remain zero,
+        // which is exactly what the formula below does.
+        return Long.numberOfLeadingZeros(index) - (64 - MAX_INDEX_BITS);
+    }
+
+    /**
+     * Returns the upper boundary of the bucket with the given index and scale.
+     *
+     * @param index the index of the bucket
+     * @param scale the scale of the bucket
+     * @return the upper boundary of the bucket
+     */
+    public static double getUpperBucketBoundary(long index, int scale) {
+        checkIndexAndScaleBounds(index, scale);
+        // the upper boundary of bucket i is the lower boundary of bucket i+1
+        return exponentiallyScaledToDoubleValue(index + 1, scale);
+    }
+
+    /**
+     * Returns the lower boundary of the bucket with the given index and scale.
+     *
+     * @param index the index of the bucket in the [{@link ExponentialHistogram#MIN_INDEX}, {@link ExponentialHistogram#MAX_INDEX}] range.
+     * @param scale the scale of the bucket
+     * @return the lower boundary of the bucket
+     */
+    public static double getLowerBucketBoundary(long index, int scale) {
+        checkIndexAndScaleBounds(index, scale);
+        return exponentiallyScaledToDoubleValue(index, scale);
+    }
+
+    /**
+     * Computes (2^(2^-scale))^index,
+     * allowing also indices outside of the [{@link ExponentialHistogram#MIN_INDEX}, {@link ExponentialHistogram#MAX_INDEX}] range.
+     */
+    static double exponentiallyScaledToDoubleValue(long index, int scale) {
+        // Math.exp is expected to be faster and more accurate than Math.pow
+        // For that reason we use (2^(2^-scale))^index = 2^( (2^-scale) * index) = (e^ln(2))^( (2^-scale) * index)
+        // = e^( ln(2) * (2^-scale) * index)
+        double inverseFactor = Math.scalb(LN_2, -scale);
+        return Math.exp(inverseFactor * index);
+    }
+
+    /**
+     * For a bucket with the given index, computes the point {@code x} in the bucket such that
+     * {@code (x - l) / l} equals {@code (u - x) / u}, where {@code l} is the lower bucket boundary and {@code u}
+     * is the upper bucket boundary.
+     * <br>
+     * In other words, we select the point in the bucket that has the least relative error with respect to any other point in the bucket.
+     *
+     * @param bucketIndex the index of the bucket
+     * @param scale       the scale of the bucket
+     * @return the point of least relative error
+     */
+    public static double getPointOfLeastRelativeError(long bucketIndex, int scale) {
+        checkIndexAndScaleBounds(bucketIndex, scale);
+        double upperBound = getUpperBucketBoundary(bucketIndex, scale);
+        // solving (x-l)/l = (u-x)/u with u = base * l yields x = 2*u / (base + 1)
+        double histogramBase = Math.pow(2, Math.scalb(1, -scale));
+        return 2 / (histogramBase + 1) * upperBound;
+    }
+
+    /**
+     * Provides the index of the bucket of the exponential histogram with the given scale that contains the provided value.
+     *
+     * @param value the value to find the bucket for
+     * @param scale the scale of the histogram
+     * @return the index of the bucket
+     */
+    public static long computeIndex(double value, int scale) {
+        checkScaleBounds(scale);
+        return Base2ExponentialHistogramIndexer.computeIndex(value, scale);
+    }
+
+    private static void checkIndexAndScaleBounds(long index, int scale) {
+        checkIndexBounds(index);
+        checkScaleBounds(scale);
+    }
+
+    private static void checkScaleBounds(int scale) {
+        assert scale >= MIN_SCALE && scale <= MAX_SCALE : "scale must be in range [" + MIN_SCALE + ".." + MAX_SCALE + "]";
+    }
+
+    private static void checkIndexBounds(long index) {
+        assert index >= MIN_INDEX && index <= MAX_INDEX : "index must be in range [" + MIN_INDEX + ".." + MAX_INDEX + "]";
+    }
+
+}

+ 261 - 0
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java

@@ -0,0 +1,261 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import java.util.OptionalLong;
+
+/**
+ * An implementation of a mutable {@link ExponentialHistogram} with a sparse, array-backed representation.
+ * <br>
+ * Consumers must ensure that if the histogram is mutated, all previously acquired {@link BucketIterator}
+ * instances are no longer used.
+ */
+final class FixedCapacityExponentialHistogram implements ExponentialHistogram {
+
+    // These arrays represent both the positive and the negative buckets.
+    // To avoid confusion, we refer to positions within the array as "slots" instead of indices in this file
+    // When we use term "index", we mean the exponential histogram bucket index.
+    // They store all buckets for the negative range first, with the bucket indices in ascending order,
+    // followed by all buckets for the positive range, also with their indices in ascending order.
+    // This means we store the buckets ordered by their boundaries in ascending order (from -INF to +INF).
+    private final long[] bucketIndices;
+    private final long[] bucketCounts;
+
+    // the scale shared by all positive and negative buckets of this histogram
+    private int bucketScale;
+
+    private final Buckets negativeBuckets = new Buckets(false);
+
+    private ZeroBucket zeroBucket;
+
+    private final Buckets positiveBuckets = new Buckets(true);
+
+    /**
+     * Creates an empty histogram with the given capacity and a {@link ZeroBucket#minimalEmpty()} zero bucket.
+     * The scale is initialized to the maximum possible precision ({@link #MAX_SCALE}).
+     *
+     * @param bucketCapacity the maximum total number of positive and negative buckets this histogram can hold.
+     */
+    FixedCapacityExponentialHistogram(int bucketCapacity) {
+        bucketIndices = new long[bucketCapacity];
+        bucketCounts = new long[bucketCapacity];
+        reset();
+    }
+
+    /**
+     * Resets this histogram to the same state as a newly constructed one with the same capacity.
+     */
+    void reset() {
+        setZeroBucket(ZeroBucket.minimalEmpty());
+        resetBuckets(MAX_SCALE);
+    }
+
+    /**
+     * Removes all positive and negative buckets from this histogram and sets the scale to the given value.
+     *
+     * @param scale the scale to set for this histogram
+     */
+    void resetBuckets(int scale) {
+        assert scale >= MIN_SCALE && scale <= MAX_SCALE : "scale must be in range [" + MIN_SCALE + ".." + MAX_SCALE + "]";
+        negativeBuckets.reset();
+        positiveBuckets.reset();
+        bucketScale = scale;
+    }
+
+    @Override
+    public ZeroBucket zeroBucket() {
+        return zeroBucket;
+    }
+
+    /**
+     * Replaces the zero bucket of this histogram with the given one.
+     * Callers must ensure that the given {@link ZeroBucket} does not
+     * overlap with any of the positive or negative buckets of this histogram.
+     *
+     * @param zeroBucket the zero bucket to set
+     */
+    void setZeroBucket(ZeroBucket zeroBucket) {
+        this.zeroBucket = zeroBucket;
+    }
+
+    /**
+     * Attempts to add a bucket to the positive or negative range of this histogram.
+     * <br>
+     * Callers must adhere to the following rules:
+     * <ul>
+     *     <li>All buckets for the negative values range must be provided before the first one from the positive values range.</li>
+     *     <li>For both the negative and positive ranges, buckets must be provided with their indices in ascending order.</li>
+     *     <li>It is not allowed to provide the same bucket more than once.</li>
+     *     <li>It is not allowed to add empty buckets ({@code count <= 0}).</li>
+     * </ul>
+     *
+     * If any of these rules are violated, this call will fail with an exception.
+     * If the bucket cannot be added because the maximum capacity has been reached, the call will not modify the state
+     * of this histogram and will return {@code false}.
+     *
+     * @param index      the index of the bucket to add
+     * @param count      the count to associate with the given bucket
+     * @param isPositive {@code true} if the bucket belongs to the positive range, {@code false} if it belongs to the negative range
+     * @return {@code true} if the bucket was added, {@code false} if it could not be added due to insufficient capacity
+     */
+    boolean tryAddBucket(long index, long count, boolean isPositive) {
+        assert index >= MIN_INDEX && index <= MAX_INDEX : "index must be in range [" + MIN_INDEX + ".." + MAX_INDEX + "]";
+        assert isPositive || positiveBuckets.numBuckets == 0 : "Cannot add negative buckets after a positive bucket has been added";
+        assert count > 0 : "Cannot add a bucket with empty or negative count";
+        if (isPositive) {
+            return positiveBuckets.tryAddBucket(index, count);
+        } else {
+            return negativeBuckets.tryAddBucket(index, count);
+        }
+    }
+
+    @Override
+    public int scale() {
+        return bucketScale;
+    }
+
+    @Override
+    public ExponentialHistogram.Buckets negativeBuckets() {
+        return negativeBuckets;
+    }
+
+    @Override
+    public ExponentialHistogram.Buckets positiveBuckets() {
+        return positiveBuckets;
+    }
+
+    /**
+     * View of either the negative or the positive bucket range within the shared
+     * {@link #bucketIndices} / {@link #bucketCounts} arrays.
+     */
+    private class Buckets implements ExponentialHistogram.Buckets {
+
+        private final boolean isPositive;
+        private int numBuckets;
+        // number of buckets already folded into cachedValueSum (see valueCount())
+        private int cachedValueSumForNumBuckets;
+        // lazily maintained sum of the counts of the first cachedValueSumForNumBuckets buckets
+        private long cachedValueSum;
+
+        /**
+         * @param isPositive true, if this object should represent the positive bucket range, false for the negative range
+         */
+        Buckets(boolean isPositive) {
+            this.isPositive = isPositive;
+            reset();
+        }
+
+        /**
+         * @return the position of the first bucket of this set of buckets within {@link #bucketCounts} and {@link #bucketIndices}.
+         */
+        int startSlot() {
+            // negative buckets always occupy the front of the arrays, so the positive range starts right after them
+            return isPositive ? negativeBuckets.numBuckets : 0;
+        }
+
+        final void reset() {
+            numBuckets = 0;
+            cachedValueSumForNumBuckets = 0;
+            cachedValueSum = 0;
+        }
+
+        boolean tryAddBucket(long index, long count) {
+            int slot = startSlot() + numBuckets;
+            assert numBuckets == 0 || bucketIndices[slot - 1] < index
+                : "Histogram buckets must be added with their indices in ascending order";
+            // capacity check happens before any mutation, so a failed add leaves the state untouched
+            if (slot >= bucketCounts.length) {
+                return false; // no more space
+            }
+            bucketIndices[slot] = index;
+            bucketCounts[slot] = count;
+            numBuckets++;
+            return true;
+        }
+
+        @Override
+        public CopyableBucketIterator iterator() {
+            int start = startSlot();
+            return new BucketArrayIterator(start, start + numBuckets);
+        }
+
+        @Override
+        public OptionalLong maxBucketIndex() {
+            if (numBuckets == 0) {
+                return OptionalLong.empty();
+            } else {
+                // buckets are stored with indices ascending, so the last slot holds the maximum
+                return OptionalLong.of(bucketIndices[startSlot() + numBuckets - 1]);
+            }
+        }
+
+        @Override
+        public long valueCount() {
+            int startSlot = startSlot();
+            // lazily extend the cached sum to cover buckets added since the last call
+            while (cachedValueSumForNumBuckets < numBuckets) {
+                cachedValueSum += bucketCounts[startSlot + cachedValueSumForNumBuckets];
+                cachedValueSumForNumBuckets++;
+            }
+            return cachedValueSum;
+        }
+    }
+
+    /**
+     * Iterator over a contiguous slot range of the backing arrays; valid only until the histogram is mutated.
+     */
+    private class BucketArrayIterator implements CopyableBucketIterator {
+
+        int currentSlot;
+        final int limit;
+
+        private BucketArrayIterator(int startSlot, int limit) {
+            this.currentSlot = startSlot;
+            this.limit = limit;
+        }
+
+        @Override
+        public boolean hasNext() {
+            return currentSlot < limit;
+        }
+
+        @Override
+        public long peekCount() {
+            ensureEndNotReached();
+            return bucketCounts[currentSlot];
+        }
+
+        @Override
+        public long peekIndex() {
+            ensureEndNotReached();
+            return bucketIndices[currentSlot];
+        }
+
+        @Override
+        public void advance() {
+            ensureEndNotReached();
+            currentSlot++;
+        }
+
+        @Override
+        public int scale() {
+            return FixedCapacityExponentialHistogram.this.scale();
+        }
+
+        @Override
+        public CopyableBucketIterator copy() {
+            return new BucketArrayIterator(currentSlot, limit);
+        }
+
+        private void ensureEndNotReached() {
+            if (hasNext() == false) {
+                throw new IllegalStateException("Iterator has no more buckets");
+            }
+        }
+    }
+}

+ 109 - 0
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java

@@ -0,0 +1,109 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+/**
+ * An iterator that merges two bucket iterators, aligning them to a common scale and combining buckets with the same index.
+ */
+final class MergingBucketIterator implements BucketIterator {
+
+    // Both sides are wrapped so they expose buckets at the same (target) scale.
+    private final BucketIterator left;
+    private final BucketIterator right;
+
+    private boolean exhausted;
+    private long index; // index of the bucket currently exposed via peekIndex()
+    private long count; // combined count of the bucket currently exposed via peekCount()
+
+    /**
+     * Creates a new merging iterator positioned on the first merged bucket.
+     *
+     * @param itA         the first iterator to merge
+     * @param itB         the second iterator to merge
+     * @param targetScale the histogram scale to which both iterators should be aligned
+     */
+    MergingBucketIterator(BucketIterator itA, BucketIterator itB, int targetScale) {
+        left = new ScaleAdjustingBucketIterator(itA, targetScale);
+        right = new ScaleAdjustingBucketIterator(itB, targetScale);
+        exhausted = false;
+        advance();
+    }
+
+    @Override
+    public void advance() {
+        boolean hasLeft = left.hasNext();
+        boolean hasRight = right.hasNext();
+        exhausted = hasLeft == false && hasRight == false;
+        if (exhausted) {
+            return;
+        }
+        long leftIndex = hasLeft ? left.peekIndex() : 0;
+        long rightIndex = hasRight ? right.peekIndex() : 0;
+
+        count = 0;
+        // Consume whichever side has the smaller bucket index; when both sides carry the
+        // same index, consume both and sum their counts into a single merged bucket.
+        if (hasLeft && (hasRight == false || leftIndex <= rightIndex)) {
+            index = leftIndex;
+            count += left.peekCount();
+            left.advance();
+        }
+        if (hasRight && (hasLeft == false || rightIndex <= leftIndex)) {
+            index = rightIndex;
+            count += right.peekCount();
+            right.advance();
+        }
+    }
+
+    @Override
+    public boolean hasNext() {
+        return exhausted == false;
+    }
+
+    @Override
+    public long peekCount() {
+        ensurePositioned();
+        return count;
+    }
+
+    @Override
+    public long peekIndex() {
+        ensurePositioned();
+        return index;
+    }
+
+    @Override
+    public int scale() {
+        // Both wrapped iterators were aligned to the same target scale.
+        return left.scale();
+    }
+
+    private void ensurePositioned() {
+        if (exhausted) {
+            throw new IllegalStateException("Iterator has no more buckets");
+        }
+    }
+}

+ 95 - 0
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java

@@ -0,0 +1,95 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.adjustScale;
+
+/**
+ * An iterator that wraps another bucket iterator and adjusts its scale.
+ * When scaling down, multiple buckets can collapse into a single one. This iterator ensures they are merged correctly.
+ */
+final class ScaleAdjustingBucketIterator implements BucketIterator {
+
+    private final BucketIterator delegate;
+    // targetScale - delegate.scale(); negative values mean we are scaling down.
+    private final int scaleAdjustment;
+
+    private long currentIndex;
+    private long currentCount;
+    boolean hasNextValue;
+
+    /**
+     * Creates a new scale-adjusting iterator.
+     *
+     * @param delegate    the iterator to wrap
+     * @param targetScale the target scale for the new iterator
+     */
+    ScaleAdjustingBucketIterator(BucketIterator delegate, int targetScale) {
+        this.delegate = delegate;
+        scaleAdjustment = targetScale - delegate.scale();
+        hasNextValue = true;
+        // Eagerly position on the first (possibly merged) bucket.
+        advance();
+    }
+
+    @Override
+    public boolean hasNext() {
+        return hasNextValue;
+    }
+
+    @Override
+    public long peekCount() {
+        assertEndNotReached();
+        return currentCount;
+    }
+
+    @Override
+    public long peekIndex() {
+        assertEndNotReached();
+        return currentIndex;
+    }
+
+    @Override
+    public void advance() {
+        assertEndNotReached();
+        hasNextValue = delegate.hasNext();
+        if (hasNextValue == false) {
+            return;
+        }
+        currentIndex = adjustScale(delegate.peekIndex(), delegate.scale(), scaleAdjustment);
+        currentCount = delegate.peekCount();
+        delegate.advance();
+        // When scaling down, several consecutive source buckets can map to the same
+        // target index; merge their counts into the current bucket.
+        while (delegate.hasNext() && adjustScale(delegate.peekIndex(), delegate.scale(), scaleAdjustment) == currentIndex) {
+            currentCount += delegate.peekCount();
+            delegate.advance();
+        }
+    }
+
+    private void assertEndNotReached() {
+        if (hasNextValue == false) {
+            throw new IllegalStateException("Iterator has no more buckets");
+        }
+    }
+
+    @Override
+    public int scale() {
+        // Equals the targetScale passed to the constructor.
+        return delegate.scale() + scaleAdjustment;
+    }
+}

+ 172 - 0
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java

@@ -0,0 +1,172 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.compareExponentiallyScaledValues;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.exponentiallyScaledToDoubleValue;
+
+/**
+ * Represents the bucket for values around zero in an exponential histogram.
+ * The range of this bucket is {@code [-zeroThreshold, +zeroThreshold]}.
+ * To allow efficient comparison with bucket boundaries, this class internally
+ * represents the zero threshold as a exponential histogram bucket index with a scale,
+ * computed via {@link ExponentialScaleUtils#computeIndex(double, int)}.
+ *
+ * @param index The index used with the scale to determine the zero threshold.
+ * @param scale The scale used with the index to determine the zero threshold.
+ * @param count The number of values in the zero bucket.
+ */
+public record ZeroBucket(long index, int scale, long count) {
+
+    // A singleton for an empty zero bucket with the smallest possible threshold.
+    private static final ZeroBucket MINIMAL_EMPTY = new ZeroBucket(MIN_INDEX, MIN_SCALE, 0);
+
+    /**
+     * Creates a new zero bucket with a specific threshold and count.
+     *
+     * @param zeroThreshold The threshold defining the bucket's range [-zeroThreshold, +zeroThreshold].
+     * @param count         The number of values in the bucket.
+     */
+    public ZeroBucket(double zeroThreshold, long count) {
+        // +1 so that the stored (index, scale) pair represents a value at least as large as
+        // zeroThreshold — presumably the upper boundary of the bucket containing it; confirm
+        // against ExponentialScaleUtils.computeIndex.
+        this(computeIndex(zeroThreshold, MAX_SCALE) + 1, MAX_SCALE, count);
+    }
+
+    /**
+     * @return A singleton instance of an empty zero bucket with the smallest possible threshold.
+     */
+    public static ZeroBucket minimalEmpty() {
+        return MINIMAL_EMPTY;
+    }
+
+    /**
+     * Creates a zero bucket with the smallest possible threshold and a given count.
+     *
+     * @param count The number of values in the bucket.
+     * @return A new {@link ZeroBucket}.
+     */
+    public static ZeroBucket minimalWithCount(long count) {
+        if (count == 0) {
+            return MINIMAL_EMPTY;
+        } else {
+            return new ZeroBucket(MINIMAL_EMPTY.index, MINIMAL_EMPTY.scale(), count);
+        }
+    }
+
+    /**
+     * Merges this zero bucket with another one.
+     * <ul>
+     *     <li>If the other zero bucket or both are empty, this instance is returned unchanged.</li>
+     *     <li>If this zero bucket is empty and the other one is populated, the other instance is returned unchanged.</li>
+     *     <li>Otherwise, the zero threshold is increased if necessary (by taking the maximum of the two), and the counts are summed.</li>
+     * </ul>
+     *
+     * @param other The other zero bucket to merge with.
+     * @return A new {@link ZeroBucket} representing the merged result.
+     */
+    public ZeroBucket merge(ZeroBucket other) {
+        if (other.count == 0) {
+            return this;
+        } else if (count == 0) {
+            return other;
+        } else {
+            // NOTE(review): assumes count + other.count does not overflow a long — confirm callers' bounds.
+            long totalCount = count + other.count;
+            // Both are populated, so we need to use the higher zero-threshold.
+            if (this.compareZeroThreshold(other) >= 0) {
+                return new ZeroBucket(index, scale, totalCount);
+            } else {
+                return new ZeroBucket(other.index, other.scale, totalCount);
+            }
+        }
+    }
+
+    /**
+     * Collapses all buckets from the given iterators whose lower boundaries are smaller than the zero threshold.
+     * The iterators are advanced to point at the first, non-collapsed bucket.
+     *
+     * @param bucketIterators The iterators whose buckets may be collapsed.
+     * @return A potentially updated {@link ZeroBucket} with the collapsed buckets' counts and an adjusted threshold.
+     */
+    public ZeroBucket collapseOverlappingBucketsForAll(BucketIterator... bucketIterators) {
+        ZeroBucket current = this;
+        ZeroBucket previous;
+        // Collapsing buckets can raise the threshold, which in turn may overlap further
+        // buckets of the other iterators; repeat until the threshold stops changing.
+        do {
+            previous = current;
+            for (BucketIterator buckets : bucketIterators) {
+                current = current.collapseOverlappingBuckets(buckets);
+            }
+        } while (previous.compareZeroThreshold(current) != 0);
+        return current;
+    }
+
+    /**
+     * Compares the zero threshold of this bucket with another one.
+     *
+     * @param other The other zero bucket to compare against.
+     * @return A negative integer, zero, or a positive integer if this bucket's threshold is less than,
+     *         equal to, or greater than the other's.
+     */
+    public int compareZeroThreshold(ZeroBucket other) {
+        return compareExponentiallyScaledValues(index, scale, other.index, other.scale);
+    }
+
+    /**
+     * @return The value of the zero threshold.
+     */
+    public double zeroThreshold() {
+        return exponentiallyScaledToDoubleValue(index, scale);
+    }
+
+    /**
+     * Collapses all buckets from the given iterator whose lower boundaries are smaller than the zero threshold.
+     * The iterator is advanced to point at the first, non-collapsed bucket.
+     *
+     * @param buckets The iterator whose buckets may be collapsed.
+     * @return A potentially updated {@link ZeroBucket} with the collapsed buckets' counts and an adjusted threshold.
+     */
+    public ZeroBucket collapseOverlappingBuckets(BucketIterator buckets) {
+
+        long collapsedCount = 0;
+        // Only meaningful when collapsedCount > 0; tracks the last (largest) collapsed index.
+        long highestCollapsedIndex = 0;
+        while (buckets.hasNext() && compareExponentiallyScaledValues(buckets.peekIndex(), buckets.scale(), index, scale) < 0) {
+            highestCollapsedIndex = buckets.peekIndex();
+            collapsedCount += buckets.peekCount();
+            buckets.advance();
+        }
+        if (collapsedCount == 0) {
+            return this;
+        } else {
+            long newZeroCount = count + collapsedCount;
+            // +1 because we need to adjust the zero threshold to the upper boundary of the collapsed bucket
+            long collapsedUpperBoundIndex = highestCollapsedIndex + 1;
+            if (compareExponentiallyScaledValues(index, scale, collapsedUpperBoundIndex, buckets.scale()) >= 0) {
+                // Our current zero-threshold is larger than the upper boundary of the largest collapsed bucket, so we keep it.
+                return new ZeroBucket(index, scale, newZeroCount);
+            } else {
+                return new ZeroBucket(collapsedUpperBoundIndex, buckets.scale(), newZeroCount);
+            }
+        }
+    }
+}

+ 180 - 0
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/package-info.java

@@ -0,0 +1,180 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+/**
+ * This library provides an implementation of merging and analysis algorithms for exponential histograms based on the
+ * <a href="https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram">OpenTelemetry definition</a>.
+ * It is designed as a complementary tool to the OpenTelemetry SDK, focusing specifically on efficient histogram merging and accurate
+ * percentile estimation.
+ *
+ * <h2>Overview</h2>
+ *
+ * The library implements base-2 exponential histograms with perfect subsetting. The most important properties are:
+ *
+ * <ul>
+ *   <li>The histogram has a scale parameter, which defines the accuracy. A higher scale implies a higher accuracy.</li>
+ *   <li>The {@code base} for the buckets is defined as {@code base = 2^(2^-scale)}.</li>
+ *   <li>The histogram bucket at index {@code i} has the range {@code (base^i, base^(i+1)]}</li>
+ *   <li>Negative values are represented by a separate negative range of buckets with the boundaries {@code (-base^(i+1), -base^i]}</li>
+ *   <li>Histograms support perfect subsetting: when the scale is decreased by one, each pair of adjacent buckets is merged into a
+ *       single bucket without introducing error</li>
+ *   <li>A special zero bucket with a zero-threshold is used to handle zero and close-to-zero values</li>
+ * </ul>
+ *
+ * For more details please refer to the
+ * <a href="https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram">OpenTelemetry definition</a>.
+ * <p>
+ * The library implements a sparse storage approach where only populated buckets consume memory and count towards the bucket limit.
+ * This differs from the OpenTelemetry implementation, which uses dense storage. While dense storage allows for O(1) time insertion of
+ * individual values, our sparse representation requires O(log m) time where m is the bucket capacity. However, the sparse
+ * representation enables more efficient storage and provides a simple merging algorithm with runtime linear in the number of
+ * populated buckets. Additionally, this library also provides an array-backed sparse storage, ensuring cache efficiency.
+ * <p>
+ * The sparse storage approach offers significant advantages for distributions with fewer distinct values than the bucket count,
+ * allowing the library to achieve representation of such distributions with an error so small that it won't be noticed in practice.
+ * This makes it suitable not only for exponential histograms but also as a universal solution for handling explicit bucket
+ * histograms.
+ *
+ * <h2>Merging Algorithm</h2>
+ *
+ * The merging algorithm works similarly to the merge-step of merge sort. We simultaneously walk through the buckets of both
+ * histograms in order, merging them on the fly as needed. If the total number of buckets in the end would exceed the bucket limit,
+ * we scale down as needed.
+ * <p>
+ * Before we merge the buckets, we need to take care of the special zero-bucket and bring both histograms to the same scale.
+ * <p>
+ * For the zero-bucket, we merge the zero threshold from both histograms and collapse any overlapping buckets into the resulting new
+ * zero bucket.
+ * <p>
+ * In order to bring both histograms to the same scale, we can make adjustments in both directions: we can increase or decrease the
+ * scale of histograms as needed.
+ * <p>
+ * See the upscaling section for details on how the upscaling works. Upscaling helps prevent the precision of
+ * the result histogram merged from many histograms from being dragged down to the lowest scale of a potentially misconfigured input
+ * histogram. For example, if a histogram is recorded with a too low zero threshold, this can result in a degraded scale when using
+ * dense histogram storage, even if the histogram only contains two points.
+ *
+ * <h3>Upscaling</h3>
+ *
+ * In general, we assume that all values in a bucket lie on a single point: the point of least relative error. This is the point
+ * {@code x} in the bucket such that:
+ *
+ * <pre>
+ * (x - l) / l = (u - x) / u
+ * </pre>
+ *
+ * where {@code l} is the lower bucket boundary and {@code u} is the upper bucket boundary.
+ * <p>
+ * This assumption allows us to increase the scale of histograms without increasing the bucket count. Buckets are simply mapped to
+ * the ones in the new scale containing the point of least relative error of the original buckets.
+ * <p>
+ * This can introduce a small error, as the original center might be moved slightly. Therefore, we ensure that the upscaling happens
+ * at most once to prevent errors from adding up. The higher the amount of upscaling, the less the error (higher scale means smaller
+ * buckets, which in turn means we get a better fit around the original point of least relative error).
+ *
+ * <h2>Distributions with Few Distinct Values</h2>
+ *
+ * The sparse storage model only requires memory linear to the total number of buckets, while dense storage needs to store the entire
+ * range of the smallest and biggest buckets.
+ * <p>
+ * This offers significant benefits for distributions with fewer distinct values:
+ * If we have at least as many buckets as we have distinct values to store in the histogram, we can represent this distribution with
+ * a much smaller error than the dense representation.
+ * This can be achieved by maintaining the scale at the maximum supported value (so the buckets become the smallest).
+ * At the time of writing, the maximum scale is 38, so the relative distance between the lower and upper bucket boundaries is
+ * {@code (2^2^(-38))}.
+ * <p>
+ * The impact of the error is best shown with a concrete example:
+ * If we store, for example, a duration value of {@code 10^15} nanoseconds (= roughly 11.5 days), this value will be stored in a
+ * bucket that guarantees a relative error of at most {@code 2^2^(-38)}, so roughly 2.5 microseconds in this case.
+ * As long as the number of values we insert is lower than the bucket count, we are guaranteed that no down-scaling happens:
+ * In contrast to dense storage, the scale does not depend on the spread between the smallest and largest bucket index.
+ * <p>
+ * To clarify the difference between dense and sparse storage, let's assume that we have an empty histogram and the maximum scale is
+ * zero while the maximum bucket count is four.
+ * The same logic applies to higher scales and bucket counts, but we use these values to get easier numbers for this example.
+ * The scale of zero means that our bucket boundaries are {@code 1, 2, 4, 8, 16, 32, 64, 128, 256, ...}.
+ * We now want to insert the value {@code 6} into the histogram. The dense storage works by storing an array for the bucket counts
+ * plus an initial offset.
+ * This means that the first slot in the bucket counts array corresponds to the bucket with index {@code offset} and the last one to
+ * {@code offset + bucketCounts.length - 1}.
+ * So if we add the value {@code 6} to the histogram, it falls into the {@code (4,8]} bucket, which has the index {@code 2}.
+ * <p>
+ * So our dense histogram looks like this:
+ *
+ * <pre>
+ * offset = 2
+ * bucketCounts = [1, 0, 0, 0] // represent bucket counts for bucket index 2 to 5
+ * </pre>
+ *
+ * If we now insert the value {@code 20} ({@code (16,32]}, bucket index 4), everything is still fine:
+ *
+ * <pre>
+ * offset = 2
+ * bucketCounts = [1, 0, 1, 0] // represent bucket counts for bucket index 2 to 5
+ * </pre>
+ *
+ * However, we run into trouble if we insert the value {@code 100}, which corresponds to index 6: That index is outside of the bounds
+ * of our array.
+ * We can't just increase the {@code offset}, because the first bucket in our array is populated too.
+ * We have no other option other than decreasing the scale of the histogram, to make sure that our values {@code 6} and {@code 100}
+ * fall in the range of four <strong>consecutive</strong> buckets due to the bucket count limit of the dense storage.
+ * <p>
+ * In contrast, a sparse histogram has no trouble storing this data while keeping the scale of zero:
+ *
+ * <pre>
+ * bucketIndicesToCounts: {
+ *   "2" : 1,
+ *   "4" : 1,
+ *   "6" : 1
+ * }
+ * </pre>
+ *
+ * Downscaling on the sparse representation only happens if either:
+ * <ul>
+ *   <li>The number of populated buckets would become bigger than our maximum bucket count. We have to downscale to combine
+ *       neighboring, populated buckets to a single bucket until we are below our limit again.</li>
+ *   <li>The highest or smallest indices require more bits to store than we allow. This does not happen in our implementation for
+ *       normal inputs, because we allow up to 62 bits for index storage, which fits the entire numeric range of IEEE 754 double
+ *       precision floats at our maximum scale.</li>
+ * </ul>
+ *
+ * <h3>Handling Explicit Bucket Histograms</h3>
+ *
+ * We can make use of this property to convert explicit bucket histograms
+ * (<a href="https://opentelemetry.io/docs/specs/otel/metrics/data-model/#histogram">OpenTelemetry Histogram</a>) to exponential
+ * ones by again assuming that all values in a bucket lie in a single point:
+ * <ul>
+ *   <li>For each explicit bucket, we take its point of least relative error and add it to the corresponding exponential histogram
+ *       bucket with the corresponding count.</li>
+ *   <li>The open, upper, and lower buckets, including infinity, will need special treatment, but these are not useful for percentile
+ *       estimates anyway.</li>
+ * </ul>
+ *
+ * This gives us a great solution for universally dealing with histograms:
+ * When merging exponential histograms generated from explicit ones, the scale is not decreased (and therefore the error not
+ * increased) as long as the number of distinct buckets from the original explicit bucket histograms does not exceed the exponential
+ * histogram bucket count. As a result, the computed percentiles will be precise with only the
+ * <a href="#distributions-with-few-distinct-values">relative error of the initial conversion</a>.
+ * In addition, this allows us to compute percentiles on mixed explicit bucket histograms or even mix them with exponential ones by
+ * just using the exponential histogram algorithms.
+ */
+package org.elasticsearch.exponentialhistogram;

+ 88 - 0
libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java

@@ -0,0 +1,88 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import org.elasticsearch.test.ESTestCase;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.stream.IntStream;
+import java.util.stream.LongStream;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+
+public class DownscaleStatsTests extends ESTestCase {
+
+    public void testExponential() {
+        // Geometrically growing indices (1.1^i), clamped to MAX_INDEX and deduplicated.
+        long[] indices = IntStream.range(0, 100).mapToLong(i -> (long) Math.min(MAX_INDEX, Math.pow(1.1, i))).distinct().toArray();
+        verifyFor(indices);
+    }
+
+    public void testNumericalLimits() {
+        verifyFor(MIN_INDEX, MAX_INDEX);
+    }
+
+    public void testRandom() {
+        for (int iteration = 0; iteration < 100; iteration++) {
+            List<Long> indices = IntStream.range(0, 1000).mapToObj(ignored -> randomLongBetween(MIN_INDEX, MAX_INDEX)).distinct().toList();
+            verifyFor(indices);
+        }
+    }
+
+    void verifyFor(long... indices) {
+        verifyFor(LongStream.of(indices).boxed().toList());
+    }
+
+    void verifyFor(Collection<Long> indices) {
+        // sanity check, we require unique indices
+        assertThat(indices.size(), equalTo(new HashSet<>(indices).size()));
+
+        List<Long> ascending = new ArrayList<>(indices);
+        ascending.sort(Long::compare);
+
+        // Feed each pair of neighboring indices into the stats, as the merge algorithm would.
+        DownscaleStats stats = new DownscaleStats();
+        for (int i = 1; i < ascending.size(); i++) {
+            stats.add(ascending.get(i - 1), ascending.get(i));
+        }
+
+        // For every possible scale reduction, compare against a brute-force computation
+        // of how many buckets collapse when indices are right-shifted by that amount.
+        for (int i = 0; i <= MAX_INDEX_BITS; i++) {
+            int scaleReduction = i;
+            long distinctAfter = ascending.stream().mapToLong(Long::longValue).map(index -> index >> scaleReduction).distinct().count();
+            long collapsed = ascending.size() - distinctAfter;
+
+            assertThat(
+                "Expected size after reduction of " + i + " to match",
+                stats.getCollapsedBucketCountAfterScaleReduction(scaleReduction),
+                equalTo((int) collapsed)
+            );
+        }
+
+    }
+}

+ 45 - 0
libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java

@@ -0,0 +1,45 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import org.elasticsearch.test.ESTestCase;
+
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
+import static org.hamcrest.Matchers.lessThanOrEqualTo;
+
+public class ExponentialHistogramGeneratorTests extends ESTestCase {
+
+    public void testVeryLargeValue() {
+        // A value near Double.MAX_VALUE must land in a bucket whose boundaries enclose it.
+        double value = Double.MAX_VALUE / 10;
+        ExponentialHistogram histo = ExponentialHistogram.create(1, value);
+
+        // The histogram holds a single positive value, so its first bucket is the one containing it.
+        long index = histo.positiveBuckets().iterator().peekIndex();
+        int scale = histo.scale();
+
+        double lowerBound = ExponentialScaleUtils.getLowerBucketBoundary(index, scale);
+        double upperBound = ExponentialScaleUtils.getUpperBucketBoundary(index, scale);
+
+        assertThat("Lower bucket boundary should be smaller than value", lowerBound, lessThanOrEqualTo(value));
+        assertThat("Upper bucket boundary should be greater than value", upperBound, greaterThanOrEqualTo(value));
+    }
+
+}

+ 173 - 0
libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java

@@ -0,0 +1,173 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import org.elasticsearch.test.ESTestCase;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.adjustScale;
+import static org.hamcrest.Matchers.closeTo;
+import static org.hamcrest.Matchers.equalTo;
+
+public class ExponentialHistogramMergerTests extends ESTestCase {
+
+    public void testZeroThresholdCollapsesOverlappingBuckets() {
+        // Zero bucket threshold slightly above 2: merging must absorb every bucket
+        // whose range overlaps [-threshold, threshold].
+        FixedCapacityExponentialHistogram first = new FixedCapacityExponentialHistogram(100);
+        first.setZeroBucket(new ZeroBucket(2.0001, 10));
+
+        FixedCapacityExponentialHistogram second = new FixedCapacityExponentialHistogram(100);
+        first.resetBuckets(0); // scale 0 means base 2
+        first.tryAddBucket(0, 1, false); // bucket (-2, -1]
+        first.tryAddBucket(1, 1, false); // bucket (-4, -2]
+        first.tryAddBucket(2, 7, false); // bucket (-8, -4]
+        first.tryAddBucket(0, 1, true); // bucket (1, 2]
+        first.tryAddBucket(1, 1, true); // bucket (2, 4]
+        first.tryAddBucket(2, 42, true); // bucket (4, 8]
+
+        ExponentialHistogram mergeResult = mergeWithMinimumScale(100, 0, first, second);
+
+        // threshold is expanded to 4.0, the boundary of the last collapsed bucket;
+        // count is 10 (original) + 4 (one count from each collapsed bucket)
+        assertThat(mergeResult.zeroBucket().zeroThreshold(), equalTo(4.0));
+        assertThat(mergeResult.zeroBucket().count(), equalTo(14L));
+
+        // only the (4, 8] bucket should be left
+        assertThat(mergeResult.scale(), equalTo(0));
+
+        BucketIterator negBuckets = mergeResult.negativeBuckets().iterator();
+        assertThat(negBuckets.peekIndex(), equalTo(2L));
+        assertThat(negBuckets.peekCount(), equalTo(7L));
+        negBuckets.advance();
+        assertThat(negBuckets.hasNext(), equalTo(false));
+
+        BucketIterator posBuckets = mergeResult.positiveBuckets().iterator();
+        assertThat(posBuckets.peekIndex(), equalTo(2L));
+        assertThat(posBuckets.peekCount(), equalTo(42L));
+        posBuckets.advance();
+        assertThat(posBuckets.hasNext(), equalTo(false));
+
+        // ensure buckets of the accumulated histogram are collapsed too if needed
+        FixedCapacityExponentialHistogram third = new FixedCapacityExponentialHistogram(100);
+        third.setZeroBucket(new ZeroBucket(45.0, 1));
+
+        mergeResult = mergeWithMinimumScale(100, 0, mergeResult, third);
+        assertThat(mergeResult.zeroBucket().zeroThreshold(), closeTo(45.0, 0.000001));
+        assertThat(mergeResult.zeroBucket().count(), equalTo(1L + 14L + 42L + 7L));
+        assertThat(mergeResult.positiveBuckets().iterator().hasNext(), equalTo(false));
+        assertThat(mergeResult.negativeBuckets().iterator().hasNext(), equalTo(false));
+    }
+
+    public void testEmptyZeroBucketIgnored() {
+        // A zero bucket with count 0 must not influence the merge result,
+        // even if its threshold is larger than the other histogram's buckets.
+        FixedCapacityExponentialHistogram first = new FixedCapacityExponentialHistogram(100);
+        first.setZeroBucket(new ZeroBucket(2.0, 10));
+        first.resetBuckets(0); // scale 0 means base 2
+        first.tryAddBucket(2, 42L, true); // bucket (4, 8]
+
+        FixedCapacityExponentialHistogram second = new FixedCapacityExponentialHistogram(100);
+        second.setZeroBucket(new ZeroBucket(100.0, 0)); // empty zero bucket with a huge threshold
+
+        ExponentialHistogram mergeResult = mergeWithMinimumScale(100, 0, first, second);
+
+        // threshold and count of the non-empty zero bucket are preserved unchanged
+        assertThat(mergeResult.zeroBucket().zeroThreshold(), equalTo(2.0));
+        assertThat(mergeResult.zeroBucket().count(), equalTo(10L));
+
+        BucketIterator posBuckets = mergeResult.positiveBuckets().iterator();
+        assertThat(posBuckets.peekIndex(), equalTo(2L));
+        assertThat(posBuckets.peekCount(), equalTo(42L));
+        posBuckets.advance();
+        assertThat(posBuckets.hasNext(), equalTo(false));
+    }
+
+    public void testUpscalingDoesNotExceedIndexLimits() {
+        // Exercise all four combinations of (positive/negative bucket) x (MIN_INDEX/MAX_INDEX).
+        for (int i = 0; i < 4; i++) {
+
+            boolean isPositive = i % 2 == 0;
+            boolean useMinIndex = i > 1;
+
+            FixedCapacityExponentialHistogram histo = new FixedCapacityExponentialHistogram(2);
+            histo.resetBuckets(20);
+
+            // half of the representable index range: upscaling by exactly one step is still possible
+            long index = useMinIndex ? MIN_INDEX / 2 : MAX_INDEX / 2;
+
+            histo.tryAddBucket(index, 1, isPositive);
+
+            ExponentialHistogramMerger merger = new ExponentialHistogramMerger(100);
+            merger.add(histo);
+            ExponentialHistogram result = merger.get();
+
+            // the merger may upscale to at most 21; going further would overflow the index
+            assertThat(result.scale(), equalTo(21));
+            if (isPositive) {
+                assertThat(result.positiveBuckets().iterator().peekIndex(), equalTo(adjustScale(index, 20, 1)));
+            } else {
+                assertThat(result.negativeBuckets().iterator().peekIndex(), equalTo(adjustScale(index, 20, 1)));
+            }
+        }
+    }
+
+    /**
+     * Verify that the resulting histogram is independent of the order of elements and therefore merges performed.
+     */
+    public void testMergeOrderIndependence() {
+        // 17 exact zeros plus random values spanning roughly 10^-4 .. 10^4 in both signs
+        List<Double> values = IntStream.range(0, 10_000)
+            .mapToDouble(i -> i < 17 ? 0 : (-1 + 2 * randomDouble()) * Math.pow(10, randomIntBetween(-4, 4)))
+            .boxed()
+            .collect(Collectors.toCollection(ArrayList::new));
+
+        ExponentialHistogram reference = ExponentialHistogram.create(20, values.stream().mapToDouble(Double::doubleValue).toArray());
+
+        // re-insert the same values in 100 random orders; the result must always be identical
+        for (int i = 0; i < 100; i++) {
+            Collections.shuffle(values, random());
+            ExponentialHistogram shuffled = ExponentialHistogram.create(20, values.stream().mapToDouble(Double::doubleValue).toArray());
+
+            assertThat("Expected same scale", shuffled.scale(), equalTo(reference.scale()));
+            assertThat("Expected same zero-bucket", shuffled.zeroBucket(), equalTo(reference.zeroBucket()));
+            assertBucketsEqual(shuffled.negativeBuckets(), reference.negativeBuckets());
+            assertBucketsEqual(shuffled.positiveBuckets(), reference.positiveBuckets());
+        }
+    }
+
+    /**
+     * Asserts that both bucket sets contain exactly the same (index, count) pairs in iteration order.
+     */
+    private void assertBucketsEqual(ExponentialHistogram.Buckets bucketsA, ExponentialHistogram.Buckets bucketsB) {
+        BucketIterator itA = bucketsA.iterator();
+        BucketIterator itB = bucketsB.iterator();
+        assertThat("Expecting both set of buckets to be empty or non-empty", itA.hasNext(), equalTo(itB.hasNext()));
+        while (itA.hasNext() && itB.hasNext()) {
+            assertThat(itA.peekIndex(), equalTo(itB.peekIndex()));
+            assertThat(itA.peekCount(), equalTo(itB.peekCount()));
+            itA.advance();
+            itB.advance();
+            // BUGFIX: this check must happen AFTER advancing; before advancing the loop
+            // condition already guarantees both hasNext() are true, making it vacuous
+            assertThat("The number of buckets is different", itA.hasNext(), equalTo(itB.hasNext()));
+        }
+    }
+
+    /**
+     * Merges the given histograms using a merger configured with the given bucket capacity and minimum scale.
+     */
+    private static ExponentialHistogram mergeWithMinimumScale(int bucketCount, int scale, ExponentialHistogram... histograms) {
+        ExponentialHistogramMerger merger = ExponentialHistogramMerger.createForTesting(bucketCount, scale);
+        for (ExponentialHistogram histogram : histograms) {
+            merger.add(histogram);
+        }
+        return merger.get();
+    }
+
+}

+ 211 - 0
libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java

@@ -0,0 +1,211 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import ch.obermuhlner.math.big.BigDecimalMath;
+
+import org.elasticsearch.test.ESTestCase;
+
+import java.math.BigDecimal;
+import java.math.MathContext;
+import java.math.RoundingMode;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.SCALE_UP_CONSTANT_TABLE;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.adjustScale;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.compareExponentiallyScaledValues;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getLowerBucketBoundary;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getMaximumScaleIncrease;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getPointOfLeastRelativeError;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getUpperBucketBoundary;
+import static org.hamcrest.Matchers.closeTo;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
+import static org.hamcrest.Matchers.lessThan;
+import static org.hamcrest.Matchers.lessThanOrEqualTo;
+
+public class ExponentialScaleUtilsTests extends ESTestCase {
+
+    public void testMaxIndex() {
+        // MAX_INDEX cannot be upscaled at all; half of it can be upscaled exactly once
+        assertThat(getMaximumScaleIncrease(MAX_INDEX), equalTo(0));
+        assertThat(getMaximumScaleIncrease(MAX_INDEX - 1), equalTo(0));
+        assertThat(getMaximumScaleIncrease(MAX_INDEX >> 1), equalTo(1));
+        // sanity check: scaling MAX_INDEX up by two steps (x4) would overflow a long
+        assertThrows(ArithmeticException.class, () -> Math.multiplyExact(MAX_INDEX, 4));
+    }
+
+    public void testMinIndex() {
+        // mirror of testMaxIndex for the negative index limit
+        assertThat(getMaximumScaleIncrease(MIN_INDEX), equalTo(0));
+        assertThat(getMaximumScaleIncrease(MIN_INDEX + 1), equalTo(0));
+        assertThat(getMaximumScaleIncrease(MIN_INDEX >> 1), equalTo(0));
+        assertThat(getMaximumScaleIncrease((MIN_INDEX + 1) >> 1), equalTo(1));
+        assertThrows(ArithmeticException.class, () -> Math.multiplyExact(MIN_INDEX, 4));
+    }
+
+    public void testExtremeValueIndexing() {
+        // values near the limits of the double range must still map to buckets enclosing them
+        double leeway = Math.pow(10.0, 20);
+
+        for (double testValue : new double[] { Double.MAX_VALUE / leeway, Double.MIN_VALUE * leeway }) {
+            long idx = computeIndex(testValue, MAX_SCALE);
+            double lowerBound = getLowerBucketBoundary(idx, MAX_SCALE);
+            double upperBound = getUpperBucketBoundary(idx, MAX_SCALE);
+            assertThat(lowerBound, lessThanOrEqualTo(testValue));
+            assertThat(upperBound, greaterThanOrEqualTo(testValue));
+            assertThat(lowerBound, lessThan(upperBound));
+        }
+    }
+
+    public void testRandomValueIndexing() {
+        for (int i = 0; i < 100_000; i++) {
+            // generate values in the range 10^-100 to 10^100
+            double exponent = randomDouble() * 200 - 100;
+            double testValue = Math.pow(10, exponent);
+            int scale = randomIntBetween(MIN_SCALE / 2, MAX_SCALE / 2);
+            long index = computeIndex(testValue, scale);
+
+            double lowerBound = getLowerBucketBoundary(index, scale);
+            double upperBound = getUpperBucketBoundary(index, scale);
+            double pointOfLeastError = getPointOfLeastRelativeError(index, scale);
+
+            String baseMsg = " for input value " + testValue + " and scale " + scale;
+
+            assertThat("Expected lower bound to be less than input value" + baseMsg, lowerBound, lessThanOrEqualTo(testValue));
+            // BUGFIX: previously compared upperBound against itself, making the assertion vacuous
+            assertThat("Expected upper bound to be greater than input value" + baseMsg, upperBound, greaterThanOrEqualTo(testValue));
+            assertThat("Expected lower bound to be less than upper bound" + baseMsg, lowerBound, lessThan(upperBound));
+
+            // only do this check for ranges where we have enough numeric stability
+            if (lowerBound > Math.pow(10, -250) && upperBound < Math.pow(10, 250)) {
+
+                // the point of least relative error must lie strictly inside the bucket ...
+                assertThat(
+                    "Expected point of least error to be greater than lower bound" + baseMsg,
+                    pointOfLeastError,
+                    greaterThan(lowerBound)
+                );
+                assertThat("Expected point of least error to be less than upper bound" + baseMsg, pointOfLeastError, lessThan(upperBound));
+
+                // ... and roughly balance the relative error towards both boundaries
+                double errorLower = (pointOfLeastError - lowerBound) / lowerBound;
+                double errorUpper = (upperBound - pointOfLeastError) / upperBound;
+                assertThat(errorLower / errorUpper, closeTo(1, 0.1));
+            }
+
+        }
+    }
+
+    // NOTE(review): method name contains a typo ("Adjustement"); kept as-is to preserve the test identifier.
+    public void testRandomIndicesScaleAdjustement() {
+
+        for (int i = 0; i < 100_000; i++) {
+            long index = randomLongBetween(MIN_INDEX, MAX_INDEX);
+            int currentScale = randomIntBetween(MIN_SCALE, MAX_SCALE);
+            int maxAdjustment = Math.min(MAX_SCALE - currentScale, getMaximumScaleIncrease(index));
+
+            // upscaling followed by the inverse downscale must be lossless
+            assertThat(
+                adjustScale(adjustScale(index, currentScale, maxAdjustment), currentScale + maxAdjustment, -maxAdjustment),
+                equalTo(index)
+            );
+            // if the index (not the scale cap) limited the adjustment, one more step would exceed the index range
+            if (currentScale + maxAdjustment < MAX_SCALE) {
+                if (index > 0) {
+                    assertThat(adjustScale(index, currentScale, maxAdjustment) * 2, greaterThan(MAX_INDEX));
+                } else if (index < 0) {
+                    assertThat(adjustScale(index, currentScale, maxAdjustment) * 2, lessThan(MIN_INDEX));
+                }
+            }
+        }
+
+    }
+
+    public void testRandomBucketBoundaryComparison() {
+
+        for (int i = 0; i < 100_000; i++) {
+            long indexA = randomLongBetween(MIN_INDEX, MAX_INDEX);
+            long indexB = randomLongBetween(MIN_INDEX, MAX_INDEX);
+            int scaleA = randomIntBetween(MIN_SCALE, MAX_SCALE);
+            int scaleB = randomIntBetween(MIN_SCALE, MAX_SCALE);
+
+            // shrink the indices until the boundary is representable as a finite double
+            double lowerBoundA = getLowerBucketBoundary(indexA, scaleA);
+            while (Double.isInfinite(lowerBoundA)) {
+                indexA = indexA >> 1;
+                lowerBoundA = getLowerBucketBoundary(indexA, scaleA);
+            }
+            double lowerBoundB = getLowerBucketBoundary(indexB, scaleB);
+            while (Double.isInfinite(lowerBoundB)) {
+                indexB = indexB >> 1;
+                lowerBoundB = getLowerBucketBoundary(indexB, scaleB);
+            }
+
+            // the exact comparison must agree with the double-based one whenever doubles can distinguish the bounds
+            if (lowerBoundA != lowerBoundB) {
+                assertThat(
+                    Double.compare(lowerBoundA, lowerBoundB),
+                    equalTo(compareExponentiallyScaledValues(indexA, scaleA, indexB, scaleB))
+                );
+            }
+        }
+    }
+
+    public void testUpscalingAccuracy() {
+        // Use slightly adjusted scales to not run into numeric trouble, because we don't use exact maths here
+        int minScale = MIN_SCALE + 7;
+        int maxScale = MAX_SCALE - 15;
+
+        for (int i = 0; i < 10_000; i++) {
+
+            int startScale = randomIntBetween(minScale, maxScale - 1);
+            int scaleIncrease = randomIntBetween(1, maxScale - startScale);
+
+            // pick a random index (either sign) guaranteed to stay in range after upscaling
+            long index = MAX_INDEX >> scaleIncrease >> (int) (randomDouble() * (MAX_INDEX_BITS - scaleIncrease));
+            index = Math.max(1, index);
+            index = (long) ((2 * randomDouble() - 1) * index);
+
+            double midPoint = getPointOfLeastRelativeError(index, startScale);
+            // limit the numeric range, otherwise we get rounding errors causing the test to fail
+            while (midPoint > Math.pow(10, 10) || midPoint < Math.pow(10, -10)) {
+                index /= 2;
+                midPoint = getPointOfLeastRelativeError(index, startScale);
+            }
+
+            // upscaling must map the bucket onto the one containing its point of least relative error
+            long scaledUpIndex = adjustScale(index, startScale, scaleIncrease);
+            long correctIdx = computeIndex(midPoint, startScale + scaleIncrease);
+            // Due to rounding problems in the tests, we can still be off by one for extreme scales
+            assertThat(scaledUpIndex, equalTo(correctIdx));
+        }
+    }
+
+    public void testScaleUpTableUpToDate() {
+
+        // Recompute the precomputed SCALE_UP_CONSTANT_TABLE with high-precision BigDecimal maths.
+        MathContext mc = new MathContext(1000);
+        BigDecimal one = new BigDecimal(1, mc);
+        BigDecimal two = new BigDecimal(2, mc);
+
+        for (int scale = MIN_SCALE; scale <= MAX_SCALE; scale++) {
+            // base = 2^(2^-scale); factor = 1 + 2^scale * (1 - log2(1 + base))
+            BigDecimal base = BigDecimalMath.pow(two, two.pow(-scale, mc), mc);
+            BigDecimal factor = one.add(two.pow(scale, mc).multiply(one.subtract(BigDecimalMath.log2(one.add(base), mc))));
+
+            // the table stores the factor as a fixed-point long (scaled by 2^63, rounded down)
+            BigDecimal scaledFactor = factor.multiply(two.pow(63, mc)).setScale(0, RoundingMode.FLOOR);
+            assertThat(SCALE_UP_CONSTANT_TABLE[scale - MIN_SCALE], equalTo(scaledFactor.longValue()));
+        }
+    }
+
+}

+ 60 - 0
libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogramTests.java

@@ -0,0 +1,60 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import org.elasticsearch.test.ESTestCase;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class FixedCapacityExponentialHistogramTests extends ESTestCase {
+
+    /**
+     * The per-sign value counts must track bucket additions and be cleared on reset.
+     */
+    public void testValueCountUpdatedCorrectly() {
+
+        FixedCapacityExponentialHistogram histogram = new FixedCapacityExponentialHistogram(100);
+
+        // empty histogram: both counts start at zero
+        assertThat(histogram.negativeBuckets().valueCount(), equalTo(0L));
+        assertThat(histogram.positiveBuckets().valueCount(), equalTo(0L));
+
+        histogram.tryAddBucket(1, 10, false);
+
+        assertThat(histogram.negativeBuckets().valueCount(), equalTo(10L));
+        assertThat(histogram.positiveBuckets().valueCount(), equalTo(0L));
+
+        histogram.tryAddBucket(2, 3, false);
+        histogram.tryAddBucket(3, 4, false);
+        histogram.tryAddBucket(1, 5, true);
+
+        assertThat(histogram.negativeBuckets().valueCount(), equalTo(17L));
+        assertThat(histogram.positiveBuckets().valueCount(), equalTo(5L));
+
+        histogram.tryAddBucket(2, 3, true);
+        histogram.tryAddBucket(3, 4, true);
+
+        assertThat(histogram.negativeBuckets().valueCount(), equalTo(17L));
+        assertThat(histogram.positiveBuckets().valueCount(), equalTo(12L));
+
+        // resetting the buckets must clear both counts again
+        histogram.resetBuckets(0);
+
+        assertThat(histogram.negativeBuckets().valueCount(), equalTo(0L));
+        assertThat(histogram.positiveBuckets().valueCount(), equalTo(0L));
+    }
+}

+ 310 - 0
libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java

@@ -0,0 +1,310 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import org.apache.commons.math3.distribution.BetaDistribution;
+import org.apache.commons.math3.distribution.ExponentialDistribution;
+import org.apache.commons.math3.distribution.GammaDistribution;
+import org.apache.commons.math3.distribution.LogNormalDistribution;
+import org.apache.commons.math3.distribution.NormalDistribution;
+import org.apache.commons.math3.distribution.RealDistribution;
+import org.apache.commons.math3.distribution.UniformRealDistribution;
+import org.apache.commons.math3.distribution.WeibullDistribution;
+import org.apache.commons.math3.random.Well19937c;
+import org.elasticsearch.test.ESTestCase;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.stream.DoubleStream;
+import java.util.stream.IntStream;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex;
+import static org.hamcrest.Matchers.closeTo;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.lessThan;
+import static org.hamcrest.Matchers.lessThanOrEqualTo;
+import static org.hamcrest.Matchers.notANumber;
+
+public class QuantileAccuracyTests extends ESTestCase {
+
+    // quantiles probed by every accuracy test, covering the extremes and common percentiles
+    public static final double[] QUANTILES_TO_TEST = { 0, 0.0000001, 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999999, 1.0 };
+
+    private static int randomBucketCount() {
+        // exponentially distribute the bucket count to test more for smaller sizes
+        return (int) Math.round(5 + Math.pow(1995, randomDouble()));
+    }
+
+    public void testNoNegativeZeroReturned() {
+        // quantiles of a tiny all-negative histogram must return +0.0, never -0.0
+        FixedCapacityExponentialHistogram histogram = new FixedCapacityExponentialHistogram(2);
+        histogram.resetBuckets(MAX_SCALE);
+        // add a single, negative bucket close to zero
+        histogram.tryAddBucket(MIN_INDEX, 3, false);
+        double median = ExponentialHistogramQuantile.getQuantile(histogram, 0.5);
+        assertThat(median, equalTo(0.0));
+    }
+
+    // Accuracy checks against common distributions, each with a randomized seed.
+
+    public void testUniformDistribution() {
+        testDistributionQuantileAccuracy(new UniformRealDistribution(new Well19937c(randomInt()), 0, 100));
+    }
+
+    public void testNormalDistribution() {
+        testDistributionQuantileAccuracy(new NormalDistribution(new Well19937c(randomInt()), 100, 15));
+    }
+
+    public void testExponentialDistribution() {
+        testDistributionQuantileAccuracy(new ExponentialDistribution(new Well19937c(randomInt()), 10));
+    }
+
+    public void testLogNormalDistribution() {
+        testDistributionQuantileAccuracy(new LogNormalDistribution(new Well19937c(randomInt()), 0, 1));
+    }
+
+    public void testGammaDistribution() {
+        testDistributionQuantileAccuracy(new GammaDistribution(new Well19937c(randomInt()), 2, 5));
+    }
+
+    public void testBetaDistribution() {
+        testDistributionQuantileAccuracy(new BetaDistribution(new Well19937c(randomInt()), 2, 5));
+    }
+
+    public void testWeibullDistribution() {
+        testDistributionQuantileAccuracy(new WeibullDistribution(new Well19937c(randomInt()), 2, 5));
+    }
+
+    public void testBasicSmall() {
+        // values 1..9 with 100 buckets: every value gets its own bucket, so quantiles are near-exact
+        DoubleStream values = IntStream.range(1, 10).mapToDouble(Double::valueOf);
+        double maxError = testQuantileAccuracy(values.toArray(), 100);
+        assertThat(maxError, lessThan(0.000001));
+    }
+
+    public void testPercentileOverlapsZeroBucket() {
+        // quantiles falling onto the three exact-zero values must return exactly 0
+        ExponentialHistogram histo = ExponentialHistogram.create(9, -3.0, -2, -1, 0, 0, 0, 1, 2, 3);
+        assertThat(ExponentialHistogramQuantile.getQuantile(histo, 8.0 / 16.0), equalTo(0.0));
+        assertThat(ExponentialHistogramQuantile.getQuantile(histo, 7.0 / 16.0), equalTo(0.0));
+        assertThat(ExponentialHistogramQuantile.getQuantile(histo, 9.0 / 16.0), equalTo(0.0));
+        assertThat(ExponentialHistogramQuantile.getQuantile(histo, 5.0 / 16.0), closeTo(-0.5, 0.000001));
+        assertThat(ExponentialHistogramQuantile.getQuantile(histo, 11.0 / 16.0), closeTo(0.5, 0.000001));
+    }
+
+    public void testBigJump() {
+        // a single extreme outlier must not hurt the accuracy of the remaining quantiles
+        double[] values = DoubleStream.concat(IntStream.range(0, 18).mapToDouble(Double::valueOf), DoubleStream.of(1_000_000.0)).toArray();
+
+        double maxError = testQuantileAccuracy(values, 500);
+        assertThat(maxError, lessThan(0.000001));
+    }
+
+    public void testExplicitSkewedData() {
+        // two clusters (~240-250 and ~50k-60k) plus one extreme outlier (575247)
+        double[] data = new double[] {
+            245,
+            246,
+            247.249,
+            240,
+            243,
+            248,
+            250,
+            241,
+            244,
+            245,
+            245,
+            247,
+            243,
+            242,
+            241,
+            50100,
+            51246,
+            52247,
+            52249,
+            51240,
+            53243,
+            59248,
+            59250,
+            57241,
+            56244,
+            55245,
+            56245,
+            575247,
+            58243,
+            51242,
+            54241 };
+
+        double maxError = testQuantileAccuracy(data, data.length / 2);
+        assertThat(maxError, lessThan(0.007));
+    }
+
+    public void testEmptyHistogram() {
+        // quantiles of an empty histogram are undefined and must be NaN
+        ExponentialHistogram histo = ExponentialHistogram.create(1);
+        for (double q : QUANTILES_TO_TEST) {
+            assertThat(ExponentialHistogramQuantile.getQuantile(histo, q), notANumber());
+        }
+    }
+
+    public void testSingleValueHistogram() {
+        // all quantiles of a single-value histogram collapse to that value
+        ExponentialHistogram histo = ExponentialHistogram.create(1, 42.0);
+        for (double q : QUANTILES_TO_TEST) {
+            assertThat(ExponentialHistogramQuantile.getQuantile(histo, q), closeTo(42, 0.0000001));
+        }
+    }
+
+    public void testBucketCountImpact() {
+        RealDistribution distribution = new LogNormalDistribution(new Well19937c(randomInt()), 0, 1);
+        int sampleSize = between(100, 50_000);
+        double[] values = generateSamples(distribution, sampleSize);
+
+        // Verify that more buckets generally means better accuracy
+        double errorWithFewBuckets = testQuantileAccuracy(values, 20);
+        double errorWithManyBuckets = testQuantileAccuracy(values, 200);
+        assertThat("More buckets should improve accuracy", errorWithManyBuckets, lessThanOrEqualTo(errorWithFewBuckets));
+    }
+
+    public void testMixedSignValues() {
+        // accuracy must hold for histograms containing both negative and positive values
+        double[] values = new double[between(100, 10_000)];
+        for (int i = 0; i < values.length; i++) {
+            values[i] = (randomDouble() * 200) - 100; // Range from -100 to 100
+        }
+
+        testQuantileAccuracy(values, 100);
+    }
+
+    public void testSkewedData() {
+        // Create a highly skewed dataset
+        double[] values = new double[10000];
+        for (int i = 0; i < values.length; i++) {
+            if (randomDouble() < 0.9) {
+                // 90% of values are small
+                values[i] = randomDouble() * 10;
+            } else {
+                // 10% are very large
+                values[i] = randomDouble() * 10000 + 100;
+            }
+        }
+
+        testQuantileAccuracy(values, 100);
+    }
+
+    public void testDataWithZeros() {
+        // a significant share of exact zeros exercises the zero bucket during quantile estimation
+        double[] values = new double[10000];
+        for (int i = 0; i < values.length; i++) {
+            if (randomDouble() < 0.2) {
+                // 20% zeros
+                values[i] = 0;
+            } else {
+                values[i] = randomDouble() * 100;
+            }
+        }
+
+        testQuantileAccuracy(values, 100);
+    }
+
+    private void testDistributionQuantileAccuracy(RealDistribution distribution) {
+        // draw a random sample and verify the error bound for a randomized bucket budget
+        double[] values = generateSamples(distribution, between(100, 50_000));
+        int bucketCount = randomBucketCount();
+        testQuantileAccuracy(values, bucketCount);
+    }
+
+    // Draws sampleSize values from the given distribution.
+    private static double[] generateSamples(RealDistribution distribution, int sampleSize) {
+        double[] values = new double[sampleSize];
+        for (int i = 0; i < sampleSize; i++) {
+            values[i] = distribution.sample();
+        }
+        return values;
+    }
+
+    /**
+     * Checks all QUANTILES_TO_TEST of the histogram against the exact (interpolated) quantiles
+     * of the raw values and returns the largest observed relative error.
+     */
+    private double testQuantileAccuracy(double[] values, int bucketCount) {
+        // Create histogram
+        ExponentialHistogram histogram = ExponentialHistogram.create(bucketCount, values);
+        Arrays.sort(values);
+
+        double allowedError = getMaximumRelativeError(values, bucketCount);
+        double maxError = 0;
+
+        // Compare histogram quantiles with exact quantiles
+        for (double q : QUANTILES_TO_TEST) {
+            // exact quantile via linear interpolation between the two neighbouring ranks
+            double percentileRank = q * (values.length - 1);
+            int lowerRank = (int) Math.floor(percentileRank);
+            int upperRank = (int) Math.ceil(percentileRank);
+            double upperFactor = percentileRank - lowerRank;
+
+            if (values[lowerRank] < 0 && values[upperRank] > 0) {
+                // the percentile lies directly between a sign change and we interpolate linearly in-between
+                // in this case the relative error bound does not hold
+                continue;
+            }
+            double exactValue = values[lowerRank] * (1 - upperFactor) + values[upperRank] * upperFactor;
+
+            double histoValue = ExponentialHistogramQuantile.getQuantile(histogram, q);
+
+            // Skip comparison if exact value is close to zero to avoid false-positives due to numerical imprecision
+            if (Math.abs(exactValue) < 1e-100) {
+                continue;
+            }
+
+            double relativeError = Math.abs(histoValue - exactValue) / Math.abs(exactValue);
+            maxError = Math.max(maxError, relativeError);
+
+            assertThat(
+                String.format(Locale.ENGLISH, "Quantile %.2f should be accurate within %.6f%% relative error", q, allowedError * 100),
+                histoValue,
+                closeTo(exactValue, Math.abs(exactValue * allowedError))
+            );
+
+        }
+        return maxError;
+    }
+
+    /**
+     * Provides the upper bound of the relative error for any percentile estimate performed with the exponential histogram.
+     * The error depends on the raw values put into the histogram and the number of buckets allowed.
+     * This is an implementation of the error bound computation proven by Theorem 3 in the <a href="https://arxiv.org/pdf/2004.08604">UDDSketch paper</a>
+     */
+    private static double getMaximumRelativeError(double[] values, int bucketCount) {
+        HashSet<Long> usedPositiveIndices = new HashSet<>();
+        HashSet<Long> usedNegativeIndices = new HashSet<>();
+        int bestPossibleScale = MAX_SCALE;
+        for (double value : values) {
+            // BUGFIX: negative values were previously tracked in the "positive" set and vice versa;
+            // the combined size (the only thing used) was unaffected, but the sets now match their names
+            if (value < 0) {
+                usedNegativeIndices.add(computeIndex(value, bestPossibleScale));
+            } else if (value > 0) {
+                usedPositiveIndices.add(computeIndex(value, bestPossibleScale));
+            }
+            // merge neighbouring buckets (halving the indices) until the values fit into bucketCount buckets
+            while ((usedNegativeIndices.size() + usedPositiveIndices.size()) > bucketCount) {
+                usedNegativeIndices = rightShiftAll(usedNegativeIndices);
+                usedPositiveIndices = rightShiftAll(usedPositiveIndices);
+                bestPossibleScale--;
+            }
+        }
+        // for the best possible scale, compute the worst-case error
+        double base = Math.pow(2.0, Math.scalb(1.0, -bestPossibleScale));
+        return 2 * base / (1 + base) - 1;
+    }
+
+    // Halves every index (arithmetic shift), i.e. maps each bucket to its parent bucket at scale - 1.
+    private static HashSet<Long> rightShiftAll(HashSet<Long> indices) {
+        HashSet<Long> shifted = new HashSet<>();
+        indices.forEach(index -> shifted.add(index >> 1));
+        return shifted;
+    }
+
+}

+ 33 - 0
libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java

@@ -0,0 +1,33 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import org.elasticsearch.test.ESTestCase;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class ZeroBucketTests extends ESTestCase {
+
+    public void testMinimalBucketHasZeroThreshold() {
+        // a minimal zero bucket carries the given count but has a threshold of exactly 0
+        assertThat(ZeroBucket.minimalWithCount(42).zeroThreshold(), equalTo(0.0));
+    }
+}