Browse Source

Add epoch blob-cache metric (#132547)

Add epoch (changes) as a metric to the blob-cache, giving
visibility into the velocity of change in the cache and
an indication of cache churn.
Henning Andersen 2 months ago
parent
commit
397821e5ce

+ 5 - 0
docs/changelog/132547.yaml

@@ -0,0 +1,5 @@
+pr: 132547
+summary: Add epoch blob-cache metric
+area: Searchable Snapshots
+type: enhancement
+issues: []

+ 10 - 2
x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/BlobCacheMetrics.java

@@ -42,6 +42,7 @@ public class BlobCacheMetrics {
 
     private final LongAdder missCount = new LongAdder();
     private final LongAdder readCount = new LongAdder();
+    private final LongCounter epochChanges;
 
     public enum CachePopulationReason {
         /**
@@ -98,7 +99,8 @@ public class BlobCacheMetrics {
                 "es.blob_cache.population.time.total",
                 "The time spent copying data into the cache",
                 "milliseconds"
-            )
+            ),
+            meterRegistry.registerLongCounter("es.blob_cache.epoch.total", "The epoch changes of the LFU cache", "count")
         );
 
         meterRegistry.registerLongGauge(
@@ -134,7 +136,8 @@ public class BlobCacheMetrics {
         LongHistogram cacheMissLoadTimes,
         DoubleHistogram cachePopulationThroughput,
         LongCounter cachePopulationBytes,
-        LongCounter cachePopulationTime
+        LongCounter cachePopulationTime,
+        LongCounter epochChanges
     ) {
         this.cacheMissCounter = cacheMissCounter;
         this.evictedCountNonZeroFrequency = evictedCountNonZeroFrequency;
@@ -143,6 +146,7 @@ public class BlobCacheMetrics {
         this.cachePopulationThroughput = cachePopulationThroughput;
         this.cachePopulationBytes = cachePopulationBytes;
         this.cachePopulationTime = cachePopulationTime;
+        this.epochChanges = epochChanges;
     }
 
     public static final BlobCacheMetrics NOOP = new BlobCacheMetrics(TelemetryProvider.NOOP.getMeterRegistry());
@@ -201,6 +205,10 @@ public class BlobCacheMetrics {
         }
     }
 
+    public void recordEpochChange() {
+        epochChanges.increment();
+    }
+
     public void recordRead() {
         readCount.increment();
     }

+ 1 - 0
x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java

@@ -2049,6 +2049,7 @@ public class SharedBlobCacheService<KeyType> implements Releasable {
             public void onAfter() {
                 assert pendingEpoch.get() == epoch.get() + 1;
                 epoch.incrementAndGet();
+                blobCacheMetrics.recordEpochChange();
             }
 
             @Override

+ 21 - 2
x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java

@@ -35,6 +35,7 @@ import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.env.TestEnvironment;
 import org.elasticsearch.node.NodeRoleSettings;
 import org.elasticsearch.telemetry.InstrumentType;
+import org.elasticsearch.telemetry.Measurement;
 import org.elasticsearch.telemetry.RecordingMeterRegistry;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.threadpool.TestThreadPool;
@@ -332,6 +333,8 @@ public class SharedBlobCacheServiceTests extends ESTestCase {
     }
 
     public void testDecay() throws IOException {
+        RecordingMeterRegistry recordingMeterRegistry = new RecordingMeterRegistry();
+        BlobCacheMetrics metrics = new BlobCacheMetrics(recordingMeterRegistry);
         // we have 8 regions
         Settings settings = Settings.builder()
             .put(NODE_NAME_SETTING.getKey(), "node")
@@ -347,7 +350,7 @@ public class SharedBlobCacheServiceTests extends ESTestCase {
                 settings,
                 taskQueue.getThreadPool(),
                 taskQueue.getThreadPool().executor(ThreadPool.Names.GENERIC),
-                BlobCacheMetrics.NOOP
+                metrics
             )
         ) {
             assertEquals(4, cacheService.freeRegionCount());
@@ -375,6 +378,8 @@ public class SharedBlobCacheServiceTests extends ESTestCase {
                 assertThat(taskQueue.hasRunnableTasks(), is(true));
                 taskQueue.runAllRunnableTasks();
                 assertThat(cacheService.epoch(), equalTo(expectedEpoch.incrementAndGet()));
+                long epochs = recordedEpochs(recordingMeterRegistry);
+                assertEquals(cacheService.epoch(), epochs);
             };
 
             triggerDecay.run();
@@ -435,11 +440,22 @@ public class SharedBlobCacheServiceTests extends ESTestCase {
         }
     }
 
+    private static long recordedEpochs(RecordingMeterRegistry recordingMeterRegistry) {
+        long epochs = recordingMeterRegistry.getRecorder()
+            .getMeasurements(InstrumentType.LONG_COUNTER, "es.blob_cache.epoch.total")
+            .stream()
+            .mapToLong(Measurement::getLong)
+            .sum();
+        return epochs;
+    }
+
     /**
      * Test when many objects need to decay, in particular useful to measure how long the decay task takes.
      * For 1M objects (with no assertions) it took 26ms locally.
      */
     public void testMassiveDecay() throws IOException {
+        RecordingMeterRegistry recordingMeterRegistry = new RecordingMeterRegistry();
+        BlobCacheMetrics metrics = new BlobCacheMetrics(recordingMeterRegistry);
         int regions = 1024; // to measure decay time, increase to 1024*1024 and disable assertions.
         Settings settings = Settings.builder()
             .put(NODE_NAME_SETTING.getKey(), "node")
@@ -455,7 +471,7 @@ public class SharedBlobCacheServiceTests extends ESTestCase {
                 settings,
                 taskQueue.getThreadPool(),
                 taskQueue.getThreadPool().executor(ThreadPool.Names.GENERIC),
-                BlobCacheMetrics.NOOP
+                metrics
             )
         ) {
             Runnable decay = () -> {
@@ -496,6 +512,9 @@ public class SharedBlobCacheServiceTests extends ESTestCase {
                 }
             }
             assertThat(freqs.get(4), equalTo(regions - maxRounds + 1));
+
+            long epochs = recordedEpochs(recordingMeterRegistry);
+            assertEquals(cacheService.epoch(), epochs);
         }
     }