Browse Source

Adding cache_stats to geoip stats API (#107334)

Keith Massey 1 year ago
parent
commit
f5c7938ab8

+ 5 - 0
docs/changelog/107334.yaml

@@ -0,0 +1,5 @@
+pr: 107334
+summary: Adding `cache_stats` to geoip stats API
+area: Ingest Node
+type: enhancement
+issues: []

+ 36 - 1
docs/reference/ingest/apis/geoip-stats-api.asciidoc

@@ -24,7 +24,7 @@ GET _ingest/geoip/stats
 `manage` <<privileges-list-cluster,cluster privilege>> to use this API.
 
 * If <<ingest-geoip-downloader-enabled,`ingest.geoip.downloader.enabled`>> is
-disabled, this API returns zero values and an empty `nodes` object.
+disabled and no custom databases are configured, this API returns zero values and an empty `nodes` object.
 
 [role="child_attributes"]
 [[geoip-stats-api-response-body]]
@@ -83,6 +83,41 @@ Downloaded databases for the node.
 (string)
 Name of the database.
 ======
+`cache_stats`::
+(object)
+GeoIP cache stats for the node.
++
+.Properties of `cache_stats`
+[%collapsible%open]
+======
+`count`::
+(Long)
+Number of cached entries.
+
+`hits`::
+(Long)
+The number of enrich lookups served from cache.
+
+`misses`::
+(Long)
+The number of times geoIP lookups couldn't be
+served from cache.
+
+`evictions`::
+(Long)
+The number cache entries evicted from the cache.
+
+`hits_time_in_millis`::
+(Long)
+The amount of time in milliseconds spent fetching data from the cache on succesful cache hits only.
+
+`misses_time_in_millis`::
+(Long)
+The amount of time in milliseconds spent fetching data from the cache and the backing GeoIP2 database and updating the
+cache, on cache misses only.
+
+======
+
 
 `files_in_temp`::
 (array of strings)

+ 5 - 0
modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java

@@ -29,6 +29,7 @@ import org.elasticsearch.gateway.GatewayService;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.query.TermQueryBuilder;
 import org.elasticsearch.ingest.IngestService;
+import org.elasticsearch.ingest.geoip.stats.CacheStats;
 import org.elasticsearch.persistent.PersistentTasksCustomMetadata;
 import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.watcher.ResourceWatcherService;
@@ -506,4 +507,8 @@ public final class DatabaseNodeService implements GeoIpDatabaseProvider, Closeab
             throw new UncheckedIOException(e);
         }
     }
+
+    public CacheStats getCacheStats() {
+        return cache.getCacheStats();
+    }
 }

+ 37 - 1
modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpCache.java

@@ -12,10 +12,14 @@ import com.maxmind.geoip2.model.AbstractResponse;
 
 import org.elasticsearch.common.cache.Cache;
 import org.elasticsearch.common.cache.CacheBuilder;
+import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.ingest.geoip.stats.CacheStats;
 
 import java.net.InetAddress;
 import java.nio.file.Path;
+import java.util.concurrent.atomic.AtomicLong;
 import java.util.function.Function;
+import java.util.function.LongSupplier;
 
 /**
  * The in-memory cache for the geoip data. There should only be 1 instance of this class.
@@ -38,16 +42,24 @@ final class GeoIpCache {
         }
     };
 
+    private final LongSupplier relativeNanoTimeProvider;
     private final Cache<CacheKey, AbstractResponse> cache;
+    private final AtomicLong hitsTimeInNanos = new AtomicLong(0);
+    private final AtomicLong missesTimeInNanos = new AtomicLong(0);
 
     // package private for testing
-    GeoIpCache(long maxSize) {
+    GeoIpCache(long maxSize, LongSupplier relativeNanoTimeProvider) {
         if (maxSize < 0) {
             throw new IllegalArgumentException("geoip max cache size must be 0 or greater");
         }
+        this.relativeNanoTimeProvider = relativeNanoTimeProvider;
         this.cache = CacheBuilder.<CacheKey, AbstractResponse>builder().setMaximumWeight(maxSize).build();
     }
 
+    GeoIpCache(long maxSize) {
+        this(maxSize, System::nanoTime);
+    }
+
     @SuppressWarnings("unchecked")
     <T extends AbstractResponse> T putIfAbsent(
         InetAddress ip,
@@ -56,11 +68,14 @@ final class GeoIpCache {
     ) {
         // can't use cache.computeIfAbsent due to the elevated permissions for the jackson (run via the cache loader)
         CacheKey cacheKey = new CacheKey(ip, databasePath);
+        long cacheStart = relativeNanoTimeProvider.getAsLong();
         // intentionally non-locking for simplicity...it's OK if we re-put the same key/value in the cache during a race condition.
         AbstractResponse response = cache.get(cacheKey);
+        long cacheRequestTime = relativeNanoTimeProvider.getAsLong() - cacheStart;
 
         // populate the cache for this key, if necessary
         if (response == null) {
+            long retrieveStart = relativeNanoTimeProvider.getAsLong();
             response = retrieveFunction.apply(ip);
             // if the response from the database was null, then use the no-result sentinel value
             if (response == null) {
@@ -68,6 +83,10 @@ final class GeoIpCache {
             }
             // store the result or no-result in the cache
             cache.put(cacheKey, response);
+            long databaseRequestAndCachePutTime = relativeNanoTimeProvider.getAsLong() - retrieveStart;
+            missesTimeInNanos.addAndGet(cacheRequestTime + databaseRequestAndCachePutTime);
+        } else {
+            hitsTimeInNanos.addAndGet(cacheRequestTime);
         }
 
         if (response == NO_RESULT) {
@@ -99,6 +118,23 @@ final class GeoIpCache {
         return cache.count();
     }
 
+    /**
+     * Returns stats about this cache as of this moment. There is no guarantee that the counts reconcile (for example hits + misses = count)
+     * because no locking is performed when requesting these stats.
+     * @return Current stats about this cache
+     */
+    public CacheStats getCacheStats() {
+        Cache.CacheStats stats = cache.stats();
+        return new CacheStats(
+            cache.count(),
+            stats.getHits(),
+            stats.getMisses(),
+            stats.getEvictions(),
+            TimeValue.nsecToMSec(hitsTimeInNanos.get()),
+            TimeValue.nsecToMSec(missesTimeInNanos.get())
+        );
+    }
+
     /**
      * The key to use for the cache. Since this cache can span multiple geoip processors that all use different databases, the database
      * path is needed to be included in the cache key. For example, if we only used the IP address as the key the City and ASN the same

+ 41 - 0
modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/stats/CacheStats.java

@@ -0,0 +1,41 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.ingest.geoip.stats;
+
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Writeable;
+
+import java.io.IOException;
+
+public record CacheStats(long count, long hits, long misses, long evictions, long hitsTimeInMillis, long missesTimeInMillis)
+    implements
+        Writeable {
+
+    public CacheStats(StreamInput streamInput) throws IOException {
+        this(
+            streamInput.readLong(),
+            streamInput.readLong(),
+            streamInput.readLong(),
+            streamInput.readLong(),
+            streamInput.readLong(),
+            streamInput.readLong()
+        );
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        out.writeLong(count);
+        out.writeLong(hits);
+        out.writeLong(misses);
+        out.writeLong(evictions);
+        out.writeLong(hitsTimeInMillis);
+        out.writeLong(missesTimeInMillis);
+    }
+}

+ 25 - 1
modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/stats/GeoIpStatsAction.java

@@ -130,6 +130,15 @@ public class GeoIpStatsAction {
                 if (response.configDatabases.isEmpty() == false) {
                     builder.array("config_databases", response.configDatabases.toArray(String[]::new));
                 }
+                builder.startObject("cache_stats");
+                CacheStats cacheStats = response.cacheStats;
+                builder.field("count", cacheStats.count());
+                builder.field("hits", cacheStats.hits());
+                builder.field("misses", cacheStats.misses());
+                builder.field("evictions", cacheStats.evictions());
+                builder.field("hits_time_in_millis", cacheStats.hitsTimeInMillis());
+                builder.field("misses_time_in_millis", cacheStats.missesTimeInMillis());
+                builder.endObject();
                 builder.endObject();
             }
             builder.endObject();
@@ -154,6 +163,7 @@ public class GeoIpStatsAction {
     public static class NodeResponse extends BaseNodeResponse {
 
         private final GeoIpDownloaderStats downloaderStats;
+        private final CacheStats cacheStats;
         private final Set<String> databases;
         private final Set<String> filesInTemp;
         private final Set<String> configDatabases;
@@ -161,6 +171,11 @@ public class GeoIpStatsAction {
         protected NodeResponse(StreamInput in) throws IOException {
             super(in);
             downloaderStats = in.readBoolean() ? new GeoIpDownloaderStats(in) : null;
+            if (in.getTransportVersion().onOrAfter(TransportVersions.GEOIP_CACHE_STATS)) {
+                cacheStats = in.readBoolean() ? new CacheStats(in) : null;
+            } else {
+                cacheStats = null;
+            }
             databases = in.readCollectionAsImmutableSet(StreamInput::readString);
             filesInTemp = in.readCollectionAsImmutableSet(StreamInput::readString);
             configDatabases = in.getTransportVersion().onOrAfter(TransportVersions.V_8_0_0)
@@ -171,12 +186,14 @@ public class GeoIpStatsAction {
         protected NodeResponse(
             DiscoveryNode node,
             GeoIpDownloaderStats downloaderStats,
+            CacheStats cacheStats,
             Set<String> databases,
             Set<String> filesInTemp,
             Set<String> configDatabases
         ) {
             super(node);
             this.downloaderStats = downloaderStats;
+            this.cacheStats = cacheStats;
             this.databases = Set.copyOf(databases);
             this.filesInTemp = Set.copyOf(filesInTemp);
             this.configDatabases = Set.copyOf(configDatabases);
@@ -205,6 +222,12 @@ public class GeoIpStatsAction {
             if (downloaderStats != null) {
                 downloaderStats.writeTo(out);
             }
+            if (out.getTransportVersion().onOrAfter(TransportVersions.GEOIP_CACHE_STATS)) {
+                out.writeBoolean(cacheStats != null);
+                if (cacheStats != null) {
+                    cacheStats.writeTo(out);
+                }
+            }
             out.writeStringCollection(databases);
             out.writeStringCollection(filesInTemp);
             if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_0_0)) {
@@ -218,6 +241,7 @@ public class GeoIpStatsAction {
             if (o == null || getClass() != o.getClass()) return false;
             NodeResponse that = (NodeResponse) o;
             return downloaderStats.equals(that.downloaderStats)
+                && Objects.equals(cacheStats, that.cacheStats)
                 && databases.equals(that.databases)
                 && filesInTemp.equals(that.filesInTemp)
                 && Objects.equals(configDatabases, that.configDatabases);
@@ -225,7 +249,7 @@ public class GeoIpStatsAction {
 
         @Override
         public int hashCode() {
-            return Objects.hash(downloaderStats, databases, filesInTemp, configDatabases);
+            return Objects.hash(downloaderStats, cacheStats, databases, filesInTemp, configDatabases);
         }
     }
 }

+ 2 - 0
modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/stats/GeoIpStatsTransportAction.java

@@ -76,9 +76,11 @@ public class GeoIpStatsTransportAction extends TransportNodesAction<Request, Res
     protected NodeResponse nodeOperation(NodeRequest request, Task task) {
         GeoIpDownloader geoIpTask = geoIpDownloaderTaskExecutor.getCurrentTask();
         GeoIpDownloaderStats downloaderStats = geoIpTask == null || geoIpTask.getStatus() == null ? null : geoIpTask.getStatus();
+        CacheStats cacheStats = registry.getCacheStats();
         return new NodeResponse(
             transportService.getLocalNode(),
             downloaderStats,
+            cacheStats,
             registry.getAvailableDatabases(),
             registry.getFilesInTemp(),
             registry.getConfigDatabases()

+ 33 - 0
modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpCacheTests.java

@@ -11,12 +11,16 @@ package org.elasticsearch.ingest.geoip;
 import com.maxmind.geoip2.model.AbstractResponse;
 
 import org.elasticsearch.common.network.InetAddresses;
+import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.ingest.geoip.stats.CacheStats;
 import org.elasticsearch.test.ESTestCase;
 
 import java.net.InetAddress;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
 import java.util.function.Function;
 
+import static org.hamcrest.Matchers.equalTo;
 import static org.mockito.Mockito.mock;
 
 public class GeoIpCacheTests extends ESTestCase {
@@ -83,4 +87,33 @@ public class GeoIpCacheTests extends ESTestCase {
         IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> new GeoIpCache(-1));
         assertEquals("geoip max cache size must be 0 or greater", ex.getMessage());
     }
+
+    public void testGetCacheStats() {
+        final long maxCacheSize = 2;
+        final AtomicLong testNanoTime = new AtomicLong(0);
+        // We use a relative time provider that increments 1ms every time it is called. So each operation appears to take 1ms
+        GeoIpCache cache = new GeoIpCache(maxCacheSize, () -> testNanoTime.addAndGet(TimeValue.timeValueMillis(1).getNanos()));
+        AbstractResponse response = mock(AbstractResponse.class);
+        String databasePath = "path/to/db1";
+        InetAddress key1 = InetAddresses.forString("127.0.0.1");
+        InetAddress key2 = InetAddresses.forString("127.0.0.2");
+        InetAddress key3 = InetAddresses.forString("127.0.0.3");
+
+        cache.putIfAbsent(key1, databasePath, ip -> response); // cache miss
+        cache.putIfAbsent(key2, databasePath, ip -> response); // cache miss
+        cache.putIfAbsent(key1, databasePath, ip -> response); // cache hit
+        cache.putIfAbsent(key1, databasePath, ip -> response); // cache hit
+        cache.putIfAbsent(key1, databasePath, ip -> response); // cache hit
+        cache.putIfAbsent(key3, databasePath, ip -> response); // cache miss, key2 will be evicted
+        cache.putIfAbsent(key2, databasePath, ip -> response); // cache miss, key1 will be evicted
+        CacheStats cacheStats = cache.getCacheStats();
+        assertThat(cacheStats.count(), equalTo(maxCacheSize));
+        assertThat(cacheStats.hits(), equalTo(3L));
+        assertThat(cacheStats.misses(), equalTo(4L));
+        assertThat(cacheStats.evictions(), equalTo(2L));
+        // There are 3 hits, each taking 1ms:
+        assertThat(cacheStats.hitsTimeInMillis(), equalTo(3L));
+        // There are 4 misses. Each is made up of a cache query, and a database query, each being 1ms:
+        assertThat(cacheStats.missesTimeInMillis(), equalTo(8L));
+    }
 }

+ 92 - 0
modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/stats/CacheStatsSerializingTests.java

@@ -0,0 +1,92 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.ingest.geoip.stats;
+
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.test.AbstractWireSerializingTestCase;
+import org.elasticsearch.test.ESTestCase;
+
+import java.io.IOException;
+
+public class CacheStatsSerializingTests extends AbstractWireSerializingTestCase<CacheStats> {
+    @Override
+    protected Writeable.Reader<CacheStats> instanceReader() {
+        return CacheStats::new;
+    }
+
+    @Override
+    protected CacheStats createTestInstance() {
+        return createRandomInstance();
+    }
+
+    @Override
+    protected CacheStats mutateInstance(CacheStats instance) throws IOException {
+        long count = instance.count();
+        long hits = instance.hits();
+        long misses = instance.misses();
+        long evictions = instance.evictions();
+        long hitsTimeInMillis = instance.hitsTimeInMillis();
+        long missesTimeInMillis = instance.missesTimeInMillis();
+        return switch (between(0, 5)) {
+            case 0 -> new CacheStats(
+                randomValueOtherThan(count, ESTestCase::randomLong),
+                hits,
+                misses,
+                evictions,
+                hitsTimeInMillis,
+                missesTimeInMillis
+            );
+            case 1 -> new CacheStats(
+                count,
+                randomValueOtherThan(hits, ESTestCase::randomLong),
+                misses,
+                evictions,
+                hitsTimeInMillis,
+                missesTimeInMillis
+            );
+            case 2 -> new CacheStats(
+                count,
+                hits,
+                randomValueOtherThan(misses, ESTestCase::randomLong),
+                evictions,
+                hitsTimeInMillis,
+                missesTimeInMillis
+            );
+            case 3 -> new CacheStats(
+                count,
+                hits,
+                misses,
+                randomValueOtherThan(evictions, ESTestCase::randomLong),
+                hitsTimeInMillis,
+                missesTimeInMillis
+            );
+            case 4 -> new CacheStats(
+                count,
+                hits,
+                misses,
+                evictions,
+                randomValueOtherThan(hitsTimeInMillis, ESTestCase::randomLong),
+                missesTimeInMillis
+            );
+            case 5 -> new CacheStats(
+                count,
+                hits,
+                misses,
+                evictions,
+                hitsTimeInMillis,
+                randomValueOtherThan(missesTimeInMillis, ESTestCase::randomLong)
+            );
+            default -> throw new IllegalStateException("Unexpected value");
+        };
+    }
+
+    static CacheStats createRandomInstance() {
+        return new CacheStats(randomLong(), randomLong(), randomLong(), randomLong(), randomLong(), randomLong());
+    }
+}

+ 1 - 0
modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/stats/GeoIpStatsActionNodeResponseSerializingTests.java

@@ -40,6 +40,7 @@ public class GeoIpStatsActionNodeResponseSerializingTests extends AbstractWireSe
         return new GeoIpStatsAction.NodeResponse(
             node,
             GeoIpDownloaderStatsSerializingTests.createRandomInstance(),
+            randomBoolean() ? null : CacheStatsSerializingTests.createRandomInstance(),
             databases,
             files,
             configDatabases

+ 1 - 0
modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/stats/GeoIpStatsActionNodeResponseTests.java

@@ -28,6 +28,7 @@ public class GeoIpStatsActionNodeResponseTests extends ESTestCase {
         GeoIpStatsAction.NodeResponse nodeResponse = new GeoIpStatsAction.NodeResponse(
             node,
             GeoIpDownloaderStatsSerializingTests.createRandomInstance(),
+            randomBoolean() ? null : CacheStatsSerializingTests.createRandomInstance(),
             databases,
             files,
             configDatabases

+ 8 - 0
modules/ingest-geoip/src/yamlRestTest/resources/rest-api-spec/test/ingest_geoip/30_geoip_stats.yml

@@ -8,3 +8,11 @@
   - gte: { stats.databases_count: 0 }
   - gte: { stats.total_download_time: 0 }
   - is_true: nodes
+  - set:
+      nodes._arbitrary_key_: node_id
+  - gte: { nodes.$node_id.cache_stats.count: 0 }
+  - gte: { nodes.$node_id.cache_stats.hits: 0 }
+  - gte: { nodes.$node_id.cache_stats.misses: 0 }
+  - gte: { nodes.$node_id.cache_stats.evictions: 0 }
+  - gte: { nodes.$node_id.cache_stats.hits_time_in_millis: 0 }
+  - gte: { nodes.$node_id.cache_stats.misses_time_in_millis: 0 }

+ 1 - 0
server/src/main/java/org/elasticsearch/TransportVersions.java

@@ -174,6 +174,7 @@ public class TransportVersions {
     public static final TransportVersion TRACK_FLUSH_TIME_EXCLUDING_WAITING_ON_LOCKS = def(8_633_00_0);
     public static final TransportVersion ML_INFERENCE_AZURE_OPENAI_EMBEDDINGS = def(8_634_00_0);
     public static final TransportVersion ILM_SHRINK_ENABLE_WRITE = def(8_635_00_0);
+    public static final TransportVersion GEOIP_CACHE_STATS = def(8_636_00_0);
 
     /*
      * STOP! READ THIS FIRST! No, really,