Browse Source

Support IPinfo databases in the ip_location processor (#114735) (#114776)

Joe Gallo 1 year ago
parent
commit
45881c4c9f

+ 2 - 0
modules/ingest-geoip/src/main/java/module-info.java

@@ -18,4 +18,6 @@ module org.elasticsearch.ingest.geoip {
 
     exports org.elasticsearch.ingest.geoip.direct to org.elasticsearch.server;
     exports org.elasticsearch.ingest.geoip.stats to org.elasticsearch.server;
+
+    exports org.elasticsearch.ingest.geoip to com.maxmind.db;
 }

+ 2 - 2
modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/ConfigDatabases.java

@@ -73,14 +73,14 @@ final class ConfigDatabases implements Closeable {
         String databaseFileName = file.getFileName().toString();
         try {
             if (update) {
-                logger.info("database file changed [{}], reload database...", file);
+                logger.info("database file changed [{}], reloading database...", file);
                 DatabaseReaderLazyLoader loader = new DatabaseReaderLazyLoader(cache, file, null);
                 DatabaseReaderLazyLoader existing = configDatabases.put(databaseFileName, loader);
                 if (existing != null) {
                     existing.shutdown();
                 }
             } else {
-                logger.info("database file removed [{}], close database...", file);
+                logger.info("database file removed [{}], closing database...", file);
                 DatabaseReaderLazyLoader existing = configDatabases.remove(databaseFileName);
                 assert existing != null;
                 existing.shutdown();

+ 13 - 7
modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java

@@ -196,13 +196,19 @@ public final class GeoIpProcessor extends AbstractProcessor {
             }
 
             if (Assertions.ENABLED) {
-                // Only check whether the suffix has changed and not the entire database type.
-                // To sanity check whether a city db isn't overwriting with a country or asn db.
-                // For example overwriting a geoip lite city db with geoip city db is a valid change, but the db type is slightly different,
-                // by checking just the suffix this assertion doesn't fail.
-                String expectedSuffix = databaseType.substring(databaseType.lastIndexOf('-'));
-                assert loader.getDatabaseType().endsWith(expectedSuffix)
-                    : "database type [" + loader.getDatabaseType() + "] doesn't match with expected suffix [" + expectedSuffix + "]";
+                // Note that the expected suffix might be null for providers that aren't amenable to using dashes as a separator for
+                // determining the database type.
+                int last = databaseType.lastIndexOf('-');
+                final String expectedSuffix = last == -1 ? null : databaseType.substring(last);
+
+                // If the entire database type matches, then that's a match. Otherwise, if there's a suffix to compare on, then
+                // check whether the suffix has changed (not the entire database type).
+                // This is to sanity check, for example, that a city db isn't overwritten with a country or asn db.
+                // But there are permissible overwrites that make sense, for example overwriting a geolite city db with a geoip city db
+                // is a valid change, but the db type is slightly different -- by checking just the suffix this assertion won't fail.
+                final String loaderType = loader.getDatabaseType();
+                assert loaderType.equals(databaseType) || expectedSuffix == null || loaderType.endsWith(expectedSuffix)
+                    : "database type [" + loaderType + "] doesn't match with expected suffix [" + expectedSuffix + "]";
             }
             return loader;
         }

+ 23 - 50
modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpDataLookupFactories.java

@@ -13,9 +13,16 @@ import org.elasticsearch.common.Strings;
 import org.elasticsearch.core.Nullable;
 
 import java.util.List;
+import java.util.Locale;
 import java.util.Set;
 import java.util.function.Function;
 
+import static org.elasticsearch.ingest.geoip.IpinfoIpDataLookups.IPINFO_PREFIX;
+import static org.elasticsearch.ingest.geoip.IpinfoIpDataLookups.getIpinfoDatabase;
+import static org.elasticsearch.ingest.geoip.IpinfoIpDataLookups.getIpinfoLookup;
+import static org.elasticsearch.ingest.geoip.MaxmindIpDataLookups.getMaxmindDatabase;
+import static org.elasticsearch.ingest.geoip.MaxmindIpDataLookups.getMaxmindLookup;
+
 final class IpDataLookupFactories {
 
     private IpDataLookupFactories() {
@@ -26,78 +33,44 @@ final class IpDataLookupFactories {
         IpDataLookup create(List<String> properties);
     }
 
-    private static final String CITY_DB_SUFFIX = "-City";
-    private static final String COUNTRY_DB_SUFFIX = "-Country";
-    private static final String ASN_DB_SUFFIX = "-ASN";
-    private static final String ANONYMOUS_IP_DB_SUFFIX = "-Anonymous-IP";
-    private static final String CONNECTION_TYPE_DB_SUFFIX = "-Connection-Type";
-    private static final String DOMAIN_DB_SUFFIX = "-Domain";
-    private static final String ENTERPRISE_DB_SUFFIX = "-Enterprise";
-    private static final String ISP_DB_SUFFIX = "-ISP";
-
-    @Nullable
-    private static Database getMaxmindDatabase(final String databaseType) {
-        if (databaseType.endsWith(CITY_DB_SUFFIX)) {
-            return Database.City;
-        } else if (databaseType.endsWith(COUNTRY_DB_SUFFIX)) {
-            return Database.Country;
-        } else if (databaseType.endsWith(ASN_DB_SUFFIX)) {
-            return Database.Asn;
-        } else if (databaseType.endsWith(ANONYMOUS_IP_DB_SUFFIX)) {
-            return Database.AnonymousIp;
-        } else if (databaseType.endsWith(CONNECTION_TYPE_DB_SUFFIX)) {
-            return Database.ConnectionType;
-        } else if (databaseType.endsWith(DOMAIN_DB_SUFFIX)) {
-            return Database.Domain;
-        } else if (databaseType.endsWith(ENTERPRISE_DB_SUFFIX)) {
-            return Database.Enterprise;
-        } else if (databaseType.endsWith(ISP_DB_SUFFIX)) {
-            return Database.Isp;
-        } else {
-            return null; // no match was found
-        }
-    }
-
     /**
      * Parses the passed-in databaseType and returns the Database instance that is
      * associated with that databaseType.
      *
      * @param databaseType the database type String from the metadata of the database file
-     * @return the Database instance that is associated with the databaseType
+     * @return the Database instance that is associated with the databaseType (or null)
      */
     @Nullable
     static Database getDatabase(final String databaseType) {
         Database database = null;
 
         if (Strings.hasText(databaseType)) {
-            database = getMaxmindDatabase(databaseType);
+            final String databaseTypeLowerCase = databaseType.toLowerCase(Locale.ROOT);
+            if (databaseTypeLowerCase.startsWith(IPINFO_PREFIX)) {
+                database = getIpinfoDatabase(databaseTypeLowerCase); // all lower case!
+            } else {
+                // for historical reasons, fall back to assuming maxmind-like type parsing
+                database = getMaxmindDatabase(databaseType);
+            }
         }
 
         return database;
     }
 
-    @Nullable
-    static Function<Set<Database.Property>, IpDataLookup> getMaxmindLookup(final Database database) {
-        return switch (database) {
-            case City -> MaxmindIpDataLookups.City::new;
-            case Country -> MaxmindIpDataLookups.Country::new;
-            case Asn -> MaxmindIpDataLookups.Asn::new;
-            case AnonymousIp -> MaxmindIpDataLookups.AnonymousIp::new;
-            case ConnectionType -> MaxmindIpDataLookups.ConnectionType::new;
-            case Domain -> MaxmindIpDataLookups.Domain::new;
-            case Enterprise -> MaxmindIpDataLookups.Enterprise::new;
-            case Isp -> MaxmindIpDataLookups.Isp::new;
-            default -> null;
-        };
-    }
-
     static IpDataLookupFactory get(final String databaseType, final String databaseFile) {
         final Database database = getDatabase(databaseType);
         if (database == null) {
             throw new IllegalArgumentException("Unsupported database type [" + databaseType + "] for file [" + databaseFile + "]");
         }
 
-        final Function<Set<Database.Property>, IpDataLookup> factoryMethod = getMaxmindLookup(database);
+        final Function<Set<Database.Property>, IpDataLookup> factoryMethod;
+        final String databaseTypeLowerCase = databaseType.toLowerCase(Locale.ROOT);
+        if (databaseTypeLowerCase.startsWith(IPINFO_PREFIX)) {
+            factoryMethod = getIpinfoLookup(database);
+        } else {
+            // for historical reasons, fall back to assuming maxmind-like types
+            factoryMethod = getMaxmindLookup(database);
+        }
 
         if (factoryMethod == null) {
             throw new IllegalArgumentException("Unsupported database type [" + databaseType + "] for file [" + databaseFile + "]");

+ 79 - 0
modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpinfoIpDataLookups.java

@@ -23,10 +23,14 @@ import org.elasticsearch.core.Nullable;
 
 import java.io.IOException;
 import java.net.InetAddress;
+import java.util.Arrays;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
 
 /**
  * A collection of {@link IpDataLookup} implementations for IPinfo databases
@@ -43,6 +47,81 @@ final class IpinfoIpDataLookups {
     // prefix dispatch and checks are case-insensitive, so that works out nicely
     static final String IPINFO_PREFIX = "ipinfo";
 
+    private static final Set<String> IPINFO_TYPE_STOP_WORDS = Set.of( // lowercase tokens that ipinfoTypeCleanup drops from a type
+        "ipinfo",
+        "extended",
+        "free",
+        "generic",
+        "ip",
+        "sample",
+        "standard",
+        "mmdb"
+    );
+
+    /**
+     * Cleans up the database_type String from an ipinfo database by splitting on punctuation, removing stop words, and then joining
+     * with an underscore.
+     * <p>
+     * The separators are the space, underscore, and period characters. A part is removed only when it is exactly equal to one of
+     * the {@code IPINFO_TYPE_STOP_WORDS} (a case-sensitive comparison against lowercase words); every other part is kept, in order.
+     * <p>
+     * e.g. {@code "ipinfo free_foo_sample.mmdb"} becomes {@code "foo"}
+     *
+     * @param type the database_type from an ipinfo database (must not be null, it is split directly)
+     * @return a cleaned up database_type string
+     */
+    // n.b. this is just based on observation of the types from a survey of such databases -- it's like browser user agent sniffing,
+    // there aren't necessarily any amazing guarantees about this behavior
+    static String ipinfoTypeCleanup(String type) {
+        List<String> parts = Arrays.asList(type.split("[ _.]")); // split on any single space, underscore, or period
+        return parts.stream().filter((s) -> IPINFO_TYPE_STOP_WORDS.contains(s) == false).collect(Collectors.joining("_"));
+    }
+
+    @Nullable
+    static Database getIpinfoDatabase(final String databaseType) {
+        // maps an ipinfo database_type string to a Database, or null if it is unsupported. the selection is more along the
+        // lines of user-agent sniffing than string-based dispatch: the specific database_type strings could change in the
+        // future, hence the somewhat loose nature of this checking (all checks are case-sensitive substring matches).
+
+        final String cleanedType = ipinfoTypeCleanup(databaseType); // separators normalized to '_' and stop words removed
+
+        // early detection on any of the 'extended' types (checked on the raw type: cleanup removes 'extended' as a stop word)
+        if (databaseType.contains("extended")) {
+            // which are not currently supported
+            logger.trace("returning null for unsupported database_type [{}]", databaseType);
+            return null;
+        }
+
+        // early detection on 'country_asn' so the separate 'asn' and 'country' checks below don't get faked out
+        if (cleanedType.contains("country_asn")) {
+            // but it's not currently supported
+            logger.trace("returning null for unsupported database_type [{}]", databaseType);
+            return null;
+        }
+
+        if (cleanedType.contains("asn")) { // the first keyword that matches in this chain wins
+            return Database.AsnV2;
+        } else if (cleanedType.contains("country")) {
+            return Database.CountryV2;
+        } else if (cleanedType.contains("location")) { // note: catches 'location' and 'geolocation' ;)
+            return Database.CityV2;
+        } else if (cleanedType.contains("privacy")) {
+            return Database.PrivacyDetection;
+        } else {
+            // no match was found
+            logger.trace("returning null for unsupported database_type [{}]", databaseType);
+            return null;
+        }
+    }
+
+    @Nullable
+    static Function<Set<Database.Property>, IpDataLookup> getIpinfoLookup(final Database database) {
+        return switch (database) { // maps each supported ipinfo Database to the constructor of its lookup class
+            case AsnV2 -> IpinfoIpDataLookups.Asn::new;
+            case CountryV2 -> IpinfoIpDataLookups.Country::new;
+            case CityV2 -> IpinfoIpDataLookups.Geolocation::new; // n.b. CityV2 is served by the Geolocation lookup
+            case PrivacyDetection -> IpinfoIpDataLookups.PrivacyDetection::new;
+            default -> null; // any other Database has no ipinfo lookup, hence @Nullable
+        };
+    }
+
     /**
      * Lax-ly parses a string that (ideally) looks like 'AS123' into a Long like 123L (or null, if such parsing isn't possible).
      * @param asn a potentially empty (or null) ASN string that is expected to contain 'AS' and then a parsable long

+ 55 - 0
modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/MaxmindIpDataLookups.java

@@ -26,6 +26,8 @@ import com.maxmind.geoip2.record.Location;
 import com.maxmind.geoip2.record.Postal;
 import com.maxmind.geoip2.record.Subdivision;
 
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.elasticsearch.common.network.InetAddresses;
 import org.elasticsearch.common.network.NetworkAddress;
 import org.elasticsearch.core.Nullable;
@@ -37,6 +39,7 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
+import java.util.function.Function;
 
 /**
  * A collection of {@link IpDataLookup} implementations for MaxMind databases
@@ -47,11 +50,63 @@ final class MaxmindIpDataLookups {
         // utility class
     }
 
+    private static final Logger logger = LogManager.getLogger(MaxmindIpDataLookups.class);
+
     // the actual prefixes from the metadata are cased like the literal strings, but
     // prefix dispatch and checks are case-insensitive, so the actual constants are lowercase
     static final String GEOIP2_PREFIX = "GeoIP2".toLowerCase(Locale.ROOT);
     static final String GEOLITE2_PREFIX = "GeoLite2".toLowerCase(Locale.ROOT);
 
+    // note: the secondary dispatch on suffix happens to be case sensitive
+    private static final String CITY_DB_SUFFIX = "-City";
+    private static final String COUNTRY_DB_SUFFIX = "-Country";
+    private static final String ASN_DB_SUFFIX = "-ASN";
+    private static final String ANONYMOUS_IP_DB_SUFFIX = "-Anonymous-IP";
+    private static final String CONNECTION_TYPE_DB_SUFFIX = "-Connection-Type";
+    private static final String DOMAIN_DB_SUFFIX = "-Domain";
+    private static final String ENTERPRISE_DB_SUFFIX = "-Enterprise";
+    private static final String ISP_DB_SUFFIX = "-ISP";
+
+    @Nullable
+    static Database getMaxmindDatabase(final String databaseType) { // returns null when no known suffix matches
+        if (databaseType.endsWith(CITY_DB_SUFFIX)) { // suffix checks are case-sensitive; the first matching suffix wins
+            return Database.City;
+        } else if (databaseType.endsWith(COUNTRY_DB_SUFFIX)) {
+            return Database.Country;
+        } else if (databaseType.endsWith(ASN_DB_SUFFIX)) {
+            return Database.Asn;
+        } else if (databaseType.endsWith(ANONYMOUS_IP_DB_SUFFIX)) {
+            return Database.AnonymousIp;
+        } else if (databaseType.endsWith(CONNECTION_TYPE_DB_SUFFIX)) {
+            return Database.ConnectionType;
+        } else if (databaseType.endsWith(DOMAIN_DB_SUFFIX)) {
+            return Database.Domain;
+        } else if (databaseType.endsWith(ENTERPRISE_DB_SUFFIX)) {
+            return Database.Enterprise;
+        } else if (databaseType.endsWith(ISP_DB_SUFFIX)) {
+            return Database.Isp;
+        } else {
+            // no match was found
+            logger.trace("returning null for unsupported database_type [{}]", databaseType);
+            return null;
+        }
+    }
+
+    @Nullable
+    static Function<Set<Database.Property>, IpDataLookup> getMaxmindLookup(final Database database) {
+        return switch (database) { // maps each supported MaxMind Database to the constructor of its lookup class
+            case City -> MaxmindIpDataLookups.City::new;
+            case Country -> MaxmindIpDataLookups.Country::new;
+            case Asn -> MaxmindIpDataLookups.Asn::new;
+            case AnonymousIp -> MaxmindIpDataLookups.AnonymousIp::new;
+            case ConnectionType -> MaxmindIpDataLookups.ConnectionType::new;
+            case Domain -> MaxmindIpDataLookups.Domain::new;
+            case Enterprise -> MaxmindIpDataLookups.Enterprise::new;
+            case Isp -> MaxmindIpDataLookups.Isp::new;
+            default -> null; // any other Database has no MaxMind lookup, hence @Nullable
+        };
+    }
+
     static class AnonymousIp extends AbstractBase<AnonymousIpResponse> {
         AnonymousIp(final Set<Database.Property> properties) {
             super(

+ 106 - 38
modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java

@@ -27,6 +27,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
 
 import static org.elasticsearch.ingest.IngestDocumentMatcher.assertIngestDocument;
 import static org.elasticsearch.ingest.geoip.GeoIpProcessor.GEOIP_TYPE;
+import static org.elasticsearch.ingest.geoip.GeoIpProcessor.IP_LOCATION_TYPE;
 import static org.elasticsearch.ingest.geoip.GeoIpTestUtils.copyDatabase;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
@@ -37,10 +38,6 @@ import static org.hamcrest.Matchers.nullValue;
 
 public class GeoIpProcessorTests extends ESTestCase {
 
-    private static IpDataLookup ipDataLookupAll(final Database database) {
-        return IpDataLookupFactories.getMaxmindLookup(database).apply(database.properties());
-    }
-
     // a temporary directory that mmdb files can be copied to and read from
     private Path tmpDir;
 
@@ -54,6 +51,66 @@ public class GeoIpProcessorTests extends ESTestCase {
         IOUtils.rm(tmpDir);
     }
 
+    public void testMaxmindCity() throws Exception {
+        String ip = "2602:306:33d3:8000::3257:9652"; // IPv6 address; the assertions below expect it to resolve to "Homestead"
+        GeoIpProcessor processor = new GeoIpProcessor(
+            GEOIP_TYPE, // n.b. this is a "geoip" processor
+            randomAlphaOfLength(10),
+            null,
+            "source_field",
+            loader("GeoLite2-City.mmdb"), // the GeoLite2 city test database
+            () -> true,
+            "target_field",
+            getMaxmindCityLookup(),
+            false,
+            false,
+            "filename"
+        );
+
+        Map<String, Object> document = new HashMap<>();
+        document.put("source_field", ip);
+        IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
+        processor.execute(ingestDocument);
+
+        assertThat(ingestDocument.getSourceAndMetadata().get("source_field"), equalTo(ip)); // the source field is left untouched
+        @SuppressWarnings("unchecked")
+        Map<String, Object> data = (Map<String, Object>) ingestDocument.getSourceAndMetadata().get("target_field");
+        assertThat(data, notNullValue());
+        assertThat(data.get("ip"), equalTo(ip));
+        assertThat(data.get("city_name"), equalTo("Homestead"));
+        // see MaxmindIpDataLookupsTests for more tests of the data lookup behavior
+    }
+
+    public void testIpinfoGeolocation() throws Exception {
+        String ip = "13.107.39.238"; // IPv4 address; the assertions below expect it to resolve to "Des Moines"
+        GeoIpProcessor processor = new GeoIpProcessor(
+            IP_LOCATION_TYPE, // n.b. this is an "ip_location" processor
+            randomAlphaOfLength(10),
+            null,
+            "source_field",
+            loader("ipinfo/ip_geolocation_sample.mmdb"), // the ipinfo geolocation sample test database
+            () -> true,
+            "target_field",
+            getIpinfoGeolocationLookup(),
+            false,
+            false,
+            "filename"
+        );
+
+        Map<String, Object> document = new HashMap<>();
+        document.put("source_field", ip);
+        IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
+        processor.execute(ingestDocument);
+
+        assertThat(ingestDocument.getSourceAndMetadata().get("source_field"), equalTo(ip)); // the source field is left untouched
+        @SuppressWarnings("unchecked")
+        Map<String, Object> data = (Map<String, Object>) ingestDocument.getSourceAndMetadata().get("target_field");
+        assertThat(data, notNullValue());
+        assertThat(data.get("ip"), equalTo(ip));
+        assertThat(data.get("city_name"), equalTo("Des Moines"));
+        // see IpinfoIpDataLookupsTests for more tests of the data lookup behavior
+    }
+
     public void testNullValueWithIgnoreMissing() throws Exception {
         GeoIpProcessor processor = new GeoIpProcessor(
             GEOIP_TYPE,
@@ -63,7 +120,7 @@ public class GeoIpProcessorTests extends ESTestCase {
             loader("GeoLite2-City.mmdb"),
             () -> true,
             "target_field",
-            ipDataLookupAll(Database.City),
+            getMaxmindCityLookup(),
             true,
             false,
             "filename"
@@ -86,7 +143,7 @@ public class GeoIpProcessorTests extends ESTestCase {
             loader("GeoLite2-City.mmdb"),
             () -> true,
             "target_field",
-            ipDataLookupAll(Database.City),
+            getMaxmindCityLookup(),
             true,
             false,
             "filename"
@@ -106,7 +163,7 @@ public class GeoIpProcessorTests extends ESTestCase {
             loader("GeoLite2-City.mmdb"),
             () -> true,
             "target_field",
-            ipDataLookupAll(Database.City),
+            getMaxmindCityLookup(),
             false,
             false,
             "filename"
@@ -129,7 +186,7 @@ public class GeoIpProcessorTests extends ESTestCase {
             loader("GeoLite2-City.mmdb"),
             () -> true,
             "target_field",
-            ipDataLookupAll(Database.City),
+            getMaxmindCityLookup(),
             false,
             false,
             "filename"
@@ -149,7 +206,7 @@ public class GeoIpProcessorTests extends ESTestCase {
             loader("GeoLite2-City.mmdb"),
             () -> true,
             "target_field",
-            ipDataLookupAll(Database.City),
+            getMaxmindCityLookup(),
             false,
             false,
             "filename"
@@ -174,7 +231,7 @@ public class GeoIpProcessorTests extends ESTestCase {
             loader("GeoLite2-City.mmdb"),
             () -> true,
             "target_field",
-            ipDataLookupAll(Database.City),
+            getMaxmindCityLookup(),
             false,
             false,
             "filename"
@@ -196,7 +253,7 @@ public class GeoIpProcessorTests extends ESTestCase {
             loader("GeoLite2-City.mmdb"),
             () -> true,
             "target_field",
-            ipDataLookupAll(Database.City),
+            getMaxmindCityLookup(),
             false,
             false,
             "filename"
@@ -208,11 +265,11 @@ public class GeoIpProcessorTests extends ESTestCase {
         processor.execute(ingestDocument);
 
         @SuppressWarnings("unchecked")
-        List<Map<String, Object>> geoData = (List<Map<String, Object>>) ingestDocument.getSourceAndMetadata().get("target_field");
-        assertThat(geoData, notNullValue());
-        assertThat(geoData.size(), equalTo(2));
-        assertThat(geoData.get(0).get("location"), equalTo(Map.of("lat", 37.751d, "lon", -97.822d)));
-        assertThat(geoData.get(1).get("city_name"), equalTo("Hoensbroek"));
+        List<Map<String, Object>> data = (List<Map<String, Object>>) ingestDocument.getSourceAndMetadata().get("target_field");
+        assertThat(data, notNullValue());
+        assertThat(data.size(), equalTo(2));
+        assertThat(data.get(0).get("location"), equalTo(Map.of("lat", 37.751d, "lon", -97.822d)));
+        assertThat(data.get(1).get("city_name"), equalTo("Hoensbroek"));
     }
 
     public void testListPartiallyValid() throws Exception {
@@ -224,7 +281,7 @@ public class GeoIpProcessorTests extends ESTestCase {
             loader("GeoLite2-City.mmdb"),
             () -> true,
             "target_field",
-            ipDataLookupAll(Database.City),
+            getMaxmindCityLookup(),
             false,
             false,
             "filename"
@@ -236,11 +293,11 @@ public class GeoIpProcessorTests extends ESTestCase {
         processor.execute(ingestDocument);
 
         @SuppressWarnings("unchecked")
-        List<Map<String, Object>> geoData = (List<Map<String, Object>>) ingestDocument.getSourceAndMetadata().get("target_field");
-        assertThat(geoData, notNullValue());
-        assertThat(geoData.size(), equalTo(2));
-        assertThat(geoData.get(0).get("location"), equalTo(Map.of("lat", 37.751d, "lon", -97.822d)));
-        assertThat(geoData.get(1), nullValue());
+        List<Map<String, Object>> data = (List<Map<String, Object>>) ingestDocument.getSourceAndMetadata().get("target_field");
+        assertThat(data, notNullValue());
+        assertThat(data.size(), equalTo(2));
+        assertThat(data.get(0).get("location"), equalTo(Map.of("lat", 37.751d, "lon", -97.822d)));
+        assertThat(data.get(1), nullValue());
     }
 
     public void testListNoMatches() throws Exception {
@@ -252,7 +309,7 @@ public class GeoIpProcessorTests extends ESTestCase {
             loader("GeoLite2-City.mmdb"),
             () -> true,
             "target_field",
-            ipDataLookupAll(Database.City),
+            getMaxmindCityLookup(),
             false,
             false,
             "filename"
@@ -272,7 +329,7 @@ public class GeoIpProcessorTests extends ESTestCase {
         GeoIpProcessor processor = new GeoIpProcessor(GEOIP_TYPE, randomAlphaOfLength(10), null, "source_field", () -> {
             loader.preLookup();
             return loader;
-        }, () -> true, "target_field", ipDataLookupAll(Database.City), false, false, "filename");
+        }, () -> true, "target_field", getMaxmindCityLookup(), false, false, "filename");
 
         Map<String, Object> document = new HashMap<>();
         document.put("source_field", List.of("8.8.8.8", "82.171.64.0"));
@@ -280,11 +337,11 @@ public class GeoIpProcessorTests extends ESTestCase {
         processor.execute(ingestDocument);
 
         @SuppressWarnings("unchecked")
-        List<Map<String, Object>> geoData = (List<Map<String, Object>>) ingestDocument.getSourceAndMetadata().get("target_field");
-        assertThat(geoData, notNullValue());
-        assertThat(geoData.size(), equalTo(2));
-        assertThat(geoData.get(0).get("location"), equalTo(Map.of("lat", 37.751d, "lon", -97.822d)));
-        assertThat(geoData.get(1).get("city_name"), equalTo("Hoensbroek"));
+        List<Map<String, Object>> data = (List<Map<String, Object>>) ingestDocument.getSourceAndMetadata().get("target_field");
+        assertThat(data, notNullValue());
+        assertThat(data.size(), equalTo(2));
+        assertThat(data.get(0).get("location"), equalTo(Map.of("lat", 37.751d, "lon", -97.822d)));
+        assertThat(data.get(1).get("city_name"), equalTo("Hoensbroek"));
 
         // Check the loader's reference count and attempt to close
         assertThat(loader.current(), equalTo(0));
@@ -301,7 +358,7 @@ public class GeoIpProcessorTests extends ESTestCase {
             loader("GeoLite2-City.mmdb"),
             () -> true,
             "target_field",
-            ipDataLookupAll(Database.City),
+            getMaxmindCityLookup(),
             false,
             true,
             "filename"
@@ -313,9 +370,9 @@ public class GeoIpProcessorTests extends ESTestCase {
         processor.execute(ingestDocument);
 
         @SuppressWarnings("unchecked")
-        Map<String, Object> geoData = (Map<String, Object>) ingestDocument.getSourceAndMetadata().get("target_field");
-        assertThat(geoData, notNullValue());
-        assertThat(geoData.get("location"), equalTo(Map.of("lat", 37.751d, "lon", -97.822d)));
+        Map<String, Object> data = (Map<String, Object>) ingestDocument.getSourceAndMetadata().get("target_field");
+        assertThat(data, notNullValue());
+        assertThat(data.get("location"), equalTo(Map.of("lat", 37.751d, "lon", -97.822d)));
     }
 
     public void testListFirstOnlyNoMatches() throws Exception {
@@ -327,7 +384,7 @@ public class GeoIpProcessorTests extends ESTestCase {
             loader("GeoLite2-City.mmdb"),
             () -> true,
             "target_field",
-            ipDataLookupAll(Database.City),
+            getMaxmindCityLookup(),
             false,
             true,
             "filename"
@@ -350,7 +407,7 @@ public class GeoIpProcessorTests extends ESTestCase {
             loader("GeoLite2-City.mmdb"),
             () -> false,
             "target_field",
-            ipDataLookupAll(Database.City),
+            getMaxmindCityLookup(),
             false,
             true,
             "filename"
@@ -374,7 +431,7 @@ public class GeoIpProcessorTests extends ESTestCase {
             () -> null,
             () -> true,
             "target_field",
-            ipDataLookupAll(Database.City),
+            getMaxmindCityLookup(),
             false,
             false,
             "GeoLite2-City"
@@ -398,7 +455,7 @@ public class GeoIpProcessorTests extends ESTestCase {
             () -> null,
             () -> true,
             "target_field",
-            ipDataLookupAll(Database.City),
+            getMaxmindCityLookup(),
             true,
             false,
             "GeoLite2-City"
@@ -412,13 +469,24 @@ public class GeoIpProcessorTests extends ESTestCase {
         assertIngestDocument(originalIngestDocument, ingestDocument);
     }
 
+    private static IpDataLookup getMaxmindCityLookup() {
+        final var database = Database.City;
+        return MaxmindIpDataLookups.getMaxmindLookup(database).apply(database.properties()); // City lookup over database.properties()
+    }
+
+    private static IpDataLookup getIpinfoGeolocationLookup() {
+        final var database = Database.CityV2; // CityV2 is the Database that getIpinfoLookup maps to the Geolocation lookup
+        return IpinfoIpDataLookups.getIpinfoLookup(database).apply(database.properties());
+    }
+
     private CheckedSupplier<IpDatabase, IOException> loader(final String path) {
         var loader = loader(path, null); // built once, up front, with no 'closed' flag
         return () -> loader; // every call to the supplier hands back that same instance
     }
 
     private DatabaseReaderLazyLoader loader(final String databaseName, final AtomicBoolean closed) {
-        Path path = tmpDir.resolve(databaseName);
+        int last = databaseName.lastIndexOf("/");
+        final Path path = tmpDir.resolve(last == -1 ? databaseName : databaseName.substring(last + 1));
         copyDatabase(databaseName, path);
 
         final GeoIpCache cache = new GeoIpCache(1000);

+ 103 - 0
modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IpinfoIpDataLookupsTests.java

@@ -31,12 +31,14 @@ import java.util.function.BiConsumer;
 
 import static java.util.Map.entry;
 import static org.elasticsearch.ingest.geoip.GeoIpTestUtils.copyDatabase;
+import static org.elasticsearch.ingest.geoip.IpinfoIpDataLookups.ipinfoTypeCleanup;
 import static org.elasticsearch.ingest.geoip.IpinfoIpDataLookups.parseAsn;
 import static org.elasticsearch.ingest.geoip.IpinfoIpDataLookups.parseBoolean;
 import static org.elasticsearch.ingest.geoip.IpinfoIpDataLookups.parseLocationDouble;
 import static org.hamcrest.Matchers.anyOf;
 import static org.hamcrest.Matchers.containsInAnyOrder;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.notNullValue;
 import static org.hamcrest.Matchers.nullValue;
 import static org.hamcrest.Matchers.startsWith;
@@ -308,6 +310,107 @@ public class IpinfoIpDataLookupsTests extends ESTestCase {
         }
     }
 
+    public void testIpinfoTypeCleanup() { // checks ipinfoTypeCleanup against a table of raw database_type strings and their expected cleaned forms
+        Map<String, String> typesToCleanedTypes = Map.ofEntries(
+            // database_type strings from upstream:
+            // abuse.mmdb
+            entry("ipinfo standard_abuse_mmdb_v4.mmdb", "abuse_v4"),
+            // asn.mmdb
+            entry("ipinfo generic_asn_mmdb_v4.mmdb", "asn_v4"),
+            // carrier.mmdb
+            entry("ipinfo standard_carrier_mmdb.mmdb", "carrier"),
+            // location_extended_v2.mmdb
+            entry("ipinfo extended_location_v2.mmdb", "location_v2"),
+            // privacy_extended_v2.mmdb
+            entry("ipinfo extended_privacy_v2.mmdb", "privacy_v2"),
+            // standard_company.mmdb
+            entry("ipinfo standard_company.mmdb", "company"),
+            // standard_ip_hosted_domains_sample.mmdb
+            entry("ipinfo standard_ip_hosted_domains_sample.mmdb", "hosted_domains"),
+            // standard_location.mmdb
+            entry("ipinfo standard_location_mmdb_v4.mmdb", "location_v4"),
+            // standard_privacy.mmdb
+            entry("ipinfo standard_privacy.mmdb", "privacy"),
+
+            // database_type strings from test files:
+            // ip_asn_sample.mmdb
+            entry("ipinfo ip_asn_sample.mmdb", "asn"),
+            // ip_country_asn_sample.mmdb
+            entry("ipinfo ip_country_asn_sample.mmdb", "country_asn"),
+            // ip_geolocation_sample.mmdb
+            entry("ipinfo ip_geolocation_sample.mmdb", "geolocation"),
+            // abuse_contact_sample.mmdb
+            entry("ipinfo abuse_contact_sample.mmdb", "abuse_contact"),
+            // asn_sample.mmdb
+            entry("ipinfo asn_sample.mmdb", "asn"),
+            // hosted_domains_sample.mmdb
+            entry("ipinfo hosted_domains_sample.mmdb", "hosted_domains"),
+            // ip_carrier_sample.mmdb
+            entry("ipinfo ip_carrier_sample.mmdb", "carrier"),
+            // ip_company_sample.mmdb
+            entry("ipinfo ip_company_sample.mmdb", "company"),
+            // ip_country_sample.mmdb
+            entry("ipinfo ip_country_sample.mmdb", "country"),
+            // ip_geolocation_extended_ipv4_sample.mmdb
+            entry("ipinfo ip_geolocation_extended_ipv4_sample.mmdb", "geolocation_ipv4"),
+            // ip_geolocation_extended_ipv6_sample.mmdb
+            entry("ipinfo ip_geolocation_extended_ipv6_sample.mmdb", "geolocation_ipv6"),
+            // ip_geolocation_extended_sample.mmdb
+            entry("ipinfo ip_geolocation_extended_sample.mmdb", "geolocation"),
+            // ip_rdns_domains_sample.mmdb
+            entry("ipinfo ip_rdns_domains_sample.mmdb", "rdns_domains"),
+            // ip_rdns_hostnames_sample.mmdb
+            entry("ipinfo ip_rdns_hostnames_sample.mmdb", "rdns_hostnames"),
+            // privacy_detection_extended_sample.mmdb
+            entry("ipinfo privacy_detection_extended_sample.mmdb", "privacy_detection"),
+            // privacy_detection_sample.mmdb
+            entry("ipinfo privacy_detection_sample.mmdb", "privacy_detection"),
+
+            // database_type strings from downloaded (free) files:
+            // asn.mmdb
+            entry("ipinfo generic_asn_free.mmdb", "asn"),
+            // country.mmdb
+            entry("ipinfo generic_country_free.mmdb", "country"),
+            // country_asn.mmdb
+            entry("ipinfo generic_country_free_country_asn.mmdb", "country_country_asn") // NOTE(review): expected value repeats "country" -- presumably pins the current cleanup output; confirm it is intended
+        );
+
+        for (var entry : typesToCleanedTypes.entrySet()) { // iteration order does not matter; each pair is asserted independently
+            String type = entry.getKey();
+            String cleanedType = entry.getValue();
+            assertThat(ipinfoTypeCleanup(type), equalTo(cleanedType)); // each raw type must clean to exactly its mapped value
+        }
+    }
+
+    public void testDatabaseTypeParsing() throws IOException {
+        // this test is a little bit overloaded -- it's testing that we're getting the expected sorts of
+        // database_type strings from these files, *and* it's also testing that we dispatch on those strings
+        // correctly and associate those files with the correct high-level Elasticsearch Database type.
+        // down the road it would probably make sense to split these out and find a better home for some of the
+        // logic, but for now it's probably more valuable to have the test *somewhere* than to get especially
+        // pedantic about where precisely it should be.
+
+        copyDatabase("ipinfo/ip_asn_sample.mmdb", tmpDir.resolve("ip_asn_sample.mmdb"));
+        copyDatabase("ipinfo/ip_geolocation_sample.mmdb", tmpDir.resolve("ip_geolocation_sample.mmdb"));
+        copyDatabase("ipinfo/asn_sample.mmdb", tmpDir.resolve("asn_sample.mmdb"));
+        copyDatabase("ipinfo/ip_country_sample.mmdb", tmpDir.resolve("ip_country_sample.mmdb"));
+        copyDatabase("ipinfo/privacy_detection_sample.mmdb", tmpDir.resolve("privacy_detection_sample.mmdb"));
+
+        assertThat(parseDatabaseFromType("ip_asn_sample.mmdb"), is(Database.AsnV2)); // parseDatabaseFromType resolves the file name against tmpDir
+        assertThat(parseDatabaseFromType("ip_geolocation_sample.mmdb"), is(Database.CityV2));
+        assertThat(parseDatabaseFromType("asn_sample.mmdb"), is(Database.AsnV2));
+        assertThat(parseDatabaseFromType("ip_country_sample.mmdb"), is(Database.CountryV2));
+        assertThat(parseDatabaseFromType("privacy_detection_sample.mmdb"), is(Database.PrivacyDetection));
+
+        // additional cases where we're bailing early on types we don't support
+        assertThat(IpDataLookupFactories.getDatabase("ipinfo ip_country_asn_sample.mmdb"), nullValue()); // the type string is passed directly here; no file is read
+        assertThat(IpDataLookupFactories.getDatabase("ipinfo privacy_detection_extended_sample.mmdb"), nullValue());
+    }
+
+    private Database parseDatabaseFromType(String databaseFile) throws IOException { // databaseFile is a file name relative to tmpDir
+        return IpDataLookupFactories.getDatabase(MMDBUtil.getDatabaseType(tmpDir.resolve(databaseFile))); // the type read from the file by MMDBUtil is passed to IpDataLookupFactories.getDatabase
+    }
+
     private static void assertDatabaseInvariants(final Path databasePath, final BiConsumer<InetAddress, Map<String, Object>> rowConsumer) {
         try (Reader reader = new Reader(pathToFile(databasePath))) {
             Networks<?> networks = reader.networks(Map.class);