Browse Source

Add tool for preparing local GeoIp database service (#71018)

Air-gapped environments can't simply use the GeoIp database service provided by Infra, so they have to either use a proxy or recreate a similar service themselves.
This PR adds a tool to make this process easier. The basic workflow is:

download databases from the MaxMind site to a single directory (either .mmdb files or gzipped tarballs with the .tgz suffix)
run the tool with $ES_PATH/bin/elasticsearch-geoip -s directory/to/use [-t target/directory]
serve static files from that directory (for example with docker run -v directory/to/use:/usr/share/nginx/html:ro nginx)
use the server above as the endpoint for GeoIpDownloader (geoip.downloader.endpoint setting)
to update to new databases, simply put new files in the directory and run the tool again
This change also adds support for relative paths in overview.json because the cli tool doesn't know about the address it would be served under.

Relates to #68920
Przemko Robakowski 4 years ago
parent
commit
61fe14565a

+ 5 - 1
distribution/build.gradle

@@ -282,7 +282,7 @@ configure(subprojects.findAll { ['archives', 'packages'].contains(it.name) }) {
    *             Properties to expand when copying packaging files             *
    *****************************************************************************/
   configurations {
-    ['libs', 'libsPluginCli', 'libsKeystoreCli', 'libsSecurityCli'].each {
+    ['libs', 'libsPluginCli', 'libsKeystoreCli', 'libsSecurityCli', 'libsGeoIpCli'].each {
       create(it) {
         canBeConsumed = false
         canBeResolved = true
@@ -303,6 +303,7 @@ configure(subprojects.findAll { ['archives', 'packages'].contains(it.name) }) {
     libsPluginCli project(':distribution:tools:plugin-cli')
     libsKeystoreCli project(path: ':distribution:tools:keystore-cli')
     libsSecurityCli project(':x-pack:plugin:security:cli')
+    libsGeoIpCli project(':distribution:tools:geoip-cli')
   }
 
   project.ext {
@@ -314,6 +315,9 @@ configure(subprojects.findAll { ['archives', 'packages'].contains(it.name) }) {
       copySpec {
         // delay by using closures, since they have not yet been configured, so no jar task exists yet
         from(configurations.libs)
+        into('tools/geoip-cli') {
+          from(configurations.libsGeoIpCli)
+        }
         into('tools/plugin-cli') {
           from(configurations.libsPluginCli)
         }

+ 6 - 0
distribution/src/bin/elasticsearch-geoip

@@ -0,0 +1,6 @@
+#!/bin/bash
+
+ES_MAIN_CLASS=org.elasticsearch.geoip.GeoIpCli \
+  ES_ADDITIONAL_CLASSPATH_DIRECTORIES=lib/tools/geoip-cli \
+  "`dirname "$0"`"/elasticsearch-cli \
+  "$@"

+ 15 - 0
distribution/src/bin/elasticsearch-geoip.bat

@@ -0,0 +1,15 @@
+@echo off
+
+setlocal enabledelayedexpansion
+setlocal enableextensions
+
+set ES_MAIN_CLASS=org.elasticsearch.geoip.GeoIpCli
+set ES_ADDITIONAL_CLASSPATH_DIRECTORIES=lib/tools/geoip-cli
+call "%~dp0elasticsearch-cli.bat" ^
+  %%* ^
+  || goto exit
+
+endlocal
+endlocal
+:exit
+exit /b %ERRORLEVEL%

+ 18 - 0
distribution/tools/geoip-cli/build.gradle

@@ -0,0 +1,18 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+apply plugin: 'elasticsearch.build'
+
+archivesBaseName = 'elasticsearch-geoip-cli'
+
+dependencies {
+  compileOnly project(":server")
+  compileOnly project(":libs:elasticsearch-cli")
+  compileOnly project(":libs:elasticsearch-x-content")
+  testImplementation project(":test:framework")
+}

+ 161 - 0
distribution/tools/geoip-cli/src/main/java/org/elasticsearch/geoip/GeoIpCli.java

@@ -0,0 +1,161 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.geoip;
+
+import joptsimple.OptionSet;
+import joptsimple.OptionSpec;
+import org.elasticsearch.cli.Command;
+import org.elasticsearch.cli.Terminal;
+import org.elasticsearch.common.SuppressForbidden;
+import org.elasticsearch.common.hash.MessageDigests;
+import org.elasticsearch.common.io.PathUtils;
+import org.elasticsearch.common.xcontent.XContentGenerator;
+import org.elasticsearch.common.xcontent.XContentType;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.security.DigestInputStream;
+import java.security.MessageDigest;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.stream.Collectors;
+import java.util.zip.GZIPOutputStream;
+
+import static java.nio.file.StandardOpenOption.CREATE;
+import static java.nio.file.StandardOpenOption.TRUNCATE_EXISTING;
+
+public class GeoIpCli extends Command {
+
+    private static final byte[] EMPTY_BUF = new byte[512];
+
+    // visible for testing
+    final OptionSpec<String> sourceDirectory;
+    final OptionSpec<String> targetDirectory;
+
+    public GeoIpCli() {
+        super("A CLI tool to prepare local GeoIp database service", () -> {});
+        sourceDirectory = parser.acceptsAll(Arrays.asList("s", "source"), "Source directory").withRequiredArg().required();
+        targetDirectory = parser.acceptsAll(Arrays.asList("t", "target"), "Target directory").withRequiredArg();
+
+    }
+
+    @Override
+    protected void execute(Terminal terminal, OptionSet options) throws Exception {
+        Path source = getPath(options.valueOf(sourceDirectory));
+        String targetString = options.valueOf(targetDirectory);
+        Path target = targetString != null ? getPath(targetString) : source;
+        copyTgzToTarget(terminal, source, target);
+        packDatabasesToTgz(terminal, source, target);
+        createOverviewJson(terminal, target);
+    }
+
+    @SuppressForbidden(reason = "file arg for cli")
+    private Path getPath(String file) {
+        return PathUtils.get(file);
+    }
+
+    private void copyTgzToTarget(Terminal terminal, Path source, Path target) throws IOException {
+        if (source.equals(target)) {
+            return;
+        }
+        List<Path> toCopy = Files.list(source).filter(p -> p.getFileName().toString().endsWith(".tgz")).collect(Collectors.toList());
+        for (Path path : toCopy) {
+            Files.copy(path, target.resolve(path.getFileName()), StandardCopyOption.REPLACE_EXISTING);
+        }
+    }
+
+    private void packDatabasesToTgz(Terminal terminal, Path source, Path target) throws IOException {
+        List<Path> toPack = Files.list(source).filter(p -> p.getFileName().toString().endsWith(".mmdb")).collect(Collectors.toList());
+        for (Path path : toPack) {
+            String fileName = path.getFileName().toString();
+            Path compressedPath = target.resolve(fileName.replaceAll("mmdb$", "") + "tgz");
+            terminal.println("Found " + fileName + ", will compress it to " + compressedPath.getFileName());
+            try (
+                OutputStream fos = Files.newOutputStream(compressedPath, TRUNCATE_EXISTING, CREATE);
+                OutputStream gos = new GZIPOutputStream(new BufferedOutputStream(fos))
+            ) {
+                long size = Files.size(path);
+                gos.write(createTarHeader(fileName, size));
+                Files.copy(path, gos);
+                if (size % 512 != 0) {
+                    gos.write(EMPTY_BUF, 0, (int) (512 - (size % 512)));
+                }
+                gos.write(EMPTY_BUF);
+                gos.write(EMPTY_BUF);
+            }
+        }
+    }
+
+    private void createOverviewJson(Terminal terminal, Path directory) throws IOException {
+        List<Path> databasesPaths = Files.list(directory)
+            .filter(p -> p.getFileName().toString().endsWith(".tgz"))
+            .collect(Collectors.toList());
+        Path overview = directory.resolve("overview.json");
+        try (
+            OutputStream os = new BufferedOutputStream(Files.newOutputStream(overview, TRUNCATE_EXISTING, CREATE));
+            XContentGenerator generator = XContentType.JSON.xContent().createGenerator(os)
+        ) {
+            generator.writeStartArray();
+            for (Path db : databasesPaths) {
+                terminal.println("Adding " + db.getFileName() + " to overview.json");
+                MessageDigest md5 = MessageDigests.md5();
+                try (InputStream dis = new DigestInputStream(new BufferedInputStream(Files.newInputStream(db)), md5)) {
+                    dis.transferTo(OutputStream.nullOutputStream());
+                }
+                String digest = MessageDigests.toHexString(md5.digest());
+                generator.writeStartObject();
+                String fileName = db.getFileName().toString();
+                generator.writeStringField("name", fileName);
+                generator.writeStringField("md5_hash", digest);
+                generator.writeStringField("url", fileName);
+                generator.writeNumberField("updated", System.currentTimeMillis());
+                generator.writeEndObject();
+            }
+            generator.writeEndArray();
+        }
+        terminal.println("overview.json created");
+    }
+
+    private byte[] createTarHeader(String name, long size) {
+        byte[] buf = new byte[512];
+        byte[] sizeBytes = String.format(Locale.ROOT, "%1$012o", size).getBytes(StandardCharsets.UTF_8);
+        byte[] nameBytes = name.substring(Math.max(0, name.length() - 100)).getBytes(StandardCharsets.US_ASCII);
+        byte[] id = "0001750".getBytes(StandardCharsets.UTF_8);
+        byte[] permission = "000644 ".getBytes(StandardCharsets.UTF_8);
+        byte[] time = String.format(Locale.ROOT, "%1$012o", System.currentTimeMillis() / 1000).getBytes(StandardCharsets.UTF_8);
+        System.arraycopy(nameBytes, 0, buf, 0, nameBytes.length);
+        System.arraycopy(permission, 0, buf, 100, 7);
+        System.arraycopy(id, 0, buf, 108, 7);
+        System.arraycopy(id, 0, buf, 116, 7);
+        System.arraycopy(sizeBytes, 0, buf, 124, 12);
+        System.arraycopy(time, 0, buf, 136, 12);
+
+        int checksum = 256;
+        for (byte b : buf) {
+            checksum += b & 0xFF;
+        }
+
+        byte[] checksumBytes = String.format(Locale.ROOT, "%1$07o", checksum).getBytes(StandardCharsets.UTF_8);
+        System.arraycopy(checksumBytes, 0, buf, 148, 7);
+
+        return buf;
+    }
+
+    public static void main(String[] args) throws Exception {
+        exit(new GeoIpCli().main(args, Terminal.DEFAULT));
+    }
+}

+ 160 - 0
distribution/tools/geoip-cli/src/test/java/org/elasticsearch/geoip/GeoIpCliTests.java

@@ -0,0 +1,160 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.geoip;
+
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.lucene.util.LuceneTestCase;
+import org.elasticsearch.cli.MockTerminal;
+import org.elasticsearch.common.SuppressForbidden;
+import org.elasticsearch.common.xcontent.DeprecationHandler;
+import org.elasticsearch.common.xcontent.NamedXContentRegistry;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.XContentType;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import java.util.zip.GZIPInputStream;
+
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.hasEntry;
+import static org.hamcrest.Matchers.hasKey;
+
+@LuceneTestCase.SuppressFileSystems(value = "ExtrasFS") // Don't randomly add 'extra' files to directory.
+public class GeoIpCliTests extends LuceneTestCase {
+
+    private Path source;
+    private Path target;
+
+    public void setUp() throws Exception {
+        super.setUp();
+        Path tempPath = createTempDir();
+        source = tempPath.resolve("source");
+        target = tempPath.resolve("target");
+        Files.createDirectory(source);
+        Files.createDirectory(target);
+    }
+
+    @SuppressForbidden(reason = "process builder requires File for directory")
+    private File getTargetFile() {
+        return target.toFile();
+    }
+
+    public void testNoSource() throws Exception {
+        MockTerminal terminal = new MockTerminal();
+        new GeoIpCli().main(new String[] {}, terminal);
+        assertThat(terminal.getErrorOutput(), containsString("Missing required option(s) [s/source]"));
+    }
+
+    public void testDifferentDirectories() throws Exception {
+        Map<String, byte[]> data = createTestFiles(source);
+
+        GeoIpCli cli = new GeoIpCli();
+        cli.main(new String[] { "-t", target.toAbsolutePath().toString(), "-s", source.toAbsolutePath().toString() }, new MockTerminal());
+
+        try (Stream<Path> list = Files.list(source)) {
+            List<String> files = list.map(p -> p.getFileName().toString()).collect(Collectors.toList());
+            assertThat(files, containsInAnyOrder("a.mmdb", "b.mmdb", "c.tgz"));
+        }
+
+        try (Stream<Path> list = Files.list(target)) {
+            List<String> files = list.map(p -> p.getFileName().toString()).collect(Collectors.toList());
+            assertThat(files, containsInAnyOrder("a.tgz", "b.tgz", "c.tgz", "overview.json"));
+        }
+
+        verifyTarball(data);
+        verifyOverview();
+    }
+
+    public void testSameDirectory() throws Exception {
+        Map<String, byte[]> data = createTestFiles(target);
+
+        GeoIpCli cli = new GeoIpCli();
+        cli.main(new String[] { "-s", target.toAbsolutePath().toString() }, new MockTerminal());
+
+        try (Stream<Path> list = Files.list(target)) {
+            List<String> files = list.map(p -> p.getFileName().toString()).collect(Collectors.toList());
+            assertThat(files, containsInAnyOrder("a.mmdb", "b.mmdb", "a.tgz", "b.tgz", "c.tgz", "overview.json"));
+        }
+
+        Files.delete(target.resolve("a.mmdb"));
+        Files.delete(target.resolve("b.mmdb"));
+
+        verifyTarball(data);
+        verifyOverview();
+    }
+
+    private void verifyOverview() throws Exception {
+        byte[] data = Files.readAllBytes(target.resolve("overview.json"));
+        try (
+            XContentParser parser = XContentType.JSON.xContent()
+                .createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, data)
+        ) {
+            @SuppressWarnings({ "unchecked" })
+            List<Map<String, String>> list = (List) parser.list();
+            assertThat(list, containsInAnyOrder(hasEntry("name", "a.tgz"), hasEntry("name", "b.tgz"), hasEntry("name", "c.tgz")));
+            assertThat(list, containsInAnyOrder(hasEntry("url", "a.tgz"), hasEntry("url", "b.tgz"), hasEntry("url", "c.tgz")));
+            for (Map<String, String> map : list) {
+                assertThat(map, hasKey("md5_hash"));
+                assertThat(map, hasKey("updated"));
+            }
+        }
+    }
+
+    private void verifyTarball(Map<String, byte[]> data) throws Exception {
+        for (String tgz : List.of("a.tgz", "b.tgz")) {
+            try (
+                TarArchiveInputStream tis = new TarArchiveInputStream(
+                    new GZIPInputStream(new BufferedInputStream(Files.newInputStream(target.resolve(tgz))))
+                )
+            ) {
+                TarArchiveEntry entry = tis.getNextTarEntry();
+                assertNotNull(entry);
+                assertTrue(entry.isFile());
+                byte[] bytes = data.get(tgz);
+                assertEquals(tgz.replace(".tgz", ".mmdb"), entry.getName());
+                assertEquals(bytes.length, entry.getSize());
+                assertArrayEquals(bytes, tis.readAllBytes());
+                assertEquals(1000, entry.getLongUserId());
+                assertEquals(1000, entry.getLongGroupId());
+                assertEquals(420, entry.getMode()); // 644oct=420dec
+
+                assertNull(tis.getNextTarEntry());
+            }
+        }
+    }
+
+    private Map<String, byte[]> createTestFiles(Path dir) throws IOException {
+        Map<String, byte[]> data = new HashMap<>();
+
+        byte[] a = new byte[514];
+        Arrays.fill(a, (byte) 'a');
+        Files.write(dir.resolve("a.mmdb"), a);
+        data.put("a.tgz", a);
+
+        byte[] b = new byte[100];
+        Arrays.fill(b, (byte) 'b');
+        Files.write(dir.resolve("b.mmdb"), b);
+        data.put("b.tgz", b);
+
+        Files.createFile(dir.resolve("c.tgz"));
+
+        return data;
+    }
+}

+ 5 - 0
modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloader.java

@@ -131,6 +131,11 @@ public class GeoIpDownloader extends AllocatedPersistentTask {
         }
         logger.info("updating geoip database [" + name + "]");
         String url = databaseInfo.get("url").toString();
+        if (url.startsWith("http") == false) {
+            //relative url, add it after last slash (i.e resolve sibling) or at the end if there's no slash after http[s]://
+            int lastSlash = endpoint.substring(8).lastIndexOf('/');
+            url = (lastSlash != -1 ? endpoint.substring(0, lastSlash + 8) : endpoint) + "/" + url;
+        }
         long start = System.currentTimeMillis();
         try (InputStream is = httpClient.get(url)) {
             int firstChunk = state.contains(name) ? state.get(name).getLastChunk() + 1 : 0;

+ 5 - 5
modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java

@@ -180,7 +180,7 @@ public class GeoIpDownloaderTests extends ESTestCase {
 
     public void testProcessDatabaseNew() throws IOException {
         ByteArrayInputStream bais = new ByteArrayInputStream(new byte[0]);
-        when(httpClient.get("a.b/t1")).thenReturn(bais);
+        when(httpClient.get("http://a.b/t1")).thenReturn(bais);
 
         geoIpDownloader = new GeoIpDownloader(client, httpClient, clusterService, threadPool, Settings.EMPTY,
             1, "", "", "", EMPTY_TASK_ID, Collections.emptyMap()) {
@@ -210,12 +210,12 @@ public class GeoIpDownloaderTests extends ESTestCase {
         };
 
         geoIpDownloader.setState(GeoIpTaskState.EMPTY);
-        geoIpDownloader.processDatabase(Map.of("name", "test.tgz", "url", "a.b/t1", "md5_hash", "1"));
+        geoIpDownloader.processDatabase(Map.of("name", "test.tgz", "url", "http://a.b/t1", "md5_hash", "1"));
     }
 
     public void testProcessDatabaseUpdate() throws IOException {
         ByteArrayInputStream bais = new ByteArrayInputStream(new byte[0]);
-        when(httpClient.get("a.b/t1")).thenReturn(bais);
+        when(httpClient.get("http://a.b/t1")).thenReturn(bais);
 
         geoIpDownloader = new GeoIpDownloader(client, httpClient, clusterService, threadPool, Settings.EMPTY,
             1, "", "", "", EMPTY_TASK_ID, Collections.emptyMap()) {
@@ -245,7 +245,7 @@ public class GeoIpDownloaderTests extends ESTestCase {
         };
 
         geoIpDownloader.setState(GeoIpTaskState.EMPTY.put("test.mmdb", new GeoIpTaskState.Metadata(0, 5, 8, "0")));
-        geoIpDownloader.processDatabase(Map.of("name", "test.tgz", "url", "a.b/t1", "md5_hash", "1"));
+        geoIpDownloader.processDatabase(Map.of("name", "test.tgz", "url", "http://a.b/t1", "md5_hash", "1"));
     }
 
 
@@ -280,7 +280,7 @@ public class GeoIpDownloaderTests extends ESTestCase {
             }
         };
         geoIpDownloader.setState(taskState);
-        geoIpDownloader.processDatabase(Map.of("name", "test.tgz", "url", "a.b/t1", "md5_hash", "1"));
+        geoIpDownloader.processDatabase(Map.of("name", "test.tgz", "url", "http://a.b/t1", "md5_hash", "1"));
     }
 
     @SuppressWarnings("unchecked")

+ 1 - 0
settings.gradle

@@ -49,6 +49,7 @@ List projects = [
   'distribution:tools:launchers',
   'distribution:tools:plugin-cli',
   'distribution:tools:keystore-cli',
+  'distribution:tools:geoip-cli',
   'server',
   'server:cli',
   'test:framework',

+ 1 - 2
test/fixtures/geoip-fixture/src/main/java/fixture/geoip/GeoIpHttpFixture.java

@@ -23,7 +23,7 @@ public class GeoIpHttpFixture {
     private final HttpServer server;
 
     GeoIpHttpFixture(final String[] args) throws Exception {
-        String rawData = new String(GeoIpHttpFixture.class.getResourceAsStream("/data.json").readAllBytes(), StandardCharsets.UTF_8);
+        String data = new String(GeoIpHttpFixture.class.getResourceAsStream("/data.json").readAllBytes(), StandardCharsets.UTF_8);
         this.server = HttpServer.create(new InetSocketAddress(InetAddress.getByName(args[0]), Integer.parseInt(args[1])), 0);
         this.server.createContext("/", exchange -> {
             String query = exchange.getRequestURI().getQuery();
@@ -32,7 +32,6 @@ public class GeoIpHttpFixture {
                 exchange.getResponseBody().close();
                 return;
             }
-            String data = rawData.replace("endpoint", "http://" + exchange.getRequestHeaders().getFirst("Host"));
             exchange.sendResponseHeaders(200, data.length());
             try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(exchange.getResponseBody()))) {
                 writer.write(data);

+ 3 - 3
test/fixtures/geoip-fixture/src/main/resources/data.json

@@ -2,19 +2,19 @@
   {
     "md5_hash": "da5bb1c00c74e3f5a34ca1ec0022c550",
     "name": "GeoLite2-City.tgz",
-    "url": "endpoint/db/GeoLite2-City.tgz",
+    "url": "db/GeoLite2-City.tgz",
     "provider": "maxmind"
   },
   {
     "md5_hash": "61c38f0fcec4a7b0b359201f124004df",
     "name": "GeoLite2-ASN.tgz",
-    "url": "endpoint/db/GeoLite2-ASN.tgz",
+    "url": "db/GeoLite2-ASN.tgz",
     "provider": "maxmind"
   },
   {
     "md5_hash": "8f3229d6158f85adef296f8781f7ab49",
     "name": "GeoLite2-Country.tgz",
-    "url": "endpoint/db/GeoLite2-Country.tgz",
+    "url": "db/GeoLite2-Country.tgz",
     "provider": "maxmind"
   }
 ]