Browse Source

Make the Docker build more re-usable in Cloud (#50277)

Closes #49926 and #46166. Rework the Docker image so that it comes with a tiny
init system, to ensure ML processes are correctly cleaned up, and to run ES
as a regular user instead of root.

Also:

   * Ensure no files in the image have the setuid/setgid flag
   * Also improve dependency tracking in the build
   * Remove TAKE_FILE_OWNERSHIP option and its documentation
Rory Hunter 5 years ago
parent
commit
8a6d68b173

+ 1 - 0
distribution/docker/build.gradle

@@ -208,6 +208,7 @@ subprojects { Project subProject ->
     def tarFile = "${parent.projectDir}/build/elasticsearch${oss ? '-oss' : ''}_test.${VersionProperties.elasticsearch}.docker.tar"
 
     final Task exportDockerImageTask = task(exportTaskName, type: LoggedExec) {
+      inputs.file("${parent.projectDir}/build/markers/${buildTaskName}.marker")
       executable 'docker'
       outputs.file(tarFile)
       args "save",

+ 21 - 6
distribution/docker/src/docker/Dockerfile

@@ -35,6 +35,17 @@ RUN chmod 0775 config data logs
 COPY config/elasticsearch.yml config/log4j2.properties config/
 RUN chmod 0660 config/elasticsearch.yml config/log4j2.properties
 
+# `tini` is a tiny but valid init for containers. This is used to cleanly
+# control how ES and any child processes are shut down.
+#
+# The tini GitHub page gives instructions for verifying the binary using
+# gpg, but the keyservers are slow to return the key and this can fail the
+# build. Instead, we check the binary against a checksum that we have
+# computed.
+ADD https://github.com/krallin/tini/releases/download/v0.18.0/tini /tini
+COPY config/tini.sha512 /tini.sha512
+RUN sha512sum -c /tini.sha512 && chmod +x /tini
+
 ################################################################################
 # Build stage 1 (the actual elasticsearch image):
 # Copy elasticsearch from stage 0
@@ -45,6 +56,8 @@ FROM centos:7
 
 ENV ELASTIC_CONTAINER true
 
+COPY --from=builder /tini /tini
+
 RUN for iter in {1..10}; do yum update --setopt=tsflags=nodocs -y && \
     yum install --setopt=tsflags=nodocs -y nc shadow-utils zip unzip && \
     yum clean all && exit_code=0 && break || exit_code=\$? && echo "yum error: retry \$iter in 10s" && sleep 10; done; \
@@ -65,14 +78,14 @@ RUN ln -sf /etc/pki/ca-trust/extracted/java/cacerts /usr/share/elasticsearch/jdk
 
 ENV PATH /usr/share/elasticsearch/bin:\$PATH
 
-COPY --chown=1000:0 bin/docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
+COPY bin/docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
 
-# Openshift overrides USER and uses ones with randomly uid>1024 and gid=0
-# Allow ENTRYPOINT (and ES) to run even with a different user
-RUN chgrp 0 /usr/local/bin/docker-entrypoint.sh && \
-    chmod g=u /etc/passwd && \
+RUN chmod g=u /etc/passwd && \
     chmod 0775 /usr/local/bin/docker-entrypoint.sh
 
+# Ensure that there are no files with setuid or setgid, in order to mitigate "stackclash" attacks.
+RUN find / -xdev -perm -4000 -exec chmod ug-s {} +
+
 EXPOSE 9200 9300
 
 LABEL org.label-schema.build-date="${build_date}" \
@@ -95,7 +108,9 @@ LABEL org.label-schema.build-date="${build_date}" \
   org.opencontainers.image.vendor="Elastic" \
   org.opencontainers.image.version="${version}"
 
-ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
+USER elasticsearch:root
+
+ENTRYPOINT ["/tini", "--", "/usr/local/bin/docker-entrypoint.sh"]
 # Dummy overridable parameter parsed by entrypoint
 CMD ["eswrapper"]
 

+ 19 - 40
distribution/docker/src/docker/bin/docker-entrypoint.sh

@@ -4,38 +4,22 @@ set -e
 # Files created by Elasticsearch should always be group writable too
 umask 0002
 
-run_as_other_user_if_needed() {
-  if [[ "$(id -u)" == "0" ]]; then
-    # If running as root, drop to specified UID and run command
-    exec chroot --userspec=1000 / "${@}"
-  else
-    # Either we are running in Openshift with random uid and are a member of the root group
-    # or with a custom --user
-    exec "${@}"
-  fi
-}
-
 # Allow user specify custom CMD, maybe bin/elasticsearch itself
 # for example to directly specify `-E` style parameters for elasticsearch on k8s
 # or simply to run /bin/bash to check the image
-if [[ "$1" != "eswrapper" ]]; then
-  if [[ "$(id -u)" == "0" && $(basename "$1") == "elasticsearch" ]]; then
-    # centos:7 chroot doesn't have the `--skip-chdir` option and
-    # changes our CWD.
-    # Rewrite CMD args to replace $1 with `elasticsearch` explicitly,
-    # so that we are backwards compatible with the docs
-    # from the previous Elasticsearch versions<6
-    # and configuration option D:
-    # https://www.elastic.co/guide/en/elasticsearch/reference/5.6/docker.html#_d_override_the_image_8217_s_default_ulink_url_https_docs_docker_com_engine_reference_run_cmd_default_command_or_options_cmd_ulink
-    # Without this, user could specify `elasticsearch -E x.y=z` but
-    # `bin/elasticsearch -E x.y=z` would not work.
-    set -- "elasticsearch" "${@:2}"
-    # Use chroot to switch to UID 1000
-    exec chroot --userspec=1000 / "$@"
-  else
-    # User probably wants to run something else, like /bin/bash, with another uid forced (Openshift?)
-    exec "$@"
-  fi
+if [[ "$1" == "eswrapper" || $(basename "$1") == "elasticsearch" ]]; then
+  # Rewrite CMD args to remove the explicit command,
+  # so that we are backwards compatible with the docs
+  # from the previous Elasticsearch versions < 6
+  # and configuration option:
+  # https://www.elastic.co/guide/en/elasticsearch/reference/5.6/docker.html#_d_override_the_image_8217_s_default_ulink_url_https_docs_docker_com_engine_reference_run_cmd_default_command_or_options_cmd_ulink
+  # Without this, user could specify `elasticsearch -E x.y=z` but
+  # `bin/elasticsearch -E x.y=z` would not work. In any case,
+  # we want to continue through this script, and not exec early.
+  set -- "${@:2}"
+else
+  # Run whatever command the user wanted
+  exec "$@"
 fi
 
 # Allow environment variables to be set by creating a file with the
@@ -56,18 +40,13 @@ if [[ -f bin/elasticsearch-users ]]; then
   # enabled, but we have no way of knowing which node we are yet. We'll just
   # honor the variable if it's present.
   if [[ -n "$ELASTIC_PASSWORD" ]]; then
-    [[ -f /usr/share/elasticsearch/config/elasticsearch.keystore ]] || (run_as_other_user_if_needed elasticsearch-keystore create)
-    if ! (run_as_other_user_if_needed elasticsearch-keystore list | grep -q '^bootstrap.password$'); then
-      (run_as_other_user_if_needed echo "$ELASTIC_PASSWORD" | elasticsearch-keystore add -x 'bootstrap.password')
+    [[ -f /usr/share/elasticsearch/config/elasticsearch.keystore ]] || (elasticsearch-keystore create)
+    if ! (elasticsearch-keystore list | grep -q '^bootstrap.password$'); then
+      (echo "$ELASTIC_PASSWORD" | elasticsearch-keystore add -x 'bootstrap.password')
     fi
   fi
 fi
 
-if [[ "$(id -u)" == "0" ]]; then
-  # If requested and running as root, mutate the ownership of bind-mounts
-  if [[ -n "$TAKE_FILE_OWNERSHIP" ]]; then
-    chown -R 1000:0 /usr/share/elasticsearch/{data,logs}
-  fi
-fi
-
-run_as_other_user_if_needed /usr/share/elasticsearch/bin/elasticsearch
+# Signal forwarding and child reaping is handled by `tini`, which is the
+# actual entrypoint of the container
+exec /usr/share/elasticsearch/bin/elasticsearch

+ 1 - 0
distribution/docker/src/docker/config/tini.sha512

@@ -0,0 +1 @@
+ffdb31563e34bca91a094f962544b9d31f5d138432f2d639a0856ff605b3a69f47e48191da42d6956ab62a1b24eafca1a95b299901257832225d26770354ce5e  /tini

+ 32 - 8
distribution/src/bin/elasticsearch-env-from-file

@@ -20,19 +20,44 @@ for VAR_NAME_FILE in $(env | cut -f1 -d= | grep '_FILE$'); do
     fi
 
     if [[ ! -e "${!VAR_NAME_FILE}" ]]; then
-      echo "ERROR: File ${!VAR_NAME_FILE} from $VAR_NAME_FILE does not exist" >&2
+      # Maybe the file doesn't exist, maybe we just can't read it due to file permissions.
+      # Check permissions on each part of the path
+      path=''
+      if ! echo "${!VAR_NAME_FILE}" | grep -q '^/'; then
+        path='.'
+      fi
+
+      dirname "${!VAR_NAME_FILE}" | tr '/' '\n' | while read part; do
+        if [[ "$path" == "/" ]]; then
+          path="${path}${part}"
+        else
+          path="$path/$part"
+        fi
+
+        if ! [[ -x "$path" ]]; then
+          echo "ERROR: Cannot read ${!VAR_NAME_FILE} from $VAR_NAME_FILE, due to lack of permissions on '$path'" 2>&1
+          exit 1
+        fi
+      done
+
+      if ! [[ -r "${!VAR_NAME_FILE}" ]]; then
+        echo "ERROR: File ${!VAR_NAME_FILE} from $VAR_NAME_FILE is not readable." 2>&1
+      else
+        echo "ERROR: File ${!VAR_NAME_FILE} from $VAR_NAME_FILE does not exist" >&2
+      fi
+
       exit 1
     fi
 
     FILE_PERMS="$(stat -L -c '%a' ${!VAR_NAME_FILE})"
 
     if [[ "$FILE_PERMS" != "400" && "$FILE_PERMS" != "600" ]]; then
-        if [[ -h "${!VAR_NAME_FILE}" ]]; then
-            echo "ERROR: File $(readlink "${!VAR_NAME_FILE}") (target of symlink ${!VAR_NAME_FILE} from $VAR_NAME_FILE) must have file permissions 400 or 600, but actually has: $FILE_PERMS" >&2
-        else
-            echo "ERROR: File ${!VAR_NAME_FILE} from $VAR_NAME_FILE must have file permissions 400 or 600, but actually has: $FILE_PERMS" >&2
-        fi
-        exit 1
+      if [[ -L "${!VAR_NAME_FILE}" ]]; then
+        echo "ERROR: File $(readlink "${!VAR_NAME_FILE}") (target of symlink ${!VAR_NAME_FILE} from $VAR_NAME_FILE) must have file permissions 400 or 600, but actually has: $FILE_PERMS" >&2
+      else
+        echo "ERROR: File ${!VAR_NAME_FILE} from $VAR_NAME_FILE must have file permissions 400 or 600, but actually has: $FILE_PERMS" >&2
+      fi
+      exit 1
     fi
 
     echo "Setting $VAR_NAME from $VAR_NAME_FILE at ${!VAR_NAME_FILE}" >&2
@@ -43,4 +68,3 @@ for VAR_NAME_FILE in $(env | cut -f1 -d= | grep '_FILE$'); do
     unset "$VAR_NAME_FILE"
   fi
 done
-

+ 3 - 9
docs/reference/setup/install/docker.asciidoc

@@ -87,9 +87,9 @@ endif::[]
 This sample Docker Compose file brings up a three-node {es} cluster.
 Node `es01` listens on `localhost:9200` and `es02` and `es03` talk to `es01` over a Docker network.
 
-Please note that this configuration exposes port 9200 on all network interfaces, and given how 
-Docker manipulates `iptables` on Linux, this means that your {es} cluster is publically accessible, 
-potentially ignoring any firewall settings. If you don't want to expose port 9200 and instead use 
+Please note that this configuration exposes port 9200 on all network interfaces, and given how
+Docker manipulates `iptables` on Linux, this means that your {es} cluster is publicly accessible,
+potentially ignoring any firewall settings. If you don't want to expose port 9200 and instead use
 a reverse proxy, replace `9200:9200` with `127.0.0.1:9200:9200` in the docker-compose.yml file.
 {es} will then only be accessible from the host machine itself.
 
@@ -221,12 +221,6 @@ chmod g+rwx esdatadir
 chgrp 0 esdatadir
 --------------------------------------------
 
-As a last resort, you can force the container to mutate the ownership of
-any bind-mounts used for the <<path-settings,data and log dirs>> through the
-environment variable `TAKE_FILE_OWNERSHIP`. When you do this, they will be owned by
-uid:gid `1000:0`, which provides the required read/write access to the {es} process.
-
-
 ===== Increase ulimits for nofile and nproc
 
 Increased ulimits for <<setting-system-settings,nofile>> and <<max-number-threads-check,nproc>>

+ 25 - 4
qa/os/src/test/java/org/elasticsearch/packaging/test/DockerTests.java

@@ -651,21 +651,42 @@ public class DockerTests extends PackagingTestCase {
     }
 
     /**
-     * Check that the Java process running inside the container has the expect PID, UID and username.
+     * Check that the Java process running inside the container has the expected UID, GID and username.
      */
-    public void test130JavaHasCorrectPidAndOwnership() {
-        final List<String> processes = sh.run("ps -o pid,uid,user -C java").stdout.lines().skip(1).collect(Collectors.toList());
+    public void test130JavaHasCorrectOwnership() {
+        final List<String> processes = sh.run("ps -o uid,gid,user -C java").stdout.lines().skip(1).collect(Collectors.toList());
 
         assertThat("Expected a single java process", processes, hasSize(1));
 
         final String[] fields = processes.get(0).trim().split("\\s+");
 
         assertThat(fields, arrayWithSize(3));
+        assertThat("Incorrect UID", fields[0], equalTo("1000"));
+        assertThat("Incorrect GID", fields[1], equalTo("0"));
+        assertThat("Incorrect username", fields[2], equalTo("elasticsearch"));
+    }
+
+    /**
+     * Check that the init process running inside the container has the expected PID, UID, GID and user.
+     * The PID is particularly important because PID 1 handles signal forwarding and child reaping.
+     */
+    public void test131InitProcessHasCorrectPID() {
+        final List<String> processes = sh.run("ps -o pid,uid,gid,user -p 1").stdout.lines().skip(1).collect(Collectors.toList());
+
+        assertThat("Expected a single process", processes, hasSize(1));
+
+        final String[] fields = processes.get(0).trim().split("\\s+");
+
+        assertThat(fields, arrayWithSize(4));
         assertThat("Incorrect PID", fields[0], equalTo("1"));
         assertThat("Incorrect UID", fields[1], equalTo("1000"));
-        assertThat("Incorrect username", fields[2], equalTo("elasticsearch"));
+        assertThat("Incorrect GID", fields[2], equalTo("0"));
+        assertThat("Incorrect username", fields[3], equalTo("elasticsearch"));
     }
 
+    /**
+     * Check that Elasticsearch reports per-node cgroup information.
+     */
     public void test140CgroupOsStatsAreAvailable() throws Exception {
         waitForElasticsearch(installation);
 

+ 12 - 1
qa/os/src/test/java/org/elasticsearch/packaging/util/Docker.java

@@ -26,6 +26,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.http.client.fluent.Request;
 import org.elasticsearch.common.CheckedRunnable;
 
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.nio.file.attribute.PosixFileAttributes;
@@ -152,9 +153,19 @@ public class Docker {
 
         // Bind-mount any volumes
         if (volumes != null) {
-            volumes.forEach((localPath, containerPath) -> args.add("--volume \"" + localPath + ":" + containerPath + "\""));
+            volumes.forEach((localPath, containerPath) -> {
+                assertTrue(localPath + " doesn't exist", Files.exists(localPath));
+
+                if (Platforms.WINDOWS == false && System.getProperty("user.name").equals("root")) {
+                    // The tests are running as root, but the process in the Docker container runs as `elasticsearch` (UID 1000),
+                    // so we need to ensure that the container process is able to read the bind-mounted files.
+                    sh.run("chown -R 1000:0 " + localPath);
+                }
+                args.add("--volume \"" + localPath + ":" + containerPath + "\"");
+            });
         }
 
+        // Image name
         args.add(distribution.flavor.name + ":test");
 
         final String command = String.join(" ", args);