Browse Source

Logging: Log less source in slowlog

Instead of logging the entire `_source` in the indexing slowlog we log by
default just the first 1000 characters - this is controlled by the
`index.indexing.slowlog.source` settings and can be set to `true` to log the
whole `_source`, `false` to log none of it, and a number to log at most that
many characters.

Closes #4485
Nik Everett 10 years ago
parent
commit
79d9f5b775

+ 1 - 0
core/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java

@@ -175,6 +175,7 @@ public class IndexMetaData implements Diffable<IndexMetaData>, FromXContentBuild
     public static final String SETTING_SHARED_FS_ALLOW_RECOVERY_ON_ANY_NODE = "index.shared_filesystem.recover_on_any_node";
     public static final String INDEX_UUID_NA_VALUE = "_na_";
 
+
     // hard-coded hash function as of 2.0
     // older indices will read which hash function to use in their index settings
     private static final HashFunction MURMUR3_HASH_FUNCTION = new Murmur3HashFunction();

+ 24 - 0
core/src/main/java/org/elasticsearch/common/Strings.java

@@ -1093,4 +1093,28 @@ public class Strings {
             throw new AssertionError("Cannot happen", e);
         }
     }
+
+    /**
+     * Truncates string to a length less than length. Backtracks to throw out
+     * high surrogates.
+     */
+    public static String cleanTruncate(String s, int length) {
+        if (s == null) {
+            return s;
+        }
+        /*
+         * Its pretty silly for you to truncate to 0 length but just in case
+         * someone does this shouldn't break.
+         */
+        if (length == 0) {
+            return "";
+        }
+        if (length >= s.length()) {
+            return s;
+        }
+        if (Character.isHighSurrogate(s.charAt(length - 1))) {
+            length--;
+        }
+        return s.substring(0, length);
+    }
 }

+ 60 - 21
core/src/main/java/org/elasticsearch/index/indexing/IndexingSlowLog.java

@@ -19,6 +19,8 @@
 
 package org.elasticsearch.index.indexing;
 
+import org.elasticsearch.common.Booleans;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.logging.ESLogger;
 import org.elasticsearch.common.logging.Loggers;
 import org.elasticsearch.common.settings.Settings;
@@ -26,7 +28,6 @@ import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.index.engine.Engine;
 import org.elasticsearch.index.mapper.ParsedDocument;
-import org.elasticsearch.index.settings.IndexSettingsService;
 
 import java.io.IOException;
 import java.util.Locale;
@@ -42,6 +43,12 @@ public final class IndexingSlowLog {
     private long indexInfoThreshold;
     private long indexDebugThreshold;
     private long indexTraceThreshold;
+    /**
+     * How much of the source to log in the slowlog - 0 means log none and
+     * anything greater than 0 means log at least that many <em>characters</em>
+     * of the source.
+     */
+    private int maxSourceCharsToLog;
 
     private String level;
 
@@ -55,18 +62,27 @@ public final class IndexingSlowLog {
     public static final String INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_TRACE = INDEX_INDEXING_SLOWLOG_PREFIX +".threshold.index.trace";
     public static final String INDEX_INDEXING_SLOWLOG_REFORMAT = INDEX_INDEXING_SLOWLOG_PREFIX +".reformat";
     public static final String INDEX_INDEXING_SLOWLOG_LEVEL = INDEX_INDEXING_SLOWLOG_PREFIX +".level";
+    public static final String INDEX_INDEXING_SLOWLOG_MAX_SOURCE_CHARS_TO_LOG = INDEX_INDEXING_SLOWLOG_PREFIX + ".source";
 
     IndexingSlowLog(Settings indexSettings) {
+        this(indexSettings, Loggers.getLogger(INDEX_INDEXING_SLOWLOG_PREFIX + ".index"),
+                Loggers.getLogger(INDEX_INDEXING_SLOWLOG_PREFIX + ".delete"));
+    }
+
+    /**
+     * Build with the specified loggers. Only used to testing.
+     */
+    IndexingSlowLog(Settings indexSettings, ESLogger indexLogger, ESLogger deleteLogger) {
+        this.indexLogger = indexLogger;
+        this.deleteLogger = deleteLogger;
+
         this.reformat = indexSettings.getAsBoolean(INDEX_INDEXING_SLOWLOG_REFORMAT, true);
         this.indexWarnThreshold = indexSettings.getAsTime(INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_WARN, TimeValue.timeValueNanos(-1)).nanos();
         this.indexInfoThreshold = indexSettings.getAsTime(INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_INFO, TimeValue.timeValueNanos(-1)).nanos();
         this.indexDebugThreshold = indexSettings.getAsTime(INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_DEBUG, TimeValue.timeValueNanos(-1)).nanos();
         this.indexTraceThreshold = indexSettings.getAsTime(INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_TRACE, TimeValue.timeValueNanos(-1)).nanos();
-
         this.level = indexSettings.get(INDEX_INDEXING_SLOWLOG_LEVEL, "TRACE").toUpperCase(Locale.ROOT);
-
-        this.indexLogger = Loggers.getLogger(INDEX_INDEXING_SLOWLOG_PREFIX +".index");
-        this.deleteLogger = Loggers.getLogger(INDEX_INDEXING_SLOWLOG_PREFIX +".delete");
+        this.maxSourceCharsToLog = readSourceToLog(indexSettings);
 
         indexLogger.setLevel(level);
         deleteLogger.setLevel(level);
@@ -101,6 +117,11 @@ public final class IndexingSlowLog {
         if (reformat != this.reformat) {
             this.reformat = reformat;
         }
+
+        int maxSourceCharsToLog = readSourceToLog(settings);
+        if (maxSourceCharsToLog != this.maxSourceCharsToLog) {
+            this.maxSourceCharsToLog = maxSourceCharsToLog;
+        }
     }
 
     void postIndex(Engine.Index index, long tookInNanos) {
@@ -111,27 +132,44 @@ public final class IndexingSlowLog {
         postIndexing(create.parsedDoc(), tookInNanos);
     }
 
+    /**
+     * Reads how much of the source to log. The user can specify any value they
+     * like and numbers are interpreted the maximum number of characters to log
+     * and everything else is interpreted as Elasticsearch interprets booleans
+     * which is then converted to 0 for false and Integer.MAX_VALUE for true.
+     */
+    private int readSourceToLog(Settings settings) {
+        String sourceToLog = settings.get(INDEX_INDEXING_SLOWLOG_MAX_SOURCE_CHARS_TO_LOG, "1000");
+        try {
+            return Integer.parseInt(sourceToLog, 10);
+        } catch (NumberFormatException e) {
+            return Booleans.parseBoolean(sourceToLog, true) ? Integer.MAX_VALUE : 0;
+        }
+    }
+
     private void postIndexing(ParsedDocument doc, long tookInNanos) {
         if (indexWarnThreshold >= 0 && tookInNanos > indexWarnThreshold) {
-            indexLogger.warn("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat));
+            indexLogger.warn("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat, maxSourceCharsToLog));
         } else if (indexInfoThreshold >= 0 && tookInNanos > indexInfoThreshold) {
-            indexLogger.info("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat));
+            indexLogger.info("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat, maxSourceCharsToLog));
         } else if (indexDebugThreshold >= 0 && tookInNanos > indexDebugThreshold) {
-            indexLogger.debug("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat));
+            indexLogger.debug("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat, maxSourceCharsToLog));
         } else if (indexTraceThreshold >= 0 && tookInNanos > indexTraceThreshold) {
-            indexLogger.trace("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat));
+            indexLogger.trace("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat, maxSourceCharsToLog));
         }
     }
 
-    final static class SlowLogParsedDocumentPrinter {
+    static final class SlowLogParsedDocumentPrinter {
         private final ParsedDocument doc;
         private final long tookInNanos;
         private final boolean reformat;
+        private final int maxSourceCharsToLog;
 
-        SlowLogParsedDocumentPrinter(ParsedDocument doc, long tookInNanos, boolean reformat) {
+        SlowLogParsedDocumentPrinter(ParsedDocument doc, long tookInNanos, boolean reformat, int maxSourceCharsToLog) {
             this.doc = doc;
             this.tookInNanos = tookInNanos;
             this.reformat = reformat;
+            this.maxSourceCharsToLog = maxSourceCharsToLog;
         }
 
         @Override
@@ -141,18 +179,19 @@ public final class IndexingSlowLog {
             sb.append("type[").append(doc.type()).append("], ");
             sb.append("id[").append(doc.id()).append("], ");
             if (doc.routing() == null) {
-                sb.append("routing[], ");
+                sb.append("routing[] ");
             } else {
-                sb.append("routing[").append(doc.routing()).append("], ");
+                sb.append("routing[").append(doc.routing()).append("] ");
             }
-            if (doc.source() != null && doc.source().length() > 0) {
-                try {
-                    sb.append("source[").append(XContentHelper.convertToJson(doc.source(), reformat)).append("]");
-                } catch (IOException e) {
-                    sb.append("source[_failed_to_convert_]");
-                }
-            } else {
-                sb.append("source[]");
+
+            if (maxSourceCharsToLog == 0 || doc.source() == null || doc.source().length() == 0) {
+                return sb.toString();
+            }
+            try {
+                String source = XContentHelper.convertToJson(doc.source(), reformat);
+                sb.append(", source[").append(Strings.cleanTruncate(source, maxSourceCharsToLog)).append("]");
+            } catch (IOException e) {
+                sb.append(", source[_failed_to_convert_]");
             }
             return sb.toString();
         }

+ 1 - 1
core/src/main/java/org/elasticsearch/index/settings/IndexDynamicSettingsModule.java

@@ -30,7 +30,6 @@ import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocatio
 import org.elasticsearch.cluster.settings.DynamicSettings;
 import org.elasticsearch.cluster.settings.Validator;
 import org.elasticsearch.common.inject.AbstractModule;
-import org.elasticsearch.gateway.GatewayAllocator;
 import org.elasticsearch.index.engine.EngineConfig;
 import org.elasticsearch.index.indexing.IndexingSlowLog;
 import org.elasticsearch.index.search.stats.SearchSlowLog;
@@ -86,6 +85,7 @@ public class IndexDynamicSettingsModule extends AbstractModule {
         indexDynamicSettings.addDynamicSetting(IndexingSlowLog.INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_TRACE, Validator.TIME);
         indexDynamicSettings.addDynamicSetting(IndexingSlowLog.INDEX_INDEXING_SLOWLOG_REFORMAT);
         indexDynamicSettings.addDynamicSetting(IndexingSlowLog.INDEX_INDEXING_SLOWLOG_LEVEL);
+        indexDynamicSettings.addDynamicSetting(IndexingSlowLog.INDEX_INDEXING_SLOWLOG_MAX_SOURCE_CHARS_TO_LOG);
         indexDynamicSettings.addDynamicSetting(SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_QUERY_WARN, Validator.TIME);
         indexDynamicSettings.addDynamicSetting(SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_QUERY_INFO, Validator.TIME);
         indexDynamicSettings.addDynamicSetting(SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_QUERY_DEBUG, Validator.TIME);

+ 28 - 3
core/src/test/java/org/elasticsearch/common/StringsTests.java

@@ -20,11 +20,8 @@
 package org.elasticsearch.common;
 
 import org.elasticsearch.test.ESTestCase;
-import org.junit.Test;
 
 public class StringsTests extends ESTestCase {
-
-    @Test
     public void testToCamelCase() {
         assertEquals("foo", Strings.toCamelCase("foo"));
         assertEquals("fooBar", Strings.toCamelCase("fooBar"));
@@ -33,4 +30,32 @@ public class StringsTests extends ESTestCase {
         assertEquals("fooBarFooBar", Strings.toCamelCase("foo_bar_foo_bar"));
         assertEquals("fooBar", Strings.toCamelCase("foo_bar_"));
     }
+
+    public void testSubstring() {
+        assertEquals(null, Strings.substring(null, 0, 1000));
+        assertEquals("foo", Strings.substring("foo", 0, 1000));
+        assertEquals("foo", Strings.substring("foo", 0, 3));
+        assertEquals("oo", Strings.substring("foo", 1, 3));
+        assertEquals("oo", Strings.substring("foo", 1, 100));
+        assertEquals("f", Strings.substring("foo", 0, 1));
+    }
+
+    public void testCleanTruncate() {
+        assertEquals(null, Strings.cleanTruncate(null, 10));
+        assertEquals("foo", Strings.cleanTruncate("foo", 10));
+        assertEquals("foo", Strings.cleanTruncate("foo", 3));
+        // Throws out high surrogates
+        assertEquals("foo", Strings.cleanTruncate("foo\uD83D\uDEAB", 4));
+        // But will keep the whole character
+        assertEquals("foo\uD83D\uDEAB", Strings.cleanTruncate("foo\uD83D\uDEAB", 5));
+        /*
+         * Doesn't take care around combining marks. This example has its
+         * meaning changed because that last codepoint is supposed to combine
+         * backwards into the find "o" and be represented as the "o" with a
+         * circle around it with a slash through it. As in "no 'o's allowed
+         * here.
+         */
+        assertEquals("o", Strings.cleanTruncate("o\uD83D\uDEAB", 1));
+        assertEquals("", Strings.cleanTruncate("foo", 0));
+    }
 }

+ 54 - 0
core/src/test/java/org/elasticsearch/index/indexing/IndexingSlowLogTests.java

@@ -0,0 +1,54 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.indexing;
+
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.IntField;
+import org.apache.lucene.document.StringField;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.xcontent.json.JsonXContent;
+import org.elasticsearch.index.indexing.IndexingSlowLog.SlowLogParsedDocumentPrinter;
+import org.elasticsearch.index.mapper.ParsedDocument;
+import org.elasticsearch.test.ESTestCase;
+
+import java.io.IOException;
+
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.not;
+
+public class IndexingSlowLogTests extends ESTestCase {
+    public void testSlowLogParsedDocumentPrinterSourceToLog() throws IOException {
+        BytesReference source = JsonXContent.contentBuilder().startObject().field("foo", "bar").endObject().bytes();
+        ParsedDocument pd = new ParsedDocument(new StringField("uid", "test:id", Store.YES), new IntField("version", 1, Store.YES), "id",
+                "test", null, 0, -1, null, source, null);
+
+        // Turning off document logging doesn't log source[]
+        SlowLogParsedDocumentPrinter p = new SlowLogParsedDocumentPrinter(pd, 10, true, 0);
+        assertThat(p.toString(), not(containsString("source[")));
+
+        // Turning on document logging logs the whole thing
+        p = new SlowLogParsedDocumentPrinter(pd, 10, true, Integer.MAX_VALUE);
+        assertThat(p.toString(), containsString("source[{\"foo\":\"bar\"}]"));
+
+        // And you can truncate the source
+        p = new SlowLogParsedDocumentPrinter(pd, 10, true, 3);
+        assertThat(p.toString(), containsString("source[{\"f]"));
+    }
+}

+ 7 - 0
docs/reference/index-modules/slowlog.asciidoc

@@ -68,10 +68,17 @@ index.indexing.slowlog.threshold.index.warn: 10s
 index.indexing.slowlog.threshold.index.info: 5s
 index.indexing.slowlog.threshold.index.debug: 2s
 index.indexing.slowlog.threshold.index.trace: 500ms
+index.indexing.slowlog.level: info
+index.indexing.slowlog.source: 1000
 --------------------------------------------------
 
 All of the above settings are _dynamic_ and can be set per-index.
 
+By default Elasticsearch will log the first 1000 characters of the _source in
+the slowlog. You can change that with `index.indexing.slowlog.source`. Setting
+it to `false` or `0` will skip logging the source entirely an setting it to
+`true` will log the entire source regardless of size.
+
 The index slow log file is configured by default in the `logging.yml`
 file: