7 年之前 · 22415fa2de
--- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManager.java
+++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManager.java
@@ -163,9 +163,15 @@ public final class LogStructureFinderManager {
 
				                 // deduction algorithms on binary files is very slow as the binary files generally appear to
			
 
				                 // have very long lines.
			
 
				                 boolean spaceEncodingContainsZeroByte = false;
			
 
				-                byte[] spaceBytes = " ".getBytes(name);
			
 
				-                for (int i = 0; i < spaceBytes.length && spaceEncodingContainsZeroByte == false; ++i) {
			
 
				-                    spaceEncodingContainsZeroByte = (spaceBytes[i] == 0);
			
 
				+                Charset charset = Charset.forName(name);
			
 
				+                // Some character sets do not support encoding.  These are extremely rare so it's likely that
			
 
				+                // they've been chosen based on incorrectly provided binary data.  Therefore, err on
			
 
				+                // the side of rejecting binary data.
			
 
				+                if (charset.canEncode()) {
			
 
				+                    byte[] spaceBytes = " ".getBytes(charset);
			
 
				+                    for (int i = 0; i < spaceBytes.length && spaceEncodingContainsZeroByte == false; ++i) {
			
 
				+                        spaceEncodingContainsZeroByte = (spaceBytes[i] == 0);
			
 
				+                    }
			
 
				                 }
			
 
				                 if (containsZeroBytes && spaceEncodingContainsZeroByte == false) {
			
 
				                     explanation.add("Character encoding [" + name + "] matched the input with [" + charsetMatch.getConfidence() +