浏览代码

Fix overflow error in parsing of long geohashes (#29418)

Fixes a possible overflow error that geohashes longer than 12 characters
can cause during parsing.

Fixes #24616
Igor Motov 7 年之前
父节点
当前提交
e334baf6fc

+ 10 - 0
docs/reference/mapping/types/geo-point.asciidoc

@@ -92,6 +92,16 @@ format was changed early on to conform to the format used by GeoJSON.
 
 ==================================================
 
+[NOTE]
+A point can be expressed as a http://en.wikipedia.org/wiki/Geohash[geohash].
+Geohashes are https://en.wikipedia.org/wiki/Base32[base32] encoded strings of
+the bits of the latitude and longitude interleaved. Each character in a geohash
+adds additional 5 bits to the precision. So the longer the hash, the more
+precise it is. For the indexing purposed geohashs are translated into
+latitude-longitude pairs. During this process only first 12 characters are
+used, so specifying more than 12 characters in a geohash doesn't increase the
+precision. The 12 characters provide 60 bits, which should reduce a possible
+error to less than 2cm.
 
 [[geo-point-params]]
 ==== Parameters for `geo_point` fields

+ 14 - 5
server/src/main/java/org/elasticsearch/common/geo/GeoHashUtils.java

@@ -72,15 +72,19 @@ public class GeoHashUtils {
     /**
      * Encode from geohash string to the geohash based long format (lon/lat interleaved, 4 least significant bits = level)
      */
-    public static final long longEncode(final String hash) {
-        int level = hash.length()-1;
+    private static long longEncode(final String hash, int length) {
+        int level = length - 1;
         long b;
         long l = 0L;
         for(char c : hash.toCharArray()) {
             b = (long)(BASE_32_STRING.indexOf(c));
             l |= (b<<(level--*5));
+            if (level < 0) {
+                // We cannot handle more than 12 levels
+                break;
+            }
         }
-        return (l<<4)|hash.length();
+        return (l << 4) | length;
     }
 
     /**
@@ -173,6 +177,10 @@ public class GeoHashUtils {
         for(char c : hash.toCharArray()) {
             b = (long)(BASE_32_STRING.indexOf(c));
             l |= (b<<((level--*5) + MORTON_OFFSET));
+            if (level < 0) {
+                // We cannot handle more than 12 levels
+                break;
+            }
         }
         return BitUtil.flipFlop(l);
     }
@@ -200,13 +208,14 @@ public class GeoHashUtils {
     public static Rectangle bbox(final String geohash) {
         // bottom left is the coordinate
         GeoPoint bottomLeft = GeoPoint.fromGeohash(geohash);
-        long ghLong = longEncode(geohash);
+        int len = Math.min(12, geohash.length());
+        long ghLong = longEncode(geohash, len);
         // shift away the level
         ghLong >>>= 4;
         // deinterleave and add 1 to lat and lon to get topRight
         long lat = BitUtil.deinterleave(ghLong >>> 1) + 1;
         long lon = BitUtil.deinterleave(ghLong) + 1;
-        GeoPoint topRight = GeoPoint.fromGeohash(BitUtil.interleave((int)lon, (int)lat) << 4 | geohash.length());
+        GeoPoint topRight = GeoPoint.fromGeohash(BitUtil.interleave((int)lon, (int)lat) << 4 | len);
 
         return new Rectangle(bottomLeft.lat(), topRight.lat(), bottomLeft.lon(), topRight.lon());
     }

+ 16 - 0
server/src/test/java/org/elasticsearch/common/geo/GeoHashTests.java

@@ -82,4 +82,20 @@ public class GeoHashTests extends ESTestCase {
         assertEquals("xbpbpbpbpbpb", GeoHashUtils.stringEncode(180, 0));
         assertEquals("zzzzzzzzzzzz", GeoHashUtils.stringEncode(180, 90));
     }
+
+    public void testLongGeohashes() {
+        for (int i = 0; i < 100000; i++) {
+            String geohash = randomGeohash(12, 12);
+            GeoPoint expected = GeoPoint.fromGeohash(geohash);
+            // Adding some random geohash characters at the end
+            String extendedGeohash = geohash + randomGeohash(1, 10);
+            GeoPoint actual = GeoPoint.fromGeohash(extendedGeohash);
+            assertEquals("Additional data points above 12 should be ignored [" + extendedGeohash + "]" , expected, actual);
+
+            Rectangle expectedBbox = GeoHashUtils.bbox(geohash);
+            Rectangle actualBbox = GeoHashUtils.bbox(extendedGeohash);
+            assertEquals("Additional data points above 12 should be ignored [" + extendedGeohash + "]" , expectedBbox, actualBbox);
+
+        }
+    }
 }