
Make size=0 return all buckets for the geohash_grid aggregation.

Close #4875
Adrien Grand committed 11 years ago
commit 9cb17408cb

+ 4 - 0
docs/reference/search/aggregations/bucket/geohashgrid-aggregation.asciidoc

@@ -117,11 +117,15 @@ precision::     Optional. The string length of the geohashes used to define
 size::          Optional. The maximum number of geohash buckets to return
                 (defaults to 10,000). When results are trimmed, buckets are
                 prioritised based on the volumes of documents they contain.
+                added[1.1.0] A value of `0` will return all buckets that
+                contain a hit; use with caution, as this could use a lot of
+                CPU and network bandwidth if there are many buckets.
 
 shard_size::    Optional. To allow for more accurate counting of the top cells
                 returned in the final result the aggregation defaults to
                 returning `max(10,(size x number-of-shards))` buckets from each
                 shard. If this heuristic is undesirable, the number considered
                 from each shard can be over-ridden using this parameter.
+                added[1.1.0] A value of `0` makes the shard size unlimited.
 
 

+ 20 - 0
src/main/java/org/elasticsearch/search/aggregations/InternalAggregation.java

@@ -21,10 +21,13 @@ package org.elasticsearch.search.aggregations;
 import org.elasticsearch.cache.recycler.CacheRecycler;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Streamable;
 import org.elasticsearch.common.xcontent.ToXContent;
 import org.elasticsearch.common.xcontent.XContentBuilderString;
 
+import java.io.IOException;
 import java.util.List;
 
 /**
@@ -125,6 +128,23 @@ public abstract class InternalAggregation implements Aggregation, ToXContent, St
      */
     public abstract InternalAggregation reduce(ReduceContext reduceContext);
 
+    /**
+     * Read a size under the assumption that a value of 0 means unlimited.
+     */
+    protected static int readSize(StreamInput in) throws IOException {
+        final int size = in.readVInt();
+        return size == 0 ? Integer.MAX_VALUE : size;
+    }
+
+    /**
+     * Write a size under the assumption that a value of 0 means unlimited.
+     */
+    protected static void writeSize(int size, StreamOutput out) throws IOException {
+        if (size == Integer.MAX_VALUE) {
+            size = 0;
+        }
+        out.writeVInt(size);
+    }
 
     /**
      * Common xcontent fields that are shared among addAggregation
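
A minimal standalone sketch of the 0-means-unlimited wire convention these helpers encode, using plain `java.io` streams as a stand-in for `StreamInput`/`StreamOutput` (an assumption for illustration only; the real code uses `writeVInt`/`readVInt`):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    // Stand-in demo: an in-memory size of Integer.MAX_VALUE ("unlimited") is written
    // as 0 on the wire and restored to Integer.MAX_VALUE when read back; other sizes
    // pass through unchanged.
    public class SizeWireConventionDemo {

        static void writeSize(int size, DataOutputStream out) throws IOException {
            out.writeInt(size == Integer.MAX_VALUE ? 0 : size); // writeInt stands in for writeVInt
        }

        static int readSize(DataInputStream in) throws IOException {
            final int size = in.readInt(); // readInt stands in for readVInt
            return size == 0 ? Integer.MAX_VALUE : size;
        }

        public static void main(String[] args) throws IOException {
            for (int size : new int[] { Integer.MAX_VALUE, 25 }) {
                ByteArrayOutputStream bytes = new ByteArrayOutputStream();
                writeSize(size, new DataOutputStream(bytes));
                int read = readSize(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
                System.out.println(size + " -> " + read); // MAX_VALUE round-trips, 25 stays 25
            }
        }
    }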

+ 2 - 2
src/main/java/org/elasticsearch/search/aggregations/bucket/BucketUtils.java

@@ -39,11 +39,11 @@ public class BucketUtils {
             return finalSize;
         }
         //Cap the multiplier used for shards to avoid excessive data transfer
-        final int shardSampleSize = finalSize * Math.min(10, numberOfShards);
+        final long shardSampleSize = (long) finalSize * Math.min(10, numberOfShards);
         // When finalSize is very small e.g. 1 and there is a low number of
         // shards then we need to ensure we still gather a reasonable sample of statistics from each
         // shard (at low cost) to improve the chances of the final result being accurate.
-        return Math.max(10, shardSampleSize);
+        return (int) Math.min(Integer.MAX_VALUE, Math.max(10, shardSampleSize));
     }
     
 }
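
Why the widening to `long` matters: once `size=0` maps to `Integer.MAX_VALUE`, the old `int` multiplication can wrap around. A standalone before/after sketch (an assumed reimplementation of just the arithmetic, not the actual `BucketUtils` class; the single-shard early return is omitted):

    // Before/after sketch of the shard queue-size heuristic (assumed standalone
    // reimplementation for illustration).
    public class ShardQueueSizeDemo {
        static int oldHeuristic(int finalSize, int numberOfShards) {
            final int shardSampleSize = finalSize * Math.min(10, numberOfShards); // int overflow
            return Math.max(10, shardSampleSize);
        }

        static int newHeuristic(int finalSize, int numberOfShards) {
            final long shardSampleSize = (long) finalSize * Math.min(10, numberOfShards);
            return (int) Math.min(Integer.MAX_VALUE, Math.max(10, shardSampleSize));
        }

        public static void main(String[] args) {
            int unlimited = Integer.MAX_VALUE; // what size=0 now maps to
            // With 2 shards the old int product wraps to -2, so the heuristic collapses to 10.
            System.out.println(oldHeuristic(unlimited, 2));  // 10 (buckets would be trimmed)
            System.out.println(newHeuristic(unlimited, 2));  // 2147483647 (clamped, effectively unlimited)
        }
    }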

+ 14 - 1
src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoHashGridParser.java

@@ -55,7 +55,7 @@ public class GeoHashGridParser implements Aggregator.Parser {
         String field = null;
         int precision = DEFAULT_PRECISION;
         int requiredSize = DEFAULT_MAX_NUM_CELLS;
-        int shardSize = 0;
+        int shardSize = -1;
 
 
         XContentParser.Token token;
@@ -78,11 +78,24 @@ public class GeoHashGridParser implements Aggregator.Parser {
 
             }
         }
+
         if (shardSize == 0) {
+            shardSize = Integer.MAX_VALUE;
+        }
+
+        if (requiredSize == 0) {
+            requiredSize = Integer.MAX_VALUE;
+        }
+
+        if (shardSize < 0) {
             //Use default heuristic to avoid any wrong-ranking caused by distributed counting            
             shardSize = BucketUtils.suggestShardSideQueueSize(requiredSize, context.numberOfShards());
         }
 
+        if (shardSize < requiredSize) {
+            shardSize = requiredSize;
+        }
+
         ValuesSourceConfig<GeoPointValuesSource> config = new ValuesSourceConfig<GeoPointValuesSource>(GeoPointValuesSource.class);
         if (field == null) {
             return new GeoGridFactory(aggregationName, config, precision, requiredSize, shardSize);
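
The parser now applies the size rules in a fixed order; a hedged standalone summary (a hypothetical helper, not a method in the actual parser, and it assumes `BucketUtils` is available on the classpath):

    // Hypothetical helper condensing the normalization the parser performs above:
    // 0 means unlimited, a negative shard_size means "unset, use the heuristic",
    // and shard_size is never allowed to drop below size.
    static int[] normalizeSizes(int requiredSize, int shardSize, int numberOfShards) {
        if (shardSize == 0) {
            shardSize = Integer.MAX_VALUE;          // explicit shard_size=0 -> unlimited
        }
        if (requiredSize == 0) {
            requiredSize = Integer.MAX_VALUE;       // size=0 -> return all buckets
        }
        if (shardSize < 0) {
            // unset: fall back to the distributed-counting heuristic
            shardSize = BucketUtils.suggestShardSideQueueSize(requiredSize, numberOfShards);
        }
        if (shardSize < requiredSize) {
            shardSize = requiredSize;               // shard_size may not be smaller than size
        }
        return new int[] { requiredSize, shardSize };
    }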

+ 2 - 2
src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/InternalGeoHashGrid.java

@@ -256,7 +256,7 @@ public class InternalGeoHashGrid extends InternalAggregation implements GeoHashG
     @Override
     public void readFrom(StreamInput in) throws IOException {
         this.name = in.readString();
-        this.requiredSize = in.readVInt();
+        this.requiredSize = readSize(in);
         int size = in.readVInt();
         List<Bucket> buckets = new ArrayList<Bucket>(size);
         for (int i = 0; i < size; i++) {
@@ -269,7 +269,7 @@ public class InternalGeoHashGrid extends InternalAggregation implements GeoHashG
     @Override
     public void writeTo(StreamOutput out) throws IOException {
         out.writeString(name);
-        out.writeVInt(requiredSize);
+        writeSize(requiredSize, out);
         out.writeVInt(buckets.size());
         for (Bucket bucket : buckets) {
             out.writeLong(bucket.geohashAsLong);

+ 0 - 16
src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalTerms.java

@@ -21,8 +21,6 @@ package org.elasticsearch.search.aggregations.bucket.terms;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import org.elasticsearch.cache.recycler.CacheRecycler;
-import org.elasticsearch.common.io.stream.StreamInput;
-import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Streamable;
 import org.elasticsearch.common.text.Text;
 import org.elasticsearch.common.xcontent.ToXContent;
@@ -31,7 +29,6 @@ import org.elasticsearch.search.aggregations.InternalAggregation;
 import org.elasticsearch.search.aggregations.InternalAggregations;
 import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
 
-import java.io.IOException;
 import java.util.*;
 
 /**
@@ -184,17 +181,4 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
         buckets = newBuckets;
     }
 
-    // 0 actually means unlimited
-    protected static int readSize(StreamInput in) throws IOException {
-        final int size = in.readVInt();
-        return size == 0 ? Integer.MAX_VALUE : size;
-    }
-
-    protected static void writeSize(int size, StreamOutput out) throws IOException {
-        if (size == Integer.MAX_VALUE) {
-            size = 0;
-        }
-        out.writeVInt(size);
-    }
-
 }

+ 22 - 0
src/test/java/org/elasticsearch/search/aggregations/bucket/GeoHashGridTests.java

@@ -42,6 +42,7 @@ import java.util.Random;
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
 import static org.elasticsearch.search.aggregations.AggregationBuilders.geohashGrid;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 
 /**
  *
@@ -241,4 +242,25 @@ public class GeoHashGridTests extends ElasticsearchIntegrationTest {
         }
     }
 
+    @Test
+    // making sure this doesn't run into an OOME
+    public void sizeIsZero() {
+        for (int precision = 1; precision <= highestPrecisionGeohash; precision++) {
+            final int size = randomBoolean() ? 0 : randomIntBetween(1, Integer.MAX_VALUE);
+            final int shardSize = randomBoolean() ? -1 : 0;
+            SearchResponse response = client().prepareSearch("idx")
+                    .addAggregation(geohashGrid("geohashgrid")
+                            .field("location")
+                            .size(size)
+                            .shardSize(shardSize)
+                            .precision(precision)
+                    )
+                    .execute().actionGet();
+
+            assertThat(response.getFailedShards(), equalTo(0));
+            GeoHashGrid geoGrid = response.getAggregations().get("geohashgrid");
+            assertThat(geoGrid.getBuckets().size(), greaterThanOrEqualTo(1));
+        }
+    }
+
 }