Browse Source

Aggregations fix: support include/exclude strings formatted for IP and date fields in terms and significant_terms aggregations.

Closes #17705
markharwood 9 years ago
parent
commit
a846ff93e9

+ 12 - 5
core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java

@@ -211,7 +211,14 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
                 }
             }
             assert execution != null;
-            return execution.create(name, factories, valuesSource, config.format(), bucketCountThresholds, includeExclude, context, parent,
+            
+            DocValueFormat format = config.format();
+            if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
+                throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude "
+                        + "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses");
+            }
+            
+            return execution.create(name, factories, valuesSource, format, bucketCountThresholds, includeExclude, context, parent,
                     significanceHeuristic, this, pipelineAggregators, metaData);
         }
 
@@ -227,7 +234,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
             }
             IncludeExclude.LongFilter longFilter = null;
             if (includeExclude != null) {
-                longFilter = includeExclude.convertToLongFilter();
+                longFilter = includeExclude.convertToLongFilter(config.format());
             }
             return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(),
                     bucketCountThresholds, context, parent, significanceHeuristic, this, longFilter, pipelineAggregators,
@@ -248,7 +255,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
                     AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
                     SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
                     Map<String, Object> metaData) throws IOException {
-                final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter();
+                final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
                 return new SignificantStringTermsAggregator(name, factories, valuesSource, format, bucketCountThresholds, filter,
                         aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
             }
@@ -262,7 +269,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
                     AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
                     SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
                     Map<String, Object> metaData) throws IOException {
-                final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
+                final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
                 return new GlobalOrdinalsSignificantTermsAggregator(name, factories,
                         (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter,
                         aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
@@ -277,7 +284,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
                     AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
                     SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
                     Map<String, Object> metaData) throws IOException {
-                final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
+                final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
                 return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories,
                         (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter,
                         aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);

+ 10 - 5
core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java

@@ -150,8 +150,13 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
                     }
                 }
             }
+            DocValueFormat format = config.format();
+            if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
+                throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude "
+                        + "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses");
+            }
 
-            return execution.create(name, factories, valuesSource, order, config.format(), bucketCountThresholds, includeExclude, context, parent,
+            return execution.create(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude, context, parent, 
                     collectMode, showTermDocCountError, pipelineAggregators, metaData);
         }
 
@@ -171,7 +176,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
                         pipelineAggregators, metaData);
             }
             if (includeExclude != null) {
-                longFilter = includeExclude.convertToLongFilter();
+                longFilter = includeExclude.convertToLongFilter(config.format());
             }
             return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), order,
                     bucketCountThresholds, context, parent, collectMode, showTermDocCountError, longFilter, pipelineAggregators,
@@ -192,7 +197,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
                     AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode,
                     boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
                             throws IOException {
-                final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter();
+                final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
                 return new StringTermsAggregator(name, factories, valuesSource, order, format, bucketCountThresholds, filter,
                         aggregationContext, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
             }
@@ -211,7 +216,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
                     AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode,
                     boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
                             throws IOException {
-                final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
+                final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
                 return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order,
                         format, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError,
                         pipelineAggregators, metaData);
@@ -231,7 +236,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
                     AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode,
                     boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
                             throws IOException {
-                final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
+                final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
                 return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource,
                         order, format, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError,
                         pipelineAggregators, metaData);

+ 28 - 12
core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java

@@ -43,6 +43,7 @@ import org.elasticsearch.common.io.stream.Writeable;
 import org.elasticsearch.common.xcontent.ToXContent;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.search.DocValueFormat;
 import org.elasticsearch.search.aggregations.support.ValuesSource;
 import org.elasticsearch.search.aggregations.support.ValuesSource.Bytes.WithOrdinals;
 
@@ -135,7 +136,8 @@ public class IncludeExclude implements Writeable, ToXContent {
     }
 
     public static abstract class OrdinalsFilter {
-        public abstract LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) throws IOException;
+        public abstract LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource)
+                throws IOException;
 
     }
 
@@ -152,7 +154,8 @@ public class IncludeExclude implements Writeable, ToXContent {
          *
          */
         @Override
-        public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) throws IOException {
+        public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource)
+                throws IOException {
             LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
             TermsEnum globalTermsEnum;
             Terms globalTerms = new DocValuesTerms(globalOrdinals);
@@ -179,7 +182,7 @@ public class IncludeExclude implements Writeable, ToXContent {
         @Override
         public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, WithOrdinals valueSource) throws IOException {
             LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
-            if(includeValues!=null){
+            if (includeValues != null) {
                 for (BytesRef term : includeValues) {
                     long ord = globalOrdinals.lookupTerm(term);
                     if (ord >= 0) {
@@ -534,33 +537,46 @@ public class IncludeExclude implements Writeable, ToXContent {
         return a;
     }
 
-    public StringFilter convertToStringFilter() {
+    public StringFilter convertToStringFilter(DocValueFormat format) {
         if (isRegexBased()) {
             return new AutomatonBackedStringFilter(toAutomaton());
         }
-        return new TermListBackedStringFilter(includeValues, excludeValues);
+        return new TermListBackedStringFilter(parseForDocValues(includeValues, format), parseForDocValues(excludeValues, format));
     }
 
-    public OrdinalsFilter convertToOrdinalsFilter() {
+    private static SortedSet<BytesRef> parseForDocValues(SortedSet<BytesRef> endUserFormattedValues, DocValueFormat format) {
+        SortedSet<BytesRef> result = endUserFormattedValues;
+        if (endUserFormattedValues != null) {
+            if (format != DocValueFormat.RAW) {
+                result = new TreeSet<>();
+                for (BytesRef formattedVal : endUserFormattedValues) {
+                    result.add(format.parseBytesRef(formattedVal.utf8ToString()));
+                }
+            }
+        }
+        return result;
+    }
+
+    public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format) {
 
         if (isRegexBased()) {
             return new AutomatonBackedOrdinalsFilter(toAutomaton());
         }
-        return new TermListBackedOrdinalsFilter(includeValues, excludeValues);
+        return new TermListBackedOrdinalsFilter(parseForDocValues(includeValues, format), parseForDocValues(excludeValues, format));
     }
 
-    public LongFilter convertToLongFilter() {
+    public LongFilter convertToLongFilter(DocValueFormat format) {
         int numValids = includeValues == null ? 0 : includeValues.size();
         int numInvalids = excludeValues == null ? 0 : excludeValues.size();
         LongFilter result = new LongFilter(numValids, numInvalids);
         if (includeValues != null) {
             for (BytesRef val : includeValues) {
-                result.addAccept(Long.parseLong(val.utf8ToString()));
+                result.addAccept(format.parseLong(val.utf8ToString(), false, null));
             }
         }
         if (excludeValues != null) {
             for (BytesRef val : excludeValues) {
-                result.addReject(Long.parseLong(val.utf8ToString()));
+                result.addReject(format.parseLong(val.utf8ToString(), false, null));
             }
         }
         return result;
@@ -572,13 +588,13 @@ public class IncludeExclude implements Writeable, ToXContent {
         LongFilter result = new LongFilter(numValids, numInvalids);
         if (includeValues != null) {
             for (BytesRef val : includeValues) {
-                double dval=Double.parseDouble(val.utf8ToString());
+                double dval = Double.parseDouble(val.utf8ToString());
                 result.addAccept(NumericUtils.doubleToSortableLong(dval));
             }
         }
         if (excludeValues != null) {
             for (BytesRef val : excludeValues) {
-                double dval=Double.parseDouble(val.utf8ToString());
+                double dval = Double.parseDouble(val.utf8ToString());
                 result.addReject(NumericUtils.doubleToSortableLong(dval));
             }
         }

+ 50 - 0
rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yaml

@@ -117,6 +117,33 @@ setup:
   
   - match: { aggregations.ip_terms.buckets.1.doc_count: 1 }
 
+  - do:
+      search:
+        body: { "size" : 0, "aggs" : { "ip_terms" : { "terms" : { "field" : "ip", "include" : [ "127.0.0.1" ] } } } }
+
+  - match: { hits.total: 3 }
+
+  - length: { aggregations.ip_terms.buckets: 1 }
+
+  - match: { aggregations.ip_terms.buckets.0.key: "127.0.0.1" }
+
+  - do:
+      search:
+        body: { "size" : 0, "aggs" : { "ip_terms" : { "terms" : { "field" : "ip", "exclude" : [ "127.0.0.1" ] } } } }
+
+  - match: { hits.total: 3 }
+
+  - length: { aggregations.ip_terms.buckets: 1 }
+
+  - match: { aggregations.ip_terms.buckets.0.key: "::1" }
+
+  - do:
+      catch: request
+      search:
+        body: { "size" : 0, "aggs" : { "ip_terms" : { "terms" : { "field" : "ip", "exclude" :  "127.*"  } } } }
+
+  
+
 ---
 "Boolean test":
   - do:
@@ -300,4 +327,27 @@ setup:
   - match: { aggregations.date_terms.buckets.1.key_as_string: "2014-09-01T00:00:00.000Z" }
 
   - match: { aggregations.date_terms.buckets.1.doc_count: 1 }
+  
+  - do:
+      search:
+        body: { "size" : 0, "aggs" : { "date_terms" : { "terms" : { "field" : "date", "include" : [ "2016-05-03" ] } } } }
+
+  - match: { hits.total: 3 }
+
+  - length: { aggregations.date_terms.buckets: 1 }
+  
+  - match: { aggregations.date_terms.buckets.0.key_as_string: "2016-05-03T00:00:00.000Z" }
+  
+  - match: { aggregations.date_terms.buckets.0.doc_count: 2 }  
+  
+  - do:
+      search:
+        body: { "size" : 0, "aggs" : { "date_terms" : { "terms" : { "field" : "date", "exclude" : [ "2016-05-03" ] } } } }
+
+  - match: { hits.total: 3 }
+
+  - length: { aggregations.date_terms.buckets: 1 }
+  
+  - match: { aggregations.date_terms.buckets.0.key_as_string: "2014-09-01T00:00:00.000Z" }
 
+  - match: { aggregations.date_terms.buckets.0.doc_count: 1 }  

+ 25 - 0
rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms.yaml

@@ -121,3 +121,28 @@
   - is_false: aggregations.ip_terms.buckets.0.key_as_string
 
   - match: { aggregations.ip_terms.buckets.0.doc_count: 1 }
+
+  - do:
+      search:
+        body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1, "include" : [ "::1" ] } } } }
+
+  - match: { hits.total: 1 }
+
+  - length: { aggregations.ip_terms.buckets: 1 }
+
+  - match: { aggregations.ip_terms.buckets.0.key: "::1" }
+ 
+  - do:
+      search:
+        body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1, "exclude" : [ "::1" ] } } } }
+
+  - match: { hits.total: 1 }
+
+  - length: { aggregations.ip_terms.buckets: 0 }
+  
+  - do:
+      catch: request
+      search:
+        body: { "size" : 0, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "exclude" :  "127.*"  } } } }
+  
+