浏览代码

Add geo_point to FieldStats

This commit adds a new GeoPoint class to FieldStats for computing field stats over geo_point field types.
Nicholas Knize 9 年之前
父节点
当前提交
84e4f91253

+ 66 - 5
core/src/main/java/org/elasticsearch/action/fieldstats/FieldStats.java

@@ -124,8 +124,10 @@ public abstract class FieldStats<T> implements Writeable, ToXContent {
                 return "string";
             case 4:
                 return "ip";
+            case 5:
+                return "geo_point";
             default:
-                throw new IllegalArgumentException("Unknown type.");
+                throw new IllegalArgumentException("Unknown type 1. " + type);
         }
     }
 
@@ -276,7 +278,7 @@ public abstract class FieldStats<T> implements Writeable, ToXContent {
         }
     }
 
-    private void updateMinMax(T min, T max) {
+    protected void updateMinMax(T min, T max) {
         if (compare(minValue, min) > 0) {
             minValue = min;
         }
@@ -643,6 +645,55 @@ public abstract class FieldStats<T> implements Writeable, ToXContent {
         }
     }
 
+    /**
+     * Field statistics for {@code geo_point} fields, registered under type byte {@code 5}.
+     * <p>
+     * Minimum and maximum are tracked per coordinate: merging keeps the smallest latitude and longitude
+     * in {@code minValue} and the largest latitude and longitude in {@code maxValue}.
+     */
+    public static class GeoPoint extends FieldStats<org.elasticsearch.common.geo.GeoPoint> {
+
+        /** Creates stats without min/max values. */
+        public GeoPoint(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq,
+                        boolean isSearchable, boolean isAggregatable) {
+            super((byte) 5, maxDoc, docCount, sumDocFreq, sumTotalTermFreq, isSearchable, isAggregatable);
+        }
+
+        /** Creates stats carrying the given min/max points. */
+        public GeoPoint(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq,
+                        boolean isSearchable, boolean isAggregatable,
+                        org.elasticsearch.common.geo.GeoPoint minValue, org.elasticsearch.common.geo.GeoPoint maxValue) {
+            super((byte) 5, maxDoc, docCount, sumDocFreq, sumTotalTermFreq, isSearchable, isAggregatable, minValue, maxValue);
+        }
+
+        /** Parses {@code value} as a lat/lon string; {@code fmt} is not used. */
+        @Override
+        public org.elasticsearch.common.geo.GeoPoint valueOf(String value, String fmt) {
+            return org.elasticsearch.common.geo.GeoPoint.parseFromLatLon(value);
+        }
+
+        /**
+         * Widens the current bounds in place so that they also cover {@code min} and {@code max},
+         * handling latitude and longitude independently.
+         */
+        @Override
+        protected void updateMinMax(org.elasticsearch.common.geo.GeoPoint min, org.elasticsearch.common.geo.GeoPoint max) {
+            final double lowLat = Math.min(min.lat(), minValue.lat());
+            final double lowLon = Math.min(min.lon(), minValue.lon());
+            minValue.reset(lowLat, lowLon);
+
+            final double highLat = Math.max(max.lat(), maxValue.lat());
+            final double highLon = Math.max(max.lon(), maxValue.lon());
+            maxValue.reset(highLat, highLon);
+        }
+
+        /** Always throws: this class does not define an ordering between two points. */
+        @Override
+        public int compare(org.elasticsearch.common.geo.GeoPoint p1, org.elasticsearch.common.geo.GeoPoint p2) {
+            throw new IllegalArgumentException("compare is not supported for geo_point field stats");
+        }
+
+        /** Writes four doubles in the order min lat, min lon, max lat, max lon. */
+        @Override
+        public void writeMinMax(StreamOutput out) throws IOException {
+            final org.elasticsearch.common.geo.GeoPoint[] bounds = {minValue, maxValue};
+            for (org.elasticsearch.common.geo.GeoPoint bound : bounds) {
+                out.writeDouble(bound.lat());
+                out.writeDouble(bound.lon());
+            }
+        }
+
+        /** Returns the string form of the minimum point. */
+        @Override
+        public String getMinValueAsString() {
+            return minValue.toString();
+        }
+
+        /** Returns the string form of the maximum point. */
+        @Override
+        public String getMaxValueAsString() {
+            return maxValue.toString();
+        }
+    }
+
     public static FieldStats readFrom(StreamInput in) throws IOException {
         byte type = in.readByte();
         long maxDoc = in.readLong();
@@ -690,7 +741,7 @@ public abstract class FieldStats<T> implements Writeable, ToXContent {
                         isSearchable, isAggregatable);
                 }
 
-            case 4:
+            case 4: {
                 if (hasMinMax == false) {
                     return new Ip(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
                         isSearchable, isAggregatable);
@@ -705,9 +756,19 @@ public abstract class FieldStats<T> implements Writeable, ToXContent {
                 InetAddress max = InetAddressPoint.decode(b2);
                 return new Ip(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
                     isSearchable, isAggregatable, min, max);
-
+            }
+            case 5: {
+                if (hasMinMax == false) {
+                    return new GeoPoint(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
+                        isSearchable, isAggregatable);
+                }
+                org.elasticsearch.common.geo.GeoPoint min = new org.elasticsearch.common.geo.GeoPoint(in.readDouble(), in.readDouble());
+                org.elasticsearch.common.geo.GeoPoint max = new org.elasticsearch.common.geo.GeoPoint(in.readDouble(), in.readDouble());
+                return new GeoPoint(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
+                    isSearchable, isAggregatable, min, max);
+            }
             default:
-                throw new IllegalArgumentException("Unknown type.");
+                throw new IllegalArgumentException("Unknown type 2. " + type);
         }
     }
 

+ 14 - 8
core/src/main/java/org/elasticsearch/index/mapper/BaseGeoPointFieldMapper.java

@@ -22,7 +22,9 @@ package org.elasticsearch.index.mapper;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.util.NumericUtils;
 import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.action.fieldstats.FieldStats;
 import org.elasticsearch.common.Explicit;
@@ -173,17 +175,21 @@ public abstract class BaseGeoPointFieldMapper extends FieldMapper implements Arr
         }
 
         @Override
-        public FieldStats stats(IndexReader reader) throws IOException {
-            int maxDoc = reader.maxDoc();
-            FieldInfo fi = org.apache.lucene.index.MultiFields.getMergedFieldInfos(reader).fieldInfo(name());
+        public FieldStats.GeoPoint stats(IndexReader reader) throws IOException {
+            String field = name();
+            FieldInfo fi = org.apache.lucene.index.MultiFields.getMergedFieldInfos(reader).fieldInfo(field);
             if (fi == null) {
                 return null;
             }
-            /**
-             * we don't have a specific type for geo_point so we use an empty {@link FieldStats.Text}.
-             * TODO: we should maybe support a new type that knows how to (de)encode the min/max information
-             */
-            return new FieldStats.Text(maxDoc, -1, -1, -1, isSearchable(), isAggregatable());
+
+            Terms terms = org.apache.lucene.index.MultiFields.getTerms(reader, field);
+            if (terms == null) {
+                return new FieldStats.GeoPoint(reader.maxDoc(), 0L, -1L, -1L, isSearchable(), isAggregatable());
+            }
+            GeoPoint minPt = GeoPoint.fromGeohash(NumericUtils.sortableBytesToLong(terms.getMin().bytes, terms.getMin().offset));
+            GeoPoint maxPt = GeoPoint.fromGeohash(NumericUtils.sortableBytesToLong(terms.getMax().bytes, terms.getMax().offset));
+            return new FieldStats.GeoPoint(reader.maxDoc(), terms.getDocCount(), -1L, terms.getSumTotalTermFreq(), isSearchable(),
+                isAggregatable(), minPt, maxPt);
         }
     }
 

+ 25 - 0
core/src/main/java/org/elasticsearch/index/mapper/LatLonPointFieldMapper.java

@@ -21,8 +21,13 @@ package org.elasticsearch.index.mapper;
 import org.apache.lucene.document.LatLonDocValuesField;
 import org.apache.lucene.document.LatLonPoint;
 import org.apache.lucene.document.StoredField;
+import org.apache.lucene.geo.GeoEncodingUtils;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.PointValues;
 import org.apache.lucene.search.Query;
+import org.elasticsearch.action.fieldstats.FieldStats;
 import org.elasticsearch.common.Explicit;
 import org.elasticsearch.common.geo.GeoPoint;
 import org.elasticsearch.common.geo.GeoUtils;
@@ -120,6 +125,26 @@ public class LatLonPointFieldMapper extends BaseGeoPointFieldMapper {
             throw new QueryShardException(context, "Geo fields do not support exact searching, use dedicated geo queries instead: ["
                 + name() + "]");
         }
+
+        /**
+         * Computes field stats for this geo_point field from its indexed point values.
+         *
+         * @param reader the index reader to inspect
+         * @return {@code null} if the reader has no field info for this field; stats without min/max if no point
+         *         values are indexed; otherwise stats whose min/max are decoded from the packed min/max values
+         * @throws IOException if reading from the index fails
+         */
+        @Override
+        public FieldStats.GeoPoint stats(IndexReader reader) throws IOException {
+            final String fieldName = name();
+            if (org.apache.lucene.index.MultiFields.getMergedFieldInfos(reader).fieldInfo(fieldName) == null) {
+                return null;
+            }
+
+            final long pointCount = PointValues.size(reader, fieldName);
+            if (pointCount == 0) {
+                return new FieldStats.GeoPoint(reader.maxDoc(), -1L, -1L, -1L, isSearchable(), isAggregatable());
+            }
+
+            final int docCount = PointValues.getDocCount(reader, fieldName);
+            final byte[] minPacked = PointValues.getMinPackedValue(reader, fieldName);
+            final byte[] maxPacked = PointValues.getMaxPackedValue(reader, fieldName);
+            // latitude is decoded from offset 0 and longitude from offset Integer.BYTES of each packed value
+            final double minLat = GeoEncodingUtils.decodeLatitude(minPacked, 0);
+            final double minLon = GeoEncodingUtils.decodeLongitude(minPacked, Integer.BYTES);
+            final GeoPoint minPt = new GeoPoint(minLat, minLon);
+            final double maxLat = GeoEncodingUtils.decodeLatitude(maxPacked, 0);
+            final double maxLon = GeoEncodingUtils.decodeLongitude(maxPacked, Integer.BYTES);
+            final GeoPoint maxPt = new GeoPoint(maxLat, maxLon);
+            return new FieldStats.GeoPoint(reader.maxDoc(), docCount, -1L, pointCount, isSearchable(), isAggregatable(),
+                minPt, maxPt);
+        }
     }
 
     @Override

+ 40 - 4
core/src/test/java/org/elasticsearch/fieldstats/FieldStatsIntegrationIT.java

@@ -27,8 +27,10 @@ import org.elasticsearch.action.fieldstats.FieldStatsAction;
 import org.elasticsearch.action.fieldstats.FieldStatsResponse;
 import org.elasticsearch.action.fieldstats.IndexConstraint;
 import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.common.geo.GeoPoint;
 import org.elasticsearch.index.cache.request.RequestCacheStats;
 import org.elasticsearch.test.ESIntegTestCase;
+import org.elasticsearch.test.geo.RandomGeoGenerator;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -40,6 +42,7 @@ import static org.elasticsearch.action.fieldstats.IndexConstraint.Property.MAX;
 import static org.elasticsearch.action.fieldstats.IndexConstraint.Property.MIN;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAllSuccessful;
+import static org.hamcrest.Matchers.closeTo;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
 import static org.hamcrest.Matchers.nullValue;
@@ -60,7 +63,8 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
                     "long", "type=long",
                     "integer", "type=integer",
                     "short", "type=short",
-                    "byte", "type=byte"));
+                    "byte", "type=byte",
+                    "location", "type=geo_point"));
         ensureGreen("test");
 
         // index=false
@@ -74,7 +78,8 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
             "long", "type=long,index=false",
             "integer", "type=integer,index=false",
             "short", "type=short,index=false",
-            "byte", "type=byte,index=false"
+            "byte", "type=byte,index=false",
+            "location", "type=geo_point,index=false"
         ));
         ensureGreen("test1");
 
@@ -89,7 +94,8 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
             "long", "type=long,index=false",
             "integer", "type=integer,index=false",
             "short", "type=short,index=false",
-            "byte", "type=byte,index=false"
+            "byte", "type=byte,index=false",
+            "location", "type=geo_point,index=false"
         ));
         ensureGreen("test3");
 
@@ -107,6 +113,8 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
         double maxFloat = Double.NEGATIVE_INFINITY;
         double minDouble = Double.POSITIVE_INFINITY;
         double maxDouble = Double.NEGATIVE_INFINITY;
+        GeoPoint minLoc = new GeoPoint(90, 180);
+        GeoPoint maxLoc = new GeoPoint(-90, -180);
         String minString = new String(Character.toChars(1114111));
         String maxString = "0";
 
@@ -135,6 +143,9 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
             double d = randomDouble();
             minDouble = Math.min(minDouble, d);
             maxDouble = Math.max(maxDouble, d);
+            GeoPoint loc = RandomGeoGenerator.randomPoint(random());
+            minLoc.reset(Math.min(loc.lat(), minLoc.lat()), Math.min(loc.lon(), minLoc.lon()));
+            maxLoc.reset(Math.max(loc.lat(), maxLoc.lat()), Math.max(loc.lon(), maxLoc.lon()));
             String str = randomRealisticUnicodeOfLength(3);
             if (str.compareTo(minString) < 0) {
                 minString = str;
@@ -151,6 +162,7 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
                                 "half_float", hf,
                                 "float", f,
                                 "double", d,
+                                "location", loc,
                                 "string", str)
             );
         }
@@ -158,7 +170,7 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
 
         FieldStatsResponse response = client()
             .prepareFieldStats()
-            .setFields("byte", "short", "integer", "long", "half_float", "float", "double", "string").get();
+            .setFields("byte", "short", "integer", "long", "half_float", "float", "double", "location", "string").get();
         assertAllSuccessful(response);
 
         for (FieldStats<?> stats : response.getAllFieldStats().values()) {
@@ -188,6 +200,11 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
         assertThat(response.getAllFieldStats().get("double").getMinValue(), equalTo(minDouble));
         assertThat(response.getAllFieldStats().get("double").getMaxValue(), equalTo(maxDouble));
         assertThat(response.getAllFieldStats().get("double").getDisplayType(), equalTo("float"));
+        assertThat(((GeoPoint)response.getAllFieldStats().get("location").getMinValue()).lat(), closeTo(minLoc.lat(), 1E-5));
+        assertThat(((GeoPoint)response.getAllFieldStats().get("location").getMinValue()).lon(), closeTo(minLoc.lon(), 1E-5));
+        assertThat(((GeoPoint)response.getAllFieldStats().get("location").getMaxValue()).lat(), closeTo(maxLoc.lat(), 1E-5));
+        assertThat(((GeoPoint)response.getAllFieldStats().get("location").getMaxValue()).lon(), closeTo(maxLoc.lon(), 1E-5));
+        assertThat(response.getAllFieldStats().get("location").getDisplayType(), equalTo("geo_point"));
     }
 
     public void testFieldStatsIndexLevel() throws Exception {
@@ -522,6 +539,25 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
         assertEquals(oldHitCount, indexStats.getHitCount());
     }
 
+    public void testGeoPointNotIndexed() throws Exception {
+        assertAcked(prepareCreate("test").addMapping("test", "value", "type=long", "location", "type=geo_point,index=no"));
+        ensureGreen("test");
+        client().prepareIndex("test", "test").setSource("value", 1L, "location", new GeoPoint(32, -132)).get();
+        client().prepareIndex("test", "test").setSource("value", 2L).get();
+        client().prepareIndex("test", "test").setSource("value", 3L).get();
+        client().prepareIndex("test", "test").setSource("value", 4L).get();
+        refresh();
+
+        FieldStatsResponse response = client().prepareFieldStats().setFields("value", "location").get();
+        assertAllSuccessful(response);
+        assertThat(response.getIndicesMergedFieldStats().size(), equalTo(1));
+        assertThat(response.getAllFieldStats().get("location").getMinValue(), equalTo(null));
+        assertThat(response.getAllFieldStats().get("location").getMaxValue(), equalTo(null));
+        assertThat(response.getAllFieldStats().get("location").isAggregatable(), equalTo(true));
+        assertThat(response.getAllFieldStats().get("location").isSearchable(), equalTo(false));
+
+    }
+
     private void indexRange(String index, long from, long to) throws Exception {
         indexRange(index, "value", from, to);
     }

+ 36 - 16
rest-api-spec/src/main/resources/rest-api-spec/test/field_stats/10_basics.yaml

@@ -34,6 +34,7 @@ setup:
                     type: text
                   geo:
                     type: geo_point
+                    index: no
                   geo_shape:
                     type: geo_shape
                     tree: quadtree
@@ -51,7 +52,7 @@ setup:
           index:  test_2
           type:   test
           id:     id_10
-          body: { foo: "babar", number: 456, bar: "123",  geo_shape:  {type: "linestring", coordinates : [[-77.03653, 38.897676], [-77.009051, 38.889939]] } }
+          body: { foo: "babar", number: 456, bar: "123", geo: { lat: 48.858093, lon: 2.294694},  geo_shape:  {type: "linestring", coordinates : [[-77.03653, 38.897676], [-77.009051, 38.889939]] } }
 
   - do:
       indices.refresh: {}
@@ -84,17 +85,17 @@ setup:
   - is_false: conflicts
 
 ---
-"Geopoint field stats":
+"Geo field stats":
   - skip:
-      version: " - 5.2.0"
+      version: " - 5.3.0"
       reason:  geo_point fields don't return min/max for versions greater than 5.2.0
 
   - do:
       field_stats:
           fields:  [geo, geo_shape]
 
-  - match: { indices._all.fields.geo.type: "string" }
-  - match: { indices._all.fields.geo.max_doc: 1 }
+  - match: { indices._all.fields.geo.type: "geo_point" }
+  - match: { indices._all.fields.geo.max_doc: 2 }
   - match: { indices._all.fields.geo.doc_count: -1 }
   - match: { indices._all.fields.geo.searchable: true }
   - match: { indices._all.fields.geo.aggregatable: true }
@@ -113,7 +114,6 @@ setup:
   - is_false: indices._all.fields.geo_shape.max_value_as_string
   - is_false: conflicts
 
-
 ---
 "Basic field stats with level set to indices":
   - do:
@@ -162,9 +162,9 @@ setup:
 
 
 ---
-"Geopoint field stats with level set to indices":
+"Geo field stats with level set to indices":
   - skip:
-      version: " - 5.2.0"
+      version: " - 5.3.0"
       reason:  geo_point fields don't return min/max for versions greater than 5.2.0
 
   - do:
@@ -173,15 +173,15 @@ setup:
           level: indices
 
   - match: { indices.test_1.fields.geo.max_doc: 1 }
-  - match: { indices.test_1.fields.geo.doc_count: -1 }
-  - is_false: indices.test_1.fields.geo.min_value
-  - is_false: indices.test_1.fields.geo.max_value
-  - is_false: indices.test_1.fields.geo.min_value_as_string
-  - is_false: indices.test_1.fields.geo.max_value_as_string
+  - match: { indices.test_1.fields.geo.doc_count: 1 }
+  - is_true: indices.test_1.fields.geo.min_value
+  - is_true: indices.test_1.fields.geo.max_value
+  - is_true: indices.test_1.fields.geo.min_value_as_string
+  - is_true: indices.test_1.fields.geo.max_value_as_string
   - match: { indices.test_1.fields.geo.searchable: true }
   - match: { indices.test_1.fields.geo.aggregatable: true }
-  - match: { indices.test_1.fields.geo.type: "string" }
-  - is_false: indices.test_2.fields.geo
+  - match: { indices.test_1.fields.geo.type: "geo_point" }
+  - is_true: indices.test_2.fields.geo
   - match: { indices.test_2.fields.geo_shape.max_doc: 1 }
   - match: { indices.test_2.fields.geo_shape.doc_count: -1 }
   - is_false: indices.test_2.fields.geo_shape.min_value
@@ -191,7 +191,27 @@ setup:
   - match: { indices.test_2.fields.geo_shape.searchable: true }
   - match: { indices.test_2.fields.geo_shape.aggregatable: false }
   - match: { indices.test_2.fields.geo_shape.type: "string" }
-  - is_false: indices.test_2.fields.geo
+  - is_false: conflicts
+
+---
+"Geopoint field stats":
+  - skip:
+      version: " - 5.3.0"
+      reason:  geo_point type not handled for versions earlier than 6.0.0
+
+  - do:
+      field_stats:
+          fields:  [geo]
+          level: indices
+
+  - match: { indices.test_2.fields.geo.max_doc: 1 }
+  - match: { indices.test_2.fields.geo.doc_count: -1 }
+  - is_false: indices.test_2.fields.geo.min_value
+  - is_false: indices.test_2.fields.geo.max_value
+  - match: { indices.test_2.fields.geo.searchable: false }
+  - match: { indices.test_2.fields.geo.aggregatable: true }
+  - match: { indices.test_2.fields.geo.type: "geo_point" }
+  - is_true: indices.test_2.fields.geo
   - is_false: conflicts
 
 ---