Browse Source

Fix point visitor in DiskUsage API (#84909)

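We should use numIndexDimensions instead of numDimensions
when comparing packedValues of PointValues: the min/max packed values
only cover the index dimensions, which can be fewer than the total
number of dimensions (for example, for shape fields).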

Relates #84816
Nhat Nguyen 3 years ago
parent
commit
b4e6408a98

+ 5 - 0
docs/changelog/84909.yaml

@@ -0,0 +1,5 @@
+pr: 84909
+summary: Fix point visitor in `DiskUsage` API
+area: Search
+type: bug
+issues: []

+ 54 - 0
server/src/internalClusterTest/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzerIT.java

@@ -8,6 +8,7 @@
 
 package org.elasticsearch.action.admin.indices.diskusage;
 
+import org.apache.lucene.tests.geo.GeoTestUtil;
 import org.apache.lucene.tests.util.English;
 import org.elasticsearch.action.support.PlainActionFuture;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
@@ -147,6 +148,59 @@ public class IndexDiskUsageAnalyzerIT extends ESIntegTestCase {
         assertMetadataFields(stats);
     }
 
+    /**
+     * Indexes random GeoJSON points into a {@code geo_shape} field and verifies that the disk usage
+     * analysis reports a non-zero total size and a non-zero points size for that field.
+     */
+    public void testGeoShape() throws Exception {
+        final XContentBuilder mapping = XContentFactory.jsonBuilder();
+        mapping.startObject();
+        {
+            mapping.startObject("_doc");
+            {
+                mapping.startObject("properties");
+                {
+                    mapping.startObject("location");
+                    mapping.field("type", "geo_shape");
+                    mapping.endObject();
+                }
+                mapping.endObject();
+            }
+            mapping.endObject();
+        }
+        mapping.endObject();
+
+        final String index = "test-index";
+        client().admin()
+            .indices()
+            .prepareCreate(index)
+            .setMapping(mapping)
+            .setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, between(1, 5)))
+            .get();
+
+        int numDocs = randomIntBetween(10, 100);
+        for (int i = 0; i < numDocs; i++) {
+            // GeoJSON positions are ordered [longitude, latitude]; putting the latitude first would
+            // place longitudes (up to +/-180) into the latitude slot.
+            final XContentBuilder doc = XContentFactory.jsonBuilder()
+                .startObject()
+                .startObject("location")
+                .field("type", "point")
+                .field("coordinates", new double[] { GeoTestUtil.nextLongitude(), GeoTestUtil.nextLatitude() })
+                .endObject()
+                .endObject();
+            client().prepareIndex(index).setId("id-" + i).setSource(doc).get();
+        }
+        AnalyzeIndexDiskUsageResponse resp = client().execute(
+            AnalyzeIndexDiskUsageAction.INSTANCE,
+            new AnalyzeIndexDiskUsageRequest(new String[] { index }, AnalyzeIndexDiskUsageRequest.DEFAULT_INDICES_OPTIONS, true)
+        ).actionGet();
+
+        final IndexDiskUsageStats stats = resp.getStats().get(index);
+        logger.info("--> stats {}", stats);
+        assertNotNull(stats);
+        assertThat(stats.getIndexSizeInBytes(), greaterThan(100L));
+
+        // The geo_shape field must be accounted for, and specifically under the points statistics.
+        final IndexDiskUsageStats.PerFieldDiskUsage locationField = stats.getFields().get("location");
+        assertThat(locationField.totalBytes(), greaterThan(0L));
+        assertThat(locationField.getPointsBytes(), greaterThan(0L));
+        assertMetadataFields(stats);
+    }
+
     public void testFailOnFlush() throws Exception {
         final String indexName = "test-index";
         int numberOfShards = between(1, 5);

+ 6 - 2
server/src/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java

@@ -386,8 +386,12 @@ final class IndexDiskUsageAnalyzer {
             directory.resetBytesRead();
             if (field.getPointDimensionCount() > 0) {
                 final PointValues values = pointsReader.getValues(field.name);
-                values.intersect(new PointsVisitor(values.getMinPackedValue(), values.getNumDimensions(), values.getBytesPerDimension()));
-                values.intersect(new PointsVisitor(values.getMaxPackedValue(), values.getNumDimensions(), values.getBytesPerDimension()));
+                values.intersect(
+                    new PointsVisitor(values.getMinPackedValue(), values.getNumIndexDimensions(), values.getBytesPerDimension())
+                );
+                values.intersect(
+                    new PointsVisitor(values.getMaxPackedValue(), values.getNumIndexDimensions(), values.getBytesPerDimension())
+                );
                 stats.addPoints(field.name, directory.getBytesRead());
             }
         }

+ 85 - 4
server/src/test/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzerTests.java

@@ -21,6 +21,8 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.LatLonShape;
+import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.SortedNumericDocValuesField;
@@ -35,6 +37,7 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentReader;
@@ -43,6 +46,7 @@ import org.apache.lucene.search.suggest.document.CompletionPostingsFormat;
 import org.apache.lucene.search.suggest.document.SuggestField;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
+import org.apache.lucene.tests.geo.GeoTestUtil;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.core.internal.io.IOUtils;
@@ -170,7 +174,7 @@ public class IndexDiskUsageAnalyzerTests extends ESTestCase {
         }
     }
 
-    public void testPoints() throws Exception {
+    public void testBinaryPoints() throws Exception {
         try (Directory dir = newDirectory()) {
             final CodecMode codec = randomFrom(CodecMode.values());
             indexRandomly(dir, codec, between(100, 1000), doc -> {
@@ -207,6 +211,59 @@ public class IndexDiskUsageAnalyzerTests extends ESTestCase {
         }
     }
 
+    /**
+     * Indexes {@code LatLonShape} fields with different frequencies and checks that the analyzed
+     * points bytes are attributed to each field in proportion to how often it was indexed.
+     */
+    public void testTriangle() throws Exception {
+        try (Directory dir = newDirectory()) {
+            final CodecMode codec = randomFrom(CodecMode.values());
+            indexRandomly(dir, codec, between(100, 1000), doc -> {
+                // triangle_1 is added when ratio <= 0.25, triangle_2 when ratio <= 0.50 and
+                // triangle_3 unconditionally, i.e. relative frequencies of 1 : 2 : 4.
+                final double ratio = randomDouble();
+                if (ratio <= 0.25) {
+                    addFieldsToDoc(
+                        doc,
+                        LatLonShape.createIndexableFields("triangle_1", GeoTestUtil.nextLatitude(), GeoTestUtil.nextLongitude())
+                    );
+                }
+                if (ratio <= 0.50) {
+                    addFieldsToDoc(
+                        doc,
+                        LatLonShape.createIndexableFields("triangle_2", GeoTestUtil.nextLatitude(), GeoTestUtil.nextLongitude())
+                    );
+                }
+                addFieldsToDoc(
+                    doc,
+                    LatLonShape.createIndexableFields("triangle_3", GeoTestUtil.nextLatitude(), GeoTestUtil.nextLongitude())
+                );
+            });
+            final IndexDiskUsageStats stats = IndexDiskUsageAnalyzer.analyze(testShardId(), lastCommit(dir), () -> {});
+            final IndexDiskUsageStats perField = collectPerFieldStats(dir);
+            logger.info("--> stats {} per field {}", stats, perField);
+            assertFieldStats("total", "points", stats.total().getPointsBytes(), perField.total().getPointsBytes(), 0.01, 2048);
+            // Expected shares follow the 1 : 2 : 4 frequencies above: 1/7, 2/7 and 4/7 of the total points bytes.
+            assertFieldStats(
+                "triangle_1",
+                "points",
+                stats.getFields().get("triangle_1").getPointsBytes(),
+                stats.total().getPointsBytes() / 7,
+                0.01,
+                2048
+            );
+            assertFieldStats(
+                "triangle_2",
+                "points",
+                stats.getFields().get("triangle_2").getPointsBytes(),
+                stats.total().getPointsBytes() * 2 / 7,
+                0.01,
+                2048
+            );
+            assertFieldStats(
+                "triangle_3",
+                "points",
+                stats.getFields().get("triangle_3").getPointsBytes(),
+                stats.total().getPointsBytes() * 4 / 7,
+                0.01,
+                2048
+            );
+        }
+    }
+
     public void testCompletionField() throws Exception {
         IndexWriterConfig config = new IndexWriterConfig().setCommitOnClose(true)
             .setUseCompoundFile(false)
@@ -288,6 +345,12 @@ public class IndexDiskUsageAnalyzerTests extends ESTestCase {
         }
     }
 
+    /** Adds every element of {@code fields} to {@code doc}, in array order. */
+    private static void addFieldsToDoc(Document doc, IndexableField[] fields) {
+        for (final IndexableField indexableField : fields) {
+            doc.add(indexableField);
+        }
+    }
+
     enum CodecMode {
         BEST_SPEED {
             @Override
@@ -366,10 +429,15 @@ public class IndexDiskUsageAnalyzerTests extends ESTestCase {
         }
     }
 
-    static void addRandomPoints(Document doc) {
+    static void addRandomIntLongPoints(Document doc) {
         final int numValues = random().nextInt(5);
         for (int i = 0; i < numValues; i++) {
-            doc.add(new IntPoint("pt-" + randomIntBetween(1, 2), random().nextInt()));
+            if (randomBoolean()) {
+                doc.add(new IntPoint("int_point_" + randomIntBetween(1, 2), random().nextInt()));
+            }
+            if (randomBoolean()) {
+                doc.add(new LongPoint("long_point_" + randomIntBetween(1, 2), random().nextLong()));
+            }
         }
     }
 
@@ -398,7 +466,20 @@ public class IndexDiskUsageAnalyzerTests extends ESTestCase {
             addRandomPostings(doc);
         }
         if (randomBoolean()) {
-            addRandomPoints(doc);
+            addRandomIntLongPoints(doc);
+        }
+        if (randomBoolean()) {
+            final int numValues = random().nextInt(5);
+            for (int i = 0; i < numValues; i++) {
+                addFieldsToDoc(
+                    doc,
+                    LatLonShape.createIndexableFields(
+                        "triangle_" + randomIntBetween(1, 2),
+                        GeoTestUtil.nextLatitude(),
+                        GeoTestUtil.nextLongitude()
+                    )
+                );
+            }
         }
         if (randomBoolean()) {
             addRandomStoredFields(doc, between(1, 3));