Prechádzať zdrojové kódy

Rare terms aggregation false **positive** fix (#126884) (#127358)

I found that `rare_terms` aggregation can return **false positive**
results (some of the returned results are not real).  I traced it
down and find out it's a bug in `CuckooFilter`'s merge methord.

```
        if (isSetMode == false && other.isSetMode) {
            other.hashes.forEach(this::add);
        }
```

should be 

```
        if (isSetMode == false && other.isSetMode) {            
            other.hashes.forEach(this::addHash);
        }
```

Co-authored-by: iamgd67 <iamgd67@sina.com>
Nik Everett 5 mesiacov pred
rodič
commit
d55b61874c

+ 5 - 0
docs/changelog/126884.yaml

@@ -0,0 +1,5 @@
+pr: 126884
+summary: Rare terms aggregation false **positive** fix
+area: Aggregations
+type: bug
+issues: []

+ 1 - 1
server/src/main/java/org/elasticsearch/common/util/SetBackedScalingCuckooFilter.java

@@ -341,7 +341,7 @@ public class SetBackedScalingCuckooFilter implements Writeable {
         } else if (isSetMode == false && other.isSetMode) {
             // Rather than converting the other to a cuckoo first, we can just
             // replay the values directly into our filter.
-            other.hashes.forEach(this::add);
+            other.hashes.forEach(this::addHash);
         } else {
             // Both are in cuckoo mode, merge raw fingerprints
 

+ 28 - 0
server/src/test/java/org/elasticsearch/common/util/SetBackedScalingCuckooFilterTests.java

@@ -132,6 +132,34 @@ public class SetBackedScalingCuckooFilterTests extends AbstractWireSerializingTe
         );
     }
 
+    public void testMergeBigSmall() {
+        int threshold = 1000;
+
+        // Setup the first filter
+        SetBackedScalingCuckooFilter filter = new SetBackedScalingCuckooFilter(threshold, Randomness.get(), 0.01);
+        int counter = 0;
+        Set<Long> values = new HashSet<>();
+        while (counter < threshold + 1) {
+            long value = randomLong();
+            filter.add(value);
+            boolean newValue = values.add(value);
+            if (newValue) {
+                counter += 1;
+            }
+        }
+
+        SetBackedScalingCuckooFilter filter2 = new SetBackedScalingCuckooFilter(threshold, Randomness.get(), 0.01);
+        long value = randomLong();
+        while (filter.mightContain(value)) {
+            value = randomLong();
+        }
+
+        filter2.add(value);
+
+        filter.merge(filter2);
+        assertTrue(filter.mightContain(value));
+    }
+
     public void testMergeSmall() {
         int threshold = 1000;