瀏覽代碼

ESQL: Fix SearchStats#count(String) to count values not rows (#104891)

SearchStats#count incorrectly counts the number of documents (or rows)
in which a field appears instead of the actual number of values.
This PR fixes this by looking at the total term frequency (or, for
point-indexed fields, the total number of points) instead of the doc
count.

Fix #104795
Costin Leau 1 年之前
父節點
當前提交
202a81f212

+ 6 - 0
docs/changelog/104891.yaml

@@ -0,0 +1,6 @@
+pr: 104891
+summary: "ESQL: Fix `SearchStats#count(String)` to count values not rows"
+area: ES|QL
+type: bug
+issues:
+ - 104795

+ 22 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec

@@ -983,3 +983,25 @@ ROW a = 1, c = null
 COUNT(c):long | a:integer
             0 | 1
 ;
+
+countMultiValuesRow
+ROW keyword_field = ["foo", "bar"], int_field = [1, 2, 3] | STATS ck = COUNT(keyword_field), ci = COUNT(int_field), c = COUNT(*);
+
+ck:l | ci:l | c:l 
+2    | 3    | 1
+;
+
+
+countSource
+FROM employees | 
+STATS ck = COUNT(job_positions), 
+      cb = COUNT(is_rehired), 
+      cd = COUNT(salary_change), 
+      ci = COUNT(salary_change.int), 
+      c = COUNT(*), 
+      csv = COUNT(emp_no);
+
+ck:l | cb:l | cd:l | ci:l | c:l | csv:l 
+221  | 204  | 183  | 183  | 100 | 100
+;
+

+ 2 - 2
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java

@@ -323,12 +323,12 @@ public class SearchStats {
                     if (fieldInfo.getPointIndexDimensionCount() > 0) {
                         PointValues points = reader.getPointValues(field);
                         if (points != null) {
-                            count += points.getDocCount();
+                            count += points.size();
                         }
                     } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
                         Terms terms = reader.terms(field);
                         if (terms != null) {
-                            count += terms.getDocCount();
+                            count += terms.getSumTotalTermFreq();
                         }
                     } else {
                         return -1; // no shortcut possible for fields that are not indexed