Bläddra i källkod

ESQL: Fix precision of `scaled_float` field values retrieved from stored source (#122586)

kanoshiou 8 månader sedan
förälder
incheckning
de41d5704b

+ 6 - 0
docs/changelog/122586.yaml

@@ -0,0 +1,6 @@
+pr: 122586
+summary: "ESQL: Fix inconsistent results in using scaled_float field"
+area: ES|QL
+type: bug
+issues:
+  - 122547

+ 1 - 2
modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java

@@ -321,8 +321,7 @@ public class ScaledFloatFieldMapper extends FieldMapper {
                 return BlockLoader.CONSTANT_NULLS;
             }
             if (hasDocValues()) {
-                double scalingFactorInverse = 1d / scalingFactor;
-                return new BlockDocValuesReader.DoublesBlockLoader(name(), l -> l * scalingFactorInverse);
+                return new BlockDocValuesReader.DoublesBlockLoader(name(), l -> l / scalingFactor);
             }
             if (isSyntheticSource) {
                 return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) {

+ 0 - 10
modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldBlockLoaderTests.java

@@ -26,16 +26,6 @@ public class ScaledFloatFieldBlockLoaderTests extends NumberFieldBlockLoaderTest
     protected Double convert(Number value, Map<String, Object> fieldMapping) {
         var scalingFactor = ((Number) fieldMapping.get("scaling_factor")).doubleValue();
 
-        var docValues = (boolean) fieldMapping.getOrDefault("doc_values", false);
-
-        // There is a slight inconsistency between values that are read from doc_values and from source.
-        // Due to how precision reduction is applied to source values so that they are consistent with doc_values.
-        // See #122547.
-        if (docValues) {
-            var reverseScalingFactor = 1d / scalingFactor;
-            return Math.round(value.doubleValue() * scalingFactor) * reverseScalingFactor;
-        }
-
         // Adjust values coming from source to the way they are stored in doc_values.
         // See mapper implementation.
         return Math.round(value.doubleValue() * scalingFactor) / scalingFactor;

+ 1 - 3
x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java

@@ -660,8 +660,6 @@ public final class CsvTestUtils {
 
     private static double scaledFloat(String value, String factor) {
         double scalingFactor = Double.parseDouble(factor);
-        // this extra division introduces extra imprecision in the following multiplication, but this is how ScaledFloatFieldMapper works.
-        double scalingFactorInverse = 1d / scalingFactor;
-        return new BigDecimal(value).multiply(BigDecimal.valueOf(scalingFactor)).longValue() * scalingFactorInverse;
+        return new BigDecimal(value).multiply(BigDecimal.valueOf(scalingFactor)).longValue() / scalingFactor;
     }
 }

+ 12 - 8
x-pack/plugin/esql/qa/testFixtures/src/main/resources/floats.csv-spec

@@ -8,35 +8,39 @@ a:double              | b:double
 ;
 
 inDouble
+required_capability: fix_precision_of_scaled_float_fields
 from employees | keep emp_no, height, height.float, height.half_float, height.scaled_float | where height in (2.03, 2.0299999713897705, 2.029296875, 2.0300000000000002) | sort emp_no;
 
 emp_no:integer |height:double  |height.float:double |height.half_float:double |height.scaled_float:double
-10001          |2.03           |2.0299999713897705  |2.029296875              |2.0300000000000002
-10090          |2.03           |2.0299999713897705  |2.029296875              |2.0300000000000002
+10001          |2.03           |2.0299999713897705  |2.029296875              |2.03
+10090          |2.03           |2.0299999713897705  |2.029296875              |2.03
 ;
 
 inFloat
+required_capability: fix_precision_of_scaled_float_fields
 from employees | keep emp_no, height, height.float, height.half_float, height.scaled_float | where height.float in (2.03, 2.0299999713897705, 2.029296875, 2.0300000000000002) | sort emp_no;
 
 emp_no:integer |height:double  |height.float:double |height.half_float:double |height.scaled_float:double
-10001          |2.03           |2.0299999713897705  |2.029296875              |2.0300000000000002
-10090          |2.03           |2.0299999713897705  |2.029296875              |2.0300000000000002
+10001          |2.03           |2.0299999713897705  |2.029296875              |2.03
+10090          |2.03           |2.0299999713897705  |2.029296875              |2.03
 ;
 
 inHalfFloat
+required_capability: fix_precision_of_scaled_float_fields
 from employees | keep emp_no, height, height.float, height.half_float, height.scaled_float | where height.half_float in (2.03, 2.0299999713897705, 2.029296875, 2.0300000000000002) | sort emp_no;
 
 emp_no:integer |height:double  |height.float:double |height.half_float:double |height.scaled_float:double
-10001          |2.03           |2.0299999713897705  |2.029296875              |2.0300000000000002
-10090          |2.03           |2.0299999713897705  |2.029296875              |2.0300000000000002
+10001          |2.03           |2.0299999713897705  |2.029296875              |2.03
+10090          |2.03           |2.0299999713897705  |2.029296875              |2.03
 ;
 
 inScaledFloat
+required_capability: fix_precision_of_scaled_float_fields
 from employees | keep emp_no, height, height.float, height.half_float, height.scaled_float | where height.scaled_float in (2.03, 2.0299999713897705, 2.029296875, 2.0300000000000002) | sort emp_no;
 
 emp_no:integer |height:double  |height.float:double |height.half_float:double |height.scaled_float:double
-10001          |2.03           |2.0299999713897705  |2.029296875              |2.0300000000000002
-10090          |2.03           |2.0299999713897705  |2.029296875              |2.0300000000000002
+10001          |2.03           |2.0299999713897705  |2.029296875              |2.03
+10090          |2.03           |2.0299999713897705  |2.029296875              |2.03
 ;
 
 convertFromDouble

+ 2 - 1
x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec

@@ -266,10 +266,11 @@ height:double | languages.long:long | still_hired:boolean
 ;
 
 simpleEvalWithSortAndLimitOne
+required_capability: fix_precision_of_scaled_float_fields
 from employees | eval x = languages + 7 | sort x, avg_worked_seconds | limit 1;
 
 avg_worked_seconds:long | birth_date:date | emp_no:integer | first_name:keyword | gender:keyword | height:double | height.float:double | height.half_float:double | height.scaled_float:double | hire_date:date | is_rehired:boolean | job_positions:keyword | languages:integer | languages.byte:integer | languages.long:long | languages.short:integer | last_name:keyword | salary:integer | salary_change:double | salary_change.int:integer | salary_change.keyword:keyword |salary_change.long:long | still_hired:boolean | x:integer
-208374744         |1956-11-14T00:00:00.000Z|10033          |null           |M              |1.63           |1.6299999952316284|1.6298828125     |1.6300000000000001 |1987-03-18T00:00:00.000Z|true           |null           |1              |1              |1              |1              |Merlo          |70011          |null           |null             |null                 |null              |false          |8                                          
+208374744         |1956-11-14T00:00:00.000Z|10033          |null           |M              |1.63           |1.6299999952316284|1.6298828125     |1.63        |1987-03-18T00:00:00.000Z|true           |null           |1              |1              |1              |1              |Merlo          |70011          |null           |null             |null                 |null              |false          |8                                          
 ;
 
 evalOfAverageValue

+ 2 - 1
x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_expand.csv-spec

@@ -319,10 +319,11 @@ a         | a
 
 //see https://github.com/elastic/elasticsearch/issues/103331
 keepStarMvExpand#[skip:-8.12.99]
+required_capability: fix_precision_of_scaled_float_fields
 from employees | where emp_no == 10001 | keep * |  mv_expand first_name;
 
 avg_worked_seconds:long | birth_date:date          | emp_no:integer | first_name:keyword | gender:keyword | height:double | height.float:double | height.half_float:double | height.scaled_float:double | hire_date:date           | is_rehired:boolean | job_positions:keyword                 | languages:integer | languages.byte:integer | languages.long:long | languages.short:integer | last_name:keyword | salary:integer | salary_change:double | salary_change.int:integer | salary_change.keyword:keyword | salary_change.long:long | still_hired:boolean  
-268728049               | 1953-09-02T00:00:00.000Z | 10001          | Georgi             | M              | 2.03          | 2.0299999713897705  | 2.029296875              | 2.0300000000000002         | 1986-06-26T00:00:00.000Z | [false, true]      | [Accountant, Senior Python Developer] | 2                 | 2                      | 2                   | 2                       | Facello           | 57305          | 1.19                 | 1                         | 1.19                          | 1                       | true           
+268728049               | 1953-09-02T00:00:00.000Z | 10001          | Georgi             | M              | 2.03          | 2.0299999713897705  | 2.029296875              | 2.03                       | 1986-06-26T00:00:00.000Z | [false, true]      | [Accountant, Senior Python Developer] | 2                 | 2                      | 2                   | 2                       | Facello           | 57305          | 1.19                 | 1                         | 1.19                          | 1                       | true           
 ;
 
 

+ 2 - 1
x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec

@@ -159,11 +159,12 @@ y:integer | x:date
 ;
 
 renameIntertwinedWithSort
+required_capability: fix_precision_of_scaled_float_fields
 FROM employees | eval x = salary | rename x as y | rename y as x | sort x | rename x as y | limit 10;
 
 avg_worked_seconds:l | birth_date:date          | emp_no:i             | first_name:s         | gender:s             | height:d             | height.float:d       | height.half_float:d  | height.scaled_float:d| hire_date:date           | is_rehired:bool      | job_positions:s      | languages:i          | languages.byte:i     | languages.long:l     | languages.short:i    | last_name:s          | salary:i             | salary_change:d      | salary_change.int:i  | salary_change.keyword:s | salary_change.long:l | still_hired:bool  | y:i          
 
-390266432            | 1959-08-19T00:00:00.000Z | 10015                | Guoxiang             | null                 | 1.66                 | 1.659999966621399    | 1.66015625           | 1.6600000000000001   | 1987-07-02T00:00:00.000Z | [false, false, false, true]| [Head Human Resources, Junior Developer, Principal Support Engineer, Support Engineer]  | 5                    | 5                    | 5                    | 5                    | Nooteboom            | 25324                | [12.4, 14.25]               | [12, 14]             | [12.40, 14.25]             | [12, 14]             | true                 | 25324               
+390266432            | 1959-08-19T00:00:00.000Z | 10015                | Guoxiang             | null                 | 1.66                 | 1.659999966621399    | 1.66015625           | 1.66                 | 1987-07-02T00:00:00.000Z | [false, false, false, true]| [Head Human Resources, Junior Developer, Principal Support Engineer, Support Engineer]  | 5                    | 5                    | 5                    | 5                    | Nooteboom            | 25324                | [12.4, 14.25]               | [12, 14]             | [12.40, 14.25]             | [12, 14]             | true                 | 25324               
 203838153            | 1953-02-08T00:00:00.000Z | 10035                | null                 | M                    | 1.81                 | 1.809999942779541    | 1.8095703125         | 1.81                 | 1988-09-05T00:00:00.000Z | false                      | [Data Scientist, Senior Python Developer]                                               | 5                    | 5                    | 5                    | 5                    | Chappelet            | 25945                | [-6.58, -2.54]              | [-6, -2]             | [-2.54, -6.58]             | [-6, -2]             | false                | 25945               
 313407352            | 1964-10-18T00:00:00.000Z | 10092                | Valdiodio            | F                    | 1.75                 | 1.75                 | 1.75                 | 1.75                 | 1989-09-22T00:00:00.000Z | [false, false, true, true] | [Accountant, Junior Developer]                                                          | 1                    | 1                    | 1                    | 1                    | Niizuma              | 25976                | [-6.77, 0.39, 8.3, 8.78]    | [-6, 0, 8, 8]        | [-6.77, 0.39, 8.30,8.78]   | [-6, 0, 8, 8]        | false                | 25976               
 248451647            | null                     | 10048                | Florian              | M                    | 2.0                  | 2.0                  | 2.0                  | 2.0                  | 1985-02-24T00:00:00.000Z | [true, true]               | Internship                                                                              | 3                    | 3                    | 3                    | 3                    | Syrotiuk             | 26436                | null                        | null                 | null                       | null                 | false                | 26436               

+ 1 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_count_distinct.csv-spec

@@ -52,6 +52,7 @@ h:long
 ;
 
 countDistinctOfScaledFloat
+required_capability: fix_precision_of_scaled_float_fields
 // float becomes double until https://github.com/elastic/elasticsearch-internal/issues/724
 from employees | stats h = count_distinct(height.scaled_float);
 

+ 2 - 1
x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec

@@ -734,10 +734,11 @@ null
 ;
 
 convertFromFloats
+required_capability: fix_precision_of_scaled_float_fields
 from employees | sort emp_no| eval double = to_string(height), float = to_string(height.float), scaled_float = to_string(height.scaled_float), half_float = to_string(height.half_float) | keep emp_no, double, float, scaled_float, half_float, height  | limit 2;
 
 emp_no:integer |double:keyword |float:keyword     |scaled_float:keyword   |half_float:keyword   |height:double
-10001          |2.03           |2.0299999713897705|2.0300000000000002     |2.029296875          |2.03
+10001          |2.03           |2.0299999713897705|2.03                   |2.029296875          |2.03
 10002          |2.08           |2.0799999237060547|2.08                   |2.080078125          |2.08
 ;
 

+ 6 - 0
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

@@ -408,6 +408,12 @@ public class EsqlCapabilities {
          */
         FIX_PARSING_LARGE_NEGATIVE_NUMBERS,
 
+        /**
+         * Fix precision of scaled_float field values retrieved from stored source
+         * see <a href="https://github.com/elastic/elasticsearch/issues/122547"> Slight inconsistency in ESQL using scaled_float field #122547 </a>
+         */
+        FIX_PRECISION_OF_SCALED_FLOAT_FIELDS,
+
         /**
          * Fix the status code returned when trying to run count_distinct on the _source type (which is not supported).
          * see <a href="https://github.com/elastic/elasticsearch/issues/105240">count_distinct(_source) returns a 500 response</a>