Selaa lähdekoodia

SQL: Enable aggregations to create a separate bucket for missing values (#32832)

Enable aggregations to create a separate bucket for missing values.
Andrei Stefan 7 vuotta sitten
vanhempi
commit
3d9ca4baee

+ 2 - 1
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/agg/GroupByColumnKey.java

@@ -25,7 +25,8 @@ public class GroupByColumnKey extends GroupByKey {
     public TermsValuesSourceBuilder asValueSource() {
         return new TermsValuesSourceBuilder(id())
                 .field(fieldName())
-                .order(direction().asOrder());
+                .order(direction().asOrder())
+                .missingBucket(true);
     }
 
     @Override

+ 2 - 1
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/agg/GroupByDateKey.java

@@ -44,7 +44,8 @@ public class GroupByDateKey extends GroupByKey {
         return new DateHistogramValuesSourceBuilder(id())
                 .field(fieldName())
                 .dateHistogramInterval(new DateHistogramInterval(interval))
-                .timeZone(DateTimeZone.forTimeZone(timeZone));
+                .timeZone(DateTimeZone.forTimeZone(timeZone))
+                .missingBucket(true);
     }
 
     @Override

+ 2 - 1
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/agg/GroupByScriptKey.java

@@ -36,7 +36,8 @@ public class GroupByScriptKey extends GroupByKey {
     public TermsValuesSourceBuilder asValueSource() {
         TermsValuesSourceBuilder builder = new TermsValuesSourceBuilder(id())
                 .script(script.toPainless())
-                .order(direction().asOrder());
+                .order(direction().asOrder())
+                .missingBucket(true);
 
         if (script.outputType().isNumeric()) {
             builder.valueType(ValueType.NUMBER);

+ 12 - 8
x-pack/qa/sql/src/main/java/org/elasticsearch/xpack/qa/sql/jdbc/DataLoader.java

@@ -42,14 +42,15 @@ public class DataLoader {
     }
 
     protected static void loadEmpDatasetIntoEs(RestClient client) throws Exception {
-        loadEmpDatasetIntoEs(client, "test_emp");
-        loadEmpDatasetIntoEs(client, "test_emp_copy");
+        loadEmpDatasetIntoEs(client, "test_emp", "employees");
+        loadEmpDatasetIntoEs(client, "test_emp_copy", "employees");
+        loadEmpDatasetIntoEs(client, "test_emp_with_nulls", "employees_with_nulls");
         makeAlias(client, "test_alias", "test_emp", "test_emp_copy");
         makeAlias(client, "test_alias_emp", "test_emp", "test_emp_copy");
     }
 
     public static void loadDocsDatasetIntoEs(RestClient client) throws Exception {
-        loadEmpDatasetIntoEs(client, "emp");
+        loadEmpDatasetIntoEs(client, "emp", "employees");
         loadLibDatasetIntoEs(client, "library");
         makeAlias(client, "employees", "emp");
     }
@@ -62,7 +63,7 @@ public class DataLoader {
        .endObject();
     }
 
-    protected static void loadEmpDatasetIntoEs(RestClient client, String index) throws Exception {
+    protected static void loadEmpDatasetIntoEs(RestClient client, String index, String fileName) throws Exception {
         Request request = new Request("PUT", "/" + index);
         XContentBuilder createIndex = JsonXContent.contentBuilder().startObject();
         createIndex.startObject("settings");
@@ -129,15 +130,18 @@ public class DataLoader {
         request = new Request("POST", "/" + index + "/emp/_bulk");
         request.addParameter("refresh", "true");
         StringBuilder bulk = new StringBuilder();
-        csvToLines("employees", (titles, fields) -> {
+        csvToLines(fileName, (titles, fields) -> {
             bulk.append("{\"index\":{}}\n");
             bulk.append('{');
             String emp_no = fields.get(1);
             for (int f = 0; f < fields.size(); f++) {
-                if (f != 0) {
-                    bulk.append(',');
+                // an empty value in the csv file is treated as 'null', thus skipping it in the bulk request
+                if (fields.get(f).trim().length() > 0) {
+                    if (f != 0) {
+                        bulk.append(',');
+                    }
+                    bulk.append('"').append(titles.get(f)).append("\":\"").append(fields.get(f)).append('"');
                 }
-                bulk.append('"').append(titles.get(f)).append("\":\"").append(fields.get(f)).append('"');
             }
             // append department
             List<List<String>> list = dep_emp.get(emp_no);

+ 5 - 1
x-pack/qa/sql/src/main/java/org/elasticsearch/xpack/qa/sql/jdbc/SqlSpecTestCase.java

@@ -25,7 +25,10 @@ public abstract class SqlSpecTestCase extends SpecBaseIntegrationTestCase {
     private String query;
 
     @ClassRule
-    public static LocalH2 H2 = new LocalH2((c) -> c.createStatement().execute("RUNSCRIPT FROM 'classpath:/setup_test_emp.sql'"));
+    public static LocalH2 H2 = new LocalH2((c) -> { 
+        c.createStatement().execute("RUNSCRIPT FROM 'classpath:/setup_test_emp.sql'");
+        c.createStatement().execute("RUNSCRIPT FROM 'classpath:/setup_test_emp_with_nulls.sql'");
+    });
 
     @ParametersFactory(argumentFormatting = PARAM_FORMATTING)
     public static List<Object[]> readScriptSpec() throws Exception { 
@@ -39,6 +42,7 @@ public abstract class SqlSpecTestCase extends SpecBaseIntegrationTestCase {
         tests.addAll(readScriptSpec("/arithmetic.sql-spec", parser));
         tests.addAll(readScriptSpec("/string-functions.sql-spec", parser));
         tests.addAll(readScriptSpec("/case-functions.sql-spec", parser));
+        tests.addAll(readScriptSpec("/agg_nulls.sql-spec", parser));
         return tests;
     }
 

+ 14 - 0
x-pack/qa/sql/src/main/resources/agg_nulls.sql-spec

@@ -0,0 +1,14 @@
+selectGenderWithNullsAndGroupByGender
+SELECT gender, COUNT(*) count FROM test_emp_with_nulls GROUP BY gender ORDER BY gender;
+selectFirstNameWithNullsAndGroupByFirstName
+SELECT first_name FROM test_emp_with_nulls GROUP BY first_name ORDER BY first_name;
+selectCountWhereIsNull
+SELECT COUNT(*) count FROM test_emp_with_nulls WHERE first_name IS NULL;
+selectLanguagesCountWithNullsAndGroupByLanguage
+SELECT languages l, COUNT(*) c FROM test_emp_with_nulls GROUP BY languages ORDER BY languages;
+selectHireDateGroupByHireDate
+SELECT hire_date HD, COUNT(*) c FROM test_emp_with_nulls GROUP BY hire_date ORDER BY hire_date DESC;
+selectHireDateGroupByHireDate
+SELECT hire_date HD, COUNT(*) c FROM test_emp_with_nulls GROUP BY hire_date ORDER BY hire_date DESC;
+selectSalaryGroupBySalary
+SELECT salary, COUNT(*) c FROM test_emp_with_nulls GROUP BY salary ORDER BY salary DESC;

+ 4 - 3
x-pack/qa/sql/src/main/resources/alias.csv-spec

@@ -86,6 +86,7 @@ test_alias           | ALIAS
 test_alias_emp       | ALIAS
 test_emp             | BASE TABLE
 test_emp_copy        | BASE TABLE
+test_emp_with_nulls  | BASE TABLE
 ;
 
 testGroupByOnAlias
@@ -98,10 +99,10 @@ F               | 10099.28
 ;
 
 testGroupByOnPattern
-SELECT gender, PERCENTILE(emp_no, 97) p1 FROM test_* GROUP BY gender;
+SELECT gender, PERCENTILE(emp_no, 97) p1 FROM test_* WHERE gender is NOT NULL GROUP BY gender;
 
 gender:s             | p1:d
 
-F                    | 10099.28
-M                    | 10095.75
+F                    | 10099.32
+M                    | 10095.98
 ;

+ 101 - 0
x-pack/qa/sql/src/main/resources/employees_with_nulls.csv

@@ -0,0 +1,101 @@
+birth_date,emp_no,first_name,gender,hire_date,languages,last_name,salary
+1953-09-02T00:00:00Z,10001,Georgi,,1986-06-26T00:00:00Z,2,Facello,57305
+1964-06-02T00:00:00Z,10002,Bezalel,,1985-11-21T00:00:00Z,5,Simmel,56371
+1959-12-03T00:00:00Z,10003,Parto,,1986-08-28T00:00:00Z,4,Bamford,61805
+1954-05-01T00:00:00Z,10004,Chirstian,,1986-12-01T00:00:00Z,5,Koblick,36174
+1955-01-21T00:00:00Z,10005,Kyoichi,,1989-09-12T00:00:00Z,1,Maliniak,63528
+1953-04-20T00:00:00Z,10006,Anneke,,1989-06-02T00:00:00Z,3,Preusig,60335
+1957-05-23T00:00:00Z,10007,Tzvetan,,1989-02-10T00:00:00Z,4,Zielinski,74572
+1958-02-19T00:00:00Z,10008,Saniya,,1994-09-15T00:00:00Z,2,Kalloufi,43906
+1952-04-19T00:00:00Z,10009,Sumant,,1985-02-18T00:00:00Z,1,Peac,66174
+1963-06-01T00:00:00Z,10010,Duangkaew,,1989-08-24T00:00:00Z,4,Piveteau,45797
+1953-11-07T00:00:00Z,10011,Mary,F,1990-01-22T00:00:00Z,5,Sluis,31120
+1960-10-04T00:00:00Z,10012,Patricio,M,1992-12-18T00:00:00Z,5,Bridgland,48942
+1963-06-07T00:00:00Z,10013,Eberhardt,M,1985-10-20T00:00:00Z,1,Terkki,48735
+1956-02-12T00:00:00Z,10014,Berni,M,1987-03-11T00:00:00Z,5,Genin,37137
+1959-08-19T00:00:00Z,10015,Guoxiang,M,1987-07-02T00:00:00Z,5,Nooteboom,25324
+1961-05-02T00:00:00Z,10016,Kazuhito,M,1995-01-27T00:00:00Z,2,Cappelletti,61358
+1958-07-06T00:00:00Z,10017,Cristinel,F,1993-08-03T00:00:00Z,2,Bouloucos,58715
+1954-06-19T00:00:00Z,10018,Kazuhide,F,1993-08-03T00:00:00Z,2,Peha,56760
+1953-01-23T00:00:00Z,10019,Lillian,M,1993-08-03T00:00:00Z,1,Haddadi,73717
+1952-12-24T00:00:00Z,10020,,M,1991-01-26T00:00:00Z,3,Warwick,40031
+1960-02-20T00:00:00Z,10021,,M,1989-12-17T00:00:00Z,5,Erde,60408
+1952-07-08T00:00:00Z,10022,,M,1995-08-22T00:00:00Z,3,Famili,48233
+1953-09-29T00:00:00Z,10023,,F,1989-12-17T00:00:00Z,2,Montemayor,47896
+1958-09-05T00:00:00Z,10024,,F,1997-05-19T00:00:00Z,3,Pettey,64675
+1958-10-31T00:00:00Z,10025,Prasadram,M,1987-08-17T00:00:00Z,5,Heyers,47411
+1953-04-03T00:00:00Z,10026,Yongqiao,M,1995-03-20T00:00:00Z,3,Berztiss,28336
+1962-07-10T00:00:00Z,10027,Divier,F,1989-07-07T00:00:00Z,5,Reistad,73851
+1963-11-26T00:00:00Z,10028,Domenick,M,1991-10-22T00:00:00Z,1,Tempesti,39356
+1956-12-13T00:00:00Z,10029,Otmar,M,1985-11-20T00:00:00Z,,Herbst,74999
+1958-07-14T00:00:00Z,10030,Elvis,M,1994-02-17T00:00:00Z,,Demeyer,67492
+1959-01-27T00:00:00Z,10031,Karsten,M,1994-02-17T00:00:00Z,,Joslin,37716
+1960-08-09T00:00:00Z,10032,Jeong,F,1990-06-20T00:00:00Z,,Reistad,62233
+1956-11-14T00:00:00Z,10033,Arif,M,1987-03-18T00:00:00Z,,Merlo,70011
+1962-12-29T00:00:00Z,10034,Bader,M,1988-09-05T00:00:00Z,,Swan,39878
+1953-02-08T00:00:00Z,10035,Alain,M,1988-09-05T00:00:00Z,,Chappelet,25945
+1959-08-10T00:00:00Z,10036,Adamantios,M,1992-01-03T00:00:00Z,,Portugali,60781
+1963-07-22T00:00:00Z,10037,Pradeep,M,1990-12-05T00:00:00Z,,Makrucki,37691
+1960-07-20T00:00:00Z,10038,Huan,M,1989-09-20T00:00:00Z,,Lortz,35222
+1959-10-01T00:00:00Z,10039,Alejandro,M,1988-01-19T00:00:00Z,,Brender,36051
+1959-09-13T00:00:00Z,10040,Weiyi,F,1993-02-14T00:00:00Z,,Meriste,37112
+1959-08-27T00:00:00Z,10041,Uri,F,1989-11-12T00:00:00Z,1,Lenart,56415
+1956-02-26T00:00:00Z,10042,Magy,F,1993-03-21T00:00:00Z,3,Stamatiou,30404
+1960-09-19T00:00:00Z,10043,Yishay,M,1990-10-20T00:00:00Z,1,Tzvieli,34341
+1961-09-21T00:00:00Z,10044,Mingsen,F,1994-05-21T00:00:00Z,1,Casley,39728
+1957-08-14T00:00:00Z,10045,Moss,M,1989-09-02T00:00:00Z,3,Shanbhogue,74970
+1960-07-23T00:00:00Z,10046,Lucien,M,1992-06-20T00:00:00Z,4,Rosenbaum,50064
+1952-06-29T00:00:00Z,10047,Zvonko,M,1989-03-31T00:00:00Z,4,Nyanchama,42716
+1963-07-11T00:00:00Z,10048,Florian,M,1985-02-24T00:00:00Z,3,Syrotiuk,26436
+1961-04-24T00:00:00Z,10049,Basil,F,1992-05-04T00:00:00Z,5,Tramer,37853
+1958-05-21T00:00:00Z,10050,Yinghua,M,1990-12-25T00:00:00Z,2,Dredge,43026
+1953-07-28T00:00:00Z,10051,Hidefumi,M,1992-10-15T00:00:00Z,3,Caine,58121
+1961-02-26T00:00:00Z,10052,Heping,M,1988-05-21T00:00:00Z,1,Nitsch,55360
+1954-09-13T00:00:00Z,10053,Sanjiv,F,1986-02-04T00:00:00Z,3,Zschoche,54462
+1957-04-04T00:00:00Z,10054,Mayumi,M,1995-03-13T00:00:00Z,4,Schueller,65367
+1956-06-06T00:00:00Z,10055,Georgy,M,1992-04-27T00:00:00Z,5,Dredge,49281
+1961-09-01T00:00:00Z,10056,Brendon,F,1990-02-01T00:00:00Z,2,Bernini,33370
+1954-05-30T00:00:00Z,10057,Ebbe,F,1992-01-15T00:00:00Z,4,Callaway,27215
+1954-10-01T00:00:00Z,10058,Berhard,M,1987-04-13T00:00:00Z,3,McFarlin,38376
+1953-09-19T00:00:00Z,10059,Alejandro,F,1991-06-26T00:00:00Z,2,McAlpine,44307
+1961-10-15T00:00:00Z,10060,Breannda,M,1987-11-02T00:00:00Z,2,Billingsley,29175
+1962-10-19T00:00:00Z,10061,Tse,M,1985-09-17T00:00:00Z,1,Herber,49095
+1961-11-02T00:00:00Z,10062,Anoosh,M,1991-08-30T00:00:00Z,3,Peyn,65030
+1952-08-06T00:00:00Z,10063,Gino,F,1989-04-08T00:00:00Z,3,Leonhardt,52121
+1959-04-07T00:00:00Z,10064,Udi,M,1985-11-20T00:00:00Z,5,Jansch,33956
+1963-04-14T00:00:00Z,10065,Satosi,M,1988-05-18T00:00:00Z,2,Awdeh,50249
+1952-11-13T00:00:00Z,10066,Kwee,M,1986-02-26T00:00:00Z,5,Schusler,31897
+1953-01-07T00:00:00Z,10067,Claudi,M,1987-03-04T00:00:00Z,2,Stavenow,52044
+1962-11-26T00:00:00Z,10068,Charlene,M,1987-08-07T00:00:00Z,3,Brattka,28941
+1960-09-06T00:00:00Z,10069,Margareta,F,1989-11-05T00:00:00Z,5,Bierman,41933
+1955-08-20T00:00:00Z,10070,Reuven,M,1985-10-14T00:00:00Z,3,Garigliano,54329
+1958-01-21T00:00:00Z,10071,Hisao,M,1987-10-01T00:00:00Z,2,Lipner,40612
+1952-05-15T00:00:00Z,10072,Hironoby,F,1988-07-21T00:00:00Z,5,Sidou,54518
+1954-02-23T00:00:00Z,10073,Shir,M,1991-12-01T00:00:00Z,4,McClurg,32568
+1955-08-28T00:00:00Z,10074,Mokhtar,F,1990-08-13T00:00:00Z,5,Bernatsky,38992
+1960-03-09T00:00:00Z,10075,Gao,F,1987-03-19T00:00:00Z,5,Dolinsky,51956
+1952-06-13T00:00:00Z,10076,Erez,F,1985-07-09T00:00:00Z,3,Ritzmann,62405
+1964-04-18T00:00:00Z,10077,Mona,M,1990-03-02T00:00:00Z,5,Azuma,46595
+1959-12-25T00:00:00Z,10078,Danel,F,1987-05-26T00:00:00Z,2,Mondadori,69904
+1961-10-05T00:00:00Z,10079,Kshitij,F,1986-03-27T00:00:00Z,2,Gils,32263
+1957-12-03T00:00:00Z,10080,Premal,M,1985-11-19T00:00:00Z,5,Baek,52833
+1960-12-17T00:00:00Z,10081,Zhongwei,M,1986-10-30T00:00:00Z,2,Rosen,50128
+1963-09-09T00:00:00Z,10082,Parviz,M,1990-01-03T00:00:00Z,4,Lortz,49818
+1959-07-23T00:00:00Z,10083,Vishv,M,1987-03-31T00:00:00Z,1,Zockler,
+1960-05-25T00:00:00Z,10084,Tuval,M,1995-12-15T00:00:00Z,1,Kalloufi,
+1962-11-07T00:00:00Z,10085,Kenroku,M,1994-04-09T00:00:00Z,5,Malabarba,
+1962-11-19T00:00:00Z,10086,Somnath,M,1990-02-16T00:00:00Z,1,Foote,
+1959-07-23T00:00:00Z,10087,Xinglin,F,1986-09-08T00:00:00Z,5,Eugenio,
+1954-02-25T00:00:00Z,10088,Jungsoon,F,1988-09-02T00:00:00Z,5,Syrzycki,
+1963-03-21T00:00:00Z,10089,Sudharsan,F,1986-08-12T00:00:00Z,4,Flasterstein,
+1961-05-30T00:00:00Z,10090,Kendra,M,1986-03-14T00:00:00Z,2,Hofting,44956
+1955-10-04T00:00:00Z,10091,Amabile,M,1992-11-18T00:00:00Z,3,Gomatam,38645
+1964-10-18T00:00:00Z,10092,Valdiodio,F,1989-09-22T00:00:00Z,1,Niizuma,25976
+1964-06-11T00:00:00Z,10093,Sailaja,M,1996-11-05T00:00:00Z,3,Desikan,45656
+1957-05-25T00:00:00Z,10094,Arumugam,F,1987-04-18T00:00:00Z,5,Ossenbruggen,66817
+1965-01-03T00:00:00Z,10095,Hilari,M,1986-07-15T00:00:00Z,4,Morton,37702
+1954-09-16T00:00:00Z,10096,Jayson,M,1990-01-14T00:00:00Z,4,Mandell,43889
+1952-02-27T00:00:00Z,10097,Remzi,M,1990-09-15T00:00:00Z,3,Waschkowski,71165
+1961-09-23T00:00:00Z,10098,Sreekrishna,F,1985-05-13T00:00:00Z,4,Servieres,44817
+1956-05-25T00:00:00Z,10099,Valter,F,1988-10-18T00:00:00Z,2,Sullins,73578
+1953-04-21T00:00:00Z,10100,Hironobu,F,1987-09-21T00:00:00Z,4,Haraldson,68431

+ 12 - 0
x-pack/qa/sql/src/main/resources/setup_test_emp_with_nulls.sql

@@ -0,0 +1,12 @@
+DROP TABLE IF EXISTS "test_emp_with_nulls";
+CREATE TABLE "test_emp_with_nulls" (
+                    "birth_date" TIMESTAMP WITH TIME ZONE,
+                    "emp_no" INT, 
+                    "first_name" VARCHAR(50),
+                    "gender" VARCHAR(1),
+                    "hire_date" TIMESTAMP WITH TIME ZONE,
+                    "languages" TINYINT,
+                    "last_name" VARCHAR(50),
+                    "salary" INT
+                   )
+   AS SELECT * FROM CSVREAD('classpath:/employees_with_nulls.csv');