1
0
Эх сурвалжийг харах

ESQL: Enrich match text (#106435)

* WIP Support ENRICH MATCH on TEXT

* Disallow KEYWORD from range enrich

The ingest processor does not support this, and there is no keyword_range type to complement the numerical, date and ip range types.

* Revert: Disallow KEYWORD from range enrich

We allow using KEYWORD to range match against ip_range.

* Update docs/changelog/106435.yaml

* Improve changelog entry

* Added yaml test for ENRICH on TEXT fields

* Allow TEXT for range, so text matches IP-range (plus test)
Craig Taverner 1 жил өмнө
parent
commit
1802aea723

+ 6 - 0
docs/changelog/106435.yaml

@@ -0,0 +1,6 @@
+pr: 106435
+summary: "ENRICH support for TEXT fields"
+area: ES|QL
+type: enhancement
+issues:
+ - 105384

+ 3 - 1
x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java

@@ -107,6 +107,7 @@ public class CsvTestsDataLoader {
     private static final EnrichConfig DECADES_ENRICH = new EnrichConfig("decades_policy", "enrich-policy-decades.json");
     private static final EnrichConfig CITY_NAMES_ENRICH = new EnrichConfig("city_names", "enrich-policy-city_names.json");
     private static final EnrichConfig CITY_BOUNDARIES_ENRICH = new EnrichConfig("city_boundaries", "enrich-policy-city_boundaries.json");
+    private static final EnrichConfig CITY_AIRPORTS_ENRICH = new EnrichConfig("city_airports", "enrich-policy-city_airports.json");
 
     public static final List<String> ENRICH_SOURCE_INDICES = List.of(
         "languages",
@@ -125,7 +126,8 @@ public class CsvTestsDataLoader {
         HEIGHTS_ENRICH,
         DECADES_ENRICH,
         CITY_NAMES_ENRICH,
-        CITY_BOUNDARIES_ENRICH
+        CITY_BOUNDARIES_ENRICH,
+        CITY_AIRPORTS_ENRICH
     );
 
     /**

+ 13 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich-IT_tests_only.csv-spec

@@ -334,3 +334,16 @@ count:long  |  centroid:geo_point            |  airport_in_city:boolean
 396         |  POINT (-2.534797 20.667712)   |  true
 455         |  POINT (3.090752 27.676442)    |  false
 ;
+
+
+spatialEnrichmentTextMatch#[skip:-8.13.99, reason:ENRICH extended in 8.14.0]
+FROM airports
+| WHERE abbrev == "IDR"
+| ENRICH city_airports ON name WITH city_name = city, region, city_boundary
+| EVAL boundary_wkt_length = LENGTH(TO_STRING(city_boundary))
+| KEEP abbrev, city_name, city_location, country, location, name, region, boundary_wkt_length
+;
+
+abbrev:k  |  city_name:k  |  city_location:geo_point |  country:k  |  location:geo_point                       |  name:text                    |  region:text  |  boundary_wkt_length:i
+IDR       |  Indore       |  POINT(75.8472 22.7167)  |  India      |  POINT(75.8092915005895 22.727749187571)  |  Devi Ahilyabai Holkar Int'l  |  Indore City  |  231
+;

+ 7 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich-policy-city_airports.json

@@ -0,0 +1,7 @@
+{
+  "match": {
+    "indices": "airport_city_boundaries",
+    "match_field": "airport",
+    "enrich_fields": ["city", "region", "city_boundary"]
+  }
+}

+ 2 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java

@@ -94,6 +94,7 @@ import static org.elasticsearch.xpack.ql.type.DataTypes.IP;
 import static org.elasticsearch.xpack.ql.type.DataTypes.KEYWORD;
 import static org.elasticsearch.xpack.ql.type.DataTypes.LONG;
 import static org.elasticsearch.xpack.ql.type.DataTypes.NESTED;
+import static org.elasticsearch.xpack.ql.type.DataTypes.TEXT;
 
 public class Analyzer extends ParameterizedRuleExecutor<LogicalPlan, AnalyzerContext> {
     // marker list of attributes for plans that do not have any concrete fields to return, but have other computed columns to return
@@ -605,7 +606,7 @@ public class Analyzer extends ParameterizedRuleExecutor<LogicalPlan, AnalyzerCon
         }
 
         private static final DataType[] GEO_TYPES = new DataType[] { GEO_POINT, GEO_SHAPE };
-        private static final DataType[] NON_GEO_TYPES = new DataType[] { KEYWORD, IP, LONG, INTEGER, FLOAT, DOUBLE, DATETIME };
+        private static final DataType[] NON_GEO_TYPES = new DataType[] { KEYWORD, TEXT, IP, LONG, INTEGER, FLOAT, DOUBLE, DATETIME };
 
         private DataType[] allowedEnrichTypes(String matchType) {
             return matchType.equals(GEO_MATCH_TYPE) ? GEO_TYPES : NON_GEO_TYPES;

+ 69 - 18
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_enrich.yml

@@ -6,7 +6,7 @@ setup:
       features: allowed_warnings_regex
   - do:
       indices.create:
-        index:  cities
+        index: cities
         body:
           settings:
             number_of_shards: 5
@@ -15,13 +15,13 @@ setup:
               city_code:
                 type: keyword
               city:
-                type: keyword
+                type: text
               country:
                 type: keyword
 
   - do:
       bulk:
-        index: "cities"
+        index: cities
         refresh: true
         body:
           - { "index": { } }
@@ -31,16 +31,29 @@ setup:
 
   - do:
       enrich.put_policy:
-        name: cities_policy
+        name: city_codes_policy
         body:
           match:
-            indices: ["cities"]
+            indices: [ "cities" ]
             match_field: "city_code"
-            enrich_fields: ["city", "country"]
+            enrich_fields: [ "city", "country" ]
+
+  - do:
+      enrich.put_policy:
+        name: city_names_policy
+        body:
+          match:
+            indices: [ "cities" ]
+            match_field: "city"
+            enrich_fields: [ "city_code", "country" ]
+
+  - do:
+      enrich.execute_policy:
+        name: city_codes_policy
 
   - do:
       enrich.execute_policy:
-        name: cities_policy
+        name: city_names_policy
 
   - do:
       indices.create:
@@ -52,39 +65,44 @@ setup:
                 type: keyword
               city_id:
                 type: keyword
+              city_name:
+                type: text
   - do:
       bulk:
-        index: "test"
+        index: test
         refresh: true
         body:
           - { "index": { } }
-          - { "name": "Alice", "city_id": "nyc" }
+          - { "name": "Alice", "city_id": "nyc", "city_name": "New York" }
           - { "index": { } }
-          - { "name": "Bob", "city_id": "nyc" }
+          - { "name": "Bob", "city_id": "nyc", "city_name": "New York" }
           - { "index": { } }
-          - { "name": "Mario", "city_id": "rom" }
+          - { "name": "Mario", "city_id": "rom", "city_name": "Rome" }
           - { "index": { } }
-          - { "name": "Denise", "city_id": "sgn" }
+          - { "name": "Denise", "city_id": "sgn", "city_name": "Tan Son Nhat" }
 
 ---
 teardown:
   - do:
       enrich.delete_policy:
-        name: cities_policy
+        name: city_codes_policy
+  - do:
+      enrich.delete_policy:
+        name: city_names_policy
 
 ---
-"Basic":
+"Enrich on keyword":
   - do:
       allowed_warnings_regex:
         - "No limit defined, adding default limit of \\[.*\\]"
       esql.query:
         body:
-          query: 'from test | enrich cities_policy on city_id | keep name, city, country | sort name'
+          query: 'from test | enrich city_codes_policy on city_id | keep name, city, country | sort name'
 
   - match: { columns.0.name: "name" }
   - match: { columns.0.type: "keyword" }
   - match: { columns.1.name: "city" }
-  - match: { columns.1.type: "keyword" }
+  - match: { columns.1.type: "text" }
   - match: { columns.2.name: "country" }
   - match: { columns.2.type: "keyword" }
 
@@ -95,12 +113,14 @@ teardown:
   - match: { values.3: [ "Mario", "Rome", "Italy" ] }
 
 
+---
+"Enrich on keyword with fields":
   - do:
       allowed_warnings_regex:
         - "No limit defined, adding default limit of \\[.*\\]"
       esql.query:
         body:
-          query: 'from test | keep name, city_id | enrich cities_policy on city_id with country | sort name'
+          query: 'from test | keep name, city_id | enrich city_codes_policy on city_id with country | sort name'
 
   - match: { columns.0.name: "name" }
   - match: { columns.0.type: "keyword" }
@@ -116,12 +136,14 @@ teardown:
   - match: { values.3: [ "Mario", "rom", "Italy" ] }
 
 
+---
+"Enrich on keyword with fields alias":
   - do:
       allowed_warnings_regex:
         - "No limit defined, adding default limit of \\[.*\\]"
       esql.query:
         body:
-          query: 'from test | keep name, city_id | enrich cities_policy on city_id with country_name = country | sort name'
+          query: 'from test | keep name, city_id | enrich city_codes_policy on city_id with country_name = country | sort name'
 
   - match: { columns.0.name: "name" }
   - match: { columns.0.type: "keyword" }
@@ -135,3 +157,32 @@ teardown:
   - match: { values.1: [ "Bob", "nyc", "USA" ] }
   - match: { values.2: [ "Denise", "sgn", null ] }
   - match: { values.3: [ "Mario", "rom", "Italy" ] }
+
+
+---
+"Enrich on text":
+  - skip:
+      version: " - 8.13.99"
+      reason: "TEXT field ENRICH support was added in 8.14.0"
+
+  - do:
+      allowed_warnings_regex:
+        - "No limit defined, adding default limit of \\[.*\\]"
+      esql.query:
+        body:
+          query: 'from test | keep name, city_name | enrich city_names_policy on city_name | sort name'
+
+  - match: { columns.0.name: "name" }
+  - match: { columns.0.type: "keyword" }
+  - match: { columns.1.name: "city_name" }
+  - match: { columns.1.type: "text" }
+  - match: { columns.2.name: "city_code" }
+  - match: { columns.2.type: "keyword" }
+  - match: { columns.3.name: "country" }
+  - match: { columns.3.type: "keyword" }
+
+  - length: { values: 4 }
+  - match: { values.0: [ "Alice", "New York", "nyc", "USA" ] }
+  - match: { values.1: [ "Bob", "New York", "nyc", "USA" ] }
+  - match: { values.2: [ "Denise", "Tan Son Nhat", null, null ] }
+  - match: { values.3: [ "Mario", "Rome", "rom", "Italy" ] }

+ 55 - 0
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/61_enrich_ip.yml

@@ -74,6 +74,33 @@ setup:
           - { "index": { } }
           - { "@timestamp": "2023-06-24", "ip": "13.101.0.114", "message": "authentication failed" }
 
+  - do:
+      indices.create:
+        index: events_text
+        body:
+          mappings:
+            properties:
+              "@timestamp":
+                type: date
+              ip_text:
+                type: text
+              message:
+                type: keyword
+
+  - do:
+      bulk:
+        index: events_text
+        refresh: true
+        body:
+          - { "index": { } }
+          - { "@timestamp": "2023-06-20", "ip_text": "10.100.0.21", "message": "network connected" }
+          - { "index": { } }
+          - { "@timestamp": "2023-06-21", "ip_text": [ "10.100.0.21", "10.101.0.107" ], "message": "sending messages" }
+          - { "index": { } }
+          - { "@timestamp": "2023-06-22", "ip_text": "10.101.0.107", "message": "network disconnected" }
+          - { "index": { } }
+          - { "@timestamp": "2023-06-24", "ip_text": "13.101.0.114", "message": "authentication failed" }
+
 ---
 teardown:
   - do:
@@ -104,6 +131,34 @@ teardown:
   - match: { values.2: [ "10.101.0.107" , "QA", "Engineering", "network disconnected" ] }
   - match: { values.3: [ "13.101.0.114" , null, null, "authentication failed" ] }
 
+---
+"IP text fields":
+  - skip:
+      version: " - 8.13.99"
+      reason: "ENRICH support for TEXT fields was added in 8.14.0"
+
+  - do:
+      allowed_warnings_regex:
+        - "No limit defined, adding default limit of \\[.*\\]"
+      esql.query:
+        body:
+          query: 'FROM events_text | ENRICH networks-policy ON ip_text | sort @timestamp | KEEP ip_text, name, department, message'
+
+  - match: { columns.0.name: "ip_text" }
+  - match: { columns.0.type: "text" }
+  - match: { columns.1.name: "name" }
+  - match: { columns.1.type: "keyword" }
+  - match: { columns.2.name: "department" }
+  - match: { columns.2.type: "keyword" }
+  - match: { columns.3.name: "message" }
+  - match: { columns.3.type: "keyword" }
+
+  - length: { values: 4 }
+  - match: { values.0: [ "10.100.0.21", "Production", "OPS", "network connected" ] }
+  - match: { values.1: [ [ "10.100.0.21", "10.101.0.107" ], [ "Production", "QA" ], [ "OPS","Engineering" ], "sending messages" ] }
+  - match: { values.2: [ "10.101.0.107" , "QA", "Engineering", "network disconnected" ] }
+  - match: { values.3: [ "13.101.0.114" , null, null, "authentication failed" ] }
+
 ---
 "Invalid IP strings":
   - skip: