Browse Source

Make LOOKUP JOIN docs examples fully tested (#126622)

The current LOOKUP JOIN docs include examples that are not tested by the ES|QL tests, unlike most other examples in the documentation. This PR fixes that, changing two examples to use existing tests, and adding a new csv-spec file for the remaining four examples. These four are not required to show results, so the tests have empty data and do not require any results. This means we are testing only the syntax (parsing and semantic analysis), which is sufficient for the docs.
Craig Taverner 6 months ago
parent
commit
ec495e9f0b
20 changed files with 316 additions and 50 deletions
  1. 7 0
      docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinHostNameTwice.md
  2. 6 0
      docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinServiceId.md
  3. 6 0
      docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinSourceIp.md
  4. 7 0
      docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinSourceIpWhere.md
  5. 14 0
      docs/reference/query-languages/esql/_snippets/commands/examples/lookup-join.csv-spec/filterOnLeftSide.md
  6. 14 0
      docs/reference/query-languages/esql/_snippets/commands/examples/lookup-join.csv-spec/filterOnRightSide.md
  7. 14 30
      docs/reference/query-languages/esql/_snippets/commands/layout/lookup-join.md
  8. 36 9
      x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java
  9. 70 0
      x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs-lookup-join.csv-spec
  10. 0 5
      x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_lookup-settings.json
  11. 26 1
      x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec
  12. 0 0
      x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-settings.json
  13. 13 0
      x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-app_logs.json
  14. 24 0
      x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-firewall_logs.json
  15. 20 0
      x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-host_inventory.json
  16. 18 0
      x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-ownerships.json
  17. 10 0
      x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-service_owners.json
  18. 17 0
      x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-system_metrics.json
  19. 14 0
      x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-threat_list.json
  20. 0 5
      x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json

+ 7 - 0
docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinHostNameTwice.md

@@ -0,0 +1,7 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+```esql
+FROM system_metrics
+| LOOKUP JOIN host_inventory ON host.name
+| LOOKUP JOIN ownerships ON host.name
+```

+ 6 - 0
docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinServiceId.md

@@ -0,0 +1,6 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+```esql
+FROM app_logs
+| LOOKUP JOIN service_owners ON service_id
+```

+ 6 - 0
docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinSourceIp.md

@@ -0,0 +1,6 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+```esql
+FROM firewall_logs
+| LOOKUP JOIN threat_list ON source.IP
+```

+ 7 - 0
docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinSourceIpWhere.md

@@ -0,0 +1,7 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+```esql
+FROM firewall_logs
+| LOOKUP JOIN threat_list ON source.IP
+| WHERE threat_level IS NOT NULL
+```

+ 14 - 0
docs/reference/query-languages/esql/_snippets/commands/examples/lookup-join.csv-spec/filterOnLeftSide.md

@@ -0,0 +1,14 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+```esql
+FROM employees
+| EVAL language_code = languages
+| WHERE emp_no >= 10091 AND emp_no < 10094
+| LOOKUP JOIN languages_lookup ON language_code
+```
+
+| emp_no:integer | language_code:integer | language_name:keyword |
+| --- | --- | --- |
+| 10091 | 3 | Spanish |
+| 10092 | 1 | English |
+| 10093 | 3 | Spanish |

+ 14 - 0
docs/reference/query-languages/esql/_snippets/commands/examples/lookup-join.csv-spec/filterOnRightSide.md

@@ -0,0 +1,14 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+```esql
+FROM employees
+| EVAL language_code = languages
+| LOOKUP JOIN languages_lookup ON language_code
+| WHERE emp_no >= 10091 AND emp_no < 10094
+```
+
+| emp_no:integer | language_code:integer | language_name:keyword |
+| --- | --- | --- |
+| 10091 | 3 | Spanish |
+| 10092 | 1 | English |
+| 10093 | 3 | Spanish |

+ 14 - 30
docs/reference/query-languages/esql/_snippets/commands/layout/lookup-join.md

@@ -52,53 +52,37 @@ In case of name collisions, the newly created columns will override existing col
 **IP Threat correlation**: This query would allow you to see if any source
 IPs match known malicious addresses.
 
-```esql
-FROM firewall_logs
-| LOOKUP JOIN threat_list ON source.IP
-```
+:::{include} ../examples/docs-lookup-join.csv-spec/lookupJoinSourceIp.md
+:::
 
 To filter only for those rows that have a matching `threat_list` entry, use `WHERE ... IS NOT NULL` with a field from the lookup index:
 
-```esql
-FROM firewall_logs
-| LOOKUP JOIN threat_list ON source.IP
-| WHERE threat_level IS NOT NULL
-```
+:::{include} ../examples/docs-lookup-join.csv-spec/lookupJoinSourceIpWhere.md
+:::
 
 **Host metadata correlation**: This query pulls in environment or
 ownership details for each host to correlate with your metrics data.
 
-```esql
-FROM system_metrics
-| LOOKUP JOIN host_inventory ON host.name
-| LOOKUP JOIN employees ON host.name
-```
+:::{include} ../examples/docs-lookup-join.csv-spec/lookupJoinHostNameTwice.md
+:::
 
 **Service ownership mapping**: This query would show logs with the owning
 team or escalation information for faster triage and incident response.
 
-```esql
-FROM app_logs
-| LOOKUP JOIN service_owners ON service_id
-```
+:::{include} ../examples/docs-lookup-join.csv-spec/lookupJoinServiceId.md
+:::
 
 `LOOKUP JOIN` is generally faster when there are fewer rows to join
 with. {{esql}} will try to perform any `WHERE` clause before the
 `LOOKUP JOIN` where possible.
 
-The two following examples will have the same results. The two examples
-have the `WHERE` clause before and after the `LOOKUP JOIN`. It does not
+The following two examples will have the same results. One has the
+`WHERE` clause before and the other after the `LOOKUP JOIN`. It does not
 matter how you write your query; our optimizer will move the filter
 before the lookup when possible.
 
-```esql
-FROM Left
-| WHERE Language IS NOT NULL
-| LOOKUP JOIN Right ON Key
-```
+:::{include} ../examples/lookup-join.csv-spec/filterOnLeftSide.md
+:::
 
-```esql
-FROM Left
-| LOOKUP JOIN Right ON Key
-| WHERE Language IS NOT NULL
-```
+:::{include} ../examples/lookup-join.csv-spec/filterOnRightSide.md
+:::

+ 36 - 9
x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java

@@ -63,15 +63,14 @@ public class CsvTestsDataLoader {
     private static final TestDataset APPS = new TestDataset("apps");
     private static final TestDataset APPS_SHORT = APPS.withIndex("apps_short").withTypeMapping(Map.of("id", "short"));
     private static final TestDataset LANGUAGES = new TestDataset("languages");
-    private static final TestDataset LANGUAGES_LOOKUP = LANGUAGES.withIndex("languages_lookup")
-        .withSetting("languages_lookup-settings.json");
+    private static final TestDataset LANGUAGES_LOOKUP = LANGUAGES.withIndex("languages_lookup").withSetting("lookup-settings.json");
     private static final TestDataset LANGUAGES_LOOKUP_NON_UNIQUE_KEY = LANGUAGES_LOOKUP.withIndex("languages_lookup_non_unique_key")
         .withData("languages_non_unique_key.csv");
     private static final TestDataset LANGUAGES_NESTED_FIELDS = new TestDataset(
         "languages_nested_fields",
         "mapping-languages_nested_fields.json",
         "languages_nested_fields.csv"
-    ).withSetting("languages_lookup-settings.json");
+    ).withSetting("lookup-settings.json");
     private static final TestDataset ALERTS = new TestDataset("alerts");
     private static final TestDataset UL_LOGS = new TestDataset("ul_logs");
     private static final TestDataset SAMPLE_DATA = new TestDataset("sample_data");
@@ -102,11 +101,17 @@ public class CsvTestsDataLoader {
         "partial_mapping_sample_data.csv"
     ).withSetting("source_parameters-settings.json");
     private static final TestDataset CLIENT_IPS = new TestDataset("clientips");
-    private static final TestDataset CLIENT_IPS_LOOKUP = CLIENT_IPS.withIndex("clientips_lookup")
-        .withSetting("clientips_lookup-settings.json");
+    private static final TestDataset CLIENT_IPS_LOOKUP = CLIENT_IPS.withIndex("clientips_lookup").withSetting("lookup-settings.json");
     private static final TestDataset MESSAGE_TYPES = new TestDataset("message_types");
     private static final TestDataset MESSAGE_TYPES_LOOKUP = MESSAGE_TYPES.withIndex("message_types_lookup")
-        .withSetting("message_types_lookup-settings.json");
+        .withSetting("lookup-settings.json");
+    private static final TestDataset FIREWALL_LOGS = new TestDataset("firewall_logs").noData();
+    private static final TestDataset THREAT_LIST = new TestDataset("threat_list").withSetting("lookup-settings.json").noData();
+    private static final TestDataset APP_LOGS = new TestDataset("app_logs").noData();
+    private static final TestDataset SERVICE_OWNERS = new TestDataset("service_owners").withSetting("lookup-settings.json").noData();
+    private static final TestDataset SYSTEM_METRICS = new TestDataset("system_metrics").noData();
+    private static final TestDataset HOST_INVENTORY = new TestDataset("host_inventory").withSetting("lookup-settings.json").noData();
+    private static final TestDataset OWNERSHIPS = new TestDataset("ownerships").withSetting("lookup-settings.json").noData();
     private static final TestDataset CLIENT_CIDR = new TestDataset("client_cidr");
     private static final TestDataset AGES = new TestDataset("ages");
     private static final TestDataset HEIGHTS = new TestDataset("heights");
@@ -161,6 +166,13 @@ public class CsvTestsDataLoader {
         Map.entry(CLIENT_IPS_LOOKUP.indexName, CLIENT_IPS_LOOKUP),
         Map.entry(MESSAGE_TYPES.indexName, MESSAGE_TYPES),
         Map.entry(MESSAGE_TYPES_LOOKUP.indexName, MESSAGE_TYPES_LOOKUP),
+        Map.entry(FIREWALL_LOGS.indexName, FIREWALL_LOGS),
+        Map.entry(THREAT_LIST.indexName, THREAT_LIST),
+        Map.entry(APP_LOGS.indexName, APP_LOGS),
+        Map.entry(SERVICE_OWNERS.indexName, SERVICE_OWNERS),
+        Map.entry(SYSTEM_METRICS.indexName, SYSTEM_METRICS),
+        Map.entry(HOST_INVENTORY.indexName, HOST_INVENTORY),
+        Map.entry(OWNERSHIPS.indexName, OWNERSHIPS),
         Map.entry(CLIENT_CIDR.indexName, CLIENT_CIDR),
         Map.entry(AGES.indexName, AGES),
         Map.entry(HEIGHTS.indexName, HEIGHTS),
@@ -461,11 +473,14 @@ public class CsvTestsDataLoader {
 
     private static void load(RestClient client, TestDataset dataset, Logger logger, IndexCreator indexCreator) throws IOException {
         URL mapping = getResource("/" + dataset.mappingFileName);
-        URL data = getResource("/data/" + dataset.dataFileName);
-
         Settings indexSettings = dataset.readSettingsFile();
         indexCreator.createIndex(client, dataset.indexName, readMappingFile(mapping, dataset.typeMapping), indexSettings);
-        loadCsvData(client, dataset.indexName, data, dataset.allowSubFields, logger);
+
+        // Some examples only test that the query and mappings are valid, and don't need example data. Use .noData() for those
+        if (dataset.dataFileName != null) {
+            URL data = getResource("/data/" + dataset.dataFileName);
+            loadCsvData(client, dataset.indexName, data, dataset.allowSubFields, logger);
+        }
     }
 
     private static String readMappingFile(URL resource, Map<String, String> typeMapping) throws IOException {
@@ -740,6 +755,18 @@ public class CsvTestsDataLoader {
             );
         }
 
+        public TestDataset noData() {
+            return new TestDataset(
+                indexName,
+                mappingFileName,
+                null,
+                settingFileName,
+                allowSubFields,
+                typeMapping,
+                requiresInferenceEndpoint
+            );
+        }
+
         public TestDataset withSetting(String settingFileName) {
             return new TestDataset(
                 indexName,

+ 70 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs-lookup-join.csv-spec

@@ -0,0 +1,70 @@
+###########################################################
+# These tests were created specifically to satisfy the needs
+# of the docs, and the lookup-join.md file in particular.
+# Since those docs do not display output results, we only
+# need to ensure that the tests run without error.
+# This requires index mappings to be set up correctly,
+# but no data needs to be loaded into the indices.
+###########################################################
+
+# **IP Threat correlation**: This query would allow you to see if any source
+# IPs match known malicious addresses.
+
+lookupJoinSourceIp
+required_capability: join_lookup_v12
+
+// tag::lookupJoinSourceIp[]
+FROM firewall_logs
+| LOOKUP JOIN threat_list ON source.IP
+// end::lookupJoinSourceIp[]
+;
+
+@timestamp:datetime | destination.IP:ip | message:keyword | source.IP:ip | threat_level:keyword
+;
+
+# To filter only for those rows that have a matching `threat_list` entry,
+# use `WHERE ... IS NOT NULL` with a field from the lookup index:
+
+lookupJoinSourceIpWhere
+required_capability: join_lookup_v12
+
+// tag::lookupJoinSourceIpWhere[]
+FROM firewall_logs
+| LOOKUP JOIN threat_list ON source.IP
+| WHERE threat_level IS NOT NULL
+// end::lookupJoinSourceIpWhere[]
+;
+
+@timestamp:datetime | destination.IP:ip | message:keyword | source.IP:ip | threat_level:keyword
+;
+
+# **Host metadata correlation**: This query pulls in environment or
+# ownership details for each host to correlate with your metrics data.
+
+lookupJoinHostNameTwice
+required_capability: join_lookup_v12
+
+// tag::lookupJoinHostNameTwice[]
+FROM system_metrics
+| LOOKUP JOIN host_inventory ON host.name
+| LOOKUP JOIN ownerships ON host.name
+// end::lookupJoinHostNameTwice[]
+;
+
+count:long | details:keyword | host.name:keyword | description:keyword | host.os:keyword | host.version:keyword | owner.name:keyword
+;
+
+# **Service ownership mapping**: This query would show logs with the owning
+# team or escalation information for faster triage and incident response.
+
+lookupJoinServiceId
+required_capability: join_lookup_v12
+
+// tag::lookupJoinServiceId[]
+FROM app_logs
+| LOOKUP JOIN service_owners ON service_id
+// end::lookupJoinServiceId[]
+;
+
+@timestamp:datetime | message:keyword | service_id:keyword | owner:keyword
+;

+ 0 - 5
x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_lookup-settings.json

@@ -1,5 +0,0 @@
-{
-  "index": {
-    "mode": "lookup"
-  }
-}

+ 26 - 1
x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

@@ -489,23 +489,48 @@ language_code:integer | language_name:keyword | country:text
 filterOnLeftSide
 required_capability: join_lookup_v12
 
+// tag::filterOnLeftSide[]
 FROM employees
 | EVAL language_code = languages
+| WHERE emp_no >= 10091 AND emp_no < 10094
 | LOOKUP JOIN languages_lookup ON language_code
+// end::filterOnLeftSide[]
 | SORT emp_no
 | KEEP emp_no, language_code, language_name
-| WHERE emp_no >= 10091 AND emp_no < 10094
 ;
 
+// tag::filterOnLeftSide-result[]
 emp_no:integer | language_code:integer | language_name:keyword
 10091          | 3                     | Spanish
 10092          | 1                     | English
 10093          | 3                     | Spanish
+// end::filterOnLeftSide-result[]
 ;
 
 filterOnRightSide
 required_capability: join_lookup_v12
 
+// tag::filterOnRightSide[]
+FROM employees
+| EVAL language_code = languages
+| LOOKUP JOIN languages_lookup ON language_code
+| WHERE emp_no >= 10091 AND emp_no < 10094
+// end::filterOnRightSide[]
+| SORT emp_no
+| KEEP emp_no, language_code, language_name
+;
+
+// tag::filterOnRightSide-result[]
+emp_no:integer | language_code:integer | language_name:keyword
+10091          | 3                     | Spanish
+10092          | 1                     | English
+10093          | 3                     | Spanish
+// end::filterOnRightSide-result[]
+;
+
+filterOnRightSideMessages
+required_capability: join_lookup_v12
+
 FROM sample_data
 | LOOKUP JOIN message_types_lookup ON message
 | WHERE type == "Error"

+ 0 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json → x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-settings.json


+ 13 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-app_logs.json

@@ -0,0 +1,13 @@
+{
+  "properties": {
+    "@timestamp": {
+      "type": "date"
+    },
+    "service_id": {
+      "type": "keyword"
+    },
+    "message": {
+      "type": "keyword"
+    }
+  }
+}

+ 24 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-firewall_logs.json

@@ -0,0 +1,24 @@
+{
+  "properties": {
+    "@timestamp": {
+      "type": "date"
+    },
+    "source": {
+      "properties": {
+        "IP": {
+          "type": "ip"
+        }
+      }
+    },
+    "destination": {
+      "properties": {
+        "IP": {
+          "type": "ip"
+        }
+      }
+    },
+    "message": {
+      "type": "keyword"
+    }
+  }
+}

+ 20 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-host_inventory.json

@@ -0,0 +1,20 @@
+{
+  "properties": {
+    "host": {
+      "properties": {
+        "name": {
+          "type": "keyword"
+        },
+        "os": {
+          "type": "keyword"
+        },
+        "version": {
+          "type": "keyword"
+        }
+      }
+    },
+    "description": {
+      "type": "keyword"
+    }
+  }
+}

+ 18 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-ownerships.json

@@ -0,0 +1,18 @@
+{
+  "properties": {
+    "host": {
+      "properties": {
+        "name": {
+          "type": "keyword"
+        }
+      }
+    },
+    "owner": {
+      "properties": {
+        "name": {
+          "type": "keyword"
+        }
+      }
+    }
+  }
+}

+ 10 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-service_owners.json

@@ -0,0 +1,10 @@
+{
+  "properties": {
+    "service_id": {
+      "type": "keyword"
+    },
+    "owner": {
+      "type": "keyword"
+    }
+  }
+}

+ 17 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-system_metrics.json

@@ -0,0 +1,17 @@
+{
+  "properties": {
+    "host": {
+      "properties": {
+        "name": {
+          "type": "keyword"
+        }
+      }
+    },
+    "count": {
+      "type": "long"
+    },
+    "details": {
+      "type": "keyword"
+    }
+  }
+}

+ 14 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-threat_list.json

@@ -0,0 +1,14 @@
+{
+  "properties": {
+    "source": {
+      "properties": {
+        "IP": {
+          "type": "ip"
+        }
+      }
+    },
+    "threat_level": {
+      "type": "keyword"
+    }
+  }
+}

+ 0 - 5
x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json

@@ -1,5 +0,0 @@
-{
-  "index": {
-    "mode": "lookup"
-  }
-}