Browse Source

ES|QL Add initial support for semantic_text field type (#113920) (#115256)

* Add initial support for semantic_text field type

* Update docs/changelog/113920.yaml

* More tests and fixes

* Use mock inference service

* Fix tests

* Spotless

* Fix mixed-cluster and multi-clusters tests

* sort

* Attempt another fix for bwc tests

* Spotless

* Fix merge

* Attempt another fix

* Don't load the inference-service-test plugin for mixed versions/clusters

* Add more tests, address review comments

* trivial

* revert

* post-merge fix block loader

* post-merge fix compile

* add mixed version testing

* whitespace

* fix MultiClusterSpecIT

* add more fields to mapping

* Revert mixed version testing

* whitespace

---------

Co-authored-by: ChrisHegarty <chegar999@gmail.com>
Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
Ioana Tagirta 1 year ago
parent
commit
ab82d74481
26 changed files with 490 additions and 35 deletions
  1. 5 0
      docs/changelog/113920.yaml
  2. 1 0
      x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java
  3. 10 2
      x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java
  4. 5 0
      x-pack/plugin/esql/qa/server/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/mixed/MixedClusterEsqlSpecIT.java
  5. 5 0
      x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java
  6. 1 0
      x-pack/plugin/esql/qa/server/multi-node/build.gradle
  7. 1 1
      x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/EsqlSpecIT.java
  8. 1 0
      x-pack/plugin/esql/qa/server/single-node/build.gradle
  9. 1 1
      x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/EsqlSpecIT.java
  10. 18 2
      x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java
  11. 5 1
      x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java
  12. 1 0
      x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java
  13. 113 19
      x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java
  14. 1 1
      x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java
  15. 73 0
      x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-semantic_text.json
  16. 4 0
      x-pack/plugin/esql/qa/testFixtures/src/main/resources/semantic_text.csv
  17. 175 0
      x-pack/plugin/esql/qa/testFixtures/src/main/resources/semantic_text.csv-spec
  18. 5 1
      x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
  19. 1 1
      x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java
  20. 1 1
      x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java
  21. 1 1
      x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java
  22. 1 1
      x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java
  23. 2 2
      x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java
  24. 1 1
      x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/CaseTests.java
  25. 9 0
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
  26. 49 0
      x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_unsupported_types.yml

+ 5 - 0
docs/changelog/113920.yaml

@@ -0,0 +1,5 @@
+pr: 113920
+summary: Add initial support for `semantic_text` field type
+area: Search
+type: enhancement
+issues: []

+ 1 - 0
x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java

@@ -14,4 +14,5 @@ import org.elasticsearch.plugins.Plugin;
 public class EsqlCorePlugin extends Plugin implements ExtensiblePlugin {
     public static final FeatureFlag DATE_NANOS_FEATURE_FLAG = new FeatureFlag("esql_date_nanos");
 
+    public static final FeatureFlag SEMANTIC_TEXT_FEATURE_FLAG = new FeatureFlag("esql_semantic_text");
 }

+ 10 - 2
x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java

@@ -194,7 +194,14 @@ public enum DataType {
      * inside alongside time-series aggregations. These fields are not parsable from the
      * mapping and should be hidden from users.
      */
-    PARTIAL_AGG(builder().esType("partial_agg").unknownSize());
+    PARTIAL_AGG(builder().esType("partial_agg").unknownSize()),
+    /**
+     * String fields that are split into chunks, where each chunk has attached embeddings
+     * used for semantic search. Generally ESQL only sees {@code semantic_text} fields when
+     * loaded from the index and ESQL will load these fields as strings without their attached
+     * chunks or embeddings.
+     */
+    SEMANTIC_TEXT(builder().esType("semantic_text").unknownSize());
 
     /**
      * Types that are actively being built. These types are not returned
@@ -203,7 +210,8 @@ public enum DataType {
      * check that sending them to a function produces a sane error message.
      */
     public static final Map<DataType, FeatureFlag> UNDER_CONSTRUCTION = Map.ofEntries(
-        Map.entry(DATE_NANOS, EsqlCorePlugin.DATE_NANOS_FEATURE_FLAG)
+        Map.entry(DATE_NANOS, EsqlCorePlugin.DATE_NANOS_FEATURE_FLAG),
+        Map.entry(SEMANTIC_TEXT, EsqlCorePlugin.SEMANTIC_TEXT_FEATURE_FLAG)
     );
 
     private final String typeName;

+ 5 - 0
x-pack/plugin/esql/qa/server/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/mixed/MixedClusterEsqlSpecIT.java

@@ -86,4 +86,9 @@ public class MixedClusterEsqlSpecIT extends EsqlSpecTestCase {
     protected boolean enableRoundingDoubleValuesOnAsserting() {
         return true;
     }
+
+    @Override
+    protected boolean supportsInferenceTestService() {
+        return false;
+    }
 }

+ 5 - 0
x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java

@@ -261,4 +261,9 @@ public class MultiClusterSpecIT extends EsqlSpecTestCase {
     protected boolean enableRoundingDoubleValuesOnAsserting() {
         return true;
     }
+
+    @Override
+    protected boolean supportsInferenceTestService() {
+        return false;
+    }
 }

+ 1 - 0
x-pack/plugin/esql/qa/server/multi-node/build.gradle

@@ -11,6 +11,7 @@ dependencies {
 
   clusterPlugins project(':plugins:mapper-size')
   clusterPlugins project(':plugins:mapper-murmur3')
+  clusterPlugins project(':x-pack:plugin:inference:qa:test-service-plugin')
 }
 
 GradleUtils.extendSourceSet(project, "javaRestTest", "yamlRestTest")

+ 1 - 1
x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/EsqlSpecIT.java

@@ -14,7 +14,7 @@ import org.junit.ClassRule;
 
 public class EsqlSpecIT extends EsqlSpecTestCase {
     @ClassRule
-    public static ElasticsearchCluster cluster = Clusters.testCluster(spec -> {});
+    public static ElasticsearchCluster cluster = Clusters.testCluster(spec -> spec.plugin("inference-service-test"));
 
     @Override
     protected String getTestRestCluster() {

+ 1 - 0
x-pack/plugin/esql/qa/server/single-node/build.gradle

@@ -22,6 +22,7 @@ dependencies {
 
   clusterPlugins project(':plugins:mapper-size')
   clusterPlugins project(':plugins:mapper-murmur3')
+  clusterPlugins project(':x-pack:plugin:inference:qa:test-service-plugin')
 }
 
 restResources {

+ 1 - 1
x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/EsqlSpecIT.java

@@ -18,7 +18,7 @@ import org.junit.ClassRule;
 @ThreadLeakFilters(filters = TestClustersThreadFilter.class)
 public class EsqlSpecIT extends EsqlSpecTestCase {
     @ClassRule
-    public static ElasticsearchCluster cluster = Clusters.testCluster();
+    public static ElasticsearchCluster cluster = Clusters.testCluster(spec -> spec.plugin("inference-service-test"));
 
     @Override
     protected String getTestRestCluster() {

+ 18 - 2
x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java

@@ -65,7 +65,10 @@ import static org.elasticsearch.xpack.esql.CsvSpecReader.specParser;
 import static org.elasticsearch.xpack.esql.CsvTestUtils.ExpectedResults;
 import static org.elasticsearch.xpack.esql.CsvTestUtils.isEnabled;
 import static org.elasticsearch.xpack.esql.CsvTestUtils.loadCsvSpecValues;
-import static org.elasticsearch.xpack.esql.CsvTestsDataLoader.CSV_DATASET_MAP;
+import static org.elasticsearch.xpack.esql.CsvTestsDataLoader.availableDatasetsForEs;
+import static org.elasticsearch.xpack.esql.CsvTestsDataLoader.clusterHasInferenceEndpoint;
+import static org.elasticsearch.xpack.esql.CsvTestsDataLoader.createInferenceEndpoint;
+import static org.elasticsearch.xpack.esql.CsvTestsDataLoader.deleteInferenceEndpoint;
 import static org.elasticsearch.xpack.esql.CsvTestsDataLoader.loadDataSetIntoEs;
 import static org.elasticsearch.xpack.esql.EsqlTestUtils.classpathResources;
 
@@ -129,7 +132,11 @@ public abstract class EsqlSpecTestCase extends ESRestTestCase {
 
     @Before
     public void setup() throws IOException {
-        if (indexExists(CSV_DATASET_MAP.keySet().iterator().next()) == false) {
+        if (supportsInferenceTestService() && clusterHasInferenceEndpoint(client()) == false) {
+            createInferenceEndpoint(client());
+        }
+
+        if (indexExists(availableDatasetsForEs(client()).iterator().next().indexName()) == false) {
             loadDataSetIntoEs(client());
         }
     }
@@ -148,6 +155,8 @@ public abstract class EsqlSpecTestCase extends ESRestTestCase {
                 throw e;
             }
         }
+
+        deleteInferenceEndpoint(client());
     }
 
     public boolean logResults() {
@@ -164,6 +173,9 @@ public abstract class EsqlSpecTestCase extends ESRestTestCase {
     }
 
     protected void shouldSkipTest(String testName) throws IOException {
+        if (testCase.requiredCapabilities.contains("semantic_text_type")) {
+            assumeTrue("Inference test service needs to be supported for semantic_text", supportsInferenceTestService());
+        }
         checkCapabilities(adminClient(), testFeatureService, testName, testCase);
         assumeTrue("Test " + testName + " is not enabled", isEnabled(testName, instructions, Version.CURRENT));
     }
@@ -207,6 +219,10 @@ public abstract class EsqlSpecTestCase extends ESRestTestCase {
         }
     }
 
+    protected boolean supportsInferenceTestService() {
+        return true;
+    }
+
     protected final void doTest() throws Throwable {
         RequestObjectBuilder builder = new RequestObjectBuilder(randomFrom(XContentType.values()));
 

+ 5 - 1
x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java

@@ -134,7 +134,11 @@ public final class CsvAssert {
                         || expectedType == UNSIGNED_LONG)) {
                     continue;
                 }
-                if (blockType == Type.KEYWORD && (expectedType == Type.IP || expectedType == Type.VERSION || expectedType == Type.TEXT)) {
+                if (blockType == Type.KEYWORD
+                    && (expectedType == Type.IP
+                        || expectedType == Type.VERSION
+                        || expectedType == Type.TEXT
+                        || expectedType == Type.SEMANTIC_TEXT)) {
                     // Type.asType translates all bytes references into keywords
                     continue;
                 }

+ 1 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java

@@ -447,6 +447,7 @@ public final class CsvTestUtils {
         SCALED_FLOAT(s -> s == null ? null : scaledFloat(s, "100"), Double.class),
         KEYWORD(Object::toString, BytesRef.class),
         TEXT(Object::toString, BytesRef.class),
+        SEMANTIC_TEXT(Object::toString, BytesRef.class),
         IP(
             StringUtils::parseIP,
             (l, r) -> l instanceof String maybeIP

+ 113 - 19
x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java

@@ -19,6 +19,7 @@ import org.apache.http.impl.client.BasicCredentialsProvider;
 import org.apache.logging.log4j.core.config.plugins.util.PluginManager;
 import org.elasticsearch.client.Request;
 import org.elasticsearch.client.Response;
+import org.elasticsearch.client.ResponseException;
 import org.elasticsearch.client.RestClient;
 import org.elasticsearch.client.RestClientBuilder;
 import org.elasticsearch.common.Strings;
@@ -36,9 +37,11 @@ import java.io.InputStream;
 import java.net.URI;
 import java.net.URL;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.stream.Collectors;
 
 import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
 import static org.elasticsearch.xpack.esql.CsvTestUtils.COMMA_ESCAPING_REGEX;
@@ -81,6 +84,7 @@ public class CsvTestsDataLoader {
     private static final TestsDataset K8S = new TestsDataset("k8s", "k8s-mappings.json", "k8s.csv").withSetting("k8s-settings.json");
     private static final TestsDataset ADDRESSES = new TestsDataset("addresses");
     private static final TestsDataset BOOKS = new TestsDataset("books");
+    private static final TestsDataset SEMANTIC_TEXT = new TestsDataset("semantic_text").withInferenceEndpoint(true);
 
     public static final Map<String, TestsDataset> CSV_DATASET_MAP = Map.ofEntries(
         Map.entry(EMPLOYEES.indexName, EMPLOYEES),
@@ -112,7 +116,8 @@ public class CsvTestsDataLoader {
         Map.entry(K8S.indexName, K8S),
         Map.entry(DISTANCES.indexName, DISTANCES),
         Map.entry(ADDRESSES.indexName, ADDRESSES),
-        Map.entry(BOOKS.indexName, BOOKS)
+        Map.entry(BOOKS.indexName, BOOKS),
+        Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT)
     );
 
     private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json");
@@ -219,8 +224,13 @@ public class CsvTestsDataLoader {
         }
     }
 
-    private static void loadDataSetIntoEs(RestClient client, IndexCreator indexCreator) throws IOException {
-        loadDataSetIntoEs(client, LogManager.getLogger(CsvTestsDataLoader.class), indexCreator);
+    public static Set<TestsDataset> availableDatasetsForEs(RestClient client) throws IOException {
+        boolean inferenceEnabled = clusterHasInferenceEndpoint(client);
+
+        return CSV_DATASET_MAP.values()
+            .stream()
+            .filter(d -> d.requiresInferenceEndpoint == false || inferenceEnabled)
+            .collect(Collectors.toCollection(HashSet::new));
     }
 
     public static void loadDataSetIntoEs(RestClient client) throws IOException {
@@ -229,22 +239,61 @@ public class CsvTestsDataLoader {
         });
     }
 
-    public static void loadDataSetIntoEs(RestClient client, Logger logger) throws IOException {
-        loadDataSetIntoEs(client, logger, (restClient, indexName, indexMapping, indexSettings) -> {
-            ESRestTestCase.createIndex(restClient, indexName, indexSettings, indexMapping, null);
-        });
-    }
+    private static void loadDataSetIntoEs(RestClient client, IndexCreator indexCreator) throws IOException {
+        Logger logger = LogManager.getLogger(CsvTestsDataLoader.class);
 
-    private static void loadDataSetIntoEs(RestClient client, Logger logger, IndexCreator indexCreator) throws IOException {
-        for (var dataset : CSV_DATASET_MAP.values()) {
+        Set<String> loadedDatasets = new HashSet<>();
+        for (var dataset : availableDatasetsForEs(client)) {
             load(client, dataset, logger, indexCreator);
+            loadedDatasets.add(dataset.indexName);
         }
-        forceMerge(client, CSV_DATASET_MAP.keySet(), logger);
+        forceMerge(client, loadedDatasets, logger);
         for (var policy : ENRICH_POLICIES) {
             loadEnrichPolicy(client, policy.policyName, policy.policyFileName, logger);
         }
     }
 
+    /** The semantic_text mapping type requires an inference endpoint that needs to be set up before creating the index. */
+    public static void createInferenceEndpoint(RestClient client) throws IOException {
+        Request request = new Request("PUT", "_inference/sparse_embedding/test_sparse_inference");
+        request.setJsonEntity("""
+                  {
+                   "service": "test_service",
+                   "service_settings": {
+                     "model": "my_model",
+                     "api_key": "abc64"
+                   },
+                   "task_settings": {
+                   }
+                 }
+            """);
+        client.performRequest(request);
+    }
+
+    public static void deleteInferenceEndpoint(RestClient client) throws IOException {
+        try {
+            client.performRequest(new Request("DELETE", "_inference/test_sparse_inference"));
+        } catch (ResponseException e) {
+            // 404 here means the endpoint was not created
+            if (e.getResponse().getStatusLine().getStatusCode() != 404) {
+                throw e;
+            }
+        }
+    }
+
+    public static boolean clusterHasInferenceEndpoint(RestClient client) throws IOException {
+        Request request = new Request("GET", "_inference/sparse_embedding/test_sparse_inference");
+        try {
+            client.performRequest(request);
+        } catch (ResponseException e) {
+            if (e.getResponse().getStatusLine().getStatusCode() == 404) {
+                return false;
+            }
+            throw e;
+        }
+        return true;
+    }
+
     private static void loadEnrichPolicy(RestClient client, String policyName, String policyFileName, Logger logger) throws IOException {
         URL policyMapping = CsvTestsDataLoader.class.getResource("/" + policyFileName);
         if (policyMapping == null) {
@@ -511,34 +560,79 @@ public class CsvTestsDataLoader {
         String dataFileName,
         String settingFileName,
         boolean allowSubFields,
-        Map<String, String> typeMapping
+        Map<String, String> typeMapping,
+        boolean requiresInferenceEndpoint
     ) {
         public TestsDataset(String indexName, String mappingFileName, String dataFileName) {
-            this(indexName, mappingFileName, dataFileName, null, true, null);
+            this(indexName, mappingFileName, dataFileName, null, true, null, false);
         }
 
         public TestsDataset(String indexName) {
-            this(indexName, "mapping-" + indexName + ".json", indexName + ".csv", null, true, null);
+            this(indexName, "mapping-" + indexName + ".json", indexName + ".csv", null, true, null, false);
         }
 
         public TestsDataset withIndex(String indexName) {
-            return new TestsDataset(indexName, mappingFileName, dataFileName, settingFileName, allowSubFields, typeMapping);
+            return new TestsDataset(
+                indexName,
+                mappingFileName,
+                dataFileName,
+                settingFileName,
+                allowSubFields,
+                typeMapping,
+                requiresInferenceEndpoint
+            );
         }
 
         public TestsDataset withData(String dataFileName) {
-            return new TestsDataset(indexName, mappingFileName, dataFileName, settingFileName, allowSubFields, typeMapping);
+            return new TestsDataset(
+                indexName,
+                mappingFileName,
+                dataFileName,
+                settingFileName,
+                allowSubFields,
+                typeMapping,
+                requiresInferenceEndpoint
+            );
         }
 
         public TestsDataset withSetting(String settingFileName) {
-            return new TestsDataset(indexName, mappingFileName, dataFileName, settingFileName, allowSubFields, typeMapping);
+            return new TestsDataset(
+                indexName,
+                mappingFileName,
+                dataFileName,
+                settingFileName,
+                allowSubFields,
+                typeMapping,
+                requiresInferenceEndpoint
+            );
         }
 
         public TestsDataset noSubfields() {
-            return new TestsDataset(indexName, mappingFileName, dataFileName, settingFileName, false, typeMapping);
+            return new TestsDataset(
+                indexName,
+                mappingFileName,
+                dataFileName,
+                settingFileName,
+                false,
+                typeMapping,
+                requiresInferenceEndpoint
+            );
         }
 
         public TestsDataset withTypeMapping(Map<String, String> typeMapping) {
-            return new TestsDataset(indexName, mappingFileName, dataFileName, settingFileName, allowSubFields, typeMapping);
+            return new TestsDataset(
+                indexName,
+                mappingFileName,
+                dataFileName,
+                settingFileName,
+                allowSubFields,
+                typeMapping,
+                requiresInferenceEndpoint
+            );
+        }
+
+        public TestsDataset withInferenceEndpoint(boolean needsInference) {
+            return new TestsDataset(indexName, mappingFileName, dataFileName, settingFileName, allowSubFields, typeMapping, needsInference);
         }
     }
 

+ 1 - 1
x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java

@@ -648,7 +648,7 @@ public final class EsqlTestUtils {
             case KEYWORD -> new BytesRef(randomAlphaOfLength(5));
             case IP -> new BytesRef(InetAddressPoint.encode(randomIp(randomBoolean())));
             case TIME_DURATION -> Duration.ofMillis(randomLongBetween(-604800000L, 604800000L)); // plus/minus 7 days
-            case TEXT -> new BytesRef(randomAlphaOfLength(50));
+            case TEXT, SEMANTIC_TEXT -> new BytesRef(randomAlphaOfLength(50));
             case VERSION -> randomVersion().toBytesRef();
             case GEO_POINT -> GEO.asWkb(GeometryTestUtils.randomPoint());
             case CARTESIAN_POINT -> CARTESIAN.asWkb(ShapeTestUtils.randomPoint());

+ 73 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-semantic_text.json

@@ -0,0 +1,73 @@
+{
+  "properties": {
+    "semantic_text_field": {
+      "type": "semantic_text",
+      "inference_id": "test_sparse_inference"
+    },
+    "st_bool": {
+      "type": "semantic_text",
+      "inference_id": "test_sparse_inference"
+    },
+    "st_cartesian_point": {
+      "type": "semantic_text",
+      "inference_id": "test_sparse_inference"
+    },
+    "st_cartesian_shape": {
+      "type": "semantic_text",
+      "inference_id": "test_sparse_inference"
+    },
+    "st_datetime": {
+      "type": "semantic_text",
+      "inference_id": "test_sparse_inference"
+    },
+    "st_double": {
+      "type": "semantic_text",
+      "inference_id": "test_sparse_inference"
+    },
+    "st_geopoint": {
+      "type": "semantic_text",
+      "inference_id": "test_sparse_inference"
+    },
+    "st_geoshape": {
+      "type": "semantic_text",
+      "inference_id": "test_sparse_inference"
+    },
+    "st_integer": {
+      "type": "semantic_text",
+      "inference_id": "test_sparse_inference"
+    },
+    "st_ip": {
+      "type": "semantic_text",
+      "inference_id": "test_sparse_inference"
+    },
+    "st_long": {
+      "type": "semantic_text",
+      "inference_id": "test_sparse_inference"
+    },
+    "st_unsigned_long": {
+      "type": "semantic_text",
+      "inference_id": "test_sparse_inference"
+    },
+    "st_version": {
+      "type": "semantic_text",
+      "inference_id": "test_sparse_inference"
+    },
+    "st_multi_value": {
+      "type": "semantic_text",
+      "inference_id": "test_sparse_inference"
+    },
+    "st_unicode": {
+      "type": "semantic_text",
+      "inference_id": "test_sparse_inference"
+    },
+    "host" : {
+      "type" : "keyword"
+    },
+    "description" : {
+      "type" : "text"
+    },
+    "value": {
+      "type": "long"
+    }
+  }
+}

+ 4 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/semantic_text.csv

@@ -0,0 +1,4 @@
+_id:keyword,semantic_text_field:semantic_text,st_bool:semantic_text,st_cartesian_point:semantic_text,st_cartesian_shape:semantic_text,st_datetime:semantic_text,st_double:semantic_text,st_geopoint:semantic_text,st_geoshape:semantic_text,st_integer:semantic_text,st_ip:semantic_text,st_long:semantic_text,st_unsigned_long:semantic_text,st_version:semantic_text,st_multi_value:semantic_text,st_unicode:semantic_text,host:keyword,description:text,value:long
+1,live long and prosper,false,"POINT(4297.11 -1475.53)",,1953-09-02T00:00:00.000Z,5.20128E11,"POINT(42.97109630194 14.7552534413725)","POLYGON ((30 10\, 40 40\, 20 40\, 10 20\, 30 10))",23,1.1.1.1,2147483648,2147483648,1.2.3,["Hello there!", "This is a random value", "for testing purposes"],你吃饭了吗,"host1","some description1",1001
+2,all we have to decide is what to do with the time that is given to us,true,"POINT(7580.93 2272.77)",,2023-09-24T15:57:00.000Z,4541.11,"POINT(37.97109630194 21.7552534413725)","POLYGON ((30 10\, 40 40\, 20 40\, 10 20\, 30 10))",122,1.1.2.1,123,2147483648.2,9.0.0,["nice to meet you", "bye bye!"],["谢谢", "对不起我的中文不好"],"host2","some description2",1002
+3,be excellent to each other,,,,,,,,,,,,,,,"host3","some description3",1003

+ 175 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/semantic_text.csv-spec

@@ -0,0 +1,175 @@
+simple
+required_capability: semantic_text_type
+
+FROM semantic_text
+| KEEP semantic_text_field
+| sort semantic_text_field asc;
+
+semantic_text_field:semantic_text
+all we have to decide is what to do with the time that is given to us
+be excellent to each other
+live long and prosper
+;
+
+simpleWithUnicode
+required_capability: semantic_text_type
+
+FROM semantic_text
+| KEEP st_unicode
+| SORT st_unicode
+;
+
+st_unicode:semantic_text
+你吃饭了吗
+["谢谢", "对不起我的中文不好"]
+null
+;
+
+mvExpand
+required_capability: semantic_text_type
+
+FROM semantic_text METADATA _id
+| KEEP _id, st_multi_value
+| MV_EXPAND st_multi_value
+| SORT st_multi_value
+;
+
+_id:keyword | st_multi_value:semantic_text
+1           | Hello there!
+1           | This is a random value
+2           | bye bye!
+1           | for testing purposes
+2           | nice to meet you
+3           | null
+;
+
+withDropAndKeep
+required_capability: semantic_text_type
+
+FROM semantic_text METADATA _id
+| KEEP _id, semantic_text_field, st_double
+| DROP st_double
+| SORT _id
+;
+
+_id:keyword | semantic_text_field:semantic_text
+1           | live long and prosper
+2           | all we have to decide is what to do with the time that is given to us
+3           | be excellent to each other
+;
+
+rename
+required_capability: semantic_text_type
+
+FROM semantic_text METADATA _id
+| RENAME semantic_text_field AS my_field
+| KEEP _id, my_field
+| SORT _id
+;
+
+_id:keyword | my_field:semantic_text
+1           | live long and prosper
+2           | all we have to decide is what to do with the time that is given to us
+3           | be excellent to each other
+;
+
+eval
+required_capability: semantic_text_type
+
+FROM semantic_text METADATA _id
+| EVAL my_field = semantic_text_field
+| KEEP _id, my_field
+| SORT _id
+;
+
+_id:keyword | my_field:semantic_text
+1           | live long and prosper
+2           | all we have to decide is what to do with the time that is given to us
+3           | be excellent to each other
+;
+
+simpleStats
+required_capability: semantic_text_type
+
+FROM semantic_text METADATA _id
+| STATS COUNT(*)
+;
+
+COUNT(*):long
+3
+;
+
+statsWithGrouping
+required_capability: semantic_text_type
+
+FROM semantic_text METADATA _id
+| STATS COUNT(*) BY st_version
+| SORT st_version
+;
+
+COUNT(*):long | st_version:semantic_text
+1             | 1.2.3
+1             | 9.0.0
+1             | null
+;
+
+withDropKeepStatsMvExpandRenameSortLimit
+required_capability: semantic_text_type
+
+FROM semantic_text METADATA _id
+| KEEP _id, semantic_text_field, st_multi_value
+| DROP semantic_text_field
+| RENAME st_multi_value AS my_field
+| MV_EXPAND my_field
+| STATS COUNT(*) BY my_field
+| SORT my_field
+| LIMIT 3
+;
+
+COUNT(*):long | my_field:semantic_text
+1             | Hello there!
+1             | This is a random value
+1             | bye bye!
+;
+
+simpleWithLongValue
+required_capability: semantic_text_type
+
+FROM semantic_text
+| KEEP value, semantic_text_field
+| SORT value
+;
+
+value:long | semantic_text_field:semantic_text
+1001            | live long and prosper
+1002            | all we have to decide is what to do with the time that is given to us
+1003            | be excellent to each other
+;
+
+simpleWithText
+required_capability: semantic_text_type
+
+FROM semantic_text METADATA _id
+| KEEP description, semantic_text_field
+| SORT description
+;
+
+description:text     | semantic_text_field:semantic_text
+"some description1"  | live long and prosper
+"some description2"  | all we have to decide is what to do with the time that is given to us
+"some description3"  | be excellent to each other
+;
+
+simpleWithKeyword
+required_capability: semantic_text_type
+
+FROM semantic_text METADATA _id
+| KEEP host, semantic_text_field
+| SORT host
+;
+
+host:keyword | semantic_text_field:semantic_text
+"host1"      | live long and prosper
+"host2"      | all we have to decide is what to do with the time that is given to us
+"host3"      | be excellent to each other
+;

+ 5 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

@@ -401,7 +401,11 @@ public class EsqlCapabilities {
         /**
          * Adding stats for functions (stack telemetry)
          */
-        FUNCTION_STATS;
+        FUNCTION_STATS,
+        /**
+         * Support for semantic_text field mapping
+         */
+        SEMANTIC_TEXT_TYPE(EsqlCorePlugin.SEMANTIC_TEXT_FEATURE_FLAG);
 
         private final boolean enabled;
 

+ 1 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java

@@ -89,7 +89,7 @@ abstract class PositionToXContent {
                     return builder.value(unsignedLongAsNumber(l));
                 }
             };
-            case KEYWORD, TEXT -> new PositionToXContent(block) {
+            case KEYWORD, SEMANTIC_TEXT, TEXT -> new PositionToXContent(block) {
                 @Override
                 protected XContentBuilder valueToXContent(XContentBuilder builder, ToXContent.Params params, int valueIndex)
                     throws IOException {

+ 1 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java

@@ -114,7 +114,7 @@ public final class ResponseValueUtils {
             case LONG, COUNTER_LONG -> ((LongBlock) block).getLong(offset);
             case INTEGER, COUNTER_INTEGER -> ((IntBlock) block).getInt(offset);
             case DOUBLE, COUNTER_DOUBLE -> ((DoubleBlock) block).getDouble(offset);
-            case KEYWORD, TEXT -> ((BytesRefBlock) block).getBytesRef(offset, scratch).utf8ToString();
+            case KEYWORD, SEMANTIC_TEXT, TEXT -> ((BytesRefBlock) block).getBytesRef(offset, scratch).utf8ToString();
             case IP -> {
                 BytesRef val = ((BytesRefBlock) block).getBytesRef(offset, scratch);
                 yield ipToString(val);

+ 1 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java

@@ -349,7 +349,7 @@ public class LocalExecutionPlanner {
             elementTypes[channel] = PlannerUtils.toElementType(inverse.get(channel).type());
             encoders[channel] = switch (inverse.get(channel).type()) {
                 case IP -> TopNEncoder.IP;
-                case TEXT, KEYWORD -> TopNEncoder.UTF8;
+                case TEXT, KEYWORD, SEMANTIC_TEXT -> TopNEncoder.UTF8;
                 case VERSION -> TopNEncoder.VERSION;
                 case BOOLEAN, NULL, BYTE, SHORT, INTEGER, LONG, DOUBLE, FLOAT, HALF_FLOAT, DATETIME, DATE_NANOS, DATE_PERIOD, TIME_DURATION,
                     OBJECT, SCALED_FLOAT, UNSIGNED_LONG, DOC_DATA_TYPE, TSID_DATA_TYPE -> TopNEncoder.DEFAULT_SORTABLE;

+ 1 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java

@@ -247,7 +247,7 @@ public class PlannerUtils {
             case INTEGER, COUNTER_INTEGER -> ElementType.INT;
             case DOUBLE, COUNTER_DOUBLE -> ElementType.DOUBLE;
             // unsupported fields are passed through as a BytesRef
-            case KEYWORD, TEXT, IP, SOURCE, VERSION, UNSUPPORTED -> ElementType.BYTES_REF;
+            case KEYWORD, TEXT, IP, SOURCE, VERSION, SEMANTIC_TEXT, UNSUPPORTED -> ElementType.BYTES_REF;
             case NULL -> ElementType.NULL;
             case BOOLEAN -> ElementType.BOOLEAN;
             case DOC_DATA_TYPE -> ElementType.DOC;

+ 2 - 2
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java

@@ -193,7 +193,7 @@ public class EsqlQueryResponseTests extends AbstractChunkedSerializingTestCase<E
                 case INTEGER, COUNTER_INTEGER -> ((IntBlock.Builder) builder).appendInt(randomInt());
                 case DOUBLE, COUNTER_DOUBLE -> ((DoubleBlock.Builder) builder).appendDouble(randomDouble());
                 case KEYWORD -> ((BytesRefBlock.Builder) builder).appendBytesRef(new BytesRef(randomAlphaOfLength(10)));
-                case TEXT -> ((BytesRefBlock.Builder) builder).appendBytesRef(new BytesRef(randomAlphaOfLength(10000)));
+                case TEXT, SEMANTIC_TEXT -> ((BytesRefBlock.Builder) builder).appendBytesRef(new BytesRef(randomAlphaOfLength(10000)));
                 case IP -> ((BytesRefBlock.Builder) builder).appendBytesRef(
                     new BytesRef(InetAddressPoint.encode(randomIp(randomBoolean())))
                 );
@@ -866,7 +866,7 @@ public class EsqlQueryResponseTests extends AbstractChunkedSerializingTestCase<E
                     case LONG, COUNTER_LONG -> ((LongBlock.Builder) builder).appendLong(((Number) value).longValue());
                     case INTEGER, COUNTER_INTEGER -> ((IntBlock.Builder) builder).appendInt(((Number) value).intValue());
                     case DOUBLE, COUNTER_DOUBLE -> ((DoubleBlock.Builder) builder).appendDouble(((Number) value).doubleValue());
-                    case KEYWORD, TEXT -> ((BytesRefBlock.Builder) builder).appendBytesRef(new BytesRef(value.toString()));
+                    case KEYWORD, TEXT, SEMANTIC_TEXT -> ((BytesRefBlock.Builder) builder).appendBytesRef(new BytesRef(value.toString()));
                     case UNSUPPORTED -> ((BytesRefBlock.Builder) builder).appendNull();
                     case IP -> ((BytesRefBlock.Builder) builder).appendBytesRef(stringToIP(value.toString()));
                     case DATETIME -> {

+ 1 - 1
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/CaseTests.java

@@ -59,7 +59,7 @@ public class CaseTests extends AbstractScalarFunctionTestCase {
             DataType.NULL
         ).collect(Collectors.toList());
         if (Build.current().isSnapshot()) {
-            t.addAll(DataType.UNDER_CONSTRUCTION.keySet());
+            t.add(DataType.DATE_NANOS);
         }
         TYPES = unmodifiableList(t);
     }

+ 9 - 0
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

@@ -23,6 +23,8 @@ import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.IndexVersion;
 import org.elasticsearch.index.fielddata.FieldDataContext;
 import org.elasticsearch.index.fielddata.IndexFieldData;
+import org.elasticsearch.index.mapper.BlockLoader;
+import org.elasticsearch.index.mapper.BlockSourceReader;
 import org.elasticsearch.index.mapper.DocumentParserContext;
 import org.elasticsearch.index.mapper.DocumentParsingException;
 import org.elasticsearch.index.mapper.FieldMapper;
@@ -606,6 +608,13 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
 
             return baseMessageBuilder.toString();
         }
+
+        @Override // Returns a BytesRefsBlockLoader that reads this field's values through a source value fetcher.
+        public BlockLoader blockLoader(MappedFieldType.BlockLoaderContext blContext) {
+            SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name().concat(".text"))); // fetch from the source paths of "<field name>.text"
+            var sourceMode = blContext.indexSettings().getIndexMappingSourceMode(); // index-level mapping source mode, handed to the loader unchanged
+            return new BlockSourceReader.BytesRefsBlockLoader(fetcher, BlockSourceReader.lookupMatchingAll(), sourceMode); // NOTE(review): lookupMatchingAll() presumably applies no per-document filter - confirm
+        }
     }
 
     /**

+ 49 - 0
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_unsupported_types.yml

@@ -504,3 +504,52 @@ double nested declared in mapping:
 
   # The `nested` field is not visible, nor are any of its subfields.
   - match: { columns: [{name: name, type: keyword}] }
+
+---
+semantic_text declared in mapping:
+  - requires: # run only where POST /_query advertises the semantic_text_type capability
+      test_runner_features: [ capabilities ]
+      capabilities:
+        - method: POST
+          path: /_query
+          parameters: [ ]
+          capabilities: [ semantic_text_type ]
+      reason: "support for semantic_text type"
+  - do: # create a 5-shard index that maps semantic_text_field as semantic_text
+      indices.create:
+        index: test_semantic_text
+        body:
+          settings:
+            number_of_shards: 5
+          mappings:
+            properties:
+              semantic_text_field:
+                type: semantic_text
+                inference_id: my_inference_id
+  - do: # bulk-index one document whose field value already carries an "inference" section, then refresh
+      bulk:
+        index: test_semantic_text
+        refresh: true
+        body:
+          - { "index": { } }
+          - {
+              "semantic_text_field": {
+                "text": "be excellent to each other",
+                "inference": {
+                  "inference_id": "my_inference_id",
+                  "model_settings": {
+                    "task_type": "sparse_embedding"
+                  },
+                  "chunks": [{ "text": "be excellent to each other", "embeddings": { "a": 1,"b": 2 } }]
+              }
+            }
+          }
+  - do: # query the whole index; the default-limit warning is tolerated
+      allowed_warnings_regex:
+        - "No limit defined, adding default limit of \\[.*\\]"
+      esql.query:
+        body:
+          query: 'FROM test_semantic_text'
+  - match: { columns: [{name: semantic_text_field, type: semantic_text}] } # the only column is reported with type semantic_text
+  - length: { values: 1 } # exactly one row comes back
+  - match: { values.0: ["be excellent to each other"] } # the row holds the same string as the document's "text" entry