Browse Source

[ML] Inference API request count telemetry and metrics core components (#110799)

* Adding new inference stats class

* Working test

* Adding factory to plugin

* Removing unnecessary changes

* Fixing test

* Removing close() for now

* Expanding modelId() comment

* Using incrementBy and refactoring interfaces
Jonathan Buttner 1 year ago
parent
commit
4c413f4434
44 changed files with 620 additions and 56 deletions
  1. 8 0
      server/src/main/java/org/elasticsearch/inference/ServiceSettings.java
  2. 18 2
      x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/InferenceFeatureSetUsage.java
  3. 70 0
      x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/InferenceRequestStats.java
  4. 15 0
      x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/SerializableStats.java
  5. 72 0
      x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/inference/InferenceRequestStatsTests.java
  6. 5 0
      x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestDenseInferenceServiceExtension.java
  7. 9 4
      x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestRerankingServiceExtension.java
  8. 5 0
      x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestSparseInferenceServiceExtension.java
  9. 5 0
      x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/ModelRegistryIT.java
  10. 6 1
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java
  11. 1 1
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockModel.java
  12. 1 1
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java
  13. 2 1
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceSettings.java
  14. 5 0
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioServiceSettings.java
  15. 5 0
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/completion/AzureOpenAiCompletionServiceSettings.java
  16. 5 0
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/embeddings/AzureOpenAiEmbeddingsServiceSettings.java
  17. 1 0
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/CohereServiceSettings.java
  18. 5 0
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/embeddings/CohereEmbeddingsServiceSettings.java
  19. 1 0
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/rerank/CohereRerankServiceSettings.java
  20. 5 0
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandInternalTextEmbeddingServiceSettings.java
  21. 2 2
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandModel.java
  22. 3 3
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java
  23. 2 5
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallModel.java
  24. 3 3
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java
  25. 3 3
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettings.java
  26. 1 0
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/embeddings/GoogleVertexAiEmbeddingsServiceSettings.java
  27. 1 0
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/rerank/GoogleVertexAiRerankServiceSettings.java
  28. 5 0
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceServiceSettings.java
  29. 5 0
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/elser/HuggingFaceElserServiceSettings.java
  30. 1 1
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/MistralService.java
  31. 1 1
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/embeddings/MistralEmbeddingsModel.java
  32. 2 1
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/embeddings/MistralEmbeddingsServiceSettings.java
  33. 6 3
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/settings/InternalServiceSettings.java
  34. 47 0
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/InferenceAPMStats.java
  35. 59 0
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/InferenceStats.java
  36. 30 0
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/Stats.java
  37. 57 0
      x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/StatsMap.java
  38. 5 0
      x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/model/TestModel.java
  39. 13 13
      x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java
  40. 3 3
      x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallInternalServiceSettingsTests.java
  41. 3 3
      x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettingsTests.java
  42. 2 2
      x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceTests.java
  43. 3 3
      x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mistral/MistralServiceTests.java
  44. 119 0
      x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/telemetry/StatsMapTests.java

+ 8 - 0
server/src/main/java/org/elasticsearch/inference/ServiceSettings.java

@@ -41,4 +41,12 @@ public interface ServiceSettings extends ToXContentObject, VersionedNamedWriteab
     default DenseVectorFieldMapper.ElementType elementType() {
         return null;
     }
+
+    /**
+     * The model to use in the inference endpoint (e.g. text-embedding-ada-002). Sometimes the model is not defined in the service
+     * settings. This can happen for external providers (e.g. Hugging Face, Azure AI Studio) where the provider requires that the model
+     * be chosen when initializing a deployment within their service. In this situation, return null.
+     * @return the model used to perform inference or null if the model is not defined
+     */
+    String modelId();
 }

+ 18 - 2
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/InferenceFeatureSetUsage.java

@@ -54,14 +54,30 @@ public class InferenceFeatureSetUsage extends XPackFeatureSet.Usage {
             count++;
         }
 
+        public String service() {
+            return service;
+        }
+
+        public TaskType taskType() {
+            return taskType;
+        }
+
+        public long count() {
+            return count;
+        }
+
         @Override
         public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
             builder.startObject();
+            addXContentFragment(builder, params);
+            builder.endObject();
+            return builder;
+        }
+
+        public void addXContentFragment(XContentBuilder builder, Params params) throws IOException {
             builder.field("service", service);
             builder.field("task_type", taskType.name());
             builder.field("count", count);
-            builder.endObject();
-            return builder;
         }
 
         @Override

+ 70 - 0
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/InferenceRequestStats.java

@@ -0,0 +1,70 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.core.inference;
+
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.core.Nullable;
+import org.elasticsearch.inference.TaskType;
+import org.elasticsearch.xcontent.ToXContent;
+import org.elasticsearch.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.Objects;
+
+public class InferenceRequestStats implements SerializableStats {
+    private final InferenceFeatureSetUsage.ModelStats modelStats;
+    private final String modelId;
+
+    public InferenceRequestStats(String service, TaskType taskType, @Nullable String modelId, long count) {
+        this(new InferenceFeatureSetUsage.ModelStats(service, taskType, count), modelId);
+    }
+
+    private InferenceRequestStats(InferenceFeatureSetUsage.ModelStats modelStats, @Nullable String modelId) {
+        this.modelStats = new InferenceFeatureSetUsage.ModelStats(modelStats);
+        this.modelId = modelId;
+    }
+
+    public InferenceRequestStats(StreamInput in) throws IOException {
+        this.modelStats = new InferenceFeatureSetUsage.ModelStats(in);
+        this.modelId = in.readOptionalString();
+    }
+
+    public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
+        builder.startObject();
+        builder.field("service", modelStats.service());
+        builder.field("task_type", modelStats.taskType().toString());
+        builder.field("count", modelStats.count());
+
+        if (modelId != null) {
+            builder.field("model_id", modelId);
+        }
+
+        builder.endObject();
+        return builder;
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        modelStats.writeTo(out);
+        out.writeOptionalString(modelId);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        InferenceRequestStats that = (InferenceRequestStats) o;
+        return Objects.equals(modelStats, that.modelStats) && Objects.equals(modelId, that.modelId);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(modelStats, modelId);
+    }
+}

+ 15 - 0
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/SerializableStats.java

@@ -0,0 +1,15 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.core.inference;
+
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.xcontent.ToXContentObject;
+
+public interface SerializableStats extends ToXContentObject, Writeable {
+
+}

+ 72 - 0
x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/inference/InferenceRequestStatsTests.java

@@ -0,0 +1,72 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.core.inference;
+
+import org.elasticsearch.TransportVersion;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.inference.TaskType;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentFactory;
+import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xpack.core.ml.AbstractBWCWireSerializationTestCase;
+
+import java.io.IOException;
+
+import static org.hamcrest.Matchers.is;
+
+public class InferenceRequestStatsTests extends AbstractBWCWireSerializationTestCase<InferenceRequestStats> {
+
+    public static InferenceRequestStats createRandom() {
+        var modelId = randomBoolean() ? randomAlphaOfLength(10) : null;
+
+        return new InferenceRequestStats(randomAlphaOfLength(10), randomFrom(TaskType.values()), modelId, randomInt());
+    }
+
+    public void testToXContent_DoesNotWriteModelId_WhenItIsNull() throws IOException {
+        var stats = new InferenceRequestStats("service", TaskType.TEXT_EMBEDDING, null, 1);
+
+        XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON);
+        stats.toXContent(builder, null);
+        String xContentResult = Strings.toString(builder);
+
+        assertThat(xContentResult, is("""
+            {"service":"service","task_type":"text_embedding","count":1}"""));
+    }
+
+    public void testToXContent_WritesModelId_WhenItIsDefined() throws IOException {
+        var stats = new InferenceRequestStats("service", TaskType.TEXT_EMBEDDING, "model_id", 2);
+
+        XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON);
+        stats.toXContent(builder, null);
+        String xContentResult = Strings.toString(builder);
+
+        assertThat(xContentResult, is("""
+            {"service":"service","task_type":"text_embedding","count":2,"model_id":"model_id"}"""));
+    }
+
+    @Override
+    protected InferenceRequestStats mutateInstanceForVersion(InferenceRequestStats instance, TransportVersion version) {
+        return instance;
+    }
+
+    @Override
+    protected Writeable.Reader<InferenceRequestStats> instanceReader() {
+        return InferenceRequestStats::new;
+    }
+
+    @Override
+    protected InferenceRequestStats createTestInstance() {
+        return createRandom();
+    }
+
+    @Override
+    protected InferenceRequestStats mutateInstance(InferenceRequestStats instance) throws IOException {
+        return randomValueOtherThan(instance, this::createTestInstance);
+    }
+}

+ 5 - 0
x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestDenseInferenceServiceExtension.java

@@ -309,6 +309,11 @@ public class TestDenseInferenceServiceExtension implements InferenceServiceExten
         public DenseVectorFieldMapper.ElementType elementType() {
             return elementType != null ? elementType : DenseVectorFieldMapper.ElementType.FLOAT;
         }
+
+        @Override
+        public String modelId() {
+            return model;
+        }
     }
 
 }

+ 9 - 4
x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestRerankingServiceExtension.java

@@ -135,7 +135,7 @@ public class TestRerankingServiceExtension implements InferenceServiceExtension
         }
     }
 
-    public record TestServiceSettings(String model_id) implements ServiceSettings {
+    public record TestServiceSettings(String modelId) implements ServiceSettings {
 
         static final String NAME = "test_reranking_service_settings";
 
@@ -162,7 +162,7 @@ public class TestRerankingServiceExtension implements InferenceServiceExtension
         @Override
         public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
             builder.startObject();
-            builder.field("model_id", model_id);
+            builder.field("model_id", modelId);
             builder.endObject();
             return builder;
         }
@@ -179,14 +179,19 @@ public class TestRerankingServiceExtension implements InferenceServiceExtension
 
         @Override
         public void writeTo(StreamOutput out) throws IOException {
-            out.writeString(model_id);
+            out.writeString(modelId);
+        }
+
+        @Override
+        public String modelId() {
+            return modelId;
         }
 
         @Override
         public ToXContentObject getFilteredXContentObject() {
             return (builder, params) -> {
                 builder.startObject();
-                builder.field("model_id", model_id);
+                builder.field("model_id", modelId);
                 builder.endObject();
                 return builder;
             };

+ 5 - 0
x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestSparseInferenceServiceExtension.java

@@ -224,6 +224,11 @@ public class TestSparseInferenceServiceExtension implements InferenceServiceExte
             out.writeBoolean(shouldReturnHiddenField);
         }
 
+        @Override
+        public String modelId() {
+            return model;
+        }
+
         @Override
         public ToXContentObject getFilteredXContentObject() {
             return (builder, params) -> {

+ 5 - 0
x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/ModelRegistryIT.java

@@ -341,6 +341,11 @@ public class ModelRegistryIT extends ESSingleNodeTestCase {
 
             }
 
+            @Override
+            public String modelId() {
+                return null;
+            }
+
             @Override
             public ToXContentObject getFilteredXContentObject() {
                 return this;

+ 6 - 1
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java

@@ -84,6 +84,8 @@ import org.elasticsearch.xpack.inference.services.huggingface.HuggingFaceService
 import org.elasticsearch.xpack.inference.services.huggingface.elser.HuggingFaceElserService;
 import org.elasticsearch.xpack.inference.services.mistral.MistralService;
 import org.elasticsearch.xpack.inference.services.openai.OpenAiService;
+import org.elasticsearch.xpack.inference.telemetry.InferenceAPMStats;
+import org.elasticsearch.xpack.inference.telemetry.StatsMap;
 
 import java.util.ArrayList;
 import java.util.Collection;
@@ -194,7 +196,10 @@ public class InferencePlugin extends Plugin implements ActionPlugin, ExtensibleP
         var actionFilter = new ShardBulkInferenceActionFilter(registry, modelRegistry);
         shardBulkInferenceActionFilter.set(actionFilter);
 
-        return List.of(modelRegistry, registry, httpClientManager);
+        var statsFactory = new InferenceAPMStats.Factory(services.telemetryProvider().getMeterRegistry());
+        var statsMap = new StatsMap<>(InferenceAPMStats::key, statsFactory::newInferenceRequestAPMCounter);
+
+        return List.of(modelRegistry, registry, httpClientManager, statsMap);
     }
 
     @Override

+ 1 - 1
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockModel.java

@@ -68,7 +68,7 @@ public abstract class AmazonBedrockModel extends Model {
 
     private void setPropertiesFromServiceSettings(AmazonBedrockServiceSettings serviceSettings) {
         this.region = serviceSettings.region();
-        this.model = serviceSettings.model();
+        this.model = serviceSettings.modelId();
         this.provider = serviceSettings.provider();
         this.rateLimitSettings = serviceSettings.rateLimitSettings();
     }

+ 1 - 1
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java

@@ -287,7 +287,7 @@ public class AmazonBedrockService extends SenderService {
 
         AmazonBedrockEmbeddingsServiceSettings settingsToUse = new AmazonBedrockEmbeddingsServiceSettings(
             serviceSettings.region(),
-            serviceSettings.model(),
+            serviceSettings.modelId(),
             serviceSettings.provider(),
             embeddingSize,
             serviceSettings.dimensionsSetByUser(),

+ 2 - 1
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceSettings.java

@@ -108,7 +108,8 @@ public abstract class AmazonBedrockServiceSettings extends FilteredXContentObjec
         return region;
     }
 
-    public String model() {
+    @Override
+    public String modelId() {
         return model;
     }
 

+ 5 - 0
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioServiceSettings.java

@@ -114,6 +114,11 @@ public abstract class AzureAiStudioServiceSettings extends FilteredXContentObjec
         return this.rateLimitSettings;
     }
 
+    @Override
+    public String modelId() {
+        return null;
+    }
+
     @Override
     public void writeTo(StreamOutput out) throws IOException {
         out.writeString(target);

+ 5 - 0
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/completion/AzureOpenAiCompletionServiceSettings.java

@@ -127,6 +127,11 @@ public class AzureOpenAiCompletionServiceSettings extends FilteredXContentObject
         return deploymentId;
     }
 
+    @Override
+    public String modelId() {
+        return null;
+    }
+
     @Override
     public RateLimitSettings rateLimitSettings() {
         return rateLimitSettings;

+ 5 - 0
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/embeddings/AzureOpenAiEmbeddingsServiceSettings.java

@@ -247,6 +247,11 @@ public class AzureOpenAiEmbeddingsServiceSettings extends FilteredXContentObject
         return DenseVectorFieldMapper.ElementType.FLOAT;
     }
 
+    @Override
+    public String modelId() {
+        return null;
+    }
+
     @Override
     public String getWriteableName() {
         return NAME;

+ 1 - 0
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/CohereServiceSettings.java

@@ -159,6 +159,7 @@ public class CohereServiceSettings extends FilteredXContentObject implements Ser
         return maxInputTokens;
     }
 
+    @Override
     public String modelId() {
         return modelId;
     }

+ 5 - 0
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/embeddings/CohereEmbeddingsServiceSettings.java

@@ -134,6 +134,11 @@ public class CohereEmbeddingsServiceSettings extends FilteredXContentObject impl
         return commonSettings.dimensions();
     }
 
+    @Override
+    public String modelId() {
+        return commonSettings.modelId();
+    }
+
     public CohereEmbeddingType getEmbeddingType() {
         return embeddingType;
     }

+ 1 - 0
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/rerank/CohereRerankServiceSettings.java

@@ -112,6 +112,7 @@ public class CohereRerankServiceSettings extends FilteredXContentObject implemen
         return uri;
     }
 
+    @Override
     public String modelId() {
         return modelId;
     }

+ 5 - 0
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandInternalTextEmbeddingServiceSettings.java

@@ -231,6 +231,11 @@ public class CustomElandInternalTextEmbeddingServiceSettings implements ServiceS
         return dimensions;
     }
 
+    @Override
+    public String modelId() {
+        return getElasticsearchInternalServiceSettings().modelId();
+    }
+
     @Override
     public ToXContentObject getFilteredXContentObject() {
         return this;

+ 2 - 2
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandModel.java

@@ -29,12 +29,12 @@ public class CustomElandModel extends Model implements ElasticsearchModel {
     }
 
     public String getModelId() {
-        return internalServiceSettings.getModelId();
+        return internalServiceSettings.modelId();
     }
 
     @Override
     public StartTrainedModelDeploymentAction.Request getStartTrainedModelDeploymentActionRequest() {
-        var startRequest = new StartTrainedModelDeploymentAction.Request(internalServiceSettings.getModelId(), this.getInferenceEntityId());
+        var startRequest = new StartTrainedModelDeploymentAction.Request(internalServiceSettings.modelId(), this.getInferenceEntityId());
         startRequest.setNumberOfAllocations(internalServiceSettings.getNumAllocations());
         startRequest.setThreadsPerAllocation(internalServiceSettings.getNumThreads());
         startRequest.setAdaptiveAllocationsSettings(internalServiceSettings.getAdaptiveAllocationsSettings());

+ 3 - 3
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java

@@ -270,7 +270,7 @@ public class ElasticsearchInternalService implements InferenceService {
         CustomElandInternalTextEmbeddingServiceSettings serviceSettings = new CustomElandInternalTextEmbeddingServiceSettings(
             model.getServiceSettings().getElasticsearchInternalServiceSettings().getNumAllocations(),
             model.getServiceSettings().getElasticsearchInternalServiceSettings().getNumThreads(),
-            model.getServiceSettings().getElasticsearchInternalServiceSettings().getModelId(),
+            model.getServiceSettings().getElasticsearchInternalServiceSettings().modelId(),
             model.getServiceSettings().getElasticsearchInternalServiceSettings().getAdaptiveAllocationsSettings(),
             embeddingSize,
             model.getServiceSettings().similarity(),
@@ -474,7 +474,7 @@ public class ElasticsearchInternalService implements InferenceService {
             listener.onFailure(notTextEmbeddingModelException(model));
             return;
         } else if (model instanceof MultilingualE5SmallModel e5Model) {
-            String modelId = e5Model.getServiceSettings().getModelId();
+            String modelId = e5Model.getServiceSettings().modelId();
             var input = new TrainedModelInput(List.<String>of("text_field")); // by convention text_field is used
             var config = TrainedModelConfig.builder().setInput(input).setModelId(modelId).validate(true).build();
             PutTrainedModelAction.Request putRequest = new PutTrainedModelAction.Request(config, false, true);
@@ -520,7 +520,7 @@ public class ElasticsearchInternalService implements InferenceService {
         if (model instanceof ElasticsearchModel == false) {
             listener.onFailure(notTextEmbeddingModelException(model));
         } else if (model.getServiceSettings() instanceof InternalServiceSettings internalServiceSettings) {
-            String modelId = internalServiceSettings.getModelId();
+            String modelId = internalServiceSettings.modelId();
             GetTrainedModelsAction.Request getRequest = new GetTrainedModelsAction.Request(modelId);
             executeAsyncWithOrigin(client, INFERENCE_ORIGIN, GetTrainedModelsAction.INSTANCE, getRequest, getModelsResponseListener);
         } else {

+ 2 - 5
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallModel.java

@@ -36,15 +36,12 @@ public class MultilingualE5SmallModel extends Model implements ElasticsearchMode
 
     @Override
     public String getModelId() {
-        return getServiceSettings().getModelId();
+        return getServiceSettings().modelId();
     }
 
     @Override
     public StartTrainedModelDeploymentAction.Request getStartTrainedModelDeploymentActionRequest() {
-        var startRequest = new StartTrainedModelDeploymentAction.Request(
-            this.getServiceSettings().getModelId(),
-            this.getInferenceEntityId()
-        );
+        var startRequest = new StartTrainedModelDeploymentAction.Request(this.getServiceSettings().modelId(), this.getInferenceEntityId());
         startRequest.setNumberOfAllocations(this.getServiceSettings().getNumAllocations());
         startRequest.setThreadsPerAllocation(this.getServiceSettings().getNumThreads());
         startRequest.setAdaptiveAllocationsSettings(this.getServiceSettings().getAdaptiveAllocationsSettings());

+ 3 - 3
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java

@@ -211,7 +211,7 @@ public class ElserInternalService implements InferenceService {
         var serviceSettings = elserModel.getServiceSettings();
 
         var startRequest = new StartTrainedModelDeploymentAction.Request(
-            serviceSettings.getModelId(),
+            serviceSettings.modelId(),
             model.getConfigurations().getInferenceEntityId()
         );
         startRequest.setNumberOfAllocations(serviceSettings.getNumAllocations());
@@ -362,7 +362,7 @@ public class ElserInternalService implements InferenceService {
             );
             return;
         } else {
-            String modelId = ((ElserInternalModel) model).getServiceSettings().getModelId();
+            String modelId = ((ElserInternalModel) model).getServiceSettings().modelId();
             var input = new TrainedModelInput(List.<String>of("text_field")); // by convention text_field is used
             var config = TrainedModelConfig.builder().setInput(input).setModelId(modelId).validate(true).build();
             PutTrainedModelAction.Request putRequest = new PutTrainedModelAction.Request(config, false, true);
@@ -389,7 +389,7 @@ public class ElserInternalService implements InferenceService {
         });
 
         if (model instanceof ElserInternalModel elserModel) {
-            String modelId = elserModel.getServiceSettings().getModelId();
+            String modelId = elserModel.getServiceSettings().modelId();
             GetTrainedModelsAction.Request getRequest = new GetTrainedModelsAction.Request(modelId);
             executeAsyncWithOrigin(client, INFERENCE_ORIGIN, GetTrainedModelsAction.INSTANCE, getRequest, getModelsResponseListener);
         } else {

+ 3 - 3
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettings.java

@@ -130,7 +130,7 @@ public class ElserInternalServiceSettings extends InternalServiceSettings {
         }
         out.writeVInt(getNumThreads());
         if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_11_X)) {
-            out.writeString(getModelId());
+            out.writeString(modelId());
         }
         if (out.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS)) {
             out.writeOptionalWriteable(getAdaptiveAllocationsSettings());
@@ -139,7 +139,7 @@ public class ElserInternalServiceSettings extends InternalServiceSettings {
 
     @Override
     public int hashCode() {
-        return Objects.hash(NAME, getNumAllocations(), getNumThreads(), getModelId(), getAdaptiveAllocationsSettings());
+        return Objects.hash(NAME, getNumAllocations(), getNumThreads(), modelId(), getAdaptiveAllocationsSettings());
     }
 
     @Override
@@ -149,7 +149,7 @@ public class ElserInternalServiceSettings extends InternalServiceSettings {
         ElserInternalServiceSettings that = (ElserInternalServiceSettings) o;
         return getNumAllocations() == that.getNumAllocations()
             && getNumThreads() == that.getNumThreads()
-            && Objects.equals(getModelId(), that.getModelId())
+            && Objects.equals(modelId(), that.modelId())
             && Objects.equals(getAdaptiveAllocationsSettings(), that.getAdaptiveAllocationsSettings());
     }
 }

+ 1 - 0
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/embeddings/GoogleVertexAiEmbeddingsServiceSettings.java

@@ -164,6 +164,7 @@ public class GoogleVertexAiEmbeddingsServiceSettings extends FilteredXContentObj
         return location;
     }
 
+    @Override
     public String modelId() {
         return modelId;
     }

+ 1 - 0
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/rerank/GoogleVertexAiRerankServiceSettings.java

@@ -82,6 +82,7 @@ public class GoogleVertexAiRerankServiceSettings extends FilteredXContentObject
         return projectId;
     }
 
+    @Override
     public String modelId() {
         return modelId;
     }

+ 5 - 0
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceServiceSettings.java

@@ -196,6 +196,11 @@ public class HuggingFaceServiceSettings extends FilteredXContentObject implement
         return dimensions;
     }
 
+    @Override
+    public String modelId() {
+        return null;
+    }
+
     public Integer maxInputTokens() {
         return maxInputTokens;
     }

+ 5 - 0
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/elser/HuggingFaceElserServiceSettings.java

@@ -97,6 +97,11 @@ public class HuggingFaceElserServiceSettings extends FilteredXContentObject
         return ELSER_TOKEN_LIMIT;
     }
 
+    @Override
+    public String modelId() {
+        return null;
+    }
+
     @Override
     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject();

+ 1 - 1
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/MistralService.java

@@ -245,7 +245,7 @@ public class MistralService extends SenderService {
         var similarityToUse = similarityFromModel == null ? SimilarityMeasure.DOT_PRODUCT : similarityFromModel;
 
         MistralEmbeddingsServiceSettings serviceSettings = new MistralEmbeddingsServiceSettings(
-            embeddingServiceSettings.model(),
+            embeddingServiceSettings.modelId(),
             embeddingsSize,
             embeddingServiceSettings.maxInputTokens(),
             similarityToUse,

+ 1 - 1
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/embeddings/MistralEmbeddingsModel.java

@@ -71,7 +71,7 @@ public class MistralEmbeddingsModel extends Model {
     }
 
     private void setPropertiesFromServiceSettings(MistralEmbeddingsServiceSettings serviceSettings) {
-        this.model = serviceSettings.model();
+        this.model = serviceSettings.modelId();
         this.rateLimitSettings = serviceSettings.rateLimitSettings();
         setEndpointUrl();
     }

+ 2 - 1
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/embeddings/MistralEmbeddingsServiceSettings.java

@@ -107,7 +107,8 @@ public class MistralEmbeddingsServiceSettings extends FilteredXContentObject imp
         return ADD_MISTRAL_EMBEDDINGS_INFERENCE;
     }
 
-    public String model() {
+    @Override
+    public String modelId() {
         return this.model;
     }
 

+ 6 - 3
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/settings/InternalServiceSettings.java

@@ -49,7 +49,8 @@ public abstract class InternalServiceSettings implements ServiceSettings {
         return numThreads;
     }
 
-    public String getModelId() {
+    @Override
+    public String modelId() {
         return modelId;
     }
 
@@ -84,8 +85,10 @@ public abstract class InternalServiceSettings implements ServiceSettings {
         if (numAllocations != null) {
             builder.field(NUM_ALLOCATIONS, getNumAllocations());
         }
+
         builder.field(NUM_THREADS, getNumThreads());
-        builder.field(MODEL_ID, getModelId());
+        builder.field(MODEL_ID, modelId());
+
         if (adaptiveAllocationsSettings != null) {
             builder.field(ADAPTIVE_ALLOCATIONS, getAdaptiveAllocationsSettings());
         }
@@ -109,7 +112,7 @@ public abstract class InternalServiceSettings implements ServiceSettings {
             out.writeVInt(getNumAllocations());
         }
         out.writeVInt(getNumThreads());
-        out.writeString(getModelId());
+        out.writeString(modelId());
         if (out.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS)) {
             out.writeOptionalWriteable(getAdaptiveAllocationsSettings());
         }

+ 47 - 0
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/InferenceAPMStats.java

@@ -0,0 +1,47 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.telemetry;
+
+import org.elasticsearch.inference.Model;
+import org.elasticsearch.telemetry.metric.LongCounter;
+import org.elasticsearch.telemetry.metric.MeterRegistry;
+
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Request statistics that report every increment to a telemetry {@link LongCounter} in addition to the in-memory
+ * count kept by {@link InferenceStats}.
+ */
+public class InferenceAPMStats extends InferenceStats {
+
+    private final LongCounter inferenceAPMRequestCounter;
+    // Built once in the constructor because service, task type and model ID are final.
+    private final Map<String, Object> counterAttributes;
+
+    /**
+     * @param model the model whose service, task type and model ID are attached to each counter increment
+     * @param meterRegistry the registry the request counter is registered with
+     */
+    public InferenceAPMStats(Model model, MeterRegistry meterRegistry) {
+        super(model);
+        // NOTE(review): every instance registers a counter under the same name; confirm the registry permits
+        // repeated registration of "es.inference.requests.count".
+        this.inferenceAPMRequestCounter = meterRegistry.registerLongCounter(
+            "es.inference.requests.count",
+            "Inference API request counts for a particular service, task type, model ID",
+            "operations"
+        );
+        this.counterAttributes = buildAttributes(service, taskType.toString(), modelId);
+    }
+
+    /**
+     * Increments the in-memory count and then the telemetry counter by one.
+     */
+    @Override
+    public void increment() {
+        super.increment();
+        inferenceAPMRequestCounter.incrementBy(1, counterAttributes);
+    }
+
+    /**
+     * Builds the counter attributes, leaving out {@code model_id} when the model has none. {@link Map#of} rejects
+     * null values, so passing a null model ID straight through would throw a {@link NullPointerException}.
+     */
+    private static Map<String, Object> buildAttributes(String serviceName, String taskTypeName, String id) {
+        if (id == null) {
+            return Map.of("service", serviceName, "task_type", taskTypeName);
+        }
+        return Map.of("service", serviceName, "task_type", taskTypeName, "model_id", id);
+    }
+
+    /**
+     * Creates {@link InferenceAPMStats} instances that all share one {@link MeterRegistry}.
+     */
+    public static final class Factory {
+        private final MeterRegistry meterRegistry;
+
+        /**
+         * @param meterRegistry the registry handed to every created instance; must not be null
+         */
+        public Factory(MeterRegistry meterRegistry) {
+            this.meterRegistry = Objects.requireNonNull(meterRegistry);
+        }
+
+        /**
+         * @param model the model to track request counts for
+         * @return a new stats object bound to this factory's registry
+         */
+        public InferenceAPMStats newInferenceRequestAPMCounter(Model model) {
+            return new InferenceAPMStats(model, meterRegistry);
+        }
+    }
+}

+ 59 - 0
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/InferenceStats.java

@@ -0,0 +1,59 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.telemetry;
+
+import org.elasticsearch.inference.Model;
+import org.elasticsearch.inference.TaskType;
+import org.elasticsearch.xpack.core.inference.InferenceRequestStats;
+
+import java.util.Objects;
+import java.util.concurrent.atomic.LongAdder;
+
+/**
+ * Counts inference requests for one combination of service, task type and model ID.
+ */
+public class InferenceStats implements Stats {
+    protected final String service;
+    protected final TaskType taskType;
+    protected final String modelId;
+    protected final LongAdder counter = new LongAdder();
+
+    /**
+     * Builds the map key for a model in the form {@code service:taskType}, followed by {@code :modelId} when the
+     * model's service settings report a non-null model ID.
+     *
+     * @param model the model to derive the key from
+     * @return the key identifying the model's statistics entry
+     */
+    public static String key(Model model) {
+        String prefix = model.getConfigurations().getService() + ":" + model.getTaskType();
+        String id = model.getServiceSettings().modelId();
+        return id == null ? prefix : prefix + ":" + id;
+    }
+
+    /**
+     * @param model the model to read the service, task type and model ID from; must not be null
+     */
+    public InferenceStats(Model model) {
+        Model source = Objects.requireNonNull(model);
+
+        this.service = source.getConfigurations().getService();
+        this.taskType = source.getTaskType();
+        this.modelId = source.getServiceSettings().modelId();
+    }
+
+    @Override
+    public void increment() {
+        counter.increment();
+    }
+
+    @Override
+    public long getCount() {
+        return counter.sum();
+    }
+
+    /**
+     * Captures the identifying fields together with the count at the time of the call.
+     */
+    @Override
+    public InferenceRequestStats toSerializableForm() {
+        long total = getCount();
+        return new InferenceRequestStats(service, taskType, modelId, total);
+    }
+}

+ 30 - 0
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/Stats.java

@@ -0,0 +1,30 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.telemetry;
+
+import org.elasticsearch.xpack.core.inference.SerializableStats;
+
+/**
+ * A counter-style statistic that can be incremented, read, and converted into a serializable form.
+ */
+public interface Stats {
+
+    /**
+     * Increase the counter by one.
+     */
+    void increment();
+
+    /**
+     * Return the current value of the counter.
+     * @return the current value of the counter
+     */
+    long getCount();
+
+    /**
+     * Convert the object into a serializable form that can be written across nodes and returned in xcontent format.
+     * @return the serializable format of the object
+     */
+    SerializableStats toSerializableForm();
+}

+ 57 - 0
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/StatsMap.java

@@ -0,0 +1,57 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.telemetry;
+
+import org.elasticsearch.xpack.core.inference.SerializableStats;
+
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+/**
+ * A concurrent map of incrementing statistics with one entry per key derived from an input.
+ *
+ * @param <Input> the type that both the keys and the values are derived from
+ * @param <Value> the type of the statistics stored in the map
+ */
+public class StatsMap<Input, Value extends Stats> {
+
+    private final ConcurrentMap<String, Value> stats = new ConcurrentHashMap<>();
+    private final Function<Input, String> keyCreator;
+    private final Function<Input, Value> valueCreator;
+
+    /**
+     * @param keyCreator derives the map key from an input; must not be null
+     * @param valueCreator creates the statistics object for an input whose key is not yet present; must not be null
+     */
+    public StatsMap(Function<Input, String> keyCreator, Function<Input, Value> valueCreator) {
+        this.keyCreator = Objects.requireNonNull(keyCreator);
+        this.valueCreator = Objects.requireNonNull(valueCreator);
+    }
+
+    /**
+     * Increment the statistics entry for the input, creating the entry first when its key is not yet in the map.
+     * @param input the input to derive the appropriate key in the map
+     */
+    public void increment(Input input) {
+        String statsKey = keyCreator.apply(input);
+        stats.computeIfAbsent(statsKey, ignored -> valueCreator.apply(input)).increment();
+    }
+
+    /**
+     * Take a snapshot of the current entries in their serializable form. Increments running concurrently with this
+     * call may or may not be reflected in the result.
+     * @return a map from each key to the serializable form of its statistics
+     */
+    public Map<String, SerializableStats> toSerializableMap() {
+        Function<Map.Entry<String, Value>, SerializableStats> toSerializable = entry -> entry.getValue().toSerializableForm();
+        return stats.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, toSerializable));
+    }
+}

+ 5 - 0
x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/model/TestModel.java

@@ -168,6 +168,11 @@ public class TestModel extends Model {
         public DenseVectorFieldMapper.ElementType elementType() {
             return elementType;
         }
+
+        @Override
+        public String modelId() {
+            return model;
+        }
     }
 
     public record TestTaskSettings(Integer temperature) implements TaskSettings {

+ 13 - 13
x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java

@@ -96,7 +96,7 @@ public class AmazonBedrockServiceTests extends ESTestCase {
 
                 var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings();
                 assertThat(settings.region(), is("region"));
-                assertThat(settings.model(), is("model"));
+                assertThat(settings.modelId(), is("model"));
                 assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN));
                 var secretSettings = (AmazonBedrockSecretSettings) model.getSecretSettings();
                 assertThat(secretSettings.accessKey.toString(), is("access"));
@@ -290,7 +290,7 @@ public class AmazonBedrockServiceTests extends ESTestCase {
 
                 var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings();
                 assertThat(settings.region(), is("region"));
-                assertThat(settings.model(), is("model"));
+                assertThat(settings.modelId(), is("model"));
                 assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN));
                 var secretSettings = (AmazonBedrockSecretSettings) model.getSecretSettings();
                 assertThat(secretSettings.accessKey.toString(), is("access"));
@@ -353,7 +353,7 @@ public class AmazonBedrockServiceTests extends ESTestCase {
 
             var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings();
             assertThat(settings.region(), is("region"));
-            assertThat(settings.model(), is("model"));
+            assertThat(settings.modelId(), is("model"));
             assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN));
             var secretSettings = (AmazonBedrockSecretSettings) model.getSecretSettings();
             assertThat(secretSettings.accessKey.toString(), is("access"));
@@ -404,7 +404,7 @@ public class AmazonBedrockServiceTests extends ESTestCase {
 
             var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings();
             assertThat(settings.region(), is("region"));
-            assertThat(settings.model(), is("model"));
+            assertThat(settings.modelId(), is("model"));
             assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN));
             var secretSettings = (AmazonBedrockSecretSettings) model.getSecretSettings();
             assertThat(secretSettings.accessKey.toString(), is("access"));
@@ -431,7 +431,7 @@ public class AmazonBedrockServiceTests extends ESTestCase {
 
             var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings();
             assertThat(settings.region(), is("region"));
-            assertThat(settings.model(), is("model"));
+            assertThat(settings.modelId(), is("model"));
             assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN));
             var secretSettings = (AmazonBedrockSecretSettings) model.getSecretSettings();
             assertThat(secretSettings.accessKey.toString(), is("access"));
@@ -458,7 +458,7 @@ public class AmazonBedrockServiceTests extends ESTestCase {
 
             var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings();
             assertThat(settings.region(), is("region"));
-            assertThat(settings.model(), is("model"));
+            assertThat(settings.modelId(), is("model"));
             assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN));
             var secretSettings = (AmazonBedrockSecretSettings) model.getSecretSettings();
             assertThat(secretSettings.accessKey.toString(), is("access"));
@@ -485,7 +485,7 @@ public class AmazonBedrockServiceTests extends ESTestCase {
 
             var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings();
             assertThat(settings.region(), is("region"));
-            assertThat(settings.model(), is("model"));
+            assertThat(settings.modelId(), is("model"));
             assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN));
             var secretSettings = (AmazonBedrockSecretSettings) model.getSecretSettings();
             assertThat(secretSettings.accessKey.toString(), is("access"));
@@ -513,7 +513,7 @@ public class AmazonBedrockServiceTests extends ESTestCase {
 
             var settings = (AmazonBedrockChatCompletionServiceSettings) model.getServiceSettings();
             assertThat(settings.region(), is("region"));
-            assertThat(settings.model(), is("model"));
+            assertThat(settings.modelId(), is("model"));
             assertThat(settings.provider(), is(AmazonBedrockProvider.ANTHROPIC));
             var taskSettings = (AmazonBedrockChatCompletionTaskSettings) model.getTaskSettings();
             assertThat(taskSettings.temperature(), is(1.0));
@@ -539,7 +539,7 @@ public class AmazonBedrockServiceTests extends ESTestCase {
 
             var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings();
             assertThat(settings.region(), is("region"));
-            assertThat(settings.model(), is("model"));
+            assertThat(settings.modelId(), is("model"));
             assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN));
             assertNull(model.getSecretSettings());
         }
@@ -558,7 +558,7 @@ public class AmazonBedrockServiceTests extends ESTestCase {
 
             var settings = (AmazonBedrockChatCompletionServiceSettings) model.getServiceSettings();
             assertThat(settings.region(), is("region"));
-            assertThat(settings.model(), is("model"));
+            assertThat(settings.modelId(), is("model"));
             assertThat(settings.provider(), is(AmazonBedrockProvider.ANTHROPIC));
             var taskSettings = (AmazonBedrockChatCompletionTaskSettings) model.getTaskSettings();
             assertThat(taskSettings.temperature(), is(1.0));
@@ -602,7 +602,7 @@ public class AmazonBedrockServiceTests extends ESTestCase {
 
             var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings();
             assertThat(settings.region(), is("region"));
-            assertThat(settings.model(), is("model"));
+            assertThat(settings.modelId(), is("model"));
             assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN));
             assertNull(model.getSecretSettings());
         }
@@ -623,7 +623,7 @@ public class AmazonBedrockServiceTests extends ESTestCase {
 
             var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings();
             assertThat(settings.region(), is("region"));
-            assertThat(settings.model(), is("model"));
+            assertThat(settings.modelId(), is("model"));
             assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN));
             assertNull(model.getSecretSettings());
         }
@@ -643,7 +643,7 @@ public class AmazonBedrockServiceTests extends ESTestCase {
 
             var settings = (AmazonBedrockChatCompletionServiceSettings) model.getServiceSettings();
             assertThat(settings.region(), is("region"));
-            assertThat(settings.model(), is("model"));
+            assertThat(settings.modelId(), is("model"));
             assertThat(settings.provider(), is(AmazonBedrockProvider.ANTHROPIC));
             var taskSettings = (AmazonBedrockChatCompletionTaskSettings) model.getTaskSettings();
             assertThat(taskSettings.temperature(), is(1.0));

+ 3 - 3
x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallInternalServiceSettingsTests.java

@@ -134,18 +134,18 @@ public class MultilingualE5SmallInternalServiceSettingsTests extends AbstractWir
             case 0 -> new MultilingualE5SmallInternalServiceSettings(
                 instance.getNumAllocations() + 1,
                 instance.getNumThreads(),
-                instance.getModelId(),
+                instance.modelId(),
                 null
             );
             case 1 -> new MultilingualE5SmallInternalServiceSettings(
                 instance.getNumAllocations(),
                 instance.getNumThreads() + 1,
-                instance.getModelId(),
+                instance.modelId(),
                 null
             );
             case 2 -> {
                 var versions = new HashSet<>(ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_VALID_IDS);
-                versions.remove(instance.getModelId());
+                versions.remove(instance.modelId());
                 yield new MultilingualE5SmallInternalServiceSettings(
                     instance.getNumAllocations(),
                     instance.getNumThreads(),

+ 3 - 3
x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettingsTests.java

@@ -130,18 +130,18 @@ public class ElserInternalServiceSettingsTests extends AbstractWireSerializingTe
             case 0 -> new ElserInternalServiceSettings(
                 instance.getNumAllocations() + 1,
                 instance.getNumThreads(),
-                instance.getModelId(),
+                instance.modelId(),
                 null
             );
             case 1 -> new ElserInternalServiceSettings(
                 instance.getNumAllocations(),
                 instance.getNumThreads() + 1,
-                instance.getModelId(),
+                instance.modelId(),
                 null
             );
             case 2 -> {
                 var versions = new HashSet<>(ElserInternalService.VALID_ELSER_MODEL_IDS);
-                versions.remove(instance.getModelId());
+                versions.remove(instance.modelId());
                 yield new ElserInternalServiceSettings(
                     instance.getNumAllocations(),
                     instance.getNumThreads(),

+ 2 - 2
x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceTests.java

@@ -332,7 +332,7 @@ public class ElserInternalServiceTests extends ESTestCase {
             );
 
             ActionListener<Model> modelActionListener = ActionListener.<Model>wrap((model) -> {
-                assertEquals(".elser_model_2", ((ElserInternalModel) model).getServiceSettings().getModelId());
+                assertEquals(".elser_model_2", ((ElserInternalModel) model).getServiceSettings().modelId());
             }, (e) -> { fail("Model verification should not fail"); });
 
             service.parseRequestConfig("foo", TaskType.SPARSE_EMBEDDING, settings, Set.of(), modelActionListener);
@@ -345,7 +345,7 @@ public class ElserInternalServiceTests extends ESTestCase {
             );
 
             ActionListener<Model> modelActionListener = ActionListener.<Model>wrap((model) -> {
-                assertEquals(".elser_model_2_linux-x86_64", ((ElserInternalModel) model).getServiceSettings().getModelId());
+                assertEquals(".elser_model_2_linux-x86_64", ((ElserInternalModel) model).getServiceSettings().modelId());
             }, (e) -> { fail("Model verification should not fail"); });
 
             service.parseRequestConfig("foo", TaskType.SPARSE_EMBEDDING, settings, Set.of("linux-x86_64"), modelActionListener);

+ 3 - 3
x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mistral/MistralServiceTests.java

@@ -100,7 +100,7 @@ public class MistralServiceTests extends ESTestCase {
 
                 var embeddingsModel = (MistralEmbeddingsModel) model;
                 var serviceSettings = (MistralEmbeddingsServiceSettings) model.getServiceSettings();
-                assertThat(serviceSettings.model(), is("mistral-embed"));
+                assertThat(serviceSettings.modelId(), is("mistral-embed"));
                 assertThat(embeddingsModel.getSecretSettings().apiKey().toString(), is("secret"));
             }, exception -> fail("Unexpected exception: " + exception));
 
@@ -231,7 +231,7 @@ public class MistralServiceTests extends ESTestCase {
             assertThat(model, instanceOf(MistralEmbeddingsModel.class));
 
             var embeddingsModel = (MistralEmbeddingsModel) model;
-            assertThat(embeddingsModel.getServiceSettings().model(), is("mistral-embed"));
+            assertThat(embeddingsModel.getServiceSettings().modelId(), is("mistral-embed"));
             assertThat(embeddingsModel.getServiceSettings().dimensions(), is(1024));
             assertThat(embeddingsModel.getServiceSettings().maxInputTokens(), is(512));
             assertThat(embeddingsModel.getSecretSettings().apiKey().toString(), is("secret"));
@@ -354,7 +354,7 @@ public class MistralServiceTests extends ESTestCase {
             assertThat(model, instanceOf(MistralEmbeddingsModel.class));
 
             var embeddingsModel = (MistralEmbeddingsModel) model;
-            assertThat(embeddingsModel.getServiceSettings().model(), is("mistral-embed"));
+            assertThat(embeddingsModel.getServiceSettings().modelId(), is("mistral-embed"));
             assertThat(embeddingsModel.getServiceSettings().dimensions(), is(1024));
             assertThat(embeddingsModel.getServiceSettings().maxInputTokens(), is(512));
         }

+ 119 - 0
x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/telemetry/StatsMapTests.java

@@ -0,0 +1,119 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.telemetry;
+
+import org.elasticsearch.inference.TaskType;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.inference.services.ConfigurationParseContext;
+import org.elasticsearch.xpack.inference.services.cohere.embeddings.CohereEmbeddingsModel;
+import org.elasticsearch.xpack.inference.services.cohere.embeddings.CohereEmbeddingsServiceSettingsTests;
+import org.elasticsearch.xpack.inference.services.cohere.embeddings.CohereEmbeddingsTaskSettingsTests;
+import org.elasticsearch.xpack.inference.services.openai.embeddings.OpenAiEmbeddingsModel;
+import org.elasticsearch.xpack.inference.services.openai.embeddings.OpenAiEmbeddingsServiceSettingsTests;
+import org.elasticsearch.xpack.inference.services.openai.embeddings.OpenAiEmbeddingsTaskSettingsTests;
+
+import java.util.Map;
+
+import static org.hamcrest.Matchers.is;
+
+/**
+ * Tests for {@link StatsMap} keyed and populated through {@link InferenceStats}.
+ */
+public class StatsMapTests extends ESTestCase {
+    public void testAddingEntry_InitializesTheCountToOne() {
+        var stats = new StatsMap<>(InferenceStats::key, InferenceStats::new);
+
+        stats.increment(createOpenAiModel());
+
+        assertThat(stats.toSerializableMap(), is(Map.of("openai:text_embedding:modelId", expectedStats("openai", "modelId", 1))));
+    }
+
+    public void testIncrementingWithSeparateModels_IncrementsTheCounterToTwo() {
+        var stats = new StatsMap<>(InferenceStats::key, InferenceStats::new);
+
+        // Two distinct instances with identical settings must map to the same entry.
+        var model1 = createOpenAiModel();
+        var model2 = createOpenAiModel();
+
+        stats.increment(model1);
+        stats.increment(model2);
+
+        assertThat(stats.toSerializableMap(), is(Map.of("openai:text_embedding:modelId", expectedStats("openai", "modelId", 2))));
+    }
+
+    public void testNullModelId_ResultsInKeyWithout() {
+        var stats = new StatsMap<>(InferenceStats::key, InferenceStats::new);
+
+        stats.increment(createCohereModelWithoutModelId());
+
+        assertThat(stats.toSerializableMap(), is(Map.of("cohere:text_embedding", expectedStats("cohere", null, 1))));
+    }
+
+    /** Creates an OpenAI text embedding model whose model ID is {@code "modelId"}. */
+    private static OpenAiEmbeddingsModel createOpenAiModel() {
+        return new OpenAiEmbeddingsModel(
+            "inference_id",
+            TaskType.TEXT_EMBEDDING,
+            "openai",
+            OpenAiEmbeddingsServiceSettingsTests.getServiceSettingsMap("modelId", null, null),
+            OpenAiEmbeddingsTaskSettingsTests.getTaskSettingsMap(null),
+            null,
+            ConfigurationParseContext.REQUEST
+        );
+    }
+
+    /** Creates a Cohere text embedding model whose service settings are built with a null model ID. */
+    private static CohereEmbeddingsModel createCohereModelWithoutModelId() {
+        return new CohereEmbeddingsModel(
+            "inference_id",
+            TaskType.TEXT_EMBEDDING,
+            "cohere",
+            CohereEmbeddingsServiceSettingsTests.getServiceSettingsMap(null, null, null),
+            CohereEmbeddingsTaskSettingsTests.getTaskSettingsMap(null, null),
+            null,
+            ConfigurationParseContext.REQUEST
+        );
+    }
+
+    /** Builds the expected serialized entry for a text embedding model. */
+    private static org.elasticsearch.xpack.core.inference.InferenceRequestStats expectedStats(
+        String service,
+        String modelId,
+        long count
+    ) {
+        return new org.elasticsearch.xpack.core.inference.InferenceRequestStats(service, TaskType.TEXT_EMBEDDING, modelId, count);
+    }
+}