
Update EIS sparse and dense embedding max batch size to 16 (#132646) (#132661)

Jason Botzas-Coluni 2 months ago
parent
commit
47442b6225

+ 5 - 0
docs/changelog/132646.yaml

@@ -0,0 +1,5 @@
+pr: 132646
+summary: Update EIS sparse and dense embedding max batch size to 16
+area: Machine Learning
+type: bug
+issues: []

+ 8 - 3
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java

@@ -89,7 +89,10 @@ public class ElasticInferenceService extends SenderService {
     public static final String NAME = "elastic";
     public static final String ELASTIC_INFERENCE_SERVICE_IDENTIFIER = "Elastic Inference Service";
     public static final Integer DENSE_TEXT_EMBEDDINGS_DIMENSIONS = 1024;
-    public static final Integer SPARSE_TEXT_EMBEDDING_MAX_BATCH_SIZE = 512;
+    // The maximum batch size for sparse text embeddings was reduced from 512 to 16 due to memory
+    // constraints: batch sizes above 32 can cause GPU out-of-memory errors, and 16 provides good
+    // throughput and stability, especially on lower-tier instance types.
+    public static final Integer SPARSE_TEXT_EMBEDDING_MAX_BATCH_SIZE = 16;
 
     private static final EnumSet<TaskType> IMPLEMENTED_TASK_TYPES = EnumSet.of(
         TaskType.SPARSE_EMBEDDING,
@@ -99,8 +102,10 @@ public class ElasticInferenceService extends SenderService {
     );
     private static final String SERVICE_NAME = "Elastic";
 
-    // TODO: check with team, what makes the most sense
-    private static final Integer DENSE_TEXT_EMBEDDINGS_MAX_BATCH_SIZE = 32;
+    // TODO: revisit this value once EIS supports dense models.
+    // The maximum batch size for dense text embeddings is proactively set to 16,
+    // mirroring the memory constraints observed with sparse embeddings.
+    private static final Integer DENSE_TEXT_EMBEDDINGS_MAX_BATCH_SIZE = 16;
 
     // rainbow-sprinkles
     static final String DEFAULT_CHAT_COMPLETION_MODEL_ID_V1 = "rainbow-sprinkles";
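
For context, here is a minimal sketch of how a max-batch-size constant like this is typically consumed when chunking embedding inputs into requests. The partitioning helper below is an illustration, not code from this commit; the class and method names are assumptions, and only the value 16 comes from the change above.

import java.util.ArrayList;
import java.util.List;

// Hypothetical sketch: splitting inputs into batches no larger than the
// service's max batch size (16 after this change). The class and helper
// names are assumptions, not part of ElasticInferenceService.
final class BatchingSketch {

    static final int SPARSE_TEXT_EMBEDDING_MAX_BATCH_SIZE = 16;

    // Partition inputs into sublists of at most maxBatchSize elements.
    static <T> List<List<T>> partition(List<T> inputs, int maxBatchSize) {
        List<List<T>> batches = new ArrayList<>();
        for (int start = 0; start < inputs.size(); start += maxBatchSize) {
            int end = Math.min(start + maxBatchSize, inputs.size());
            batches.add(inputs.subList(start, end));
        }
        return batches;
    }

    public static void main(String[] args) {
        List<Integer> docs = new ArrayList<>();
        for (int i = 0; i < 40; i++) {
            docs.add(i);
        }
        // 40 inputs with a max batch size of 16 -> batches of 16, 16, and 8.
        System.out.println(partition(docs, SPARSE_TEXT_EMBEDDING_MAX_BATCH_SIZE).size() + " batches");
    }
}

Under this scheme, the previous value of 512 allowed far more inputs per request; lowering the constant to 16 simply yields more, smaller batches, trading request count for per-request GPU memory.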