瀏覽代碼

Update EIS sparse and dense embedding max batch size to 16 (#132646) (#132855)

(cherry picked from commit 81b4cceb043f979256b737275449c2fbc01d0120)

# Conflicts:
#	x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java

Co-authored-by: Jonathan Buttner <56361221+jonathan-buttner@users.noreply.github.com>
Jason Botzas-Coluni 1 月之前
父節點
當前提交
51e3249a0e

+ 5 - 0
docs/changelog/132646.yaml

@@ -0,0 +1,5 @@
+pr: 132646
+summary: Update EIS sparse and dense embedding max batch size to 16
+area: Machine Learning
+type: bug
+issues: []

+ 8 - 2
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java

@@ -87,7 +87,10 @@ public class ElasticInferenceService extends SenderService {
     public static final String NAME = "elastic";
     public static final String ELASTIC_INFERENCE_SERVICE_IDENTIFIER = "Elastic Inference Service";
     public static final Integer DENSE_TEXT_EMBEDDINGS_DIMENSIONS = 1024;
-    public static final Integer SPARSE_TEXT_EMBEDDING_MAX_BATCH_SIZE = 512;
+    // The maximum batch size for sparse text embeddings is set to 16.
+    // This value was reduced from 512 due to memory constraints; batch sizes above 32 can cause GPU out-of-memory errors.
+    // A batch size of 16 provides optimal throughput and stability, especially on lower-tier instance types.
+    public static final Integer SPARSE_TEXT_EMBEDDING_MAX_BATCH_SIZE = 16;
 
     private static final EnumSet<TaskType> IMPLEMENTED_TASK_TYPES = EnumSet.of(
         TaskType.SPARSE_EMBEDDING,
@@ -97,7 +100,10 @@ public class ElasticInferenceService extends SenderService {
     );
     private static final String SERVICE_NAME = "Elastic";
 
-    private static final Integer DENSE_TEXT_EMBEDDINGS_MAX_BATCH_SIZE = 32;
+    // TODO: revisit this value once EIS supports dense models
+    // The maximum batch size for dense text embeddings is proactively set to 16.
+    // This mirrors the memory constraints observed with sparse embeddings.
+    private static final Integer DENSE_TEXT_EMBEDDINGS_MAX_BATCH_SIZE = 16;
 
     // rainbow-sprinkles
     static final String DEFAULT_CHAT_COMPLETION_MODEL_ID_V1 = "rainbow-sprinkles";