@@ -9,7 +9,6 @@ package org.elasticsearch.xpack.inference.mapper;
 
 import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
 
-import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexableField;
@@ -25,7 +24,6 @@ import org.apache.lucene.search.join.BitSetProducer;
 import org.apache.lucene.search.join.QueryBitSetProducer;
 import org.apache.lucene.search.join.ScoreMode;
 import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
-import org.elasticsearch.cluster.ClusterChangedEvent;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.common.CheckedBiConsumer;
 import org.elasticsearch.common.CheckedBiFunction;
@@ -36,7 +34,6 @@ import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.CheckedConsumer;
 import org.elasticsearch.index.IndexVersion;
-import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.DocumentParsingException;
 import org.elasticsearch.index.mapper.FieldMapper;
@@ -66,10 +63,6 @@ import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.search.LeafNestedDocuments;
 import org.elasticsearch.search.NestedDocuments;
 import org.elasticsearch.search.SearchHit;
-import org.elasticsearch.test.ClusterServiceUtils;
-import org.elasticsearch.test.client.NoOpClient;
-import org.elasticsearch.test.index.IndexVersionUtils;
-import org.elasticsearch.threadpool.TestThreadPool;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentType;
 import org.elasticsearch.xcontent.json.JsonXContent;
@@ -77,10 +70,7 @@ import org.elasticsearch.xpack.core.XPackClientPlugin;
 import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryWrapper;
 import org.elasticsearch.xpack.inference.InferencePlugin;
 import org.elasticsearch.xpack.inference.model.TestModel;
-import org.elasticsearch.xpack.inference.registry.ModelRegistry;
-import org.junit.After;
 import org.junit.AssumptionViolatedException;
-import org.junit.Before;
 
 import java.io.IOException;
 import java.util.Collection;
@@ -90,7 +80,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.function.BiConsumer;
-import java.util.function.Supplier;
 
 import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_EMBEDDINGS_FIELD;
 import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKS_FIELD;
@@ -112,22 +101,10 @@ import static org.hamcrest.Matchers.instanceOf;
 public class SemanticTextFieldMapperTests extends MapperTestCase {
     private final boolean useLegacyFormat;
 
-    private TestThreadPool threadPool;
-
     public SemanticTextFieldMapperTests(boolean useLegacyFormat) {
         this.useLegacyFormat = useLegacyFormat;
     }
 
-    @Before
-    private void startThreadPool() {
-        threadPool = createThreadPool();
-    }
-
-    @After
-    private void stopThreadPool() {
-        threadPool.close();
-    }
-
     @ParametersFactory
     public static Iterable<Object[]> parameters() throws Exception {
         return List.of(new Object[] { true }, new Object[] { false });
@@ -135,61 +112,18 @@ public class SemanticTextFieldMapperTests extends MapperTestCase {
 
     @Override
     protected Collection<? extends Plugin> getPlugins() {
-        var clusterService = ClusterServiceUtils.createClusterService(threadPool);
-        var modelRegistry = new ModelRegistry(clusterService, new NoOpClient(threadPool));
-        modelRegistry.clusterChanged(new ClusterChangedEvent("init", clusterService.state(), clusterService.state()) {
-            @Override
-            public boolean localNodeMaster() {
-                return false;
-            }
-        });
-        return List.of(new InferencePlugin(Settings.EMPTY) {
-            @Override
-            protected Supplier<ModelRegistry> getModelRegistry() {
-                return () -> modelRegistry;
-            }
-        }, new XPackClientPlugin());
+        return List.of(new InferencePlugin(Settings.EMPTY), new XPackClientPlugin());
     }
 
     private MapperService createMapperService(XContentBuilder mappings, boolean useLegacyFormat) throws IOException {
-        IndexVersion indexVersion = SemanticInferenceMetadataFieldsMapperTests.getRandomCompatibleIndexVersion(useLegacyFormat);
-        return createMapperService(mappings, useLegacyFormat, indexVersion, indexVersion, false);
-    }
-
-    private MapperService createMapperService(XContentBuilder mappings, boolean useLegacyFormat, IndexVersion minIndexVersion)
-        throws IOException {
-        return createMapperService(mappings, useLegacyFormat, minIndexVersion, IndexVersion.current(), false);
-    }
-
-    private MapperService createMapperService(
-        XContentBuilder mappings,
-        boolean useLegacyFormat,
-        IndexVersion minIndexVersion,
-        IndexVersion maxIndexVersion,
-        boolean propagateIndexVersion
-    ) throws IOException {
-        validateIndexVersion(minIndexVersion, useLegacyFormat);
-        IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween(random(), minIndexVersion, maxIndexVersion);
         var settings = Settings.builder()
-            .put(IndexMetadata.SETTING_INDEX_VERSION_CREATED.getKey(), indexVersion)
+            .put(
+                IndexMetadata.SETTING_INDEX_VERSION_CREATED.getKey(),
+                SemanticInferenceMetadataFieldsMapperTests.getRandomCompatibleIndexVersion(useLegacyFormat)
+            )
             .put(InferenceMetadataFieldsMapper.USE_LEGACY_SEMANTIC_TEXT_FORMAT.getKey(), useLegacyFormat)
             .build();
-        // TODO - This is added, because we discovered a bug where the index version was not being correctly propagated
-        // in our mappings even though we were specifying the index version in settings. We will fix this in a followup and
-        // remove the boolean flag accordingly.
-        if (propagateIndexVersion) {
-            return createMapperService(indexVersion, settings, mappings);
-        } else {
-            return createMapperService(settings, mappings);
-        }
-    }
-
-    private static void validateIndexVersion(IndexVersion indexVersion, boolean useLegacyFormat) {
-        if (useLegacyFormat == false
-            && indexVersion.before(IndexVersions.INFERENCE_METADATA_FIELDS)
-            && indexVersion.between(IndexVersions.INFERENCE_METADATA_FIELDS_BACKPORT, IndexVersions.UPGRADE_TO_LUCENE_10_0_0) == false) {
-            throw new IllegalArgumentException("Index version " + indexVersion + " does not support new semantic text format");
-        }
+        return createMapperService(settings, mappings);
     }
 
     @Override
@@ -635,15 +569,14 @@ public class SemanticTextFieldMapperTests extends MapperTestCase {
     }
 
     private static void assertSemanticTextField(MapperService mapperService, String fieldName, boolean expectedModelSettings) {
-        assertSemanticTextField(mapperService, fieldName, expectedModelSettings, null, null);
+        assertSemanticTextField(mapperService, fieldName, expectedModelSettings, null);
     }
 
     private static void assertSemanticTextField(
         MapperService mapperService,
         String fieldName,
         boolean expectedModelSettings,
-        ChunkingSettings expectedChunkingSettings,
-        DenseVectorFieldMapper.IndexOptions expectedIndexOptions
+        ChunkingSettings expectedChunkingSettings
     ) {
         Mapper mapper = mapperService.mappingLookup().getMapper(fieldName);
         assertNotNull(mapper);
@@ -689,17 +622,8 @@ public class SemanticTextFieldMapperTests extends MapperTestCase {
                     assertThat(embeddingsMapper, instanceOf(SparseVectorFieldMapper.class));
                     SparseVectorFieldMapper sparseMapper = (SparseVectorFieldMapper) embeddingsMapper;
                     assertEquals(sparseMapper.fieldType().isStored(), semanticTextFieldType.useLegacyFormat() == false);
-                    assertNull(expectedIndexOptions);
-                }
-                case TEXT_EMBEDDING -> {
-                    assertThat(embeddingsMapper, instanceOf(DenseVectorFieldMapper.class));
-                    DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) embeddingsMapper;
-                    if (expectedIndexOptions != null) {
-                        assertEquals(expectedIndexOptions, denseVectorFieldMapper.fieldType().getIndexOptions());
-                    } else {
-                        assertNull(denseVectorFieldMapper.fieldType().getIndexOptions());
-                    }
                 }
+                case TEXT_EMBEDDING -> assertThat(embeddingsMapper, instanceOf(DenseVectorFieldMapper.class));
                 default -> throw new AssertionError("Invalid task type");
             }
         } else {
@@ -994,11 +918,11 @@ public class SemanticTextFieldMapperTests extends MapperTestCase {
             mapping(b -> addSemanticTextMapping(b, fieldName, model.getInferenceEntityId(), null, chunkingSettings)),
             useLegacyFormat
         );
-        assertSemanticTextField(mapperService, fieldName, false, chunkingSettings, null);
+        assertSemanticTextField(mapperService, fieldName, false, chunkingSettings);
 
         ChunkingSettings newChunkingSettings = generateRandomChunkingSettingsOtherThan(chunkingSettings);
         merge(mapperService, mapping(b -> addSemanticTextMapping(b, fieldName, model.getInferenceEntityId(), null, newChunkingSettings)));
-        assertSemanticTextField(mapperService, fieldName, false, newChunkingSettings, null);
+        assertSemanticTextField(mapperService, fieldName, false, newChunkingSettings);
     }
 
     public void testModelSettingsRequiredWithChunks() throws IOException {
@@ -1128,74 +1052,6 @@ public class SemanticTextFieldMapperTests extends MapperTestCase {
         assertThat(existsQuery, instanceOf(ESToParentBlockJoinQuery.class));
     }
 
-    private static DenseVectorFieldMapper.IndexOptions defaultDenseVectorIndexOptions() {
-        // These are the default index options for dense_vector fields, and used for semantic_text fields incompatible with BBQ.
-        int m = Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
-        int efConstruction = Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
-        return new DenseVectorFieldMapper.Int8HnswIndexOptions(m, efConstruction, null, null);
-    }
-
-    public void testDefaultIndexOptions() throws IOException {
-
-        // We default to BBQ for eligible dense vectors
-        var mapperService = createMapperService(fieldMapping(b -> {
-            b.field("type", "semantic_text");
-            b.field("inference_id", "another_inference_id");
-            b.startObject("model_settings");
-            b.field("task_type", "text_embedding");
-            b.field("dimensions", 100);
-            b.field("similarity", "cosine");
-            b.field("element_type", "float");
-            b.endObject();
-        }), useLegacyFormat, IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ);
-        assertSemanticTextField(mapperService, "field", true, null, SemanticTextFieldMapper.defaultSemanticDenseIndexOptions());
-
-        // Element types that are incompatible with BBQ will continue to use dense_vector defaults
-        mapperService = createMapperService(fieldMapping(b -> {
-            b.field("type", "semantic_text");
-            b.field("inference_id", "another_inference_id");
-            b.startObject("model_settings");
-            b.field("task_type", "text_embedding");
-            b.field("dimensions", 100);
-            b.field("similarity", "cosine");
-            b.field("element_type", "byte");
-            b.endObject();
-        }), useLegacyFormat, IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ);
-        assertSemanticTextField(mapperService, "field", true, null, null);
-
-        // A dim count of 10 is too small to support BBQ, so we continue to use dense_vector defaults
-        mapperService = createMapperService(fieldMapping(b -> {
-            b.field("type", "semantic_text");
-            b.field("inference_id", "another_inference_id");
-            b.startObject("model_settings");
-            b.field("task_type", "text_embedding");
-            b.field("dimensions", 10);
-            b.field("similarity", "cosine");
-            b.field("element_type", "float");
-            b.endObject();
-        }), useLegacyFormat, IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ);
-        assertSemanticTextField(mapperService, "field", true, null, defaultDenseVectorIndexOptions());
-
-        // Previous index versions do not set BBQ index options
-        mapperService = createMapperService(fieldMapping(b -> {
-            b.field("type", "semantic_text");
-            b.field("inference_id", "another_inference_id");
-            b.startObject("model_settings");
-            b.field("task_type", "text_embedding");
-            b.field("dimensions", 100);
-            b.field("similarity", "cosine");
-            b.field("element_type", "float");
-            b.endObject();
-        }),
-            useLegacyFormat,
-            IndexVersions.INFERENCE_METADATA_FIELDS,
-            IndexVersionUtils.getPreviousVersion(IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ),
-            true
-        );
-        assertSemanticTextField(mapperService, "field", true, null, defaultDenseVectorIndexOptions());
-
-    }
-
     @Override
     protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) {
         // Until a doc is indexed, the query is rewritten as match no docs