|
@@ -11,19 +11,26 @@ package org.elasticsearch.index.mapper.vectors;
|
|
|
|
|
|
import org.apache.lucene.analysis.TokenStream;
|
|
|
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
|
|
|
-import org.apache.lucene.document.FeatureField;
|
|
|
+import org.apache.lucene.index.DirectoryReader;
|
|
|
import org.apache.lucene.index.IndexableField;
|
|
|
+import org.apache.lucene.index.LeafReader;
|
|
|
+import org.apache.lucene.search.IndexSearcher;
|
|
|
+import org.apache.lucene.store.Directory;
|
|
|
+import org.apache.lucene.tests.index.RandomIndexWriter;
|
|
|
import org.elasticsearch.common.Strings;
|
|
|
import org.elasticsearch.common.bytes.BytesReference;
|
|
|
+import org.elasticsearch.common.compress.CompressedXContent;
|
|
|
import org.elasticsearch.index.IndexVersion;
|
|
|
import org.elasticsearch.index.IndexVersions;
|
|
|
import org.elasticsearch.index.mapper.DocumentMapper;
|
|
|
import org.elasticsearch.index.mapper.DocumentParsingException;
|
|
|
import org.elasticsearch.index.mapper.MappedFieldType;
|
|
|
import org.elasticsearch.index.mapper.MapperParsingException;
|
|
|
+import org.elasticsearch.index.mapper.MapperService;
|
|
|
import org.elasticsearch.index.mapper.MapperTestCase;
|
|
|
import org.elasticsearch.index.mapper.ParsedDocument;
|
|
|
import org.elasticsearch.index.mapper.SourceToParse;
|
|
|
+import org.elasticsearch.search.lookup.Source;
|
|
|
import org.elasticsearch.test.index.IndexVersionUtils;
|
|
|
import org.elasticsearch.xcontent.XContentBuilder;
|
|
|
import org.elasticsearch.xcontent.XContentFactory;
|
|
@@ -33,18 +40,25 @@ import org.junit.AssumptionViolatedException;
|
|
|
|
|
|
import java.io.IOException;
|
|
|
import java.util.Arrays;
|
|
|
+import java.util.LinkedHashMap;
|
|
|
import java.util.List;
|
|
|
import java.util.Map;
|
|
|
|
|
|
import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.NEW_SPARSE_VECTOR_INDEX_VERSION;
|
|
|
import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.PREVIOUS_SPARSE_VECTOR_INDEX_VERSION;
|
|
|
+import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
|
|
|
import static org.hamcrest.Matchers.containsString;
|
|
|
+import static org.hamcrest.Matchers.equalTo;
|
|
|
+import static org.hamcrest.Matchers.instanceOf;
|
|
|
|
|
|
public class SparseVectorFieldMapperTests extends MapperTestCase {
|
|
|
|
|
|
@Override
|
|
|
protected Object getSampleValueForDocument() {
|
|
|
- return Map.of("ten", 10, "twenty", 20);
|
|
|
+ Map<String, Float> map = new LinkedHashMap<>();
|
|
|
+ map.put("ten", 10f);
|
|
|
+ map.put("twenty", 20f);
|
|
|
+ return map;
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -92,14 +106,18 @@ public class SparseVectorFieldMapperTests extends MapperTestCase {
|
|
|
|
|
|
List<IndexableField> fields = doc1.rootDoc().getFields("field");
|
|
|
assertEquals(2, fields.size());
|
|
|
- assertThat(fields.get(0), Matchers.instanceOf(FeatureField.class));
|
|
|
- FeatureField featureField1 = null;
|
|
|
- FeatureField featureField2 = null;
|
|
|
+ if (IndexVersion.current().luceneVersion().major == 10) {
|
|
|
+ // TODO: Update to use Lucene's FeatureField after upgrading to Lucene 10.1.
|
|
|
+ assertThat(IndexVersion.current().luceneVersion().minor, equalTo(0));
|
|
|
+ }
|
|
|
+ assertThat(fields.get(0), Matchers.instanceOf(XFeatureField.class));
|
|
|
+ XFeatureField featureField1 = null;
|
|
|
+ XFeatureField featureField2 = null;
|
|
|
for (IndexableField field : fields) {
|
|
|
if (field.stringValue().equals("ten")) {
|
|
|
- featureField1 = (FeatureField) field;
|
|
|
+ featureField1 = (XFeatureField) field;
|
|
|
} else if (field.stringValue().equals("twenty")) {
|
|
|
- featureField2 = (FeatureField) field;
|
|
|
+ featureField2 = (XFeatureField) field;
|
|
|
} else {
|
|
|
throw new UnsupportedOperationException();
|
|
|
}
|
|
@@ -116,14 +134,14 @@ public class SparseVectorFieldMapperTests extends MapperTestCase {
|
|
|
|
|
|
List<IndexableField> fields = parsedDocument.rootDoc().getFields("field");
|
|
|
assertEquals(2, fields.size());
|
|
|
- assertThat(fields.get(0), Matchers.instanceOf(FeatureField.class));
|
|
|
- FeatureField featureField1 = null;
|
|
|
- FeatureField featureField2 = null;
|
|
|
+ assertThat(fields.get(0), Matchers.instanceOf(XFeatureField.class));
|
|
|
+ XFeatureField featureField1 = null;
|
|
|
+ XFeatureField featureField2 = null;
|
|
|
for (IndexableField field : fields) {
|
|
|
if (field.stringValue().equals("foo.bar")) {
|
|
|
- featureField1 = (FeatureField) field;
|
|
|
+ featureField1 = (XFeatureField) field;
|
|
|
} else if (field.stringValue().equals("foobar")) {
|
|
|
- featureField2 = (FeatureField) field;
|
|
|
+ featureField2 = (XFeatureField) field;
|
|
|
} else {
|
|
|
throw new UnsupportedOperationException();
|
|
|
}
|
|
@@ -171,13 +189,13 @@ public class SparseVectorFieldMapperTests extends MapperTestCase {
|
|
|
}));
|
|
|
|
|
|
// then validate that the generate document stored both values appropriately and we have only the max value stored
|
|
|
- FeatureField barField = ((FeatureField) doc1.rootDoc().getByKey("foo.field\\.bar"));
|
|
|
+ XFeatureField barField = ((XFeatureField) doc1.rootDoc().getByKey("foo.field\\.bar"));
|
|
|
assertEquals(20, barField.getFeatureValue(), 1);
|
|
|
|
|
|
- FeatureField storedBarField = ((FeatureField) doc1.rootDoc().getFields("foo.field").get(1));
|
|
|
+ XFeatureField storedBarField = ((XFeatureField) doc1.rootDoc().getFields("foo.field").get(1));
|
|
|
assertEquals(20, storedBarField.getFeatureValue(), 1);
|
|
|
|
|
|
- assertEquals(3, doc1.rootDoc().getFields().stream().filter((f) -> f instanceof FeatureField).count());
|
|
|
+ assertEquals(3, doc1.rootDoc().getFields().stream().filter((f) -> f instanceof XFeatureField).count());
|
|
|
}
|
|
|
|
|
|
public void testCannotBeUsedInMultiFields() {
|
|
@@ -192,6 +210,53 @@ public class SparseVectorFieldMapperTests extends MapperTestCase {
|
|
|
assertThat(e.getMessage(), containsString("Field [feature] of type [sparse_vector] can't be used in multifields"));
|
|
|
}
|
|
|
|
|
|
+ public void testStoreIsNotUpdateable() throws IOException {
|
|
|
+ var mapperService = createMapperService(fieldMapping(this::minimalMapping));
|
|
|
+ XContentBuilder mapping = jsonBuilder().startObject()
|
|
|
+ .startObject("_doc")
|
|
|
+ .startObject("properties")
|
|
|
+ .startObject("field")
|
|
|
+ .field("type", "sparse_vector")
|
|
|
+ .field("store", true)
|
|
|
+ .endObject()
|
|
|
+ .endObject()
|
|
|
+ .endObject()
|
|
|
+ .endObject();
|
|
|
+ var exc = expectThrows(
|
|
|
+ Exception.class,
|
|
|
+ () -> mapperService.merge("_doc", new CompressedXContent(Strings.toString(mapping)), MapperService.MergeReason.MAPPING_UPDATE)
|
|
|
+ );
|
|
|
+ assertThat(exc.getMessage(), containsString("Cannot update parameter [store]"));
|
|
|
+ }
|
|
|
+
|
|
|
+ @SuppressWarnings("unchecked")
|
|
|
+ public void testValueFetcher() throws Exception {
|
|
|
+ for (boolean store : new boolean[] { true, false }) {
|
|
|
+ var mapperService = createMapperService(fieldMapping(store ? this::minimalStoreMapping : this::minimalMapping));
|
|
|
+ var mapper = mapperService.documentMapper();
|
|
|
+ try (Directory directory = newDirectory()) {
|
|
|
+ RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
|
|
|
+ var sourceToParse = source(this::writeField);
|
|
|
+ ParsedDocument doc1 = mapper.parse(sourceToParse);
|
|
|
+ iw.addDocument(doc1.rootDoc());
|
|
|
+ iw.close();
|
|
|
+ try (DirectoryReader reader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) {
|
|
|
+ LeafReader leafReader = getOnlyLeafReader(reader);
|
|
|
+ var searchContext = createSearchExecutionContext(mapperService, new IndexSearcher(leafReader));
|
|
|
+ var fieldType = mapper.mappers().getFieldType("field");
|
|
|
+ var valueFetcher = fieldType.valueFetcher(searchContext, null);
|
|
|
+ valueFetcher.setNextReader(leafReader.getContext());
|
|
|
+
|
|
|
+ var source = Source.fromBytes(sourceToParse.source());
|
|
|
+ var result = valueFetcher.fetchValues(source, 0, List.of());
|
|
|
+ assertThat(result.size(), equalTo(1));
|
|
|
+ assertThat(result.get(0), instanceOf(Map.class));
|
|
|
+ assertThat(toFloats((Map<String, ?>) result.get(0)), equalTo(toFloats((Map<String, ?>) source.source().get("field"))));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
@Override
|
|
|
protected Object generateRandomInputValue(MappedFieldType ft) {
|
|
|
assumeFalse("Test implemented in a follow up", true);
|
|
@@ -205,7 +270,29 @@ public class SparseVectorFieldMapperTests extends MapperTestCase {
|
|
|
|
|
|
@Override
|
|
|
protected SyntheticSourceSupport syntheticSourceSupport(boolean syntheticSource) {
|
|
|
- throw new AssumptionViolatedException("not supported");
|
|
|
+ boolean withStore = randomBoolean();
|
|
|
+ return new SyntheticSourceSupport() {
|
|
|
+ @Override
|
|
|
+ public boolean preservesExactSource() {
|
|
|
+ return withStore == false;
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public SyntheticSourceExample example(int maxValues) {
|
|
|
+ return new SyntheticSourceExample(getSampleValueForDocument(), getSampleValueForDocument(), b -> {
|
|
|
+ if (withStore) {
|
|
|
+ minimalStoreMapping(b);
|
|
|
+ } else {
|
|
|
+ minimalMapping(b);
|
|
|
+ }
|
|
|
+ });
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public List<SyntheticSourceInvalidExample> invalidExample() {
|
|
|
+ return List.of();
|
|
|
+ }
|
|
|
+ };
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -276,4 +363,20 @@ public class SparseVectorFieldMapperTests extends MapperTestCase {
|
|
|
})));
|
|
|
assertThat(e.getMessage(), containsString(SparseVectorFieldMapper.ERROR_MESSAGE_8X));
|
|
|
}
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Handles float/double conversion when reading/writing with xcontent by converting all numbers to floats.
|
|
|
+ */
|
|
|
+ private Map<String, Float> toFloats(Map<String, ?> value) {
|
|
|
+ // preserve order
|
|
|
+ Map<String, Float> result = new LinkedHashMap<>();
|
|
|
+ for (var entry : value.entrySet()) {
|
|
|
+ if (entry.getValue() instanceof Number num) {
|
|
|
+ result.put(entry.getKey(), num.floatValue());
|
|
|
+ } else {
|
|
|
+ throw new IllegalArgumentException("Expected Number, got: " + entry.getValue().getClass().getSimpleName());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
}
|