Browse Source

Synthetic `_source`: support `wildcard` field (#90196)

This adds synthetic `_source` support for the `wildcard` field type.
Nik Everett 3 years ago
parent
commit
82aeb478db

+ 5 - 0
docs/changelog/90196.yaml

@@ -0,0 +1,5 @@
+pr: 90196
+summary: "Synthetic `_source`: support `wildcard` field"
+area: TSDB
+type: enhancement
+issues: []

+ 1 - 0
docs/reference/mapping/fields/synthetic-source.asciidoc

@@ -47,6 +47,7 @@ types:
 ** <<numeric-synthetic-source,`short`>>
 ** <<text-synthetic-source,`text`>>
 ** <<version-synthetic-source,`version`>>
+** <<wildcard-synthetic-source,`wildcard`>>
 
 Runtime fields cannot, at this stage, use synthetic `_source`.
 

+ 33 - 0
docs/reference/mapping/types/wildcard.asciidoc

@@ -131,3 +131,36 @@ The following parameters are accepted by `wildcard` fields:
 * `wildcard` fields are untokenized like keyword fields, so do not support queries that rely on word positions such as phrase queries.
 * When running `wildcard` queries any `rewrite` parameter is ignored. The scoring is always a constant score.
 
+[[wildcard-synthetic-source]]
+==== Synthetic `_source`
+`wildcard` fields support <<synthetic-source,synthetic `_source`>> so long as they don't
+declare <<copy-to,`copy_to`>>.
+
+Synthetic source always sorts `wildcard` fields and removes duplicates. For example:
+[source,console,id=synthetic-source-wildcard-example]
+----
+PUT idx
+{
+  "mappings": {
+    "_source": { "mode": "synthetic" },
+    "properties": {
+      "card": { "type": "wildcard" }
+    }
+  }
+}
+PUT idx/_doc/1
+{
+  "card": ["king", "ace", "ace", "jack"]
+}
+----
+// TEST[s/$/\nGET idx\/_doc\/1?filter_path=_source\n/]
+
+Will become:
+
+[source,console-result]
+----
+{
+  "card": ["ace", "jack", "king"]
+}
+----
+// TEST[s/^/{"_source":/ s/\n$/}/]

+ 94 - 0
x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java

@@ -16,8 +16,11 @@ import org.apache.lucene.analysis.ngram.NGramTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanClause.Occur;
@@ -41,6 +44,7 @@ import org.apache.lucene.util.automaton.Operations;
 import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.Version;
 import org.elasticsearch.common.geo.ShapeRelation;
+import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
 import org.elasticsearch.common.lucene.BytesRefs;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.RegExp;
@@ -60,11 +64,13 @@ import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.LuceneDocument;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.MapperBuilderContext;
+import org.elasticsearch.index.mapper.SourceLoader;
 import org.elasticsearch.index.mapper.SourceValueFetcher;
 import org.elasticsearch.index.mapper.TextSearchInfo;
 import org.elasticsearch.index.mapper.ValueFetcher;
 import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
+import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentParser;
 import org.elasticsearch.xpack.wildcard.WildcardDocValuesField;
 
@@ -78,6 +84,9 @@ import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.stream.Stream;
+
+import static java.util.Collections.emptyList;
 
 /**
  * A {@link FieldMapper} for indexing fields with ngrams for efficient wildcard matching
@@ -918,11 +927,18 @@ public class WildcardFieldMapper extends FieldMapper {
                 createFields(value, parseDoc, fields);
             } else {
                 context.addIgnoredField(name());
+                if (context.isSyntheticSource()) {
+                    parseDoc.add(new StoredField(originalName(), new BytesRef(value)));
+                }
             }
         }
         parseDoc.addAll(fields);
     }
 
+    /**
+     * Name of the stored field that receives the raw value when a value takes the
+     * ignored-field branch while synthetic source is enabled.
+     */
+    private String originalName() {
+        final String fieldName = name();
+        return fieldName + "._original";
+    }
+
     void createFields(String value, LuceneDocument parseDoc, List<IndexableField> fields) {
         String ngramValue = addLineEndChars(value);
         Field ngramField = new Field(fieldType().name(), ngramValue, ngramFieldType);
@@ -951,4 +967,82 @@ public class WildcardFieldMapper extends FieldMapper {
     public FieldMapper.Builder getMergeBuilder() {
         return new Builder(simpleName(), indexVersionCreated).init(this);
     }
+
+    /**
+     * Creates the loader that rebuilds this field's values for synthetic {@code _source}.
+     *
+     * @throws IllegalArgumentException if the field declares {@code copy_to}
+     */
+    @Override
+    public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
+        if (copyTo.copyToFields().isEmpty()) {
+            return new WildcardSyntheticFieldLoader();
+        }
+        String message = "field ["
+            + name()
+            + "] of type ["
+            + typeName()
+            + "] doesn't support synthetic source because it declares copy_to";
+        throw new IllegalArgumentException(message);
+    }
+
+    /**
+     * Rebuilds this field's values for synthetic {@code _source}. Values are gathered from
+     * two places: the field's binary doc values and the stored field named by
+     * {@code originalName()}. Per-document state is kept in the fields below, filled in by
+     * the loaders and consumed by {@link #write}.
+     */
+    private class WildcardSyntheticFieldLoader implements SourceLoader.SyntheticFieldLoader {
+        // Reused reader over the current document's doc value bytes; its position advances as values are read.
+        private final ByteArrayStreamInput docValuesStream = new ByteArrayStreamInput();
+        // Number of values in the current document's doc value, read from the leading vInt.
+        private int docValueCount;
+        // The current document's doc value; write() emits values straight out of its backing array.
+        private BytesRef docValueBytes;
+
+        // Values delivered by the stored field loader; reset to empty at the end of write().
+        private List<Object> storedValues = emptyList();
+
+        /**
+         * Requests the {@code originalName()} stored field, but only when {@code ignoreAbove}
+         * differs from its default; otherwise no stored fields are requested.
+         */
+        @Override
+        public Stream<Map.Entry<String, StoredFieldLoader>> storedFieldLoaders() {
+            if (ignoreAbove != Defaults.IGNORE_ABOVE) {
+                return Stream.of(Map.entry(originalName(), storedValues -> this.storedValues = storedValues));
+            }
+            return Stream.empty();
+        }
+
+        /**
+         * Builds the per-leaf doc values loader.
+         *
+         * @return {@code null} when the leaf has no binary doc values under this field's name,
+         *         otherwise a loader that positions {@code docValuesStream} just after the value count
+         */
+        @Override
+        public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
+            BinaryDocValues values = leafReader.getBinaryDocValues(name());
+            if (values == null) {
+                docValueCount = 0;
+                return null;
+            }
+
+            return docId -> {
+                if (values.advanceExact(docId) == false) {
+                    // No doc value for this document; stored values may still make hasValue() true.
+                    docValueCount = 0;
+                    return hasValue();
+                }
+                docValueBytes = values.binaryValue();
+                // Read from the BytesRef's backing array, starting at its offset.
+                docValuesStream.reset(docValueBytes.bytes);
+                docValuesStream.setPosition(docValueBytes.offset);
+                // The value count comes first; the stream is left at the first value's length prefix.
+                docValueCount = docValuesStream.readVInt();
+                return hasValue();
+            };
+        }
+
+        /** True when the current document has at least one doc value or one stored value. */
+        @Override
+        public boolean hasValue() {
+            return docValueCount > 0 || storedValues.isEmpty() == false;
+        }
+
+        /**
+         * Writes the current document's values under {@code simpleName()}: nothing for zero
+         * values, a plain field for exactly one, and an array otherwise. Doc values are written
+         * first, followed by the stored values.
+         */
+        @Override
+        public void write(XContentBuilder b) throws IOException {
+            switch (docValueCount + storedValues.size()) {
+                case 0:
+                    return;
+                case 1:
+                    b.field(simpleName());
+                    break;
+                default:
+                    b.startArray(simpleName());
+            }
+            for (int i = 0; i < docValueCount; i++) {
+                // Each value is a vInt length followed by that many bytes, written as a UTF-8 value.
+                int length = docValuesStream.readVInt();
+                b.utf8Value(docValueBytes.bytes, docValuesStream.getPosition(), length);
+                // The bytes were emitted directly from the array, so move the stream past them by hand.
+                docValuesStream.skipBytes(length);
+            }
+            for (Object o : storedValues) {
+                BytesRef r = (BytesRef) o;
+                b.utf8Value(r.bytes, r.offset, r.length);
+            }
+            if (docValueCount + storedValues.size() > 1) {
+                b.endArray();
+            }
+            // Clear so these stored values are not written again by a later call.
+            storedValues = emptyList();
+        }
+    }
 }

+ 64 - 4
x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java

@@ -47,6 +47,7 @@ import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.common.lucene.search.AutomatonQueries;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.Fuzziness;
+import org.elasticsearch.core.Tuple;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.cache.bitset.BitsetFilterCache;
@@ -77,6 +78,7 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
+import java.util.List;
 import java.util.function.BiFunction;
 
 import static java.util.Collections.emptyMap;
@@ -1208,6 +1210,11 @@ public class WildcardFieldMapperTests extends MapperTestCase {
         return true;
     }
 
+    @Override
+    protected IngestScriptSupport ingestScriptSupport() {
+        // NOTE(review): presumably the assumption failure makes the inherited ingest-script tests skip - confirm against MapperTestCase.
+        final String reason = "not supported";
+        throw new AssumptionViolatedException(reason);
+    }
+
     @Override
     protected boolean supportsIgnoreMalformed() {
         return false;
@@ -1215,11 +1222,64 @@ public class WildcardFieldMapperTests extends MapperTestCase {
 
     @Override
     protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) {
-        throw new AssumptionViolatedException("not supported");
+        assertFalse("ignore_malformed is not supported by [wildcard] field", ignoreMalformed);
+        return new WildcardSyntheticSourceSupport();
     }
 
-    @Override
-    protected IngestScriptSupport ingestScriptSupport() {
-        throw new AssumptionViolatedException("not supported");
+    /**
+     * Produces random {@code wildcard} mappings together with input values and the synthetic
+     * {@code _source} expected for them. The mapping options ({@code ignore_above},
+     * {@code null_value}) are chosen once per instance.
+     */
+    static class WildcardSyntheticSourceSupport implements SyntheticSourceSupport {
+        private final Integer ignoreAbove = randomBoolean() ? null : between(10, 100);
+        private final boolean allIgnored = ignoreAbove != null && rarely();
+        private final String nullValue = usually() ? null : randomAlphaOfLength(2);
+
+        /**
+         * Builds either a single-value example or a multi-value example of up to
+         * {@code maxValues} entries, paired with the expected synthetic source.
+         */
+        @Override
+        public SyntheticSourceExample example(int maxValues) {
+            if (randomBoolean()) {
+                Tuple<String, String> v = generateValue();
+                return new SyntheticSourceExample(v.v1(), v.v2(), this::mapping);
+            }
+            List<Tuple<String, String>> values = randomList(1, maxValues, this::generateValue);
+            List<String> in = values.stream().map(Tuple::v1).toList();
+            List<String> docValuesValues = new ArrayList<>();
+            List<String> outExtraValues = new ArrayList<>();
+            for (Tuple<String, String> value : values) {
+                String expected = value.v2();
+                if (ignoreAbove != null && expected.length() > ignoreAbove) {
+                    outExtraValues.add(expected);
+                } else {
+                    docValuesValues.add(expected);
+                }
+            }
+            // Indexed values come back sorted and without duplicates, so collapse repeats
+            // (most likely a repeated null_value) before sorting the expectation.
+            List<String> outList = new ArrayList<>(new HashSet<>(docValuesValues));
+            Collections.sort(outList);
+            // Values longer than ignore_above follow the sorted values, in input order.
+            outList.addAll(outExtraValues);
+            Object out = outList.size() == 1 ? outList.get(0) : outList;
+            return new SyntheticSourceExample(in, out, this::mapping);
+        }
+
+        /**
+         * Generates one (input, expected output) pair. A {@code null} input maps to the
+         * configured {@code null_value}; otherwise input and output are the same random string,
+         * which is made longer than {@code ignore_above} when that path is chosen.
+         */
+        private Tuple<String, String> generateValue() {
+            if (nullValue != null && randomBoolean()) {
+                return Tuple.tuple(null, nullValue);
+            }
+            int length = 5;
+            if (ignoreAbove != null && (allIgnored || randomBoolean())) {
+                length = ignoreAbove + 5;
+            }
+            String v = randomAlphaOfLength(length);
+            return Tuple.tuple(v, v);
+        }
+
+        /** Writes the field mapping, including only the options that were chosen for this instance. */
+        private void mapping(XContentBuilder b) throws IOException {
+            b.field("type", "wildcard");
+            if (nullValue != null) {
+                b.field("null_value", nullValue);
+            }
+            if (ignoreAbove != null) {
+                b.field("ignore_above", ignoreAbove);
+            }
+        }
+
+        /** No invalid mapping examples are provided. */
+        @Override
+        public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
+            return List.of();
+        }
+    }
+
 }