5 vuotta sitten · 2537e02a7d
--- a/docs/reference/mapping/types/wildcard.asciidoc
+++ b/docs/reference/mapping/types/wildcard.asciidoc
@@ -48,6 +48,23 @@ POST my_index/_doc/_search
 
				 --------------------------------------------------
			
 
				 
			
 
				 
			
 
				+[[wildcard-params]]
			
 
				+==== Parameters for wildcard fields
			
 
				+
			
 
				+The following parameters are accepted by `wildcard` fields:
			
 
				+
			
 
				+[horizontal]
			
 
				+
			
 
				+<<ignore-above,`ignore_above`>>::
			
 
				+
			
 
				+    Do not index any string longer than this value.  Defaults to `2147483647`
			
 
				+    so that all values are accepted.
			
 
				+
			
 
				+<<normalizer,`normalizer`>>::
			
 
				+
			
 
				+    How to pre-process the value prior to indexing. Defaults to `null`,
			
 
				+    meaning the value is kept as-is.
			
 
				+
			
 
				 ==== Limitations
			
 
				 
			
 
				 * `wildcard` fields are untokenized like keyword fields, so do not support queries that rely on word positions such as phrase queries.
			
--- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java
@@ -368,14 +368,14 @@ public abstract class MappedFieldType extends FieldType {
 
				     }
			
 
				 
			
 
				     public Query prefixQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, QueryShardContext context) {
			
 
				-        throw new QueryShardException(context, "Can only use prefix queries on keyword and text fields - not on [" + name
			
 
				+        throw new QueryShardException(context, "Can only use prefix queries on keyword, text and wildcard fields - not on [" + name
			
 
				             + "] which is of type [" + typeName() + "]");
			
 
				     }
			
 
				 
			
 
				     public Query wildcardQuery(String value,
			
 
				                                @Nullable MultiTermQuery.RewriteMethod method,
			
 
				                                QueryShardContext context) {
			
 
				-        throw new QueryShardException(context, "Can only use wildcard queries on keyword and text fields - not on [" + name
			
 
				+        throw new QueryShardException(context, "Can only use wildcard queries on keyword, text and wildcard fields - not on [" + name
			
 
				             + "] which is of type [" + typeName() + "]");
			
 
				     }
			
 
				 
			
--- a/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java
@@ -19,6 +19,7 @@
 
				 
			
 
				 package org.elasticsearch.index.mapper;
			
 
				 
			
 
				+import org.apache.lucene.analysis.Analyzer;
			
 
				 import org.apache.lucene.index.Term;
			
 
				 import org.apache.lucene.search.FuzzyQuery;
			
 
				 import org.apache.lucene.search.MultiTermQuery;
			
@@ -93,6 +94,36 @@ public abstract class StringFieldType extends TermBasedFieldType {
 
				         return query;
			
 
				     }
			
 
				 
			
 
				+    /**
				+     * Normalizes the literal parts of a wildcard pattern while copying every {@code WILDCARD_PATTERN}
				+     * match through unchanged, e.g. {@code F?o Ba*} becomes {@code f?o ba*} with a lowercasing normalizer.
				+     *
				+     * @param fieldname  field name handed to {@link Analyzer#normalize(String, String)}
				+     * @param value      the raw wildcard pattern
				+     * @param normalizer analyzer applied to the literal chunks; when {@code null} the pattern is returned as-is
				+     * @return the pattern with each chunk between matches normalized
				+     */
				+    public static final String normalizeWildcardPattern(String fieldname, String value, Analyzer normalizer) {
				+        if (normalizer == null) {
				+            return value;
				+        }
				+        // we want to normalize everything except wildcard characters, e.g. F?o Ba* to f?o ba*, even if e.g there
				+        // is a char_filter that would otherwise remove them
				+        Matcher wildcardMatcher = WILDCARD_PATTERN.matcher(value);
				+        BytesRefBuilder sb = new BytesRefBuilder();
				+        int last = 0;
				+
				+        while (wildcardMatcher.find()) {
				+            // Compare against `last`, not 0: when a match starts right where the previous one ended there is
				+            // no literal text in between, and an empty chunk should not be sent through the normalizer.
				+            if (wildcardMatcher.start() > last) {
				+                String chunk = value.substring(last, wildcardMatcher.start());
				+
				+                BytesRef normalized = normalizer.normalize(fieldname, chunk);
				+                sb.append(normalized);
				+            }
				+            // append the matched group - without normalizing
				+            sb.append(new BytesRef(wildcardMatcher.group()));
				+
				+            last = wildcardMatcher.end();
				+        }
				+        // normalize whatever literal text follows the final match (or the whole value if nothing matched)
				+        if (last < value.length()) {
				+            String chunk = value.substring(last);
				+            BytesRef normalized = normalizer.normalize(fieldname, chunk);
				+            sb.append(normalized);
				+        }
				+        return sb.toBytesRef().utf8ToString();
				+    }
			
 
				+    
			
 
				     @Override
			
 
				     public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
			
 
				         failIfNotIndexed();
			
@@ -103,30 +134,8 @@ public abstract class StringFieldType extends TermBasedFieldType {
 
				 
			
 
				         Term term;
			
 
				         if (searchAnalyzer() != null) {
			
 
				-            // we want to normalize everything except wildcard characters, e.g. F?o Ba* to f?o ba*, even if e.g there
			
 
				-            // is a char_filter that would otherwise remove them
			
 
				-            Matcher wildcardMatcher = WILDCARD_PATTERN.matcher(value);
			
 
				-            BytesRefBuilder sb = new BytesRefBuilder();
			
 
				-            int last = 0;
			
 
				-
			
 
				-            while (wildcardMatcher.find()) {
			
 
				-                if (wildcardMatcher.start() > 0) {
			
 
				-                    String chunk = value.substring(last, wildcardMatcher.start());
			
 
				-
			
 
				-                    BytesRef normalized = searchAnalyzer().normalize(name(), chunk);
			
 
				-                    sb.append(normalized);
			
 
				-                }
			
 
				-                // append the matched group - without normalizing
			
 
				-                sb.append(new BytesRef(wildcardMatcher.group()));
			
 
				-
			
 
				-                last = wildcardMatcher.end();
			
 
				-            }
			
 
				-            if (last < value.length()) {
			
 
				-                String chunk = value.substring(last);
			
 
				-                BytesRef normalized = searchAnalyzer().normalize(name(), chunk);
			
 
				-                sb.append(normalized);
			
 
				-            }
			
 
				-            term = new Term(name(), sb.toBytesRef());
			
 
				+            value = normalizeWildcardPattern(name(), value, searchAnalyzer());
			
 
				+            term = new Term(name(), value);
			
 
				         } else {
			
 
				             term = new Term(name(), indexedValueForSearch(value));
			
 
				         }
			
--- a/server/src/main/java/org/elasticsearch/index/query/QueryBuilders.java
+++ b/server/src/main/java/org/elasticsearch/index/query/QueryBuilders.java
@@ -239,7 +239,7 @@ public final class QueryBuilders {
 
				      * which matches any single character. Note this query can be slow, as it
			
 
				      * needs to iterate over many terms. In order to prevent extremely slow WildcardQueries,
			
 
				      * a Wildcard term should not start with one of the wildcards {@code *} or
			
 
				-     * {@code ?}.
			
 
				+     * {@code ?}. (The wildcard field type, however, is optimised for leading wildcards.)
			
 
				      *
			
 
				      * @param name  The field name
			
 
				      * @param query The wildcard query string
			
--- a/server/src/test/java/org/elasticsearch/index/query/PrefixQueryBuilderTests.java
+++ b/server/src/test/java/org/elasticsearch/index/query/PrefixQueryBuilderTests.java
@@ -116,7 +116,7 @@ public class PrefixQueryBuilderTests extends AbstractQueryTestCase<PrefixQueryBu
 
				         QueryShardContext context = createShardContext();
			
 
				         QueryShardException e = expectThrows(QueryShardException.class,
			
 
				                 () -> query.toQuery(context));
			
 
				-        assertEquals("Can only use prefix queries on keyword and text fields - not on [mapped_int] which is of type [integer]",
			
 
				+        assertEquals("Can only use prefix queries on keyword, text and wildcard fields - not on [mapped_int] which is of type [integer]",
			
 
				                 e.getMessage());
			
 
				     }
			
 
				 
			
--- a/server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java
+++ b/server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java
@@ -813,7 +813,7 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
 
				         QueryShardContext context = createShardContext();
			
 
				         QueryShardException e = expectThrows(QueryShardException.class,
			
 
				                 () -> query.toQuery(context));
			
 
				-        assertEquals("Can only use prefix queries on keyword and text fields - not on [mapped_int] which is of type [integer]",
			
 
				+        assertEquals("Can only use prefix queries on keyword, text and wildcard fields - not on [mapped_int] which is of type [integer]",
			
 
				                 e.getMessage());
			
 
				         query.lenient(true);
			
 
				         query.toQuery(context); // no exception
			
--- a/x-pack/plugin/src/test/resources/rest-api-spec/test/wildcard/10_wildcard_basic.yml
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/wildcard/10_wildcard_basic.yml
@@ -1,8 +1,8 @@
 
				 setup:
			
 
				   - skip:
			
 
				       features: headers
			
 
				-      version: " - 7.9.99"
			
 
				-      reason: "wildcard fields were added from 8.0"
			
 
				+      version: " - 7.6.99"
			
 
				+      reason: "wildcard fields were added from 7.7"
			
 
				 
			
 
				   - do:
			
 
				       indices.create:
			
@@ -10,10 +10,20 @@ setup:
 
				         body:
			
 
				           settings:
			
 
				             number_of_replicas: 0
			
 
				+            analysis:
			
 
				+              normalizer:
			
 
				+                lowercase:
			
 
				+                  type: custom
			
 
				+                  char_filter: []
			
 
				+                  filter: ["lowercase"]
			
 
				           mappings:
			
 
				             properties:
			
 
				               my_wildcard:
			
 
				                  type: wildcard
			
 
				+                 normalizer: lowercase
			
 
				+                 fields:
			
 
				+                   case_sensitive:
			
 
				+                     type: wildcard
			
 
				   - do:
			
 
				       index:
			
 
				         index: test-index
			
@@ -26,6 +36,12 @@ setup:
 
				         id: 2
			
 
				         body:
			
 
				           my_wildcard: goodbye world
			
 
				+  - do:
			
 
				+      index:
			
 
				+        index: test-index
			
 
				+        id: 3
			
 
				+        body:
			
 
				+          my_wildcard: cAsE iNsEnSiTiVe World
			
 
				 
			
 
				   - do:
			
 
				       indices.refresh: {}
			
@@ -80,6 +96,31 @@ setup:
 
				               my_wildcard: {value: "*ello worl*" }
			
 
				 
			
 
				 
			
 
				+  - match: {hits.total.value: 1}
			
 
				+---
			
 
				+"Case insensitive query":
			
 
				+  - do:
			
 
				+      search:
			
 
				+        body:
			
 
				+          track_total_hits: true
			
 
				+          query:
			
 
				+            wildcard:
			
 
				+              my_wildcard: {value: "*Worl*" }
			
 
				+
			
 
				+
			
 
				+  - match: {hits.total.value: 3}
			
 
				+
			
 
				+---
			
 
				+"Case sensitive query":
			
 
				+  - do:
			
 
				+      search:
			
 
				+        body:
			
 
				+          track_total_hits: true
			
 
				+          query:
			
 
				+            wildcard:
			
 
				+              my_wildcard.case_sensitive: {value: "*Worl*" }
			
 
				+
			
 
				+
			
 
				   - match: {hits.total.value: 1}
			
 
				 
			
 
				 ---
			
@@ -93,7 +134,7 @@ setup:
 
				               my_wildcard: {value: "*ld" }
			
 
				 
			
 
				 
			
 
				-  - match: {hits.total.value: 2}
			
 
				+  - match: {hits.total.value: 3}
			
 
				 
			
 
				 ---
			
 
				 "Long suffix query":
			
@@ -188,8 +229,8 @@ setup:
 
				               terms: {field: "my_wildcard" }
			
 
				 
			
 
				 
			
 
				-  - match: {hits.total.value: 2}
			
 
				-  - length: { aggregations.top_vals.buckets: 2 }
			
 
				+  - match: {hits.total.value: 3}
			
 
				+  - length: { aggregations.top_vals.buckets: 3 }
			
 
				 
			
 
				 ---
			
 
				 "Sort works":
			
@@ -199,10 +240,11 @@ setup:
 
				           track_total_hits: true
			
 
				           sort: [ { "my_wildcard": "desc" } ]
			
 
				 
			
 
				-  - match: { hits.total.value: 2 }
			
 
				-  - length: { hits.hits: 2 }
			
 
				+  - match: { hits.total.value: 3 }
			
 
				+  - length: { hits.hits: 3 }
			
 
				   - match: { hits.hits.0._id: "1" }
			
 
				   - match: { hits.hits.1._id: "2" }
			
 
				+  - match: { hits.hits.2._id: "3" }
			
 
				 
			
 
				   - do:
			
 
				       search:
			
@@ -210,9 +252,9 @@ setup:
 
				           track_total_hits: true
			
 
				           sort: [ { "my_wildcard": "asc" } ]
			
 
				 
			
 
				-  - match: { hits.total.value: 2 }
			
 
				-  - length: { hits.hits: 2 }
			
 
				-  - match: { hits.hits.0._id: "2" }
			
 
				-  - match: { hits.hits.1._id: "1" }
			
 
				-
			
 
				+  - match: { hits.total.value: 3 }
			
 
				+  - length: { hits.hits: 3 }
			
 
				+  - match: { hits.hits.0._id: "3" }
			
 
				+  - match: { hits.hits.1._id: "2" }
			
 
				+  - match: { hits.hits.2._id: "1" }
			
 
				 
			
--- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java
+++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java
@@ -39,6 +39,7 @@ import org.elasticsearch.common.xcontent.support.XContentMapValues;
 
				 import org.elasticsearch.index.Index;
			
 
				 import org.elasticsearch.index.IndexSettings;
			
 
				 import org.elasticsearch.index.analysis.AnalyzerScope;
			
 
				+import org.elasticsearch.index.analysis.IndexAnalyzers;
			
 
				 import org.elasticsearch.index.analysis.NamedAnalyzer;
			
 
				 import org.elasticsearch.index.fielddata.IndexFieldData;
			
 
				 import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
			
@@ -53,6 +54,7 @@ import org.elasticsearch.index.mapper.MapperParsingException;
 
				 import org.elasticsearch.index.mapper.MapperService;
			
 
				 import org.elasticsearch.index.mapper.ParseContext;
			
 
				 import org.elasticsearch.index.mapper.ParseContext.Document;
			
 
				+import org.elasticsearch.index.mapper.StringFieldType;
			
 
				 import org.elasticsearch.index.query.QueryShardContext;
			
 
				 import org.elasticsearch.index.similarity.SimilarityProvider;
			
 
				 import org.elasticsearch.indices.breaker.CircuitBreakerService;
			
@@ -64,6 +66,7 @@ import java.util.ArrayList;
 
				 import java.util.Iterator;
			
 
				 import java.util.List;
			
 
				 import java.util.Map;
			
 
				+import java.util.Objects;
			
 
				 
			
 
				 import static org.elasticsearch.index.mapper.TypeParsers.parseField;
			
 
				 
			
@@ -100,6 +103,9 @@ public class WildcardFieldMapper extends FieldMapper {
 
				 
			
 
				     public static class Builder extends FieldMapper.Builder<Builder, WildcardFieldMapper> {
			
 
				         protected int ignoreAbove = Defaults.IGNORE_ABOVE;
			
 
				+        private IndexAnalyzers indexAnalyzers;
			
 
				+        private String normalizerName;
			
 
				+        
			
 
				 
			
 
				         public Builder(String name) {
			
 
				             super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
			
@@ -164,10 +170,23 @@ public class WildcardFieldMapper extends FieldMapper {
 
				         public WildcardFieldType fieldType() {
			
 
				             return (WildcardFieldType) super.fieldType();
			
 
				         }
			
 
				+        
			
 
				+        /**
				+         * Remembers which normalizer this field should use. Only the name and the analyzer registry are
				+         * stored here; the lookup via {@code indexAnalyzers.getNormalizer(...)} happens in {@code build}.
				+         *
				+         * @param indexAnalyzers registry the normalizer is later looked up in
				+         * @param name           name of the normalizer
				+         * @return the {@code builder} reference, for call chaining
				+         */
				+        public Builder normalizer(IndexAnalyzers indexAnalyzers, String name) {
				+            this.normalizerName = name;
				+            this.indexAnalyzers = indexAnalyzers;
				+            return builder;
				+        }
			
 
				 
			
 
				         @Override
			
 
				         public WildcardFieldMapper build(BuilderContext context) {
			
 
				-            setupFieldType(context);            
			
 
				+            setupFieldType(context);   
			
 
				+            if (normalizerName != null) {
			
 
				+                NamedAnalyzer normalizer = indexAnalyzers.getNormalizer(normalizerName);
			
 
				+                if (normalizer == null) {
			
 
				+                    throw new MapperParsingException("normalizer [" + normalizerName + "] not found for field [" + name + "]");
			
 
				+                }
			
 
				+                fieldType().setNormalizer(normalizer);
			
 
				+            }            
			
 
				             return new WildcardFieldMapper(
			
 
				                     name, fieldType, defaultFieldType, ignoreAbove, 
			
 
				                     context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
			
@@ -188,6 +207,11 @@ public class WildcardFieldMapper extends FieldMapper {
 
				                 if (propName.equals("ignore_above")) {
			
 
				                     builder.ignoreAbove(XContentMapValues.nodeIntegerValue(propNode, -1));
			
 
				                     iterator.remove();
			
 
				+                } else if (propName.equals("normalizer")) {
			
 
				+                    if (propNode != null) {
			
 
				+                        builder.normalizer(parserContext.getIndexAnalyzers(), propNode.toString());
			
 
				+                    }
			
 
				+                    iterator.remove();
			
 
				                 }
			
 
				             }            
			
 
				             
			
@@ -198,6 +222,8 @@ public class WildcardFieldMapper extends FieldMapper {
 
				      public static final char TOKEN_START_OR_END_CHAR = 0;    
			
 
				     
			
 
				      public static final class WildcardFieldType extends MappedFieldType {
			
 
				+         
			
 
				+        private NamedAnalyzer normalizer = null;
			
 
				 
			
 
				         public WildcardFieldType() {            
			
 
				             setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
			
@@ -206,6 +232,7 @@ public class WildcardFieldMapper extends FieldMapper {
 
				 
			
 
				         protected WildcardFieldType(WildcardFieldType ref) {
			
 
				             super(ref);
			
 
				+            this.normalizer = ref.normalizer;
			
 
				         }
			
 
				 
			
 
				         public WildcardFieldType clone() {
			
@@ -213,7 +240,39 @@ public class WildcardFieldMapper extends FieldMapper {
 
				             return result;
			
 
				         }
			
 
				         
			
 
				-                
			
 
				+
			
 
				+        /**
				+         * Two wildcard field types are equal when the superclass considers them equal and they share
				+         * the same normalizer.
				+         */
				+        @Override
				+        public boolean equals(Object o) {
				+            // NOTE(review): the cast relies on super.equals(o) rejecting null and other classes - confirm
				+            return super.equals(o) && Objects.equals(normalizer, ((WildcardFieldType) o).normalizer);
				+        }
			
 
				+
			
 
				+        /** Combines the superclass hash with the hash of the normalizer, matching {@code equals}. */
				+        @Override
				+        public int hashCode() {
				+            final int inherited = super.hashCode();
				+            return 31 * inherited + Objects.hash(normalizer);
				+        }
			
 
				+        
			
 
				+        /** Returns the normalizer set on this field type, or {@code null} when none has been set. */
				+        private NamedAnalyzer normalizer() {
				+            return this.normalizer;
				+        }
			
 
				+
			
 
				+        /**
				+         * Assigns the normalizer for this field type. {@code checkIfFrozen()} is invoked first, so the
				+         * assignment only happens if that call returns normally.
				+         *
				+         * @param normalizer the normalizer to use; may be {@code null}
				+         */
				+        public void setNormalizer(NamedAnalyzer normalizer) {
				+            checkIfFrozen();
				+            this.normalizer = normalizer;
				+        }
			
 
				+
			
 
				+        /**
				+         * Runs the superclass compatibility checks, then records a conflict when the other field type
				+         * uses a different normalizer.
				+         *
				+         * @param otherFT   the field type to compare against; cast to {@code WildcardFieldType}
				+         * @param conflicts list that conflict descriptions are appended to
				+         */
				+        @Override
				+        public void checkCompatibility(MappedFieldType otherFT, List<String> conflicts) {
				+            super.checkCompatibility(otherFT, conflicts);
				+            final NamedAnalyzer otherNormalizer = ((WildcardFieldType) otherFT).normalizer;
				+            final boolean sameNormalizer = Objects.equals(normalizer, otherNormalizer);
				+            if (sameNormalizer == false) {
				+                conflicts.add("mapper [" + name() + "] has different [normalizer]");
				+            }
				+        }
			
 
				+        
			
 
				         // Holds parsed information about the wildcard pattern
			
 
				         static class PatternStructure {
			
 
				             boolean openStart, openEnd, hasSymbols;            
			
@@ -327,6 +386,9 @@ public class WildcardFieldMapper extends FieldMapper {
 
				 
			
 
				         @Override
			
 
				         public Query wildcardQuery(String wildcardPattern, RewriteMethod method, QueryShardContext context) {
			
 
				+            if (normalizer != null) {
			
 
				+                wildcardPattern = StringFieldType.normalizeWildcardPattern(name(), wildcardPattern, normalizer);
			
 
				+            }
			
 
				             PatternStructure patternStructure = new PatternStructure(wildcardPattern);            
			
 
				             ArrayList<String> tokens = new ArrayList<>();
			
 
				 
			
@@ -467,7 +529,32 @@ public class WildcardFieldMapper extends FieldMapper {
 
				                         CircuitBreakerService breakerService, MapperService mapperService) {
			
 
				                     return new WildcardBytesBinaryDVIndexFieldData(indexSettings.getIndex(), fieldType.name());
			
 
				                 }};
			
 
				-        }        
			
 
				+        }
			
 
				+
			
 
				+
			
 
				+        /**
				+         * Runs {@code value} through the configured normalizer and returns the single resulting token.
				+         * When no normalizer is set the value is returned unchanged.
				+         *
				+         * @param value the raw field value
				+         * @return the normalized value, or {@code value} itself if there is no normalizer
				+         * @throws IllegalStateException if the token stream yields zero tokens or more than one token
				+         * @throws IOException declared for the token stream operations
				+         */
				+        String normalize(String value) throws IOException {
				+            if (normalizer == null) {
				+                return value;
				+            }
				+            try (TokenStream stream = normalizer.tokenStream(name(), value)) {
				+                final CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
				+                stream.reset();
				+                if (stream.incrementToken() == false) {
				+                    throw new IllegalStateException("The normalization token stream is "
				+                        + "expected to produce exactly 1 token, but got 0 for analyzer "
				+                        + normalizer + " and input \"" + value + "\"");
				+                }
				+                // capture the first token before advancing; a second token means the input was split
				+                final String normalized = term.toString();
				+                if (stream.incrementToken()) {
				+                    throw new IllegalStateException("The normalization token stream is "
				+                        + "expected to produce exactly 1 token, but got 2+ for analyzer "
				+                        + normalizer + " and input \"" + value + "\"");
				+                }
				+                stream.end();
				+                return normalized;
				+            }
				+        }
			
 
				+        
			
 
				     }
			
 
				      
			
 
				     static class  WildcardBytesBinaryDVIndexFieldData extends BytesBinaryDVIndexFieldData{
			
@@ -521,6 +608,11 @@ public class WildcardFieldMapper extends FieldMapper {
 
				         if (includeDefaults || ignoreAbove != Defaults.IGNORE_ABOVE) {
			
 
				             builder.field("ignore_above", ignoreAbove);
			
 
				         }
			
 
				+        if (fieldType().normalizer() != null) {
			
 
				+            builder.field("normalizer", fieldType().normalizer().name());
			
 
				+        } else if (includeDefaults) {
			
 
				+            builder.nullField("normalizer");
			
 
				+        }        
			
 
				     }
			
 
				     
			
 
				     @Override
			
@@ -544,10 +636,11 @@ public class WildcardFieldMapper extends FieldMapper {
 
				     // For internal use by Lucene only - used to define ngram index
			
 
				     final MappedFieldType ngramFieldType;
			
 
				     
			
 
				-    void createFields(String value, Document parseDoc, List<IndexableField>fields) {
			
 
				+    void createFields(String value, Document parseDoc, List<IndexableField>fields) throws IOException {
			
 
				         if (value == null || value.length() > ignoreAbove) {
			
 
				             return;
			
 
				         }
			
 
				+        value = fieldType().normalize(value);
			
 
				         String ngramValue = TOKEN_START_OR_END_CHAR + value + TOKEN_START_OR_END_CHAR + TOKEN_START_OR_END_CHAR;
			
 
				         Field ngramField = new Field(fieldType().name(), ngramValue, ngramFieldType);
			
 
				         fields.add(ngramField);