Browse Source

Wildcard intervals (#43691)

This commit adds a wildcard intervals source, similar to the prefix source. It
also renames the `term` parameter of prefix to `prefix`, bringing it
into line with the `pattern` parameter of wildcard.

Closes #43198
Alan Woodward 6 years ago
parent
commit
89a3eb3c6f

+ 28 - 0
docs/reference/query-dsl/intervals-query.asciidoc

@@ -101,6 +101,34 @@ If specified, then match intervals from this field rather than the top-level fie
 The `prefix` will be normalized using the search analyzer from this field, unless
 `analyzer` is specified separately.
 
+[[intervals-wildcard]]
+==== `wildcard`
+
+The `wildcard` rule finds terms that match a wildcard pattern.  The pattern will
+expand to match at most 128 terms; if there are more matching terms in the index,
+then an error will be returned.
+
+[horizontal]
+`pattern`::
+Find terms matching this pattern.
++
+--
+This parameter supports two wildcard operators:
+
+* `?`, which matches any single character
+* `*`, which matches zero or more characters, including none
+
+WARNING: Avoid beginning patterns with `*` or `?`. This can increase
+the iterations needed to find matching terms and slow search performance.
+--
+`analyzer`::
+Which analyzer should be used to normalize the `pattern`.  By default, the
+search analyzer of the top-level field will be used.
+`use_field`::
+If specified, then match intervals from this field rather than the top-level field.
+The `pattern` will be normalized using the search analyzer from this field, unless
+`analyzer` is specified separately.
+
 [[intervals-all_of]]
 ==== `all_of`
 

+ 20 - 0
rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml

@@ -404,3 +404,23 @@ setup:
                         prefix: out
   - match: { hits.total.value: 3 }
 
+---
+# Exercises the `wildcard` intervals rule inside an `all_of` combination:
+# an exact `match` on "cold" together with the wildcard pattern "out?ide",
+# where `?` stands for a single character.
+"Test wildcard":
+  # Skipped for the version range below; per the reason text the bound is
+  # meant to be changed to 7.3 when the change is backported.
+  - skip:
+      version: " - 8.0.0"
+      reason: "TODO: change to 7.3 in backport"
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            intervals:
+              text:
+                all_of:
+                  intervals:
+                    - match:
+                        query: cold
+                    - wildcard:
+                        pattern: out?ide
+  # Expected hit count; depends on the documents indexed by the file's `setup`
+  # section (not shown here) -- presumably three of them match both rules.
+  - match: { hits.total.value: 3 }
+

+ 132 - 11
server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java

@@ -19,10 +19,12 @@
 
 package org.elasticsearch.index.query;
 
+import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.search.intervals.FilteredIntervalsSource;
 import org.apache.lucene.search.intervals.IntervalIterator;
 import org.apache.lucene.search.intervals.Intervals;
 import org.apache.lucene.search.intervals.IntervalsSource;
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.Version;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.ParsingException;
@@ -80,6 +82,8 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
                 return Combine.fromXContent(parser);
             case "prefix":
                 return Prefix.fromXContent(parser);
+            case "wildcard":
+                return Wildcard.fromXContent(parser);
         }
         throw new ParsingException(parser.getTokenLocation(),
             "Unknown interval type [" + parser.currentName() + "], expecting one of [match, any_of, all_of, prefix]");
@@ -446,18 +450,18 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
 
         public static final String NAME = "prefix";
 
-        private final String term;
+        private final String prefix;
         private final String analyzer;
         private final String useField;
 
-        public Prefix(String term, String analyzer, String useField) {
-            this.term = term;
+        public Prefix(String prefix, String analyzer, String useField) {
+            this.prefix = prefix;
             this.analyzer = analyzer;
             this.useField = useField;
         }
 
         public Prefix(StreamInput in) throws IOException {
-            this.term = in.readString();
+            this.prefix = in.readString();
             this.analyzer = in.readOptionalString();
             this.useField = in.readOptionalString();
         }
@@ -472,10 +476,10 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
             if (useField != null) {
                 fieldType = context.fieldMapper(useField);
                 assert fieldType != null;
-                source = Intervals.fixField(useField, fieldType.intervals(term, 0, false, analyzer, true));
+                source = Intervals.fixField(useField, fieldType.intervals(prefix, 0, false, analyzer, true));
             }
             else {
-                source = fieldType.intervals(term, 0, false, analyzer, true);
+                source = fieldType.intervals(prefix, 0, false, analyzer, true);
             }
             return source;
         }
@@ -492,14 +496,14 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
             if (this == o) return true;
             if (o == null || getClass() != o.getClass()) return false;
             Prefix prefix = (Prefix) o;
-            return Objects.equals(term, prefix.term) &&
+            return Objects.equals(this.prefix, prefix.prefix) &&
                 Objects.equals(analyzer, prefix.analyzer) &&
                 Objects.equals(useField, prefix.useField);
         }
 
         @Override
         public int hashCode() {
-            return Objects.hash(term, analyzer, useField);
+            return Objects.hash(prefix, analyzer, useField);
         }
 
         @Override
@@ -509,7 +513,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
 
         @Override
         public void writeTo(StreamOutput out) throws IOException {
-            out.writeString(term);
+            out.writeString(prefix);
             out.writeOptionalString(analyzer);
             out.writeOptionalString(useField);
         }
@@ -517,7 +521,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
         @Override
         public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
             builder.startObject(NAME);
-            builder.field("term", term);
+            builder.field("prefix", prefix);
             if (analyzer != null) {
                 builder.field("analyzer", analyzer);
             }
@@ -535,7 +539,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
             return new Prefix(term, analyzer, useField);
         });
         static {
-            PARSER.declareString(constructorArg(), new ParseField("term"));
+            PARSER.declareString(constructorArg(), new ParseField("prefix"));
             PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer"));
             PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
         }
@@ -545,6 +549,123 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
         }
     }
 
+    public static class Wildcard extends IntervalsSourceProvider {
+
+        public static final String NAME = "wildcard";
+
+        private final String pattern;
+        private final String analyzer;
+        private final String useField;
+
+        public Wildcard(String pattern, String analyzer, String useField) {
+            this.pattern = pattern;
+            this.analyzer = analyzer;
+            this.useField = useField;
+        }
+
+        public Wildcard(StreamInput in) throws IOException {
+            this.pattern = in.readString();
+            this.analyzer = in.readOptionalString();
+            this.useField = in.readOptionalString();
+        }
+
+        @Override
+        public IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) {
+            NamedAnalyzer analyzer = fieldType.searchAnalyzer();
+            if (this.analyzer != null) {
+                analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer);
+            }
+            IntervalsSource source;
+            if (useField != null) {
+                fieldType = context.fieldMapper(useField);
+                assert fieldType != null;
+                checkPositions(fieldType);
+                if (this.analyzer == null) {
+                    analyzer = fieldType.searchAnalyzer();
+                }
+                BytesRef normalizedTerm = analyzer.normalize(useField, pattern);
+                // TODO Intervals.wildcard() should take BytesRef
+                source = Intervals.fixField(useField, Intervals.wildcard(normalizedTerm.utf8ToString()));
+            }
+            else {
+                checkPositions(fieldType);
+                BytesRef normalizedTerm = analyzer.normalize(fieldType.name(), pattern);
+                source = Intervals.wildcard(normalizedTerm.utf8ToString());
+            }
+            return source;
+        }
+
+        private void checkPositions(MappedFieldType type) {
+            if (type.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+                throw new IllegalArgumentException("Cannot create intervals over field [" + type.name() + "] with no positions indexed");
+            }
+        }
+
+        @Override
+        public void extractFields(Set<String> fields) {
+            if (useField != null) {
+                fields.add(useField);
+            }
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) return true;
+            if (o == null || getClass() != o.getClass()) return false;
+            Prefix prefix = (Prefix) o;
+            return Objects.equals(pattern, prefix.prefix) &&
+                Objects.equals(analyzer, prefix.analyzer) &&
+                Objects.equals(useField, prefix.useField);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(pattern, analyzer, useField);
+        }
+
+        @Override
+        public String getWriteableName() {
+            return NAME;
+        }
+
+        @Override
+        public void writeTo(StreamOutput out) throws IOException {
+            out.writeString(pattern);
+            out.writeOptionalString(analyzer);
+            out.writeOptionalString(useField);
+        }
+
+        @Override
+        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+            builder.startObject(NAME);
+            builder.field("pattern", pattern);
+            if (analyzer != null) {
+                builder.field("analyzer", analyzer);
+            }
+            if (useField != null) {
+                builder.field("use_field", useField);
+            }
+            builder.endObject();
+            return builder;
+        }
+
+        private static final ConstructingObjectParser<Wildcard, Void> PARSER = new ConstructingObjectParser<>(NAME, args -> {
+            String term = (String) args[0];
+            String analyzer = (String) args[1];
+            String useField = (String) args[2];
+            return new Wildcard(term, analyzer, useField);
+        });
+        static {
+            PARSER.declareString(constructorArg(), new ParseField("pattern"));
+            PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer"));
+            PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
+        }
+
+        public static Wildcard fromXContent(XContentParser parser) throws IOException {
+            return PARSER.parse(parser, null);
+        }
+    }
+
     static class ScriptFilterSource extends FilteredIntervalsSource {
 
         final IntervalFilterScript script;

+ 2 - 0
server/src/main/java/org/elasticsearch/search/SearchModule.java

@@ -794,6 +794,8 @@ public class SearchModule {
             IntervalsSourceProvider.Disjunction.NAME, IntervalsSourceProvider.Disjunction::new));
         namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class,
             IntervalsSourceProvider.Prefix.NAME, IntervalsSourceProvider.Prefix::new));
+        namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class,
+            IntervalsSourceProvider.Wildcard.NAME, IntervalsSourceProvider.Wildcard::new));
     }
 
     private void registerQuery(QuerySpec<?> spec) {

+ 76 - 4
server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java

@@ -393,32 +393,104 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
     public void testPrefixes() throws IOException {
 
         String json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
-            "\"prefix\" : { \"term\" : \"term\" } } } }";
+            "\"prefix\" : { \"prefix\" : \"term\" } } } }";
         IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
         Query expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.prefix("term"));
         assertEquals(expected, builder.toQuery(createShardContext()));
 
         String no_positions_json = "{ \"intervals\" : { \"" + NO_POSITIONS_FIELD + "\": { " +
-            "\"prefix\" : { \"term\" : \"term\" } } } }";
+            "\"prefix\" : { \"prefix\" : \"term\" } } } }";
         expectThrows(IllegalArgumentException.class, () -> {
             IntervalQueryBuilder builder1 = (IntervalQueryBuilder) parseQuery(no_positions_json);
             builder1.toQuery(createShardContext());
             });
 
+        String no_positions_fixed_field_json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
+            "\"prefix\" : { \"prefix\" : \"term\", \"use_field\" : \"" + NO_POSITIONS_FIELD + "\" } } } }";
+        expectThrows(IllegalArgumentException.class, () -> {
+            IntervalQueryBuilder builder1 = (IntervalQueryBuilder) parseQuery(no_positions_fixed_field_json);
+            builder1.toQuery(createShardContext());
+        });
+
         String prefix_json = "{ \"intervals\" : { \"" + PREFIXED_FIELD + "\": { " +
-            "\"prefix\" : { \"term\" : \"term\" } } } }";
+            "\"prefix\" : { \"prefix\" : \"term\" } } } }";
         builder = (IntervalQueryBuilder) parseQuery(prefix_json);
         expected = new IntervalQuery(PREFIXED_FIELD, Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.term("term")));
         assertEquals(expected, builder.toQuery(createShardContext()));
 
         String short_prefix_json = "{ \"intervals\" : { \"" + PREFIXED_FIELD + "\": { " +
-            "\"prefix\" : { \"term\" : \"t\" } } } }";
+            "\"prefix\" : { \"prefix\" : \"t\" } } } }";
         builder = (IntervalQueryBuilder) parseQuery(short_prefix_json);
         expected = new IntervalQuery(PREFIXED_FIELD, Intervals.or(
             Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.wildcard("t?")),
             Intervals.term("t")));
         assertEquals(expected, builder.toQuery(createShardContext()));
 
+        String fix_field_prefix_json =  "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
+            "\"prefix\" : { \"prefix\" : \"term\", \"use_field\" : \"" + PREFIXED_FIELD + "\" } } } }";
+        builder = (IntervalQueryBuilder) parseQuery(fix_field_prefix_json);
+        // This looks weird, but it's fine, because the innermost fixField wins
+        expected = new IntervalQuery(STRING_FIELD_NAME,
+            Intervals.fixField(PREFIXED_FIELD, Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.term("term"))));
+        assertEquals(expected, builder.toQuery(createShardContext()));
+
+        String keyword_json = "{ \"intervals\" : { \"" + PREFIXED_FIELD + "\": { " +
+            "\"prefix\" : { \"prefix\" : \"Term\", \"analyzer\" : \"keyword\" } } } }";
+        builder = (IntervalQueryBuilder) parseQuery(keyword_json);
+        expected = new IntervalQuery(PREFIXED_FIELD, Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.term("Term")));
+        assertEquals(expected, builder.toQuery(createShardContext()));
+
+        String keyword_fix_field_json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
+            "\"prefix\" : { \"prefix\" : \"Term\", \"analyzer\" : \"keyword\", \"use_field\" : \"" + PREFIXED_FIELD + "\" } } } }";
+        builder = (IntervalQueryBuilder) parseQuery(keyword_fix_field_json);
+        expected = new IntervalQuery(STRING_FIELD_NAME,
+            Intervals.fixField(PREFIXED_FIELD, Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.term("Term"))));
+        assertEquals(expected, builder.toQuery(createShardContext()));
+    }
+
+    public void testWildcard() throws IOException {
+
+        // No analyzer given: the expected query carries the lower-cased pattern.
+        String plainJson = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
+            "\"wildcard\" : { \"pattern\" : \"Te?m\" } } } }";
+        IntervalQueryBuilder plainBuilder = (IntervalQueryBuilder) parseQuery(plainJson);
+        Query plainExpected = new IntervalQuery(STRING_FIELD_NAME, Intervals.wildcard("te?m"));
+        assertEquals(plainExpected, plainBuilder.toQuery(createShardContext()));
+
+        // Top-level field without positions must be rejected.
+        String noPositionsJson = "{ \"intervals\" : { \"" + NO_POSITIONS_FIELD + "\": { " +
+            "\"wildcard\" : { \"pattern\" : \"term\" } } } }";
+        expectThrows(IllegalArgumentException.class,
+            () -> ((IntervalQueryBuilder) parseQuery(noPositionsJson)).toQuery(createShardContext()));
+
+        // Explicit keyword analyzer: the pattern is expected unchanged.
+        String keywordJson = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
+            "\"wildcard\" : { \"pattern\" : \"Te?m\", \"analyzer\" : \"keyword\" } } } }";
+        IntervalQueryBuilder keywordBuilder = (IntervalQueryBuilder) parseQuery(keywordJson);
+        Query keywordExpected = new IntervalQuery(STRING_FIELD_NAME, Intervals.wildcard("Te?m"));
+        assertEquals(keywordExpected, keywordBuilder.toQuery(createShardContext()));
+
+        // use_field: the source is wrapped in fixField for the other field.
+        String useFieldJson = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
+            "\"wildcard\" : { \"pattern\" : \"Te?m\", \"use_field\" : \"masked_field\" } } } }";
+        IntervalQueryBuilder useFieldBuilder = (IntervalQueryBuilder) parseQuery(useFieldJson);
+        Query useFieldExpected =
+            new IntervalQuery(STRING_FIELD_NAME, Intervals.fixField(MASKED_FIELD, Intervals.wildcard("te?m")));
+        assertEquals(useFieldExpected, useFieldBuilder.toQuery(createShardContext()));
+
+        // use_field pointing at a field without positions must be rejected as well.
+        String useFieldNoPositionsJson = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
+            "\"wildcard\" : { \"pattern\" : \"Te?m\", \"use_field\" : \"" + NO_POSITIONS_FIELD + "\" } } } }";
+        expectThrows(IllegalArgumentException.class,
+            () -> ((IntervalQueryBuilder) parseQuery(useFieldNoPositionsJson)).toQuery(createShardContext()));
+
+        // use_field combined with an explicit keyword analyzer: pattern expected unchanged.
+        String useFieldKeywordJson = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
+            "\"wildcard\" : { \"pattern\" : \"Te?m\", \"use_field\" : \"masked_field\", \"analyzer\" : \"keyword\" } } } }";
+        IntervalQueryBuilder useFieldKeywordBuilder = (IntervalQueryBuilder) parseQuery(useFieldKeywordJson);
+        Query useFieldKeywordExpected =
+            new IntervalQuery(STRING_FIELD_NAME, Intervals.fixField(MASKED_FIELD, Intervals.wildcard("Te?m")));
+        assertEquals(useFieldKeywordExpected, useFieldKeywordBuilder.toQuery(createShardContext()));
     }
 
 }