Browse Source

Add prefix intervals source (#43635)

This commit adds a prefix intervals source, allowing you to search
for intervals that contain terms starting with a given prefix. The source
can make use of the index_prefixes mapping option.

Relates to #43198
Alan Woodward 6 years ago
parent
commit
a520a5d761

+ 19 - 0
docs/reference/query-dsl/intervals-query.asciidoc

@@ -82,6 +82,25 @@ to search across multiple fields as if they were all the same field; for example
 you could index the same text into stemmed and unstemmed fields, and search for
 stemmed tokens near unstemmed ones.
 
+[[intervals-prefix]]
+==== `prefix`
+
+The `prefix` rule finds terms that start with a specified prefix.  The prefix will
+expand to match at most 128 terms; if there are more matching terms in the index,
+then an error will be returned.  To avoid this limit, enable the
+<<index-prefixes,`index_prefixes`>> option on the field being searched.
+
+[horizontal]
+`prefix`::
+Match terms starting with this prefix
+`analyzer`::
+Which analyzer should be used to normalize the `prefix`.  By default, the
+search analyzer of the top-level field will be used.
+`use_field`::
+If specified, then match intervals from this field rather than the top-level field.
+The `prefix` will be normalized using the search analyzer from this field, unless
+`analyzer` is specified separately.
+
 [[intervals-all_of]]
 ==== `all_of`
 

+ 20 - 0
rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml

@@ -384,3 +384,23 @@ setup:
   - match: { hits.total.value: 1 }
   - match: { hits.hits.0._id: "4" }
 
+---
+# Runs an intervals query on the `text` field that combines, under `all_of`,
+# a `match` rule for `cold` with a `prefix` rule for terms starting with `out`.
+"Test prefix":
+  # Skip range is temporary; see the reason below (to be narrowed when backported).
+  - skip:
+      version: " - 8.0.0"
+      reason: "TODO: change to 7.3 in backport"
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            intervals:
+              text:
+                all_of:
+                  intervals:
+                    - match:
+                        query: cold
+                    - prefix:
+                        prefix: out
+  # Expected hit count depends on the documents indexed by the file's setup section.
+  - match: { hits.total.value: 3 }
+

+ 2 - 1
server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

@@ -390,7 +390,8 @@ public abstract class MappedFieldType extends FieldType {
     /**
      * Create an {@link IntervalsSource} to be used for proximity queries
+     *
+     * <p>This base implementation does not support interval queries and always throws;
+     * field types that support them are expected to override it.
+     *
+     * @param query    the query text
+     * @param max_gaps presumably the maximum number of gaps allowed between matching terms - confirm against overrides
+     * @param ordered  presumably whether the terms must appear in order - confirm against overrides
+     * @param analyzer the analyzer to apply to {@code query}; overrides may accept {@code null}
+     * @param prefix   presumably whether {@code query} is to be treated as a term prefix - confirm against overrides
+     * @throws IllegalArgumentException always, naming the field and its type
      */
-    public IntervalsSource intervals(String query, int max_gaps, boolean ordered, NamedAnalyzer analyzer) throws IOException {
+    public IntervalsSource intervals(String query, int max_gaps, boolean ordered,
+                                     NamedAnalyzer analyzer, boolean prefix) throws IOException {
         throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name
             + "] which is of type [" + typeName() + "]");
     }

+ 25 - 1
server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

@@ -44,6 +44,7 @@ import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SynonymQuery;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.intervals.Intervals;
 import org.apache.lucene.search.intervals.IntervalsSource;
 import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
 import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
@@ -51,6 +52,7 @@ import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.Operations;
@@ -403,6 +405,17 @@ public class TextFieldMapper extends FieldMapper {
                 .build();
         }
 
+        public IntervalsSource intervals(BytesRef term) {
+            if (term.length > maxChars) {
+                return Intervals.prefix(term.utf8ToString());
+            }
+            if (term.length >= minChars) {
+                return Intervals.fixField(name(), Intervals.term(term));
+            }
+            String wildcardTerm = term.utf8ToString() + "?".repeat(Math.max(0, minChars - term.length));
+            return Intervals.or(Intervals.fixField(name(), Intervals.wildcard(wildcardTerm)), Intervals.term(term));
+        }
+
         @Override
         public PrefixFieldType clone() {
             return new PrefixFieldType(parentField, name(), minChars, maxChars);
@@ -631,10 +644,21 @@ public class TextFieldMapper extends FieldMapper {
         }
 
         @Override
-        public IntervalsSource intervals(String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer) throws IOException {
+        public IntervalsSource intervals(String text, int maxGaps, boolean ordered,
+                                         NamedAnalyzer analyzer, boolean prefix) throws IOException {
             if (indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
                 throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
             }
+            // Fall back to this field's search analyzer when the caller does not supply one.
+            if (analyzer == null) {
+                analyzer = searchAnalyzer();
+            }
+            if (prefix) {
+                // A prefix is normalized rather than analyzed; maxGaps and ordered are not used on this path.
+                BytesRef normalizedTerm = analyzer.normalize(name(), text);
+                if (prefixFieldType != null) {
+                    // Delegate to the prefix sub-field when one is configured.
+                    return prefixFieldType.intervals(normalizedTerm);
+                }
+                return Intervals.prefix(normalizedTerm.utf8ToString()); // TODO make Intervals.prefix() take a BytesRef
+            }
-            IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? searchAnalyzer() : analyzer);
+            // analyzer is already non-null here, so no second fallback is needed.
+            IntervalBuilder builder = new IntervalBuilder(name(), analyzer);
             return builder.analyzeText(text, maxGaps, ordered);
         }

+ 108 - 3
server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java

@@ -78,9 +78,11 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
                 return Disjunction.fromXContent(parser);
             case "all_of":
                 return Combine.fromXContent(parser);
+            case "prefix":
+                return Prefix.fromXContent(parser);
         }
         throw new ParsingException(parser.getTokenLocation(),
-            "Unknown interval type [" + parser.currentName() + "], expecting one of [match, any_of, all_of]");
+            "Unknown interval type [" + parser.currentName() + "], expecting one of [match, any_of, all_of, prefix]");
     }
 
     private static IntervalsSourceProvider parseInnerIntervals(XContentParser parser) throws IOException {
@@ -138,10 +140,10 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
             if (useField != null) {
                 fieldType = context.fieldMapper(useField);
                 assert fieldType != null;
-                source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer));
+                source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer, false));
             }
             else {
-                source = fieldType.intervals(query, maxGaps, ordered, analyzer);
+                source = fieldType.intervals(query, maxGaps, ordered, analyzer, false);
             }
             if (filter != null) {
                 return filter.filter(source, context, fieldType);
@@ -440,6 +442,109 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
         }
     }
 
+    public static class Prefix extends IntervalsSourceProvider {
+
+        public static final String NAME = "prefix";
+
+        private final String term;
+        private final String analyzer;
+        private final String useField;
+
+        public Prefix(String term, String analyzer, String useField) {
+            this.term = term;
+            this.analyzer = analyzer;
+            this.useField = useField;
+        }
+
+        public Prefix(StreamInput in) throws IOException {
+            this.term = in.readString();
+            this.analyzer = in.readOptionalString();
+            this.useField = in.readOptionalString();
+        }
+
+        @Override
+        public IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) throws IOException {
+            NamedAnalyzer analyzer = null;
+            if (this.analyzer != null) {
+                analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer);
+            }
+            IntervalsSource source;
+            if (useField != null) {
+                fieldType = context.fieldMapper(useField);
+                assert fieldType != null;
+                source = Intervals.fixField(useField, fieldType.intervals(term, 0, false, analyzer, true));
+            }
+            else {
+                source = fieldType.intervals(term, 0, false, analyzer, true);
+            }
+            return source;
+        }
+
+        @Override
+        public void extractFields(Set<String> fields) {
+            if (useField != null) {
+                fields.add(useField);
+            }
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) return true;
+            if (o == null || getClass() != o.getClass()) return false;
+            Prefix prefix = (Prefix) o;
+            return Objects.equals(term, prefix.term) &&
+                Objects.equals(analyzer, prefix.analyzer) &&
+                Objects.equals(useField, prefix.useField);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(term, analyzer, useField);
+        }
+
+        @Override
+        public String getWriteableName() {
+            return NAME;
+        }
+
+        @Override
+        public void writeTo(StreamOutput out) throws IOException {
+            out.writeString(term);
+            out.writeOptionalString(analyzer);
+            out.writeOptionalString(useField);
+        }
+
+        @Override
+        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+            builder.startObject(NAME);
+            builder.field("term", term);
+            if (analyzer != null) {
+                builder.field("analyzer", analyzer);
+            }
+            if (useField != null) {
+                builder.field("use_field", useField);
+            }
+            builder.endObject();
+            return builder;
+        }
+
+        private static final ConstructingObjectParser<Prefix, Void> PARSER = new ConstructingObjectParser<>(NAME, args -> {
+            String term = (String) args[0];
+            String analyzer = (String) args[1];
+            String useField = (String) args[2];
+            return new Prefix(term, analyzer, useField);
+        });
+        static {
+            PARSER.declareString(constructorArg(), new ParseField("term"));
+            PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer"));
+            PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
+        }
+
+        public static Prefix fromXContent(XContentParser parser) throws IOException {
+            return PARSER.parse(parser, null);
+        }
+    }
+
     static class ScriptFilterSource extends FilteredIntervalsSource {
 
         final IntervalFilterScript script;

+ 2 - 0
server/src/main/java/org/elasticsearch/search/SearchModule.java

@@ -792,6 +792,8 @@ public class SearchModule {
             IntervalsSourceProvider.Combine.NAME, IntervalsSourceProvider.Combine::new));
         namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class,
             IntervalsSourceProvider.Disjunction.NAME, IntervalsSourceProvider.Disjunction::new));
+        namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class,
+            IntervalsSourceProvider.Prefix.NAME, IntervalsSourceProvider.Prefix::new));
     }
 
     private void registerQuery(QuerySpec<?> spec) {

+ 36 - 0
server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java

@@ -59,6 +59,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
 
     private static final String MASKED_FIELD = "masked_field";
     private static final String NO_POSITIONS_FIELD = "no_positions_field";
+    private static final String PREFIXED_FIELD = "prefixed_field";
 
     @Override
     protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
@@ -70,6 +71,10 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
             .field("type", "text")
             .field("index_options", "freqs")
             .endObject()
+            .startObject(PREFIXED_FIELD)
+            .field("type", "text")
+            .startObject("index_prefixes").endObject()
+            .endObject()
             .endObject().endObject().endObject();
 
         mapperService.merge("_doc",
@@ -385,4 +390,35 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
 
     }
 
+    /**
+     * Checks the interval query built from a {@code prefix} rule for: a plain text field, a field
+     * without positions (rejected), and a field with {@code index_prefixes} enabled, using both a
+     * prefix that is looked up as a term and one short enough to need wildcard padding.
+     */
+    public void testPrefixes() throws IOException {
+
+        // Plain text field: expands to a simple prefix source
+        IntervalQueryBuilder parsed = (IntervalQueryBuilder) parseQuery(prefixRuleJson(STRING_FIELD_NAME, "term"));
+        Query plainExpected = new IntervalQuery(STRING_FIELD_NAME, Intervals.prefix("term"));
+        assertEquals(plainExpected, parsed.toQuery(createShardContext()));
+
+        // Field without positions: building the query must be rejected
+        String noPositionsJson = prefixRuleJson(NO_POSITIONS_FIELD, "term");
+        expectThrows(IllegalArgumentException.class, () -> {
+            IntervalQueryBuilder rejected = (IntervalQueryBuilder) parseQuery(noPositionsJson);
+            rejected.toQuery(createShardContext());
+            });
+
+        // Field with index_prefixes: the prefix is looked up as a term in the prefix sub-field
+        String prefixSubField = PREFIXED_FIELD + "._index_prefix";
+        parsed = (IntervalQueryBuilder) parseQuery(prefixRuleJson(PREFIXED_FIELD, "term"));
+        Query indexedExpected = new IntervalQuery(PREFIXED_FIELD, Intervals.fixField(prefixSubField, Intervals.term("term")));
+        assertEquals(indexedExpected, parsed.toQuery(createShardContext()));
+
+        // Short prefix: wildcard on the prefix sub-field, OR-ed with the exact term
+        parsed = (IntervalQueryBuilder) parseQuery(prefixRuleJson(PREFIXED_FIELD, "t"));
+        Query shortExpected = new IntervalQuery(PREFIXED_FIELD, Intervals.or(
+            Intervals.fixField(prefixSubField, Intervals.wildcard("t?")),
+            Intervals.term("t")));
+        assertEquals(shortExpected, parsed.toQuery(createShardContext()));
+
+    }
+
+    /** Builds the JSON for an intervals query applying a single {@code prefix} rule to {@code field}. */
+    private static String prefixRuleJson(String field, String term) {
+        return "{ \"intervals\" : { \"" + field + "\": { " +
+            "\"prefix\" : { \"term\" : \"" + term + "\" } } } }";
+    }
+
 }