Browse Source

Add `use_field` option to intervals query (#40157)

This is the intervals equivalent of the `field_masking_span` query, allowing users to
merge intervals from multiple fields - for example, to search for stemmed tokens
near unstemmed tokens.
Alan Woodward 6 years ago
parent
commit
64a53e42cd

+ 6 - 0
docs/reference/query-dsl/intervals-query.asciidoc

@@ -76,6 +76,12 @@ Which analyzer should be used to analyze terms in the `query`.  By
 default, the search analyzer of the top-level field will be used.
 `filter`::
 An optional <<interval_filter,interval filter>>
+`use_field`::
+If specified, then match intervals from this field rather than the top-level field.
+Terms will be analyzed using the search analyzer from this field, unless an explicit
+`analyzer` is given.  This allows you to search across multiple fields as if they were
+all the same field; for example, you could index the same text into stemmed and
+unstemmed fields, and search for stemmed tokens near unstemmed ones.
 
 [[intervals-all_of]]
 ==== `all_of`

+ 59 - 0
modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/70_intervals.yml

@@ -0,0 +1,59 @@
+# integration tests for intervals queries using analyzers
+setup:
+  - do:
+      indices.create:
+        index:  test
+        body:
+          mappings:
+            properties:
+              text:
+                type: text
+                analyzer: standard
+              text_en:
+                type: text
+                analyzer: english
+  - do:
+      bulk:
+        refresh: true
+        body:
+          - '{"index": {"_index": "test", "_id": "4"}}'
+          - '{"text"    : "Outside it is cold and wet and raining cats and dogs",
+                "text_en" : "Outside it is cold and wet and raining cats and dogs"}'
+
+---
+"Test use_field":
+  - skip:
+      version: " - 7.9.99"  # TODO change to 7.0.99 after backport
+      reason: "Implemented in 7.1"
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            intervals:
+              text:
+                all_of:
+                  intervals:
+                    - match:
+                        query: cats
+                    - match:
+                        query: dog
+                  max_gaps: 1
+  - match: { hits.total.value: 0 }
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            intervals:
+              text:
+                all_of:
+                  intervals:
+                    - match:
+                        query: cats
+                    - match:
+                        query: dog
+                        use_field: text_en
+                  max_gaps: 1
+  - match: { hits.total.value: 1 }
+

+ 1 - 1
server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

@@ -637,7 +637,7 @@ public class TextFieldMapper extends FieldMapper {
         @Override
         public IntervalsSource intervals(String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer) throws IOException {
             if (indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
-                throw new IllegalArgumentException("Cannot create intervals against field [" + name() + "] with no positions indexed");
+                throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
             }
             IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? searchAnalyzer() : analyzer);
             return builder.analyzeText(text, maxGaps, ordered);

+ 10 - 4
server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java

@@ -19,7 +19,6 @@
 
 package org.elasticsearch.index.query;
 
-import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.intervals.IntervalQuery;
@@ -31,7 +30,9 @@ import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.index.mapper.MappedFieldType;
 
 import java.io.IOException;
+import java.util.HashSet;
 import java.util.Objects;
+import java.util.Set;
 
 /**
  * Builder for {@link IntervalQuery}
@@ -128,9 +129,14 @@ public class IntervalQueryBuilder extends AbstractQueryBuilder<IntervalQueryBuil
             // Be lenient with unmapped fields so that cross-index search will work nicely
             return new MatchNoDocsQuery();
         }
-        if (fieldType.tokenized() == false ||
-            fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
-            throw new IllegalArgumentException("Cannot create IntervalQuery over field [" + field + "] with no indexed positions");
+        Set<String> maskedFields = new HashSet<>();
+        sourceProvider.extractFields(maskedFields);
+        for (String maskedField : maskedFields) {
+            MappedFieldType ft = context.fieldMapper(maskedField);
+            if (ft == null) {
+                // Be lenient with unmapped fields so that cross-index search will work nicely
+                return new MatchNoDocsQuery();
+            }
         }
         return new IntervalQuery(field, sourceProvider.getSource(context, fieldType));
     }

+ 54 - 4
server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java

@@ -23,6 +23,7 @@ import org.apache.lucene.search.intervals.FilteredIntervalsSource;
 import org.apache.lucene.search.intervals.IntervalIterator;
 import org.apache.lucene.search.intervals.Intervals;
 import org.apache.lucene.search.intervals.IntervalsSource;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.ParsingException;
 import org.elasticsearch.common.io.stream.NamedWriteable;
@@ -43,6 +44,7 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Locale;
 import java.util.Objects;
+import java.util.Set;
 
 import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg;
 import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;
@@ -59,6 +61,8 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
 
     public abstract IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) throws IOException;
 
+    public abstract void extractFields(Set<String> fields);
+
     @Override
     public abstract int hashCode();
 
@@ -99,13 +103,15 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
         private final boolean ordered;
         private final String analyzer;
         private final IntervalFilter filter;
+        private final String useField;
 
-        public Match(String query, int maxGaps, boolean ordered, String analyzer, IntervalFilter filter) {
+        public Match(String query, int maxGaps, boolean ordered, String analyzer, IntervalFilter filter, String useField) {
             this.query = query;
             this.maxGaps = maxGaps;
             this.ordered = ordered;
             this.analyzer = analyzer;
             this.filter = filter;
+            this.useField = useField;
         }
 
         public Match(StreamInput in) throws IOException {
@@ -114,6 +120,12 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
             this.ordered = in.readBoolean();
             this.analyzer = in.readOptionalString();
             this.filter = in.readOptionalWriteable(IntervalFilter::new);
+            if (in.getVersion().onOrAfter(Version.V_7_1_0)) {
+                this.useField = in.readOptionalString();
+            }
+            else {
+                this.useField = null;
+            }
         }
 
         @Override
@@ -122,13 +134,28 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
             if (this.analyzer != null) {
                 analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer);
             }
-            IntervalsSource source = fieldType.intervals(query, maxGaps, ordered, analyzer);
+            IntervalsSource source;
+            if (useField != null) {
+                fieldType = context.fieldMapper(useField);
+                assert fieldType != null;
+                source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer));
+            }
+            else {
+                source = fieldType.intervals(query, maxGaps, ordered, analyzer);
+            }
             if (filter != null) {
                 return filter.filter(source, context, fieldType);
             }
             return source;
         }
 
+        @Override
+        public void extractFields(Set<String> fields) {
+            if (useField != null) {
+                fields.add(useField);
+            }
+        }
+
         @Override
         public boolean equals(Object o) {
             if (this == o) return true;
@@ -138,12 +165,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
                 ordered == match.ordered &&
                 Objects.equals(query, match.query) &&
                 Objects.equals(filter, match.filter) &&
+                Objects.equals(useField, match.useField) &&
                 Objects.equals(analyzer, match.analyzer);
         }
 
         @Override
         public int hashCode() {
-            return Objects.hash(query, maxGaps, ordered, analyzer, filter);
+            return Objects.hash(query, maxGaps, ordered, analyzer, filter, useField);
         }
 
         @Override
@@ -158,6 +186,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
             out.writeBoolean(ordered);
             out.writeOptionalString(analyzer);
             out.writeOptionalWriteable(filter);
+            if (out.getVersion().onOrAfter(Version.V_7_1_0)) {
+                out.writeOptionalString(useField);
+            }
         }
 
         @Override
@@ -173,6 +204,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
             if (filter != null) {
                 builder.field("filter", filter);
             }
+            if (useField != null) {
+                builder.field("use_field", useField);
+            }
             return builder.endObject();
         }
 
@@ -183,7 +217,8 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
                 boolean ordered = (args[2] != null && (boolean) args[2]);
                 String analyzer = (String) args[3];
                 IntervalFilter filter = (IntervalFilter) args[4];
-                return new Match(query, max_gaps, ordered, analyzer, filter);
+                String useField = (String) args[5];
+                return new Match(query, max_gaps, ordered, analyzer, filter, useField);
             });
         static {
             PARSER.declareString(constructorArg(), new ParseField("query"));
@@ -191,6 +226,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
             PARSER.declareBoolean(optionalConstructorArg(), new ParseField("ordered"));
             PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer"));
             PARSER.declareObject(optionalConstructorArg(), (p, c) -> IntervalFilter.fromXContent(p), new ParseField("filter"));
+            PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
         }
 
         public static Match fromXContent(XContentParser parser) {
@@ -228,6 +264,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
             return filter.filter(source, ctx, fieldType);
         }
 
+        @Override
+        public void extractFields(Set<String> fields) {
+            for (IntervalsSourceProvider provider : subSources) {
+                provider.extractFields(fields);
+            }
+        }
+
         @Override
         public boolean equals(Object o) {
             if (this == o) return true;
@@ -323,6 +366,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
             return source;
         }
 
+        @Override
+        public void extractFields(Set<String> fields) {
+            for (IntervalsSourceProvider provider : subSources) {
+                provider.extractFields(fields);
+            }
+        }
+
         @Override
         public boolean equals(Object o) {
             if (this == o) return true;

+ 62 - 5
server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java

@@ -25,7 +25,11 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.intervals.IntervalQuery;
 import org.apache.lucene.search.intervals.Intervals;
 import org.elasticsearch.common.ParsingException;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.compress.CompressedXContent;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.script.Script;
 import org.elasticsearch.script.ScriptContext;
 import org.elasticsearch.script.ScriptService;
@@ -37,6 +41,7 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 
+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.instanceOf;
 
@@ -64,7 +69,27 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
         return null;
     }
 
+    private static final String MASKED_FIELD = "masked_field";
+    private static final String NO_POSITIONS_FIELD = "no_positions_field";
+
+    @Override
+    protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
+        XContentBuilder mapping = jsonBuilder().startObject().startObject("_doc").startObject("properties")
+            .startObject(MASKED_FIELD)
+            .field("type", "text")
+            .endObject()
+            .startObject(NO_POSITIONS_FIELD)
+            .field("type", "text")
+            .field("index_options", "freqs")
+            .endObject()
+            .endObject().endObject().endObject();
+
+        mapperService.merge("_doc",
+            new CompressedXContent(Strings.toString(mapping)), MapperService.MergeReason.MAPPING_UPDATE);
+    }
+
     private IntervalsSourceProvider createRandomSource() {
+        String useField = rarely() ? MASKED_FIELD : null;
         switch (randomInt(20)) {
             case 0:
             case 1:
@@ -95,7 +120,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
                 boolean mOrdered = randomBoolean();
                 int maxMGaps = randomInt(5) - 1;
                 String analyzer = randomFrom("simple", "keyword", "whitespace");
-                return new IntervalsSourceProvider.Match(text, maxMGaps, mOrdered, analyzer, createRandomFilter());
+                return new IntervalsSourceProvider.Match(text, maxMGaps, mOrdered, analyzer, createRandomFilter(), useField);
         }
     }
 
@@ -151,6 +176,21 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
             Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))));
         assertEquals(expected, builder.toQuery(createShardContext()));
 
+        json = "{ \"intervals\" : " +
+            "{ \"" + STRING_FIELD_NAME + "\" : { " +
+            "       \"match\" : { " +
+            "           \"query\" : \"Hello world\"," +
+            "           \"max_gaps\" : 10," +
+            "           \"analyzer\" : \"whitespace\"," +
+            "           \"use_field\" : \"" + MASKED_FIELD + "\"," +
+            "           \"ordered\" : true } } } }";
+
+        builder = (IntervalQueryBuilder) parseQuery(json);
+        expected = new IntervalQuery(STRING_FIELD_NAME,
+            Intervals.fixField(MASKED_FIELD,
+                                Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world")))));
+        assertEquals(expected, builder.toQuery(createShardContext()));
+
         json = "{ \"intervals\" : " +
             "{ \"" + STRING_FIELD_NAME + "\" : { " +
             "       \"match\" : { " +
@@ -262,14 +302,31 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
             IntervalQueryBuilder builder = new IntervalQueryBuilder(INT_FIELD_NAME, provider);
             builder.doToQuery(createShardContext());
         });
-        assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over field [" + INT_FIELD_NAME + "] with no indexed positions"));
+        assertThat(e.getMessage(), equalTo("Can only use interval queries on text fields - not on ["
+            + INT_FIELD_NAME + "] which is of type [integer]"));
+
+        e = expectThrows(IllegalArgumentException.class, () -> {
+            IntervalQueryBuilder builder = new IntervalQueryBuilder(NO_POSITIONS_FIELD, provider);
+            builder.doToQuery(createShardContext());
+        });
+        assertThat(e.getMessage(), equalTo("Cannot create intervals over field ["
+            + NO_POSITIONS_FIELD + "] with no positions indexed"));
+
+        String json = "{ \"intervals\" : " +
+            "{ \"" + STRING_FIELD_NAME + "\" : { " +
+            "       \"match\" : { " +
+            "           \"query\" : \"Hello world\"," +
+            "           \"max_gaps\" : 10," +
+            "           \"analyzer\" : \"whitespace\"," +
+            "           \"use_field\" : \"" + NO_POSITIONS_FIELD + "\"," +
+            "           \"ordered\" : true } } } }";
 
         e = expectThrows(IllegalArgumentException.class, () -> {
-            IntervalQueryBuilder builder = new IntervalQueryBuilder(STRING_FIELD_NAME_2, provider);
+            IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
             builder.doToQuery(createShardContext());
         });
-        assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over field ["
-            + STRING_FIELD_NAME_2 + "] with no indexed positions"));
+        assertThat(e.getMessage(), equalTo("Cannot create intervals over field ["
+            + NO_POSITIONS_FIELD + "] with no positions indexed"));
     }
 
     public void testMultipleProviders() {