Browse Source

Merge pull request #18183 from brwe/exclude-all-but-text-from-wildcard-highlighting

Exclude all but string fields from highlighting if wildcards are used…
Britta Weber 9 years ago
parent
commit
901c25b7ff

+ 18 - 0
core/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java

@@ -26,6 +26,9 @@ import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.core.KeywordFieldMapper;
+import org.elasticsearch.index.mapper.core.StringFieldMapper;
+import org.elasticsearch.index.mapper.core.TextFieldMapper;
 import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
 import org.elasticsearch.search.SearchParseElement;
 import org.elasticsearch.search.fetch.FetchSubPhase;
@@ -102,6 +105,21 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
                     continue;
                 }
 
+                // We should prevent highlighting if a field is anything but a text or keyword field.
+                // However, someone might implement a custom field type that has text and still want to
+                // highlight on that. We cannot know in advance if the highlighter will be able to
+                // highlight such a field and so we do the following:
+                // If the field is only highlighted because the field matches a wildcard we assume
+                // it was a mistake and do not process it.
+                // If the field was explicitly given we assume that whoever issued the query knew
+                // what they were doing and try to highlight anyway.
+                if (fieldNameContainsWildcards) {
+                    if (fieldMapper.fieldType().typeName().equals(TextFieldMapper.CONTENT_TYPE) == false &&
+                        fieldMapper.fieldType().typeName().equals(KeywordFieldMapper.CONTENT_TYPE) == false &&
+                        fieldMapper.fieldType().typeName().equals(StringFieldMapper.CONTENT_TYPE) == false) {
+                        continue;
+                    }
+                }
                 String highlighterType = field.fieldOptions().highlighterType();
                 if (highlighterType == null) {
                     for(String highlighterCandidate : STANDARD_HIGHLIGHTERS_BY_PRECEDENCE) {

+ 2 - 1
core/src/test/java/org/elasticsearch/index/mapper/externalvalues/ExternalMapperPlugin.java

@@ -43,6 +43,7 @@ public class ExternalMapperPlugin extends Plugin {
         indicesModule.registerMapper(EXTERNAL, new ExternalMapper.TypeParser(EXTERNAL, "foo"));
         indicesModule.registerMapper(EXTERNAL_BIS, new ExternalMapper.TypeParser(EXTERNAL_BIS, "bar"));
         indicesModule.registerMapper(EXTERNAL_UPPER, new ExternalMapper.TypeParser(EXTERNAL_UPPER, "FOO BAR"));
+        indicesModule.registerMapper(FakeStringFieldMapper.CONTENT_TYPE, new FakeStringFieldMapper.TypeParser());
     }
 
-}
+}

+ 50 - 0
core/src/test/java/org/elasticsearch/index/mapper/externalvalues/ExternalValuesMapperIntegrationIT.java

@@ -25,10 +25,12 @@ import org.elasticsearch.common.geo.builders.ShapeBuilders;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.search.highlight.HighlightBuilder;
 import org.elasticsearch.test.ESIntegTestCase;
 
 import java.util.Collection;
 
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
 import static org.hamcrest.Matchers.equalTo;
 
 public class ExternalValuesMapperIntegrationIT extends ESIntegTestCase {
@@ -37,6 +39,54 @@ public class ExternalValuesMapperIntegrationIT extends ESIntegTestCase {
         return pluginList(ExternalMapperPlugin.class);
     }
 
+    public void testHighlightingOnCustomString() throws Exception {
+        prepareCreate("test-idx").addMapping("type",
+            XContentFactory.jsonBuilder().startObject().startObject("type")
+                .startObject("properties")
+                .startObject("field").field("type", FakeStringFieldMapper.CONTENT_TYPE).endObject()
+                .endObject()
+                .endObject().endObject()).execute().get();
+        ensureYellow("test-idx");
+
+        index("test-idx", "type", "1", XContentFactory.jsonBuilder()
+            .startObject()
+            .field("field", "Every day is exactly the same")
+            .endObject());
+        refresh();
+
+        SearchResponse response;
+        // test if the highlighting is excluded when we use wildcards
+        response = client().prepareSearch("test-idx")
+            .setQuery(QueryBuilders.matchQuery("field", "exactly the same"))
+            .highlighter(new HighlightBuilder().field("*"))
+            .execute().actionGet();
+        assertSearchResponse(response);
+        assertThat(response.getHits().getTotalHits(), equalTo(1L));
+        assertThat(response.getHits().getAt(0).getHighlightFields().size(), equalTo(0));
+
+        // make sure it is not excluded when we explicitly provide the fieldname
+        response = client().prepareSearch("test-idx")
+            .setQuery(QueryBuilders.matchQuery("field", "exactly the same"))
+            .highlighter(new HighlightBuilder().field("field"))
+            .execute().actionGet();
+        assertSearchResponse(response);
+        assertThat(response.getHits().getTotalHits(), equalTo(1L));
+        assertThat(response.getHits().getAt(0).getHighlightFields().size(), equalTo(1));
+        assertThat(response.getHits().getAt(0).getHighlightFields().get("field").fragments()[0].string(), equalTo("Every day is " +
+            "<em>exactly</em> <em>the</em> <em>same</em>"));
+
+        // make sure it is not excluded when we explicitly provide the fieldname and a wildcard
+        response = client().prepareSearch("test-idx")
+            .setQuery(QueryBuilders.matchQuery("field", "exactly the same"))
+            .highlighter(new HighlightBuilder().field("*").field("field"))
+            .execute().actionGet();
+        assertSearchResponse(response);
+        assertThat(response.getHits().getTotalHits(), equalTo(1L));
+        assertThat(response.getHits().getAt(0).getHighlightFields().size(), equalTo(1));
+        assertThat(response.getHits().getAt(0).getHighlightFields().get("field").fragments()[0].string(), equalTo("Every day is " +
+            "<em>exactly</em> <em>the</em> <em>same</em>"));
+    }
+
     public void testExternalValues() throws Exception {
         prepareCreate("test-idx").addMapping("type",
                 XContentFactory.jsonBuilder().startObject().startObject("type")

+ 193 - 0
core/src/test/java/org/elasticsearch/index/mapper/externalvalues/FakeStringFieldMapper.java

@@ -0,0 +1,193 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.mapper.externalvalues;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RegexpQuery;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.Mapper;
+import org.elasticsearch.index.mapper.MapperParsingException;
+import org.elasticsearch.index.mapper.ParseContext;
+import org.elasticsearch.index.mapper.core.StringFieldMapper;
+import org.elasticsearch.index.query.QueryShardContext;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+import static org.elasticsearch.index.mapper.core.TypeParsers.parseTextField;
+
+// Like a String mapper but with very few options. We just use it to test if highlighting on a custom string mapped field works as expected.
+public class FakeStringFieldMapper extends FieldMapper {
+
+    public static final String CONTENT_TYPE = "fake_string";
+
+    public static class Defaults {
+
+        public static final MappedFieldType FIELD_TYPE = new FakeStringFieldType();
+
+        static {
+            FIELD_TYPE.freeze();
+        }
+    }
+
+    public static class Builder extends FieldMapper.Builder<Builder, FakeStringFieldMapper> {
+
+        public Builder(String name) {
+            super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
+            builder = this;
+        }
+
+        @Override
+        public FakeStringFieldType fieldType() {
+            return (FakeStringFieldType) super.fieldType();
+        }
+
+        @Override
+        public FakeStringFieldMapper build(BuilderContext context) {
+            setupFieldType(context);
+            return new FakeStringFieldMapper(
+                name, fieldType(), defaultFieldType,
+                context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
+        }
+    }
+
+    public static class TypeParser implements Mapper.TypeParser {
+
+        public TypeParser() {
+        }
+
+        @Override
+        public Mapper.Builder parse(String fieldName, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
+            FakeStringFieldMapper.Builder builder = new FakeStringFieldMapper.Builder(fieldName);
+            parseTextField(builder, fieldName, node, parserContext);
+            return builder;
+        }
+    }
+
+    public static final class FakeStringFieldType extends MappedFieldType {
+
+
+        public FakeStringFieldType() {
+        }
+
+        protected FakeStringFieldType(FakeStringFieldType ref) {
+            super(ref);
+        }
+
+        public FakeStringFieldType clone() {
+            return new FakeStringFieldType(this);
+        }
+
+        @Override
+        public String typeName() {
+            return CONTENT_TYPE;
+        }
+
+        @Override
+        public Query nullValueQuery() {
+            if (nullValue() == null) {
+                return null;
+            }
+            return termQuery(nullValue(), null);
+        }
+
+        @Override
+        public Query regexpQuery(String value, int flags, int maxDeterminizedStates, @Nullable MultiTermQuery.RewriteMethod method,
+                                 @Nullable QueryShardContext context) {
+            RegexpQuery query = new RegexpQuery(new Term(name(), indexedValueForSearch(value)), flags, maxDeterminizedStates);
+            if (method != null) {
+                query.setRewriteMethod(method);
+            }
+            return query;
+        }
+    }
+
+    protected FakeStringFieldMapper(String simpleName, FakeStringFieldType fieldType, MappedFieldType defaultFieldType,
+                                    Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
+        super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
+    }
+
+    @Override
+    protected StringFieldMapper clone() {
+        return (StringFieldMapper) super.clone();
+    }
+
+    @Override
+    protected boolean customBoost() {
+        return true;
+    }
+
+    @Override
+    protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
+        StringFieldMapper.ValueAndBoost valueAndBoost = parseCreateFieldForString(context, fieldType().boost());
+        if (valueAndBoost.value() == null) {
+            return;
+        }
+        if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
+            Field field = new Field(fieldType().name(), valueAndBoost.value(), fieldType());
+            fields.add(field);
+        }
+        if (fieldType().hasDocValues()) {
+            fields.add(new SortedSetDocValuesField(fieldType().name(), new BytesRef(valueAndBoost.value())));
+        }
+    }
+
+    public static StringFieldMapper.ValueAndBoost parseCreateFieldForString(ParseContext context, float defaultBoost) throws IOException {
+        if (context.externalValueSet()) {
+            return new StringFieldMapper.ValueAndBoost(context.externalValue().toString(), defaultBoost);
+        }
+        XContentParser parser = context.parser();
+        return new StringFieldMapper.ValueAndBoost(parser.textOrNull(), defaultBoost);
+    }
+
+    @Override
+    protected String contentType() {
+        return CONTENT_TYPE;
+    }
+
+    @Override
+    protected void doMerge(Mapper mergeWith, boolean updateAllTypes) {
+        super.doMerge(mergeWith, updateAllTypes);
+    }
+
+    @Override
+    public FakeStringFieldType fieldType() {
+        return (FakeStringFieldType) super.fieldType();
+    }
+
+    @Override
+    protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
+        super.doXContentBody(builder, includeDefaults, params);
+        doXContentAnalyzers(builder, includeDefaults);
+    }
+
+}

+ 97 - 1
core/src/test/java/org/elasticsearch/search/highlight/HighlighterSearchIT.java

@@ -19,10 +19,11 @@
 package org.elasticsearch.search.highlight;
 
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
-
+import org.elasticsearch.Version;
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.settings.Settings.Builder;
 import org.elasticsearch.common.xcontent.XContentBuilder;
@@ -36,15 +37,18 @@ import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.index.search.MatchQuery;
 import org.elasticsearch.index.search.MatchQuery.Type;
+import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.rest.RestStatus;
 import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.search.builder.SearchSourceBuilder;
 import org.elasticsearch.search.highlight.HighlightBuilder.Field;
 import org.elasticsearch.test.ESIntegTestCase;
+import org.elasticsearch.test.InternalSettingsPlugin;
 import org.hamcrest.Matcher;
 import org.hamcrest.Matchers;
 
 import java.io.IOException;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -85,6 +89,12 @@ import static org.hamcrest.Matchers.not;
 import static org.hamcrest.Matchers.startsWith;
 
 public class HighlighterSearchIT extends ESIntegTestCase {
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return pluginList(InternalSettingsPlugin.class);
+    }
+
     public void testHighlightingWithWildcardName() throws IOException {
         // test the kibana case with * as fieldname that will try highlight all fields including meta fields
         XContentBuilder mappings = jsonBuilder();
@@ -2542,4 +2552,90 @@ public class HighlighterSearchIT extends ESIntegTestCase {
         response = search.setQuery(boostingQuery(phrase, terms).boost(1).negativeBoost(1/boost)).get();
         assertHighlight(response, 0, "field1", 0, 1, highlightedMatcher);
     }
+
+    public void testGeoFieldHighlighting() throws IOException {
+        // check that we do not get an exception for geo_point fields in case someone tries to highlight
+        // it accidential with a wildcard
+        // see https://github.com/elastic/elasticsearch/issues/17537
+        XContentBuilder mappings = jsonBuilder();
+        mappings.startObject();
+        mappings.startObject("type")
+            .startObject("properties")
+            .startObject("geo_point")
+            .field("type", "geo_point")
+            .endObject()
+            .endObject()
+            .endObject();
+        mappings.endObject();
+        assertAcked(prepareCreate("test")
+            .addMapping("type", mappings));
+        ensureYellow();
+
+        client().prepareIndex("test", "type", "1")
+            .setSource(jsonBuilder().startObject().field("geo_point", "60.12,100.34").endObject())
+            .get();
+        refresh();
+        SearchResponse search = client().prepareSearch().setSource(
+            new SearchSourceBuilder().query(QueryBuilders.geoBoundingBoxQuery("geo_point").setCorners(61.10078883158897, -170.15625,
+                -64.92354174306496, 118.47656249999999)).highlighter(new HighlightBuilder().field("*"))).get();
+        assertNoFailures(search);
+        assertThat(search.getHits().totalHits(), equalTo(1L));
+    }
+
+    public void testKeywordFieldHighlighting() throws IOException {
+        // check that keyword highlighting works
+        XContentBuilder mappings = jsonBuilder();
+        mappings.startObject();
+        mappings.startObject("type")
+            .startObject("properties")
+            .startObject("keyword_field")
+            .field("type", "keyword")
+            .endObject()
+            .endObject()
+            .endObject();
+        mappings.endObject();
+        assertAcked(prepareCreate("test")
+            .addMapping("type", mappings));
+        ensureYellow();
+
+        client().prepareIndex("test", "type", "1")
+            .setSource(jsonBuilder().startObject().field("keyword_field", "some text").endObject())
+            .get();
+        refresh();
+        SearchResponse search = client().prepareSearch().setSource(
+            new SearchSourceBuilder().query(QueryBuilders.matchQuery("keyword_field", "some text")).highlighter(new HighlightBuilder().field("*")))
+            .get();
+        assertNoFailures(search);
+        assertThat(search.getHits().totalHits(), equalTo(1L));
+        assertThat(search.getHits().getAt(0).getHighlightFields().get("keyword_field").getFragments()[0].string(), equalTo("<em>some text</em>"));
+    }
+
+    public void testStringFieldHighlighting() throws IOException {
+        // check that string field highlighting on old indexes works
+        XContentBuilder mappings = jsonBuilder();
+        mappings.startObject();
+        mappings.startObject("type")
+            .startObject("properties")
+            .startObject("string_field")
+            .field("type", "string")
+            .endObject()
+            .endObject()
+            .endObject();
+        mappings.endObject();
+        assertAcked(prepareCreate("test")
+            .addMapping("type", mappings)
+        .setSettings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_3_2)));
+        ensureYellow();
+
+        client().prepareIndex("test", "type", "1")
+            .setSource(jsonBuilder().startObject().field("string_field", "some text").endObject())
+            .get();
+        refresh();
+        SearchResponse search = client().prepareSearch().setSource(
+            new SearchSourceBuilder().query(QueryBuilders.matchQuery("string_field", "some text")).highlighter(new HighlightBuilder().field("*")))
+            .get();
+        assertNoFailures(search);
+        assertThat(search.getHits().totalHits(), equalTo(1L));
+        assertThat(search.getHits().getAt(0).getHighlightFields().get("string_field").getFragments()[0].string(), equalTo("<em>some</em> <em>text</em>"));
+    }
 }

+ 4 - 1
docs/reference/search/request/highlighting.asciidoc

@@ -35,7 +35,10 @@ be used for highlighting if it mapped to have `store` set to `true`.
 ==================================
 
 The field name supports wildcard notation. For example, using `comment_*`
-will cause all fields that match the expression to be highlighted.
+will cause all <<text,text>> and <<keyword,keyword>> fields (and <<string,string>>
+from versions before 5.0) that match the expression to be highlighted.
+Note that all other fields will not be highlighted. If you use a custom mapper and want to
+highlight on a field anyway, you have to provide the field name explicitly.
 
 [[plain-highlighter]]
 ==== Plain highlighter