Преглед изворни кода

Save one utf8 conversion in KeywordFieldMapper. #19867

If a `keyword` field is both indexed and doc-valued, then we convert the
input string to UTF-8 bytes twice: once for indexing/storing, and once for doc
values. This commit changes `keyword` fields to compute the UTF-8 representation
up-front and then feed both the inverted index and doc values with it.

Rather than adding version-based bw compat logic, I broke backward compatibility
for `keyword` fields (they are now indexed/stored as binary fields rather than
strings), which is fine since we are still on alpha releases for 5.0.
Adrien Grand пре 9 година
родитељ
комит
c44679d952

+ 1 - 4
core/src/main/java/org/elasticsearch/index/get/ShardGetService.java

@@ -368,10 +368,7 @@ public final class ShardGetService extends AbstractIndexShardComponent {
                     }
 
                     List<Object> values = searchLookup.source().extractRawValues(field);
-                    if (!values.isEmpty()) {
-                        for (int i = 0; i < values.size(); i++) {
-                            values.set(i, fieldMapper.fieldType().valueForSearch(values.get(i)));
-                        }
+                    if (values.isEmpty() == false) {
                         value = values;
                     }
                 }

+ 14 - 2
core/src/main/java/org/elasticsearch/index/mapper/core/KeywordFieldMapper.java

@@ -168,6 +168,16 @@ public final class KeywordFieldMapper extends FieldMapper implements AllFieldMap
             failIfNoDocValues();
             return new DocValuesIndexFieldData.Builder();
         }
+
+        @Override
+        public Object valueForSearch(Object value) {
+            if (value == null) {
+                return null;
+            }
+            // keywords are internally stored as utf8 bytes
+            BytesRef binaryValue = (BytesRef) value;
+            return binaryValue.utf8ToString();
+        }
     }
 
     private Boolean includeInAll;
@@ -252,12 +262,14 @@ public final class KeywordFieldMapper extends FieldMapper implements AllFieldMap
             context.allEntries().addText(fieldType().name(), value, fieldType().boost());
         }
 
+        // convert to utf8 only once before feeding postings/dv/stored fields
+        final BytesRef binaryValue = new BytesRef(value);
         if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
-            Field field = new Field(fieldType().name(), value, fieldType());
+            Field field = new Field(fieldType().name(), binaryValue, fieldType());
             fields.add(field);
         }
         if (fieldType().hasDocValues()) {
-            fields.add(new SortedSetDocValuesField(fieldType().name(), new BytesRef(value)));
+            fields.add(new SortedSetDocValuesField(fieldType().name(), binaryValue));
         }
     }
 

+ 15 - 6
core/src/main/java/org/elasticsearch/index/mapper/geo/BaseGeoPointFieldMapper.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.index.mapper.geo;
 
 import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.search.Query;
 import org.elasticsearch.common.geo.GeoHashUtils;
 import org.apache.lucene.util.LegacyNumericUtils;
@@ -47,6 +48,7 @@ import org.elasticsearch.index.mapper.core.LegacyDoubleFieldMapper;
 import org.elasticsearch.index.mapper.core.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.core.LegacyNumberFieldMapper;
 import org.elasticsearch.index.mapper.core.NumberFieldMapper;
+import org.elasticsearch.index.mapper.core.StringFieldMapper;
 import org.elasticsearch.index.mapper.object.ArrayValueMapperParser;
 import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.index.query.QueryShardException;
@@ -149,7 +151,7 @@ public abstract class BaseGeoPointFieldMapper extends FieldMapper implements Arr
 
         public abstract Y build(BuilderContext context, String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
                                 Settings indexSettings, FieldMapper latMapper, FieldMapper lonMapper,
-                                KeywordFieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed, CopyTo copyTo);
+                                FieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed, CopyTo copyTo);
 
         public Y build(Mapper.BuilderContext context) {
             GeoPointFieldType geoPointFieldType = (GeoPointFieldType)fieldType;
@@ -176,10 +178,17 @@ public abstract class BaseGeoPointFieldMapper extends FieldMapper implements Arr
                 }
                 geoPointFieldType.setLatLonEnabled(latMapper.fieldType(), lonMapper.fieldType());
             }
-            KeywordFieldMapper geoHashMapper = null;
+            FieldMapper geoHashMapper = null;
             if (enableGeoHash || enableGeoHashPrefix) {
                 // TODO: possible also implicitly enable geohash if geohash precision is set
-                geoHashMapper = new KeywordFieldMapper.Builder(Names.GEOHASH).index(true).includeInAll(false).store(fieldType.stored()).build(context);
+                if (context.indexCreatedVersion().onOrAfter(Version.V_5_0_0_alpha1)) {
+                    geoHashMapper = new KeywordFieldMapper.Builder(Names.GEOHASH)
+                            .index(true).includeInAll(false).store(fieldType.stored()).build(context);
+                } else {
+                    geoHashMapper = new StringFieldMapper.Builder(Names.GEOHASH)
+                            .tokenized(false).index(true).omitNorms(true).indexOptions(IndexOptions.DOCS)
+                            .includeInAll(false).store(fieldType.stored()).build(context);
+                }
                 geoPointFieldType.setGeoHashEnabled(geoHashMapper.fieldType(), geoHashPrecision, enableGeoHashPrefix);
             }
             context.path().remove();
@@ -380,12 +389,12 @@ public abstract class BaseGeoPointFieldMapper extends FieldMapper implements Arr
 
     protected FieldMapper lonMapper;
 
-    protected KeywordFieldMapper geoHashMapper;
+    protected FieldMapper geoHashMapper;
 
     protected Explicit<Boolean> ignoreMalformed;
 
     protected BaseGeoPointFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, Settings indexSettings,
-                                      FieldMapper latMapper, FieldMapper lonMapper, KeywordFieldMapper geoHashMapper,
+                                      FieldMapper latMapper, FieldMapper lonMapper, FieldMapper geoHashMapper,
                                       MultiFields multiFields, Explicit<Boolean> ignoreMalformed, CopyTo copyTo) {
         super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
         this.latMapper = latMapper;
@@ -556,7 +565,7 @@ public abstract class BaseGeoPointFieldMapper extends FieldMapper implements Arr
     @Override
     public FieldMapper updateFieldType(Map<String, MappedFieldType> fullNameToFieldType) {
         BaseGeoPointFieldMapper updated = (BaseGeoPointFieldMapper) super.updateFieldType(fullNameToFieldType);
-        KeywordFieldMapper geoUpdated = geoHashMapper == null ? null : (KeywordFieldMapper) geoHashMapper.updateFieldType(fullNameToFieldType);
+        FieldMapper geoUpdated = geoHashMapper == null ? null : geoHashMapper.updateFieldType(fullNameToFieldType);
         FieldMapper latUpdated = latMapper == null ? null : latMapper.updateFieldType(fullNameToFieldType);
         FieldMapper lonUpdated = lonMapper == null ? null : lonMapper.updateFieldType(fullNameToFieldType);
         if (updated == this

+ 2 - 2
core/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java

@@ -79,7 +79,7 @@ public class GeoPointFieldMapper extends BaseGeoPointFieldMapper  {
         @Override
         public GeoPointFieldMapper build(BuilderContext context, String simpleName, MappedFieldType fieldType,
                                          MappedFieldType defaultFieldType, Settings indexSettings, FieldMapper latMapper,
-                                         FieldMapper lonMapper, KeywordFieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed,
+                                         FieldMapper lonMapper, FieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed,
                                          CopyTo copyTo) {
             fieldType.setTokenized(false);
             if (context.indexCreatedVersion().before(Version.V_2_3_0)) {
@@ -110,7 +110,7 @@ public class GeoPointFieldMapper extends BaseGeoPointFieldMapper  {
 
     public GeoPointFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, Settings indexSettings,
                                FieldMapper latMapper, FieldMapper lonMapper,
-                               KeywordFieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed, CopyTo copyTo) {
+                               FieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed, CopyTo copyTo) {
         super(simpleName, fieldType, defaultFieldType, indexSettings, latMapper, lonMapper, geoHashMapper, multiFields,
                 ignoreMalformed, copyTo);
     }

+ 2 - 4
core/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapperLegacy.java

@@ -25,7 +25,6 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.Explicit;
-import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.geo.GeoDistance;
 import org.elasticsearch.common.geo.GeoPoint;
 import org.elasticsearch.common.geo.GeoUtils;
@@ -40,7 +39,6 @@ import org.elasticsearch.index.mapper.MapperParsingException;
 import org.elasticsearch.index.mapper.ParseContext;
 import org.elasticsearch.index.mapper.CustomDocValuesField;
 import org.elasticsearch.index.mapper.FieldMapper;
-import org.elasticsearch.index.mapper.core.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.object.ArrayValueMapperParser;
 
 import java.io.IOException;
@@ -109,7 +107,7 @@ public class GeoPointFieldMapperLegacy extends BaseGeoPointFieldMapper implement
         @Override
         public GeoPointFieldMapperLegacy build(BuilderContext context, String simpleName, MappedFieldType fieldType,
                                                MappedFieldType defaultFieldType, Settings indexSettings, FieldMapper latMapper,
-                                               FieldMapper lonMapper, KeywordFieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed,
+                                               FieldMapper lonMapper, FieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed,
                                                CopyTo copyTo) {
             fieldType.setTokenized(false);
             setupFieldType(context);
@@ -267,7 +265,7 @@ public class GeoPointFieldMapperLegacy extends BaseGeoPointFieldMapper implement
 
     public GeoPointFieldMapperLegacy(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, Settings indexSettings,
                                      FieldMapper latMapper, FieldMapper lonMapper,
-                                     KeywordFieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed,
+                                     FieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed,
                                      Explicit<Boolean> coerce, CopyTo copyTo) {
         super(simpleName, fieldType, defaultFieldType, indexSettings, latMapper, lonMapper, geoHashMapper, multiFields,
                 ignoreMalformed, copyTo);

+ 2 - 2
core/src/test/java/org/elasticsearch/index/mapper/core/KeywordFieldMapperTests.java

@@ -78,7 +78,7 @@ public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(2, fields.length);
         
-        assertEquals("1234", fields[0].stringValue());
+        assertEquals(new BytesRef("1234"), fields[0].binaryValue());
         IndexableFieldType fieldType = fields[0].fieldType();
         assertThat(fieldType.omitNorms(), equalTo(true));
         assertFalse(fieldType.tokenized());
@@ -163,7 +163,7 @@ public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
 
         fields = doc.rootDoc().getFields("field");
         assertEquals(2, fields.length);
-        assertEquals("uri", fields[0].stringValue());
+        assertEquals(new BytesRef("uri"), fields[0].binaryValue());
     }
 
     public void testEnableStore() throws IOException {

+ 7 - 6
core/src/test/java/org/elasticsearch/index/mapper/dynamictemplate/simple/SimpleDynamicTemplatesTests.java

@@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper.dynamictemplate.simple;
 
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.json.JsonXContent;
@@ -80,7 +81,7 @@ public class SimpleDynamicTemplatesTests extends ESSingleNodeTestCase {
 
         IndexableField f = doc.getField("name");
         assertThat(f.name(), equalTo("name"));
-        assertThat(f.stringValue(), equalTo("some name"));
+        assertThat(f.binaryValue(), equalTo(new BytesRef("some name")));
         assertNotSame(IndexOptions.NONE, f.fieldType().indexOptions());
         assertThat(f.fieldType().tokenized(), equalTo(false));
 
@@ -98,7 +99,7 @@ public class SimpleDynamicTemplatesTests extends ESSingleNodeTestCase {
 
         f = doc.getField("multi1.org");
         assertThat(f.name(), equalTo("multi1.org"));
-        assertThat(f.stringValue(), equalTo("multi 1"));
+        assertThat(f.binaryValue(), equalTo(new BytesRef("multi 1")));
         assertNotSame(IndexOptions.NONE, f.fieldType().indexOptions());
         assertThat(f.fieldType().tokenized(), equalTo(false));
 
@@ -116,7 +117,7 @@ public class SimpleDynamicTemplatesTests extends ESSingleNodeTestCase {
 
         f = doc.getField("multi2.org");
         assertThat(f.name(), equalTo("multi2.org"));
-        assertThat(f.stringValue(), equalTo("multi 2"));
+        assertThat(f.binaryValue(), equalTo(new BytesRef("multi 2")));
         assertNotSame(IndexOptions.NONE, f.fieldType().indexOptions());
         assertThat(f.fieldType().tokenized(), equalTo(false));
 
@@ -137,7 +138,7 @@ public class SimpleDynamicTemplatesTests extends ESSingleNodeTestCase {
 
         IndexableField f = doc.getField("name");
         assertThat(f.name(), equalTo("name"));
-        assertThat(f.stringValue(), equalTo("some name"));
+        assertThat(f.binaryValue(), equalTo(new BytesRef("some name")));
         assertNotSame(IndexOptions.NONE, f.fieldType().indexOptions());
         assertThat(f.fieldType().tokenized(), equalTo(false));
 
@@ -155,7 +156,7 @@ public class SimpleDynamicTemplatesTests extends ESSingleNodeTestCase {
 
         f = doc.getField("multi1.org");
         assertThat(f.name(), equalTo("multi1.org"));
-        assertThat(f.stringValue(), equalTo("multi 1"));
+        assertThat(f.binaryValue(), equalTo(new BytesRef("multi 1")));
         assertNotSame(IndexOptions.NONE, f.fieldType().indexOptions());
         assertThat(f.fieldType().tokenized(), equalTo(false));
 
@@ -173,7 +174,7 @@ public class SimpleDynamicTemplatesTests extends ESSingleNodeTestCase {
 
         f = doc.getField("multi2.org");
         assertThat(f.name(), equalTo("multi2.org"));
-        assertThat(f.stringValue(), equalTo("multi 2"));
+        assertThat(f.binaryValue(), equalTo(new BytesRef("multi 2")));
         assertNotSame(IndexOptions.NONE, f.fieldType().indexOptions());
         assertThat(f.fieldType().tokenized(), equalTo(false));
 

+ 2 - 1
core/src/test/java/org/elasticsearch/index/mapper/externalvalues/SimpleExternalMappingTests.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.index.mapper.externalvalues;
 
 import org.apache.lucene.spatial.geopoint.document.GeoPointField;
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.Version;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.compress.CompressedXContent;
@@ -155,7 +156,7 @@ public class SimpleExternalMappingTests extends ESSingleNodeTestCase {
         assertThat(doc.rootDoc().getField("field.field").stringValue(), is("foo"));
 
         assertThat(doc.rootDoc().getField("field.field.raw"), notNullValue());
-        assertThat(doc.rootDoc().getField("field.field.raw").stringValue(), is("foo"));
+        assertThat(doc.rootDoc().getField("field.field.raw").binaryValue(), is(new BytesRef("foo")));
     }
 
     public void testExternalValuesWithMultifieldTwoLevels() throws Exception {

+ 19 - 4
core/src/test/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapperTests.java

@@ -20,6 +20,7 @@ package org.elasticsearch.index.mapper.geo;
 
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.spatial.geopoint.document.GeoPointField;
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.Version;
 import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
@@ -112,7 +113,11 @@ public class GeoPointFieldMapperTests extends ESSingleNodeTestCase {
 
         assertThat(doc.rootDoc().getField("point.lat"), notNullValue());
         assertThat(doc.rootDoc().getField("point.lon"), notNullValue());
-        assertThat(doc.rootDoc().get("point.geohash"), equalTo(stringEncode(1.3, 1.2)));
+        if (version.onOrAfter(Version.V_5_0_0_alpha1)) {
+            assertThat(doc.rootDoc().getBinaryValue("point.geohash"), equalTo(new BytesRef(stringEncode(1.3, 1.2))));
+        } else {
+            assertThat(doc.rootDoc().get("point.geohash"), equalTo(stringEncode(1.3, 1.2)));
+        }
     }
 
     public void testLatLonInOneValueWithGeohash() throws Exception {
@@ -132,7 +137,11 @@ public class GeoPointFieldMapperTests extends ESSingleNodeTestCase {
 
         assertThat(doc.rootDoc().getField("point.lat"), notNullValue());
         assertThat(doc.rootDoc().getField("point.lon"), notNullValue());
-        assertThat(doc.rootDoc().get("point.geohash"), equalTo(stringEncode(1.3, 1.2)));
+        if (version.onOrAfter(Version.V_5_0_0_alpha1)) {
+            assertThat(doc.rootDoc().getBinaryValue("point.geohash"), equalTo(new BytesRef(stringEncode(1.3, 1.2))));
+        } else {
+            assertThat(doc.rootDoc().get("point.geohash"), equalTo(stringEncode(1.3, 1.2)));
+        }
     }
 
     public void testGeoHashIndexValue() throws Exception {
@@ -152,7 +161,11 @@ public class GeoPointFieldMapperTests extends ESSingleNodeTestCase {
 
         assertThat(doc.rootDoc().getField("point.lat"), notNullValue());
         assertThat(doc.rootDoc().getField("point.lon"), notNullValue());
-        assertThat(doc.rootDoc().get("point.geohash"), equalTo(stringEncode(1.3, 1.2)));
+        if (version.onOrAfter(Version.V_5_0_0_alpha1)) {
+            assertThat(doc.rootDoc().getBinaryValue("point.geohash"), equalTo(new BytesRef(stringEncode(1.3, 1.2))));
+        } else {
+            assertThat(doc.rootDoc().get("point.geohash"), equalTo(stringEncode(1.3, 1.2)));
+        }
     }
 
     public void testGeoHashValue() throws Exception {
@@ -848,7 +861,9 @@ public class GeoPointFieldMapperTests extends ESSingleNodeTestCase {
 
         final int numHashes = hashes.size();
         for(int i=0; i<numHashes; ++i) {
-            assertEquals("dr5regy6rc6y".substring(0, numHashes-i), hashes.get(i));
+            String hash = "dr5regy6rc6y".substring(0, numHashes-i);
+            Object expected = version.before(Version.V_5_0_0_alpha1) ? hash : new BytesRef(hash);
+            assertEquals(expected, hashes.get(i));
         }
     }
 

+ 2 - 1
core/src/test/java/org/elasticsearch/index/mapper/geo/GeohashMappingGeoPointTests.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.index.mapper.geo;
 
 import org.apache.lucene.spatial.geopoint.document.GeoPointField;
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.Version;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.compress.CompressedXContent;
@@ -117,7 +118,7 @@ public class GeohashMappingGeoPointTests extends ESSingleNodeTestCase {
 
         assertThat(doc.rootDoc().getField("point.lat"), nullValue());
         assertThat(doc.rootDoc().getField("point.lon"), nullValue());
-        assertThat(doc.rootDoc().get("point.geohash"), equalTo(stringEncode(1.3, 1.2)));
+        assertThat(doc.rootDoc().getBinaryValue("point.geohash"), equalTo(new BytesRef(stringEncode(1.3, 1.2))));
         assertThat(doc.rootDoc().get("point"), notNullValue());
     }
 

+ 2 - 1
core/src/test/java/org/elasticsearch/index/mapper/multifield/MultiFieldTests.java

@@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper.multifield;
 
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.compress.CompressedXContent;
@@ -89,7 +90,7 @@ public class MultiFieldTests extends ESSingleNodeTestCase {
 
         f = doc.getField("object1.multi1.string");
         assertThat(f.name(), equalTo("object1.multi1.string"));
-        assertThat(f.stringValue(), equalTo("2010-01-01"));
+        assertThat(f.binaryValue(), equalTo(new BytesRef("2010-01-01")));
 
         assertThat(docMapper.mappers().getMapper("name"), notNullValue());
         assertThat(docMapper.mappers().getMapper("name"), instanceOf(TextFieldMapper.class));